246
266
size_t use_mem, uint32_t division_limit,
247
267
uint32_t age_threshold)
250
(void)key_cache_block_size;
252
(void)division_limit;
254
memset(keycache, 0, sizeof(KEY_CACHE));
269
uint32_t blocks, hash_links;
272
assert(key_cache_block_size >= 512);
274
if (keycache->key_cache_inited && keycache->disk_blocks > 0)
279
keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
280
keycache->global_cache_read= keycache->global_cache_write= 0;
281
keycache->disk_blocks= -1;
282
if (! keycache->key_cache_inited)
284
keycache->key_cache_inited= 1;
286
Initialize these variables once only.
287
Their value must survive re-initialization during resizing.
289
keycache->in_resize= 0;
290
keycache->resize_in_flush= 0;
291
keycache->cnt_for_resize_op= 0;
292
keycache->waiting_for_resize_cnt.last_thread= NULL;
293
keycache->in_init= 0;
294
pthread_mutex_init(&keycache->cache_lock, MY_MUTEX_INIT_FAST);
295
keycache->resize_queue.last_thread= NULL;
298
keycache->key_cache_mem_size= use_mem;
299
keycache->key_cache_block_size= key_cache_block_size;
301
blocks= (uint32_t) (use_mem / (sizeof(BLOCK_LINK) + 2 * sizeof(HASH_LINK) +
302
sizeof(HASH_LINK*) * 5/4 + key_cache_block_size));
303
/* It doesn't make sense to have too few blocks (less than 8) */
308
/* Set my_hash_entries to the next bigger 2 power */
309
if ((keycache->hash_entries= next_power(blocks)) < blocks * 5/4)
310
keycache->hash_entries<<= 1;
311
hash_links= 2 * blocks;
312
#if defined(MAX_THREADS)
313
if (hash_links < MAX_THREADS + blocks - 1)
314
hash_links= MAX_THREADS + blocks - 1;
316
while ((length= (ALIGN_SIZE(blocks * sizeof(BLOCK_LINK)) +
317
ALIGN_SIZE(hash_links * sizeof(HASH_LINK)) +
318
ALIGN_SIZE(sizeof(HASH_LINK*) *
319
keycache->hash_entries))) +
320
((size_t) blocks * keycache->key_cache_block_size) > use_mem)
322
/* Allocate memory for cache page buffers */
323
if ((keycache->block_mem= (unsigned char *)malloc((size_t) blocks * keycache->key_cache_block_size)))
326
Allocate memory for blocks, hash_links and hash entries;
327
For each block 2 hash links are allocated
329
if ((keycache->block_root= (BLOCK_LINK*) malloc(length)))
331
free(keycache->block_mem);
332
keycache->block_mem= 0;
337
my_error(EE_OUTOFMEMORY, MYF(0), blocks * keycache->key_cache_block_size);
340
blocks= blocks / 4*3;
342
keycache->blocks_unused= blocks;
343
keycache->disk_blocks= (int) blocks;
344
keycache->hash_links= hash_links;
345
keycache->hash_root= (HASH_LINK**) ((char*) keycache->block_root +
346
ALIGN_SIZE(blocks*sizeof(BLOCK_LINK)));
347
keycache->hash_link_root= (HASH_LINK*) ((char*) keycache->hash_root +
348
ALIGN_SIZE((sizeof(HASH_LINK*) *
349
keycache->hash_entries)));
350
memset(keycache->block_root, 0,
351
keycache->disk_blocks * sizeof(BLOCK_LINK));
352
memset(keycache->hash_root, 0,
353
keycache->hash_entries * sizeof(HASH_LINK*));
354
memset(keycache->hash_link_root, 0,
355
keycache->hash_links * sizeof(HASH_LINK));
356
keycache->hash_links_used= 0;
357
keycache->free_hash_list= NULL;
358
keycache->blocks_used= keycache->blocks_changed= 0;
360
keycache->global_blocks_changed= 0;
361
keycache->blocks_available=0; /* For debugging */
363
/* The LRU chain is empty after initialization */
364
keycache->used_last= NULL;
365
keycache->used_ins= NULL;
366
keycache->free_block_list= NULL;
367
keycache->keycache_time= 0;
368
keycache->warm_blocks= 0;
369
keycache->min_warm_blocks= (division_limit ?
370
blocks * division_limit / 100 + 1 :
372
keycache->age_threshold= (age_threshold ?
373
blocks * age_threshold / 100 :
376
keycache->can_be_used= 1;
378
keycache->waiting_for_hash_link.last_thread= NULL;
379
keycache->waiting_for_block.last_thread= NULL;
380
memset(keycache->changed_blocks, 0,
381
sizeof(keycache->changed_blocks[0]) * CHANGED_BLOCKS_HASH);
382
memset(keycache->file_blocks, 0,
383
sizeof(keycache->file_blocks[0]) * CHANGED_BLOCKS_HASH);
387
/* key_buffer_size is specified too small. Disable the cache. */
388
keycache->can_be_used= 0;
391
keycache->blocks= keycache->disk_blocks > 0 ? keycache->disk_blocks : 0;
392
return((int) keycache->disk_blocks);
396
keycache->disk_blocks= 0;
398
if (keycache->block_mem)
400
free(keycache->block_mem);
401
keycache->block_mem= NULL;
403
if (keycache->block_root)
405
free((unsigned char*) keycache->block_root);
406
keycache->block_root= NULL;
409
keycache->can_be_used= 0;
419
keycache pointer to a key cache data structure
420
key_cache_block_size size of blocks to keep cached data
421
use_mem total memory to use for the new key cache
422
division_limit new division limit (if not zero)
423
age_threshold new age threshold (if not zero)
426
number of blocks in the key cache, if successful,
430
The function first compares the memory size and the block size parameters
431
with the key cache values.
433
If they differ the function free the the memory allocated for the
434
old key cache blocks by calling the end_key_cache function and
435
then rebuilds the key cache with new blocks by calling
438
The function starts the operation only when all other threads
439
performing operations with the key cache let her to proceed
440
(when cnt_for_resize=0).
443
int resize_key_cache(KEY_CACHE *keycache, uint32_t key_cache_block_size,
                     size_t use_mem, uint32_t division_limit,
                     uint32_t age_threshold)
{
  int blocks= 0;

  /* Nothing to do if the cache was never initialized. */
  if (!keycache->key_cache_inited)
    return(keycache->disk_blocks);

  if(key_cache_block_size == keycache->key_cache_block_size &&
     use_mem == keycache->key_cache_mem_size)
  {
    /* Same geometry: only the tuning parameters need to change. */
    change_key_cache_param(keycache, division_limit, age_threshold);
    return(keycache->disk_blocks);
  }

  keycache_pthread_mutex_lock(&keycache->cache_lock);

  /*
    We may need to wait for another thread which is doing a resize
    already. This cannot happen in the MySQL server though. It allows
    one resizer only. In set_var.cc keycache->in_init is used to block
    multiple attempts.
  */
  while (keycache->in_resize)
  {
    wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
  }

  /*
    Mark the operation in progress. This blocks other threads from doing
    a resize in parallel. It prohibits new blocks to enter the cache.
    Read/write requests can bypass the cache during the flush phase.
  */
  keycache->in_resize= 1;

  /* Need to flush only if keycache is enabled. */
  if (keycache->can_be_used)
  {
    /* Start the flush phase. */
    keycache->resize_in_flush= 1;

    if (flush_all_key_blocks(keycache))
    {
      /* TODO: if this happens, we should write a warning in the log file ! */
      keycache->resize_in_flush= 0;
      blocks= 0;
      keycache->can_be_used= 0;
      goto finish;
    }

    /* End the flush phase. */
    keycache->resize_in_flush= 0;
  }

  /*
    Some direct read/write operations (bypassing the cache) may still be
    unfinished. Wait until they are done. If the key cache can be used,
    direct I/O is done in increments of key_cache_block_size. That is,
    every block is checked if it is in the cache. We need to wait for
    pending I/O before re-initializing the cache, because we may change
    the block size. Otherwise they could check for blocks at file
    positions where the new block division has none. We do also want to
    wait for I/O done when (if) the cache was disabled. It must not
    run in parallel with normal cache operation.
  */
  while (keycache->cnt_for_resize_op)
    wait_on_queue(&keycache->waiting_for_resize_cnt, &keycache->cache_lock);

  /*
    Free old cache structures, allocate new structures, and initialize
    them. Note that the cache_lock mutex and the resize_queue are left
    untouched. We do not lose the cache_lock and will release it only at
    the end of this function.
  */
  end_key_cache(keycache, 0);			/* Don't free mutex */
  /* The following will work even if use_mem is 0 */
  blocks= init_key_cache(keycache, key_cache_block_size, use_mem,
                         division_limit, age_threshold);

finish:
  /*
    Mark the resize finished. This allows other threads to start a
    resize or to request new cache blocks.
  */
  keycache->in_resize= 0;

  /* Signal waiting threads. */
  release_whole_queue(&keycache->resize_queue);

  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  return(blocks);
}
539
Increment counter blocking resize key cache operation
541
static inline void inc_counter_for_resize_op(KEY_CACHE *keycache)
543
keycache->cnt_for_resize_op++;
548
Decrement counter blocking resize key cache operation;
549
Signal the operation to proceed when counter becomes equal zero
551
static inline void dec_counter_for_resize_op(KEY_CACHE *keycache)
553
if (!--keycache->cnt_for_resize_op)
554
release_whole_queue(&keycache->waiting_for_resize_cnt);
558
Change the key cache parameters
561
change_key_cache_param()
562
keycache pointer to a key cache data structure
563
division_limit new division limit (if not zero)
564
age_threshold new age threshold (if not zero)
570
Presently the function resets the key cache parameters
571
concerning midpoint insertion strategy - division_limit and
575
void change_key_cache_param(KEY_CACHE *keycache, uint32_t division_limit,
576
uint32_t age_threshold)
578
keycache_pthread_mutex_lock(&keycache->cache_lock);
580
keycache->min_warm_blocks= (keycache->disk_blocks *
581
division_limit / 100 + 1);
583
keycache->age_threshold= (keycache->disk_blocks *
584
age_threshold / 100);
585
keycache_pthread_mutex_unlock(&keycache->cache_lock);
272
602
/*
  Remove a key cache from memory.

  SYNOPSIS
    end_key_cache()
    keycache		key cache handle
    cleanup		<-> also destroy the mutex and mark the cache
                        completely uninitialized (full shutdown); when
                        false, only the block memory is released so the
                        cache can be re-initialized (used by resize).

  RETURN VALUE
    none
*/
void end_key_cache(KEY_CACHE *keycache, bool cleanup)
{
  if (!keycache->key_cache_inited)
    return;

  if (keycache->disk_blocks > 0)
  {
    if (keycache->block_mem)
    {
      free(keycache->block_mem);
      keycache->block_mem= NULL;
      free((unsigned char*) keycache->block_root);
      keycache->block_root= NULL;
    }
    keycache->disk_blocks= -1;
    /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
    keycache->blocks_changed= 0;
  }

  if (cleanup)
  {
    pthread_mutex_destroy(&keycache->cache_lock);
    keycache->key_cache_inited= keycache->can_be_used= 0;
  }
} /* end_key_cache */
631
Link a thread into double-linked queue of waiting threads.
635
wqueue pointer to the queue structure
636
thread pointer to the thread to be added to the queue
642
Queue is represented by a circular list of the thread structures
643
The list is double-linked of the type (**prev,*next), accessed by
644
a pointer to the last element.
647
static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
648
struct st_my_thread_var *thread)
650
struct st_my_thread_var *last;
652
assert(!thread->next && !thread->prev);
653
if (! (last= wqueue->last_thread))
656
thread->next= thread;
657
thread->prev= &thread->next;
661
thread->prev= last->next->prev;
662
last->next->prev= &thread->next;
663
thread->next= last->next;
666
wqueue->last_thread= thread;
670
Unlink a thread from double-linked queue of waiting threads
674
wqueue pointer to the queue structure
675
thread pointer to the thread to be removed from the queue
681
See NOTES for link_into_queue
684
static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
685
struct st_my_thread_var *thread)
687
assert(thread->next && thread->prev);
688
if (thread->next == thread)
689
/* The queue contains only one member */
690
wqueue->last_thread= NULL;
693
thread->next->prev= thread->prev;
694
*thread->prev=thread->next;
695
if (wqueue->last_thread == thread)
696
wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
705
Add a thread to single-linked queue of waiting threads
709
wqueue Pointer to the queue structure.
710
mutex Cache_lock to acquire after awake.
716
Queue is represented by a circular list of the thread structures
717
The list is single-linked of the type (*next), accessed by a pointer
720
The function protects against stray signals by verifying that the
721
current thread is unlinked from the queue when awaking. However,
722
since several threads can wait for the same event, it might be
723
necessary for the caller of the function to check again if the
724
condition for awake is indeed matched.
727
static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
728
pthread_mutex_t *mutex)
730
struct st_my_thread_var *last;
731
struct st_my_thread_var *thread= my_thread_var;
734
assert(!thread->next);
735
assert(!thread->prev); /* Not required, but must be true anyway. */
736
if (! (last= wqueue->last_thread))
737
thread->next= thread;
740
thread->next= last->next;
743
wqueue->last_thread= thread;
746
Wait until thread is removed from queue by the signalling thread.
747
The loop protects against stray signals.
751
keycache_pthread_cond_wait(&thread->suspend, mutex);
753
while (thread->next);
758
Remove all threads from queue signaling them to proceed
761
release_whole_queue()
762
wqueue pointer to the queue structure
768
See notes for wait_on_queue().
769
When removed from the queue each thread is signaled via condition
770
variable thread->suspend.
773
static void release_whole_queue(KEYCACHE_WQUEUE *wqueue)
775
struct st_my_thread_var *last;
776
struct st_my_thread_var *next;
777
struct st_my_thread_var *thread;
779
/* Queue may be empty. */
780
if (!(last= wqueue->last_thread))
787
/* Signal the thread. */
788
keycache_pthread_cond_signal(&thread->suspend);
789
/* Take thread from queue. */
793
while (thread != last);
795
/* Now queue is definitely empty. */
796
wqueue->last_thread= NULL;
801
Unlink a block from the chain of dirty/clean blocks
803
static void unlink_changed(BLOCK_LINK *block)
805
assert(block->prev_changed && *block->prev_changed == block);
806
if (block->next_changed)
807
block->next_changed->prev_changed= block->prev_changed;
808
*block->prev_changed= block->next_changed;
809
block->next_changed= NULL;
810
block->prev_changed= NULL;
815
Link a block into the chain of dirty/clean blocks
818
static void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead)
820
assert(!block->next_changed);
821
assert(!block->prev_changed);
822
block->prev_changed= phead;
823
if ((block->next_changed= *phead))
824
(*phead)->prev_changed= &block->next_changed;
830
Link a block in a chain of clean blocks of a file.
834
keycache Key cache handle
835
block Block to relink
836
file File to be linked to
837
unlink If to unlink first
840
Unlink a block from whichever chain it is linked in, if it's
841
asked for, and link it to the chain of clean blocks of the
845
Please do never set/clear BLOCK_CHANGED outside of
846
link_to_file_list() or link_to_changed_list().
847
You would risk to damage correct counting of changed blocks
848
and to find blocks in the wrong hash.
854
static void link_to_file_list(KEY_CACHE *keycache,
855
BLOCK_LINK *block, int file,
858
assert(block->status & BLOCK_IN_USE);
859
assert(block->hash_link && block->hash_link->block == block);
860
assert(block->hash_link->file == file);
862
unlink_changed(block);
863
link_changed(block, &keycache->file_blocks[FILE_HASH(file)]);
864
if (block->status & BLOCK_CHANGED)
866
block->status&= ~BLOCK_CHANGED;
867
keycache->blocks_changed--;
868
keycache->global_blocks_changed--;
874
Re-link a block from the clean chain to the dirty chain of a file.
877
link_to_changed_list()
878
keycache key cache handle
879
block block to relink
882
Unlink a block from the chain of clean blocks of a file
883
and link it to the chain of dirty blocks of the same file.
886
Please do never set/clear BLOCK_CHANGED outside of
887
link_to_file_list() or link_to_changed_list().
888
You would risk to damage correct counting of changed blocks
889
and to find blocks in the wrong hash.
895
static void link_to_changed_list(KEY_CACHE *keycache,
898
assert(block->status & BLOCK_IN_USE);
899
assert(!(block->status & BLOCK_CHANGED));
900
assert(block->hash_link && block->hash_link->block == block);
902
unlink_changed(block);
904
&keycache->changed_blocks[FILE_HASH(block->hash_link->file)]);
905
block->status|=BLOCK_CHANGED;
906
keycache->blocks_changed++;
907
keycache->global_blocks_changed++;
912
Link a block to the LRU chain at the beginning or at the end of
917
keycache pointer to a key cache data structure
918
block pointer to the block to link to the LRU chain
919
hot <-> to link the block into the hot subchain
920
at_end <-> to link the block at the end of the subchain
926
The LRU ring is represented by a circular list of block structures.
927
The list is double-linked of the type (**prev,*next) type.
928
The LRU ring is divided into two parts - hot and warm.
929
There are two pointers to access the last blocks of these two
930
parts. The beginning of the warm part follows right after the
932
Only blocks of the warm part can be used for eviction.
933
The first block from the beginning of this subchain is always
934
taken for eviction (keycache->last_used->next)
936
LRU chain: +------+ H O T +------+
937
+----| end |----...<----| beg |----+
938
| +------+last +------+ |
939
v<-link in latest hot (new end) |
940
| link in latest warm (new end)->^
941
| +------+ W A R M +------+ |
942
+----| beg |---->...----| end |----+
946
It is also possible that the block is selected for eviction and thus
947
not linked in the LRU ring.
950
static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, bool hot,
956
assert((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
957
assert(block->hash_link); /*backptr to block NULL from free_block()*/
958
assert(!block->requests);
959
assert(block->prev_changed && *block->prev_changed == block);
960
assert(!block->next_used);
961
assert(!block->prev_used);
962
if (!hot && keycache->waiting_for_block.last_thread)
964
/* Signal that in the LRU warm sub-chain an available block has appeared */
965
struct st_my_thread_var *last_thread=
966
keycache->waiting_for_block.last_thread;
967
struct st_my_thread_var *first_thread= last_thread->next;
968
struct st_my_thread_var *next_thread= first_thread;
969
HASH_LINK *hash_link= (HASH_LINK *) first_thread->opt_info;
970
struct st_my_thread_var *thread;
974
next_thread= thread->next;
976
We notify about the event all threads that ask
977
for the same page as the first thread in the queue
979
if ((HASH_LINK *) thread->opt_info == hash_link)
981
keycache_pthread_cond_signal(&thread->suspend);
982
unlink_from_queue(&keycache->waiting_for_block, thread);
986
while (thread != last_thread);
987
hash_link->block= block;
989
NOTE: We assigned the block to the hash_link and signalled the
990
requesting thread(s). But it is possible that other threads runs
991
first. These threads see the hash_link assigned to a block which
992
is assigned to another hash_link and not marked BLOCK_IN_SWITCH.
993
This can be a problem for functions that do not select the block
994
via its hash_link: flush and free. They do only see a block which
995
is in a "normal" state and don't know that it will be evicted soon.
997
We cannot set BLOCK_IN_SWITCH here because only one of the
998
requesting threads must handle the eviction. All others must wait
999
for it to complete. If we set the flag here, the threads would not
1000
know who is in charge of the eviction. Without the flag, the first
1001
thread takes the stick and sets the flag.
1003
But we need to note in the block that is has been selected for
1004
eviction. It must not be freed. The evicting thread will not
1005
expect the block in the free list. Before freeing we could also
1006
check if block->requests > 1. But I think including another flag
1007
in the check of block->status is slightly more efficient and
1008
probably easier to read.
1010
block->status|= BLOCK_IN_EVICTION;
1013
pins= hot ? &keycache->used_ins : &keycache->used_last;
1017
ins->next_used->prev_used= &block->next_used;
1018
block->next_used= ins->next_used;
1019
block->prev_used= &ins->next_used;
1020
ins->next_used= block;
1026
/* The LRU ring is empty. Let the block point to itself. */
1027
keycache->used_last= keycache->used_ins= block->next_used= block;
1028
block->prev_used= &block->next_used;
1034
Unlink a block from the LRU chain
1038
keycache pointer to a key cache data structure
1039
block pointer to the block to unlink from the LRU chain
1045
See NOTES for link_block
1048
static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
1050
assert((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
1051
assert(block->hash_link); /*backptr to block NULL from free_block()*/
1052
assert(!block->requests);
1053
assert(block->prev_changed && *block->prev_changed == block);
1054
assert(block->next_used && block->prev_used &&
1055
(block->next_used->prev_used == &block->next_used) &&
1056
(*block->prev_used == block));
1057
if (block->next_used == block)
1058
/* The list contains only one member */
1059
keycache->used_last= keycache->used_ins= NULL;
1062
block->next_used->prev_used= block->prev_used;
1063
*block->prev_used= block->next_used;
1064
if (keycache->used_last == block)
1065
keycache->used_last= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
1066
if (keycache->used_ins == block)
1067
keycache->used_ins=STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
1069
block->next_used= NULL;
1070
block->prev_used= NULL;
1075
Register requests for a block.
1079
keycache Pointer to a key cache data structure.
1080
block Pointer to the block to register a request on.
1081
count Number of requests. Always 1.
1084
The first request unlinks the block from the LRU ring. This means
1085
that it is protected against eveiction.
1090
static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count)
1092
assert(block->status & BLOCK_IN_USE);
1093
assert(block->hash_link);
1095
if (!block->requests)
1096
unlink_block(keycache, block);
1097
block->requests+=count;
1102
Unregister request for a block
1103
linking it to the LRU chain if it's the last request
1107
keycache pointer to a key cache data structure
1108
block pointer to the block to link to the LRU chain
1109
at_end <-> to link the block at the end of the LRU chain
1115
Every linking to the LRU ring decrements by one a special block
1116
counter (if it's positive). If the at_end parameter is true the block is
1117
added either at the end of warm sub-chain or at the end of hot sub-chain.
1118
It is added to the hot subchain if its counter is zero and number of
1119
blocks in warm sub-chain is not less than some low limit (determined by
1120
the division_limit parameter). Otherwise the block is added to the warm
1121
sub-chain. If the at_end parameter is false the block is always added
1122
at beginning of the warm sub-chain.
1123
Thus a warm block can be promoted to the hot sub-chain when its counter
1124
becomes zero for the first time.
1125
At the same time the block at the very beginning of the hot subchain
1126
might be moved to the beginning of the warm subchain if it stays untouched
1127
for a too long time (this time is determined by parameter age_threshold).
1129
It is also possible that the block is selected for eviction and thus
1130
not linked in the LRU ring.
1133
static void unreg_request(KEY_CACHE *keycache,
1134
BLOCK_LINK *block, int at_end)
1136
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1137
assert(block->hash_link); /*backptr to block NULL from free_block()*/
1138
assert(block->requests);
1139
assert(block->prev_changed && *block->prev_changed == block);
1140
assert(!block->next_used);
1141
assert(!block->prev_used);
1142
if (! --block->requests)
1145
if (block->hits_left)
1147
hot= !block->hits_left && at_end &&
1148
keycache->warm_blocks > keycache->min_warm_blocks;
1151
if (block->temperature == BLOCK_WARM)
1152
keycache->warm_blocks--;
1153
block->temperature= BLOCK_HOT;
1155
link_block(keycache, block, hot, (bool)at_end);
1156
block->last_hit_time= keycache->keycache_time;
1157
keycache->keycache_time++;
1159
At this place, the block might be in the LRU ring or not. If an
1160
evicter was waiting for a block, it was selected for eviction and
1161
not linked in the LRU ring.
1165
Check if we should link a hot block to the warm block sub-chain.
1166
It is possible that we select the same block as above. But it can
1167
also be another block. In any case a block from the LRU ring is
1168
selected. In other words it works even if the above block was
1169
selected for eviction and not linked in the LRU ring. Since this
1170
happens only if the LRU ring is empty, the block selected below
1171
would be NULL and the rest of the function skipped.
1173
block= keycache->used_ins;
1174
if (block && keycache->keycache_time - block->last_hit_time >
1175
keycache->age_threshold)
1177
unlink_block(keycache, block);
1178
link_block(keycache, block, 0, 0);
1179
if (block->temperature != BLOCK_WARM)
1181
keycache->warm_blocks++;
1182
block->temperature= BLOCK_WARM;
1189
Remove a reader of the page in block
1192
static void remove_reader(BLOCK_LINK *block)
1194
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1195
assert(block->hash_link && block->hash_link->block == block);
1196
assert(block->prev_changed && *block->prev_changed == block);
1197
assert(!block->next_used);
1198
assert(!block->prev_used);
1199
assert(block->hash_link->requests);
1200
if (! --block->hash_link->requests && block->condvar)
1201
keycache_pthread_cond_signal(block->condvar);
1206
Wait until the last reader of the page in block
1207
signals on its termination
1210
static void wait_for_readers(KEY_CACHE *keycache,
1213
struct st_my_thread_var *thread= my_thread_var;
1214
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1215
assert(!(block->status & (BLOCK_ERROR | BLOCK_IN_FLUSH |
1217
assert(block->hash_link);
1218
assert(block->hash_link->block == block);
1219
/* Linked in file_blocks or changed_blocks hash. */
1220
assert(block->prev_changed && *block->prev_changed == block);
1221
/* Not linked in LRU ring. */
1222
assert(!block->next_used);
1223
assert(!block->prev_used);
1224
while (block->hash_link->requests)
1226
/* There must be no other waiter. We have no queue here. */
1227
assert(!block->condvar);
1228
block->condvar= &thread->suspend;
1229
keycache_pthread_cond_wait(&thread->suspend, &keycache->cache_lock);
1230
block->condvar= NULL;
280
1236
Add a hash link to a bucket in the hash_table
1250
Remove a hash link from the hash table
1253
static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link)
1255
assert(hash_link->requests == 0);
1256
if ((*hash_link->prev= hash_link->next))
1257
hash_link->next->prev= hash_link->prev;
1258
hash_link->block= NULL;
1259
if (keycache->waiting_for_hash_link.last_thread)
1261
/* Signal that a free hash link has appeared */
1262
struct st_my_thread_var *last_thread=
1263
keycache->waiting_for_hash_link.last_thread;
1264
struct st_my_thread_var *first_thread= last_thread->next;
1265
struct st_my_thread_var *next_thread= first_thread;
1266
KEYCACHE_PAGE *first_page= (KEYCACHE_PAGE *) (first_thread->opt_info);
1267
struct st_my_thread_var *thread;
1269
hash_link->file= first_page->file;
1270
hash_link->diskpos= first_page->filepos;
1273
KEYCACHE_PAGE *page;
1274
thread= next_thread;
1275
page= (KEYCACHE_PAGE *) thread->opt_info;
1276
next_thread= thread->next;
1278
We notify about the event all threads that ask
1279
for the same page as the first thread in the queue
1281
if (page->file == hash_link->file && page->filepos == hash_link->diskpos)
1283
keycache_pthread_cond_signal(&thread->suspend);
1284
unlink_from_queue(&keycache->waiting_for_hash_link, thread);
1287
while (thread != last_thread);
1288
link_hash(&keycache->hash_root[KEYCACHE_HASH(hash_link->file,
1289
hash_link->diskpos)],
1293
hash_link->next= keycache->free_hash_list;
1294
keycache->free_hash_list= hash_link;
1299
Get the hash link for a page
1302
static HASH_LINK *get_hash_link(KEY_CACHE *keycache,
1303
int file, my_off_t filepos)
1305
register HASH_LINK *hash_link, **start;
1309
Find the bucket in the hash table for the pair (file, filepos);
1310
start contains the head of the bucket list,
1311
hash_link points to the first member of the list
1313
hash_link= *(start= &keycache->hash_root[KEYCACHE_HASH(file, filepos)]);
1314
/* Look for an element for the pair (file, filepos) in the bucket chain */
1316
(hash_link->diskpos != filepos || hash_link->file != file))
1318
hash_link= hash_link->next;
1322
/* There is no hash link in the hash table for the pair (file, filepos) */
1323
if (keycache->free_hash_list)
1325
hash_link= keycache->free_hash_list;
1326
keycache->free_hash_list= hash_link->next;
1328
else if (keycache->hash_links_used < keycache->hash_links)
1330
hash_link= &keycache->hash_link_root[keycache->hash_links_used++];
1334
/* Wait for a free hash link */
1335
struct st_my_thread_var *thread= my_thread_var;
1338
page.filepos= filepos;
1339
thread->opt_info= (void *) &page;
1340
link_into_queue(&keycache->waiting_for_hash_link, thread);
1341
keycache_pthread_cond_wait(&thread->suspend,
1342
&keycache->cache_lock);
1343
thread->opt_info= NULL;
1346
hash_link->file= file;
1347
hash_link->diskpos= filepos;
1348
link_hash(start, hash_link);
1350
/* Register the request for the page */
1351
hash_link->requests++;
1358
Get a block for the file page requested by a keycache read/write operation;
1359
If the page is not in the cache return a free block, if there is none
1360
return the lru block after saving its buffer if the page is dirty.
1365
keycache pointer to a key cache data structure
1366
file handler for the file to read page from
1367
filepos position of the page in the file
1368
init_hits_left how initialize the block counter for the page
1369
wrmode <-> get for writing
1370
page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
1373
Pointer to the found block if successful, 0 - otherwise
1376
For the page from file positioned at filepos the function checks whether
1377
the page is in the key cache specified by the first parameter.
1378
If this is the case it immediately returns the block.
1379
If not, the function first chooses a block for this page. If there is
1380
no not used blocks in the key cache yet, the function takes the block
1381
at the very beginning of the warm sub-chain. It saves the page in that
1382
block if it's dirty before returning the pointer to it.
1383
The function returns in the page_st parameter the following values:
1384
PAGE_READ - if page already in the block,
1385
PAGE_TO_BE_READ - if it is to be read yet by the current thread
1386
WAIT_TO_BE_READ - if it is to be read by another thread
1387
If an error occurs THE BLOCK_ERROR bit is set in the block status.
1388
It might happen that there are no blocks in LRU chain (in warm part) -
1389
all blocks are unlinked for some read/write operations. Then the function
1390
waits until first of this operations links any block back.
1393
static BLOCK_LINK *find_key_block(KEY_CACHE *keycache,
1394
File file, my_off_t filepos,
1396
int wrmode, int *page_st)
1398
HASH_LINK *hash_link;
1405
If the flush phase of a resize operation fails, the cache is left
1406
unusable. This will be detected only after "goto restart".
1408
if (!keycache->can_be_used)
1412
Find the hash_link for the requested file block (file, filepos). We
1413
do always get a hash_link here. It has registered our request so
1414
that no other thread can use it for another file block until we
1415
release the request (which is done by remove_reader() usually). The
1416
hash_link can have a block assigned to it or not. If there is a
1417
block, it may be assigned to this hash_link or not. In cases where a
1418
block is evicted from the cache, it is taken from the LRU ring and
1419
referenced by the new hash_link. But the block can still be assigned
1420
to its old hash_link for some time if it needs to be flushed first,
1421
or if there are other threads still reading it.
1424
hash_link is always returned.
1425
hash_link->block can be:
1427
- not assigned to this hash_link or
1428
- assigned to this hash_link. If assigned, the block can have
1429
- invalid data (when freshly assigned) or
1430
- valid data. Valid data can be
1431
- changed over the file contents (dirty) or
1432
- not changed (clean).
1434
hash_link= get_hash_link(keycache, file, filepos);
1435
assert((hash_link->file == file) && (hash_link->diskpos == filepos));
1438
if ((block= hash_link->block) &&
1439
block->hash_link == hash_link && (block->status & BLOCK_READ))
1441
/* Assigned block with valid (changed or unchanged) contents. */
1442
page_status= PAGE_READ;
1445
else (page_status == -1)
1447
- block not assigned to this hash_link or
1448
- block assigned but not yet read from file (invalid data).
1451
if (keycache->in_resize)
1453
/* This is a request during a resize operation */
1457
struct st_my_thread_var *thread;
1460
The file block is not in the cache. We don't need it in the
1461
cache: we are going to read or write directly to file. Cancel
1462
the request. We can simply decrement hash_link->requests because
1463
we did not release cache_lock since increasing it. So no other
1464
thread can wait for our request to become released.
1466
if (hash_link->requests == 1)
1469
We are the only one to request this hash_link (this file/pos).
1472
hash_link->requests--;
1473
unlink_hash(keycache, hash_link);
1478
More requests on the hash_link. Someone tries to evict a block
1479
for this hash_link (could have started before resizing started).
1480
This means that the LRU ring is empty. Otherwise a block could
1481
be assigned immediately. Behave like a thread that wants to
1482
evict a block for this file/pos. Add to the queue of threads
1483
waiting for a block. Wait until there is one assigned.
1485
Refresh the request on the hash-link so that it cannot be reused
1486
for another file/pos.
1488
thread= my_thread_var;
1489
thread->opt_info= (void *) hash_link;
1490
link_into_queue(&keycache->waiting_for_block, thread);
1493
keycache_pthread_cond_wait(&thread->suspend,
1494
&keycache->cache_lock);
1495
} while (thread->next);
1496
thread->opt_info= NULL;
1498
A block should now be assigned to the hash_link. But it may
1499
still need to be evicted. Anyway, we should re-check the
1500
situation. page_status must be set correctly.
1502
hash_link->requests--;
1504
} /* end of if (!block) */
1507
There is a block for this file/pos in the cache. Register a
1508
request on it. This unlinks it from the LRU ring (if it is there)
1509
and hence protects it against eviction (if not already in
1510
eviction). We need this for returning the block to the caller, for
1511
calling remove_reader() (for debugging purposes), and for calling
1512
free_block(). The only case where we don't need the request is if
1513
the block is in eviction. In that case we have to unregister the
1516
reg_requests(keycache, block, 1);
1518
if (page_status != PAGE_READ)
1521
- block not assigned to this hash_link or
1522
- block assigned but not yet read from file (invalid data).
1524
This must be a block in eviction. It will be read soon. We need
1525
to wait here until this happened. Otherwise the caller could
1526
access a wrong block or a block which is in read. While waiting
1527
we cannot lose hash_link nor block. We have registered a request
1528
on the hash_link. Everything can happen to the block but changes
1529
in the hash_link -> block relationship. In other words:
1530
everything can happen to the block but free or another completed
1533
Note that we bahave like a secondary requestor here. We just
1534
cannot return with PAGE_WAIT_TO_BE_READ. This would work for
1535
read requests and writes on dirty blocks that are not in flush
1536
only. Waiting here on COND_FOR_REQUESTED works in all
1539
assert(((block->hash_link != hash_link) &&
1540
(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
1541
((block->hash_link == hash_link) &&
1542
!(block->status & BLOCK_READ)));
1543
wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
1545
Here we can trust that the block has been assigned to this
1546
hash_link (block->hash_link == hash_link) and read into the
1547
buffer (BLOCK_READ). The worst things possible here are that the
1548
block is in free (BLOCK_REASSIGNED). But the block is still
1549
assigned to the hash_link. The freeing thread waits until we
1550
release our request on the hash_link. The block must not be
1551
again in eviction because we registered an request on it before
1554
assert(block->hash_link == hash_link);
1555
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1556
assert(!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)));
1559
The block is in the cache. Assigned to the hash_link. Valid data.
1560
Note that in case of page_st == PAGE_READ, the block can be marked
1561
for eviction. In any case it can be marked for freeing.
1566
/* A reader can just read the block. */
1567
*page_st= PAGE_READ;
1568
assert((hash_link->file == file) &&
1569
(hash_link->diskpos == filepos) &&
1570
(block->hash_link == hash_link));
1575
This is a writer. No two writers for the same block can exist.
1576
This must be assured by locks outside of the key cache.
1578
assert(!(block->status & BLOCK_FOR_UPDATE));
1580
while (block->status & BLOCK_IN_FLUSH)
1583
Wait until the block is flushed to file. Do not release the
1584
request on the hash_link yet to prevent that the block is freed
1585
or reassigned while we wait. While we wait, several things can
1586
happen to the block, including another flush. But the block
1587
cannot be reassigned to another hash_link until we release our
1588
request on it. But it can be marked BLOCK_REASSIGNED from free
1589
or eviction, while they wait for us to release the hash_link.
1591
wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
1593
If the flush phase failed, the resize could have finished while
1596
if (!keycache->in_resize)
1598
remove_reader(block);
1599
unreg_request(keycache, block, 1);
1602
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1603
assert(!(block->status & BLOCK_FOR_UPDATE));
1604
assert(block->hash_link == hash_link);
1607
if (block->status & BLOCK_CHANGED)
1610
We want to write a block with changed contents. If the cache
1611
block size is bigger than the callers block size (e.g. MyISAM),
1612
the caller may replace part of the block only. Changes of the
1613
other part of the block must be preserved. Since the block has
1614
not yet been selected for flush, we can still add our changes.
1616
*page_st= PAGE_READ;
1617
assert((hash_link->file == file) &&
1618
(hash_link->diskpos == filepos) &&
1619
(block->hash_link == hash_link));
1624
This is a write request for a clean block. We do not want to have
1625
new dirty blocks in the cache while resizing. We will free the
1626
block and write directly to file. If the block is in eviction or
1627
in free, we just let it go.
1629
Unregister from the hash_link. This must be done before freeing
1630
the block. And it must be done if not freeing the block. Because
1631
we could have waited above, we need to call remove_reader(). Other
1632
threads could wait for us to release our request on the hash_link.
1634
remove_reader(block);
1636
/* If the block is not in eviction and not in free, we can free it. */
1637
if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1641
Free block as we are going to write directly to file.
1642
Although we have an exlusive lock for the updated key part,
1643
the control can be yielded by the current thread as we might
1644
have unfinished readers of other key parts in the block
1645
buffer. Still we are guaranteed not to have any readers
1646
of the key part we are writing into until the block is
1647
removed from the cache as we set the BLOCK_REASSIGNED
1648
flag (see the code below that handles reading requests).
1650
free_block(keycache, block);
1655
The block will be evicted/freed soon. Don't touch it in any way.
1656
Unregister the request that we registered above.
1658
unreg_request(keycache, block, 1);
1661
The block is still assigned to the hash_link (the file/pos that
1662
we are going to write to). Wait until the eviction/free is
1663
complete. Otherwise the direct write could complete before all
1664
readers are done with the block. So they could read outdated
1667
Since we released our request on the hash_link, it can be reused
1668
for another file/pos. Hence we cannot just check for
1669
block->hash_link == hash_link. As long as the resize is
1670
proceeding the block cannot be reassigned to the same file/pos
1671
again. So we can terminate the loop when the block is no longer
1672
assigned to this file/pos.
1676
wait_on_queue(&block->wqueue[COND_FOR_SAVED],
1677
&keycache->cache_lock);
1679
If the flush phase failed, the resize could have finished
1680
while we waited here.
1682
if (!keycache->in_resize)
1684
} while (block->hash_link &&
1685
(block->hash_link->file == file) &&
1686
(block->hash_link->diskpos == filepos));
1691
if (page_status == PAGE_READ &&
1692
(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1696
This is a request for a block to be removed from cache. The block
1697
is assigned to this hash_link and contains valid data, but is
1698
marked for eviction or to be freed. Possible reasons why it has
1699
not yet been evicted/freed can be a flush before reassignment
1700
(BLOCK_IN_SWITCH), readers of the block have not finished yet
1701
(BLOCK_REASSIGNED), or the evicting thread did not yet awake after
1702
the block has been selected for it (BLOCK_IN_EVICTION).
1704
Only reading requests can proceed until the old dirty page is flushed,
1705
all others are to be suspended, then resubmitted
1707
if (!wrmode && !(block->status & BLOCK_REASSIGNED))
1710
This is a read request and the block not yet reassigned. We can
1711
register our request and proceed. This unlinks the block from
1712
the LRU ring and protects it against eviction.
1714
reg_requests(keycache, block, 1);
1719
Either this is a write request for a block that is in eviction
1720
or in free. We must not use it any more. Instead we must evict
1721
another block. But we cannot do this before the eviction/free is
1722
done. Otherwise we would find the same hash_link + block again
1725
Or this is a read request for a block in eviction/free that does
1726
not require a flush, but waits for readers to finish with the
1727
block. We do not read this block to let the eviction/free happen
1728
as soon as possible. Again we must wait so that we don't find
1729
the same hash_link + block again and again.
1731
assert(hash_link->requests);
1732
hash_link->requests--;
1733
wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
1735
The block is no longer assigned to this hash_link.
1744
This is a request for a new block or for a block not to be removed.
1747
- block not assigned to this hash_link or
1748
- block assigned but not yet read from file,
1750
- block assigned with valid (changed or unchanged) data and
1751
- it will not be reassigned/freed.
1755
/* No block is assigned to the hash_link yet. */
1756
if (keycache->blocks_unused)
1758
if (keycache->free_block_list)
1760
/* There is a block in the free list. */
1761
block= keycache->free_block_list;
1762
keycache->free_block_list= block->next_used;
1763
block->next_used= NULL;
1767
/* There are some never used blocks, take first of them */
1768
assert(keycache->blocks_used <
1769
(uint32_t) keycache->disk_blocks);
1770
block= &keycache->block_root[keycache->blocks_used];
1771
block->buffer= ADD_TO_PTR(keycache->block_mem,
1772
((uint32_t) keycache->blocks_used*
1773
keycache->key_cache_block_size),
1775
keycache->blocks_used++;
1776
assert(!block->next_used);
1778
assert(!block->prev_used);
1779
assert(!block->next_changed);
1780
assert(!block->prev_changed);
1781
assert(!block->hash_link);
1782
assert(!block->status);
1783
assert(!block->requests);
1784
keycache->blocks_unused--;
1785
block->status= BLOCK_IN_USE;
1787
block->offset= keycache->key_cache_block_size;
1789
block->temperature= BLOCK_COLD;
1790
block->hits_left= init_hits_left;
1791
block->last_hit_time= 0;
1792
block->hash_link= hash_link;
1793
hash_link->block= block;
1794
link_to_file_list(keycache, block, file, 0);
1795
page_status= PAGE_TO_BE_READ;
1800
There are no free blocks and no never used blocks, use a block
1804
if (! keycache->used_last)
1807
The LRU ring is empty. Wait until a new block is added to
1808
it. Several threads might wait here for the same hash_link,
1809
all of them must get the same block. While waiting for a
1810
block, after a block is selected for this hash_link, other
1811
threads can run first before this one awakes. During this
1812
time interval other threads find this hash_link pointing to
1813
the block, which is still assigned to another hash_link. In
1814
this case the block is not marked BLOCK_IN_SWITCH yet, but
1815
it is marked BLOCK_IN_EVICTION.
1818
struct st_my_thread_var *thread= my_thread_var;
1819
thread->opt_info= (void *) hash_link;
1820
link_into_queue(&keycache->waiting_for_block, thread);
1823
keycache_pthread_cond_wait(&thread->suspend,
1824
&keycache->cache_lock);
1826
while (thread->next);
1827
thread->opt_info= NULL;
1828
/* Assert that block has a request registered. */
1829
assert(hash_link->block->requests);
1830
/* Assert that block is not in LRU ring. */
1831
assert(!hash_link->block->next_used);
1832
assert(!hash_link->block->prev_used);
1835
If we waited above, hash_link->block has been assigned by
1836
link_block(). Otherwise it is still NULL. In the latter case
1837
we need to grab a block from the LRU ring ourselves.
1839
block= hash_link->block;
1842
/* Select the last block from the LRU ring. */
1843
block= keycache->used_last->next_used;
1844
block->hits_left= init_hits_left;
1845
block->last_hit_time= 0;
1846
hash_link->block= block;
1848
Register a request on the block. This unlinks it from the
1849
LRU ring and protects it against eviction.
1851
assert(!block->requests);
1852
reg_requests(keycache, block,1);
1854
We do not need to set block->status|= BLOCK_IN_EVICTION here
1855
because we will set block->status|= BLOCK_IN_SWITCH
1856
immediately without releasing the lock in between. This does
1857
also support debugging. When looking at the block, one can
1858
see if the block has been selected by link_block() after the
1859
LRU ring was empty, or if it was grabbed directly from the
1860
LRU ring in this branch.
1865
If we had to wait above, there is a small chance that another
1866
thread grabbed this block for the same file block already. But
1867
in most cases the first condition is true.
1869
if (block->hash_link != hash_link &&
1870
! (block->status & BLOCK_IN_SWITCH) )
1872
/* this is a primary request for a new page */
1873
block->status|= BLOCK_IN_SWITCH;
1875
if (block->status & BLOCK_CHANGED)
1877
/* The block contains a dirty page - push it out of the cache */
1879
if (block->status & BLOCK_IN_FLUSH)
1882
The block is marked for flush. If we do not wait here,
1883
it could happen that we write the block, reassign it to
1884
another file block, then, before the new owner can read
1885
the new file block, the flusher writes the cache block
1886
(which still has the old contents) to the new file block!
1888
wait_on_queue(&block->wqueue[COND_FOR_SAVED],
1889
&keycache->cache_lock);
1891
The block is marked BLOCK_IN_SWITCH. It should be left
1892
alone except for reading. No free, no write.
1894
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1895
assert(!(block->status & (BLOCK_REASSIGNED |
1897
BLOCK_FOR_UPDATE)));
1901
block->status|= BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE;
1903
BLOCK_IN_EVICTION may be true or not. Other flags must
1906
assert((block->status & ~BLOCK_IN_EVICTION) ==
1907
(BLOCK_READ | BLOCK_IN_SWITCH |
1908
BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
1909
BLOCK_CHANGED | BLOCK_IN_USE));
1910
assert(block->hash_link);
1912
keycache_pthread_mutex_unlock(&keycache->cache_lock);
1914
The call is thread safe because only the current
1915
thread might change the block->hash_link value
1917
error= (pwrite(block->hash_link->file,
1918
block->buffer+block->offset,
1919
block->length - block->offset,
1920
block->hash_link->diskpos+ block->offset) == 0);
1921
keycache_pthread_mutex_lock(&keycache->cache_lock);
1923
/* Block status must not have changed. */
1924
assert((block->status & ~BLOCK_IN_EVICTION) ==
1925
(BLOCK_READ | BLOCK_IN_SWITCH |
1926
BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
1927
BLOCK_CHANGED | BLOCK_IN_USE));
1928
keycache->global_cache_write++;
1932
block->status|= BLOCK_REASSIGNED;
1934
The block comes from the LRU ring. It must have a hash_link
1937
assert(block->hash_link);
1938
if (block->hash_link)
1941
All pending requests for this page must be resubmitted.
1942
This must be done before waiting for readers. They could
1943
wait for the flush to complete. And we must also do it
1944
after the wait. Flushers might try to free the block while
1945
we wait. They would wait until the reassignment is
1946
complete. Also the block status must reflect the correct
1947
situation: The block is not changed nor in flush any more.
1948
Note that we must not change the BLOCK_CHANGED flag
1949
outside of link_to_file_list() so that it is always in the
1950
correct queue and the *blocks_changed counters are
1953
block->status&= ~(BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE);
1954
link_to_file_list(keycache, block, block->hash_link->file, 1);
1955
release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
1957
The block is still assigned to its old hash_link.
1958
Wait until all pending read requests
1959
for this page are executed
1960
(we could have avoided this waiting, if we had read
1961
a page in the cache in a sweep, without yielding control)
1963
wait_for_readers(keycache, block);
1964
assert(block->hash_link && block->hash_link->block == block &&
1965
block->prev_changed);
1966
/* The reader must not have been a writer. */
1967
assert(!(block->status & BLOCK_CHANGED));
1969
/* Wake flushers that might have found the block in between. */
1970
release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
1972
/* Remove the hash link for the old file block from the hash. */
1973
unlink_hash(keycache, block->hash_link);
1976
For sanity checks link_to_file_list() asserts that block
1977
and hash_link refer to each other. Hence we need to assign
1978
the hash_link first, but then we would not know if it was
1979
linked before. Hence we would not know if to unlink it. So
1980
unlink it here and call link_to_file_list(..., false).
1982
unlink_changed(block);
1984
block->status= error ? BLOCK_ERROR : BLOCK_IN_USE ;
1986
block->offset= keycache->key_cache_block_size;
1987
block->hash_link= hash_link;
1988
link_to_file_list(keycache, block, file, 0);
1989
page_status= PAGE_TO_BE_READ;
1991
assert(block->hash_link->block == block);
1992
assert(hash_link->block->hash_link == hash_link);
1997
Either (block->hash_link == hash_link),
1998
or (block->status & BLOCK_IN_SWITCH).
2000
This is for secondary requests for a new file block only.
2001
Either it is already assigned to the new hash_link meanwhile
2002
(if we had to wait due to empty LRU), or it is already in
2003
eviction by another thread. Since this block has been
2004
grabbed from the LRU ring and attached to this hash_link,
2005
another thread cannot grab the same block from the LRU ring
2006
anymore. If the block is in eviction already, it must become
2007
attached to the same hash_link and as such destined for the
2010
page_status= (((block->hash_link == hash_link) &&
2011
(block->status & BLOCK_READ)) ?
2012
PAGE_READ : PAGE_WAIT_TO_BE_READ);
2019
Block is not NULL. This hash_link points to a block.
2021
- block not assigned to this hash_link (yet) or
2022
- block assigned but not yet read from file,
2024
- block assigned with valid (changed or unchanged) data and
2025
- it will not be reassigned/freed.
2027
The first condition means hash_link points to a block in
2028
eviction. This is not necessarily marked by BLOCK_IN_SWITCH yet.
2029
But then it is marked BLOCK_IN_EVICTION. See the NOTE in
2030
link_block(). In both cases it is destined for this hash_link
2031
and its file block address. When this hash_link got its block
2032
address, the block was removed from the LRU ring and cannot be
2033
selected for eviction (for another hash_link) again.
2035
Register a request on the block. This is another protection
2038
assert(((block->hash_link != hash_link) &&
2039
(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
2040
((block->hash_link == hash_link) &&
2041
!(block->status & BLOCK_READ)) ||
2042
((block->status & BLOCK_READ) &&
2043
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))));
2044
reg_requests(keycache, block, 1);
2045
page_status= (((block->hash_link == hash_link) &&
2046
(block->status & BLOCK_READ)) ?
2047
PAGE_READ : PAGE_WAIT_TO_BE_READ);
2051
assert(page_status != -1);
2052
/* Same assert basically, but be very sure. */
2054
/* Assert that block has a request and is not in LRU ring. */
2055
assert(block->requests);
2056
assert(!block->next_used);
2057
assert(!block->prev_used);
2058
/* Assert that we return the correct block. */
2059
assert((page_status == PAGE_WAIT_TO_BE_READ) ||
2060
((block->hash_link->file == file) &&
2061
(block->hash_link->diskpos == filepos)));
2062
*page_st=page_status;
2069
Read into a key cache block buffer from disk.
2074
keycache pointer to a key cache data structure
2075
block block to which buffer the data is to be read
2076
read_length size of data to be read
2077
min_length at least so much data must be read
2078
primary <-> the current thread will read the data
2084
The function either reads a page data from file to the block buffer,
2085
or waits until another thread reads it. What page to read is determined
2086
by a block parameter - reference to a hash link for this page.
2087
If an error occurs THE BLOCK_ERROR bit is set in the block status.
2088
We do not report error when the size of successfully read
2089
portion is less than read_length, but not less than min_length.
2092
static void read_block(KEY_CACHE *keycache,
2093
BLOCK_LINK *block, uint32_t read_length,
2094
uint32_t min_length, bool primary)
2096
uint32_t got_length;
2098
/* On entry cache_lock is locked */
2103
This code is executed only by threads that submitted primary
2104
requests. Until block->status contains BLOCK_READ, all other
2105
request for the block become secondary requests. For a primary
2106
request the block must be properly initialized.
2108
assert(((block->status & ~BLOCK_FOR_UPDATE) == BLOCK_IN_USE));
2109
assert((block->length == 0));
2110
assert((block->offset == keycache->key_cache_block_size));
2111
assert((block->requests > 0));
2113
keycache->global_cache_read++;
2114
/* Page is not in buffer yet, is to be read from disk */
2115
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2117
Here other threads may step in and register as secondary readers.
2118
They will register in block->wqueue[COND_FOR_REQUESTED].
2120
got_length= pread(block->hash_link->file, block->buffer, read_length, block->hash_link->diskpos);
2121
keycache_pthread_mutex_lock(&keycache->cache_lock);
2123
The block can now have been marked for free (in case of
2124
FLUSH_RELEASE). Otherwise the state must be unchanged.
2126
assert(((block->status & ~(BLOCK_REASSIGNED |
2127
BLOCK_FOR_UPDATE)) == BLOCK_IN_USE));
2128
assert((block->length == 0));
2129
assert((block->offset == keycache->key_cache_block_size));
2130
assert((block->requests > 0));
2132
if (got_length < min_length)
2133
block->status|= BLOCK_ERROR;
2136
block->status|= BLOCK_READ;
2137
block->length= got_length;
2139
Do not set block->offset here. If this block is marked
2140
BLOCK_CHANGED later, we want to flush only the modified part. So
2141
only a writer may set block->offset down from
2142
keycache->key_cache_block_size.
2145
/* Signal that all pending requests for this page now can be processed */
2146
release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2151
This code is executed only by threads that submitted secondary
2152
requests. At this point it could happen that the cache block is
2153
not yet assigned to the hash_link for the requested file block.
2154
But at awake from the wait this should be the case. Unfortunately
2155
we cannot assert this here because we do not know the hash_link
2156
for the requested file block nor the file and position. So we have
2157
to assert this in the caller.
2159
wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
294
2165
Read a block of data from a cached file into a buffer;
361
2386
int key_cache_insert(KEY_CACHE *keycache,
362
int file, internal::my_off_t filepos, int level,
2387
File file, my_off_t filepos, int level,
363
2388
unsigned char *buff, uint32_t length)
371
assert (!keycache->key_cache_inited);
2392
if (keycache->key_cache_inited)
2394
/* Key cache is used */
2395
register BLOCK_LINK *block;
2396
uint32_t read_length;
2399
bool locked_and_incremented= false;
2402
When the keycache is once initialized, we use the cache_lock to
2403
reliably distinguish the cases of normal operation, resizing, and
2404
disabled cache. We always increment and decrement
2405
'cnt_for_resize_op' so that a resizer can wait for pending I/O.
2407
keycache_pthread_mutex_lock(&keycache->cache_lock);
2409
We do not load index data into a disabled cache nor into an
2412
if (!keycache->can_be_used || keycache->in_resize)
2414
/* Register the pseudo I/O for the next resize. */
2415
inc_counter_for_resize_op(keycache);
2416
locked_and_incremented= true;
2417
/* Loaded data may not always be aligned to cache blocks. */
2418
offset= (uint) (filepos % keycache->key_cache_block_size);
2419
/* Load data in key_cache_block_size increments. */
2422
/* Cache could be disabled or resizing in a later iteration. */
2423
if (!keycache->can_be_used || keycache->in_resize)
2425
/* Start loading at the beginning of the cache block. */
2427
/* Do not load beyond the end of the cache block. */
2428
read_length= length;
2429
set_if_smaller(read_length, keycache->key_cache_block_size-offset);
2430
assert(read_length > 0);
2432
/* The block has been read by the caller already. */
2433
keycache->global_cache_read++;
2434
/* Request the cache block that matches file/pos. */
2435
keycache->global_cache_r_requests++;
2436
block= find_key_block(keycache, file, filepos, level, 0, &page_st);
2440
This happens only for requests submitted during key cache
2441
resize. The block is not in the cache and shall not go in.
2442
Stop loading index data.
2446
if (!(block->status & BLOCK_ERROR))
2448
if ((page_st == PAGE_WAIT_TO_BE_READ) ||
2449
((page_st == PAGE_TO_BE_READ) &&
2450
(offset || (read_length < keycache->key_cache_block_size))))
2455
this is a secondary request for a block to be read into the
2456
cache. The block is in eviction. It is not yet assigned to
2457
the requested file block (It does not point to the right
2458
hash_link). So we cannot call remove_reader() on the block.
2459
And we cannot access the hash_link directly here. We need to
2460
wait until the assignment is complete. read_block() executes
2461
the correct wait when called with primary == false.
2465
this is a primary request for a block to be read into the
2466
cache and the supplied data does not fill the whole block.
2468
This function is called on behalf of a LOAD INDEX INTO CACHE
2469
statement, which is a read-only task and allows other
2470
readers. It is possible that a parallel running reader tries
2471
to access this block. If it needs more data than has been
2472
supplied here, it would report an error. To be sure that we
2473
have all data in the block that is available in the file, we
2474
read the block ourselves.
2476
Though reading again what the caller did read already is an
2477
expensive operation, we need to do this for correctness.
2479
read_block(keycache, block, keycache->key_cache_block_size,
2480
read_length + offset, (page_st == PAGE_TO_BE_READ));
2482
A secondary request must now have the block assigned to the
2483
requested file block. It does not hurt to check it for
2484
primary requests too.
2486
assert(keycache->can_be_used);
2487
assert(block->hash_link->file == file);
2488
assert(block->hash_link->diskpos == filepos);
2489
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2491
else if (page_st == PAGE_TO_BE_READ)
2494
This is a new block in the cache. If we come here, we have
2495
data for the whole block.
2497
assert(block->hash_link->requests);
2498
assert(block->status & BLOCK_IN_USE);
2499
assert((page_st == PAGE_TO_BE_READ) ||
2500
(block->status & BLOCK_READ));
2502
#if !defined(SERIALIZED_READ_FROM_CACHE)
2503
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2505
Here other threads may step in and register as secondary readers.
2506
They will register in block->wqueue[COND_FOR_REQUESTED].
2510
/* Copy data from buff */
2511
memcpy(block->buffer+offset, buff, (size_t) read_length);
2513
#if !defined(SERIALIZED_READ_FROM_CACHE)
2514
keycache_pthread_mutex_lock(&keycache->cache_lock);
2515
assert(block->status & BLOCK_IN_USE);
2516
assert((page_st == PAGE_TO_BE_READ) ||
2517
(block->status & BLOCK_READ));
2520
After the data is in the buffer, we can declare the block
2521
valid. Now other threads do not need to register as
2522
secondary readers any more. They can immediately access the
2525
block->status|= BLOCK_READ;
2526
block->length= read_length+offset;
2528
Do not set block->offset here. If this block is marked
2529
BLOCK_CHANGED later, we want to flush only the modified part. So
2530
only a writer may set block->offset down from
2531
keycache->key_cache_block_size.
2533
/* Signal all pending requests. */
2534
release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2539
page_st == PAGE_READ. The block is in the buffer. All data
2540
must already be present. Blocks are always read with all
2541
data available on file. Assert that the block does not have
2542
less contents than the preloader supplies. If the caller has
2543
data beyond block->length, it means that a file write has
2544
been done while this block was in cache and not extended
2545
with the new data. If the condition is met, we can simply
2548
assert((page_st == PAGE_READ) &&
2549
(read_length + offset <= block->length));
2553
A secondary request must now have the block assigned to the
2554
requested file block. It does not hurt to check it for primary
2557
assert(block->hash_link->file == file);
2558
assert(block->hash_link->diskpos == filepos);
2559
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2560
} /* end of if (!(block->status & BLOCK_ERROR)) */
2563
remove_reader(block);
2566
Link the block into the LRU ring if it's the last submitted
2567
request for the block. This enables eviction for the block.
2569
unreg_request(keycache, block, 1);
2571
error= (block->status & BLOCK_ERROR);
2577
filepos+= read_length+offset;
2580
} while ((length-= read_length));
2583
if (locked_and_incremented)
2584
dec_counter_for_resize_op(keycache);
2585
keycache_pthread_mutex_unlock(&keycache->cache_lock);
405
2620
int key_cache_write(KEY_CACHE *keycache,
406
int file, internal::my_off_t filepos, int level,
2621
File file, my_off_t filepos, int level,
407
2622
unsigned char *buff, uint32_t length,
408
2623
uint32_t block_length,
411
2626
(void)block_length;
2627
bool locked_and_incremented= false;
415
2630
if (!dont_write)
417
2632
/* Not used in the server. */
418
2633
/* Force writing from buff into disk. */
2634
keycache->global_cache_w_requests++;
2635
keycache->global_cache_write++;
419
2636
if (pwrite(file, buff, length, filepos) == 0)
423
assert (!keycache->key_cache_inited);
2640
if (keycache->key_cache_inited)
2642
/* Key cache is used */
2643
register BLOCK_LINK *block;
2644
uint32_t read_length;
2649
When the key cache is once initialized, we use the cache_lock to
2650
reliably distinguish the cases of normal operation, resizing, and
2651
disabled cache. We always increment and decrement
2652
'cnt_for_resize_op' so that a resizer can wait for pending I/O.
2654
keycache_pthread_mutex_lock(&keycache->cache_lock);
2656
Cache resizing has two phases: Flushing and re-initializing. In
2657
the flush phase write requests can modify dirty blocks that are
2658
not yet in flush. Otherwise they are allowed to bypass the cache.
2659
find_key_block() returns NULL in both cases (clean blocks and
2662
After the flush phase new I/O requests must wait until the
2663
re-initialization is done. The re-initialization can be done only
2664
if no I/O request is in progress. The reason is that
2665
key_cache_block_size can change. With enabled cache I/O is done in
2666
chunks of key_cache_block_size. Every chunk tries to use a cache
2667
block first. If the block size changes in the middle, a block
2668
could be missed and data could be written below a cached block.
2670
while (keycache->in_resize && !keycache->resize_in_flush)
2671
wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
2672
/* Register the I/O for the next resize. */
2673
inc_counter_for_resize_op(keycache);
2674
locked_and_incremented= true;
2675
/* Requested data may not always be aligned to cache blocks. */
2676
offset= (uint) (filepos % keycache->key_cache_block_size);
2677
/* Write data in key_cache_block_size increments. */
2680
/* Cache could be disabled in a later iteration. */
2681
if (!keycache->can_be_used)
2683
/* Start writing at the beginning of the cache block. */
2685
/* Do not write beyond the end of the cache block. */
2686
read_length= length;
2687
set_if_smaller(read_length, keycache->key_cache_block_size-offset);
2688
assert(read_length > 0);
2690
/* Request the cache block that matches file/pos. */
2691
keycache->global_cache_w_requests++;
2692
block= find_key_block(keycache, file, filepos, level, 1, &page_st);
2696
This happens only for requests submitted during key cache
2697
resize. The block is not in the cache and shall not go in.
2698
Write directly to file.
2702
/* Used in the server. */
2703
keycache->global_cache_write++;
2704
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2705
if (pwrite(file, (unsigned char*) buff, read_length, filepos + offset) == 0)
2707
keycache_pthread_mutex_lock(&keycache->cache_lock);
2712
Prevent block from flushing and from being selected for to be
2713
freed. This must be set when we release the cache_lock.
2714
However, we must not set the status of the block before it is
2715
assigned to this file/pos.
2717
if (page_st != PAGE_WAIT_TO_BE_READ)
2718
block->status|= BLOCK_FOR_UPDATE;
2720
We must read the file block first if it is not yet in the cache
2721
and we do not replace all of its contents.
2723
In cases where the cache block is big enough to contain (parts
2724
of) index blocks of different indexes, our request can be
2725
secondary (PAGE_WAIT_TO_BE_READ). In this case another thread is
2726
reading the file block. If the read completes after us, it
2727
overwrites our new contents with the old contents. So we have to
2728
wait for the other thread to complete the read of this block.
2729
read_block() takes care for the wait.
2731
if (!(block->status & BLOCK_ERROR) &&
2732
((page_st == PAGE_TO_BE_READ &&
2733
(offset || read_length < keycache->key_cache_block_size)) ||
2734
(page_st == PAGE_WAIT_TO_BE_READ)))
2736
read_block(keycache, block,
2737
offset + read_length >= keycache->key_cache_block_size?
2738
offset : keycache->key_cache_block_size,
2739
offset, (page_st == PAGE_TO_BE_READ));
2740
assert(keycache->can_be_used);
2741
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2743
Prevent block from flushing and from being selected for to be
2744
freed. This must be set when we release the cache_lock.
2745
Here we set it in case we could not set it above.
2747
block->status|= BLOCK_FOR_UPDATE;
2750
The block should always be assigned to the requested file block
2751
here. It need not be BLOCK_READ when overwriting the whole block.
2753
assert(block->hash_link->file == file);
2754
assert(block->hash_link->diskpos == filepos);
2755
assert(block->status & BLOCK_IN_USE);
2756
assert((page_st == PAGE_TO_BE_READ) || (block->status & BLOCK_READ));
2758
The block to be written must not be marked BLOCK_REASSIGNED.
2759
Otherwise it could be freed in dirty state or reused without
2760
another flush during eviction. It must also not be in flush.
2761
Otherwise the old contens may have been flushed already and
2762
the flusher could clear BLOCK_CHANGED without flushing the
2765
assert(!(block->status & BLOCK_REASSIGNED));
2767
while (block->status & BLOCK_IN_FLUSHWRITE)
2770
Another thread is flushing the block. It was dirty already.
2771
Wait until the block is flushed to file. Otherwise we could
2772
modify the buffer contents just while it is written to file.
2773
An unpredictable file block contents would be the result.
2774
While we wait, several things can happen to the block,
2775
including another flush. But the block cannot be reassigned to
2776
another hash_link until we release our request on it.
2778
wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
2779
assert(keycache->can_be_used);
2780
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2781
/* Still must not be marked for free. */
2782
assert(!(block->status & BLOCK_REASSIGNED));
2783
assert(block->hash_link && (block->hash_link->block == block));
2787
We could perhaps release the cache_lock during access of the
2788
data like in the other functions. Locks outside of the key cache
2789
assure that readers and a writer do not access the same range of
2790
data. Parallel accesses should happen only if the cache block
2791
contains multiple index block(fragment)s. So different parts of
2792
the buffer would be read/written. An attempt to flush during
2793
memcpy() is prevented with BLOCK_FOR_UPDATE.
2795
if (!(block->status & BLOCK_ERROR))
2797
#if !defined(SERIALIZED_READ_FROM_CACHE)
2798
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2800
memcpy(block->buffer+offset, buff, (size_t) read_length);
2802
#if !defined(SERIALIZED_READ_FROM_CACHE)
2803
keycache_pthread_mutex_lock(&keycache->cache_lock);
2809
/* Not used in the server. buff has been written to disk at start. */
2810
if ((block->status & BLOCK_CHANGED) &&
2811
(!offset && read_length >= keycache->key_cache_block_size))
2812
link_to_file_list(keycache, block, block->hash_link->file, 1);
2814
else if (! (block->status & BLOCK_CHANGED))
2815
link_to_changed_list(keycache, block);
2816
block->status|=BLOCK_READ;
2818
Allow block to be selected for to be freed. Since it is marked
2819
BLOCK_CHANGED too, it won't be selected for to be freed without
2822
block->status&= ~BLOCK_FOR_UPDATE;
2823
set_if_smaller(block->offset, offset);
2824
set_if_bigger(block->length, read_length+offset);
2826
/* Threads may be waiting for the changes to be complete. */
2827
release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2830
If only a part of the cache block is to be replaced, and the
2831
rest has been read from file, then the cache lock has been
2832
released for I/O and it could be possible that another thread
2833
wants to evict or free the block and waits for it to be
2834
released. So we must not just decrement hash_link->requests, but
2835
also wake a waiting thread.
2837
remove_reader(block);
2840
Link the block into the LRU ring if it's the last submitted
2841
request for the block. This enables eviction for the block.
2843
unreg_request(keycache, block, 1);
2845
if (block->status & BLOCK_ERROR)
2853
filepos+= read_length+offset;
2856
} while ((length-= read_length));
425
2861
/* Key cache is not used */
428
2864
/* Used in the server. */
2865
keycache->global_cache_w_requests++;
2866
keycache->global_cache_write++;
2867
if (locked_and_incremented)
2868
keycache_pthread_mutex_unlock(&keycache->cache_lock);
429
2869
if (pwrite(file, (unsigned char*) buff, length, filepos) == 0)
2871
if (locked_and_incremented)
2872
keycache_pthread_mutex_lock(&keycache->cache_lock);
2876
if (locked_and_incremented)
2878
dec_counter_for_resize_op(keycache);
2879
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2890
keycache Pointer to a key cache data structure
2891
block Pointer to the block to free
2894
Remove reference to block from hash table.
2895
Remove block from the chain of clean blocks.
2896
Add block to the free list.
2899
Block must not be free (status == 0).
2900
Block must not be in free_block_list.
2901
Block must not be in the LRU ring.
2902
Block must not be in eviction (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH).
2903
Block must not be in free (BLOCK_REASSIGNED).
2904
Block must not be in flush (BLOCK_IN_FLUSH).
2905
Block must not be dirty (BLOCK_CHANGED).
2906
Block must not be in changed_blocks (dirty) hash.
2907
Block must be in file_blocks (clean) hash.
2908
Block must refer to a hash_link.
2909
Block must have a request registered on it.
2912
static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block)
2915
Assert that the block is not free already. And that it is in a clean
2916
state. Note that the block might just be assigned to a hash_link and
2917
not yet read (BLOCK_READ may not be set here). In this case a reader
2918
is registered in the hash_link and free_block() will wait for it
2921
assert((block->status & BLOCK_IN_USE) &&
2922
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
2923
BLOCK_REASSIGNED | BLOCK_IN_FLUSH |
2924
BLOCK_CHANGED | BLOCK_FOR_UPDATE)));
2925
/* Assert that the block is in a file_blocks chain. */
2926
assert(block->prev_changed && *block->prev_changed == block);
2927
/* Assert that the block is not in the LRU ring. */
2928
assert(!block->next_used && !block->prev_used);
2930
IMHO the below condition (if()) makes no sense. I can't see how it
2931
could be possible that free_block() is entered with a NULL hash_link
2932
pointer. The only place where it can become NULL is in free_block()
2933
(or before its first use ever, but for those blocks free_block() is
2934
not called). I don't remove the conditional as it cannot harm, but
2935
place an assert to confirm my hypothesis. Eventually the
2936
condition (if()) can be removed.
2938
assert(block->hash_link && block->hash_link->block == block);
2939
if (block->hash_link)
2942
While waiting for readers to finish, new readers might request the
2943
block. But since we set block->status|= BLOCK_REASSIGNED, they
2944
will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
2947
block->status|= BLOCK_REASSIGNED;
2948
wait_for_readers(keycache, block);
2950
The block must not have been freed by another thread. Repeat some
2951
checks. An additional requirement is that it must be read now
2954
assert(block->hash_link && block->hash_link->block == block);
2955
assert((block->status & (BLOCK_READ | BLOCK_IN_USE |
2956
BLOCK_REASSIGNED)) &&
2957
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
2958
BLOCK_IN_FLUSH | BLOCK_CHANGED |
2959
BLOCK_FOR_UPDATE)));
2960
assert(block->prev_changed && *block->prev_changed == block);
2961
assert(!block->prev_used);
2963
Unset BLOCK_REASSIGNED again. If we hand the block to an evicting
2964
thread (through unreg_request() below), other threads must not see
2965
this flag. They could become confused.
2967
block->status&= ~BLOCK_REASSIGNED;
2969
Do not release the hash_link until the block is off all lists.
2970
At least not if we hand it over for eviction in unreg_request().
2975
Unregister the block request and link the block into the LRU ring.
2976
This enables eviction for the block. If the LRU ring was empty and
2977
threads are waiting for a block, then the block wil be handed over
2978
for eviction immediately. Otherwise we will unlink it from the LRU
2979
ring again, without releasing the lock in between. So decrementing
2980
the request counter and updating statistics are the only relevant
2981
operation in this case. Assert that there are no other requests
2984
assert(block->requests == 1);
2985
unreg_request(keycache, block, 0);
2987
Note that even without releasing the cache lock it is possible that
2988
the block is immediately selected for eviction by link_block() and
2989
thus not added to the LRU ring. In this case we must not touch the
2992
if (block->status & BLOCK_IN_EVICTION)
2995
/* Here the block must be in the LRU ring. Unlink it again. */
2996
assert(block->next_used && block->prev_used &&
2997
*block->prev_used == block);
2998
unlink_block(keycache, block);
2999
if (block->temperature == BLOCK_WARM)
3000
keycache->warm_blocks--;
3001
block->temperature= BLOCK_COLD;
3003
/* Remove from file_blocks hash. */
3004
unlink_changed(block);
3006
/* Remove reference to block from hash table. */
3007
unlink_hash(keycache, block->hash_link);
3008
block->hash_link= NULL;
3012
block->offset= keycache->key_cache_block_size;
3014
/* Enforced by unlink_changed(), but just to be sure. */
3015
assert(!block->next_changed && !block->prev_changed);
3016
/* Enforced by unlink_block(): not in LRU ring nor in free_block_list. */
3017
assert(!block->next_used && !block->prev_used);
3018
/* Insert the free block in the free list. */
3019
block->next_used= keycache->free_block_list;
3020
keycache->free_block_list= block;
3021
/* Keep track of the number of currently unused blocks. */
3022
keycache->blocks_unused++;
3024
/* All pending requests for this page must be resubmitted. */
3025
release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
3029
static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b)
3031
return (((*a)->hash_link->diskpos < (*b)->hash_link->diskpos) ? -1 :
3032
((*a)->hash_link->diskpos > (*b)->hash_link->diskpos) ? 1 : 0);
3037
Flush a portion of changed blocks to disk,
3038
free used blocks if requested
3041
static int flush_cached_blocks(KEY_CACHE *keycache,
3042
File file, BLOCK_LINK **cache,
3044
enum flush_type type)
3048
uint32_t count= (uint) (end-cache);
3050
/* Don't lock the cache during the flush */
3051
keycache_pthread_mutex_unlock(&keycache->cache_lock);
3053
As all blocks referred in 'cache' are marked by BLOCK_IN_FLUSH
3054
we are guarunteed no thread will change them
3056
my_qsort((unsigned char*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);
3058
keycache_pthread_mutex_lock(&keycache->cache_lock);
3060
Note: Do not break the loop. We have registered a request on every
3061
block in 'cache'. These must be unregistered by free_block() or
3064
for ( ; cache != end ; cache++)
3066
BLOCK_LINK *block= *cache;
3068
If the block contents is going to be changed, we abandon the flush
3069
for this block. flush_key_blocks_int() will restart its search and
3070
handle the block properly.
3072
if (!(block->status & BLOCK_FOR_UPDATE))
3074
/* Blocks coming here must have a certain status. */
3075
assert(block->hash_link);
3076
assert(block->hash_link->block == block);
3077
assert(block->hash_link->file == file);
3078
assert((block->status & ~BLOCK_IN_EVICTION) ==
3079
(BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
3080
block->status|= BLOCK_IN_FLUSHWRITE;
3081
keycache_pthread_mutex_unlock(&keycache->cache_lock);
3082
error= (pwrite(file,
3083
block->buffer+block->offset,
3084
block->length - block->offset,
3085
block->hash_link->diskpos+ block->offset) == 0);
3086
keycache_pthread_mutex_lock(&keycache->cache_lock);
3087
keycache->global_cache_write++;
3090
block->status|= BLOCK_ERROR;
3092
last_errno= errno ? errno : -1;
3094
block->status&= ~BLOCK_IN_FLUSHWRITE;
3095
/* Block must not have changed status except BLOCK_FOR_UPDATE. */
3096
assert(block->hash_link);
3097
assert(block->hash_link->block == block);
3098
assert(block->hash_link->file == file);
3099
assert((block->status & ~(BLOCK_FOR_UPDATE | BLOCK_IN_EVICTION)) ==
3100
(BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
3102
Set correct status and link in right queue for free or later use.
3103
free_block() must not see BLOCK_CHANGED and it may need to wait
3104
for readers of the block. These should not see the block in the
3105
wrong hash. If not freeing the block, we need to have it in the
3108
link_to_file_list(keycache, block, file, 1);
3111
block->status&= ~BLOCK_IN_FLUSH;
3113
Let to proceed for possible waiting requests to write to the block page.
3114
It might happen only during an operation to resize the key cache.
3116
release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
3117
/* type will never be FLUSH_IGNORE_CHANGED here */
3118
if (!(type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) &&
3119
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3123
Note that a request has been registered against the block in
3124
flush_key_blocks_int().
3126
free_block(keycache, block);
3131
Link the block into the LRU ring if it's the last submitted
3132
request for the block. This enables eviction for the block.
3133
Note that a request has been registered against the block in
3134
flush_key_blocks_int().
3136
unreg_request(keycache, block, 1);
3139
} /* end of for ( ; cache != end ; cache++) */
3145
flush all key blocks for a file to disk, but don't do any mutex locks.
3148
flush_key_blocks_int()
3149
keycache pointer to a key cache data structure
3150
file handler for the file to flush to
3151
flush_type type of the flush
3154
This function doesn't do any mutex locks because it needs to be called both
3155
from flush_key_blocks and flush_all_key_blocks (the later one does the
3156
mutex lock in the resize_key_cache() function).
3158
We do only care about changed blocks that exist when the function is
3159
entered. We do not guarantee that all changed blocks of the file are
3160
flushed if more blocks change while this function is running.
3167
static int flush_key_blocks_int(KEY_CACHE *keycache,
3168
File file, enum flush_type type)
3170
BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
3175
if (keycache->disk_blocks > 0 &&
3176
(!my_disable_flush_key_blocks || type != FLUSH_KEEP))
3178
/* Key cache exists and flush is not disabled */
3180
uint32_t count= FLUSH_CACHE;
3181
BLOCK_LINK **pos,**end;
3182
BLOCK_LINK *first_in_switch= NULL;
3183
BLOCK_LINK *last_in_flush;
3184
BLOCK_LINK *last_for_update;
3185
BLOCK_LINK *last_in_switch;
3186
BLOCK_LINK *block, *next;
3188
if (type != FLUSH_IGNORE_CHANGED)
3191
Count how many key blocks we have to cache to be able
3192
to flush all dirty pages with minimum seek moves
3195
for (block= keycache->changed_blocks[FILE_HASH(file)] ;
3197
block= block->next_changed)
3199
if ((block->hash_link->file == file) &&
3200
!(block->status & BLOCK_IN_FLUSH))
3203
assert(count<= keycache->blocks_used);
3207
Allocate a new buffer only if its bigger than the one we have.
3208
Assure that we always have some entries for the case that new
3209
changed blocks appear while we need to wait for something.
3211
if ((count > FLUSH_CACHE) &&
3212
!(cache= (BLOCK_LINK**) malloc(sizeof(BLOCK_LINK*)*count)))
3215
After a restart there could be more changed blocks than now.
3216
So we should not let count become smaller than the fixed buffer.
3218
if (cache == cache_buff)
3222
/* Retrieve the blocks and write them to a buffer to be flushed */
3224
last_in_flush= NULL;
3225
last_for_update= NULL;
3226
end= (pos= cache)+count;
3227
for (block= keycache->changed_blocks[FILE_HASH(file)] ;
3231
next= block->next_changed;
3232
if (block->hash_link->file == file)
3234
if (!(block->status & (BLOCK_IN_FLUSH | BLOCK_FOR_UPDATE)))
3237
Note: The special handling of BLOCK_IN_SWITCH is obsolete
3238
since we set BLOCK_IN_FLUSH if the eviction includes a
3239
flush. It can be removed in a later version.
3241
if (!(block->status & BLOCK_IN_SWITCH))
3244
We care only for the blocks for which flushing was not
3245
initiated by another thread and which are not in eviction.
3246
Registering a request on the block unlinks it from the LRU
3247
ring and protects against eviction.
3249
reg_requests(keycache, block, 1);
3250
if (type != FLUSH_IGNORE_CHANGED)
3252
/* It's not a temporary file */
3256
This should happen relatively seldom. Remove the
3257
request because we won't do anything with the block
3258
but restart and pick it again in the next iteration.
3260
unreg_request(keycache, block, 0);
3262
This happens only if there is not enough
3263
memory for the big block
3265
if ((error= flush_cached_blocks(keycache, file, cache,
3268
/* Do not loop infinitely trying to flush in vain. */
3269
if ((last_errno == error) && (++last_errcnt > 5))
3274
Restart the scan as some other thread might have changed
3275
the changed blocks chain: the blocks that were in switch
3276
state before the flush started have to be excluded
3281
Mark the block with BLOCK_IN_FLUSH in order not to let
3282
other threads to use it for new pages and interfere with
3283
our sequence of flushing dirty file pages. We must not
3284
set this flag before actually putting the block on the
3285
write burst array called 'cache'.
3287
block->status|= BLOCK_IN_FLUSH;
3288
/* Add block to the array for a write burst. */
3293
/* It's a temporary file */
3294
assert(!(block->status & BLOCK_REASSIGNED));
3297
free_block() must not be called with BLOCK_CHANGED. Note
3298
that we must not change the BLOCK_CHANGED flag outside of
3299
link_to_file_list() so that it is always in the correct
3300
queue and the *blocks_changed counters are correct.
3302
link_to_file_list(keycache, block, file, 1);
3303
if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)))
3305
/* A request has been registered against the block above. */
3306
free_block(keycache, block);
3311
Link the block into the LRU ring if it's the last
3312
submitted request for the block. This enables eviction
3313
for the block. A request has been registered against
3316
unreg_request(keycache, block, 1);
3323
Link the block into a list of blocks 'in switch'.
3325
WARNING: Here we introduce a place where a changed block
3326
is not in the changed_blocks hash! This is acceptable for
3327
a BLOCK_IN_SWITCH. Never try this for another situation.
3328
Other parts of the key cache code rely on changed blocks
3329
being in the changed_blocks hash.
3331
unlink_changed(block);
3332
link_changed(block, &first_in_switch);
3335
else if (type != FLUSH_KEEP)
3338
During the normal flush at end of statement (FLUSH_KEEP) we
3339
do not need to ensure that blocks in flush or update by
3340
other threads are flushed. They will be flushed by them
3341
later. In all other cases we must assure that we do not have
3342
any changed block of this file in the cache when this
3345
if (block->status & BLOCK_IN_FLUSH)
3347
/* Remember the last block found to be in flush. */
3348
last_in_flush= block;
3352
/* Remember the last block found to be selected for update. */
3353
last_for_update= block;
3360
if ((error= flush_cached_blocks(keycache, file, cache, pos, type)))
3362
/* Do not loop inifnitely trying to flush in vain. */
3363
if ((last_errno == error) && (++last_errcnt > 5))
3368
Do not restart here during the normal flush at end of statement
3369
(FLUSH_KEEP). We have now flushed at least all blocks that were
3370
changed when entering this function. In all other cases we must
3371
assure that we do not have any changed block of this file in the
3372
cache when this function returns.
3374
if (type != FLUSH_KEEP)
3380
There are no blocks to be flushed by this thread, but blocks in
3381
flush by other threads. Wait until one of the blocks is flushed.
3382
Re-check the condition for last_in_flush. We may have unlocked
3383
the cache_lock in flush_cached_blocks(). The state of the block
3386
if (last_in_flush->status & BLOCK_IN_FLUSH)
3387
wait_on_queue(&last_in_flush->wqueue[COND_FOR_SAVED],
3388
&keycache->cache_lock);
3389
/* Be sure not to lose a block. They may be flushed in random order. */
3392
if (last_for_update)
3395
There are no blocks to be flushed by this thread, but blocks for
3396
update by other threads. Wait until one of the blocks is updated.
3397
Re-check the condition for last_for_update. We may have unlocked
3398
the cache_lock in flush_cached_blocks(). The state of the block
3401
if (last_for_update->status & BLOCK_FOR_UPDATE)
3402
wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
3403
&keycache->cache_lock);
3404
/* The block is now changed. Flush it. */
3409
Wait until the list of blocks in switch is empty. The threads that
3410
are switching these blocks will relink them to clean file chains
3411
while we wait and thus empty the 'first_in_switch' chain.
3413
while (first_in_switch)
3415
wait_on_queue(&first_in_switch->wqueue[COND_FOR_SAVED],
3416
&keycache->cache_lock);
3418
Do not restart here. We have flushed all blocks that were
3419
changed when entering this function and were not marked for
3420
eviction. Other threads have now flushed all remaining blocks in
3421
the course of their eviction.
3425
if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
3427
last_for_update= NULL;
3428
last_in_switch= NULL;
3429
uint32_t total_found= 0;
3433
Finally free all clean blocks for this file.
3434
During resize this may be run by two threads in parallel.
3439
for (block= keycache->file_blocks[FILE_HASH(file)] ;
3443
/* Remember the next block. After freeing we cannot get at it. */
3444
next= block->next_changed;
3446
/* Changed blocks cannot appear in the file_blocks hash. */
3447
assert(!(block->status & BLOCK_CHANGED));
3448
if (block->hash_link->file == file)
3450
/* We must skip blocks that will be changed. */
3451
if (block->status & BLOCK_FOR_UPDATE)
3453
last_for_update= block;
3458
We must not free blocks in eviction (BLOCK_IN_EVICTION |
3459
BLOCK_IN_SWITCH) or blocks intended to be freed
3462
if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3465
struct st_hash_link *next_hash_link= NULL;
3466
my_off_t next_diskpos= 0;
3468
uint32_t next_status= 0;
3469
uint32_t hash_requests= 0;
3473
assert(found <= keycache->blocks_used);
3476
Register a request. This unlinks the block from the LRU
3477
ring and protects it against eviction. This is required
3480
reg_requests(keycache, block, 1);
3483
free_block() may need to wait for readers of the block.
3484
This is the moment where the other thread can move the
3485
'next' block from the chain. free_block() needs to wait
3486
if there are requests for the block pending.
3488
if (next && (hash_requests= block->hash_link->requests))
3490
/* Copy values from the 'next' block and its hash_link. */
3491
next_status= next->status;
3492
next_hash_link= next->hash_link;
3493
next_diskpos= next_hash_link->diskpos;
3494
next_file= next_hash_link->file;
3495
assert(next == next_hash_link->block);
3498
free_block(keycache, block);
3500
If we had to wait and the state of the 'next' block
3501
changed, break the inner loop. 'next' may no longer be
3502
part of the current chain.
3504
We do not want to break the loop after every free_block(),
3505
not even only after waits. The chain might be quite long
3506
and contain blocks for many files. Traversing it again and
3507
again to find more blocks for this file could become quite
3510
if (next && hash_requests &&
3511
((next_status != next->status) ||
3512
(next_hash_link != next->hash_link) ||
3513
(next_file != next_hash_link->file) ||
3514
(next_diskpos != next_hash_link->diskpos) ||
3515
(next != next_hash_link->block)))
3520
last_in_switch= block;
3523
} /* end for block in file_blocks */
3527
If any clean block has been found, we may have waited for it to
3528
become free. In this case it could be possible that another clean
3529
block became dirty. This is possible if the write request existed
3530
before the flush started (BLOCK_FOR_UPDATE). Re-check the hashes.
3536
To avoid an infinite loop, wait until one of the blocks marked
3537
for update is updated.
3539
if (last_for_update)
3541
/* We did not wait. Block must not have changed status. */
3542
assert(last_for_update->status & BLOCK_FOR_UPDATE);
3543
wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
3544
&keycache->cache_lock);
3549
To avoid an infinite loop wait until one of the blocks marked
3550
for eviction is switched.
3554
/* We did not wait. Block must not have changed status. */
3555
assert(last_in_switch->status & (BLOCK_IN_EVICTION |
3558
wait_on_queue(&last_in_switch->wqueue[COND_FOR_SAVED],
3559
&keycache->cache_lock);
3563
} /* if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) */
3565
} /* if (keycache->disk_blocks > 0 */
3568
if (cache != cache_buff)
3569
free((unsigned char*) cache);
3571
errno=last_errno; /* Return first error */
3572
return(last_errno != 0);
438
3577
Flush all blocks for a file to disk
452
3591
int flush_key_blocks(KEY_CACHE *keycache,
453
int file, enum flush_type type)
457
assert (!keycache->key_cache_inited);
3592
File file, enum flush_type type)
3596
if (!keycache->key_cache_inited)
3599
keycache_pthread_mutex_lock(&keycache->cache_lock);
3600
/* While waiting for lock, keycache could have been ended. */
3601
if (keycache->disk_blocks > 0)
3603
inc_counter_for_resize_op(keycache);
3604
res= flush_key_blocks_int(keycache, file, type);
3605
dec_counter_for_resize_op(keycache);
3607
keycache_pthread_mutex_unlock(&keycache->cache_lock);
3613
Flush all blocks in the key cache to disk.
3616
flush_all_key_blocks()
3617
keycache pointer to key cache root structure
3621
Flushing of the whole key cache is done in two phases.
3623
1. Flush all changed blocks, waiting for them if necessary. Loop
3624
until there is no changed block left in the cache.
3626
2. Free all clean blocks. Normally this means free all blocks. The
3627
changed blocks were flushed in phase 1 and became clean. However we
3628
may need to wait for blocks that are read by other threads. While we
3629
wait, a clean block could become changed if that operation started
3630
before the resize operation started. To be safe we must restart at
3633
When we can run through the changed_blocks and file_blocks hashes
3634
without finding a block any more, then we are done.
3636
Note that we hold keycache->cache_lock all the time unless we need
3637
to wait for something.
3644
static int flush_all_key_blocks(KEY_CACHE *keycache)
3647
uint32_t total_found;
3653
safe_mutex_assert_owner(&keycache->cache_lock);
3657
Phase1: Flush all changed blocks, waiting for them if necessary.
3658
Loop until there is no changed block left in the cache.
3663
/* Step over the whole changed_blocks hash array. */
3664
for (idx= 0; idx < CHANGED_BLOCKS_HASH; idx++)
3667
If an array element is non-empty, use the first block from its
3668
chain to find a file for flush. All changed blocks for this
3669
file are flushed. So the same block will not appear at this
3670
place again with the next iteration. New writes for blocks are
3671
not accepted during the flush. If multiple files share the
3672
same hash bucket, one of them will be flushed per iteration
3673
of the outer loop of phase 1.
3675
if ((block= keycache->changed_blocks[idx]))
3679
Flush dirty blocks but do not free them yet. They can be used
3680
for reading until all other blocks are flushed too.
3682
if (flush_key_blocks_int(keycache, block->hash_link->file,
3691
Phase 2: Free all clean blocks. Normally this means free all
3692
blocks. The changed blocks were flushed in phase 1 and became
3693
clean. However we may need to wait for blocks that are read by
3694
other threads. While we wait, a clean block could become changed
3695
if that operation started before the resize operation started. To
3696
be safe we must restart at phase 1.
3701
/* Step over the whole file_blocks hash array. */
3702
for (idx= 0; idx < CHANGED_BLOCKS_HASH; idx++)
3705
If an array element is non-empty, use the first block from its
3706
chain to find a file for flush. All blocks for this file are
3707
freed. So the same block will not appear at this place again
3708
with the next iteration. If multiple files share the
3709
same hash bucket, one of them will be flushed per iteration
3710
of the outer loop of phase 2.
3712
if ((block= keycache->file_blocks[idx]))
3716
if (flush_key_blocks_int(keycache, block->hash_link->file,
3725
If any clean block has been found, we may have waited for it to
3726
become free. In this case it could be possible that another clean
3727
block became dirty. This is possible if the write request existed
3728
before the resize started (BLOCK_FOR_UPDATE). Re-check the hashes.
3730
} while (total_found);
3736
Reset the counters of a key cache.
3739
reset_key_cache_counters()
3742
This procedure is used by process_key_caches() to reset the key_cache.
3745
0 on success (always because it can't fail)
3748
void reset_key_cache_counters()
3750
dflt_key_cache->global_blocks_changed= 0; /* Key_blocks_not_flushed */
3751
dflt_key_cache->global_cache_r_requests= 0; /* Key_read_requests */
3752
dflt_key_cache->global_cache_read= 0; /* Key_reads */
3753
dflt_key_cache->global_cache_w_requests= 0; /* Key_write_requests */
3754
dflt_key_cache->global_cache_write= 0; /* Key_writes */
3757
#if defined(KEYCACHE_TIMEOUT)
3761
unsigned int hash_link_number(HASH_LINK *hash_link, KEY_CACHE *keycache)
3763
return ((unsigned int) (((char*)hash_link-(char *) keycache->hash_link_root)/
3764
sizeof(HASH_LINK)));
3768
unsigned int block_number(BLOCK_LINK *block, KEY_CACHE *keycache)
3770
return ((unsigned int) (((char*)block-(char *)keycache->block_root)/
3771
sizeof(BLOCK_LINK)));
3775
#define KEYCACHE_DUMP_FILE "keycache_dump.txt"
3776
#define MAX_QUEUE_LEN 100
3779
static void keycache_dump(KEY_CACHE *keycache)
3781
FILE *keycache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w");
3782
struct st_my_thread_var *last;
3783
struct st_my_thread_var *thread;
3785
HASH_LINK *hash_link;
3786
KEYCACHE_PAGE *page;
3789
fprintf(keycache_dump_file, "thread:%u\n", thread->id);
3792
thread=last=waiting_for_hash_link.last_thread;
3793
fprintf(keycache_dump_file, "queue of threads waiting for hash link\n");
3797
thread=thread->next;
3798
page= (KEYCACHE_PAGE *) thread->opt_info;
3799
fprintf(keycache_dump_file,
3800
"thread:%u, (file,filepos)=(%u,%lu)\n",
3801
thread->id,(uint) page->file,(uint32_t) page->filepos);
3802
if (++i == MAX_QUEUE_LEN)
3805
while (thread != last);
3808
thread=last=waiting_for_block.last_thread;
3809
fprintf(keycache_dump_file, "queue of threads waiting for block\n");
3813
thread=thread->next;
3814
hash_link= (HASH_LINK *) thread->opt_info;
3815
fprintf(keycache_dump_file,
3816
"thread:%u hash_link:%u (file,filepos)=(%u,%u)\n",
3817
thread->id, (uint) hash_link_number(hash_link, keycache),
3818
(uint) hash_link->file,(uint32_t) hash_link->diskpos);
3819
if (++i == MAX_QUEUE_LEN)
3822
while (thread != last);
3824
for (i=0 ; i< keycache->blocks_used ; i++)
3827
block= &keycache->block_root[i];
3828
hash_link= block->hash_link;
3829
fprintf(keycache_dump_file,
3830
"block:%u hash_link:%d status:%x #requests=%u "
3831
"waiting_for_readers:%d\n",
3832
i, (int) (hash_link ? hash_link_number(hash_link, keycache) : -1),
3833
block->status, block->requests, block->condvar ? 1 : 0);
3834
for (j=0 ; j < 2; j++)
3836
KEYCACHE_WQUEUE *wqueue=&block->wqueue[j];
3837
thread= last= wqueue->last_thread;
3838
fprintf(keycache_dump_file, "queue #%d\n", j);
3843
thread=thread->next;
3844
fprintf(keycache_dump_file,
3845
"thread:%u\n", thread->id);
3846
if (++i == MAX_QUEUE_LEN)
3849
while (thread != last);
3853
fprintf(keycache_dump_file, "LRU chain:");
3854
block= keycache= used_last;
3859
block= block->next_used;
3860
fprintf(keycache_dump_file,
3861
"block:%u, ", block_number(block, keycache));
3863
while (block != keycache->used_last);
3865
fprintf(keycache_dump_file, "\n");
3867
fclose(keycache_dump_file);
3870
static int keycache_pthread_cond_wait(pthread_cond_t *cond,
3871
pthread_mutex_t *mutex)
3874
struct timeval now; /* time when we started waiting */
3875
struct timespec timeout; /* timeout value for the wait function */
3878
/* Get current time */
3879
gettimeofday(&now, &tz);
3880
/* Prepare timeout value */
3881
timeout.tv_sec= now.tv_sec + KEYCACHE_TIMEOUT;
3883
timeval uses microseconds.
3884
timespec uses nanoseconds.
3885
1 nanosecond = 1000 micro seconds
3887
timeout.tv_nsec= now.tv_usec * 1000;
3888
rc= pthread_cond_timedwait(cond, mutex, &timeout);
3889
if (rc == ETIMEDOUT || rc == ETIME)
3894
assert(rc != ETIMEDOUT);
3897
#endif /* defined(KEYCACHE_TIMEOUT) */