size_t use_mem, uint32_t division_limit,
uint32_t age_threshold)
(void)key_cache_block_size;
(void)division_limit;
memset(keycache, 0, sizeof(KEY_CACHE));
uint32_t blocks, hash_links;
assert(key_cache_block_size >= 512);
if (keycache->key_cache_inited && keycache->disk_blocks > 0)
return(keycache->disk_blocks);
keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
keycache->global_cache_read= keycache->global_cache_write= 0;
keycache->disk_blocks= -1;
if (! keycache->key_cache_inited)
keycache->key_cache_inited= 1;
/*
Initialize these variables once only.
Their value must survive re-initialization during resizing.
*/
keycache->in_resize= 0;
keycache->resize_in_flush= 0;
keycache->cnt_for_resize_op= 0;
keycache->waiting_for_resize_cnt.last_thread= NULL;
keycache->in_init= 0;
pthread_mutex_init(&keycache->cache_lock, MY_MUTEX_INIT_FAST);
keycache->resize_queue.last_thread= NULL;
keycache->key_cache_mem_size= use_mem;
keycache->key_cache_block_size= key_cache_block_size;
blocks= (uint32_t) (use_mem / (sizeof(BLOCK_LINK) + 2 * sizeof(HASH_LINK) +
sizeof(HASH_LINK*) * 5/4 + key_cache_block_size));
/* It doesn't make sense to have too few blocks (less than 8) */
/* Set hash_entries to the next bigger power of 2 */
if ((keycache->hash_entries= next_power(blocks)) < blocks * 5/4)
keycache->hash_entries<<= 1;
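/*
  next_power() is not shown in this excerpt. A minimal sketch of a
  round-up-to-power-of-two helper with the behaviour assumed by the
  sizing code above (an illustration, not necessarily the original
  definition):

    static uint32_t next_power(uint32_t value)
    {
      uint32_t n= 1;
      while (n < value) /* smallest power of 2 >= value */
        n<<= 1;
      return n;
    }

  Worked example: with blocks == 1000, next_power(1000) == 1024, and
  since 1024 < 1000 * 5/4 == 1250, hash_entries is doubled to 2048.
*/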
hash_links= 2 * blocks;
#if defined(MAX_THREADS)
if (hash_links < MAX_THREADS + blocks - 1)
hash_links= MAX_THREADS + blocks - 1;
#endif
while ((length= (ALIGN_SIZE(blocks * sizeof(BLOCK_LINK)) +
ALIGN_SIZE(hash_links * sizeof(HASH_LINK)) +
ALIGN_SIZE(sizeof(HASH_LINK*) *
keycache->hash_entries))) +
((size_t) blocks * keycache->key_cache_block_size) > use_mem)
blocks--;
/* Allocate memory for cache page buffers */
if ((keycache->block_mem= malloc((size_t) blocks * keycache->key_cache_block_size)))
/*
Allocate memory for blocks, hash_links and hash entries;
For each block 2 hash links are allocated
*/
if ((keycache->block_root= (BLOCK_LINK*) malloc(length)))
break;
free(keycache->block_mem);
keycache->block_mem= 0;
my_error(EE_OUTOFMEMORY, MYF(0), blocks * keycache->key_cache_block_size);
blocks= blocks / 4 * 3;
keycache->blocks_unused= blocks;
keycache->disk_blocks= (int) blocks;
keycache->hash_links= hash_links;
keycache->hash_root= (HASH_LINK**) ((char*) keycache->block_root +
ALIGN_SIZE(blocks*sizeof(BLOCK_LINK)));
keycache->hash_link_root= (HASH_LINK*) ((char*) keycache->hash_root +
ALIGN_SIZE((sizeof(HASH_LINK*) *
keycache->hash_entries)));
memset(keycache->block_root, 0,
keycache->disk_blocks * sizeof(BLOCK_LINK));
memset(keycache->hash_root, 0,
keycache->hash_entries * sizeof(HASH_LINK*));
memset(keycache->hash_link_root, 0,
keycache->hash_links * sizeof(HASH_LINK));
keycache->hash_links_used= 0;
keycache->free_hash_list= NULL;
keycache->blocks_used= keycache->blocks_changed= 0;
keycache->global_blocks_changed= 0;
keycache->blocks_available= 0; /* For debugging */
/* The LRU chain is empty after initialization */
keycache->used_last= NULL;
keycache->used_ins= NULL;
keycache->free_block_list= NULL;
keycache->keycache_time= 0;
keycache->warm_blocks= 0;
keycache->min_warm_blocks= (division_limit ?
blocks * division_limit / 100 + 1 :
blocks);
keycache->age_threshold= (age_threshold ?
blocks * age_threshold / 100 :
blocks);
keycache->can_be_used= 1;
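/*
  Worked example for the two settings above (hypothetical numbers):
  with blocks == 1000, division_limit == 50 and age_threshold == 300,
  min_warm_blocks == 1000 * 50 / 100 + 1 == 501 and
  age_threshold == 1000 * 300 / 100 == 3000. That is, at least half
  of the blocks stay in the warm sub-chain, and a hot block must stay
  untouched for 3000 keycache_time ticks before it may be demoted.
*/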
keycache->waiting_for_hash_link.last_thread= NULL;
keycache->waiting_for_block.last_thread= NULL;
memset(keycache->changed_blocks, 0,
sizeof(keycache->changed_blocks[0]) * CHANGED_BLOCKS_HASH);
memset(keycache->file_blocks, 0,
sizeof(keycache->file_blocks[0]) * CHANGED_BLOCKS_HASH);
/* key_buffer_size is specified too small. Disable the cache. */
keycache->can_be_used= 0;
keycache->blocks= keycache->disk_blocks > 0 ? keycache->disk_blocks : 0;
return((int) keycache->disk_blocks);
keycache->disk_blocks= 0;
if (keycache->block_mem)
free(keycache->block_mem);
keycache->block_mem= NULL;
if (keycache->block_root)
free((unsigned char*) keycache->block_root);
keycache->block_root= NULL;
keycache->can_be_used= 0;
return 0;
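/*
  Illustrative usage sketch (not part of the original source; the
  8 MB total size and 1 KB block size are hypothetical values):

    KEY_CACHE cache;
    memset(&cache, 0, sizeof(cache));
    if (init_key_cache(&cache, 1024, 8 * 1024 * 1024, 100, 300) <= 0)
      fprintf(stderr, "key cache could not be enabled\n");
    ...
    end_key_cache(&cache, true); /* free buffers, destroy the mutex */
*/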
/*
keycache              pointer to a key cache data structure
key_cache_block_size  size of blocks to keep cached data
use_mem               total memory to use for the new key cache
division_limit        new division limit (if not zero)
age_threshold         new age threshold (if not zero)

number of blocks in the key cache, if successful,
0 - otherwise.

The function first compares the memory size and the block size
parameters with the current key cache values.

If they differ, the function frees the memory allocated for the
old key cache blocks by calling the end_key_cache function and
then rebuilds the key cache with new blocks by calling
init_key_cache.

The function starts the operation only when all other threads
performing operations with the key cache let it proceed
(when cnt_for_resize_op == 0).
*/
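/*
  Illustrative call (hypothetical sizes): grow the cache to 16 MB
  while keeping the 1 KB block size. Passing zero for division_limit
  and age_threshold selects the built-in defaults on rebuild:

    int blocks= resize_key_cache(&cache, 1024, 16 * 1024 * 1024, 0, 0);
    if (blocks <= 0)
      fprintf(stderr, "resize failed, cache is disabled\n");
*/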
int resize_key_cache(KEY_CACHE *keycache, uint32_t key_cache_block_size,
size_t use_mem, uint32_t division_limit,
uint32_t age_threshold)
if (!keycache->key_cache_inited)
return(keycache->disk_blocks);
if (key_cache_block_size == keycache->key_cache_block_size &&
use_mem == keycache->key_cache_mem_size)
change_key_cache_param(keycache, division_limit, age_threshold);
return(keycache->disk_blocks);
keycache_pthread_mutex_lock(&keycache->cache_lock);
/*
We may need to wait for another thread which is doing a resize
already. This cannot happen in the MySQL server though. It allows
one resizer only. In set_var.cc keycache->in_init is used to block
multiple attempts to resize a key cache at the same time.
*/
while (keycache->in_resize)
/* purecov: begin inspected */
wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
/*
Mark the operation in progress. This blocks other threads from doing
a resize in parallel. It prohibits new blocks to enter the cache.
Read/write requests can bypass the cache during the flush phase.
*/
keycache->in_resize= 1;
/* Need to flush only if keycache is enabled. */
if (keycache->can_be_used)
/* Start the flush phase. */
keycache->resize_in_flush= 1;
if (flush_all_key_blocks(keycache))
/* TODO: if this happens, we should write a warning in the log file ! */
keycache->resize_in_flush= 0;
keycache->can_be_used= 0;
/* End the flush phase. */
keycache->resize_in_flush= 0;
/*
Some direct read/write operations (bypassing the cache) may still be
unfinished. Wait until they are done. If the key cache can be used,
direct I/O is done in increments of key_cache_block_size. That is,
every block is checked if it is in the cache. We need to wait for
pending I/O before re-initializing the cache, because we may change
the block size. Otherwise the I/O could check for blocks at file
positions where the new block division has none. We also want to
wait for pending I/O if the cache was disabled. It must not
run in parallel with normal cache operation.
*/
while (keycache->cnt_for_resize_op)
wait_on_queue(&keycache->waiting_for_resize_cnt, &keycache->cache_lock);
/*
Free old cache structures, allocate new structures, and initialize
them. Note that the cache_lock mutex and the resize_queue are left
untouched. We do not lose the cache_lock and will release it only at
the end of this function.
*/
end_key_cache(keycache, 0); /* Don't free mutex */
/* The following will work even if use_mem is 0 */
blocks= init_key_cache(keycache, key_cache_block_size, use_mem,
division_limit, age_threshold);
/*
Mark the resize finished. This allows other threads to start a
resize or to request new cache blocks.
*/
keycache->in_resize= 0;
/* Signal waiting threads. */
release_whole_queue(&keycache->resize_queue);
keycache_pthread_mutex_unlock(&keycache->cache_lock);
return(blocks);
/*
Increment counter blocking resize key cache operation
*/
static inline void inc_counter_for_resize_op(KEY_CACHE *keycache)
keycache->cnt_for_resize_op++;
/*
Decrement counter blocking resize key cache operation;
Signal the operation to proceed when counter becomes equal to zero
*/
static inline void dec_counter_for_resize_op(KEY_CACHE *keycache)
if (!--keycache->cnt_for_resize_op)
release_whole_queue(&keycache->waiting_for_resize_cnt);
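/*
  The two helpers above form a simple pin count. Every direct I/O
  that must not overlap a resize brackets its work with them; a
  sketch of the pattern (the same one key_cache_insert() uses below):

    keycache_pthread_mutex_lock(&keycache->cache_lock);
    inc_counter_for_resize_op(keycache); /* a resizer now waits for us */
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
    ... perform the direct read or write ...
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    dec_counter_for_resize_op(keycache); /* last pin wakes the resizer */
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
*/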
/*
Change the key cache parameters

change_key_cache_param()
keycache        pointer to a key cache data structure
division_limit  new division limit (if not zero)
age_threshold   new age threshold (if not zero)

Presently the function resets the key cache parameters
concerning midpoint insertion strategy - division_limit and
age_threshold.
*/
void change_key_cache_param(KEY_CACHE *keycache, uint32_t division_limit,
uint32_t age_threshold)
keycache_pthread_mutex_lock(&keycache->cache_lock);
if (division_limit)
keycache->min_warm_blocks= (keycache->disk_blocks *
division_limit / 100 + 1);
if (age_threshold)
keycache->age_threshold= (keycache->disk_blocks *
age_threshold / 100);
keycache_pthread_mutex_unlock(&keycache->cache_lock);
void end_key_cache(KEY_CACHE *keycache, bool cleanup)
if (!keycache->key_cache_inited)
return;
if (keycache->disk_blocks > 0)
if (keycache->block_mem)
free(keycache->block_mem);
keycache->block_mem= NULL;
free((unsigned char*) keycache->block_root);
keycache->block_root= NULL;
keycache->disk_blocks= -1;
/* Reset blocks_changed to be safe if flush_all_key_blocks is called */
keycache->blocks_changed= 0;
if (cleanup)
pthread_mutex_destroy(&keycache->cache_lock);
keycache->key_cache_inited= keycache->can_be_used= 0;
} /* end_key_cache */
/*
Link a thread into the double-linked queue of waiting threads.

wqueue  pointer to the queue structure
thread  pointer to the thread to be added to the queue

The queue is represented by a circular list of the thread structures.
The list is double-linked of the type (**prev, *next), accessed by
a pointer to the last element.
*/
static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
struct st_my_thread_var *thread)
struct st_my_thread_var *last;
assert(!thread->next && !thread->prev);
if (! (last= wqueue->last_thread))
thread->next= thread;
thread->prev= &thread->next;
else
thread->prev= last->next->prev;
last->next->prev= &thread->next;
thread->next= last->next;
last->next= thread;
wqueue->last_thread= thread;
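/*
  Sketch of the (**prev, *next) idiom used by this queue and by the
  block chains below (illustrative, self-contained): prev does not
  point at the previous element but at the previous element's next
  pointer, so unlinking needs no special case for the list head.

    struct node
    {
      struct node *next;
      struct node **prev;
    };

    static void unlink_node(struct node *n)
    {
      *n->prev= n->next;      /* make whatever pointed at n point past it */
      if (n->next)
        n->next->prev= n->prev;
    }

  link_into_queue() applies the same trick to a circular list, with
  wqueue->last_thread marking the insertion point.
*/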
/*
Unlink a thread from the double-linked queue of waiting threads

wqueue  pointer to the queue structure
thread  pointer to the thread to be removed from the queue

See NOTES for link_into_queue
*/
static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
struct st_my_thread_var *thread)
assert(thread->next && thread->prev);
if (thread->next == thread)
/* The queue contains only one member */
wqueue->last_thread= NULL;
else
thread->next->prev= thread->prev;
*thread->prev= thread->next;
if (wqueue->last_thread == thread)
wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
thread->prev);
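/*
  STRUCT_PTR() is not defined in this excerpt. It recovers a structure
  pointer from a pointer to one of its members; a typical
  offsetof-based definition looks like this (an assumption, shown for
  readability):

    #include <stddef.h>
    #define STRUCT_PTR(TYPE, MEMBER, a) \
      ((TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)))

  Here it turns thread->prev, which points at the previous element's
  'next' field, back into the st_my_thread_var containing that field.
*/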
/*
Add a thread to the single-linked queue of waiting threads

wqueue  Pointer to the queue structure.
mutex   Cache_lock to acquire after awake.

The queue is represented by a circular list of the thread structures.
The list is single-linked of the type (*next), accessed by a pointer
to the last element.

The function protects against stray signals by verifying that the
current thread is unlinked from the queue when awaking. However,
since several threads can wait for the same event, it might be
necessary for the caller of the function to check again if the
condition for awake is indeed matched.
*/
static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
pthread_mutex_t *mutex)
struct st_my_thread_var *last;
struct st_my_thread_var *thread= my_thread_var;
assert(!thread->next);
assert(!thread->prev); /* Not required, but must be true anyway. */
if (! (last= wqueue->last_thread))
thread->next= thread;
else
thread->next= last->next;
last->next= thread;
wqueue->last_thread= thread;
/*
Wait until thread is removed from queue by the signalling thread.
The loop protects against stray signals.
*/
do
keycache_pthread_cond_wait(&thread->suspend, mutex);
while (thread->next);
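/*
  Callers treat the wakeup as a hint only. Typical use, as in
  resize_key_cache() above:

    while (keycache->in_resize)
      wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);

  Even though wait_on_queue() itself loops until this thread has been
  unlinked, the waited-for condition can change again before the
  awakened thread runs, hence the outer re-check.
*/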
/*
Remove all threads from queue signaling them to proceed

release_whole_queue()
wqueue  pointer to the queue structure

See notes for wait_on_queue().
When removed from the queue each thread is signaled via condition
variable thread->suspend.
*/
static void release_whole_queue(KEYCACHE_WQUEUE *wqueue)
struct st_my_thread_var *last;
struct st_my_thread_var *next;
struct st_my_thread_var *thread;
/* Queue may be empty. */
if (!(last= wqueue->last_thread))
return;
next= last->next;
do
thread= next;
/* Signal the thread. */
keycache_pthread_cond_signal(&thread->suspend);
/* Take thread from queue. */
next= thread->next;
thread->next= NULL;
while (thread != last);
/* Now queue is definitely empty. */
wqueue->last_thread= NULL;
/*
Unlink a block from the chain of dirty/clean blocks
*/
static inline void unlink_changed(BLOCK_LINK *block)
assert(block->prev_changed && *block->prev_changed == block);
if (block->next_changed)
block->next_changed->prev_changed= block->prev_changed;
*block->prev_changed= block->next_changed;
block->next_changed= NULL;
block->prev_changed= NULL;
/*
Link a block into the chain of dirty/clean blocks
*/
static inline void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead)
assert(!block->next_changed);
assert(!block->prev_changed);
block->prev_changed= phead;
if ((block->next_changed= *phead))
(*phead)->prev_changed= &block->next_changed;
*phead= block;
/*
Link a block in a chain of clean blocks of a file.

keycache  Key cache handle
block     Block to relink
file      File to be linked to
unlink    Whether to unlink the block first

Unlink a block from whichever chain it is linked in, if it's
asked for, and link it to the chain of clean blocks of the
specified file.

Never set/clear BLOCK_CHANGED outside of
link_to_file_list() or link_to_changed_list().
You would risk damaging correct counting of changed blocks
and finding blocks in the wrong hash.
*/
static void link_to_file_list(KEY_CACHE *keycache,
BLOCK_LINK *block, int file,
bool unlink_block)
assert(block->status & BLOCK_IN_USE);
assert(block->hash_link && block->hash_link->block == block);
assert(block->hash_link->file == file);
if (unlink_block)
unlink_changed(block);
link_changed(block, &keycache->file_blocks[FILE_HASH(file)]);
if (block->status & BLOCK_CHANGED)
block->status&= ~BLOCK_CHANGED;
keycache->blocks_changed--;
keycache->global_blocks_changed--;
/*
Re-link a block from the clean chain to the dirty chain of a file.

link_to_changed_list()
keycache  key cache handle
block     block to relink

Unlink a block from the chain of clean blocks of a file
and link it to the chain of dirty blocks of the same file.

Never set/clear BLOCK_CHANGED outside of
link_to_file_list() or link_to_changed_list().
You would risk damaging correct counting of changed blocks
and finding blocks in the wrong hash.
*/
static void link_to_changed_list(KEY_CACHE *keycache,
BLOCK_LINK *block)
assert(block->status & BLOCK_IN_USE);
assert(!(block->status & BLOCK_CHANGED));
assert(block->hash_link && block->hash_link->block == block);
unlink_changed(block);
link_changed(block,
&keycache->changed_blocks[FILE_HASH(block->hash_link->file)]);
block->status|= BLOCK_CHANGED;
keycache->blocks_changed++;
keycache->global_blocks_changed++;
/*
Link a block to the LRU chain at the beginning or at the end of
one of its two parts.

keycache  pointer to a key cache data structure
block     pointer to the block to link to the LRU chain
hot       <-> to link the block into the hot subchain
at_end    <-> to link the block at the end of the subchain

The LRU ring is represented by a circular list of block structures.
The list is double-linked of the type (**prev, *next).
The LRU ring is divided into two parts - hot and warm.
There are two pointers to access the last blocks of these two
parts. The beginning of the warm part follows right after the
end of the hot part.
Only blocks of the warm part can be used for eviction.
The first block from the beginning of this subchain is always
taken for eviction (keycache->last_used->next)

LRU chain:       +------+   H O T    +------+
           +----| end  |----...<----| beg  |----+
           |    +------+last        +------+    |
           v<-link in latest hot (new end)      |
           |     link in latest warm (new end)->^
           |    +------+  W A R M   +------+    |
           +----| beg  |---->...----| end  |----+

It is also possible that the block is selected for eviction and thus
not linked in the LRU ring.
*/
static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, bool hot,
bool at_end)
assert((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
assert(block->hash_link); /*backptr to block NULL from free_block()*/
assert(!block->requests);
assert(block->prev_changed && *block->prev_changed == block);
assert(!block->next_used);
assert(!block->prev_used);
if (!hot && keycache->waiting_for_block.last_thread)
/* Signal that in the LRU warm sub-chain an available block has appeared */
struct st_my_thread_var *last_thread=
keycache->waiting_for_block.last_thread;
struct st_my_thread_var *first_thread= last_thread->next;
struct st_my_thread_var *next_thread= first_thread;
HASH_LINK *hash_link= (HASH_LINK *) first_thread->opt_info;
struct st_my_thread_var *thread;
do
thread= next_thread;
next_thread= thread->next;
/*
We notify about the event all threads that ask
for the same page as the first thread in the queue
*/
if ((HASH_LINK *) thread->opt_info == hash_link)
keycache_pthread_cond_signal(&thread->suspend);
unlink_from_queue(&keycache->waiting_for_block, thread);
block->requests++;
while (thread != last_thread);
hash_link->block= block;
/*
NOTE: We assigned the block to the hash_link and signalled the
requesting thread(s). But it is possible that other threads run
first. These threads see the hash_link assigned to a block which
is assigned to another hash_link and not marked BLOCK_IN_SWITCH.
This can be a problem for functions that do not select the block
via its hash_link: flush and free. They only see a block which
is in a "normal" state and don't know that it will be evicted soon.

We cannot set BLOCK_IN_SWITCH here because only one of the
requesting threads must handle the eviction. All others must wait
for it to complete. If we set the flag here, the threads would not
know who is in charge of the eviction. Without the flag, the first
thread takes the stick and sets the flag.

But we need to note in the block that it has been selected for
eviction. It must not be freed. The evicting thread will not
expect the block in the free list. Before freeing we could also
check if block->requests > 1. But I think including another flag
in the check of block->status is slightly more efficient and
probably easier to read.
*/
block->status|= BLOCK_IN_EVICTION;
return;
pins= hot ? &keycache->used_ins : &keycache->used_last;
ins= *pins;
if (ins)
ins->next_used->prev_used= &block->next_used;
block->next_used= ins->next_used;
block->prev_used= &ins->next_used;
ins->next_used= block;
*pins= block;
else
/* The LRU ring is empty. Let the block point to itself. */
keycache->used_last= keycache->used_ins= block->next_used= block;
block->prev_used= &block->next_used;
/*
Unlink a block from the LRU chain

keycache  pointer to a key cache data structure
block     pointer to the block to unlink from the LRU chain

See NOTES for link_block
*/
static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
assert((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
assert(block->hash_link); /*backptr to block NULL from free_block()*/
assert(!block->requests);
assert(block->prev_changed && *block->prev_changed == block);
assert(block->next_used && block->prev_used &&
(block->next_used->prev_used == &block->next_used) &&
(*block->prev_used == block));
if (block->next_used == block)
/* The list contains only one member */
keycache->used_last= keycache->used_ins= NULL;
else
block->next_used->prev_used= block->prev_used;
*block->prev_used= block->next_used;
if (keycache->used_last == block)
keycache->used_last= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
if (keycache->used_ins == block)
keycache->used_ins= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
block->next_used= NULL;
block->prev_used= NULL;
/*
Register requests for a block.

keycache  Pointer to a key cache data structure.
block     Pointer to the block to register a request on.
count     Number of requests. Always 1.

The first request unlinks the block from the LRU ring. This means
that it is protected against eviction.
*/
static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count)
assert(block->status & BLOCK_IN_USE);
assert(block->hash_link);
if (!block->requests)
unlink_block(keycache, block);
block->requests+= count;
/*
Unregister request for a block,
linking it to the LRU chain if it's the last request

keycache  pointer to a key cache data structure
block     pointer to the block to link to the LRU chain
at_end    <-> to link the block at the end of the LRU chain

Every linking to the LRU ring decrements by one a special block
counter (if it's positive). If the at_end parameter is true the block is
added either at the end of warm sub-chain or at the end of hot sub-chain.
It is added to the hot subchain if its counter is zero and the number of
blocks in the warm sub-chain is not less than some low limit (determined by
the division_limit parameter). Otherwise the block is added to the warm
sub-chain. If the at_end parameter is false the block is always added
at the beginning of the warm sub-chain.
Thus a warm block can be promoted to the hot sub-chain when its counter
becomes zero for the first time.
At the same time the block at the very beginning of the hot subchain
might be moved to the beginning of the warm subchain if it stays untouched
for too long a time (this time is determined by parameter age_threshold).

It is also possible that the block is selected for eviction and thus
not linked in the LRU ring.
*/
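/*
  Worked example of the promotion rule above (hypothetical numbers):
  a block enters the cache with hits_left == init_hits_left == 3.
  Each unreg_request() with at_end != 0 decrements hits_left; once it
  reaches zero, and the warm sub-chain still holds more than
  min_warm_blocks blocks, the block is linked into the hot sub-chain.
  If it then stays untouched for more than age_threshold ticks of
  keycache_time, the aging check at the end of unreg_request()
  demotes the oldest hot block back to the warm sub-chain.
*/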
static void unreg_request(KEY_CACHE *keycache,
BLOCK_LINK *block, int at_end)
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
assert(block->hash_link); /*backptr to block NULL from free_block()*/
assert(block->requests);
assert(block->prev_changed && *block->prev_changed == block);
assert(!block->next_used);
assert(!block->prev_used);
if (! --block->requests)
if (block->hits_left)
block->hits_left--;
hot= !block->hits_left && at_end &&
keycache->warm_blocks > keycache->min_warm_blocks;
if (hot)
if (block->temperature == BLOCK_WARM)
keycache->warm_blocks--;
block->temperature= BLOCK_HOT;
link_block(keycache, block, hot, (bool)at_end);
block->last_hit_time= keycache->keycache_time;
keycache->keycache_time++;
/*
At this place, the block might be in the LRU ring or not. If an
evicter was waiting for a block, it was selected for eviction and
not linked in the LRU ring.

Check if we should link a hot block to the warm block sub-chain.
It is possible that we select the same block as above. But it can
also be another block. In any case a block from the LRU ring is
selected. In other words it works even if the above block was
selected for eviction and not linked in the LRU ring. Since this
happens only if the LRU ring is empty, the block selected below
would be NULL and the rest of the function skipped.
*/
block= keycache->used_ins;
if (block && keycache->keycache_time - block->last_hit_time >
keycache->age_threshold)
unlink_block(keycache, block);
link_block(keycache, block, 0, 0);
if (block->temperature != BLOCK_WARM)
keycache->warm_blocks++;
block->temperature= BLOCK_WARM;
/*
Remove a reader of the page in block
*/
static void remove_reader(BLOCK_LINK *block)
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
assert(block->hash_link && block->hash_link->block == block);
assert(block->prev_changed && *block->prev_changed == block);
assert(!block->next_used);
assert(!block->prev_used);
assert(block->hash_link->requests);
if (! --block->hash_link->requests && block->condvar)
keycache_pthread_cond_signal(block->condvar);
/*
Wait until the last reader of the page in block
signals on its termination
*/
static void wait_for_readers(KEY_CACHE *keycache,
BLOCK_LINK *block)
struct st_my_thread_var *thread= my_thread_var;
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
assert(!(block->status & (BLOCK_ERROR | BLOCK_IN_FLUSH |
BLOCK_CHANGED)));
assert(block->hash_link);
assert(block->hash_link->block == block);
/* Linked in file_blocks or changed_blocks hash. */
assert(block->prev_changed && *block->prev_changed == block);
/* Not linked in LRU ring. */
assert(!block->next_used);
assert(!block->prev_used);
while (block->hash_link->requests)
/* There must be no other waiter. We have no queue here. */
assert(!block->condvar);
block->condvar= &thread->suspend;
keycache_pthread_cond_wait(&thread->suspend, &keycache->cache_lock);
block->condvar= NULL;
/*
Add a hash link to a bucket in the hash_table
*/
/*
Remove a hash link from the hash table
*/
static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link)
assert(hash_link->requests == 0);
if ((*hash_link->prev= hash_link->next))
hash_link->next->prev= hash_link->prev;
hash_link->block= NULL;
if (keycache->waiting_for_hash_link.last_thread)
/* Signal that a free hash link has appeared */
struct st_my_thread_var *last_thread=
keycache->waiting_for_hash_link.last_thread;
struct st_my_thread_var *first_thread= last_thread->next;
struct st_my_thread_var *next_thread= first_thread;
KEYCACHE_PAGE *first_page= (KEYCACHE_PAGE *) (first_thread->opt_info);
struct st_my_thread_var *thread;
hash_link->file= first_page->file;
hash_link->diskpos= first_page->filepos;
do
KEYCACHE_PAGE *page;
thread= next_thread;
page= (KEYCACHE_PAGE *) thread->opt_info;
next_thread= thread->next;
/*
We notify about the event all threads that ask
for the same page as the first thread in the queue
*/
if (page->file == hash_link->file && page->filepos == hash_link->diskpos)
keycache_pthread_cond_signal(&thread->suspend);
unlink_from_queue(&keycache->waiting_for_hash_link, thread);
while (thread != last_thread);
link_hash(&keycache->hash_root[KEYCACHE_HASH(hash_link->file,
hash_link->diskpos)],
hash_link);
else
hash_link->next= keycache->free_hash_list;
keycache->free_hash_list= hash_link;
/*
Get the hash link for a page
*/
static HASH_LINK *get_hash_link(KEY_CACHE *keycache,
int file, my_off_t filepos)
register HASH_LINK *hash_link, **start;
/*
Find the bucket in the hash table for the pair (file, filepos);
start contains the head of the bucket list,
hash_link points to the first member of the list
*/
hash_link= *(start= &keycache->hash_root[KEYCACHE_HASH(file, filepos)]);
/* Look for an element for the pair (file, filepos) in the bucket chain */
while (hash_link &&
(hash_link->diskpos != filepos || hash_link->file != file))
hash_link= hash_link->next;
if (! hash_link)
/* There is no hash link in the hash table for the pair (file, filepos) */
if (keycache->free_hash_list)
hash_link= keycache->free_hash_list;
keycache->free_hash_list= hash_link->next;
else if (keycache->hash_links_used < keycache->hash_links)
hash_link= &keycache->hash_link_root[keycache->hash_links_used++];
else
/* Wait for a free hash link */
struct st_my_thread_var *thread= my_thread_var;
KEYCACHE_PAGE page;
page.file= file;
page.filepos= filepos;
thread->opt_info= (void *) &page;
link_into_queue(&keycache->waiting_for_hash_link, thread);
keycache_pthread_cond_wait(&thread->suspend,
&keycache->cache_lock);
thread->opt_info= NULL;
hash_link->file= file;
hash_link->diskpos= filepos;
link_hash(start, hash_link);
/* Register the request for the page */
hash_link->requests++;
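/*
  KEYCACHE_HASH() and FILE_HASH() are not shown in this excerpt.
  A minimal sketch of bucket selection consistent with how they are
  used here (an assumption, not necessarily the original macros):

    #define KEYCACHE_HASH(f, pos) \
      (((uint32_t) ((pos) / keycache->key_cache_block_size) + \
        (uint32_t) (f)) & (keycache->hash_entries - 1))
    #define FILE_HASH(f) ((uint32_t) (f) % CHANGED_BLOCKS_HASH)

  hash_entries is a power of two (see init_key_cache()), so masking
  with hash_entries - 1 is a cheap modulo.
*/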
/*
Get a block for the file page requested by a keycache read/write operation;
If the page is not in the cache return a free block; if there is none,
return the LRU block after saving its buffer if the page is dirty.

keycache        pointer to a key cache data structure
file            handler for the file to read page from
filepos         position of the page in the file
init_hits_left  how to initialize the block counter for the page
wrmode          <-> get for writing
page_st         out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}

Pointer to the found block if successful, 0 - otherwise

For the page from file positioned at filepos the function checks whether
the page is in the key cache specified by the first parameter.
If this is the case it immediately returns the block.
If not, the function first chooses a block for this page. If there are
no unused blocks in the key cache yet, the function takes the block
at the very beginning of the warm sub-chain. It saves the page in that
block if it's dirty before returning the pointer to it.
The function returns in the page_st parameter the following values:
PAGE_READ - if the page is already in the block,
PAGE_TO_BE_READ - if it is to be read yet by the current thread
PAGE_WAIT_TO_BE_READ - if it is to be read by another thread
If an error occurs the BLOCK_ERROR bit is set in the block status.
It might happen that there are no blocks in the LRU chain (in the warm
part) - all blocks are unlinked for some read/write operations. Then the
function waits until the first of these operations links some block back.
*/
static BLOCK_LINK *find_key_block(KEY_CACHE *keycache,
File file, my_off_t filepos,
int init_hits_left,
int wrmode, int *page_st)
HASH_LINK *hash_link;
/*
If the flush phase of a resize operation fails, the cache is left
unusable. This will be detected only after "goto restart".
*/
if (!keycache->can_be_used)
return 0;
/*
Find the hash_link for the requested file block (file, filepos). We
always get a hash_link here. It has registered our request so
that no other thread can use it for another file block until we
release the request (which is done by remove_reader() usually). The
hash_link can have a block assigned to it or not. If there is a
block, it may be assigned to this hash_link or not. In cases where a
block is evicted from the cache, it is taken from the LRU ring and
referenced by the new hash_link. But the block can still be assigned
to its old hash_link for some time if it needs to be flushed first,
or if there are other threads still reading it.

Summary:
hash_link is always returned.
hash_link->block can be:
- NULL or
- not assigned to this hash_link or
- assigned to this hash_link. If assigned, the block can have
- invalid data (when freshly assigned) or
- valid data. Valid data can be
- changed over the file contents (dirty) or
- not changed (clean).
*/
hash_link= get_hash_link(keycache, file, filepos);
assert((hash_link->file == file) && (hash_link->diskpos == filepos));
if ((block= hash_link->block) &&
block->hash_link == hash_link && (block->status & BLOCK_READ))
/* Assigned block with valid (changed or unchanged) contents. */
page_status= PAGE_READ;
/*
else (page_status == -1)
- block not assigned to this hash_link or
- block assigned but not yet read from file (invalid data).
*/
if (keycache->in_resize)
/* This is a request during a resize operation */
if (!block)
struct st_my_thread_var *thread;
/*
The file block is not in the cache. We don't need it in the
cache: we are going to read or write directly to file. Cancel
the request. We can simply decrement hash_link->requests because
we did not release cache_lock since increasing it. So no other
thread can wait for our request to become released.
*/
if (hash_link->requests == 1)
/*
We are the only one to request this hash_link (this file/pos).
*/
hash_link->requests--;
unlink_hash(keycache, hash_link);
return 0;
/*
More requests on the hash_link. Someone tries to evict a block
for this hash_link (could have started before resizing started).
This means that the LRU ring is empty. Otherwise a block could
be assigned immediately. Behave like a thread that wants to
evict a block for this file/pos. Add to the queue of threads
waiting for a block. Wait until there is one assigned.

Refresh the request on the hash_link so that it cannot be reused
for another file/pos.
*/
thread= my_thread_var;
thread->opt_info= (void *) hash_link;
link_into_queue(&keycache->waiting_for_block, thread);
do
{
keycache_pthread_cond_wait(&thread->suspend,
&keycache->cache_lock);
} while (thread->next);
thread->opt_info= NULL;
/*
A block should now be assigned to the hash_link. But it may
still need to be evicted. Anyway, we should re-check the
situation. page_status must be set correctly.
*/
hash_link->requests--;
} /* end of if (!block) */
/*
There is a block for this file/pos in the cache. Register a
request on it. This unlinks it from the LRU ring (if it is there)
and hence protects it against eviction (if not already in
eviction). We need this for returning the block to the caller, for
calling remove_reader() (for debugging purposes), and for calling
free_block(). The only case where we don't need the request is if
the block is in eviction. In that case we have to unregister the
request again later.
*/
reg_requests(keycache, block, 1);
if (page_status != PAGE_READ)
/*
- block not assigned to this hash_link or
- block assigned but not yet read from file (invalid data).

This must be a block in eviction. It will be read soon. We need
to wait here until this has happened. Otherwise the caller could
access a wrong block or a block which is in read. While waiting
we cannot lose hash_link nor block. We have registered a request
on the hash_link. Everything can happen to the block but changes
in the hash_link -> block relationship. In other words:
everything can happen to the block but free or another completed
eviction.

Note that we behave like a secondary requestor here. We just
cannot return with PAGE_WAIT_TO_BE_READ. This would work for
read requests and writes on dirty blocks that are not in flush
only. Waiting here on COND_FOR_REQUESTED works in all cases.
*/
assert(((block->hash_link != hash_link) &&
(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
((block->hash_link == hash_link) &&
!(block->status & BLOCK_READ)));
wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
/*
Here we can trust that the block has been assigned to this
hash_link (block->hash_link == hash_link) and read into the
buffer (BLOCK_READ). The worst thing possible here is that the
block is in free (BLOCK_REASSIGNED). But the block is still
assigned to the hash_link. The freeing thread waits until we
release our request on the hash_link. The block must not be
in eviction again because we registered a request on it before.
*/
assert(block->hash_link == hash_link);
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
assert(!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)));
/*
The block is in the cache. Assigned to the hash_link. Valid data.
Note that in case of page_st == PAGE_READ, the block can be marked
for eviction. In any case it can be marked for freeing.
*/
/* A reader can just read the block. */
*page_st= PAGE_READ;
assert((hash_link->file == file) &&
(hash_link->diskpos == filepos) &&
(block->hash_link == hash_link));
/*
This is a writer. No two writers for the same block can exist.
This must be assured by locks outside of the key cache.
*/
assert(!(block->status & BLOCK_FOR_UPDATE));
while (block->status & BLOCK_IN_FLUSH)
/*
Wait until the block is flushed to file. Do not release the
request on the hash_link yet to prevent that the block is freed
or reassigned while we wait. While we wait, several things can
happen to the block, including another flush. But the block
cannot be reassigned to another hash_link until we release our
request on it. But it can be marked BLOCK_REASSIGNED from free
or eviction, while they wait for us to release the hash_link.
*/
wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
If the flush phase failed, the resize could have finished while
1598
if (!keycache->in_resize)
1600
remove_reader(block);
1601
unreg_request(keycache, block, 1);
1604
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1605
assert(!(block->status & BLOCK_FOR_UPDATE));
1606
assert(block->hash_link == hash_link);
1609
if (block->status & BLOCK_CHANGED)
1612
We want to write a block with changed contents. If the cache
1613
block size is bigger than the callers block size (e.g. MyISAM),
1614
the caller may replace part of the block only. Changes of the
1615
other part of the block must be preserved. Since the block has
1616
not yet been selected for flush, we can still add our changes.
1618
*page_st= PAGE_READ;
1619
assert((hash_link->file == file) &&
1620
(hash_link->diskpos == filepos) &&
1621
(block->hash_link == hash_link));
1626
This is a write request for a clean block. We do not want to have
1627
new dirty blocks in the cache while resizing. We will free the
1628
block and write directly to file. If the block is in eviction or
1629
in free, we just let it go.
1631
Unregister from the hash_link. This must be done before freeing
1632
the block. And it must be done if not freeing the block. Because
1633
we could have waited above, we need to call remove_reader(). Other
1634
threads could wait for us to release our request on the hash_link.
1636
remove_reader(block);
1638
/* If the block is not in eviction and not in free, we can free it. */
1639
if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1643
Free block as we are going to write directly to file.
1644
Although we have an exlusive lock for the updated key part,
1645
the control can be yielded by the current thread as we might
1646
have unfinished readers of other key parts in the block
1647
buffer. Still we are guaranteed not to have any readers
1648
of the key part we are writing into until the block is
1649
removed from the cache as we set the BLOCK_REASSIGNED
1650
flag (see the code below that handles reading requests).
1652
free_block(keycache, block);
1657
The block will be evicted/freed soon. Don't touch it in any way.
1658
Unregister the request that we registered above.
1660
unreg_request(keycache, block, 1);
1663
The block is still assigned to the hash_link (the file/pos that
1664
we are going to write to). Wait until the eviction/free is
1665
complete. Otherwise the direct write could complete before all
1666
readers are done with the block. So they could read outdated
1669
Since we released our request on the hash_link, it can be reused
1670
for another file/pos. Hence we cannot just check for
1671
block->hash_link == hash_link. As long as the resize is
1672
proceeding the block cannot be reassigned to the same file/pos
1673
again. So we can terminate the loop when the block is no longer
1674
assigned to this file/pos.
1678
wait_on_queue(&block->wqueue[COND_FOR_SAVED],
1679
&keycache->cache_lock);
1681
If the flush phase failed, the resize could have finished
1682
while we waited here.
1684
if (!keycache->in_resize)
1686
} while (block->hash_link &&
1687
(block->hash_link->file == file) &&
1688
(block->hash_link->diskpos == filepos));
if (page_status == PAGE_READ &&
(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
BLOCK_REASSIGNED)))
/*
This is a request for a block to be removed from cache. The block
is assigned to this hash_link and contains valid data, but is
marked for eviction or to be freed. Possible reasons why it has
not yet been evicted/freed can be a flush before reassignment
(BLOCK_IN_SWITCH), readers of the block have not finished yet
(BLOCK_REASSIGNED), or the evicting thread did not yet awake after
the block has been selected for it (BLOCK_IN_EVICTION).

Only reading requests can proceed until the old dirty page is flushed,
all others are to be suspended, then resubmitted
*/
if (!wrmode && !(block->status & BLOCK_REASSIGNED))
/*
This is a read request and the block is not yet reassigned. We can
register our request and proceed. This unlinks the block from
the LRU ring and protects it against eviction.
*/
reg_requests(keycache, block, 1);
/*
Either this is a write request for a block that is in eviction
or in free. We must not use it any more. Instead we must evict
another block. But we cannot do this before the eviction/free is
done. Otherwise we would find the same hash_link + block again
and again.

Or this is a read request for a block in eviction/free that does
not require a flush, but waits for readers to finish with the
block. We do not read this block to let the eviction/free happen
as soon as possible. Again we must wait so that we don't find
the same hash_link + block again and again.
*/
assert(hash_link->requests);
hash_link->requests--;
wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
/*
The block is no longer assigned to this hash_link.
*/
/*
This is a request for a new block or for a block not to be removed.
Either
- block not assigned to this hash_link or
- block assigned but not yet read from file,
or
- block assigned with valid (changed or unchanged) data and
- it will not be reassigned/freed.
*/
/* No block is assigned to the hash_link yet. */
if (keycache->blocks_unused)
if (keycache->free_block_list)
/* There is a block in the free list. */
block= keycache->free_block_list;
keycache->free_block_list= block->next_used;
block->next_used= NULL;
else
/* There are some never used blocks, take first of them */
assert(keycache->blocks_used <
(uint32_t) keycache->disk_blocks);
block= &keycache->block_root[keycache->blocks_used];
block->buffer= ADD_TO_PTR(keycache->block_mem,
((uint32_t) keycache->blocks_used*
keycache->key_cache_block_size),
unsigned char*);
keycache->blocks_used++;
assert(!block->next_used);
assert(!block->prev_used);
assert(!block->next_changed);
assert(!block->prev_changed);
assert(!block->hash_link);
assert(!block->status);
assert(!block->requests);
keycache->blocks_unused--;
block->status= BLOCK_IN_USE;
block->offset= keycache->key_cache_block_size;
block->temperature= BLOCK_COLD;
block->hits_left= init_hits_left;
block->last_hit_time= 0;
block->hash_link= hash_link;
hash_link->block= block;
link_to_file_list(keycache, block, file, 0);
page_status= PAGE_TO_BE_READ;
/*
There are no free blocks and no never used blocks, use a block
from the LRU ring.
*/
if (! keycache->used_last)
/*
The LRU ring is empty. Wait until a new block is added to
it. Several threads might wait here for the same hash_link,
all of them must get the same block. While waiting for a
block, after a block is selected for this hash_link, other
threads can run first before this one awakes. During this
time interval other threads find this hash_link pointing to
the block, which is still assigned to another hash_link. In
this case the block is not marked BLOCK_IN_SWITCH yet, but
it is marked BLOCK_IN_EVICTION.
*/
struct st_my_thread_var *thread= my_thread_var;
thread->opt_info= (void *) hash_link;
link_into_queue(&keycache->waiting_for_block, thread);
do
keycache_pthread_cond_wait(&thread->suspend,
&keycache->cache_lock);
while (thread->next);
thread->opt_info= NULL;
/* Assert that block has a request registered. */
assert(hash_link->block->requests);
/* Assert that block is not in LRU ring. */
assert(!hash_link->block->next_used);
assert(!hash_link->block->prev_used);
/*
If we waited above, hash_link->block has been assigned by
link_block(). Otherwise it is still NULL. In the latter case
we need to grab a block from the LRU ring ourselves.
*/
block= hash_link->block;
if (! block)
/* Select the last block from the LRU ring. */
block= keycache->used_last->next_used;
block->hits_left= init_hits_left;
block->last_hit_time= 0;
hash_link->block= block;
/*
Register a request on the block. This unlinks it from the
LRU ring and protects it against eviction.
*/
assert(!block->requests);
reg_requests(keycache, block, 1);
/*
We do not need to set block->status|= BLOCK_IN_EVICTION here
because we will set block->status|= BLOCK_IN_SWITCH
immediately without releasing the lock in between. This does
also support debugging. When looking at the block, one can
see if the block has been selected by link_block() after the
LRU ring was empty, or if it was grabbed directly from the
LRU ring in this branch.
*/
/*
If we had to wait above, there is a small chance that another
thread grabbed this block for the same file block already. But
in most cases the first condition is true.
*/
if (block->hash_link != hash_link &&
!(block->status & BLOCK_IN_SWITCH))
/* this is a primary request for a new page */
block->status|= BLOCK_IN_SWITCH;
if (block->status & BLOCK_CHANGED)
/* The block contains a dirty page - push it out of the cache */
if (block->status & BLOCK_IN_FLUSH)
/*
The block is marked for flush. If we do not wait here,
it could happen that we write the block, reassign it to
another file block, then, before the new owner can read
the new file block, the flusher writes the cache block
(which still has the old contents) to the new file block!
*/
wait_on_queue(&block->wqueue[COND_FOR_SAVED],
&keycache->cache_lock);
/*
The block is marked BLOCK_IN_SWITCH. It should be left
alone except for reading. No free, no write.
*/
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
assert(!(block->status & (BLOCK_REASSIGNED |
BLOCK_FOR_UPDATE)));
block->status|= BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE;
/*
BLOCK_IN_EVICTION may be true or not. Other flags must
have a fixed value.
*/
assert((block->status & ~BLOCK_IN_EVICTION) ==
(BLOCK_READ | BLOCK_IN_SWITCH |
BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
BLOCK_CHANGED | BLOCK_IN_USE));
assert(block->hash_link);
keycache_pthread_mutex_unlock(&keycache->cache_lock);
/*
The call is thread safe because only the current
thread might change the block->hash_link value
*/
error= (pwrite(block->hash_link->file,
block->buffer+block->offset,
block->length - block->offset,
block->hash_link->diskpos + block->offset) == 0);
keycache_pthread_mutex_lock(&keycache->cache_lock);
/* Block status must not have changed. */
assert((block->status & ~BLOCK_IN_EVICTION) ==
(BLOCK_READ | BLOCK_IN_SWITCH |
BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
BLOCK_CHANGED | BLOCK_IN_USE));
keycache->global_cache_write++;
block->status|= BLOCK_REASSIGNED;
/*
The block comes from the LRU ring. It must have a hash_link
assigned.
*/
assert(block->hash_link);
if (block->hash_link)
/*
All pending requests for this page must be resubmitted.
This must be done before waiting for readers. They could
wait for the flush to complete. And we must also do it
after the wait. Flushers might try to free the block while
we wait. They would wait until the reassignment is
complete. Also the block status must reflect the correct
situation: The block is not changed nor in flush any more.
Note that we must not change the BLOCK_CHANGED flag
outside of link_to_file_list() so that it is always in the
correct queue and the *blocks_changed counters are
correct.
*/
block->status&= ~(BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE);
link_to_file_list(keycache, block, block->hash_link->file, 1);
release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
/*
The block is still assigned to its old hash_link.
Wait until all pending read requests
for this page are executed
(we could have avoided this waiting, if we had read
a page in the cache in a sweep, without yielding control)
*/
wait_for_readers(keycache, block);
assert(block->hash_link && block->hash_link->block == block &&
block->prev_changed);
/* The reader must not have been a writer. */
assert(!(block->status & BLOCK_CHANGED));
/* Wake flushers that might have found the block in between. */
release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
/* Remove the hash link for the old file block from the hash. */
unlink_hash(keycache, block->hash_link);
/*
For sanity checks link_to_file_list() asserts that block
and hash_link refer to each other. Hence we need to assign
the hash_link first, but then we would not know if it was
linked before. Hence we would not know whether to unlink it. So
unlink it here and call link_to_file_list(..., false).
*/
unlink_changed(block);
block->status= error ? BLOCK_ERROR : BLOCK_IN_USE;
block->offset= keycache->key_cache_block_size;
block->hash_link= hash_link;
link_to_file_list(keycache, block, file, 0);
page_status= PAGE_TO_BE_READ;
assert(block->hash_link->block == block);
assert(hash_link->block->hash_link == hash_link);
/*
Either (block->hash_link == hash_link),
or (block->status & BLOCK_IN_SWITCH).

This is for secondary requests for a new file block only.
Either it is already assigned to the new hash_link meanwhile
(if we had to wait due to empty LRU), or it is already in
eviction by another thread. Since this block has been
grabbed from the LRU ring and attached to this hash_link,
another thread cannot grab the same block from the LRU ring
anymore. If the block is in eviction already, it must become
attached to the same hash_link and as such destined for the
same file block.
*/
page_status= (((block->hash_link == hash_link) &&
(block->status & BLOCK_READ)) ?
PAGE_READ : PAGE_WAIT_TO_BE_READ);
/*
Block is not NULL. This hash_link points to a block.
Either
- block not assigned to this hash_link (yet) or
- block assigned but not yet read from file,
or
- block assigned with valid (changed or unchanged) data and
- it will not be reassigned/freed.

The first condition means hash_link points to a block in
eviction. This is not necessarily marked by BLOCK_IN_SWITCH yet.
But then it is marked BLOCK_IN_EVICTION. See the NOTE in
link_block(). In both cases it is destined for this hash_link
and its file block address. When this hash_link got its block
address, the block was removed from the LRU ring and cannot be
selected for eviction (for another hash_link) again.

Register a request on the block. This is another protection
against eviction.
*/
assert(((block->hash_link != hash_link) &&
(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
((block->hash_link == hash_link) &&
!(block->status & BLOCK_READ)) ||
((block->status & BLOCK_READ) &&
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))));
reg_requests(keycache, block, 1);
page_status= (((block->hash_link == hash_link) &&
(block->status & BLOCK_READ)) ?
PAGE_READ : PAGE_WAIT_TO_BE_READ);
assert(page_status != -1);
/* Same assert basically, but be very sure. */
assert(block);
/* Assert that block has a request and is not in LRU ring. */
assert(block->requests);
assert(!block->next_used);
assert(!block->prev_used);
/* Assert that we return the correct block. */
assert((page_status == PAGE_WAIT_TO_BE_READ) ||
((block->hash_link->file == file) &&
(block->hash_link->diskpos == filepos)));
*page_st= page_status;
return block;
/*
Read into a key cache block buffer from disk.

keycache     pointer to a key cache data structure
block        block into whose buffer the data is to be read
read_length  size of data to be read
min_length   at least this much data must be read
primary      <-> the current thread will read the data

The function either reads page data from file into the block buffer,
or waits until another thread reads it. What page to read is determined
by the block parameter - a reference to a hash link for this page.
If an error occurs the BLOCK_ERROR bit is set in the block status.
We do not report an error when the size of the successfully read
portion is less than read_length, but not less than min_length.
*/
static void read_block(KEY_CACHE *keycache,
BLOCK_LINK *block, uint32_t read_length,
uint32_t min_length, bool primary)
uint32_t got_length;
/* On entry cache_lock is locked */
if (primary)
/*
This code is executed only by threads that submitted primary
requests. Until block->status contains BLOCK_READ, all other
requests for the block become secondary requests. For a primary
request the block must be properly initialized.
*/
assert(((block->status & ~BLOCK_FOR_UPDATE) == BLOCK_IN_USE));
assert((block->length == 0));
assert((block->offset == keycache->key_cache_block_size));
assert((block->requests > 0));
keycache->global_cache_read++;
/* Page is not in buffer yet, is to be read from disk */
keycache_pthread_mutex_unlock(&keycache->cache_lock);
/*
Here other threads may step in and register as secondary readers.
They will register in block->wqueue[COND_FOR_REQUESTED].
*/
got_length= pread(block->hash_link->file, block->buffer, read_length, block->hash_link->diskpos);
keycache_pthread_mutex_lock(&keycache->cache_lock);
/*
The block can now have been marked for free (in case of
FLUSH_RELEASE). Otherwise the state must be unchanged.
*/
assert(((block->status & ~(BLOCK_REASSIGNED |
BLOCK_FOR_UPDATE)) == BLOCK_IN_USE));
assert((block->length == 0));
assert((block->offset == keycache->key_cache_block_size));
assert((block->requests > 0));
if (got_length < min_length)
block->status|= BLOCK_ERROR;
else
block->status|= BLOCK_READ;
block->length= got_length;
/*
Do not set block->offset here. If this block is marked
BLOCK_CHANGED later, we want to flush only the modified part. So
only a writer may set block->offset down from
keycache->key_cache_block_size.
*/
/* Signal that all pending requests for this page now can be processed */
release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
else
/*
This code is executed only by threads that submitted secondary
requests. At this point it could happen that the cache block is
not yet assigned to the hash_link for the requested file block.
But at awake from the wait this should be the case. Unfortunately
we cannot assert this here because we do not know the hash_link
for the requested file block nor the file and position. So we have
to assert this in the caller.
*/
wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
/*
Read a block of data from a cached file into a buffer;
*/
int key_cache_insert(KEY_CACHE *keycache,
File file, my_off_t filepos, int level,
unsigned char *buff, uint32_t length)
if (keycache->key_cache_inited)
/* Key cache is used */
register BLOCK_LINK *block;
uint32_t read_length;
bool locked_and_incremented= false;
/*
When the keycache is once initialized, we use the cache_lock to
reliably distinguish the cases of normal operation, resizing, and
disabled cache. We always increment and decrement
'cnt_for_resize_op' so that a resizer can wait for pending I/O.
*/
keycache_pthread_mutex_lock(&keycache->cache_lock);
/*
We do not load index data into a disabled cache nor into an
ongoing resize.
*/
if (!keycache->can_be_used || keycache->in_resize)
/* Register the pseudo I/O for the next resize. */
inc_counter_for_resize_op(keycache);
locked_and_incremented= true;
/* Loaded data may not always be aligned to cache blocks. */
offset= (uint) (filepos % keycache->key_cache_block_size);
/* Load data in key_cache_block_size increments. */
/* Cache could be disabled or resizing in a later iteration. */
if (!keycache->can_be_used || keycache->in_resize)
/* Start loading at the beginning of the cache block. */
filepos-= offset;
/* Do not load beyond the end of the cache block. */
read_length= length;
set_if_smaller(read_length, keycache->key_cache_block_size-offset);
assert(read_length > 0);
/* The block has been read by the caller already. */
keycache->global_cache_read++;
/* Request the cache block that matches file/pos. */
keycache->global_cache_r_requests++;
block= find_key_block(keycache, file, filepos, level, 0, &page_st);
if (!block)
/*
This happens only for requests submitted during key cache
resize. The block is not in the cache and shall not go in.
Stop loading index data.
*/
break;
if (!(block->status & BLOCK_ERROR))
if ((page_st == PAGE_WAIT_TO_BE_READ) ||
((page_st == PAGE_TO_BE_READ) &&
(offset || (read_length < keycache->key_cache_block_size))))
/*
Either
this is a secondary request for a block to be read into the
cache. The block is in eviction. It is not yet assigned to
the requested file block (it does not point to the right
hash_link). So we cannot call remove_reader() on the block.
And we cannot access the hash_link directly here. We need to
wait until the assignment is complete. read_block() executes
the correct wait when called with primary == false.

Or
this is a primary request for a block to be read into the
cache and the supplied data does not fill the whole block.

This function is called on behalf of a LOAD INDEX INTO CACHE
statement, which is a read-only task and allows other
readers. It is possible that a parallel running reader tries
to access this block. If it needs more data than has been
supplied here, it would report an error. To be sure that we
have all data in the block that is available in the file, we
read the block ourselves.

Though reading again what the caller did read already is an
expensive operation, we need to do this for correctness.
*/
read_block(keycache, block, keycache->key_cache_block_size,
read_length + offset, (page_st == PAGE_TO_BE_READ));
/*
A secondary request must now have the block assigned to the
requested file block. It does not hurt to check it for
primary requests too.
*/
assert(keycache->can_be_used);
assert(block->hash_link->file == file);
assert(block->hash_link->diskpos == filepos);
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
else if (page_st == PAGE_TO_BE_READ)
/*
This is a new block in the cache. If we come here, we have
data for the whole block.
*/
assert(block->hash_link->requests);
assert(block->status & BLOCK_IN_USE);
assert((page_st == PAGE_TO_BE_READ) ||
(block->status & BLOCK_READ));
#if !defined(SERIALIZED_READ_FROM_CACHE)
keycache_pthread_mutex_unlock(&keycache->cache_lock);
/*
Here other threads may step in and register as secondary readers.
They will register in block->wqueue[COND_FOR_REQUESTED].
*/
#endif
/* Copy data from buff */
memcpy(block->buffer+offset, buff, (size_t) read_length);
2513
#if !defined(SERIALIZED_READ_FROM_CACHE)
2514
keycache_pthread_mutex_lock(&keycache->cache_lock);
2515
assert(block->status & BLOCK_IN_USE);
2516
assert((page_st == PAGE_TO_BE_READ) ||
2517
(block->status & BLOCK_READ));
2520
After the data is in the buffer, we can declare the block
2521
valid. Now other threads do not need to register as
2522
secondary readers any more. They can immediately access the
2525
block->status|= BLOCK_READ;
2526
block->length= read_length+offset;
2528
Do not set block->offset here. If this block is marked
2529
BLOCK_CHANGED later, we want to flush only the modified part. So
2530
only a writer may set block->offset down from
2531
keycache->key_cache_block_size.
2533
/* Signal all pending requests. */
2534
release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2539
page_st == PAGE_READ. The block is in the buffer. All data
2540
must already be present. Blocks are always read with all
2541
data available on file. Assert that the block does not have
2542
less contents than the preloader supplies. If the caller has
2543
data beyond block->length, it means that a file write has
2544
been done while this block was in cache and not extended
2545
with the new data. If the condition is met, we can simply
2548
assert((page_st == PAGE_READ) &&
2549
(read_length + offset <= block->length));
2553
A secondary request must now have the block assigned to the
2554
requested file block. It does not hurt to check it for primary
2557
assert(block->hash_link->file == file);
2558
assert(block->hash_link->diskpos == filepos);
2559
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2560
} /* end of if (!(block->status & BLOCK_ERROR)) */
2563
remove_reader(block);
2566
Link the block into the LRU ring if it's the last submitted
2567
request for the block. This enables eviction for the block.
2569
unreg_request(keycache, block, 1);
2571
error= (block->status & BLOCK_ERROR);
2577
filepos+= read_length+offset;
2580
} while ((length-= read_length));
2583
if (locked_and_incremented)
2584
dec_counter_for_resize_op(keycache);
2585
keycache_pthread_mutex_unlock(&keycache->cache_lock);
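/*
  Illustrative sketch (hypothetical helper, not used by the cache): the
  block-aligned chunking that key_cache_insert() and key_cache_write()
  both perform. A request at an arbitrary file position is split into
  pieces that never cross a cache-block boundary; only the first piece
  can start at a non-zero offset. The caller must pass length > 0, as
  the asserts in the real functions require.
*/
#if 0
#include <stdint.h>
#include <stdio.h>

static void walk_block_chunks(uint64_t filepos, uint32_t length,
                              uint32_t block_size)
{
  uint32_t offset= (uint32_t) (filepos % block_size);
  do
  {
    uint32_t chunk= length;
    if (chunk > block_size - offset)
      chunk= block_size - offset;         /* same as set_if_smaller() */
    filepos-= offset;                     /* block start on file */
    printf("block@%llu offset=%u len=%u\n",
           (unsigned long long) filepos, offset, chunk);
    filepos+= chunk + offset;             /* next block start */
    offset= 0;                            /* later chunks are aligned */
    length-= chunk;
  } while (length);
}
#endif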
int key_cache_write(KEY_CACHE *keycache,
                    File file, my_off_t filepos, int level,
                    unsigned char *buff, uint32_t length,
                    uint32_t block_length __attribute__((unused)),
                    int dont_write)
{
  bool locked_and_incremented= false;
  int error= 0;

  if (!dont_write)
  {
    /* purecov: begin inspected */
    /* Not used in the server. */
    /* Force writing from buff into disk. */
    keycache->global_cache_w_requests++;
    keycache->global_cache_write++;
    if (pwrite(file, buff, length, filepos) == 0)
      return 1;
    /* purecov: end */
  }

  if (keycache->key_cache_inited)
  {
    /* Key cache is used */
    register BLOCK_LINK *block;
    uint32_t read_length;
    uint32_t offset;
    int page_st;

    /*
      When the key cache is once initialized, we use the cache_lock to
      reliably distinguish the cases of normal operation, resizing, and
      disabled cache. We always increment and decrement
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
    */
    keycache_pthread_mutex_lock(&keycache->cache_lock);
    /*
      Cache resizing has two phases: flushing and re-initializing. In
      the flush phase write requests can modify dirty blocks that are
      not yet in flush. Otherwise they are allowed to bypass the cache.
      find_key_block() returns NULL in both cases (clean blocks and
      non-cached blocks).

      After the flush phase new I/O requests must wait until the
      re-initialization is done. The re-initialization can be done only
      if no I/O request is in progress. The reason is that
      key_cache_block_size can change. With an enabled cache, I/O is
      done in chunks of key_cache_block_size. Every chunk tries to use
      a cache block first. If the block size changes in the middle, a
      block could be missed and data could be written below a cached
      block.
    */
    while (keycache->in_resize && !keycache->resize_in_flush)
      wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
    /* Register the I/O for the next resize. */
    inc_counter_for_resize_op(keycache);
    locked_and_incremented= true;
    /* Requested data may not always be aligned to cache blocks. */
    offset= (uint) (filepos % keycache->key_cache_block_size);
    /* Write data in key_cache_block_size increments. */
    do
    {
      /* Cache could be disabled in a later iteration. */
      if (!keycache->can_be_used)
        goto no_key_cache;
      /* Start writing at the beginning of the cache block. */
      filepos-= offset;
      /* Do not write beyond the end of the cache block. */
      read_length= length;
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
      assert(read_length > 0);

      /* Request the cache block that matches file/pos. */
      keycache->global_cache_w_requests++;
      block= find_key_block(keycache, file, filepos, level, 1, &page_st);
      if (!block)
      {
        /*
          This happens only for requests submitted during key cache
          resize. The block is not in the cache and shall not go in.
          Write directly to file.
        */
        if (dont_write)
        {
          /* Used in the server. */
          keycache->global_cache_write++;
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
          if (pwrite(file, (unsigned char*) buff, read_length,
                     filepos + offset) == 0)
            error= 1;
          keycache_pthread_mutex_lock(&keycache->cache_lock);
        }
        goto next_block;
      }
      /*
        Prevent the block from flushing and from being selected to be
        freed. This must be set when we release the cache_lock.
        However, we must not set the status of the block before it is
        assigned to this file/pos.
      */
      if (page_st != PAGE_WAIT_TO_BE_READ)
        block->status|= BLOCK_FOR_UPDATE;
      /*
        We must read the file block first if it is not yet in the cache
        and we do not replace all of its contents.

        In cases where the cache block is big enough to contain (parts
        of) index blocks of different indexes, our request can be
        secondary (PAGE_WAIT_TO_BE_READ). In this case another thread is
        reading the file block. If the read completes after us, it
        overwrites our new contents with the old contents. So we have to
        wait for the other thread to complete the read of this block.
        read_block() takes care of the wait.
      */
      if (!(block->status & BLOCK_ERROR) &&
          ((page_st == PAGE_TO_BE_READ &&
            (offset || read_length < keycache->key_cache_block_size)) ||
           (page_st == PAGE_WAIT_TO_BE_READ)))
      {
        read_block(keycache, block,
                   offset + read_length >= keycache->key_cache_block_size?
                   offset : keycache->key_cache_block_size,
                   offset, (page_st == PAGE_TO_BE_READ));
        assert(keycache->can_be_used);
        assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
        /*
          Prevent the block from flushing and from being selected to be
          freed. This must be set when we release the cache_lock.
          Here we set it in case we could not set it above.
        */
        block->status|= BLOCK_FOR_UPDATE;
      }
      /*
        The block should always be assigned to the requested file block
        here. It need not be BLOCK_READ when overwriting the whole block.
      */
      assert(block->hash_link->file == file);
      assert(block->hash_link->diskpos == filepos);
      assert(block->status & BLOCK_IN_USE);
      assert((page_st == PAGE_TO_BE_READ) || (block->status & BLOCK_READ));
      /*
        The block to be written must not be marked BLOCK_REASSIGNED.
        Otherwise it could be freed in dirty state or reused without
        another flush during eviction. It must also not be in flush.
        Otherwise the old contents may have been flushed already and
        the flusher could clear BLOCK_CHANGED without flushing the
        new changed contents.
      */
      assert(!(block->status & BLOCK_REASSIGNED));

      while (block->status & BLOCK_IN_FLUSHWRITE)
      {
        /*
          Another thread is flushing the block. It was dirty already.
          Wait until the block is flushed to file. Otherwise we could
          modify the buffer contents just while it is written to file.
          Unpredictable file block contents would be the result.
          While we wait, several things can happen to the block,
          including another flush. But the block cannot be reassigned to
          another hash_link until we release our request on it.
        */
        wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
        assert(keycache->can_be_used);
        assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
        /* Still must not be marked for free. */
        assert(!(block->status & BLOCK_REASSIGNED));
        assert(block->hash_link && (block->hash_link->block == block));
      }

      /*
        We could perhaps release the cache_lock during access of the
        data like in the other functions. Locks outside of the key cache
        assure that readers and a writer do not access the same range of
        data. Parallel accesses should happen only if the cache block
        contains multiple index block (fragment)s. So different parts of
        the buffer would be read/written. An attempt to flush during
        memcpy() is prevented with BLOCK_FOR_UPDATE.
      */
      if (!(block->status & BLOCK_ERROR))
      {
#if !defined(SERIALIZED_READ_FROM_CACHE)
        keycache_pthread_mutex_unlock(&keycache->cache_lock);
#endif
        memcpy(block->buffer+offset, buff, (size_t) read_length);

#if !defined(SERIALIZED_READ_FROM_CACHE)
        keycache_pthread_mutex_lock(&keycache->cache_lock);
#endif
      }

      if (!dont_write)
      {
        /* Not used in the server. buff has been written to disk at start. */
        if ((block->status & BLOCK_CHANGED) &&
            (!offset && read_length >= keycache->key_cache_block_size))
          link_to_file_list(keycache, block, block->hash_link->file, 1);
      }
      else if (! (block->status & BLOCK_CHANGED))
        link_to_changed_list(keycache, block);
      block->status|= BLOCK_READ;
      /*
        Allow the block to be selected to be freed. Since it is marked
        BLOCK_CHANGED too, it won't be selected to be freed without
        a flush.
      */
      block->status&= ~BLOCK_FOR_UPDATE;
      set_if_smaller(block->offset, offset);
      set_if_bigger(block->length, read_length+offset);

      /* Threads may be waiting for the changes to be complete. */
      release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);

      /*
        If only a part of the cache block is to be replaced, and the
        rest has been read from file, then the cache lock has been
        released for I/O and it could be possible that another thread
        wants to evict or free the block and waits for it to be
        released. So we must not just decrement hash_link->requests, but
        also wake a waiting thread.
      */
      remove_reader(block);

      /*
        Link the block into the LRU ring if it's the last submitted
        request for the block. This enables eviction for the block.
      */
      unreg_request(keycache, block, 1);

      if (block->status & BLOCK_ERROR)
      {
        error= 1;
        break;
      }

    next_block:
      buff+= read_length;
      filepos+= read_length+offset;
      offset= 0;

    } while ((length-= read_length));
    goto end;
  }

no_key_cache:
  /* Key cache is not used */

  /* Used in the server. */
  keycache->global_cache_w_requests++;
  keycache->global_cache_write++;
  if (locked_and_incremented)
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  if (pwrite(file, (unsigned char*) buff, length, filepos) == 0)
    error= 1;
  if (locked_and_incremented)
    keycache_pthread_mutex_lock(&keycache->cache_lock);

end:
  if (locked_and_incremented)
  {
    dec_counter_for_resize_op(keycache);
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
  }
  return(error);
}
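/*
  Illustrative sketch (hypothetical helper): the read-before-write
  decision key_cache_write() makes. A write that covers a whole cache
  block can skip the read; a write that touches only part of a block
  must first bring the rest of the block's file contents into the
  buffer, or stale bytes around the new data would later be flushed.
*/
#if 0
#include <stdbool.h>
#include <stdint.h>

static bool write_needs_prior_read(uint32_t offset, uint32_t write_length,
                                   uint32_t block_size)
{
  /* Mirrors: page_st == PAGE_TO_BE_READ &&
              (offset || read_length < key_cache_block_size) */
  return offset != 0 || write_length < block_size;
}
#endif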
/*
  Free block.

    keycache   Pointer to a key cache data structure
    block      Pointer to the block to free

  Remove the reference to the block from the hash table.
  Remove the block from the chain of clean blocks.
  Add the block to the free list.

  Block must not be free (status == 0).
  Block must not be in free_block_list.
  Block must not be in the LRU ring.
  Block must not be in eviction (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH).
  Block must not be in free (BLOCK_REASSIGNED).
  Block must not be in flush (BLOCK_IN_FLUSH).
  Block must not be dirty (BLOCK_CHANGED).
  Block must not be in the changed_blocks (dirty) hash.
  Block must be in the file_blocks (clean) hash.
  Block must refer to a hash_link.
  Block must have a request registered on it.
*/

static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block)
{
  /*
    Assert that the block is not free already. And that it is in a clean
    state. Note that the block might just be assigned to a hash_link and
    not yet read (BLOCK_READ may not be set here). In this case a reader
    is registered in the hash_link and free_block() will wait for it
    below.
  */
  assert((block->status & BLOCK_IN_USE) &&
         !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                            BLOCK_REASSIGNED | BLOCK_IN_FLUSH |
                            BLOCK_CHANGED | BLOCK_FOR_UPDATE)));
  /* Assert that the block is in a file_blocks chain. */
  assert(block->prev_changed && *block->prev_changed == block);
  /* Assert that the block is not in the LRU ring. */
  assert(!block->next_used && !block->prev_used);

  /*
    IMHO the below condition (if()) makes no sense. I can't see how it
    could be possible that free_block() is entered with a NULL hash_link
    pointer. The only place where it can become NULL is in free_block()
    (or before its first use ever, but for those blocks free_block() is
    not called). I don't remove the conditional as it cannot harm, but
    place an assert to confirm my hypothesis. Eventually the
    condition (if()) can be removed.
  */
  assert(block->hash_link && block->hash_link->block == block);
  if (block->hash_link)
  {
    /*
      While waiting for readers to finish, new readers might request the
      block. But since we set block->status|= BLOCK_REASSIGNED, they
      will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
      later.
    */
    block->status|= BLOCK_REASSIGNED;
    wait_for_readers(keycache, block);
    /*
      The block must not have been freed by another thread. Repeat some
      checks. An additional requirement is that it must be read now
      (BLOCK_READ).
    */
    assert(block->hash_link && block->hash_link->block == block);
    assert((block->status & (BLOCK_READ | BLOCK_IN_USE |
                             BLOCK_REASSIGNED)) &&
           !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                              BLOCK_IN_FLUSH | BLOCK_CHANGED |
                              BLOCK_FOR_UPDATE)));
    assert(block->prev_changed && *block->prev_changed == block);
    assert(!block->prev_used);
    /*
      Unset BLOCK_REASSIGNED again. If we hand the block to an evicting
      thread (through unreg_request() below), other threads must not see
      this flag. They could become confused.
    */
    block->status&= ~BLOCK_REASSIGNED;
    /*
      Do not release the hash_link until the block is off all lists.
      At least not if we hand it over for eviction in unreg_request().
    */
  }

  /*
    Unregister the block request and link the block into the LRU ring.
    This enables eviction for the block. If the LRU ring was empty and
    threads are waiting for a block, then the block will be handed over
    for eviction immediately. Otherwise we will unlink it from the LRU
    ring again, without releasing the lock in between. So decrementing
    the request counter and updating statistics are the only relevant
    operations in this case. Assert that there are no other requests
    registered.
  */
  assert(block->requests == 1);
  unreg_request(keycache, block, 0);
  /*
    Note that even without releasing the cache lock it is possible that
    the block is immediately selected for eviction by link_block() and
    thus not added to the LRU ring. In this case we must not touch the
    block any more.
  */
  if (block->status & BLOCK_IN_EVICTION)
    return;

  /* Here the block must be in the LRU ring. Unlink it again. */
  assert(block->next_used && block->prev_used &&
         *block->prev_used == block);
  unlink_block(keycache, block);
  if (block->temperature == BLOCK_WARM)
    keycache->warm_blocks--;
  block->temperature= BLOCK_COLD;

  /* Remove from file_blocks hash. */
  unlink_changed(block);

  /* Remove reference to block from hash table. */
  unlink_hash(keycache, block->hash_link);
  block->hash_link= NULL;

  block->status= 0;
  block->length= 0;
  block->offset= keycache->key_cache_block_size;

  /* Enforced by unlink_changed(), but just to be sure. */
  assert(!block->next_changed && !block->prev_changed);
  /* Enforced by unlink_block(): not in LRU ring nor in free_block_list. */
  assert(!block->next_used && !block->prev_used);
  /* Insert the free block in the free list. */
  block->next_used= keycache->free_block_list;
  keycache->free_block_list= block;
  /* Keep track of the number of currently unused blocks. */
  keycache->blocks_unused++;

  /* All pending requests for this page must be resubmitted. */
  release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
}
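/*
  Illustrative sketch (standalone, with invented types): the intrusive
  singly-linked free list used at the end of free_block(). Freed blocks
  are pushed onto the head of free_block_list by reusing the next_used
  pointer, so no extra memory is needed for list nodes.
*/
#if 0
#include <stddef.h>

struct demo_block
{
  struct demo_block *next_used;   /* doubles as the free-list link */
};

static struct demo_block *free_list= NULL;

static void push_free(struct demo_block *block)
{
  block->next_used= free_list;
  free_list= block;
}

static struct demo_block *pop_free(void)
{
  struct demo_block *block= free_list;
  if (block)
    free_list= block->next_used;
  return block;
}
#endif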
static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b)
{
  return (((*a)->hash_link->diskpos < (*b)->hash_link->diskpos) ? -1 :
          ((*a)->hash_link->diskpos > (*b)->hash_link->diskpos) ? 1 : 0);
}
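/*
  Illustrative sketch (standalone, invented struct names): how a
  comparator like cmp_sec_link() drives a qsort() over an array of block
  pointers, ordering blocks by disk position so that the flush below
  issues mostly sequential writes.
*/
#if 0
#include <stdio.h>
#include <stdlib.h>

struct demo_link  { unsigned long diskpos; };
struct demo_block { struct demo_link *hash_link; };

static int cmp_demo(const void *a, const void *b)
{
  const struct demo_block *x= *(struct demo_block *const *) a;
  const struct demo_block *y= *(struct demo_block *const *) b;
  return (x->hash_link->diskpos < y->hash_link->diskpos) ? -1 :
         (x->hash_link->diskpos > y->hash_link->diskpos) ? 1 : 0;
}

int main(void)
{
  struct demo_link  links[3]=  { {3072}, {1024}, {2048} };
  struct demo_block blocks[3]= { {&links[0]}, {&links[1]}, {&links[2]} };
  struct demo_block *cache[3]= { &blocks[0], &blocks[1], &blocks[2] };

  qsort(cache, 3, sizeof(*cache), cmp_demo);
  for (int i= 0; i < 3; i++)
    printf("%lu\n", cache[i]->hash_link->diskpos);  /* 1024 2048 3072 */
  return 0;
}
#endif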
/*
  Flush a portion of changed blocks to disk,
  free used blocks if requested.
*/

static int flush_cached_blocks(KEY_CACHE *keycache,
                               File file, BLOCK_LINK **cache,
                               BLOCK_LINK **end,
                               enum flush_type type)
{
  int error;
  int last_errno= 0;
  uint32_t count= (uint) (end-cache);

  /* Don't lock the cache during the flush */
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  /*
    As all blocks referred to in 'cache' are marked by BLOCK_IN_FLUSH,
    we are guaranteed that no thread will change them.
  */
  my_qsort((unsigned char*) cache, count, sizeof(*cache),
           (qsort_cmp) cmp_sec_link);

  keycache_pthread_mutex_lock(&keycache->cache_lock);
  /*
    Note: Do not break the loop. We have registered a request on every
    block in 'cache'. These must be unregistered by free_block() or
    unreg_request().
  */
  for ( ; cache != end ; cache++)
  {
    BLOCK_LINK *block= *cache;
    /*
      If the block contents is going to be changed, we abandon the flush
      for this block. flush_key_blocks_int() will restart its search and
      handle the block properly.
    */
    if (!(block->status & BLOCK_FOR_UPDATE))
    {
      /* Blocks coming here must have a certain status. */
      assert(block->hash_link);
      assert(block->hash_link->block == block);
      assert(block->hash_link->file == file);
      assert((block->status & ~BLOCK_IN_EVICTION) ==
             (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
      block->status|= BLOCK_IN_FLUSHWRITE;
      keycache_pthread_mutex_unlock(&keycache->cache_lock);
      error= (pwrite(file,
                     block->buffer+block->offset,
                     block->length - block->offset,
                     block->hash_link->diskpos+ block->offset) == 0);
      keycache_pthread_mutex_lock(&keycache->cache_lock);
      keycache->global_cache_write++;
      if (error)
      {
        block->status|= BLOCK_ERROR;
        if (!last_errno)
          last_errno= errno ? errno : -1;
      }
      block->status&= ~BLOCK_IN_FLUSHWRITE;
      /* Block must not have changed status except BLOCK_FOR_UPDATE. */
      assert(block->hash_link);
      assert(block->hash_link->block == block);
      assert(block->hash_link->file == file);
      assert((block->status & ~(BLOCK_FOR_UPDATE | BLOCK_IN_EVICTION)) ==
             (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
      /*
        Set correct status and link in right queue for free or later use.
        free_block() must not see BLOCK_CHANGED and it may need to wait
        for readers of the block. These should not see the block in the
        wrong hash. If not freeing the block, we need to have it in the
        right queue anyway.
      */
      link_to_file_list(keycache, block, file, 1);
    }
    block->status&= ~BLOCK_IN_FLUSH;
    /*
      Let possible waiting write requests for the block page proceed.
      This can happen only during an operation that resizes the key cache.
    */
    release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
    /* type will never be FLUSH_IGNORE_CHANGED here */
    if (!(type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) &&
        !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                           BLOCK_REASSIGNED)))
    {
      /*
        Note that a request has been registered against the block in
        flush_key_blocks_int().
      */
      free_block(keycache, block);
    }
    else
    {
      /*
        Link the block into the LRU ring if it's the last submitted
        request for the block. This enables eviction for the block.
        Note that a request has been registered against the block in
        flush_key_blocks_int().
      */
      unreg_request(keycache, block, 1);
    }
  } /* end of for ( ; cache != end ; cache++) */
  return last_errno;
}
/*
  Flush all key blocks for a file to disk, but don't do any mutex locks.

    flush_key_blocks_int()
      keycache            pointer to a key cache data structure
      file                handler for the file to flush to
      flush_type          type of the flush

  This function doesn't do any mutex locks because it needs to be called
  both from flush_key_blocks and flush_all_key_blocks (the latter does the
  mutex lock in the resize_key_cache() function).

  We only care about changed blocks that exist when the function is
  entered. We do not guarantee that all changed blocks of the file are
  flushed if more blocks change while this function is running.
*/

static int flush_key_blocks_int(KEY_CACHE *keycache,
                                File file, enum flush_type type)
{
  BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
  int last_errno= 0;
  int last_errcnt= 0;

  cache= cache_buff;
  if (keycache->disk_blocks > 0 &&
      (!my_disable_flush_key_blocks || type != FLUSH_KEEP))
  {
    /* Key cache exists and flush is not disabled */
    int error= 0;
    uint32_t count= FLUSH_CACHE;
    BLOCK_LINK **pos,**end;
    BLOCK_LINK *first_in_switch= NULL;
    BLOCK_LINK *last_in_flush;
    BLOCK_LINK *last_for_update;
    BLOCK_LINK *block, *next;

    if (type != FLUSH_IGNORE_CHANGED)
    {
      /*
        Count how many key blocks we have to cache to be able
        to flush all dirty pages with minimum seek moves.
      */
      count= 0;
      for (block= keycache->changed_blocks[FILE_HASH(file)] ;
           block ;
           block= block->next_changed)
      {
        if ((block->hash_link->file == file) &&
            !(block->status & BLOCK_IN_FLUSH))
        {
          count++;
          assert(count<= keycache->blocks_used);
        }
      }
      /*
        Allocate a new buffer only if it is bigger than the one we have.
        Assure that we always have some entries for the case that new
        changed blocks appear while we need to wait for something.
      */
      if ((count > FLUSH_CACHE) &&
          !(cache= (BLOCK_LINK**) malloc(sizeof(BLOCK_LINK*)*count)))
        cache= cache_buff;
      /*
        After a restart there could be more changed blocks than now.
        So we should not let count become smaller than the fixed buffer.
      */
      if (cache == cache_buff)
        count= FLUSH_CACHE;
    }

    /* Retrieve the blocks and write them to a buffer to be flushed */
restart:
    last_in_flush= NULL;
    last_for_update= NULL;
    end= (pos= cache)+count;
    for (block= keycache->changed_blocks[FILE_HASH(file)] ;
         block ;
         block= next)
    {
      next= block->next_changed;
      if (block->hash_link->file == file)
      {
        if (!(block->status & (BLOCK_IN_FLUSH | BLOCK_FOR_UPDATE)))
        {
          /*
            Note: The special handling of BLOCK_IN_SWITCH is obsolete
            since we set BLOCK_IN_FLUSH if the eviction includes a
            flush. It can be removed in a later version.
          */
          if (!(block->status & BLOCK_IN_SWITCH))
          {
            /*
              We care only for the blocks for which flushing was not
              initiated by another thread and which are not in eviction.
              Registering a request on the block unlinks it from the LRU
              ring and protects against eviction.
            */
            reg_requests(keycache, block, 1);
            if (type != FLUSH_IGNORE_CHANGED)
            {
              /* It's not a temporary file */
              if (pos == end)
              {
                /*
                  This should happen relatively seldom. Remove the
                  request because we won't do anything with the block
                  but restart and pick it again in the next iteration.
                */
                unreg_request(keycache, block, 0);
                /*
                  This happens only if there is not enough
                  memory for the big block.
                */
                if ((error= flush_cached_blocks(keycache, file, cache,
                                                end, type)))
                {
                  /* Do not loop infinitely trying to flush in vain. */
                  if ((last_errno == error) && (++last_errcnt > 5))
                    goto err;
                  last_errno= error;
                }
                /*
                  Restart the scan as some other thread might have changed
                  the changed blocks chain: the blocks that were in switch
                  state before the flush started have to be excluded.
                */
                goto restart;
              }
              /*
                Mark the block with BLOCK_IN_FLUSH in order not to let
                other threads use it for new pages and interfere with
                our sequence of flushing dirty file pages. We must not
                set this flag before actually putting the block on the
                write burst array called 'cache'.
              */
              block->status|= BLOCK_IN_FLUSH;
              /* Add block to the array for a write burst. */
              *pos++= block;
            }
            else
            {
              /* It's a temporary file */
              assert(!(block->status & BLOCK_REASSIGNED));

              /*
                free_block() must not be called with BLOCK_CHANGED. Note
                that we must not change the BLOCK_CHANGED flag outside of
                link_to_file_list() so that it is always in the correct
                queue and the *blocks_changed counters are correct.
              */
              link_to_file_list(keycache, block, file, 1);
              if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)))
              {
                /* A request has been registered against the block above. */
                free_block(keycache, block);
              }
              else
              {
                /*
                  Link the block into the LRU ring if it's the last
                  submitted request for the block. This enables eviction
                  for the block. A request has been registered against
                  the block above.
                */
                unreg_request(keycache, block, 1);
              }
            }
          }
          else
          {
            /*
              Link the block into a list of blocks 'in switch'.

              WARNING: Here we introduce a place where a changed block
              is not in the changed_blocks hash! This is acceptable for
              a BLOCK_IN_SWITCH. Never try this for another situation.
              Other parts of the key cache code rely on changed blocks
              being in the changed_blocks hash.
            */
            unlink_changed(block);
            link_changed(block, &first_in_switch);
          }
        }
        else if (type != FLUSH_KEEP)
        {
          /*
            During the normal flush at end of statement (FLUSH_KEEP) we
            do not need to ensure that blocks in flush or update by
            other threads are flushed. They will be flushed by them
            later. In all other cases we must assure that we do not have
            any changed block of this file in the cache when this
            function returns.
          */
          if (block->status & BLOCK_IN_FLUSH)
          {
            /* Remember the last block found to be in flush. */
            last_in_flush= block;
          }
          else
          {
            /* Remember the last block found to be selected for update. */
            last_for_update= block;
          }
        }
      }
    }
    if (pos != cache)
    {
      if ((error= flush_cached_blocks(keycache, file, cache, pos, type)))
      {
        /* Do not loop infinitely trying to flush in vain. */
        if ((last_errno == error) && (++last_errcnt > 5))
          goto err;
        last_errno= error;
      }
      /*
        Do not restart here during the normal flush at end of statement
        (FLUSH_KEEP). We have now flushed at least all blocks that were
        changed when entering this function. In all other cases we must
        assure that we do not have any changed block of this file in the
        cache when this function returns.
      */
      if (type != FLUSH_KEEP)
        goto restart;
    }
    if (last_in_flush)
    {
      /*
        There are no blocks to be flushed by this thread, but blocks in
        flush by other threads. Wait until one of the blocks is flushed.
        Re-check the condition for last_in_flush. We may have unlocked
        the cache_lock in flush_cached_blocks(). The state of the block
        could have changed.
      */
      if (last_in_flush->status & BLOCK_IN_FLUSH)
        wait_on_queue(&last_in_flush->wqueue[COND_FOR_SAVED],
                      &keycache->cache_lock);
      /* Be sure not to lose a block. They may be flushed in random order. */
      goto restart;
    }
    if (last_for_update)
    {
      /*
        There are no blocks to be flushed by this thread, but blocks for
        update by other threads. Wait until one of the blocks is updated.
        Re-check the condition for last_for_update. We may have unlocked
        the cache_lock in flush_cached_blocks(). The state of the block
        could have changed.
      */
      if (last_for_update->status & BLOCK_FOR_UPDATE)
        wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
                      &keycache->cache_lock);
      /* The block is now changed. Flush it. */
      goto restart;
    }

    /*
      Wait until the list of blocks in switch is empty. The threads that
      are switching these blocks will relink them to clean file chains
      while we wait and thus empty the 'first_in_switch' chain.
    */
    while (first_in_switch)
    {
      wait_on_queue(&first_in_switch->wqueue[COND_FOR_SAVED],
                    &keycache->cache_lock);
      /*
        Do not restart here. We have flushed all blocks that were
        changed when entering this function and were not marked for
        eviction. Other threads have now flushed all remaining blocks in
        the course of their eviction.
      */
    }

    if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
    {
      BLOCK_LINK *last_for_update= NULL;
      BLOCK_LINK *last_in_switch= NULL;
      uint32_t total_found= 0;
      uint32_t found;

      /*
        Finally free all clean blocks for this file.
        During resize this may be run by two threads in parallel.
      */
      do
      {
        found= 0;
        for (block= keycache->file_blocks[FILE_HASH(file)] ;
             block ;
             block= next)
        {
          /* Remember the next block. After freeing we cannot get at it. */
          next= block->next_changed;

          /* Changed blocks cannot appear in the file_blocks hash. */
          assert(!(block->status & BLOCK_CHANGED));
          if (block->hash_link->file == file)
          {
            /* We must skip blocks that will be changed. */
            if (block->status & BLOCK_FOR_UPDATE)
            {
              last_for_update= block;
              continue;
            }

            /*
              We must not free blocks in eviction (BLOCK_IN_EVICTION |
              BLOCK_IN_SWITCH) or blocks intended to be freed
              (BLOCK_REASSIGNED).
            */
            if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
                                   BLOCK_REASSIGNED)))
            {
              struct st_hash_link *next_hash_link= NULL;
              my_off_t next_diskpos= 0;
              File next_file= 0;
              uint32_t next_status= 0;
              uint32_t hash_requests= 0;

              total_found++;
              found++;
              assert(found <= keycache->blocks_used);

              /*
                Register a request. This unlinks the block from the LRU
                ring and protects it against eviction. This is required
                by free_block().
              */
              reg_requests(keycache, block, 1);

              /*
                free_block() may need to wait for readers of the block.
                This is the moment where the other thread can move the
                'next' block from the chain. free_block() needs to wait
                if there are requests for the block pending.
              */
              if (next && (hash_requests= block->hash_link->requests))
              {
                /* Copy values from the 'next' block and its hash_link. */
                next_status= next->status;
                next_hash_link= next->hash_link;
                next_diskpos= next_hash_link->diskpos;
                next_file= next_hash_link->file;
                assert(next == next_hash_link->block);
              }

              free_block(keycache, block);
              /*
                If we had to wait and the state of the 'next' block
                changed, break the inner loop. 'next' may no longer be
                part of the current chain.

                We do not want to break the loop after every free_block(),
                not even only after waits. The chain might be quite long
                and contain blocks for many files. Traversing it again and
                again to find more blocks for this file could become quite
                inefficient.
              */
              if (next && hash_requests &&
                  ((next_status != next->status) ||
                   (next_hash_link != next->hash_link) ||
                   (next_file != next_hash_link->file) ||
                   (next_diskpos != next_hash_link->diskpos) ||
                   (next != next_hash_link->block)))
                break;
            }
            else
            {
              last_in_switch= block;
            }
          }
        } /* end for block in file_blocks */
      } while (found);

      /*
        If any clean block has been found, we may have waited for it to
        become free. In this case it could be possible that another clean
        block became dirty. This is possible if the write request existed
        before the flush started (BLOCK_FOR_UPDATE). Re-check the hashes.
      */
      if (total_found)
        goto restart;

      /*
        To avoid an infinite loop, wait until one of the blocks marked
        for update is updated.
      */
      if (last_for_update)
      {
        /* We did not wait. Block must not have changed status. */
        assert(last_for_update->status & BLOCK_FOR_UPDATE);
        wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
                      &keycache->cache_lock);
        goto restart;
      }

      /*
        To avoid an infinite loop, wait until one of the blocks marked
        for eviction is switched.
      */
      if (last_in_switch)
      {
        /* We did not wait. Block must not have changed status. */
        assert(last_in_switch->status & (BLOCK_IN_EVICTION |
                                         BLOCK_IN_SWITCH |
                                         BLOCK_REASSIGNED));
        wait_on_queue(&last_in_switch->wqueue[COND_FOR_SAVED],
                      &keycache->cache_lock);
        goto restart;
      }

    } /* if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) */

  } /* if (keycache->disk_blocks > 0 */

err:
  if (cache != cache_buff)
    free((unsigned char*) cache);
  if (last_errno)
    errno=last_errno;                   /* Return first error */
  return(last_errno != 0);
}
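/*
  Illustrative sketch (standalone, invented names): the "remember next
  before you free" traversal pattern that flush_key_blocks_int() uses on
  the changed_blocks / file_blocks chains. Because freeing (or relinking)
  a node invalidates its links, the next pointer is saved first.
*/
#if 0
#include <stdlib.h>

struct demo_node
{
  int file;
  struct demo_node *next_changed;
};

static void drop_matching(struct demo_node **head, int file)
{
  struct demo_node *block, *next;
  struct demo_node **link= head;

  for (block= *head; block; block= next)
  {
    next= block->next_changed;        /* save before block may go away */
    if (block->file == file)
    {
      *link= next;                    /* unlink ... */
      free(block);                    /* ... then release */
    }
    else
      link= &block->next_changed;
  }
}
#endif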
/*
  Flush all blocks for a file to disk.
*/

int flush_key_blocks(KEY_CACHE *keycache,
                     File file, enum flush_type type)
{
  int res= 0;

  if (!keycache->key_cache_inited)
    return 0;

  keycache_pthread_mutex_lock(&keycache->cache_lock);
  /* While waiting for lock, keycache could have been ended. */
  if (keycache->disk_blocks > 0)
  {
    inc_counter_for_resize_op(keycache);
    res= flush_key_blocks_int(keycache, file, type);
    dec_counter_for_resize_op(keycache);
  }
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  return res;
}
/*
  Flush all blocks in the key cache to disk.

    flush_all_key_blocks()
      keycache    pointer to the key cache root structure

  Flushing of the whole key cache is done in two phases.

  1. Flush all changed blocks, waiting for them if necessary. Loop
     until there is no changed block left in the cache.

  2. Free all clean blocks. Normally this means free all blocks. The
     changed blocks were flushed in phase 1 and became clean. However we
     may need to wait for blocks that are read by other threads. While we
     wait, a clean block could become changed if that operation started
     before the resize operation started. To be safe we must restart at
     phase 1.

  When we can run through the changed_blocks and file_blocks hashes
  without finding a block any more, then we are done.

  Note that we hold keycache->cache_lock all the time unless we need
  to wait for something.
*/

static int flush_all_key_blocks(KEY_CACHE *keycache)
{
  BLOCK_LINK *block;
  uint32_t total_found;
  uint32_t found;
  uint32_t idx;

  do
  {
    safe_mutex_assert_owner(&keycache->cache_lock);
    total_found= 0;

    /*
      Phase 1: Flush all changed blocks, waiting for them if necessary.
      Loop until there is no changed block left in the cache.
    */
    do
    {
      found= 0;
      /* Step over the whole changed_blocks hash array. */
      for (idx= 0; idx < CHANGED_BLOCKS_HASH; idx++)
      {
        /*
          If an array element is non-empty, use the first block from its
          chain to find a file for flush. All changed blocks for this
          file are flushed. So the same block will not appear at this
          place again with the next iteration. New writes for blocks are
          not accepted during the flush. If multiple files share the
          same hash bucket, one of them will be flushed per iteration
          of the outer loop of phase 1.
        */
        if ((block= keycache->changed_blocks[idx]))
        {
          found++;
          /*
            Flush dirty blocks but do not free them yet. They can be used
            for reading until all other blocks are flushed too.
          */
          if (flush_key_blocks_int(keycache, block->hash_link->file,
                                   FLUSH_FORCE_WRITE))
            return 1;
        }
      }
    } while (found);

    /*
      Phase 2: Free all clean blocks. Normally this means free all
      blocks. The changed blocks were flushed in phase 1 and became
      clean. However we may need to wait for blocks that are read by
      other threads. While we wait, a clean block could become changed
      if that operation started before the resize operation started. To
      be safe we must restart at phase 1.
    */
    do
    {
      found= 0;
      /* Step over the whole file_blocks hash array. */
      for (idx= 0; idx < CHANGED_BLOCKS_HASH; idx++)
      {
        /*
          If an array element is non-empty, use the first block from its
          chain to find a file for flush. All blocks for this file are
          freed. So the same block will not appear at this place again
          with the next iteration. If multiple files share the
          same hash bucket, one of them will be flushed per iteration
          of the outer loop of phase 2.
        */
        if ((block= keycache->file_blocks[idx]))
        {
          total_found++;
          found++;
          if (flush_key_blocks_int(keycache, block->hash_link->file,
                                   FLUSH_RELEASE))
            return 1;
        }
      }
    } while (found);

    /*
      If any clean block has been found, we may have waited for it to
      become free. In this case it could be possible that another clean
      block became dirty. This is possible if the write request existed
      before the resize started (BLOCK_FOR_UPDATE). Re-check the hashes.
    */
  } while (total_found);

  return 0;
}
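/*
  Illustrative sketch (standalone; process_one_bucket is a hypothetical
  worker): the "repeat until a full pass finds nothing" fixpoint pattern
  of flush_all_key_blocks() in miniature. Work done in one pass may
  create new work while we wait, so the outer loop restarts until a
  complete pass over all buckets stays empty.
*/
#if 0
#include <stdbool.h>

extern bool process_one_bucket(int idx);   /* returns false when empty */

static void drain_all(int nbuckets)
{
  unsigned total_found;
  do
  {
    total_found= 0;
    for (int idx= 0; idx < nbuckets; idx++)
    {
      while (process_one_bucket(idx))
        total_found++;
    }
  } while (total_found);
}
#endif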
/*
  Reset the counters of a key cache.

    reset_key_cache_counters()
      name        the name of a key cache
      key_cache   pointer to the key cache to be reset

  This procedure is used by process_key_caches() to reset the counters of
  all currently used key caches, both the default one and the named ones.

  RETURN
    0 on success (always, because it can't fail)
*/

int reset_key_cache_counters(const char *name __attribute__((unused)),
                             KEY_CACHE *key_cache)
{
  if (!key_cache->key_cache_inited)
    return 0;

  key_cache->global_blocks_changed= 0;   /* Key_blocks_not_flushed */
  key_cache->global_cache_r_requests= 0; /* Key_read_requests */
  key_cache->global_cache_read= 0;       /* Key_reads */
  key_cache->global_cache_w_requests= 0; /* Key_write_requests */
  key_cache->global_cache_write= 0;      /* Key_writes */
  return 0;
}
#if defined(KEYCACHE_TIMEOUT)

unsigned int hash_link_number(HASH_LINK *hash_link, KEY_CACHE *keycache)
{
  return ((unsigned int) (((char*)hash_link-(char *) keycache->hash_link_root)/
                          sizeof(HASH_LINK)));
}

unsigned int block_number(BLOCK_LINK *block, KEY_CACHE *keycache)
{
  return ((unsigned int) (((char*)block-(char *)keycache->block_root)/
                          sizeof(BLOCK_LINK)));
}

#define KEYCACHE_DUMP_FILE "keycache_dump.txt"
#define MAX_QUEUE_LEN 100

static void keycache_dump(KEY_CACHE *keycache)
{
  FILE *keycache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w");
  struct st_my_thread_var *last;
  struct st_my_thread_var *thread= my_thread_var;
  BLOCK_LINK *block;
  HASH_LINK *hash_link;
  KEYCACHE_PAGE *page;
  uint i;

  fprintf(keycache_dump_file, "thread:%u\n", thread->id);

  i=0;
  thread=last=waiting_for_hash_link.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for hash link\n");
  if (thread)
    do
    {
      thread=thread->next;
      page= (KEYCACHE_PAGE *) thread->opt_info;
      fprintf(keycache_dump_file,
              "thread:%u, (file,filepos)=(%u,%lu)\n",
              thread->id, (uint) page->file, (uint32_t) page->filepos);
      if (++i == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);

  i=0;
  thread=last=waiting_for_block.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for block\n");
  if (thread)
    do
    {
      thread=thread->next;
      hash_link= (HASH_LINK *) thread->opt_info;
      fprintf(keycache_dump_file,
              "thread:%u hash_link:%u (file,filepos)=(%u,%u)\n",
              thread->id, (uint) hash_link_number(hash_link, keycache),
              (uint) hash_link->file, (uint32_t) hash_link->diskpos);
      if (++i == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);

  for (i=0 ; i< keycache->blocks_used ; i++)
  {
    int j;
    block= &keycache->block_root[i];
    hash_link= block->hash_link;
    fprintf(keycache_dump_file,
            "block:%u hash_link:%d status:%x #requests=%u "
            "waiting_for_readers:%d\n",
            i, (int) (hash_link ? hash_link_number(hash_link, keycache) : -1),
            block->status, block->requests, block->condvar ? 1 : 0);
    for (j=0 ; j < 2; j++)
    {
      KEYCACHE_WQUEUE *wqueue=&block->wqueue[j];
      thread= last= wqueue->last_thread;
      fprintf(keycache_dump_file, "queue #%d\n", j);
      if (thread)
        do
        {
          thread=thread->next;
          fprintf(keycache_dump_file,
                  "thread:%u\n", thread->id);
          if (++i == MAX_QUEUE_LEN)
            break;
        }
        while (thread != last);
    }
  }
  fprintf(keycache_dump_file, "LRU chain:");
  block= keycache->used_last;
  if (block)
    do
    {
      block= block->next_used;
      fprintf(keycache_dump_file,
              "block:%u, ", block_number(block, keycache));
    }
    while (block != keycache->used_last);
  fprintf(keycache_dump_file, "\n");

  fclose(keycache_dump_file);
}

static int keycache_pthread_cond_wait(pthread_cond_t *cond,
                                      pthread_mutex_t *mutex)
{
  int rc;
  struct timeval now;             /* time when we started waiting        */
  struct timespec timeout;        /* timeout value for the wait function */
  struct timezone tz;

  /* Get current time */
  gettimeofday(&now, &tz);
  /* Prepare timeout value */
  timeout.tv_sec= now.tv_sec + KEYCACHE_TIMEOUT;
  /*
    timeval uses microseconds.
    timespec uses nanoseconds.
    1 microsecond = 1000 nanoseconds.
  */
  timeout.tv_nsec= now.tv_usec * 1000;
  rc= pthread_cond_timedwait(cond, mutex, &timeout);
  if (rc == ETIMEDOUT || rc == ETIME)
  {
    /* Timing out here most likely indicates a deadlocked key cache. */
    assert(rc != ETIMEDOUT);
  }
  return rc;
}

#endif /* defined(KEYCACHE_TIMEOUT) */
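/*
  Illustrative sketch (standalone): building an absolute timespec for
  pthread_cond_timedwait() the same way as above, including the
  microsecond-to-nanosecond conversion. gettimeofday() reports
  microseconds, while timespec carries nanoseconds, hence the *1000;
  since tv_usec is always below one million, no carry into tv_sec is
  needed.
*/
#if 0
#include <errno.h>
#include <pthread.h>
#include <sys/time.h>

static int timed_wait_demo(pthread_cond_t *cond, pthread_mutex_t *mutex,
                           unsigned timeout_sec)
{
  struct timeval now;
  struct timespec abstime;

  gettimeofday(&now, NULL);
  abstime.tv_sec=  now.tv_sec + timeout_sec;
  abstime.tv_nsec= now.tv_usec * 1000;    /* usec -> nsec; always < 1e9 */

  /* Returns 0 on signal, ETIMEDOUT when abstime passes first. */
  return pthread_cond_timedwait(cond, mutex, &abstime);
}
#endif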