247
272
size_t use_mem, uint32_t division_limit,
248
273
uint32_t age_threshold)
251
(void)key_cache_block_size;
253
(void)division_limit;
255
memset(keycache, 0, sizeof(KEY_CACHE));
275
uint32_t blocks, hash_links;
278
assert(key_cache_block_size >= 512);
280
if (keycache->key_cache_inited && keycache->disk_blocks > 0)
285
keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
286
keycache->global_cache_read= keycache->global_cache_write= 0;
287
keycache->disk_blocks= -1;
288
if (! keycache->key_cache_inited)
290
keycache->key_cache_inited= 1;
292
Initialize these variables once only.
293
Their value must survive re-initialization during resizing.
295
keycache->in_resize= 0;
296
keycache->resize_in_flush= 0;
297
keycache->cnt_for_resize_op= 0;
298
keycache->waiting_for_resize_cnt.last_thread= NULL;
299
keycache->in_init= 0;
300
pthread_mutex_init(&keycache->cache_lock, MY_MUTEX_INIT_FAST);
301
keycache->resize_queue.last_thread= NULL;
304
keycache->key_cache_mem_size= use_mem;
305
keycache->key_cache_block_size= key_cache_block_size;
307
blocks= (uint32_t) (use_mem / (sizeof(BLOCK_LINK) + 2 * sizeof(HASH_LINK) +
308
sizeof(HASH_LINK*) * 5/4 + key_cache_block_size));
309
/* It doesn't make sense to have too few blocks (less than 8) */
314
/* Set my_hash_entries to the next bigger 2 power */
315
if ((keycache->hash_entries= next_power(blocks)) < blocks * 5/4)
316
keycache->hash_entries<<= 1;
317
hash_links= 2 * blocks;
318
#if defined(MAX_THREADS)
319
if (hash_links < MAX_THREADS + blocks - 1)
320
hash_links= MAX_THREADS + blocks - 1;
322
while ((length= (ALIGN_SIZE(blocks * sizeof(BLOCK_LINK)) +
323
ALIGN_SIZE(hash_links * sizeof(HASH_LINK)) +
324
ALIGN_SIZE(sizeof(HASH_LINK*) *
325
keycache->hash_entries))) +
326
((size_t) blocks * keycache->key_cache_block_size) > use_mem)
328
/* Allocate memory for cache page buffers */
329
if ((keycache->block_mem= (unsigned char *)malloc((size_t) blocks * keycache->key_cache_block_size)))
332
Allocate memory for blocks, hash_links and hash entries;
333
For each block 2 hash links are allocated
335
if ((keycache->block_root= (BLOCK_LINK*) malloc(length)))
337
free(keycache->block_mem);
338
keycache->block_mem= 0;
343
my_error(EE_OUTOFMEMORY, MYF(0), blocks * keycache->key_cache_block_size);
346
blocks= blocks / 4*3;
348
keycache->blocks_unused= blocks;
349
keycache->disk_blocks= (int) blocks;
350
keycache->hash_links= hash_links;
351
keycache->hash_root= (HASH_LINK**) ((char*) keycache->block_root +
352
ALIGN_SIZE(blocks*sizeof(BLOCK_LINK)));
353
keycache->hash_link_root= (HASH_LINK*) ((char*) keycache->hash_root +
354
ALIGN_SIZE((sizeof(HASH_LINK*) *
355
keycache->hash_entries)));
356
memset(keycache->block_root, 0,
357
keycache->disk_blocks * sizeof(BLOCK_LINK));
358
memset(keycache->hash_root, 0,
359
keycache->hash_entries * sizeof(HASH_LINK*));
360
memset(keycache->hash_link_root, 0,
361
keycache->hash_links * sizeof(HASH_LINK));
362
keycache->hash_links_used= 0;
363
keycache->free_hash_list= NULL;
364
keycache->blocks_used= keycache->blocks_changed= 0;
366
keycache->global_blocks_changed= 0;
367
keycache->blocks_available=0; /* For debugging */
369
/* The LRU chain is empty after initialization */
370
keycache->used_last= NULL;
371
keycache->used_ins= NULL;
372
keycache->free_block_list= NULL;
373
keycache->keycache_time= 0;
374
keycache->warm_blocks= 0;
375
keycache->min_warm_blocks= (division_limit ?
376
blocks * division_limit / 100 + 1 :
378
keycache->age_threshold= (age_threshold ?
379
blocks * age_threshold / 100 :
382
keycache->can_be_used= 1;
384
keycache->waiting_for_hash_link.last_thread= NULL;
385
keycache->waiting_for_block.last_thread= NULL;
386
memset(keycache->changed_blocks, 0,
387
sizeof(keycache->changed_blocks[0]) * CHANGED_BLOCKS_HASH);
388
memset(keycache->file_blocks, 0,
389
sizeof(keycache->file_blocks[0]) * CHANGED_BLOCKS_HASH);
393
/* myisam_key_buffer_size is specified too small. Disable the cache. */
394
keycache->can_be_used= 0;
397
keycache->blocks= keycache->disk_blocks > 0 ? keycache->disk_blocks : 0;
398
return((int) keycache->disk_blocks);
402
keycache->disk_blocks= 0;
404
if (keycache->block_mem)
406
free(keycache->block_mem);
407
keycache->block_mem= NULL;
409
if (keycache->block_root)
411
free((unsigned char*) keycache->block_root);
412
keycache->block_root= NULL;
415
keycache->can_be_used= 0;
425
keycache pointer to a key cache data structure
426
key_cache_block_size size of blocks to keep cached data
427
use_mem total memory to use for the new key cache
428
division_limit new division limit (if not zero)
429
age_threshold new age threshold (if not zero)
432
number of blocks in the key cache, if successful,
436
The function first compares the memory size and the block size parameters
437
with the key cache values.
439
If they differ the function free the the memory allocated for the
440
old key cache blocks by calling the end_key_cache function and
441
then rebuilds the key cache with new blocks by calling
444
The function starts the operation only when all other threads
445
performing operations with the key cache let her to proceed
446
(when cnt_for_resize=0).
449
int resize_key_cache(KEY_CACHE *keycache, uint32_t key_cache_block_size,
450
size_t use_mem, uint32_t division_limit,
451
uint32_t age_threshold)
455
if (!keycache->key_cache_inited)
456
return(keycache->disk_blocks);
458
if(key_cache_block_size == keycache->key_cache_block_size &&
459
use_mem == keycache->key_cache_mem_size)
461
change_key_cache_param(keycache, division_limit, age_threshold);
462
return(keycache->disk_blocks);
465
keycache_pthread_mutex_lock(&keycache->cache_lock);
468
We may need to wait for another thread which is doing a resize
469
already. This cannot happen in the MySQL server though. It allows
470
one resizer only. In set_var.cc keycache->in_init is used to block
473
while (keycache->in_resize)
475
wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
479
Mark the operation in progress. This blocks other threads from doing
480
a resize in parallel. It prohibits new blocks to enter the cache.
481
Read/write requests can bypass the cache during the flush phase.
483
keycache->in_resize= 1;
485
/* Need to flush only if keycache is enabled. */
486
if (keycache->can_be_used)
488
/* Start the flush phase. */
489
keycache->resize_in_flush= 1;
491
if (flush_all_key_blocks(keycache))
493
/* TODO: if this happens, we should write a warning in the log file ! */
494
keycache->resize_in_flush= 0;
496
keycache->can_be_used= 0;
500
/* End the flush phase. */
501
keycache->resize_in_flush= 0;
505
Some direct read/write operations (bypassing the cache) may still be
506
unfinished. Wait until they are done. If the key cache can be used,
507
direct I/O is done in increments of key_cache_block_size. That is,
508
every block is checked if it is in the cache. We need to wait for
509
pending I/O before re-initializing the cache, because we may change
510
the block size. Otherwise they could check for blocks at file
511
positions where the new block division has none. We do also want to
512
wait for I/O done when (if) the cache was disabled. It must not
513
run in parallel with normal cache operation.
515
while (keycache->cnt_for_resize_op)
516
wait_on_queue(&keycache->waiting_for_resize_cnt, &keycache->cache_lock);
519
Free old cache structures, allocate new structures, and initialize
520
them. Note that the cache_lock mutex and the resize_queue are left
521
untouched. We do not lose the cache_lock and will release it only at
522
the end of this function.
524
end_key_cache(keycache, 0); /* Don't free mutex */
525
/* The following will work even if use_mem is 0 */
526
blocks= init_key_cache(keycache, key_cache_block_size, use_mem,
527
division_limit, age_threshold);
531
Mark the resize finished. This allows other threads to start a
532
resize or to request new cache blocks.
534
keycache->in_resize= 0;
536
/* Signal waiting threads. */
537
release_whole_queue(&keycache->resize_queue);
539
keycache_pthread_mutex_unlock(&keycache->cache_lock);
282
608
void end_key_cache(KEY_CACHE *keycache, bool cleanup)
610
if (!keycache->key_cache_inited)
613
if (keycache->disk_blocks > 0)
615
if (keycache->block_mem)
617
free(keycache->block_mem);
618
keycache->block_mem= NULL;
619
free((unsigned char*) keycache->block_root);
620
keycache->block_root= NULL;
622
keycache->disk_blocks= -1;
623
/* Reset blocks_changed to be safe if flush_all_key_blocks is called */
624
keycache->blocks_changed= 0;
629
pthread_mutex_destroy(&keycache->cache_lock);
630
keycache->key_cache_inited= keycache->can_be_used= 0;
286
633
} /* end_key_cache */
637
Link a thread into double-linked queue of waiting threads.
641
wqueue pointer to the queue structure
642
thread pointer to the thread to be added to the queue
648
Queue is represented by a circular list of the thread structures
649
The list is double-linked of the type (**prev,*next), accessed by
650
a pointer to the last element.
653
static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
654
internal::st_my_thread_var *thread)
656
internal::st_my_thread_var *last;
658
assert(!thread->next && !thread->prev);
659
if (! (last= wqueue->last_thread))
662
thread->next= thread;
663
thread->prev= &thread->next;
667
thread->prev= last->next->prev;
668
last->next->prev= &thread->next;
669
thread->next= last->next;
672
wqueue->last_thread= thread;
676
Unlink a thread from double-linked queue of waiting threads
680
wqueue pointer to the queue structure
681
thread pointer to the thread to be removed from the queue
687
See NOTES for link_into_queue
690
static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
691
internal::st_my_thread_var *thread)
693
assert(thread->next && thread->prev);
694
if (thread->next == thread)
695
/* The queue contains only one member */
696
wqueue->last_thread= NULL;
699
thread->next->prev= thread->prev;
700
*thread->prev=thread->next;
701
if (wqueue->last_thread == thread)
702
wqueue->last_thread= STRUCT_PTR(internal::st_my_thread_var, next,
711
Add a thread to single-linked queue of waiting threads
715
wqueue Pointer to the queue structure.
716
mutex Cache_lock to acquire after awake.
722
Queue is represented by a circular list of the thread structures
723
The list is single-linked of the type (*next), accessed by a pointer
726
The function protects against stray signals by verifying that the
727
current thread is unlinked from the queue when awaking. However,
728
since several threads can wait for the same event, it might be
729
necessary for the caller of the function to check again if the
730
condition for awake is indeed matched.
733
static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
734
pthread_mutex_t *mutex)
736
internal::st_my_thread_var *last;
737
internal::st_my_thread_var *thread= my_thread_var;
740
assert(!thread->next);
741
assert(!thread->prev); /* Not required, but must be true anyway. */
742
if (! (last= wqueue->last_thread))
743
thread->next= thread;
746
thread->next= last->next;
749
wqueue->last_thread= thread;
752
Wait until thread is removed from queue by the signalling thread.
753
The loop protects against stray signals.
757
keycache_pthread_cond_wait(&thread->suspend, mutex);
759
while (thread->next);
764
Remove all threads from queue signaling them to proceed
767
release_whole_queue()
768
wqueue pointer to the queue structure
774
See notes for wait_on_queue().
775
When removed from the queue each thread is signaled via condition
776
variable thread->suspend.
779
static void release_whole_queue(KEYCACHE_WQUEUE *wqueue)
781
internal::st_my_thread_var *last;
782
internal::st_my_thread_var *next;
783
internal::st_my_thread_var *thread;
785
/* Queue may be empty. */
786
if (!(last= wqueue->last_thread))
793
/* Signal the thread. */
794
keycache_pthread_cond_signal(&thread->suspend);
795
/* Take thread from queue. */
799
while (thread != last);
801
/* Now queue is definitely empty. */
802
wqueue->last_thread= NULL;
807
Unlink a block from the chain of dirty/clean blocks
809
/*
  Detach a block from the chain of dirty/clean blocks it is linked in.

  The chain is a NULL-terminated, double-linked list of the
  (**prev, *next) kind. The block must currently be a member of
  such a chain; both link fields are cleared on exit.
*/
static void unlink_changed(BLOCK_LINK *block)
{
  BLOCK_LINK **prev= block->prev_changed;
  BLOCK_LINK *next= block->next_changed;

  assert(prev && *prev == block);
  if (next)
    next->prev_changed= prev;
  *prev= next;
  block->next_changed= NULL;
  block->prev_changed= NULL;
}
821
Link a block into the chain of dirty/clean blocks
824
/*
  Link a block at the head of a chain of dirty/clean blocks.

  The garbled extraction of this function dropped the final head
  assignment; without `*phead= block` the block is never actually
  inserted into the chain. Restored per the upstream key cache
  implementation.

  The block must not be a member of any chain on entry (both link
  fields NULL, checked by the asserts).
*/
static void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead)
{
  assert(!block->next_changed);
  assert(!block->prev_changed);
  block->prev_changed= phead;
  if ((block->next_changed= *phead))
    (*phead)->prev_changed= &block->next_changed;
  *phead= block;
}
836
Link a block in a chain of clean blocks of a file.
840
keycache Key cache handle
841
block Block to relink
842
file File to be linked to
843
unlink If to unlink first
846
Unlink a block from whichever chain it is linked in, if it's
847
asked for, and link it to the chain of clean blocks of the
851
Please do never set/clear BLOCK_CHANGED outside of
852
link_to_file_list() or link_to_changed_list().
853
You would risk to damage correct counting of changed blocks
854
and to find blocks in the wrong hash.
860
static void link_to_file_list(KEY_CACHE *keycache,
861
BLOCK_LINK *block, int file,
864
assert(block->status & BLOCK_IN_USE);
865
assert(block->hash_link && block->hash_link->block == block);
866
assert(block->hash_link->file == file);
868
unlink_changed(block);
869
link_changed(block, &keycache->file_blocks[FILE_HASH(file)]);
870
if (block->status & BLOCK_CHANGED)
872
block->status&= ~BLOCK_CHANGED;
873
keycache->blocks_changed--;
874
keycache->global_blocks_changed--;
880
Re-link a block from the clean chain to the dirty chain of a file.
883
link_to_changed_list()
884
keycache key cache handle
885
block block to relink
888
Unlink a block from the chain of clean blocks of a file
889
and link it to the chain of dirty blocks of the same file.
892
Please do never set/clear BLOCK_CHANGED outside of
893
link_to_file_list() or link_to_changed_list().
894
You would risk to damage correct counting of changed blocks
895
and to find blocks in the wrong hash.
901
static void link_to_changed_list(KEY_CACHE *keycache,
904
assert(block->status & BLOCK_IN_USE);
905
assert(!(block->status & BLOCK_CHANGED));
906
assert(block->hash_link && block->hash_link->block == block);
908
unlink_changed(block);
910
&keycache->changed_blocks[FILE_HASH(block->hash_link->file)]);
911
block->status|=BLOCK_CHANGED;
912
keycache->blocks_changed++;
913
keycache->global_blocks_changed++;
918
Link a block to the LRU chain at the beginning or at the end of
923
keycache pointer to a key cache data structure
924
block pointer to the block to link to the LRU chain
925
hot <-> to link the block into the hot subchain
926
at_end <-> to link the block at the end of the subchain
932
The LRU ring is represented by a circular list of block structures.
933
The list is double-linked of the type (**prev,*next) type.
934
The LRU ring is divided into two parts - hot and warm.
935
There are two pointers to access the last blocks of these two
936
parts. The beginning of the warm part follows right after the
938
Only blocks of the warm part can be used for eviction.
939
The first block from the beginning of this subchain is always
940
taken for eviction (keycache->last_used->next)
942
LRU chain: +------+ H O T +------+
943
+----| end |----...<----| beg |----+
944
| +------+last +------+ |
945
v<-link in latest hot (new end) |
946
| link in latest warm (new end)->^
947
| +------+ W A R M +------+ |
948
+----| beg |---->...----| end |----+
952
It is also possible that the block is selected for eviction and thus
953
not linked in the LRU ring.
956
static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, bool hot,
962
assert((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
963
assert(block->hash_link); /*backptr to block NULL from free_block()*/
964
assert(!block->requests);
965
assert(block->prev_changed && *block->prev_changed == block);
966
assert(!block->next_used);
967
assert(!block->prev_used);
968
if (!hot && keycache->waiting_for_block.last_thread)
970
/* Signal that in the LRU warm sub-chain an available block has appeared */
971
internal::st_my_thread_var *last_thread=
972
keycache->waiting_for_block.last_thread;
973
internal::st_my_thread_var *first_thread= last_thread->next;
974
internal::st_my_thread_var *next_thread= first_thread;
975
HASH_LINK *hash_link= (HASH_LINK *) first_thread->opt_info;
976
internal::st_my_thread_var *thread;
980
next_thread= thread->next;
982
We notify about the event all threads that ask
983
for the same page as the first thread in the queue
985
if ((HASH_LINK *) thread->opt_info == hash_link)
987
keycache_pthread_cond_signal(&thread->suspend);
988
unlink_from_queue(&keycache->waiting_for_block, thread);
992
while (thread != last_thread);
993
hash_link->block= block;
995
NOTE: We assigned the block to the hash_link and signalled the
996
requesting thread(s). But it is possible that other threads runs
997
first. These threads see the hash_link assigned to a block which
998
is assigned to another hash_link and not marked BLOCK_IN_SWITCH.
999
This can be a problem for functions that do not select the block
1000
via its hash_link: flush and free. They do only see a block which
1001
is in a "normal" state and don't know that it will be evicted soon.
1003
We cannot set BLOCK_IN_SWITCH here because only one of the
1004
requesting threads must handle the eviction. All others must wait
1005
for it to complete. If we set the flag here, the threads would not
1006
know who is in charge of the eviction. Without the flag, the first
1007
thread takes the stick and sets the flag.
1009
But we need to note in the block that is has been selected for
1010
eviction. It must not be freed. The evicting thread will not
1011
expect the block in the free list. Before freeing we could also
1012
check if block->requests > 1. But I think including another flag
1013
in the check of block->status is slightly more efficient and
1014
probably easier to read.
1016
block->status|= BLOCK_IN_EVICTION;
1019
pins= hot ? &keycache->used_ins : &keycache->used_last;
1023
ins->next_used->prev_used= &block->next_used;
1024
block->next_used= ins->next_used;
1025
block->prev_used= &ins->next_used;
1026
ins->next_used= block;
1032
/* The LRU ring is empty. Let the block point to itself. */
1033
keycache->used_last= keycache->used_ins= block->next_used= block;
1034
block->prev_used= &block->next_used;
1040
Unlink a block from the LRU chain
1044
keycache pointer to a key cache data structure
1045
block pointer to the block to unlink from the LRU chain
1051
See NOTES for link_block
1054
/*
  Remove a block from the circular LRU ring.

  keycache  pointer to the key cache data structure
  block     block to take out of the LRU ring

  The ring is double-linked in the (**prev, *next) style and is
  addressed through keycache->used_last / keycache->used_ins.
  If the removed block was one of those anchors, the anchor is moved
  to the block's predecessor (or the ring is marked empty when this
  was the only member). See NOTES for link_block.
*/
static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
{
  assert((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
  assert(block->hash_link); /* backptr to block NULL from free_block() */
  assert(!block->requests);
  assert(block->prev_changed && *block->prev_changed == block);
  assert(block->next_used && block->prev_used &&
         (block->next_used->prev_used == &block->next_used) &&
         (*block->prev_used == block));

  if (block->next_used == block)
  {
    /* This was the ring's only member: the LRU becomes empty. */
    keycache->used_last= keycache->used_ins= NULL;
  }
  else
  {
    block->next_used->prev_used= block->prev_used;
    *block->prev_used= block->next_used;
    /* Re-anchor the hot/warm end pointers if they pointed here. */
    if (keycache->used_last == block)
      keycache->used_last= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
    if (keycache->used_ins == block)
      keycache->used_ins= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
  }
  block->next_used= NULL;
  block->prev_used= NULL;
}
1081
Register requests for a block.
1085
keycache Pointer to a key cache data structure.
1086
block Pointer to the block to register a request on.
1087
count Number of requests. Always 1.
1090
The first request unlinks the block from the LRU ring. This means
1091
that it is protected against eveiction.
1096
/*
  Register requests for a block.

  keycache  pointer to the key cache data structure
  block     block to register the request(s) on
  count     number of requests (the callers pass 1)

  The very first request takes the block out of the LRU ring, which
  protects it against eviction while requests are outstanding.
*/
static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count)
{
  assert(block->status & BLOCK_IN_USE);
  assert(block->hash_link);

  if (block->requests == 0)
    unlink_block(keycache, block); /* first request: shield from eviction */
  block->requests+= count;
}
1108
Unregister request for a block
1109
linking it to the LRU chain if it's the last request
1113
keycache pointer to a key cache data structure
1114
block pointer to the block to link to the LRU chain
1115
at_end <-> to link the block at the end of the LRU chain
1121
Every linking to the LRU ring decrements by one a special block
1122
counter (if it's positive). If the at_end parameter is true the block is
1123
added either at the end of warm sub-chain or at the end of hot sub-chain.
1124
It is added to the hot subchain if its counter is zero and number of
1125
blocks in warm sub-chain is not less than some low limit (determined by
1126
the division_limit parameter). Otherwise the block is added to the warm
1127
sub-chain. If the at_end parameter is false the block is always added
1128
at beginning of the warm sub-chain.
1129
Thus a warm block can be promoted to the hot sub-chain when its counter
1130
becomes zero for the first time.
1131
At the same time the block at the very beginning of the hot subchain
1132
might be moved to the beginning of the warm subchain if it stays untouched
1133
for a too long time (this time is determined by parameter age_threshold).
1135
It is also possible that the block is selected for eviction and thus
1136
not linked in the LRU ring.
1139
static void unreg_request(KEY_CACHE *keycache,
1140
BLOCK_LINK *block, int at_end)
1142
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1143
assert(block->hash_link); /*backptr to block NULL from free_block()*/
1144
assert(block->requests);
1145
assert(block->prev_changed && *block->prev_changed == block);
1146
assert(!block->next_used);
1147
assert(!block->prev_used);
1148
if (! --block->requests)
1151
if (block->hits_left)
1153
hot= !block->hits_left && at_end &&
1154
keycache->warm_blocks > keycache->min_warm_blocks;
1157
if (block->temperature == BLOCK_WARM)
1158
keycache->warm_blocks--;
1159
block->temperature= BLOCK_HOT;
1161
link_block(keycache, block, hot, (bool)at_end);
1162
block->last_hit_time= keycache->keycache_time;
1163
keycache->keycache_time++;
1165
At this place, the block might be in the LRU ring or not. If an
1166
evicter was waiting for a block, it was selected for eviction and
1167
not linked in the LRU ring.
1171
Check if we should link a hot block to the warm block sub-chain.
1172
It is possible that we select the same block as above. But it can
1173
also be another block. In any case a block from the LRU ring is
1174
selected. In other words it works even if the above block was
1175
selected for eviction and not linked in the LRU ring. Since this
1176
happens only if the LRU ring is empty, the block selected below
1177
would be NULL and the rest of the function skipped.
1179
block= keycache->used_ins;
1180
if (block && keycache->keycache_time - block->last_hit_time >
1181
keycache->age_threshold)
1183
unlink_block(keycache, block);
1184
link_block(keycache, block, 0, 0);
1185
if (block->temperature != BLOCK_WARM)
1187
keycache->warm_blocks++;
1188
block->temperature= BLOCK_WARM;
1195
Remove a reader of the page in block
1198
/*
  Remove a reader of the page in block.

  Decrements the request count on the block's hash link. When the
  count drops to zero and a thread is parked on block->condvar
  (see wait_for_readers), that thread is signalled.
*/
static void remove_reader(BLOCK_LINK *block)
{
  assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
  assert(block->hash_link && block->hash_link->block == block);
  assert(block->prev_changed && *block->prev_changed == block);
  /* Must not be linked in the LRU ring. */
  assert(!block->next_used);
  assert(!block->prev_used);
  assert(block->hash_link->requests);

  if (--block->hash_link->requests == 0 && block->condvar)
    keycache_pthread_cond_signal(block->condvar);
}
1212
Wait until the last reader of the page in block
1213
signals on its termination
1216
static void wait_for_readers(KEY_CACHE *keycache,
1219
internal::st_my_thread_var *thread= my_thread_var;
1220
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1221
assert(!(block->status & (BLOCK_ERROR | BLOCK_IN_FLUSH |
1223
assert(block->hash_link);
1224
assert(block->hash_link->block == block);
1225
/* Linked in file_blocks or changed_blocks hash. */
1226
assert(block->prev_changed && *block->prev_changed == block);
1227
/* Not linked in LRU ring. */
1228
assert(!block->next_used);
1229
assert(!block->prev_used);
1230
while (block->hash_link->requests)
1232
/* There must be no other waiter. We have no queue here. */
1233
assert(!block->condvar);
1234
block->condvar= &thread->suspend;
1235
keycache_pthread_cond_wait(&thread->suspend, &keycache->cache_lock);
1236
block->condvar= NULL;
290
1242
Add a hash link to a bucket in the hash_table
1256
Remove a hash link from the hash table
1259
static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link)
1261
assert(hash_link->requests == 0);
1262
if ((*hash_link->prev= hash_link->next))
1263
hash_link->next->prev= hash_link->prev;
1264
hash_link->block= NULL;
1265
if (keycache->waiting_for_hash_link.last_thread)
1267
/* Signal that a free hash link has appeared */
1268
internal::st_my_thread_var *last_thread=
1269
keycache->waiting_for_hash_link.last_thread;
1270
internal::st_my_thread_var *first_thread= last_thread->next;
1271
internal::st_my_thread_var *next_thread= first_thread;
1272
KEYCACHE_PAGE *first_page= (KEYCACHE_PAGE *) (first_thread->opt_info);
1273
internal::st_my_thread_var *thread;
1275
hash_link->file= first_page->file;
1276
hash_link->diskpos= first_page->filepos;
1279
KEYCACHE_PAGE *page;
1280
thread= next_thread;
1281
page= (KEYCACHE_PAGE *) thread->opt_info;
1282
next_thread= thread->next;
1284
We notify about the event all threads that ask
1285
for the same page as the first thread in the queue
1287
if (page->file == hash_link->file && page->filepos == hash_link->diskpos)
1289
keycache_pthread_cond_signal(&thread->suspend);
1290
unlink_from_queue(&keycache->waiting_for_hash_link, thread);
1293
while (thread != last_thread);
1294
link_hash(&keycache->hash_root[KEYCACHE_HASH(hash_link->file,
1295
hash_link->diskpos)],
1299
hash_link->next= keycache->free_hash_list;
1300
keycache->free_hash_list= hash_link;
1305
Get the hash link for a page
1308
static HASH_LINK *get_hash_link(KEY_CACHE *keycache,
1309
int file, internal::my_off_t filepos)
1311
register HASH_LINK *hash_link, **start;
1315
Find the bucket in the hash table for the pair (file, filepos);
1316
start contains the head of the bucket list,
1317
hash_link points to the first member of the list
1319
hash_link= *(start= &keycache->hash_root[KEYCACHE_HASH(file, filepos)]);
1320
/* Look for an element for the pair (file, filepos) in the bucket chain */
1322
(hash_link->diskpos != filepos || hash_link->file != file))
1324
hash_link= hash_link->next;
1328
/* There is no hash link in the hash table for the pair (file, filepos) */
1329
if (keycache->free_hash_list)
1331
hash_link= keycache->free_hash_list;
1332
keycache->free_hash_list= hash_link->next;
1334
else if (keycache->hash_links_used < keycache->hash_links)
1336
hash_link= &keycache->hash_link_root[keycache->hash_links_used++];
1340
/* Wait for a free hash link */
1341
internal::st_my_thread_var *thread= my_thread_var;
1344
page.filepos= filepos;
1345
thread->opt_info= (void *) &page;
1346
link_into_queue(&keycache->waiting_for_hash_link, thread);
1347
keycache_pthread_cond_wait(&thread->suspend,
1348
&keycache->cache_lock);
1349
thread->opt_info= NULL;
1352
hash_link->file= file;
1353
hash_link->diskpos= filepos;
1354
link_hash(start, hash_link);
1356
/* Register the request for the page */
1357
hash_link->requests++;
1364
Get a block for the file page requested by a keycache read/write operation;
1365
If the page is not in the cache return a free block, if there is none
1366
return the lru block after saving its buffer if the page is dirty.
1371
keycache pointer to a key cache data structure
1372
file handler for the file to read page from
1373
filepos position of the page in the file
1374
init_hits_left how initialize the block counter for the page
1375
wrmode <-> get for writing
1376
page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
1379
Pointer to the found block if successful, 0 - otherwise
1382
For the page from file positioned at filepos the function checks whether
1383
the page is in the key cache specified by the first parameter.
1384
If this is the case it immediately returns the block.
1385
If not, the function first chooses a block for this page. If there is
1386
no not used blocks in the key cache yet, the function takes the block
1387
at the very beginning of the warm sub-chain. It saves the page in that
1388
block if it's dirty before returning the pointer to it.
1389
The function returns in the page_st parameter the following values:
1390
PAGE_READ - if page already in the block,
1391
PAGE_TO_BE_READ - if it is to be read yet by the current thread
1392
WAIT_TO_BE_READ - if it is to be read by another thread
1393
If an error occurs THE BLOCK_ERROR bit is set in the block status.
1394
It might happen that there are no blocks in LRU chain (in warm part) -
1395
all blocks are unlinked for some read/write operations. Then the function
1396
waits until first of this operations links any block back.
1399
static BLOCK_LINK *find_key_block(KEY_CACHE *keycache,
1400
int file, internal::my_off_t filepos,
1402
int wrmode, int *page_st)
1404
HASH_LINK *hash_link;
1411
If the flush phase of a resize operation fails, the cache is left
1412
unusable. This will be detected only after "goto restart".
1414
if (!keycache->can_be_used)
1418
Find the hash_link for the requested file block (file, filepos). We
1419
do always get a hash_link here. It has registered our request so
1420
that no other thread can use it for another file block until we
1421
release the request (which is done by remove_reader() usually). The
1422
hash_link can have a block assigned to it or not. If there is a
1423
block, it may be assigned to this hash_link or not. In cases where a
1424
block is evicted from the cache, it is taken from the LRU ring and
1425
referenced by the new hash_link. But the block can still be assigned
1426
to its old hash_link for some time if it needs to be flushed first,
1427
or if there are other threads still reading it.
1430
hash_link is always returned.
1431
hash_link->block can be:
1433
- not assigned to this hash_link or
1434
- assigned to this hash_link. If assigned, the block can have
1435
- invalid data (when freshly assigned) or
1436
- valid data. Valid data can be
1437
- changed over the file contents (dirty) or
1438
- not changed (clean).
1440
hash_link= get_hash_link(keycache, file, filepos);
1441
assert((hash_link->file == file) && (hash_link->diskpos == filepos));
1444
if ((block= hash_link->block) &&
1445
block->hash_link == hash_link && (block->status & BLOCK_READ))
1447
/* Assigned block with valid (changed or unchanged) contents. */
1448
page_status= PAGE_READ;
1451
else (page_status == -1)
1453
- block not assigned to this hash_link or
1454
- block assigned but not yet read from file (invalid data).
1457
if (keycache->in_resize)
1459
/* This is a request during a resize operation */
1463
internal::st_my_thread_var *thread;
1466
The file block is not in the cache. We don't need it in the
1467
cache: we are going to read or write directly to file. Cancel
1468
the request. We can simply decrement hash_link->requests because
1469
we did not release cache_lock since increasing it. So no other
1470
thread can wait for our request to become released.
1472
if (hash_link->requests == 1)
1475
We are the only one to request this hash_link (this file/pos).
1478
hash_link->requests--;
1479
unlink_hash(keycache, hash_link);
1484
More requests on the hash_link. Someone tries to evict a block
1485
for this hash_link (could have started before resizing started).
1486
This means that the LRU ring is empty. Otherwise a block could
1487
be assigned immediately. Behave like a thread that wants to
1488
evict a block for this file/pos. Add to the queue of threads
1489
waiting for a block. Wait until there is one assigned.
1491
Refresh the request on the hash-link so that it cannot be reused
1492
for another file/pos.
1494
thread= my_thread_var;
1495
thread->opt_info= (void *) hash_link;
1496
link_into_queue(&keycache->waiting_for_block, thread);
1499
keycache_pthread_cond_wait(&thread->suspend,
1500
&keycache->cache_lock);
1501
} while (thread->next);
1502
thread->opt_info= NULL;
1504
A block should now be assigned to the hash_link. But it may
1505
still need to be evicted. Anyway, we should re-check the
1506
situation. page_status must be set correctly.
1508
hash_link->requests--;
1510
} /* end of if (!block) */
1513
There is a block for this file/pos in the cache. Register a
1514
request on it. This unlinks it from the LRU ring (if it is there)
1515
and hence protects it against eviction (if not already in
1516
eviction). We need this for returning the block to the caller, for
1517
calling remove_reader() (for debugging purposes), and for calling
1518
free_block(). The only case where we don't need the request is if
1519
the block is in eviction. In that case we have to unregister the
1522
reg_requests(keycache, block, 1);
1524
if (page_status != PAGE_READ)
1527
- block not assigned to this hash_link or
1528
- block assigned but not yet read from file (invalid data).
1530
This must be a block in eviction. It will be read soon. We need
1531
to wait here until this happened. Otherwise the caller could
1532
access a wrong block or a block which is in read. While waiting
1533
we cannot lose hash_link nor block. We have registered a request
1534
on the hash_link. Everything can happen to the block but changes
1535
in the hash_link -> block relationship. In other words:
1536
everything can happen to the block but free or another completed
1539
Note that we bahave like a secondary requestor here. We just
1540
cannot return with PAGE_WAIT_TO_BE_READ. This would work for
1541
read requests and writes on dirty blocks that are not in flush
1542
only. Waiting here on COND_FOR_REQUESTED works in all
1545
assert(((block->hash_link != hash_link) &&
1546
(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
1547
((block->hash_link == hash_link) &&
1548
!(block->status & BLOCK_READ)));
1549
wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
1551
Here we can trust that the block has been assigned to this
1552
hash_link (block->hash_link == hash_link) and read into the
1553
buffer (BLOCK_READ). The worst things possible here are that the
1554
block is in free (BLOCK_REASSIGNED). But the block is still
1555
assigned to the hash_link. The freeing thread waits until we
1556
release our request on the hash_link. The block must not be
1557
again in eviction because we registered an request on it before
1560
assert(block->hash_link == hash_link);
1561
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1562
assert(!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)));
1565
The block is in the cache. Assigned to the hash_link. Valid data.
1566
Note that in case of page_st == PAGE_READ, the block can be marked
1567
for eviction. In any case it can be marked for freeing.
1572
/* A reader can just read the block. */
1573
*page_st= PAGE_READ;
1574
assert((hash_link->file == file) &&
1575
(hash_link->diskpos == filepos) &&
1576
(block->hash_link == hash_link));
1581
This is a writer. No two writers for the same block can exist.
1582
This must be assured by locks outside of the key cache.
1584
assert(!(block->status & BLOCK_FOR_UPDATE));
1586
while (block->status & BLOCK_IN_FLUSH)
1589
Wait until the block is flushed to file. Do not release the
1590
request on the hash_link yet to prevent that the block is freed
1591
or reassigned while we wait. While we wait, several things can
1592
happen to the block, including another flush. But the block
1593
cannot be reassigned to another hash_link until we release our
1594
request on it. But it can be marked BLOCK_REASSIGNED from free
1595
or eviction, while they wait for us to release the hash_link.
1597
wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
1599
If the flush phase failed, the resize could have finished while
1602
if (!keycache->in_resize)
1604
remove_reader(block);
1605
unreg_request(keycache, block, 1);
1608
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1609
assert(!(block->status & BLOCK_FOR_UPDATE));
1610
assert(block->hash_link == hash_link);
1613
if (block->status & BLOCK_CHANGED)
1616
We want to write a block with changed contents. If the cache
1617
block size is bigger than the callers block size (e.g. MyISAM),
1618
the caller may replace part of the block only. Changes of the
1619
other part of the block must be preserved. Since the block has
1620
not yet been selected for flush, we can still add our changes.
1622
*page_st= PAGE_READ;
1623
assert((hash_link->file == file) &&
1624
(hash_link->diskpos == filepos) &&
1625
(block->hash_link == hash_link));
1630
This is a write request for a clean block. We do not want to have
1631
new dirty blocks in the cache while resizing. We will free the
1632
block and write directly to file. If the block is in eviction or
1633
in free, we just let it go.
1635
Unregister from the hash_link. This must be done before freeing
1636
the block. And it must be done if not freeing the block. Because
1637
we could have waited above, we need to call remove_reader(). Other
1638
threads could wait for us to release our request on the hash_link.
1640
remove_reader(block);
1642
/* If the block is not in eviction and not in free, we can free it. */
1643
if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1647
Free block as we are going to write directly to file.
1648
Although we have an exlusive lock for the updated key part,
1649
the control can be yielded by the current thread as we might
1650
have unfinished readers of other key parts in the block
1651
buffer. Still we are guaranteed not to have any readers
1652
of the key part we are writing into until the block is
1653
removed from the cache as we set the BLOCK_REASSIGNED
1654
flag (see the code below that handles reading requests).
1656
free_block(keycache, block);
1661
The block will be evicted/freed soon. Don't touch it in any way.
1662
Unregister the request that we registered above.
1664
unreg_request(keycache, block, 1);
1667
The block is still assigned to the hash_link (the file/pos that
1668
we are going to write to). Wait until the eviction/free is
1669
complete. Otherwise the direct write could complete before all
1670
readers are done with the block. So they could read outdated
1673
Since we released our request on the hash_link, it can be reused
1674
for another file/pos. Hence we cannot just check for
1675
block->hash_link == hash_link. As long as the resize is
1676
proceeding the block cannot be reassigned to the same file/pos
1677
again. So we can terminate the loop when the block is no longer
1678
assigned to this file/pos.
1682
wait_on_queue(&block->wqueue[COND_FOR_SAVED],
1683
&keycache->cache_lock);
1685
If the flush phase failed, the resize could have finished
1686
while we waited here.
1688
if (!keycache->in_resize)
1690
} while (block->hash_link &&
1691
(block->hash_link->file == file) &&
1692
(block->hash_link->diskpos == filepos));
1697
if (page_status == PAGE_READ &&
1698
(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1702
This is a request for a block to be removed from cache. The block
1703
is assigned to this hash_link and contains valid data, but is
1704
marked for eviction or to be freed. Possible reasons why it has
1705
not yet been evicted/freed can be a flush before reassignment
1706
(BLOCK_IN_SWITCH), readers of the block have not finished yet
1707
(BLOCK_REASSIGNED), or the evicting thread did not yet awake after
1708
the block has been selected for it (BLOCK_IN_EVICTION).
1710
Only reading requests can proceed until the old dirty page is flushed,
1711
all others are to be suspended, then resubmitted
1713
if (!wrmode && !(block->status & BLOCK_REASSIGNED))
1716
This is a read request and the block not yet reassigned. We can
1717
register our request and proceed. This unlinks the block from
1718
the LRU ring and protects it against eviction.
1720
reg_requests(keycache, block, 1);
1725
Either this is a write request for a block that is in eviction
1726
or in free. We must not use it any more. Instead we must evict
1727
another block. But we cannot do this before the eviction/free is
1728
done. Otherwise we would find the same hash_link + block again
1731
Or this is a read request for a block in eviction/free that does
1732
not require a flush, but waits for readers to finish with the
1733
block. We do not read this block to let the eviction/free happen
1734
as soon as possible. Again we must wait so that we don't find
1735
the same hash_link + block again and again.
1737
assert(hash_link->requests);
1738
hash_link->requests--;
1739
wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
1741
The block is no longer assigned to this hash_link.
1750
This is a request for a new block or for a block not to be removed.
1753
- block not assigned to this hash_link or
1754
- block assigned but not yet read from file,
1756
- block assigned with valid (changed or unchanged) data and
1757
- it will not be reassigned/freed.
1761
/* No block is assigned to the hash_link yet. */
1762
if (keycache->blocks_unused)
1764
if (keycache->free_block_list)
1766
/* There is a block in the free list. */
1767
block= keycache->free_block_list;
1768
keycache->free_block_list= block->next_used;
1769
block->next_used= NULL;
1773
/* There are some never used blocks, take first of them */
1774
assert(keycache->blocks_used <
1775
(uint32_t) keycache->disk_blocks);
1776
block= &keycache->block_root[keycache->blocks_used];
1777
block->buffer= ADD_TO_PTR(keycache->block_mem,
1778
((uint32_t) keycache->blocks_used*
1779
keycache->key_cache_block_size),
1781
keycache->blocks_used++;
1782
assert(!block->next_used);
1784
assert(!block->prev_used);
1785
assert(!block->next_changed);
1786
assert(!block->prev_changed);
1787
assert(!block->hash_link);
1788
assert(!block->status);
1789
assert(!block->requests);
1790
keycache->blocks_unused--;
1791
block->status= BLOCK_IN_USE;
1793
block->offset= keycache->key_cache_block_size;
1795
block->temperature= BLOCK_COLD;
1796
block->hits_left= init_hits_left;
1797
block->last_hit_time= 0;
1798
block->hash_link= hash_link;
1799
hash_link->block= block;
1800
link_to_file_list(keycache, block, file, 0);
1801
page_status= PAGE_TO_BE_READ;
1806
There are no free blocks and no never used blocks, use a block
1810
if (! keycache->used_last)
1813
The LRU ring is empty. Wait until a new block is added to
1814
it. Several threads might wait here for the same hash_link,
1815
all of them must get the same block. While waiting for a
1816
block, after a block is selected for this hash_link, other
1817
threads can run first before this one awakes. During this
1818
time interval other threads find this hash_link pointing to
1819
the block, which is still assigned to another hash_link. In
1820
this case the block is not marked BLOCK_IN_SWITCH yet, but
1821
it is marked BLOCK_IN_EVICTION.
1824
internal::st_my_thread_var *thread= my_thread_var;
1825
thread->opt_info= (void *) hash_link;
1826
link_into_queue(&keycache->waiting_for_block, thread);
1829
keycache_pthread_cond_wait(&thread->suspend,
1830
&keycache->cache_lock);
1832
while (thread->next);
1833
thread->opt_info= NULL;
1834
/* Assert that block has a request registered. */
1835
assert(hash_link->block->requests);
1836
/* Assert that block is not in LRU ring. */
1837
assert(!hash_link->block->next_used);
1838
assert(!hash_link->block->prev_used);
1841
If we waited above, hash_link->block has been assigned by
1842
link_block(). Otherwise it is still NULL. In the latter case
1843
we need to grab a block from the LRU ring ourselves.
1845
block= hash_link->block;
1848
/* Select the last block from the LRU ring. */
1849
block= keycache->used_last->next_used;
1850
block->hits_left= init_hits_left;
1851
block->last_hit_time= 0;
1852
hash_link->block= block;
1854
Register a request on the block. This unlinks it from the
1855
LRU ring and protects it against eviction.
1857
assert(!block->requests);
1858
reg_requests(keycache, block,1);
1860
We do not need to set block->status|= BLOCK_IN_EVICTION here
1861
because we will set block->status|= BLOCK_IN_SWITCH
1862
immediately without releasing the lock in between. This does
1863
also support debugging. When looking at the block, one can
1864
see if the block has been selected by link_block() after the
1865
LRU ring was empty, or if it was grabbed directly from the
1866
LRU ring in this branch.
1871
If we had to wait above, there is a small chance that another
1872
thread grabbed this block for the same file block already. But
1873
in most cases the first condition is true.
1875
if (block->hash_link != hash_link &&
1876
! (block->status & BLOCK_IN_SWITCH) )
1878
/* this is a primary request for a new page */
1879
block->status|= BLOCK_IN_SWITCH;
1881
if (block->status & BLOCK_CHANGED)
1883
/* The block contains a dirty page - push it out of the cache */
1885
if (block->status & BLOCK_IN_FLUSH)
1888
The block is marked for flush. If we do not wait here,
1889
it could happen that we write the block, reassign it to
1890
another file block, then, before the new owner can read
1891
the new file block, the flusher writes the cache block
1892
(which still has the old contents) to the new file block!
1894
wait_on_queue(&block->wqueue[COND_FOR_SAVED],
1895
&keycache->cache_lock);
1897
The block is marked BLOCK_IN_SWITCH. It should be left
1898
alone except for reading. No free, no write.
1900
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1901
assert(!(block->status & (BLOCK_REASSIGNED |
1903
BLOCK_FOR_UPDATE)));
1907
block->status|= BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE;
1909
BLOCK_IN_EVICTION may be true or not. Other flags must
1912
assert((block->status & ~BLOCK_IN_EVICTION) ==
1913
(BLOCK_READ | BLOCK_IN_SWITCH |
1914
BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
1915
BLOCK_CHANGED | BLOCK_IN_USE));
1916
assert(block->hash_link);
1918
keycache_pthread_mutex_unlock(&keycache->cache_lock);
1920
The call is thread safe because only the current
1921
thread might change the block->hash_link value
1923
error= (pwrite(block->hash_link->file,
1924
block->buffer+block->offset,
1925
block->length - block->offset,
1926
block->hash_link->diskpos+ block->offset) == 0);
1927
keycache_pthread_mutex_lock(&keycache->cache_lock);
1929
/* Block status must not have changed. */
1930
assert((block->status & ~BLOCK_IN_EVICTION) ==
1931
(BLOCK_READ | BLOCK_IN_SWITCH |
1932
BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
1933
BLOCK_CHANGED | BLOCK_IN_USE));
1934
keycache->global_cache_write++;
1938
block->status|= BLOCK_REASSIGNED;
1940
The block comes from the LRU ring. It must have a hash_link
1943
assert(block->hash_link);
1944
if (block->hash_link)
1947
All pending requests for this page must be resubmitted.
1948
This must be done before waiting for readers. They could
1949
wait for the flush to complete. And we must also do it
1950
after the wait. Flushers might try to free the block while
1951
we wait. They would wait until the reassignment is
1952
complete. Also the block status must reflect the correct
1953
situation: The block is not changed nor in flush any more.
1954
Note that we must not change the BLOCK_CHANGED flag
1955
outside of link_to_file_list() so that it is always in the
1956
correct queue and the *blocks_changed counters are
1959
block->status&= ~(BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE);
1960
link_to_file_list(keycache, block, block->hash_link->file, 1);
1961
release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
1963
The block is still assigned to its old hash_link.
1964
Wait until all pending read requests
1965
for this page are executed
1966
(we could have avoided this waiting, if we had read
1967
a page in the cache in a sweep, without yielding control)
1969
wait_for_readers(keycache, block);
1970
assert(block->hash_link && block->hash_link->block == block &&
1971
block->prev_changed);
1972
/* The reader must not have been a writer. */
1973
assert(!(block->status & BLOCK_CHANGED));
1975
/* Wake flushers that might have found the block in between. */
1976
release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
1978
/* Remove the hash link for the old file block from the hash. */
1979
unlink_hash(keycache, block->hash_link);
1982
For sanity checks link_to_file_list() asserts that block
1983
and hash_link refer to each other. Hence we need to assign
1984
the hash_link first, but then we would not know if it was
1985
linked before. Hence we would not know if to unlink it. So
1986
unlink it here and call link_to_file_list(..., false).
1988
unlink_changed(block);
1990
block->status= error ? BLOCK_ERROR : BLOCK_IN_USE ;
1992
block->offset= keycache->key_cache_block_size;
1993
block->hash_link= hash_link;
1994
link_to_file_list(keycache, block, file, 0);
1995
page_status= PAGE_TO_BE_READ;
1997
assert(block->hash_link->block == block);
1998
assert(hash_link->block->hash_link == hash_link);
2003
Either (block->hash_link == hash_link),
2004
or (block->status & BLOCK_IN_SWITCH).
2006
This is for secondary requests for a new file block only.
2007
Either it is already assigned to the new hash_link meanwhile
2008
(if we had to wait due to empty LRU), or it is already in
2009
eviction by another thread. Since this block has been
2010
grabbed from the LRU ring and attached to this hash_link,
2011
another thread cannot grab the same block from the LRU ring
2012
anymore. If the block is in eviction already, it must become
2013
attached to the same hash_link and as such destined for the
2016
page_status= (((block->hash_link == hash_link) &&
2017
(block->status & BLOCK_READ)) ?
2018
PAGE_READ : PAGE_WAIT_TO_BE_READ);
2025
Block is not NULL. This hash_link points to a block.
2027
- block not assigned to this hash_link (yet) or
2028
- block assigned but not yet read from file,
2030
- block assigned with valid (changed or unchanged) data and
2031
- it will not be reassigned/freed.
2033
The first condition means hash_link points to a block in
2034
eviction. This is not necessarily marked by BLOCK_IN_SWITCH yet.
2035
But then it is marked BLOCK_IN_EVICTION. See the NOTE in
2036
link_block(). In both cases it is destined for this hash_link
2037
and its file block address. When this hash_link got its block
2038
address, the block was removed from the LRU ring and cannot be
2039
selected for eviction (for another hash_link) again.
2041
Register a request on the block. This is another protection
2044
assert(((block->hash_link != hash_link) &&
2045
(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
2046
((block->hash_link == hash_link) &&
2047
!(block->status & BLOCK_READ)) ||
2048
((block->status & BLOCK_READ) &&
2049
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))));
2050
reg_requests(keycache, block, 1);
2051
page_status= (((block->hash_link == hash_link) &&
2052
(block->status & BLOCK_READ)) ?
2053
PAGE_READ : PAGE_WAIT_TO_BE_READ);
2057
assert(page_status != -1);
2058
/* Same assert basically, but be very sure. */
2060
/* Assert that block has a request and is not in LRU ring. */
2061
assert(block->requests);
2062
assert(!block->next_used);
2063
assert(!block->prev_used);
2064
/* Assert that we return the correct block. */
2065
assert((page_status == PAGE_WAIT_TO_BE_READ) ||
2066
((block->hash_link->file == file) &&
2067
(block->hash_link->diskpos == filepos)));
2068
*page_st=page_status;
2075
Read into a key cache block buffer from disk.
2080
keycache pointer to a key cache data structure
2081
block block to which buffer the data is to be read
2082
read_length size of data to be read
2083
min_length at least so much data must be read
2084
primary <-> the current thread will read the data
2090
The function either reads a page data from file to the block buffer,
2091
or waits until another thread reads it. What page to read is determined
2092
by a block parameter - reference to a hash link for this page.
2093
If an error occurs THE BLOCK_ERROR bit is set in the block status.
2094
We do not report error when the size of successfully read
2095
portion is less than read_length, but not less than min_length.
2098
static void read_block(KEY_CACHE *keycache,
2099
BLOCK_LINK *block, uint32_t read_length,
2100
uint32_t min_length, bool primary)
2102
uint32_t got_length;
2104
/* On entry cache_lock is locked */
2109
This code is executed only by threads that submitted primary
2110
requests. Until block->status contains BLOCK_READ, all other
2111
request for the block become secondary requests. For a primary
2112
request the block must be properly initialized.
2114
assert(((block->status & ~BLOCK_FOR_UPDATE) == BLOCK_IN_USE));
2115
assert((block->length == 0));
2116
assert((block->offset == keycache->key_cache_block_size));
2117
assert((block->requests > 0));
2119
keycache->global_cache_read++;
2120
/* Page is not in buffer yet, is to be read from disk */
2121
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2123
Here other threads may step in and register as secondary readers.
2124
They will register in block->wqueue[COND_FOR_REQUESTED].
2126
got_length= pread(block->hash_link->file, block->buffer, read_length, block->hash_link->diskpos);
2127
keycache_pthread_mutex_lock(&keycache->cache_lock);
2129
The block can now have been marked for free (in case of
2130
FLUSH_RELEASE). Otherwise the state must be unchanged.
2132
assert(((block->status & ~(BLOCK_REASSIGNED |
2133
BLOCK_FOR_UPDATE)) == BLOCK_IN_USE));
2134
assert((block->length == 0));
2135
assert((block->offset == keycache->key_cache_block_size));
2136
assert((block->requests > 0));
2138
if (got_length < min_length)
2139
block->status|= BLOCK_ERROR;
2142
block->status|= BLOCK_READ;
2143
block->length= got_length;
2145
Do not set block->offset here. If this block is marked
2146
BLOCK_CHANGED later, we want to flush only the modified part. So
2147
only a writer may set block->offset down from
2148
keycache->key_cache_block_size.
2151
/* Signal that all pending requests for this page now can be processed */
2152
release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2157
This code is executed only by threads that submitted secondary
2158
requests. At this point it could happen that the cache block is
2159
not yet assigned to the hash_link for the requested file block.
2160
But at awake from the wait this should be the case. Unfortunately
2161
we cannot assert this here because we do not know the hash_link
2162
for the requested file block nor the file and position. So we have
2163
to assert this in the caller.
2165
wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
304
2171
Read a block of data from a cached file into a buffer;
377
2393
int file, internal::my_off_t filepos, int level,
378
2394
unsigned char *buff, uint32_t length)
386
assert (!keycache->key_cache_inited);
2398
if (keycache->key_cache_inited)
2400
/* Key cache is used */
2401
register BLOCK_LINK *block;
2402
uint32_t read_length;
2405
bool locked_and_incremented= false;
2408
When the keycache is once initialized, we use the cache_lock to
2409
reliably distinguish the cases of normal operation, resizing, and
2410
disabled cache. We always increment and decrement
2411
'cnt_for_resize_op' so that a resizer can wait for pending I/O.
2413
keycache_pthread_mutex_lock(&keycache->cache_lock);
2415
We do not load index data into a disabled cache nor into an
2418
if (!keycache->can_be_used || keycache->in_resize)
2420
/* Register the pseudo I/O for the next resize. */
2421
inc_counter_for_resize_op(keycache);
2422
locked_and_incremented= true;
2423
/* Loaded data may not always be aligned to cache blocks. */
2424
offset= (uint) (filepos % keycache->key_cache_block_size);
2425
/* Load data in key_cache_block_size increments. */
2428
/* Cache could be disabled or resizing in a later iteration. */
2429
if (!keycache->can_be_used || keycache->in_resize)
2431
/* Start loading at the beginning of the cache block. */
2433
/* Do not load beyond the end of the cache block. */
2434
read_length= length;
2435
set_if_smaller(read_length, keycache->key_cache_block_size-offset);
2436
assert(read_length > 0);
2438
/* The block has been read by the caller already. */
2439
keycache->global_cache_read++;
2440
/* Request the cache block that matches file/pos. */
2441
keycache->global_cache_r_requests++;
2442
block= find_key_block(keycache, file, filepos, level, 0, &page_st);
2446
This happens only for requests submitted during key cache
2447
resize. The block is not in the cache and shall not go in.
2448
Stop loading index data.
2452
if (!(block->status & BLOCK_ERROR))
2454
if ((page_st == PAGE_WAIT_TO_BE_READ) ||
2455
((page_st == PAGE_TO_BE_READ) &&
2456
(offset || (read_length < keycache->key_cache_block_size))))
2461
this is a secondary request for a block to be read into the
2462
cache. The block is in eviction. It is not yet assigned to
2463
the requested file block (It does not point to the right
2464
hash_link). So we cannot call remove_reader() on the block.
2465
And we cannot access the hash_link directly here. We need to
2466
wait until the assignment is complete. read_block() executes
2467
the correct wait when called with primary == false.
2471
this is a primary request for a block to be read into the
2472
cache and the supplied data does not fill the whole block.
2474
This function is called on behalf of a LOAD INDEX INTO CACHE
2475
statement, which is a read-only task and allows other
2476
readers. It is possible that a parallel running reader tries
2477
to access this block. If it needs more data than has been
2478
supplied here, it would report an error. To be sure that we
2479
have all data in the block that is available in the file, we
2480
read the block ourselves.
2482
Though reading again what the caller did read already is an
2483
expensive operation, we need to do this for correctness.
2485
read_block(keycache, block, keycache->key_cache_block_size,
2486
read_length + offset, (page_st == PAGE_TO_BE_READ));
2488
A secondary request must now have the block assigned to the
2489
requested file block. It does not hurt to check it for
2490
primary requests too.
2492
assert(keycache->can_be_used);
2493
assert(block->hash_link->file == file);
2494
assert(block->hash_link->diskpos == filepos);
2495
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2497
else if (page_st == PAGE_TO_BE_READ)
2500
This is a new block in the cache. If we come here, we have
2501
data for the whole block.
2503
assert(block->hash_link->requests);
2504
assert(block->status & BLOCK_IN_USE);
2505
assert((page_st == PAGE_TO_BE_READ) ||
2506
(block->status & BLOCK_READ));
2508
#if !defined(SERIALIZED_READ_FROM_CACHE)
2509
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2511
Here other threads may step in and register as secondary readers.
2512
They will register in block->wqueue[COND_FOR_REQUESTED].
2516
/* Copy data from buff */
2517
memcpy(block->buffer+offset, buff, (size_t) read_length);
2519
#if !defined(SERIALIZED_READ_FROM_CACHE)
2520
keycache_pthread_mutex_lock(&keycache->cache_lock);
2521
assert(block->status & BLOCK_IN_USE);
2522
assert((page_st == PAGE_TO_BE_READ) ||
2523
(block->status & BLOCK_READ));
2526
After the data is in the buffer, we can declare the block
2527
valid. Now other threads do not need to register as
2528
secondary readers any more. They can immediately access the
2531
block->status|= BLOCK_READ;
2532
block->length= read_length+offset;
2534
Do not set block->offset here. If this block is marked
2535
BLOCK_CHANGED later, we want to flush only the modified part. So
2536
only a writer may set block->offset down from
2537
keycache->key_cache_block_size.
2539
/* Signal all pending requests. */
2540
release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2545
page_st == PAGE_READ. The block is in the buffer. All data
2546
must already be present. Blocks are always read with all
2547
data available on file. Assert that the block does not have
2548
less contents than the preloader supplies. If the caller has
2549
data beyond block->length, it means that a file write has
2550
been done while this block was in cache and not extended
2551
with the new data. If the condition is met, we can simply
2554
assert((page_st == PAGE_READ) &&
2555
(read_length + offset <= block->length));
2559
A secondary request must now have the block assigned to the
2560
requested file block. It does not hurt to check it for primary
2563
assert(block->hash_link->file == file);
2564
assert(block->hash_link->diskpos == filepos);
2565
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2566
} /* end of if (!(block->status & BLOCK_ERROR)) */
2569
remove_reader(block);
2572
Link the block into the LRU ring if it's the last submitted
2573
request for the block. This enables eviction for the block.
2575
unreg_request(keycache, block, 1);
2577
error= (block->status & BLOCK_ERROR);
2583
filepos+= read_length+offset;
2586
} while ((length-= read_length));
2589
if (locked_and_incremented)
2590
dec_counter_for_resize_op(keycache);
2591
keycache_pthread_mutex_unlock(&keycache->cache_lock);
440
assert (!keycache->key_cache_inited);
2646
if (keycache->key_cache_inited)
2648
/* Key cache is used */
2649
register BLOCK_LINK *block;
2650
uint32_t read_length;
2655
When the key cache is once initialized, we use the cache_lock to
2656
reliably distinguish the cases of normal operation, resizing, and
2657
disabled cache. We always increment and decrement
2658
'cnt_for_resize_op' so that a resizer can wait for pending I/O.
2660
keycache_pthread_mutex_lock(&keycache->cache_lock);
2662
Cache resizing has two phases: Flushing and re-initializing. In
2663
the flush phase write requests can modify dirty blocks that are
2664
not yet in flush. Otherwise they are allowed to bypass the cache.
2665
find_key_block() returns NULL in both cases (clean blocks and
2668
After the flush phase new I/O requests must wait until the
2669
re-initialization is done. The re-initialization can be done only
2670
if no I/O request is in progress. The reason is that
2671
key_cache_block_size can change. With enabled cache I/O is done in
2672
chunks of key_cache_block_size. Every chunk tries to use a cache
2673
block first. If the block size changes in the middle, a block
2674
could be missed and data could be written below a cached block.
2676
while (keycache->in_resize && !keycache->resize_in_flush)
2677
wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
2678
/* Register the I/O for the next resize. */
2679
inc_counter_for_resize_op(keycache);
2680
locked_and_incremented= true;
2681
/* Requested data may not always be aligned to cache blocks. */
2682
offset= (uint) (filepos % keycache->key_cache_block_size);
2683
/* Write data in key_cache_block_size increments. */
2686
/* Cache could be disabled in a later iteration. */
2687
if (!keycache->can_be_used)
2689
/* Start writing at the beginning of the cache block. */
2691
/* Do not write beyond the end of the cache block. */
2692
read_length= length;
2693
set_if_smaller(read_length, keycache->key_cache_block_size-offset);
2694
assert(read_length > 0);
2696
/* Request the cache block that matches file/pos. */
2697
keycache->global_cache_w_requests++;
2698
block= find_key_block(keycache, file, filepos, level, 1, &page_st);
2702
This happens only for requests submitted during key cache
2703
resize. The block is not in the cache and shall not go in.
2704
Write directly to file.
2708
/* Used in the server. */
2709
keycache->global_cache_write++;
2710
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2711
if (pwrite(file, (unsigned char*) buff, read_length, filepos + offset) == 0)
2713
keycache_pthread_mutex_lock(&keycache->cache_lock);
2718
Prevent block from flushing and from being selected for to be
2719
freed. This must be set when we release the cache_lock.
2720
However, we must not set the status of the block before it is
2721
assigned to this file/pos.
2723
if (page_st != PAGE_WAIT_TO_BE_READ)
2724
block->status|= BLOCK_FOR_UPDATE;
2726
We must read the file block first if it is not yet in the cache
2727
and we do not replace all of its contents.
2729
In cases where the cache block is big enough to contain (parts
2730
of) index blocks of different indexes, our request can be
2731
secondary (PAGE_WAIT_TO_BE_READ). In this case another thread is
2732
reading the file block. If the read completes after us, it
2733
overwrites our new contents with the old contents. So we have to
2734
wait for the other thread to complete the read of this block.
2735
read_block() takes care for the wait.
2737
if (!(block->status & BLOCK_ERROR) &&
2738
((page_st == PAGE_TO_BE_READ &&
2739
(offset || read_length < keycache->key_cache_block_size)) ||
2740
(page_st == PAGE_WAIT_TO_BE_READ)))
2742
read_block(keycache, block,
2743
offset + read_length >= keycache->key_cache_block_size?
2744
offset : keycache->key_cache_block_size,
2745
offset, (page_st == PAGE_TO_BE_READ));
2746
assert(keycache->can_be_used);
2747
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2749
Prevent block from flushing and from being selected for to be
2750
freed. This must be set when we release the cache_lock.
2751
Here we set it in case we could not set it above.
2753
block->status|= BLOCK_FOR_UPDATE;
2756
The block should always be assigned to the requested file block
2757
here. It need not be BLOCK_READ when overwriting the whole block.
2759
assert(block->hash_link->file == file);
2760
assert(block->hash_link->diskpos == filepos);
2761
assert(block->status & BLOCK_IN_USE);
2762
assert((page_st == PAGE_TO_BE_READ) || (block->status & BLOCK_READ));
2764
The block to be written must not be marked BLOCK_REASSIGNED.
2765
Otherwise it could be freed in dirty state or reused without
2766
another flush during eviction. It must also not be in flush.
2767
Otherwise the old contens may have been flushed already and
2768
the flusher could clear BLOCK_CHANGED without flushing the
2771
assert(!(block->status & BLOCK_REASSIGNED));
2773
while (block->status & BLOCK_IN_FLUSHWRITE)
2776
Another thread is flushing the block. It was dirty already.
2777
Wait until the block is flushed to file. Otherwise we could
2778
modify the buffer contents just while it is written to file.
2779
An unpredictable file block contents would be the result.
2780
While we wait, several things can happen to the block,
2781
including another flush. But the block cannot be reassigned to
2782
another hash_link until we release our request on it.
2784
wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
2785
assert(keycache->can_be_used);
2786
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2787
/* Still must not be marked for free. */
2788
assert(!(block->status & BLOCK_REASSIGNED));
2789
assert(block->hash_link && (block->hash_link->block == block));
2793
We could perhaps release the cache_lock during access of the
2794
data like in the other functions. Locks outside of the key cache
2795
assure that readers and a writer do not access the same range of
2796
data. Parallel accesses should happen only if the cache block
2797
contains multiple index block(fragment)s. So different parts of
2798
the buffer would be read/written. An attempt to flush during
2799
memcpy() is prevented with BLOCK_FOR_UPDATE.
2801
if (!(block->status & BLOCK_ERROR))
2803
#if !defined(SERIALIZED_READ_FROM_CACHE)
2804
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2806
memcpy(block->buffer+offset, buff, (size_t) read_length);
2808
#if !defined(SERIALIZED_READ_FROM_CACHE)
2809
keycache_pthread_mutex_lock(&keycache->cache_lock);
2815
/* Not used in the server. buff has been written to disk at start. */
2816
if ((block->status & BLOCK_CHANGED) &&
2817
(!offset && read_length >= keycache->key_cache_block_size))
2818
link_to_file_list(keycache, block, block->hash_link->file, 1);
2820
else if (! (block->status & BLOCK_CHANGED))
2821
link_to_changed_list(keycache, block);
2822
block->status|=BLOCK_READ;
2824
Allow block to be selected for to be freed. Since it is marked
2825
BLOCK_CHANGED too, it won't be selected for to be freed without
2828
block->status&= ~BLOCK_FOR_UPDATE;
2829
set_if_smaller(block->offset, offset);
2830
set_if_bigger(block->length, read_length+offset);
2832
/* Threads may be waiting for the changes to be complete. */
2833
release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2836
If only a part of the cache block is to be replaced, and the
2837
rest has been read from file, then the cache lock has been
2838
released for I/O and it could be possible that another thread
2839
wants to evict or free the block and waits for it to be
2840
released. So we must not just decrement hash_link->requests, but
2841
also wake a waiting thread.
2843
remove_reader(block);
2846
Link the block into the LRU ring if it's the last submitted
2847
request for the block. This enables eviction for the block.
2849
unreg_request(keycache, block, 1);
2851
if (block->status & BLOCK_ERROR)
2859
filepos+= read_length+offset;
2862
} while ((length-= read_length));
442
2867
/* Key cache is not used */
445
2870
/* Used in the server. */
446
2871
keycache->global_cache_w_requests++;
447
2872
keycache->global_cache_write++;
2873
if (locked_and_incremented)
2874
keycache_pthread_mutex_unlock(&keycache->cache_lock);
448
2875
if (pwrite(file, (unsigned char*) buff, length, filepos) == 0)
2877
if (locked_and_incremented)
2878
keycache_pthread_mutex_lock(&keycache->cache_lock);
2882
if (locked_and_incremented)
2884
dec_counter_for_resize_op(keycache);
2885
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2896
keycache Pointer to a key cache data structure
2897
block Pointer to the block to free
2900
Remove reference to block from hash table.
2901
Remove block from the chain of clean blocks.
2902
Add block to the free list.
2905
Block must not be free (status == 0).
2906
Block must not be in free_block_list.
2907
Block must not be in the LRU ring.
2908
Block must not be in eviction (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH).
2909
Block must not be in free (BLOCK_REASSIGNED).
2910
Block must not be in flush (BLOCK_IN_FLUSH).
2911
Block must not be dirty (BLOCK_CHANGED).
2912
Block must not be in changed_blocks (dirty) hash.
2913
Block must be in file_blocks (clean) hash.
2914
Block must refer to a hash_link.
2915
Block must have a request registered on it.
2918
static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block)
2921
Assert that the block is not free already. And that it is in a clean
2922
state. Note that the block might just be assigned to a hash_link and
2923
not yet read (BLOCK_READ may not be set here). In this case a reader
2924
is registered in the hash_link and free_block() will wait for it
2927
assert((block->status & BLOCK_IN_USE) &&
2928
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
2929
BLOCK_REASSIGNED | BLOCK_IN_FLUSH |
2930
BLOCK_CHANGED | BLOCK_FOR_UPDATE)));
2931
/* Assert that the block is in a file_blocks chain. */
2932
assert(block->prev_changed && *block->prev_changed == block);
2933
/* Assert that the block is not in the LRU ring. */
2934
assert(!block->next_used && !block->prev_used);
2936
IMHO the below condition (if()) makes no sense. I can't see how it
2937
could be possible that free_block() is entered with a NULL hash_link
2938
pointer. The only place where it can become NULL is in free_block()
2939
(or before its first use ever, but for those blocks free_block() is
2940
not called). I don't remove the conditional as it cannot harm, but
2941
place an assert to confirm my hypothesis. Eventually the
2942
condition (if()) can be removed.
2944
assert(block->hash_link && block->hash_link->block == block);
2945
if (block->hash_link)
2948
While waiting for readers to finish, new readers might request the
2949
block. But since we set block->status|= BLOCK_REASSIGNED, they
2950
will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
2953
block->status|= BLOCK_REASSIGNED;
2954
wait_for_readers(keycache, block);
2956
The block must not have been freed by another thread. Repeat some
2957
checks. An additional requirement is that it must be read now
2960
assert(block->hash_link && block->hash_link->block == block);
2961
assert((block->status & (BLOCK_READ | BLOCK_IN_USE |
2962
BLOCK_REASSIGNED)) &&
2963
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
2964
BLOCK_IN_FLUSH | BLOCK_CHANGED |
2965
BLOCK_FOR_UPDATE)));
2966
assert(block->prev_changed && *block->prev_changed == block);
2967
assert(!block->prev_used);
2969
Unset BLOCK_REASSIGNED again. If we hand the block to an evicting
2970
thread (through unreg_request() below), other threads must not see
2971
this flag. They could become confused.
2973
block->status&= ~BLOCK_REASSIGNED;
2975
Do not release the hash_link until the block is off all lists.
2976
At least not if we hand it over for eviction in unreg_request().
2981
Unregister the block request and link the block into the LRU ring.
2982
This enables eviction for the block. If the LRU ring was empty and
2983
threads are waiting for a block, then the block wil be handed over
2984
for eviction immediately. Otherwise we will unlink it from the LRU
2985
ring again, without releasing the lock in between. So decrementing
2986
the request counter and updating statistics are the only relevant
2987
operation in this case. Assert that there are no other requests
2990
assert(block->requests == 1);
2991
unreg_request(keycache, block, 0);
2993
Note that even without releasing the cache lock it is possible that
2994
the block is immediately selected for eviction by link_block() and
2995
thus not added to the LRU ring. In this case we must not touch the
2998
if (block->status & BLOCK_IN_EVICTION)
3001
/* Here the block must be in the LRU ring. Unlink it again. */
3002
assert(block->next_used && block->prev_used &&
3003
*block->prev_used == block);
3004
unlink_block(keycache, block);
3005
if (block->temperature == BLOCK_WARM)
3006
keycache->warm_blocks--;
3007
block->temperature= BLOCK_COLD;
3009
/* Remove from file_blocks hash. */
3010
unlink_changed(block);
3012
/* Remove reference to block from hash table. */
3013
unlink_hash(keycache, block->hash_link);
3014
block->hash_link= NULL;
3018
block->offset= keycache->key_cache_block_size;
3020
/* Enforced by unlink_changed(), but just to be sure. */
3021
assert(!block->next_changed && !block->prev_changed);
3022
/* Enforced by unlink_block(): not in LRU ring nor in free_block_list. */
3023
assert(!block->next_used && !block->prev_used);
3024
/* Insert the free block in the free list. */
3025
block->next_used= keycache->free_block_list;
3026
keycache->free_block_list= block;
3027
/* Keep track of the number of currently unused blocks. */
3028
keycache->blocks_unused++;
3030
/* All pending requests for this page must be resubmitted. */
3031
release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
3035
static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b)
3037
return (((*a)->hash_link->diskpos < (*b)->hash_link->diskpos) ? -1 :
3038
((*a)->hash_link->diskpos > (*b)->hash_link->diskpos) ? 1 : 0);
3043
Flush a portion of changed blocks to disk,
3044
free used blocks if requested
3047
static int flush_cached_blocks(KEY_CACHE *keycache,
3048
int file, BLOCK_LINK **cache,
3050
enum flush_type type)
3054
uint32_t count= (uint) (end-cache);
3056
/* Don't lock the cache during the flush */
3057
keycache_pthread_mutex_unlock(&keycache->cache_lock);
3059
As all blocks referred in 'cache' are marked by BLOCK_IN_FLUSH
3060
we are guarunteed no thread will change them
3062
internal::my_qsort((unsigned char*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);
3064
keycache_pthread_mutex_lock(&keycache->cache_lock);
3066
Note: Do not break the loop. We have registered a request on every
3067
block in 'cache'. These must be unregistered by free_block() or
3070
for ( ; cache != end ; cache++)
3072
BLOCK_LINK *block= *cache;
3074
If the block contents is going to be changed, we abandon the flush
3075
for this block. flush_key_blocks_int() will restart its search and
3076
handle the block properly.
3078
if (!(block->status & BLOCK_FOR_UPDATE))
3080
/* Blocks coming here must have a certain status. */
3081
assert(block->hash_link);
3082
assert(block->hash_link->block == block);
3083
assert(block->hash_link->file == file);
3084
assert((block->status & ~BLOCK_IN_EVICTION) ==
3085
(BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
3086
block->status|= BLOCK_IN_FLUSHWRITE;
3087
keycache_pthread_mutex_unlock(&keycache->cache_lock);
3088
error= (pwrite(file,
3089
block->buffer+block->offset,
3090
block->length - block->offset,
3091
block->hash_link->diskpos+ block->offset) == 0);
3092
keycache_pthread_mutex_lock(&keycache->cache_lock);
3093
keycache->global_cache_write++;
3096
block->status|= BLOCK_ERROR;
3098
last_errno= errno ? errno : -1;
3100
block->status&= ~BLOCK_IN_FLUSHWRITE;
3101
/* Block must not have changed status except BLOCK_FOR_UPDATE. */
3102
assert(block->hash_link);
3103
assert(block->hash_link->block == block);
3104
assert(block->hash_link->file == file);
3105
assert((block->status & ~(BLOCK_FOR_UPDATE | BLOCK_IN_EVICTION)) ==
3106
(BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
3108
Set correct status and link in right queue for free or later use.
3109
free_block() must not see BLOCK_CHANGED and it may need to wait
3110
for readers of the block. These should not see the block in the
3111
wrong hash. If not freeing the block, we need to have it in the
3114
link_to_file_list(keycache, block, file, 1);
3117
block->status&= ~BLOCK_IN_FLUSH;
3119
Let to proceed for possible waiting requests to write to the block page.
3120
It might happen only during an operation to resize the key cache.
3122
release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
3123
/* type will never be FLUSH_IGNORE_CHANGED here */
3124
if (!(type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) &&
3125
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3129
Note that a request has been registered against the block in
3130
flush_key_blocks_int().
3132
free_block(keycache, block);
3137
Link the block into the LRU ring if it's the last submitted
3138
request for the block. This enables eviction for the block.
3139
Note that a request has been registered against the block in
3140
flush_key_blocks_int().
3142
unreg_request(keycache, block, 1);
3145
} /* end of for ( ; cache != end ; cache++) */
3151
flush all key blocks for a file to disk, but don't do any mutex locks.
3154
flush_key_blocks_int()
3155
keycache pointer to a key cache data structure
3156
file handler for the file to flush to
3157
flush_type type of the flush
3160
This function doesn't do any mutex locks because it needs to be called both
3161
from flush_key_blocks and flush_all_key_blocks (the later one does the
3162
mutex lock in the resize_key_cache() function).
3164
We do only care about changed blocks that exist when the function is
3165
entered. We do not guarantee that all changed blocks of the file are
3166
flushed if more blocks change while this function is running.
3173
static int flush_key_blocks_int(KEY_CACHE *keycache,
3174
int file, enum flush_type type)
3176
BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
3181
if (keycache->disk_blocks > 0 &&
3182
(!internal::my_disable_flush_key_blocks || type != FLUSH_KEEP))
3184
/* Key cache exists and flush is not disabled */
3186
uint32_t count= FLUSH_CACHE;
3187
BLOCK_LINK **pos,**end;
3188
BLOCK_LINK *first_in_switch= NULL;
3189
BLOCK_LINK *last_in_flush;
3190
BLOCK_LINK *last_for_update;
3191
BLOCK_LINK *last_in_switch;
3192
BLOCK_LINK *block, *next;
3194
if (type != FLUSH_IGNORE_CHANGED)
3197
Count how many key blocks we have to cache to be able
3198
to flush all dirty pages with minimum seek moves
3201
for (block= keycache->changed_blocks[FILE_HASH(file)] ;
3203
block= block->next_changed)
3205
if ((block->hash_link->file == file) &&
3206
!(block->status & BLOCK_IN_FLUSH))
3209
assert(count<= keycache->blocks_used);
3213
Allocate a new buffer only if its bigger than the one we have.
3214
Assure that we always have some entries for the case that new
3215
changed blocks appear while we need to wait for something.
3217
if ((count > FLUSH_CACHE) &&
3218
!(cache= (BLOCK_LINK**) malloc(sizeof(BLOCK_LINK*)*count)))
3221
After a restart there could be more changed blocks than now.
3222
So we should not let count become smaller than the fixed buffer.
3224
if (cache == cache_buff)
3228
/* Retrieve the blocks and write them to a buffer to be flushed */
3230
last_in_flush= NULL;
3231
last_for_update= NULL;
3232
end= (pos= cache)+count;
3233
for (block= keycache->changed_blocks[FILE_HASH(file)] ;
3237
next= block->next_changed;
3238
if (block->hash_link->file == file)
3240
if (!(block->status & (BLOCK_IN_FLUSH | BLOCK_FOR_UPDATE)))
3243
Note: The special handling of BLOCK_IN_SWITCH is obsolete
3244
since we set BLOCK_IN_FLUSH if the eviction includes a
3245
flush. It can be removed in a later version.
3247
if (!(block->status & BLOCK_IN_SWITCH))
3250
We care only for the blocks for which flushing was not
3251
initiated by another thread and which are not in eviction.
3252
Registering a request on the block unlinks it from the LRU
3253
ring and protects against eviction.
3255
reg_requests(keycache, block, 1);
3256
if (type != FLUSH_IGNORE_CHANGED)
3258
/* It's not a temporary file */
3262
This should happen relatively seldom. Remove the
3263
request because we won't do anything with the block
3264
but restart and pick it again in the next iteration.
3266
unreg_request(keycache, block, 0);
3268
This happens only if there is not enough
3269
memory for the big block
3271
if ((error= flush_cached_blocks(keycache, file, cache,
3274
/* Do not loop infinitely trying to flush in vain. */
3275
if ((last_errno == error) && (++last_errcnt > 5))
3280
Restart the scan as some other thread might have changed
3281
the changed blocks chain: the blocks that were in switch
3282
state before the flush started have to be excluded
3287
Mark the block with BLOCK_IN_FLUSH in order not to let
3288
other threads to use it for new pages and interfere with
3289
our sequence of flushing dirty file pages. We must not
3290
set this flag before actually putting the block on the
3291
write burst array called 'cache'.
3293
block->status|= BLOCK_IN_FLUSH;
3294
/* Add block to the array for a write burst. */
3299
/* It's a temporary file */
3300
assert(!(block->status & BLOCK_REASSIGNED));
3303
free_block() must not be called with BLOCK_CHANGED. Note
3304
that we must not change the BLOCK_CHANGED flag outside of
3305
link_to_file_list() so that it is always in the correct
3306
queue and the *blocks_changed counters are correct.
3308
link_to_file_list(keycache, block, file, 1);
3309
if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)))
3311
/* A request has been registered against the block above. */
3312
free_block(keycache, block);
3317
Link the block into the LRU ring if it's the last
3318
submitted request for the block. This enables eviction
3319
for the block. A request has been registered against
3322
unreg_request(keycache, block, 1);
3329
Link the block into a list of blocks 'in switch'.
3331
WARNING: Here we introduce a place where a changed block
3332
is not in the changed_blocks hash! This is acceptable for
3333
a BLOCK_IN_SWITCH. Never try this for another situation.
3334
Other parts of the key cache code rely on changed blocks
3335
being in the changed_blocks hash.
3337
unlink_changed(block);
3338
link_changed(block, &first_in_switch);
3341
else if (type != FLUSH_KEEP)
3344
During the normal flush at end of statement (FLUSH_KEEP) we
3345
do not need to ensure that blocks in flush or update by
3346
other threads are flushed. They will be flushed by them
3347
later. In all other cases we must assure that we do not have
3348
any changed block of this file in the cache when this
3351
if (block->status & BLOCK_IN_FLUSH)
3353
/* Remember the last block found to be in flush. */
3354
last_in_flush= block;
3358
/* Remember the last block found to be selected for update. */
3359
last_for_update= block;
3366
if ((error= flush_cached_blocks(keycache, file, cache, pos, type)))
3368
/* Do not loop inifnitely trying to flush in vain. */
3369
if ((last_errno == error) && (++last_errcnt > 5))
3374
Do not restart here during the normal flush at end of statement
3375
(FLUSH_KEEP). We have now flushed at least all blocks that were
3376
changed when entering this function. In all other cases we must
3377
assure that we do not have any changed block of this file in the
3378
cache when this function returns.
3380
if (type != FLUSH_KEEP)
3386
There are no blocks to be flushed by this thread, but blocks in
3387
flush by other threads. Wait until one of the blocks is flushed.
3388
Re-check the condition for last_in_flush. We may have unlocked
3389
the cache_lock in flush_cached_blocks(). The state of the block
3392
if (last_in_flush->status & BLOCK_IN_FLUSH)
3393
wait_on_queue(&last_in_flush->wqueue[COND_FOR_SAVED],
3394
&keycache->cache_lock);
3395
/* Be sure not to lose a block. They may be flushed in random order. */
3398
if (last_for_update)
3401
There are no blocks to be flushed by this thread, but blocks for
3402
update by other threads. Wait until one of the blocks is updated.
3403
Re-check the condition for last_for_update. We may have unlocked
3404
the cache_lock in flush_cached_blocks(). The state of the block
3407
if (last_for_update->status & BLOCK_FOR_UPDATE)
3408
wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
3409
&keycache->cache_lock);
3410
/* The block is now changed. Flush it. */
3415
Wait until the list of blocks in switch is empty. The threads that
3416
are switching these blocks will relink them to clean file chains
3417
while we wait and thus empty the 'first_in_switch' chain.
3419
while (first_in_switch)
3421
wait_on_queue(&first_in_switch->wqueue[COND_FOR_SAVED],
3422
&keycache->cache_lock);
3424
Do not restart here. We have flushed all blocks that were
3425
changed when entering this function and were not marked for
3426
eviction. Other threads have now flushed all remaining blocks in
3427
the course of their eviction.
3431
if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
3433
last_for_update= NULL;
3434
last_in_switch= NULL;
3435
uint32_t total_found= 0;
3439
Finally free all clean blocks for this file.
3440
During resize this may be run by two threads in parallel.
3445
for (block= keycache->file_blocks[FILE_HASH(file)] ;
3449
/* Remember the next block. After freeing we cannot get at it. */
3450
next= block->next_changed;
3452
/* Changed blocks cannot appear in the file_blocks hash. */
3453
assert(!(block->status & BLOCK_CHANGED));
3454
if (block->hash_link->file == file)
3456
/* We must skip blocks that will be changed. */
3457
if (block->status & BLOCK_FOR_UPDATE)
3459
last_for_update= block;
3464
We must not free blocks in eviction (BLOCK_IN_EVICTION |
3465
BLOCK_IN_SWITCH) or blocks intended to be freed
3468
if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3471
struct st_hash_link *next_hash_link= NULL;
3472
internal::my_off_t next_diskpos= 0;
3474
uint32_t next_status= 0;
3475
uint32_t hash_requests= 0;
3479
assert(found <= keycache->blocks_used);
3482
Register a request. This unlinks the block from the LRU
3483
ring and protects it against eviction. This is required
3486
reg_requests(keycache, block, 1);
3489
free_block() may need to wait for readers of the block.
3490
This is the moment where the other thread can move the
3491
'next' block from the chain. free_block() needs to wait
3492
if there are requests for the block pending.
3494
if (next && (hash_requests= block->hash_link->requests))
3496
/* Copy values from the 'next' block and its hash_link. */
3497
next_status= next->status;
3498
next_hash_link= next->hash_link;
3499
next_diskpos= next_hash_link->diskpos;
3500
next_file= next_hash_link->file;
3501
assert(next == next_hash_link->block);
3504
free_block(keycache, block);
3506
If we had to wait and the state of the 'next' block
3507
changed, break the inner loop. 'next' may no longer be
3508
part of the current chain.
3510
We do not want to break the loop after every free_block(),
3511
not even only after waits. The chain might be quite long
3512
and contain blocks for many files. Traversing it again and
3513
again to find more blocks for this file could become quite
3516
if (next && hash_requests &&
3517
((next_status != next->status) ||
3518
(next_hash_link != next->hash_link) ||
3519
(next_file != next_hash_link->file) ||
3520
(next_diskpos != next_hash_link->diskpos) ||
3521
(next != next_hash_link->block)))
3526
last_in_switch= block;
3529
} /* end for block in file_blocks */
3533
If any clean block has been found, we may have waited for it to
3534
become free. In this case it could be possible that another clean
3535
block became dirty. This is possible if the write request existed
3536
before the flush started (BLOCK_FOR_UPDATE). Re-check the hashes.
3542
To avoid an infinite loop, wait until one of the blocks marked
3543
for update is updated.
3545
if (last_for_update)
3547
/* We did not wait. Block must not have changed status. */
3548
assert(last_for_update->status & BLOCK_FOR_UPDATE);
3549
wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
3550
&keycache->cache_lock);
3555
To avoid an infinite loop wait until one of the blocks marked
3556
for eviction is switched.
3560
/* We did not wait. Block must not have changed status. */
3561
assert(last_in_switch->status & (BLOCK_IN_EVICTION |
3564
wait_on_queue(&last_in_switch->wqueue[COND_FOR_SAVED],
3565
&keycache->cache_lock);
3569
} /* if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) */
3571
} /* if (keycache->disk_blocks > 0 */
3574
if (cache != cache_buff)
3575
free((unsigned char*) cache);
3577
errno=last_errno; /* Return first error */
3578
return(last_errno != 0);
457
3583
Flush all blocks for a file to disk
471
3597
int flush_key_blocks(KEY_CACHE *keycache,
472
3598
int file, enum flush_type type)
476
assert (!keycache->key_cache_inited);
3602
if (!keycache->key_cache_inited)
3605
keycache_pthread_mutex_lock(&keycache->cache_lock);
3606
/* While waiting for lock, keycache could have been ended. */
3607
if (keycache->disk_blocks > 0)
3609
inc_counter_for_resize_op(keycache);
3610
res= flush_key_blocks_int(keycache, file, type);
3611
dec_counter_for_resize_op(keycache);
3613
keycache_pthread_mutex_unlock(&keycache->cache_lock);
3619
Flush all blocks in the key cache to disk.
3622
flush_all_key_blocks()
3623
keycache pointer to key cache root structure
3627
Flushing of the whole key cache is done in two phases.
3629
1. Flush all changed blocks, waiting for them if necessary. Loop
3630
until there is no changed block left in the cache.
3632
2. Free all clean blocks. Normally this means free all blocks. The
3633
changed blocks were flushed in phase 1 and became clean. However we
3634
may need to wait for blocks that are read by other threads. While we
3635
wait, a clean block could become changed if that operation started
3636
before the resize operation started. To be safe we must restart at
3639
When we can run through the changed_blocks and file_blocks hashes
3640
without finding a block any more, then we are done.
3642
Note that we hold keycache->cache_lock all the time unless we need
3643
to wait for something.
3650
static int flush_all_key_blocks(KEY_CACHE *keycache)
3653
uint32_t total_found;
3659
safe_mutex_assert_owner(&keycache->cache_lock);
3663
Phase1: Flush all changed blocks, waiting for them if necessary.
3664
Loop until there is no changed block left in the cache.
3669
/* Step over the whole changed_blocks hash array. */
3670
for (idx= 0; idx < CHANGED_BLOCKS_HASH; idx++)
3673
If an array element is non-empty, use the first block from its
3674
chain to find a file for flush. All changed blocks for this
3675
file are flushed. So the same block will not appear at this
3676
place again with the next iteration. New writes for blocks are
3677
not accepted during the flush. If multiple files share the
3678
same hash bucket, one of them will be flushed per iteration
3679
of the outer loop of phase 1.
3681
if ((block= keycache->changed_blocks[idx]))
3685
Flush dirty blocks but do not free them yet. They can be used
3686
for reading until all other blocks are flushed too.
3688
if (flush_key_blocks_int(keycache, block->hash_link->file,
3697
Phase 2: Free all clean blocks. Normally this means free all
3698
blocks. The changed blocks were flushed in phase 1 and became
3699
clean. However we may need to wait for blocks that are read by
3700
other threads. While we wait, a clean block could become changed
3701
if that operation started before the resize operation started. To
3702
be safe we must restart at phase 1.
3707
/* Step over the whole file_blocks hash array. */
3708
for (idx= 0; idx < CHANGED_BLOCKS_HASH; idx++)
3711
If an array element is non-empty, use the first block from its
3712
chain to find a file for flush. All blocks for this file are
3713
freed. So the same block will not appear at this place again
3714
with the next iteration. If multiple files share the
3715
same hash bucket, one of them will be flushed per iteration
3716
of the outer loop of phase 2.
3718
if ((block= keycache->file_blocks[idx]))
3722
if (flush_key_blocks_int(keycache, block->hash_link->file,
3731
If any clean block has been found, we may have waited for it to
3732
become free. In this case it could be possible that another clean
3733
block became dirty. This is possible if the write request existed
3734
before the resize started (BLOCK_FOR_UPDATE). Re-check the hashes.
3736
} while (total_found);
3742
Reset the counters of a key cache.
3745
reset_key_cache_counters()
3748
This procedure is used by process_key_caches() to reset the key_cache.
3751
0 on success (always because it can't fail)
3754
void reset_key_cache_counters()
3756
dflt_key_cache->global_blocks_changed= 0; /* Key_blocks_not_flushed */
3757
dflt_key_cache->global_cache_r_requests= 0; /* Key_read_requests */
3758
dflt_key_cache->global_cache_read= 0; /* Key_reads */
3759
dflt_key_cache->global_cache_w_requests= 0; /* Key_write_requests */
3760
dflt_key_cache->global_cache_write= 0; /* Key_writes */
3763
#if defined(KEYCACHE_TIMEOUT)
3767
unsigned int hash_link_number(HASH_LINK *hash_link, KEY_CACHE *keycache)
3769
return ((unsigned int) (((char*)hash_link-(char *) keycache->hash_link_root)/
3770
sizeof(HASH_LINK)));
3774
unsigned int block_number(BLOCK_LINK *block, KEY_CACHE *keycache)
3776
return ((unsigned int) (((char*)block-(char *)keycache->block_root)/
3777
sizeof(BLOCK_LINK)));
3781
#define KEYCACHE_DUMP_FILE "keycache_dump.txt"
3782
#define MAX_QUEUE_LEN 100
3785
static void keycache_dump(KEY_CACHE *keycache)
3787
FILE *keycache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w");
3788
internal::st_my_thread_var *last;
3789
internal::st_my_thread_var *thread;
3791
HASH_LINK *hash_link;
3792
KEYCACHE_PAGE *page;
3795
fprintf(keycache_dump_file, "thread:%u\n", thread->id);
3798
thread=last=waiting_for_hash_link.last_thread;
3799
fprintf(keycache_dump_file, "queue of threads waiting for hash link\n");
3803
thread=thread->next;
3804
page= (KEYCACHE_PAGE *) thread->opt_info;
3805
fprintf(keycache_dump_file,
3806
"thread:%u, (file,filepos)=(%u,%lu)\n",
3807
thread->id,(uint) page->file,(uint32_t) page->filepos);
3808
if (++i == MAX_QUEUE_LEN)
3811
while (thread != last);
3814
thread=last=waiting_for_block.last_thread;
3815
fprintf(keycache_dump_file, "queue of threads waiting for block\n");
3819
thread=thread->next;
3820
hash_link= (HASH_LINK *) thread->opt_info;
3821
fprintf(keycache_dump_file,
3822
"thread:%u hash_link:%u (file,filepos)=(%u,%u)\n",
3823
thread->id, (uint) hash_link_number(hash_link, keycache),
3824
(uint) hash_link->file,(uint32_t) hash_link->diskpos);
3825
if (++i == MAX_QUEUE_LEN)
3828
while (thread != last);
3830
for (i=0 ; i< keycache->blocks_used ; i++)
3833
block= &keycache->block_root[i];
3834
hash_link= block->hash_link;
3835
fprintf(keycache_dump_file,
3836
"block:%u hash_link:%d status:%x #requests=%u "
3837
"waiting_for_readers:%d\n",
3838
i, (int) (hash_link ? hash_link_number(hash_link, keycache) : -1),
3839
block->status, block->requests, block->condvar ? 1 : 0);
3840
for (j=0 ; j < 2; j++)
3842
KEYCACHE_WQUEUE *wqueue=&block->wqueue[j];
3843
thread= last= wqueue->last_thread;
3844
fprintf(keycache_dump_file, "queue #%d\n", j);
3849
thread=thread->next;
3850
fprintf(keycache_dump_file,
3851
"thread:%u\n", thread->id);
3852
if (++i == MAX_QUEUE_LEN)
3855
while (thread != last);
3859
fprintf(keycache_dump_file, "LRU chain:");
3860
block= keycache= used_last;
3865
block= block->next_used;
3866
fprintf(keycache_dump_file,
3867
"block:%u, ", block_number(block, keycache));
3869
while (block != keycache->used_last);
3871
fprintf(keycache_dump_file, "\n");
3873
fclose(keycache_dump_file);
3876
static int keycache_pthread_cond_wait(pthread_cond_t *cond,
3877
pthread_mutex_t *mutex)
3880
struct timeval now; /* time when we started waiting */
3881
struct timespec timeout; /* timeout value for the wait function */
3884
/* Get current time */
3885
gettimeofday(&now, &tz);
3886
/* Prepare timeout value */
3887
timeout.tv_sec= now.tv_sec + KEYCACHE_TIMEOUT;
3889
timeval uses microseconds.
3890
timespec uses nanoseconds.
3891
1 nanosecond = 1000 micro seconds
3893
timeout.tv_nsec= now.tv_usec * 1000;
3894
rc= pthread_cond_timedwait(cond, mutex, &timeout);
3895
if (rc == ETIMEDOUT || rc == ETIME)
3900
assert(rc != ETIMEDOUT);
3903
#endif /* defined(KEYCACHE_TIMEOUT) */