270
int init_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
271
size_t use_mem, uint division_limit,
274
uint32_t blocks, hash_links;
277
assert(key_cache_block_size >= 512);
279
if (keycache->key_cache_inited && keycache->disk_blocks > 0)
284
keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
285
keycache->global_cache_read= keycache->global_cache_write= 0;
286
keycache->disk_blocks= -1;
287
if (! keycache->key_cache_inited)
289
keycache->key_cache_inited= 1;
291
Initialize these variables once only.
292
Their value must survive re-initialization during resizing.
294
keycache->in_resize= 0;
295
keycache->resize_in_flush= 0;
296
keycache->cnt_for_resize_op= 0;
297
keycache->waiting_for_resize_cnt.last_thread= NULL;
298
keycache->in_init= 0;
299
pthread_mutex_init(&keycache->cache_lock, MY_MUTEX_INIT_FAST);
300
keycache->resize_queue.last_thread= NULL;
303
keycache->key_cache_mem_size= use_mem;
304
keycache->key_cache_block_size= key_cache_block_size;
306
blocks= (uint32_t) (use_mem / (sizeof(BLOCK_LINK) + 2 * sizeof(HASH_LINK) +
307
sizeof(HASH_LINK*) * 5/4 + key_cache_block_size));
308
/* It doesn't make sense to have too few blocks (less than 8) */
313
/* Set my_hash_entries to the next bigger 2 power */
314
if ((keycache->hash_entries= next_power(blocks)) < blocks * 5/4)
315
keycache->hash_entries<<= 1;
316
hash_links= 2 * blocks;
317
#if defined(MAX_THREADS)
318
if (hash_links < MAX_THREADS + blocks - 1)
319
hash_links= MAX_THREADS + blocks - 1;
321
while ((length= (ALIGN_SIZE(blocks * sizeof(BLOCK_LINK)) +
322
ALIGN_SIZE(hash_links * sizeof(HASH_LINK)) +
323
ALIGN_SIZE(sizeof(HASH_LINK*) *
324
keycache->hash_entries))) +
325
((size_t) blocks * keycache->key_cache_block_size) > use_mem)
327
/* Allocate memory for cache page buffers */
328
if ((keycache->block_mem= malloc((size_t) blocks * keycache->key_cache_block_size)))
331
Allocate memory for blocks, hash_links and hash entries;
332
For each block 2 hash links are allocated
334
if ((keycache->block_root= (BLOCK_LINK*) my_malloc(length,
337
free(keycache->block_mem);
338
keycache->block_mem= 0;
343
my_error(EE_OUTOFMEMORY, MYF(0), blocks * keycache->key_cache_block_size);
346
blocks= blocks / 4*3;
348
keycache->blocks_unused= blocks;
349
keycache->disk_blocks= (int) blocks;
350
keycache->hash_links= hash_links;
351
keycache->hash_root= (HASH_LINK**) ((char*) keycache->block_root +
352
ALIGN_SIZE(blocks*sizeof(BLOCK_LINK)));
353
keycache->hash_link_root= (HASH_LINK*) ((char*) keycache->hash_root +
354
ALIGN_SIZE((sizeof(HASH_LINK*) *
355
keycache->hash_entries)));
356
memset(keycache->block_root, 0,
357
keycache->disk_blocks * sizeof(BLOCK_LINK));
358
memset(keycache->hash_root, 0,
359
keycache->hash_entries * sizeof(HASH_LINK*));
360
memset(keycache->hash_link_root, 0,
361
keycache->hash_links * sizeof(HASH_LINK));
362
keycache->hash_links_used= 0;
363
keycache->free_hash_list= NULL;
364
keycache->blocks_used= keycache->blocks_changed= 0;
366
keycache->global_blocks_changed= 0;
367
keycache->blocks_available=0; /* For debugging */
369
/* The LRU chain is empty after initialization */
370
keycache->used_last= NULL;
371
keycache->used_ins= NULL;
372
keycache->free_block_list= NULL;
373
keycache->keycache_time= 0;
374
keycache->warm_blocks= 0;
375
keycache->min_warm_blocks= (division_limit ?
376
blocks * division_limit / 100 + 1 :
378
keycache->age_threshold= (age_threshold ?
379
blocks * age_threshold / 100 :
382
keycache->can_be_used= 1;
384
keycache->waiting_for_hash_link.last_thread= NULL;
385
keycache->waiting_for_block.last_thread= NULL;
386
memset(keycache->changed_blocks, 0,
387
sizeof(keycache->changed_blocks[0]) * CHANGED_BLOCKS_HASH);
388
memset(keycache->file_blocks, 0,
389
sizeof(keycache->file_blocks[0]) * CHANGED_BLOCKS_HASH);
393
/* key_buffer_size is specified too small. Disable the cache. */
394
keycache->can_be_used= 0;
397
keycache->blocks= keycache->disk_blocks > 0 ? keycache->disk_blocks : 0;
398
return((int) keycache->disk_blocks);
402
keycache->disk_blocks= 0;
404
if (keycache->block_mem)
406
free(keycache->block_mem);
407
keycache->block_mem= NULL;
409
if (keycache->block_root)
411
my_free((uchar*) keycache->block_root, MYF(0));
412
keycache->block_root= NULL;
415
keycache->can_be_used= 0;
425
keycache pointer to a key cache data structure
426
key_cache_block_size size of blocks to keep cached data
427
use_mem total memory to use for the new key cache
428
division_limit new division limit (if not zero)
429
age_threshold new age threshold (if not zero)
432
number of blocks in the key cache, if successful,
436
The function first compares the memory size and the block size parameters
437
with the key cache values.
439
If they differ the function free the the memory allocated for the
440
old key cache blocks by calling the end_key_cache function and
441
then rebuilds the key cache with new blocks by calling
444
The function starts the operation only when all other threads
445
performing operations with the key cache let her to proceed
446
(when cnt_for_resize=0).
449
int resize_key_cache(KEY_CACHE *keycache, uint key_cache_block_size,
450
size_t use_mem, uint division_limit,
455
if (!keycache->key_cache_inited)
456
return(keycache->disk_blocks);
458
if(key_cache_block_size == keycache->key_cache_block_size &&
459
use_mem == keycache->key_cache_mem_size)
461
change_key_cache_param(keycache, division_limit, age_threshold);
462
return(keycache->disk_blocks);
465
keycache_pthread_mutex_lock(&keycache->cache_lock);
468
We may need to wait for another thread which is doing a resize
469
already. This cannot happen in the MySQL server though. It allows
470
one resizer only. In set_var.cc keycache->in_init is used to block
473
while (keycache->in_resize)
475
/* purecov: begin inspected */
476
wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
481
Mark the operation in progress. This blocks other threads from doing
482
a resize in parallel. It prohibits new blocks to enter the cache.
483
Read/write requests can bypass the cache during the flush phase.
485
keycache->in_resize= 1;
487
/* Need to flush only if keycache is enabled. */
488
if (keycache->can_be_used)
490
/* Start the flush phase. */
491
keycache->resize_in_flush= 1;
493
if (flush_all_key_blocks(keycache))
495
/* TODO: if this happens, we should write a warning in the log file ! */
496
keycache->resize_in_flush= 0;
498
keycache->can_be_used= 0;
502
/* End the flush phase. */
503
keycache->resize_in_flush= 0;
507
Some direct read/write operations (bypassing the cache) may still be
508
unfinished. Wait until they are done. If the key cache can be used,
509
direct I/O is done in increments of key_cache_block_size. That is,
510
every block is checked if it is in the cache. We need to wait for
511
pending I/O before re-initializing the cache, because we may change
512
the block size. Otherwise they could check for blocks at file
513
positions where the new block division has none. We do also want to
514
wait for I/O done when (if) the cache was disabled. It must not
515
run in parallel with normal cache operation.
517
while (keycache->cnt_for_resize_op)
518
wait_on_queue(&keycache->waiting_for_resize_cnt, &keycache->cache_lock);
521
Free old cache structures, allocate new structures, and initialize
522
them. Note that the cache_lock mutex and the resize_queue are left
523
untouched. We do not lose the cache_lock and will release it only at
524
the end of this function.
526
end_key_cache(keycache, 0); /* Don't free mutex */
527
/* The following will work even if use_mem is 0 */
528
blocks= init_key_cache(keycache, key_cache_block_size, use_mem,
529
division_limit, age_threshold);
533
Mark the resize finished. This allows other threads to start a
534
resize or to request new cache blocks.
536
keycache->in_resize= 0;
538
/* Signal waiting threads. */
539
release_whole_queue(&keycache->resize_queue);
541
keycache_pthread_mutex_unlock(&keycache->cache_lock);
547
Increment counter blocking resize key cache operation
549
/*
  Increment the counter of cache operations that block a resize.

  @param keycache  key cache handle whose resize-blocking counter is bumped

  While cnt_for_resize_op is non-zero, resize_key_cache() waits before
  re-initializing the cache structures.
*/
static inline void inc_counter_for_resize_op(KEY_CACHE *keycache)
{
  keycache->cnt_for_resize_op++;
}
556
Decrement counter blocking resize key cache operation;
557
Signal the operation to proceed when counter becomes equal zero
559
/*
  Decrement the counter of cache operations that block a resize.

  @param keycache  key cache handle whose resize-blocking counter is dropped

  When the counter reaches zero, every thread parked on
  waiting_for_resize_cnt (a resizer in resize_key_cache()) is signalled
  to proceed.
*/
static inline void dec_counter_for_resize_op(KEY_CACHE *keycache)
{
  if (!--keycache->cnt_for_resize_op)
    release_whole_queue(&keycache->waiting_for_resize_cnt);
}
566
Change the key cache parameters
569
change_key_cache_param()
570
keycache pointer to a key cache data structure
571
division_limit new division limit (if not zero)
572
age_threshold new age threshold (if not zero)
578
Presently the function resets the key cache parameters
579
concerning midpoint insertion strategy - division_limit and
583
void change_key_cache_param(KEY_CACHE *keycache, uint division_limit,
586
keycache_pthread_mutex_lock(&keycache->cache_lock);
588
keycache->min_warm_blocks= (keycache->disk_blocks *
589
division_limit / 100 + 1);
591
keycache->age_threshold= (keycache->disk_blocks *
592
age_threshold / 100);
593
keycache_pthread_mutex_unlock(&keycache->cache_lock);
245
int init_key_cache(KEY_CACHE *keycache, uint32_t key_cache_block_size,
246
size_t use_mem, uint32_t division_limit,
247
uint32_t age_threshold)
250
(void)key_cache_block_size;
252
(void)division_limit;
254
memset(keycache, 0, sizeof(KEY_CACHE));
610
272
void end_key_cache(KEY_CACHE *keycache, bool cleanup)
612
if (!keycache->key_cache_inited)
615
if (keycache->disk_blocks > 0)
617
if (keycache->block_mem)
619
free(keycache->block_mem);
620
keycache->block_mem= NULL;
621
my_free((uchar*) keycache->block_root, MYF(0));
622
keycache->block_root= NULL;
624
keycache->disk_blocks= -1;
625
/* Reset blocks_changed to be safe if flush_all_key_blocks is called */
626
keycache->blocks_changed= 0;
631
pthread_mutex_destroy(&keycache->cache_lock);
632
keycache->key_cache_inited= keycache->can_be_used= 0;
635
276
} /* end_key_cache */
639
Link a thread into double-linked queue of waiting threads.
643
wqueue pointer to the queue structure
644
thread pointer to the thread to be added to the queue
650
Queue is represented by a circular list of the thread structures
651
The list is double-linked of the type (**prev,*next), accessed by
652
a pointer to the last element.
655
/*
  Link a thread into the double-linked circular queue of waiting threads.

  @param wqueue  pointer to the queue structure (accessed via last_thread)
  @param thread  thread to append; must not already be linked anywhere

  The ring is double-linked of the (**prev, *next) form and addressed by
  a pointer to its last element. An empty queue has last_thread == NULL.

  NOTE(review): the brace scaffolding and the `last->next= thread;`
  re-link were lost in extraction and have been restored here — without
  that statement the old head would no longer be reachable from `last`.
  Confirm against the upstream revision of this file.
*/
static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
                            struct st_my_thread_var *thread)
{
  struct st_my_thread_var *last;

  assert(!thread->next && !thread->prev);
  if (! (last= wqueue->last_thread))
  {
    /* Empty queue: the thread forms a one-element ring pointing at itself. */
    thread->next= thread;
    thread->prev= &thread->next;
  }
  else
  {
    /* Splice the thread in between `last` and the current head. */
    thread->prev= last->next->prev;
    last->next->prev= &thread->next;
    thread->next= last->next;
    last->next= thread;
  }
  /* The new thread becomes the last element of the ring. */
  wqueue->last_thread= thread;
}
678
Unlink a thread from double-linked queue of waiting threads
682
wqueue pointer to the queue structure
683
thread pointer to the thread to be removed from the queue
689
See NOTES for link_into_queue
692
static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
693
struct st_my_thread_var *thread)
695
assert(thread->next && thread->prev);
696
if (thread->next == thread)
697
/* The queue contains only one member */
698
wqueue->last_thread= NULL;
701
thread->next->prev= thread->prev;
702
*thread->prev=thread->next;
703
if (wqueue->last_thread == thread)
704
wqueue->last_thread= STRUCT_PTR(struct st_my_thread_var, next,
713
Add a thread to single-linked queue of waiting threads
717
wqueue Pointer to the queue structure.
718
mutex Cache_lock to acquire after awake.
724
Queue is represented by a circular list of the thread structures
725
The list is single-linked of the type (*next), accessed by a pointer
728
The function protects against stray signals by verifying that the
729
current thread is unlinked from the queue when awaking. However,
730
since several threads can wait for the same event, it might be
731
necessary for the caller of the function to check again if the
732
condition for awake is indeed matched.
735
static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
736
pthread_mutex_t *mutex)
738
struct st_my_thread_var *last;
739
struct st_my_thread_var *thread= my_thread_var;
742
assert(!thread->next);
743
assert(!thread->prev); /* Not required, but must be true anyway. */
744
if (! (last= wqueue->last_thread))
745
thread->next= thread;
748
thread->next= last->next;
751
wqueue->last_thread= thread;
754
Wait until thread is removed from queue by the signalling thread.
755
The loop protects against stray signals.
759
keycache_pthread_cond_wait(&thread->suspend, mutex);
761
while (thread->next);
766
Remove all threads from queue signaling them to proceed
769
release_whole_queue()
770
wqueue pointer to the queue structure
776
See notes for wait_on_queue().
777
When removed from the queue each thread is signaled via condition
778
variable thread->suspend.
781
static void release_whole_queue(KEYCACHE_WQUEUE *wqueue)
783
struct st_my_thread_var *last;
784
struct st_my_thread_var *next;
785
struct st_my_thread_var *thread;
787
/* Queue may be empty. */
788
if (!(last= wqueue->last_thread))
795
/* Signal the thread. */
796
keycache_pthread_cond_signal(&thread->suspend);
797
/* Take thread from queue. */
801
while (thread != last);
803
/* Now queue is definitely empty. */
804
wqueue->last_thread= NULL;
809
Unlink a block from the chain of dirty/clean blocks
811
/*
  Unlink a block from the chain of dirty/clean blocks it is linked in.

  @param block  block to remove; must currently be linked
                (prev_changed points back at it)

  After return both next_changed and prev_changed are NULL, i.e. the
  block belongs to no file_blocks/changed_blocks chain.
*/
static inline void unlink_changed(BLOCK_LINK *block)
{
  assert(block->prev_changed && *block->prev_changed == block);
  if (block->next_changed)
    block->next_changed->prev_changed= block->prev_changed;
  *block->prev_changed= block->next_changed;
  block->next_changed= NULL;
  block->prev_changed= NULL;
}
823
Link a block into the chain of dirty/clean blocks
826
/*
  Link a block at the head of a chain of dirty/clean blocks.

  @param block  block to insert; must not be linked in any chain yet
  @param phead  address of the chain's head pointer

  NOTE(review): the final `*phead= block;` was lost in extraction and is
  restored here — without it the block never becomes the chain head and
  unlink_changed()'s `*block->prev_changed == block` assertion would
  fire. Confirm against the upstream revision of this file.
*/
static inline void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead)
{
  assert(!block->next_changed);
  assert(!block->prev_changed);
  block->prev_changed= phead;
  if ((block->next_changed= *phead))
    (*phead)->prev_changed= &block->next_changed;
  *phead= block;
}
838
Link a block in a chain of clean blocks of a file.
842
keycache Key cache handle
843
block Block to relink
844
file File to be linked to
845
unlink If to unlink first
848
Unlink a block from whichever chain it is linked in, if it's
849
asked for, and link it to the chain of clean blocks of the
853
Please do never set/clear BLOCK_CHANGED outside of
854
link_to_file_list() or link_to_changed_list().
855
You would risk to damage correct counting of changed blocks
856
and to find blocks in the wrong hash.
862
static void link_to_file_list(KEY_CACHE *keycache,
863
BLOCK_LINK *block, int file,
866
assert(block->status & BLOCK_IN_USE);
867
assert(block->hash_link && block->hash_link->block == block);
868
assert(block->hash_link->file == file);
870
unlink_changed(block);
871
link_changed(block, &keycache->file_blocks[FILE_HASH(file)]);
872
if (block->status & BLOCK_CHANGED)
874
block->status&= ~BLOCK_CHANGED;
875
keycache->blocks_changed--;
876
keycache->global_blocks_changed--;
882
Re-link a block from the clean chain to the dirty chain of a file.
885
link_to_changed_list()
886
keycache key cache handle
887
block block to relink
890
Unlink a block from the chain of clean blocks of a file
891
and link it to the chain of dirty blocks of the same file.
894
Please do never set/clear BLOCK_CHANGED outside of
895
link_to_file_list() or link_to_changed_list().
896
You would risk to damage correct counting of changed blocks
897
and to find blocks in the wrong hash.
903
static void link_to_changed_list(KEY_CACHE *keycache,
906
assert(block->status & BLOCK_IN_USE);
907
assert(!(block->status & BLOCK_CHANGED));
908
assert(block->hash_link && block->hash_link->block == block);
910
unlink_changed(block);
912
&keycache->changed_blocks[FILE_HASH(block->hash_link->file)]);
913
block->status|=BLOCK_CHANGED;
914
keycache->blocks_changed++;
915
keycache->global_blocks_changed++;
920
Link a block to the LRU chain at the beginning or at the end of
925
keycache pointer to a key cache data structure
926
block pointer to the block to link to the LRU chain
927
hot <-> to link the block into the hot subchain
928
at_end <-> to link the block at the end of the subchain
934
The LRU ring is represented by a circular list of block structures.
935
The list is double-linked of the type (**prev,*next) type.
936
The LRU ring is divided into two parts - hot and warm.
937
There are two pointers to access the last blocks of these two
938
parts. The beginning of the warm part follows right after the
940
Only blocks of the warm part can be used for eviction.
941
The first block from the beginning of this subchain is always
942
taken for eviction (keycache->last_used->next)
944
LRU chain: +------+ H O T +------+
945
+----| end |----...<----| beg |----+
946
| +------+last +------+ |
947
v<-link in latest hot (new end) |
948
| link in latest warm (new end)->^
949
| +------+ W A R M +------+ |
950
+----| beg |---->...----| end |----+
954
It is also possible that the block is selected for eviction and thus
955
not linked in the LRU ring.
958
static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, bool hot,
964
assert((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
965
assert(block->hash_link); /*backptr to block NULL from free_block()*/
966
assert(!block->requests);
967
assert(block->prev_changed && *block->prev_changed == block);
968
assert(!block->next_used);
969
assert(!block->prev_used);
970
if (!hot && keycache->waiting_for_block.last_thread)
972
/* Signal that in the LRU warm sub-chain an available block has appeared */
973
struct st_my_thread_var *last_thread=
974
keycache->waiting_for_block.last_thread;
975
struct st_my_thread_var *first_thread= last_thread->next;
976
struct st_my_thread_var *next_thread= first_thread;
977
HASH_LINK *hash_link= (HASH_LINK *) first_thread->opt_info;
978
struct st_my_thread_var *thread;
982
next_thread= thread->next;
984
We notify about the event all threads that ask
985
for the same page as the first thread in the queue
987
if ((HASH_LINK *) thread->opt_info == hash_link)
989
keycache_pthread_cond_signal(&thread->suspend);
990
unlink_from_queue(&keycache->waiting_for_block, thread);
994
while (thread != last_thread);
995
hash_link->block= block;
997
NOTE: We assigned the block to the hash_link and signalled the
998
requesting thread(s). But it is possible that other threads runs
999
first. These threads see the hash_link assigned to a block which
1000
is assigned to another hash_link and not marked BLOCK_IN_SWITCH.
1001
This can be a problem for functions that do not select the block
1002
via its hash_link: flush and free. They do only see a block which
1003
is in a "normal" state and don't know that it will be evicted soon.
1005
We cannot set BLOCK_IN_SWITCH here because only one of the
1006
requesting threads must handle the eviction. All others must wait
1007
for it to complete. If we set the flag here, the threads would not
1008
know who is in charge of the eviction. Without the flag, the first
1009
thread takes the stick and sets the flag.
1011
But we need to note in the block that is has been selected for
1012
eviction. It must not be freed. The evicting thread will not
1013
expect the block in the free list. Before freeing we could also
1014
check if block->requests > 1. But I think including another flag
1015
in the check of block->status is slightly more efficient and
1016
probably easier to read.
1018
block->status|= BLOCK_IN_EVICTION;
1021
pins= hot ? &keycache->used_ins : &keycache->used_last;
1025
ins->next_used->prev_used= &block->next_used;
1026
block->next_used= ins->next_used;
1027
block->prev_used= &ins->next_used;
1028
ins->next_used= block;
1034
/* The LRU ring is empty. Let the block point to itself. */
1035
keycache->used_last= keycache->used_ins= block->next_used= block;
1036
block->prev_used= &block->next_used;
1042
Unlink a block from the LRU chain
1046
keycache pointer to a key cache data structure
1047
block pointer to the block to unlink from the LRU chain
1053
See NOTES for link_block
1056
/*
  Unlink a block from the LRU ring.

  @param keycache  key cache handle owning the LRU ring
  @param block     block to remove; must be linked in the ring, clean or
                   dirty, readable, and without registered requests

  Fixes up used_last/used_ins when they referenced the removed block so
  that they keep pointing at a valid ring element (the predecessor).
  Afterwards next_used/prev_used are NULL. See NOTES for link_block.
*/
static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
{
  assert((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
  assert(block->hash_link); /*backptr to block NULL from free_block()*/
  assert(!block->requests);
  assert(block->prev_changed && *block->prev_changed == block);
  assert(block->next_used && block->prev_used &&
         (block->next_used->prev_used == &block->next_used) &&
         (*block->prev_used == block));
  if (block->next_used == block)
    /* The list contains only one member */
    keycache->used_last= keycache->used_ins= NULL;
  else
  {
    block->next_used->prev_used= block->prev_used;
    *block->prev_used= block->next_used;
    /* Keep the ring anchors valid if they pointed at the removed block. */
    if (keycache->used_last == block)
      keycache->used_last= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
    if (keycache->used_ins == block)
      keycache->used_ins= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
  }
  block->next_used= NULL;
  block->prev_used= NULL;
}
1083
Register requests for a block.
1087
keycache Pointer to a key cache data structure.
1088
block Pointer to the block to register a request on.
1089
count Number of requests. Always 1.
1092
The first request unlinks the block from the LRU ring. This means
1093
that it is protected against eveiction.
1098
/*
  Register requests for a block.

  @param keycache  key cache handle owning the LRU ring
  @param block     block to register the request(s) on
  @param count     number of requests to add (callers pass 1)

  The first request unlinks the block from the LRU ring, which protects
  it against eviction while it is in use.
*/
static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count)
{
  assert(block->status & BLOCK_IN_USE);
  assert(block->hash_link);

  if (!block->requests)
    unlink_block(keycache, block);
  block->requests+= count;
}
1110
Unregister request for a block
1111
linking it to the LRU chain if it's the last request
1115
keycache pointer to a key cache data structure
1116
block pointer to the block to link to the LRU chain
1117
at_end <-> to link the block at the end of the LRU chain
1123
Every linking to the LRU ring decrements by one a special block
1124
counter (if it's positive). If the at_end parameter is true the block is
1125
added either at the end of warm sub-chain or at the end of hot sub-chain.
1126
It is added to the hot subchain if its counter is zero and number of
1127
blocks in warm sub-chain is not less than some low limit (determined by
1128
the division_limit parameter). Otherwise the block is added to the warm
1129
sub-chain. If the at_end parameter is false the block is always added
1130
at beginning of the warm sub-chain.
1131
Thus a warm block can be promoted to the hot sub-chain when its counter
1132
becomes zero for the first time.
1133
At the same time the block at the very beginning of the hot subchain
1134
might be moved to the beginning of the warm subchain if it stays untouched
1135
for a too long time (this time is determined by parameter age_threshold).
1137
It is also possible that the block is selected for eviction and thus
1138
not linked in the LRU ring.
1141
static void unreg_request(KEY_CACHE *keycache,
1142
BLOCK_LINK *block, int at_end)
1144
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1145
assert(block->hash_link); /*backptr to block NULL from free_block()*/
1146
assert(block->requests);
1147
assert(block->prev_changed && *block->prev_changed == block);
1148
assert(!block->next_used);
1149
assert(!block->prev_used);
1150
if (! --block->requests)
1153
if (block->hits_left)
1155
hot= !block->hits_left && at_end &&
1156
keycache->warm_blocks > keycache->min_warm_blocks;
1159
if (block->temperature == BLOCK_WARM)
1160
keycache->warm_blocks--;
1161
block->temperature= BLOCK_HOT;
1163
link_block(keycache, block, hot, (bool)at_end);
1164
block->last_hit_time= keycache->keycache_time;
1165
keycache->keycache_time++;
1167
At this place, the block might be in the LRU ring or not. If an
1168
evicter was waiting for a block, it was selected for eviction and
1169
not linked in the LRU ring.
1173
Check if we should link a hot block to the warm block sub-chain.
1174
It is possible that we select the same block as above. But it can
1175
also be another block. In any case a block from the LRU ring is
1176
selected. In other words it works even if the above block was
1177
selected for eviction and not linked in the LRU ring. Since this
1178
happens only if the LRU ring is empty, the block selected below
1179
would be NULL and the rest of the function skipped.
1181
block= keycache->used_ins;
1182
if (block && keycache->keycache_time - block->last_hit_time >
1183
keycache->age_threshold)
1185
unlink_block(keycache, block);
1186
link_block(keycache, block, 0, 0);
1187
if (block->temperature != BLOCK_WARM)
1189
keycache->warm_blocks++;
1190
block->temperature= BLOCK_WARM;
1197
Remove a reader of the page in block
1200
/*
  Remove a reader of the page in block.

  @param block  block whose hash_link reader count is decremented; must
                hold valid read data, be out of the LRU ring, and have at
                least one registered reader

  When the last reader leaves and a writer is parked on block->condvar
  (see wait_for_readers()), that writer is signalled to proceed.
*/
static void remove_reader(BLOCK_LINK *block)
{
  assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
  assert(block->hash_link && block->hash_link->block == block);
  assert(block->prev_changed && *block->prev_changed == block);
  assert(!block->next_used);
  assert(!block->prev_used);
  assert(block->hash_link->requests);
  if (! --block->hash_link->requests && block->condvar)
    keycache_pthread_cond_signal(block->condvar);
}
1214
Wait until the last reader of the page in block
1215
signals on its termination
1218
static void wait_for_readers(KEY_CACHE *keycache,
1221
struct st_my_thread_var *thread= my_thread_var;
1222
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1223
assert(!(block->status & (BLOCK_ERROR | BLOCK_IN_FLUSH |
1225
assert(block->hash_link);
1226
assert(block->hash_link->block == block);
1227
/* Linked in file_blocks or changed_blocks hash. */
1228
assert(block->prev_changed && *block->prev_changed == block);
1229
/* Not linked in LRU ring. */
1230
assert(!block->next_used);
1231
assert(!block->prev_used);
1232
while (block->hash_link->requests)
1234
/* There must be no other waiter. We have no queue here. */
1235
assert(!block->condvar);
1236
block->condvar= &thread->suspend;
1237
keycache_pthread_cond_wait(&thread->suspend, &keycache->cache_lock);
1238
block->condvar= NULL;
1244
280
Add a hash link to a bucket in the hash_table
1258
Remove a hash link from the hash table
1261
static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link)
1263
assert(hash_link->requests == 0);
1264
if ((*hash_link->prev= hash_link->next))
1265
hash_link->next->prev= hash_link->prev;
1266
hash_link->block= NULL;
1267
if (keycache->waiting_for_hash_link.last_thread)
1269
/* Signal that a free hash link has appeared */
1270
struct st_my_thread_var *last_thread=
1271
keycache->waiting_for_hash_link.last_thread;
1272
struct st_my_thread_var *first_thread= last_thread->next;
1273
struct st_my_thread_var *next_thread= first_thread;
1274
KEYCACHE_PAGE *first_page= (KEYCACHE_PAGE *) (first_thread->opt_info);
1275
struct st_my_thread_var *thread;
1277
hash_link->file= first_page->file;
1278
hash_link->diskpos= first_page->filepos;
1281
KEYCACHE_PAGE *page;
1282
thread= next_thread;
1283
page= (KEYCACHE_PAGE *) thread->opt_info;
1284
next_thread= thread->next;
1286
We notify about the event all threads that ask
1287
for the same page as the first thread in the queue
1289
if (page->file == hash_link->file && page->filepos == hash_link->diskpos)
1291
keycache_pthread_cond_signal(&thread->suspend);
1292
unlink_from_queue(&keycache->waiting_for_hash_link, thread);
1295
while (thread != last_thread);
1296
link_hash(&keycache->hash_root[KEYCACHE_HASH(hash_link->file,
1297
hash_link->diskpos)],
1301
hash_link->next= keycache->free_hash_list;
1302
keycache->free_hash_list= hash_link;
1307
Get the hash link for a page
1310
static HASH_LINK *get_hash_link(KEY_CACHE *keycache,
1311
int file, my_off_t filepos)
1313
register HASH_LINK *hash_link, **start;
1317
Find the bucket in the hash table for the pair (file, filepos);
1318
start contains the head of the bucket list,
1319
hash_link points to the first member of the list
1321
hash_link= *(start= &keycache->hash_root[KEYCACHE_HASH(file, filepos)]);
1322
/* Look for an element for the pair (file, filepos) in the bucket chain */
1324
(hash_link->diskpos != filepos || hash_link->file != file))
1326
hash_link= hash_link->next;
1330
/* There is no hash link in the hash table for the pair (file, filepos) */
1331
if (keycache->free_hash_list)
1333
hash_link= keycache->free_hash_list;
1334
keycache->free_hash_list= hash_link->next;
1336
else if (keycache->hash_links_used < keycache->hash_links)
1338
hash_link= &keycache->hash_link_root[keycache->hash_links_used++];
1342
/* Wait for a free hash link */
1343
struct st_my_thread_var *thread= my_thread_var;
1346
page.filepos= filepos;
1347
thread->opt_info= (void *) &page;
1348
link_into_queue(&keycache->waiting_for_hash_link, thread);
1349
keycache_pthread_cond_wait(&thread->suspend,
1350
&keycache->cache_lock);
1351
thread->opt_info= NULL;
1354
hash_link->file= file;
1355
hash_link->diskpos= filepos;
1356
link_hash(start, hash_link);
1358
/* Register the request for the page */
1359
hash_link->requests++;
1366
Get a block for the file page requested by a keycache read/write operation;
1367
If the page is not in the cache return a free block, if there is none
1368
return the lru block after saving its buffer if the page is dirty.
1373
keycache pointer to a key cache data structure
1374
file handler for the file to read page from
1375
filepos position of the page in the file
1376
init_hits_left how initialize the block counter for the page
1377
wrmode <-> get for writing
1378
page_st out {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
1381
Pointer to the found block if successful, 0 - otherwise
1384
For the page from file positioned at filepos the function checks whether
1385
the page is in the key cache specified by the first parameter.
1386
If this is the case it immediately returns the block.
1387
If not, the function first chooses a block for this page. If there is
1388
no not used blocks in the key cache yet, the function takes the block
1389
at the very beginning of the warm sub-chain. It saves the page in that
1390
block if it's dirty before returning the pointer to it.
1391
The function returns in the page_st parameter the following values:
1392
PAGE_READ - if page already in the block,
1393
PAGE_TO_BE_READ - if it is to be read yet by the current thread
1394
WAIT_TO_BE_READ - if it is to be read by another thread
1395
If an error occurs THE BLOCK_ERROR bit is set in the block status.
1396
It might happen that there are no blocks in LRU chain (in warm part) -
1397
all blocks are unlinked for some read/write operations. Then the function
1398
waits until first of this operations links any block back.
1401
static BLOCK_LINK *find_key_block(KEY_CACHE *keycache,
1402
File file, my_off_t filepos,
1404
int wrmode, int *page_st)
1406
HASH_LINK *hash_link;
1413
If the flush phase of a resize operation fails, the cache is left
1414
unusable. This will be detected only after "goto restart".
1416
if (!keycache->can_be_used)
1420
Find the hash_link for the requested file block (file, filepos). We
1421
do always get a hash_link here. It has registered our request so
1422
that no other thread can use it for another file block until we
1423
release the request (which is done by remove_reader() usually). The
1424
hash_link can have a block assigned to it or not. If there is a
1425
block, it may be assigned to this hash_link or not. In cases where a
1426
block is evicted from the cache, it is taken from the LRU ring and
1427
referenced by the new hash_link. But the block can still be assigned
1428
to its old hash_link for some time if it needs to be flushed first,
1429
or if there are other threads still reading it.
1432
hash_link is always returned.
1433
hash_link->block can be:
1435
- not assigned to this hash_link or
1436
- assigned to this hash_link. If assigned, the block can have
1437
- invalid data (when freshly assigned) or
1438
- valid data. Valid data can be
1439
- changed over the file contents (dirty) or
1440
- not changed (clean).
1442
hash_link= get_hash_link(keycache, file, filepos);
1443
assert((hash_link->file == file) && (hash_link->diskpos == filepos));
1446
if ((block= hash_link->block) &&
1447
block->hash_link == hash_link && (block->status & BLOCK_READ))
1449
/* Assigned block with valid (changed or unchanged) contents. */
1450
page_status= PAGE_READ;
1453
else (page_status == -1)
1455
- block not assigned to this hash_link or
1456
- block assigned but not yet read from file (invalid data).
1459
if (keycache->in_resize)
1461
/* This is a request during a resize operation */
1465
struct st_my_thread_var *thread;
1468
The file block is not in the cache. We don't need it in the
1469
cache: we are going to read or write directly to file. Cancel
1470
the request. We can simply decrement hash_link->requests because
1471
we did not release cache_lock since increasing it. So no other
1472
thread can wait for our request to become released.
1474
if (hash_link->requests == 1)
1477
We are the only one to request this hash_link (this file/pos).
1480
hash_link->requests--;
1481
unlink_hash(keycache, hash_link);
1486
More requests on the hash_link. Someone tries to evict a block
1487
for this hash_link (could have started before resizing started).
1488
This means that the LRU ring is empty. Otherwise a block could
1489
be assigned immediately. Behave like a thread that wants to
1490
evict a block for this file/pos. Add to the queue of threads
1491
waiting for a block. Wait until there is one assigned.
1493
Refresh the request on the hash-link so that it cannot be reused
1494
for another file/pos.
1496
thread= my_thread_var;
1497
thread->opt_info= (void *) hash_link;
1498
link_into_queue(&keycache->waiting_for_block, thread);
1501
keycache_pthread_cond_wait(&thread->suspend,
1502
&keycache->cache_lock);
1503
} while (thread->next);
1504
thread->opt_info= NULL;
1506
A block should now be assigned to the hash_link. But it may
1507
still need to be evicted. Anyway, we should re-check the
1508
situation. page_status must be set correctly.
1510
hash_link->requests--;
1512
} /* end of if (!block) */
1515
There is a block for this file/pos in the cache. Register a
1516
request on it. This unlinks it from the LRU ring (if it is there)
1517
and hence protects it against eviction (if not already in
1518
eviction). We need this for returning the block to the caller, for
1519
calling remove_reader() (for debugging purposes), and for calling
1520
free_block(). The only case where we don't need the request is if
1521
the block is in eviction. In that case we have to unregister the
1524
reg_requests(keycache, block, 1);
1526
if (page_status != PAGE_READ)
1529
- block not assigned to this hash_link or
1530
- block assigned but not yet read from file (invalid data).
1532
This must be a block in eviction. It will be read soon. We need
1533
to wait here until this happened. Otherwise the caller could
1534
access a wrong block or a block which is in read. While waiting
1535
we cannot lose hash_link nor block. We have registered a request
1536
on the hash_link. Everything can happen to the block but changes
1537
in the hash_link -> block relationship. In other words:
1538
everything can happen to the block but free or another completed
1541
Note that we bahave like a secondary requestor here. We just
1542
cannot return with PAGE_WAIT_TO_BE_READ. This would work for
1543
read requests and writes on dirty blocks that are not in flush
1544
only. Waiting here on COND_FOR_REQUESTED works in all
1547
assert(((block->hash_link != hash_link) &&
1548
(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
1549
((block->hash_link == hash_link) &&
1550
!(block->status & BLOCK_READ)));
1551
wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
1553
Here we can trust that the block has been assigned to this
1554
hash_link (block->hash_link == hash_link) and read into the
1555
buffer (BLOCK_READ). The worst things possible here are that the
1556
block is in free (BLOCK_REASSIGNED). But the block is still
1557
assigned to the hash_link. The freeing thread waits until we
1558
release our request on the hash_link. The block must not be
1559
again in eviction because we registered an request on it before
1562
assert(block->hash_link == hash_link);
1563
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1564
assert(!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)));
1567
The block is in the cache. Assigned to the hash_link. Valid data.
1568
Note that in case of page_st == PAGE_READ, the block can be marked
1569
for eviction. In any case it can be marked for freeing.
1574
/* A reader can just read the block. */
1575
*page_st= PAGE_READ;
1576
assert((hash_link->file == file) &&
1577
(hash_link->diskpos == filepos) &&
1578
(block->hash_link == hash_link));
1583
This is a writer. No two writers for the same block can exist.
1584
This must be assured by locks outside of the key cache.
1586
assert(!(block->status & BLOCK_FOR_UPDATE));
1588
while (block->status & BLOCK_IN_FLUSH)
1591
Wait until the block is flushed to file. Do not release the
1592
request on the hash_link yet to prevent that the block is freed
1593
or reassigned while we wait. While we wait, several things can
1594
happen to the block, including another flush. But the block
1595
cannot be reassigned to another hash_link until we release our
1596
request on it. But it can be marked BLOCK_REASSIGNED from free
1597
or eviction, while they wait for us to release the hash_link.
1599
wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
1601
If the flush phase failed, the resize could have finished while
1604
if (!keycache->in_resize)
1606
remove_reader(block);
1607
unreg_request(keycache, block, 1);
1610
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1611
assert(!(block->status & BLOCK_FOR_UPDATE));
1612
assert(block->hash_link == hash_link);
1615
if (block->status & BLOCK_CHANGED)
1618
We want to write a block with changed contents. If the cache
1619
block size is bigger than the callers block size (e.g. MyISAM),
1620
the caller may replace part of the block only. Changes of the
1621
other part of the block must be preserved. Since the block has
1622
not yet been selected for flush, we can still add our changes.
1624
*page_st= PAGE_READ;
1625
assert((hash_link->file == file) &&
1626
(hash_link->diskpos == filepos) &&
1627
(block->hash_link == hash_link));
1632
This is a write request for a clean block. We do not want to have
1633
new dirty blocks in the cache while resizing. We will free the
1634
block and write directly to file. If the block is in eviction or
1635
in free, we just let it go.
1637
Unregister from the hash_link. This must be done before freeing
1638
the block. And it must be done if not freeing the block. Because
1639
we could have waited above, we need to call remove_reader(). Other
1640
threads could wait for us to release our request on the hash_link.
1642
remove_reader(block);
1644
/* If the block is not in eviction and not in free, we can free it. */
1645
if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1649
Free block as we are going to write directly to file.
1650
Although we have an exlusive lock for the updated key part,
1651
the control can be yielded by the current thread as we might
1652
have unfinished readers of other key parts in the block
1653
buffer. Still we are guaranteed not to have any readers
1654
of the key part we are writing into until the block is
1655
removed from the cache as we set the BLOCK_REASSIGNED
1656
flag (see the code below that handles reading requests).
1658
free_block(keycache, block);
1663
The block will be evicted/freed soon. Don't touch it in any way.
1664
Unregister the request that we registered above.
1666
unreg_request(keycache, block, 1);
1669
The block is still assigned to the hash_link (the file/pos that
1670
we are going to write to). Wait until the eviction/free is
1671
complete. Otherwise the direct write could complete before all
1672
readers are done with the block. So they could read outdated
1675
Since we released our request on the hash_link, it can be reused
1676
for another file/pos. Hence we cannot just check for
1677
block->hash_link == hash_link. As long as the resize is
1678
proceeding the block cannot be reassigned to the same file/pos
1679
again. So we can terminate the loop when the block is no longer
1680
assigned to this file/pos.
1684
wait_on_queue(&block->wqueue[COND_FOR_SAVED],
1685
&keycache->cache_lock);
1687
If the flush phase failed, the resize could have finished
1688
while we waited here.
1690
if (!keycache->in_resize)
1692
} while (block->hash_link &&
1693
(block->hash_link->file == file) &&
1694
(block->hash_link->diskpos == filepos));
1699
if (page_status == PAGE_READ &&
1700
(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1704
This is a request for a block to be removed from cache. The block
1705
is assigned to this hash_link and contains valid data, but is
1706
marked for eviction or to be freed. Possible reasons why it has
1707
not yet been evicted/freed can be a flush before reassignment
1708
(BLOCK_IN_SWITCH), readers of the block have not finished yet
1709
(BLOCK_REASSIGNED), or the evicting thread did not yet awake after
1710
the block has been selected for it (BLOCK_IN_EVICTION).
1712
Only reading requests can proceed until the old dirty page is flushed,
1713
all others are to be suspended, then resubmitted
1715
if (!wrmode && !(block->status & BLOCK_REASSIGNED))
1718
This is a read request and the block not yet reassigned. We can
1719
register our request and proceed. This unlinks the block from
1720
the LRU ring and protects it against eviction.
1722
reg_requests(keycache, block, 1);
1727
Either this is a write request for a block that is in eviction
1728
or in free. We must not use it any more. Instead we must evict
1729
another block. But we cannot do this before the eviction/free is
1730
done. Otherwise we would find the same hash_link + block again
1733
Or this is a read request for a block in eviction/free that does
1734
not require a flush, but waits for readers to finish with the
1735
block. We do not read this block to let the eviction/free happen
1736
as soon as possible. Again we must wait so that we don't find
1737
the same hash_link + block again and again.
1739
assert(hash_link->requests);
1740
hash_link->requests--;
1741
wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
1743
The block is no longer assigned to this hash_link.
1752
This is a request for a new block or for a block not to be removed.
1755
- block not assigned to this hash_link or
1756
- block assigned but not yet read from file,
1758
- block assigned with valid (changed or unchanged) data and
1759
- it will not be reassigned/freed.
1763
/* No block is assigned to the hash_link yet. */
1764
if (keycache->blocks_unused)
1766
if (keycache->free_block_list)
1768
/* There is a block in the free list. */
1769
block= keycache->free_block_list;
1770
keycache->free_block_list= block->next_used;
1771
block->next_used= NULL;
1775
/* There are some never used blocks, take first of them */
1776
assert(keycache->blocks_used <
1777
(uint32_t) keycache->disk_blocks);
1778
block= &keycache->block_root[keycache->blocks_used];
1779
block->buffer= ADD_TO_PTR(keycache->block_mem,
1780
((uint32_t) keycache->blocks_used*
1781
keycache->key_cache_block_size),
1783
keycache->blocks_used++;
1784
assert(!block->next_used);
1786
assert(!block->prev_used);
1787
assert(!block->next_changed);
1788
assert(!block->prev_changed);
1789
assert(!block->hash_link);
1790
assert(!block->status);
1791
assert(!block->requests);
1792
keycache->blocks_unused--;
1793
block->status= BLOCK_IN_USE;
1795
block->offset= keycache->key_cache_block_size;
1797
block->temperature= BLOCK_COLD;
1798
block->hits_left= init_hits_left;
1799
block->last_hit_time= 0;
1800
block->hash_link= hash_link;
1801
hash_link->block= block;
1802
link_to_file_list(keycache, block, file, 0);
1803
page_status= PAGE_TO_BE_READ;
1808
There are no free blocks and no never used blocks, use a block
1812
if (! keycache->used_last)
1815
The LRU ring is empty. Wait until a new block is added to
1816
it. Several threads might wait here for the same hash_link,
1817
all of them must get the same block. While waiting for a
1818
block, after a block is selected for this hash_link, other
1819
threads can run first before this one awakes. During this
1820
time interval other threads find this hash_link pointing to
1821
the block, which is still assigned to another hash_link. In
1822
this case the block is not marked BLOCK_IN_SWITCH yet, but
1823
it is marked BLOCK_IN_EVICTION.
1826
struct st_my_thread_var *thread= my_thread_var;
1827
thread->opt_info= (void *) hash_link;
1828
link_into_queue(&keycache->waiting_for_block, thread);
1831
keycache_pthread_cond_wait(&thread->suspend,
1832
&keycache->cache_lock);
1834
while (thread->next);
1835
thread->opt_info= NULL;
1836
/* Assert that block has a request registered. */
1837
assert(hash_link->block->requests);
1838
/* Assert that block is not in LRU ring. */
1839
assert(!hash_link->block->next_used);
1840
assert(!hash_link->block->prev_used);
1843
If we waited above, hash_link->block has been assigned by
1844
link_block(). Otherwise it is still NULL. In the latter case
1845
we need to grab a block from the LRU ring ourselves.
1847
block= hash_link->block;
1850
/* Select the last block from the LRU ring. */
1851
block= keycache->used_last->next_used;
1852
block->hits_left= init_hits_left;
1853
block->last_hit_time= 0;
1854
hash_link->block= block;
1856
Register a request on the block. This unlinks it from the
1857
LRU ring and protects it against eviction.
1859
assert(!block->requests);
1860
reg_requests(keycache, block,1);
1862
We do not need to set block->status|= BLOCK_IN_EVICTION here
1863
because we will set block->status|= BLOCK_IN_SWITCH
1864
immediately without releasing the lock in between. This does
1865
also support debugging. When looking at the block, one can
1866
see if the block has been selected by link_block() after the
1867
LRU ring was empty, or if it was grabbed directly from the
1868
LRU ring in this branch.
1873
If we had to wait above, there is a small chance that another
1874
thread grabbed this block for the same file block already. But
1875
in most cases the first condition is true.
1877
if (block->hash_link != hash_link &&
1878
! (block->status & BLOCK_IN_SWITCH) )
1880
/* this is a primary request for a new page */
1881
block->status|= BLOCK_IN_SWITCH;
1883
if (block->status & BLOCK_CHANGED)
1885
/* The block contains a dirty page - push it out of the cache */
1887
if (block->status & BLOCK_IN_FLUSH)
1890
The block is marked for flush. If we do not wait here,
1891
it could happen that we write the block, reassign it to
1892
another file block, then, before the new owner can read
1893
the new file block, the flusher writes the cache block
1894
(which still has the old contents) to the new file block!
1896
wait_on_queue(&block->wqueue[COND_FOR_SAVED],
1897
&keycache->cache_lock);
1899
The block is marked BLOCK_IN_SWITCH. It should be left
1900
alone except for reading. No free, no write.
1902
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1903
assert(!(block->status & (BLOCK_REASSIGNED |
1905
BLOCK_FOR_UPDATE)));
1909
block->status|= BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE;
1911
BLOCK_IN_EVICTION may be true or not. Other flags must
1914
assert((block->status & ~BLOCK_IN_EVICTION) ==
1915
(BLOCK_READ | BLOCK_IN_SWITCH |
1916
BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
1917
BLOCK_CHANGED | BLOCK_IN_USE));
1918
assert(block->hash_link);
1920
keycache_pthread_mutex_unlock(&keycache->cache_lock);
1922
The call is thread safe because only the current
1923
thread might change the block->hash_link value
1925
error= (pwrite(block->hash_link->file,
1926
block->buffer+block->offset,
1927
block->length - block->offset,
1928
block->hash_link->diskpos+ block->offset) == 0);
1929
keycache_pthread_mutex_lock(&keycache->cache_lock);
1931
/* Block status must not have changed. */
1932
assert((block->status & ~BLOCK_IN_EVICTION) ==
1933
(BLOCK_READ | BLOCK_IN_SWITCH |
1934
BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
1935
BLOCK_CHANGED | BLOCK_IN_USE));
1936
keycache->global_cache_write++;
1940
block->status|= BLOCK_REASSIGNED;
1942
The block comes from the LRU ring. It must have a hash_link
1945
assert(block->hash_link);
1946
if (block->hash_link)
1949
All pending requests for this page must be resubmitted.
1950
This must be done before waiting for readers. They could
1951
wait for the flush to complete. And we must also do it
1952
after the wait. Flushers might try to free the block while
1953
we wait. They would wait until the reassignment is
1954
complete. Also the block status must reflect the correct
1955
situation: The block is not changed nor in flush any more.
1956
Note that we must not change the BLOCK_CHANGED flag
1957
outside of link_to_file_list() so that it is always in the
1958
correct queue and the *blocks_changed counters are
1961
block->status&= ~(BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE);
1962
link_to_file_list(keycache, block, block->hash_link->file, 1);
1963
release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
1965
The block is still assigned to its old hash_link.
1966
Wait until all pending read requests
1967
for this page are executed
1968
(we could have avoided this waiting, if we had read
1969
a page in the cache in a sweep, without yielding control)
1971
wait_for_readers(keycache, block);
1972
assert(block->hash_link && block->hash_link->block == block &&
1973
block->prev_changed);
1974
/* The reader must not have been a writer. */
1975
assert(!(block->status & BLOCK_CHANGED));
1977
/* Wake flushers that might have found the block in between. */
1978
release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
1980
/* Remove the hash link for the old file block from the hash. */
1981
unlink_hash(keycache, block->hash_link);
1984
For sanity checks link_to_file_list() asserts that block
1985
and hash_link refer to each other. Hence we need to assign
1986
the hash_link first, but then we would not know if it was
1987
linked before. Hence we would not know if to unlink it. So
1988
unlink it here and call link_to_file_list(..., false).
1990
unlink_changed(block);
1992
block->status= error ? BLOCK_ERROR : BLOCK_IN_USE ;
1994
block->offset= keycache->key_cache_block_size;
1995
block->hash_link= hash_link;
1996
link_to_file_list(keycache, block, file, 0);
1997
page_status= PAGE_TO_BE_READ;
1999
assert(block->hash_link->block == block);
2000
assert(hash_link->block->hash_link == hash_link);
2005
Either (block->hash_link == hash_link),
2006
or (block->status & BLOCK_IN_SWITCH).
2008
This is for secondary requests for a new file block only.
2009
Either it is already assigned to the new hash_link meanwhile
2010
(if we had to wait due to empty LRU), or it is already in
2011
eviction by another thread. Since this block has been
2012
grabbed from the LRU ring and attached to this hash_link,
2013
another thread cannot grab the same block from the LRU ring
2014
anymore. If the block is in eviction already, it must become
2015
attached to the same hash_link and as such destined for the
2018
page_status= (((block->hash_link == hash_link) &&
2019
(block->status & BLOCK_READ)) ?
2020
PAGE_READ : PAGE_WAIT_TO_BE_READ);
2027
Block is not NULL. This hash_link points to a block.
2029
- block not assigned to this hash_link (yet) or
2030
- block assigned but not yet read from file,
2032
- block assigned with valid (changed or unchanged) data and
2033
- it will not be reassigned/freed.
2035
The first condition means hash_link points to a block in
2036
eviction. This is not necessarily marked by BLOCK_IN_SWITCH yet.
2037
But then it is marked BLOCK_IN_EVICTION. See the NOTE in
2038
link_block(). In both cases it is destined for this hash_link
2039
and its file block address. When this hash_link got its block
2040
address, the block was removed from the LRU ring and cannot be
2041
selected for eviction (for another hash_link) again.
2043
Register a request on the block. This is another protection
2046
assert(((block->hash_link != hash_link) &&
2047
(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
2048
((block->hash_link == hash_link) &&
2049
!(block->status & BLOCK_READ)) ||
2050
((block->status & BLOCK_READ) &&
2051
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))));
2052
reg_requests(keycache, block, 1);
2053
page_status= (((block->hash_link == hash_link) &&
2054
(block->status & BLOCK_READ)) ?
2055
PAGE_READ : PAGE_WAIT_TO_BE_READ);
2059
assert(page_status != -1);
2060
/* Same assert basically, but be very sure. */
2062
/* Assert that block has a request and is not in LRU ring. */
2063
assert(block->requests);
2064
assert(!block->next_used);
2065
assert(!block->prev_used);
2066
/* Assert that we return the correct block. */
2067
assert((page_status == PAGE_WAIT_TO_BE_READ) ||
2068
((block->hash_link->file == file) &&
2069
(block->hash_link->diskpos == filepos)));
2070
*page_st=page_status;
2077
Read into a key cache block buffer from disk.
2082
keycache pointer to a key cache data structure
2083
block block to which buffer the data is to be read
2084
read_length size of data to be read
2085
min_length at least so much data must be read
2086
primary <-> the current thread will read the data
2092
The function either reads a page data from file to the block buffer,
2093
or waits until another thread reads it. What page to read is determined
2094
by a block parameter - reference to a hash link for this page.
2095
If an error occurs THE BLOCK_ERROR bit is set in the block status.
2096
We do not report error when the size of successfully read
2097
portion is less than read_length, but not less than min_length.
2100
static void read_block(KEY_CACHE *keycache,
2101
BLOCK_LINK *block, uint read_length,
2102
uint min_length, bool primary)
2106
/* On entry cache_lock is locked */
2111
This code is executed only by threads that submitted primary
2112
requests. Until block->status contains BLOCK_READ, all other
2113
request for the block become secondary requests. For a primary
2114
request the block must be properly initialized.
2116
assert(((block->status & ~BLOCK_FOR_UPDATE) == BLOCK_IN_USE));
2117
assert((block->length == 0));
2118
assert((block->offset == keycache->key_cache_block_size));
2119
assert((block->requests > 0));
2121
keycache->global_cache_read++;
2122
/* Page is not in buffer yet, is to be read from disk */
2123
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2125
Here other threads may step in and register as secondary readers.
2126
They will register in block->wqueue[COND_FOR_REQUESTED].
2128
got_length= pread(block->hash_link->file, block->buffer, read_length, block->hash_link->diskpos);
2129
keycache_pthread_mutex_lock(&keycache->cache_lock);
2131
The block can now have been marked for free (in case of
2132
FLUSH_RELEASE). Otherwise the state must be unchanged.
2134
assert(((block->status & ~(BLOCK_REASSIGNED |
2135
BLOCK_FOR_UPDATE)) == BLOCK_IN_USE));
2136
assert((block->length == 0));
2137
assert((block->offset == keycache->key_cache_block_size));
2138
assert((block->requests > 0));
2140
if (got_length < min_length)
2141
block->status|= BLOCK_ERROR;
2144
block->status|= BLOCK_READ;
2145
block->length= got_length;
2147
Do not set block->offset here. If this block is marked
2148
BLOCK_CHANGED later, we want to flush only the modified part. So
2149
only a writer may set block->offset down from
2150
keycache->key_cache_block_size.
2153
/* Signal that all pending requests for this page now can be processed */
2154
release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2159
This code is executed only by threads that submitted secondary
2160
requests. At this point it could happen that the cache block is
2161
not yet assigned to the hash_link for the requested file block.
2162
But at awake from the wait this should be the case. Unfortunately
2163
we cannot assert this here because we do not know the hash_link
2164
for the requested file block nor the file and position. So we have
2165
to assert this in the caller.
2167
wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
2173
294
Read a block of data from a cached file into a buffer;
2197
318
have to be a multiple of key_cache_block_size;
2200
uchar *key_cache_read(KEY_CACHE *keycache,
2201
File file, my_off_t filepos, int level,
2202
uchar *buff, uint length,
2203
uint block_length __attribute__((unused)),
2204
int return_buffer __attribute__((unused)))
321
unsigned char *key_cache_read(KEY_CACHE *keycache,
322
int file, internal::my_off_t filepos, int level,
323
unsigned char *buff, uint32_t length,
324
uint32_t block_length,
2206
bool locked_and_incremented= false;
2210
if (keycache->key_cache_inited)
2212
/* Key cache is used */
2213
register BLOCK_LINK *block;
2220
When the key cache is once initialized, we use the cache_lock to
2221
reliably distinguish the cases of normal operation, resizing, and
2222
disabled cache. We always increment and decrement
2223
'cnt_for_resize_op' so that a resizer can wait for pending I/O.
2225
keycache_pthread_mutex_lock(&keycache->cache_lock);
2227
Cache resizing has two phases: Flushing and re-initializing. In
2228
the flush phase read requests are allowed to bypass the cache for
2229
blocks not in the cache. find_key_block() returns NULL in this
2232
After the flush phase new I/O requests must wait until the
2233
re-initialization is done. The re-initialization can be done only
2234
if no I/O request is in progress. The reason is that
2235
key_cache_block_size can change. With enabled cache, I/O is done
2236
in chunks of key_cache_block_size. Every chunk tries to use a
2237
cache block first. If the block size changes in the middle, a
2238
block could be missed and old data could be read.
2240
while (keycache->in_resize && !keycache->resize_in_flush)
2241
wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
2242
/* Register the I/O for the next resize. */
2243
inc_counter_for_resize_op(keycache);
2244
locked_and_incremented= true;
2245
/* Requested data may not always be aligned to cache blocks. */
2246
offset= (uint) (filepos % keycache->key_cache_block_size);
2247
/* Read data in key_cache_block_size increments */
2250
/* Cache could be disabled in a later iteration. */
2252
if (!keycache->can_be_used)
2254
/* Start reading at the beginning of the cache block. */
2256
/* Do not read beyond the end of the cache block. */
2257
read_length= length;
2258
set_if_smaller(read_length, keycache->key_cache_block_size-offset);
2259
assert(read_length > 0);
2261
/* Request the cache block that matches file/pos. */
2262
keycache->global_cache_r_requests++;
2263
block=find_key_block(keycache, file, filepos, level, 0, &page_st);
2267
This happens only for requests submitted during key cache
2268
resize. The block is not in the cache and shall not go in.
2269
Read directly from file.
2271
keycache->global_cache_read++;
2272
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2273
error= (pread(file, (uchar*) buff, read_length, filepos + offset) == 0);
2274
keycache_pthread_mutex_lock(&keycache->cache_lock);
2277
if (!(block->status & BLOCK_ERROR))
2279
if (page_st != PAGE_READ)
2281
/* The requested page is to be read into the block buffer */
2282
read_block(keycache, block,
2283
keycache->key_cache_block_size, read_length+offset,
2284
(bool)(page_st == PAGE_TO_BE_READ));
2286
A secondary request must now have the block assigned to the
2287
requested file block. It does not hurt to check it for
2288
primary requests too.
2290
assert(keycache->can_be_used);
2291
assert(block->hash_link->file == file);
2292
assert(block->hash_link->diskpos == filepos);
2293
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2295
else if (block->length < read_length + offset)
2298
Impossible if nothing goes wrong:
2299
this could only happen if we are using a file with
2300
small key blocks and are trying to read outside the file
2303
block->status|= BLOCK_ERROR;
2307
/* block status may have added BLOCK_ERROR in the above 'if'. */
2308
if (!((status= block->status) & BLOCK_ERROR))
2311
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2312
#if !defined(SERIALIZED_READ_FROM_CACHE)
2313
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2316
/* Copy data from the cache buffer */
2317
memcpy(buff, block->buffer+offset, (size_t) read_length);
2319
#if !defined(SERIALIZED_READ_FROM_CACHE)
2320
keycache_pthread_mutex_lock(&keycache->cache_lock);
2321
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2326
remove_reader(block);
2329
Link the block into the LRU ring if it's the last submitted
2330
request for the block. This enables eviction for the block.
2332
unreg_request(keycache, block, 1);
2334
if (status & BLOCK_ERROR)
2342
filepos+= read_length+offset;
2345
} while ((length-= read_length));
2350
/* Key cache is not used */
2352
keycache->global_cache_r_requests++;
2353
keycache->global_cache_read++;
2355
if (locked_and_incremented)
2356
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2357
if (pread(file, (uchar*) buff, length, filepos))
331
unsigned char *start= buff;
333
assert (! keycache->key_cache_inited);
335
if (!pread(file, (unsigned char*) buff, length, filepos))
2359
if (locked_and_incremented)
2360
keycache_pthread_mutex_lock(&keycache->cache_lock);
2363
if (locked_and_incremented)
2365
dec_counter_for_resize_op(keycache);
2366
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2368
return(error ? (uchar*) 0 : start);
337
return(error ? (unsigned char*) 0 : start);
2392
361
int key_cache_insert(KEY_CACHE *keycache,
2393
File file, my_off_t filepos, int level,
2394
uchar *buff, uint length)
362
int file, internal::my_off_t filepos, int level,
363
unsigned char *buff, uint32_t length)
2398
if (keycache->key_cache_inited)
2400
/* Key cache is used */
2401
register BLOCK_LINK *block;
2405
bool locked_and_incremented= false;
2408
When the keycache is once initialized, we use the cache_lock to
2409
reliably distinguish the cases of normal operation, resizing, and
2410
disabled cache. We always increment and decrement
2411
'cnt_for_resize_op' so that a resizer can wait for pending I/O.
2413
keycache_pthread_mutex_lock(&keycache->cache_lock);
2415
We do not load index data into a disabled cache nor into an
2418
if (!keycache->can_be_used || keycache->in_resize)
2420
/* Register the pseudo I/O for the next resize. */
2421
inc_counter_for_resize_op(keycache);
2422
locked_and_incremented= true;
2423
/* Loaded data may not always be aligned to cache blocks. */
2424
offset= (uint) (filepos % keycache->key_cache_block_size);
2425
/* Load data in key_cache_block_size increments. */
2428
/* Cache could be disabled or resizing in a later iteration. */
2429
if (!keycache->can_be_used || keycache->in_resize)
2431
/* Start loading at the beginning of the cache block. */
2433
/* Do not load beyond the end of the cache block. */
2434
read_length= length;
2435
set_if_smaller(read_length, keycache->key_cache_block_size-offset);
2436
assert(read_length > 0);
2438
/* The block has been read by the caller already. */
2439
keycache->global_cache_read++;
2440
/* Request the cache block that matches file/pos. */
2441
keycache->global_cache_r_requests++;
2442
block= find_key_block(keycache, file, filepos, level, 0, &page_st);
2446
This happens only for requests submitted during key cache
2447
resize. The block is not in the cache and shall not go in.
2448
Stop loading index data.
2452
if (!(block->status & BLOCK_ERROR))
2454
if ((page_st == PAGE_WAIT_TO_BE_READ) ||
2455
((page_st == PAGE_TO_BE_READ) &&
2456
(offset || (read_length < keycache->key_cache_block_size))))
2461
this is a secondary request for a block to be read into the
2462
cache. The block is in eviction. It is not yet assigned to
2463
the requested file block (It does not point to the right
2464
hash_link). So we cannot call remove_reader() on the block.
2465
And we cannot access the hash_link directly here. We need to
2466
wait until the assignment is complete. read_block() executes
2467
the correct wait when called with primary == false.
2471
this is a primary request for a block to be read into the
2472
cache and the supplied data does not fill the whole block.
2474
This function is called on behalf of a LOAD INDEX INTO CACHE
2475
statement, which is a read-only task and allows other
2476
readers. It is possible that a parallel running reader tries
2477
to access this block. If it needs more data than has been
2478
supplied here, it would report an error. To be sure that we
2479
have all data in the block that is available in the file, we
2480
read the block ourselves.
2482
Though reading again what the caller did read already is an
2483
expensive operation, we need to do this for correctness.
2485
read_block(keycache, block, keycache->key_cache_block_size,
2486
read_length + offset, (page_st == PAGE_TO_BE_READ));
2488
A secondary request must now have the block assigned to the
2489
requested file block. It does not hurt to check it for
2490
primary requests too.
2492
assert(keycache->can_be_used);
2493
assert(block->hash_link->file == file);
2494
assert(block->hash_link->diskpos == filepos);
2495
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2497
else if (page_st == PAGE_TO_BE_READ)
2500
This is a new block in the cache. If we come here, we have
2501
data for the whole block.
2503
assert(block->hash_link->requests);
2504
assert(block->status & BLOCK_IN_USE);
2505
assert((page_st == PAGE_TO_BE_READ) ||
2506
(block->status & BLOCK_READ));
2508
#if !defined(SERIALIZED_READ_FROM_CACHE)
2509
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2511
Here other threads may step in and register as secondary readers.
2512
They will register in block->wqueue[COND_FOR_REQUESTED].
2516
/* Copy data from buff */
2517
memcpy(block->buffer+offset, buff, (size_t) read_length);
2519
#if !defined(SERIALIZED_READ_FROM_CACHE)
2520
keycache_pthread_mutex_lock(&keycache->cache_lock);
2521
assert(block->status & BLOCK_IN_USE);
2522
assert((page_st == PAGE_TO_BE_READ) ||
2523
(block->status & BLOCK_READ));
2526
After the data is in the buffer, we can declare the block
2527
valid. Now other threads do not need to register as
2528
secondary readers any more. They can immediately access the
2531
block->status|= BLOCK_READ;
2532
block->length= read_length+offset;
2534
Do not set block->offset here. If this block is marked
2535
BLOCK_CHANGED later, we want to flush only the modified part. So
2536
only a writer may set block->offset down from
2537
keycache->key_cache_block_size.
2539
/* Signal all pending requests. */
2540
release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2545
page_st == PAGE_READ. The block is in the buffer. All data
2546
must already be present. Blocks are always read with all
2547
data available on file. Assert that the block does not have
2548
less contents than the preloader supplies. If the caller has
2549
data beyond block->length, it means that a file write has
2550
been done while this block was in cache and not extended
2551
with the new data. If the condition is met, we can simply
2554
assert((page_st == PAGE_READ) &&
2555
(read_length + offset <= block->length));
2559
A secondary request must now have the block assigned to the
2560
requested file block. It does not hurt to check it for primary
2563
assert(block->hash_link->file == file);
2564
assert(block->hash_link->diskpos == filepos);
2565
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2566
} /* end of if (!(block->status & BLOCK_ERROR)) */
2569
remove_reader(block);
2572
Link the block into the LRU ring if it's the last submitted
2573
request for the block. This enables eviction for the block.
2575
unreg_request(keycache, block, 1);
2577
error= (block->status & BLOCK_ERROR);
2583
filepos+= read_length+offset;
2586
} while ((length-= read_length));
2589
if (locked_and_incremented)
2590
dec_counter_for_resize_op(keycache);
2591
keycache_pthread_mutex_unlock(&keycache->cache_lock);
371
assert (!keycache->key_cache_inited);
2626
405
int key_cache_write(KEY_CACHE *keycache,
2627
File file, my_off_t filepos, int level,
2628
uchar *buff, uint length,
2629
uint block_length __attribute__((unused)),
406
int file, internal::my_off_t filepos, int level,
407
unsigned char *buff, uint32_t length,
408
uint32_t block_length,
2632
bool locked_and_incremented= false;
2635
415
if (!dont_write)
2637
/* purecov: begin inspected */
2638
417
/* Not used in the server. */
2639
418
/* Force writing from buff into disk. */
2640
keycache->global_cache_w_requests++;
2641
keycache->global_cache_write++;
2642
419
if (pwrite(file, buff, length, filepos) == 0)
2647
if (keycache->key_cache_inited)
2649
/* Key cache is used */
2650
register BLOCK_LINK *block;
2656
When the key cache is once initialized, we use the cache_lock to
2657
reliably distinguish the cases of normal operation, resizing, and
2658
disabled cache. We always increment and decrement
2659
'cnt_for_resize_op' so that a resizer can wait for pending I/O.
2661
keycache_pthread_mutex_lock(&keycache->cache_lock);
2663
Cache resizing has two phases: Flushing and re-initializing. In
2664
the flush phase write requests can modify dirty blocks that are
2665
not yet in flush. Otherwise they are allowed to bypass the cache.
2666
find_key_block() returns NULL in both cases (clean blocks and
2669
After the flush phase new I/O requests must wait until the
2670
re-initialization is done. The re-initialization can be done only
2671
if no I/O request is in progress. The reason is that
2672
key_cache_block_size can change. With enabled cache I/O is done in
2673
chunks of key_cache_block_size. Every chunk tries to use a cache
2674
block first. If the block size changes in the middle, a block
2675
could be missed and data could be written below a cached block.
2677
while (keycache->in_resize && !keycache->resize_in_flush)
2678
wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
2679
/* Register the I/O for the next resize. */
2680
inc_counter_for_resize_op(keycache);
2681
locked_and_incremented= true;
2682
/* Requested data may not always be aligned to cache blocks. */
2683
offset= (uint) (filepos % keycache->key_cache_block_size);
2684
/* Write data in key_cache_block_size increments. */
2687
/* Cache could be disabled in a later iteration. */
2688
if (!keycache->can_be_used)
2690
/* Start writing at the beginning of the cache block. */
2692
/* Do not write beyond the end of the cache block. */
2693
read_length= length;
2694
set_if_smaller(read_length, keycache->key_cache_block_size-offset);
2695
assert(read_length > 0);
2697
/* Request the cache block that matches file/pos. */
2698
keycache->global_cache_w_requests++;
2699
block= find_key_block(keycache, file, filepos, level, 1, &page_st);
2703
This happens only for requests submitted during key cache
2704
resize. The block is not in the cache and shall not go in.
2705
Write directly to file.
2709
/* Used in the server. */
2710
keycache->global_cache_write++;
2711
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2712
if (pwrite(file, (uchar*) buff, read_length, filepos + offset) == 0)
2714
keycache_pthread_mutex_lock(&keycache->cache_lock);
2719
Prevent block from flushing and from being selected for to be
2720
freed. This must be set when we release the cache_lock.
2721
However, we must not set the status of the block before it is
2722
assigned to this file/pos.
2724
if (page_st != PAGE_WAIT_TO_BE_READ)
2725
block->status|= BLOCK_FOR_UPDATE;
2727
We must read the file block first if it is not yet in the cache
2728
and we do not replace all of its contents.
2730
In cases where the cache block is big enough to contain (parts
2731
of) index blocks of different indexes, our request can be
2732
secondary (PAGE_WAIT_TO_BE_READ). In this case another thread is
2733
reading the file block. If the read completes after us, it
2734
overwrites our new contents with the old contents. So we have to
2735
wait for the other thread to complete the read of this block.
2736
read_block() takes care for the wait.
2738
if (!(block->status & BLOCK_ERROR) &&
2739
((page_st == PAGE_TO_BE_READ &&
2740
(offset || read_length < keycache->key_cache_block_size)) ||
2741
(page_st == PAGE_WAIT_TO_BE_READ)))
2743
read_block(keycache, block,
2744
offset + read_length >= keycache->key_cache_block_size?
2745
offset : keycache->key_cache_block_size,
2746
offset, (page_st == PAGE_TO_BE_READ));
2747
assert(keycache->can_be_used);
2748
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2750
Prevent block from flushing and from being selected for to be
2751
freed. This must be set when we release the cache_lock.
2752
Here we set it in case we could not set it above.
2754
block->status|= BLOCK_FOR_UPDATE;
2757
The block should always be assigned to the requested file block
2758
here. It need not be BLOCK_READ when overwriting the whole block.
2760
assert(block->hash_link->file == file);
2761
assert(block->hash_link->diskpos == filepos);
2762
assert(block->status & BLOCK_IN_USE);
2763
assert((page_st == PAGE_TO_BE_READ) || (block->status & BLOCK_READ));
2765
The block to be written must not be marked BLOCK_REASSIGNED.
2766
Otherwise it could be freed in dirty state or reused without
2767
another flush during eviction. It must also not be in flush.
2768
Otherwise the old contens may have been flushed already and
2769
the flusher could clear BLOCK_CHANGED without flushing the
2772
assert(!(block->status & BLOCK_REASSIGNED));
2774
while (block->status & BLOCK_IN_FLUSHWRITE)
2777
Another thread is flushing the block. It was dirty already.
2778
Wait until the block is flushed to file. Otherwise we could
2779
modify the buffer contents just while it is written to file.
2780
An unpredictable file block contents would be the result.
2781
While we wait, several things can happen to the block,
2782
including another flush. But the block cannot be reassigned to
2783
another hash_link until we release our request on it.
2785
wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
2786
assert(keycache->can_be_used);
2787
assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
2788
/* Still must not be marked for free. */
2789
assert(!(block->status & BLOCK_REASSIGNED));
2790
assert(block->hash_link && (block->hash_link->block == block));
2794
We could perhaps release the cache_lock during access of the
2795
data like in the other functions. Locks outside of the key cache
2796
assure that readers and a writer do not access the same range of
2797
data. Parallel accesses should happen only if the cache block
2798
contains multiple index block(fragment)s. So different parts of
2799
the buffer would be read/written. An attempt to flush during
2800
memcpy() is prevented with BLOCK_FOR_UPDATE.
2802
if (!(block->status & BLOCK_ERROR))
2804
#if !defined(SERIALIZED_READ_FROM_CACHE)
2805
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2807
memcpy(block->buffer+offset, buff, (size_t) read_length);
2809
#if !defined(SERIALIZED_READ_FROM_CACHE)
2810
keycache_pthread_mutex_lock(&keycache->cache_lock);
2816
/* Not used in the server. buff has been written to disk at start. */
2817
if ((block->status & BLOCK_CHANGED) &&
2818
(!offset && read_length >= keycache->key_cache_block_size))
2819
link_to_file_list(keycache, block, block->hash_link->file, 1);
2821
else if (! (block->status & BLOCK_CHANGED))
2822
link_to_changed_list(keycache, block);
2823
block->status|=BLOCK_READ;
2825
Allow block to be selected for to be freed. Since it is marked
2826
BLOCK_CHANGED too, it won't be selected for to be freed without
2829
block->status&= ~BLOCK_FOR_UPDATE;
2830
set_if_smaller(block->offset, offset);
2831
set_if_bigger(block->length, read_length+offset);
2833
/* Threads may be waiting for the changes to be complete. */
2834
release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2837
If only a part of the cache block is to be replaced, and the
2838
rest has been read from file, then the cache lock has been
2839
released for I/O and it could be possible that another thread
2840
wants to evict or free the block and waits for it to be
2841
released. So we must not just decrement hash_link->requests, but
2842
also wake a waiting thread.
2844
remove_reader(block);
2847
Link the block into the LRU ring if it's the last submitted
2848
request for the block. This enables eviction for the block.
2850
unreg_request(keycache, block, 1);
2852
if (block->status & BLOCK_ERROR)
2860
filepos+= read_length+offset;
2863
} while ((length-= read_length));
423
assert (!keycache->key_cache_inited);
2868
425
/* Key cache is not used */
2871
428
/* Used in the server. */
2872
keycache->global_cache_w_requests++;
2873
keycache->global_cache_write++;
2874
if (locked_and_incremented)
2875
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2876
if (pwrite(file, (uchar*) buff, length, filepos) == 0)
429
if (pwrite(file, (unsigned char*) buff, length, filepos) == 0)
2878
if (locked_and_incremented)
2879
keycache_pthread_mutex_lock(&keycache->cache_lock);
2883
if (locked_and_incremented)
2885
dec_counter_for_resize_op(keycache);
2886
keycache_pthread_mutex_unlock(&keycache->cache_lock);
2897
keycache Pointer to a key cache data structure
2898
block Pointer to the block to free
2901
Remove reference to block from hash table.
2902
Remove block from the chain of clean blocks.
2903
Add block to the free list.
2906
Block must not be free (status == 0).
2907
Block must not be in free_block_list.
2908
Block must not be in the LRU ring.
2909
Block must not be in eviction (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH).
2910
Block must not be in free (BLOCK_REASSIGNED).
2911
Block must not be in flush (BLOCK_IN_FLUSH).
2912
Block must not be dirty (BLOCK_CHANGED).
2913
Block must not be in changed_blocks (dirty) hash.
2914
Block must be in file_blocks (clean) hash.
2915
Block must refer to a hash_link.
2916
Block must have a request registered on it.
2919
static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block)
2922
Assert that the block is not free already. And that it is in a clean
2923
state. Note that the block might just be assigned to a hash_link and
2924
not yet read (BLOCK_READ may not be set here). In this case a reader
2925
is registered in the hash_link and free_block() will wait for it
2928
assert((block->status & BLOCK_IN_USE) &&
2929
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
2930
BLOCK_REASSIGNED | BLOCK_IN_FLUSH |
2931
BLOCK_CHANGED | BLOCK_FOR_UPDATE)));
2932
/* Assert that the block is in a file_blocks chain. */
2933
assert(block->prev_changed && *block->prev_changed == block);
2934
/* Assert that the block is not in the LRU ring. */
2935
assert(!block->next_used && !block->prev_used);
2937
IMHO the below condition (if()) makes no sense. I can't see how it
2938
could be possible that free_block() is entered with a NULL hash_link
2939
pointer. The only place where it can become NULL is in free_block()
2940
(or before its first use ever, but for those blocks free_block() is
2941
not called). I don't remove the conditional as it cannot harm, but
2942
place an assert to confirm my hypothesis. Eventually the
2943
condition (if()) can be removed.
2945
assert(block->hash_link && block->hash_link->block == block);
2946
if (block->hash_link)
2949
While waiting for readers to finish, new readers might request the
2950
block. But since we set block->status|= BLOCK_REASSIGNED, they
2951
will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
2954
block->status|= BLOCK_REASSIGNED;
2955
wait_for_readers(keycache, block);
2957
The block must not have been freed by another thread. Repeat some
2958
checks. An additional requirement is that it must be read now
2961
assert(block->hash_link && block->hash_link->block == block);
2962
assert((block->status & (BLOCK_READ | BLOCK_IN_USE |
2963
BLOCK_REASSIGNED)) &&
2964
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
2965
BLOCK_IN_FLUSH | BLOCK_CHANGED |
2966
BLOCK_FOR_UPDATE)));
2967
assert(block->prev_changed && *block->prev_changed == block);
2968
assert(!block->prev_used);
2970
Unset BLOCK_REASSIGNED again. If we hand the block to an evicting
2971
thread (through unreg_request() below), other threads must not see
2972
this flag. They could become confused.
2974
block->status&= ~BLOCK_REASSIGNED;
2976
Do not release the hash_link until the block is off all lists.
2977
At least not if we hand it over for eviction in unreg_request().
2982
Unregister the block request and link the block into the LRU ring.
2983
This enables eviction for the block. If the LRU ring was empty and
2984
threads are waiting for a block, then the block wil be handed over
2985
for eviction immediately. Otherwise we will unlink it from the LRU
2986
ring again, without releasing the lock in between. So decrementing
2987
the request counter and updating statistics are the only relevant
2988
operation in this case. Assert that there are no other requests
2991
assert(block->requests == 1);
2992
unreg_request(keycache, block, 0);
2994
Note that even without releasing the cache lock it is possible that
2995
the block is immediately selected for eviction by link_block() and
2996
thus not added to the LRU ring. In this case we must not touch the
2999
if (block->status & BLOCK_IN_EVICTION)
3002
/* Here the block must be in the LRU ring. Unlink it again. */
3003
assert(block->next_used && block->prev_used &&
3004
*block->prev_used == block);
3005
unlink_block(keycache, block);
3006
if (block->temperature == BLOCK_WARM)
3007
keycache->warm_blocks--;
3008
block->temperature= BLOCK_COLD;
3010
/* Remove from file_blocks hash. */
3011
unlink_changed(block);
3013
/* Remove reference to block from hash table. */
3014
unlink_hash(keycache, block->hash_link);
3015
block->hash_link= NULL;
3019
block->offset= keycache->key_cache_block_size;
3021
/* Enforced by unlink_changed(), but just to be sure. */
3022
assert(!block->next_changed && !block->prev_changed);
3023
/* Enforced by unlink_block(): not in LRU ring nor in free_block_list. */
3024
assert(!block->next_used && !block->prev_used);
3025
/* Insert the free block in the free list. */
3026
block->next_used= keycache->free_block_list;
3027
keycache->free_block_list= block;
3028
/* Keep track of the number of currently unused blocks. */
3029
keycache->blocks_unused++;
3031
/* All pending requests for this page must be resubmitted. */
3032
release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
3036
static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b)
3038
return (((*a)->hash_link->diskpos < (*b)->hash_link->diskpos) ? -1 :
3039
((*a)->hash_link->diskpos > (*b)->hash_link->diskpos) ? 1 : 0);
3044
Flush a portion of changed blocks to disk,
3045
free used blocks if requested
3048
static int flush_cached_blocks(KEY_CACHE *keycache,
3049
File file, BLOCK_LINK **cache,
3051
enum flush_type type)
3055
uint count= (uint) (end-cache);
3057
/* Don't lock the cache during the flush */
3058
keycache_pthread_mutex_unlock(&keycache->cache_lock);
3060
As all blocks referred in 'cache' are marked by BLOCK_IN_FLUSH
3061
we are guarunteed no thread will change them
3063
my_qsort((uchar*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);
3065
keycache_pthread_mutex_lock(&keycache->cache_lock);
3067
Note: Do not break the loop. We have registered a request on every
3068
block in 'cache'. These must be unregistered by free_block() or
3071
for ( ; cache != end ; cache++)
3073
BLOCK_LINK *block= *cache;
3075
If the block contents is going to be changed, we abandon the flush
3076
for this block. flush_key_blocks_int() will restart its search and
3077
handle the block properly.
3079
if (!(block->status & BLOCK_FOR_UPDATE))
3081
/* Blocks coming here must have a certain status. */
3082
assert(block->hash_link);
3083
assert(block->hash_link->block == block);
3084
assert(block->hash_link->file == file);
3085
assert((block->status & ~BLOCK_IN_EVICTION) ==
3086
(BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
3087
block->status|= BLOCK_IN_FLUSHWRITE;
3088
keycache_pthread_mutex_unlock(&keycache->cache_lock);
3089
error= (pwrite(file,
3090
block->buffer+block->offset,
3091
block->length - block->offset,
3092
block->hash_link->diskpos+ block->offset) == 0);
3093
keycache_pthread_mutex_lock(&keycache->cache_lock);
3094
keycache->global_cache_write++;
3097
block->status|= BLOCK_ERROR;
3099
last_errno= errno ? errno : -1;
3101
block->status&= ~BLOCK_IN_FLUSHWRITE;
3102
/* Block must not have changed status except BLOCK_FOR_UPDATE. */
3103
assert(block->hash_link);
3104
assert(block->hash_link->block == block);
3105
assert(block->hash_link->file == file);
3106
assert((block->status & ~(BLOCK_FOR_UPDATE | BLOCK_IN_EVICTION)) ==
3107
(BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
3109
Set correct status and link in right queue for free or later use.
3110
free_block() must not see BLOCK_CHANGED and it may need to wait
3111
for readers of the block. These should not see the block in the
3112
wrong hash. If not freeing the block, we need to have it in the
3115
link_to_file_list(keycache, block, file, 1);
3118
block->status&= ~BLOCK_IN_FLUSH;
3120
Let to proceed for possible waiting requests to write to the block page.
3121
It might happen only during an operation to resize the key cache.
3123
release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
3124
/* type will never be FLUSH_IGNORE_CHANGED here */
3125
if (!(type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) &&
3126
!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3130
Note that a request has been registered against the block in
3131
flush_key_blocks_int().
3133
free_block(keycache, block);
3138
Link the block into the LRU ring if it's the last submitted
3139
request for the block. This enables eviction for the block.
3140
Note that a request has been registered against the block in
3141
flush_key_blocks_int().
3143
unreg_request(keycache, block, 1);
3146
} /* end of for ( ; cache != end ; cache++) */
3152
flush all key blocks for a file to disk, but don't do any mutex locks.
3155
flush_key_blocks_int()
3156
keycache pointer to a key cache data structure
3157
file handler for the file to flush to
3158
flush_type type of the flush
3161
This function doesn't do any mutex locks because it needs to be called both
3162
from flush_key_blocks and flush_all_key_blocks (the later one does the
3163
mutex lock in the resize_key_cache() function).
3165
We do only care about changed blocks that exist when the function is
3166
entered. We do not guarantee that all changed blocks of the file are
3167
flushed if more blocks change while this function is running.
3174
static int flush_key_blocks_int(KEY_CACHE *keycache,
3175
File file, enum flush_type type)
3177
BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
3182
if (keycache->disk_blocks > 0 &&
3183
(!my_disable_flush_key_blocks || type != FLUSH_KEEP))
3185
/* Key cache exists and flush is not disabled */
3187
uint count= FLUSH_CACHE;
3188
BLOCK_LINK **pos,**end;
3189
BLOCK_LINK *first_in_switch= NULL;
3190
BLOCK_LINK *last_in_flush;
3191
BLOCK_LINK *last_for_update;
3192
BLOCK_LINK *block, *next;
3194
if (type != FLUSH_IGNORE_CHANGED)
3197
Count how many key blocks we have to cache to be able
3198
to flush all dirty pages with minimum seek moves
3201
for (block= keycache->changed_blocks[FILE_HASH(file)] ;
3203
block= block->next_changed)
3205
if ((block->hash_link->file == file) &&
3206
!(block->status & BLOCK_IN_FLUSH))
3209
assert(count<= keycache->blocks_used);
3213
Allocate a new buffer only if its bigger than the one we have.
3214
Assure that we always have some entries for the case that new
3215
changed blocks appear while we need to wait for something.
3217
if ((count > FLUSH_CACHE) &&
3218
!(cache= (BLOCK_LINK**) my_malloc(sizeof(BLOCK_LINK*)*count,
3222
After a restart there could be more changed blocks than now.
3223
So we should not let count become smaller than the fixed buffer.
3225
if (cache == cache_buff)
3229
/* Retrieve the blocks and write them to a buffer to be flushed */
3231
last_in_flush= NULL;
3232
last_for_update= NULL;
3233
end= (pos= cache)+count;
3234
for (block= keycache->changed_blocks[FILE_HASH(file)] ;
3238
next= block->next_changed;
3239
if (block->hash_link->file == file)
3241
if (!(block->status & (BLOCK_IN_FLUSH | BLOCK_FOR_UPDATE)))
3244
Note: The special handling of BLOCK_IN_SWITCH is obsolete
3245
since we set BLOCK_IN_FLUSH if the eviction includes a
3246
flush. It can be removed in a later version.
3248
if (!(block->status & BLOCK_IN_SWITCH))
3251
We care only for the blocks for which flushing was not
3252
initiated by another thread and which are not in eviction.
3253
Registering a request on the block unlinks it from the LRU
3254
ring and protects against eviction.
3256
reg_requests(keycache, block, 1);
3257
if (type != FLUSH_IGNORE_CHANGED)
3259
/* It's not a temporary file */
3263
This should happen relatively seldom. Remove the
3264
request because we won't do anything with the block
3265
but restart and pick it again in the next iteration.
3267
unreg_request(keycache, block, 0);
3269
This happens only if there is not enough
3270
memory for the big block
3272
if ((error= flush_cached_blocks(keycache, file, cache,
3275
/* Do not loop infinitely trying to flush in vain. */
3276
if ((last_errno == error) && (++last_errcnt > 5))
3281
Restart the scan as some other thread might have changed
3282
the changed blocks chain: the blocks that were in switch
3283
state before the flush started have to be excluded
3288
Mark the block with BLOCK_IN_FLUSH in order not to let
3289
other threads to use it for new pages and interfere with
3290
our sequence of flushing dirty file pages. We must not
3291
set this flag before actually putting the block on the
3292
write burst array called 'cache'.
3294
block->status|= BLOCK_IN_FLUSH;
3295
/* Add block to the array for a write burst. */
3300
/* It's a temporary file */
3301
assert(!(block->status & BLOCK_REASSIGNED));
3304
free_block() must not be called with BLOCK_CHANGED. Note
3305
that we must not change the BLOCK_CHANGED flag outside of
3306
link_to_file_list() so that it is always in the correct
3307
queue and the *blocks_changed counters are correct.
3309
link_to_file_list(keycache, block, file, 1);
3310
if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)))
3312
/* A request has been registered against the block above. */
3313
free_block(keycache, block);
3318
Link the block into the LRU ring if it's the last
3319
submitted request for the block. This enables eviction
3320
for the block. A request has been registered against
3323
unreg_request(keycache, block, 1);
3330
Link the block into a list of blocks 'in switch'.
3332
WARNING: Here we introduce a place where a changed block
3333
is not in the changed_blocks hash! This is acceptable for
3334
a BLOCK_IN_SWITCH. Never try this for another situation.
3335
Other parts of the key cache code rely on changed blocks
3336
being in the changed_blocks hash.
3338
unlink_changed(block);
3339
link_changed(block, &first_in_switch);
3342
else if (type != FLUSH_KEEP)
3345
During the normal flush at end of statement (FLUSH_KEEP) we
3346
do not need to ensure that blocks in flush or update by
3347
other threads are flushed. They will be flushed by them
3348
later. In all other cases we must assure that we do not have
3349
any changed block of this file in the cache when this
3352
if (block->status & BLOCK_IN_FLUSH)
3354
/* Remember the last block found to be in flush. */
3355
last_in_flush= block;
3359
/* Remember the last block found to be selected for update. */
3360
last_for_update= block;
3367
if ((error= flush_cached_blocks(keycache, file, cache, pos, type)))
3369
/* Do not loop inifnitely trying to flush in vain. */
3370
if ((last_errno == error) && (++last_errcnt > 5))
3375
Do not restart here during the normal flush at end of statement
3376
(FLUSH_KEEP). We have now flushed at least all blocks that were
3377
changed when entering this function. In all other cases we must
3378
assure that we do not have any changed block of this file in the
3379
cache when this function returns.
3381
if (type != FLUSH_KEEP)
3387
There are no blocks to be flushed by this thread, but blocks in
3388
flush by other threads. Wait until one of the blocks is flushed.
3389
Re-check the condition for last_in_flush. We may have unlocked
3390
the cache_lock in flush_cached_blocks(). The state of the block
3393
if (last_in_flush->status & BLOCK_IN_FLUSH)
3394
wait_on_queue(&last_in_flush->wqueue[COND_FOR_SAVED],
3395
&keycache->cache_lock);
3396
/* Be sure not to lose a block. They may be flushed in random order. */
3399
if (last_for_update)
3402
There are no blocks to be flushed by this thread, but blocks for
3403
update by other threads. Wait until one of the blocks is updated.
3404
Re-check the condition for last_for_update. We may have unlocked
3405
the cache_lock in flush_cached_blocks(). The state of the block
3408
if (last_for_update->status & BLOCK_FOR_UPDATE)
3409
wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
3410
&keycache->cache_lock);
3411
/* The block is now changed. Flush it. */
3416
Wait until the list of blocks in switch is empty. The threads that
3417
are switching these blocks will relink them to clean file chains
3418
while we wait and thus empty the 'first_in_switch' chain.
3420
while (first_in_switch)
3422
wait_on_queue(&first_in_switch->wqueue[COND_FOR_SAVED],
3423
&keycache->cache_lock);
3425
Do not restart here. We have flushed all blocks that were
3426
changed when entering this function and were not marked for
3427
eviction. Other threads have now flushed all remaining blocks in
3428
the course of their eviction.
3432
if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
3434
BLOCK_LINK *last_for_update= NULL;
3435
BLOCK_LINK *last_in_switch= NULL;
3436
uint total_found= 0;
3440
Finally free all clean blocks for this file.
3441
During resize this may be run by two threads in parallel.
3446
for (block= keycache->file_blocks[FILE_HASH(file)] ;
3450
/* Remember the next block. After freeing we cannot get at it. */
3451
next= block->next_changed;
3453
/* Changed blocks cannot appear in the file_blocks hash. */
3454
assert(!(block->status & BLOCK_CHANGED));
3455
if (block->hash_link->file == file)
3457
/* We must skip blocks that will be changed. */
3458
if (block->status & BLOCK_FOR_UPDATE)
3460
last_for_update= block;
3465
We must not free blocks in eviction (BLOCK_IN_EVICTION |
3466
BLOCK_IN_SWITCH) or blocks intended to be freed
3469
if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3472
struct st_hash_link *next_hash_link= NULL;
3473
my_off_t next_diskpos= 0;
3475
uint next_status= 0;
3476
uint hash_requests= 0;
3480
assert(found <= keycache->blocks_used);
3483
Register a request. This unlinks the block from the LRU
3484
ring and protects it against eviction. This is required
3487
reg_requests(keycache, block, 1);
3490
free_block() may need to wait for readers of the block.
3491
This is the moment where the other thread can move the
3492
'next' block from the chain. free_block() needs to wait
3493
if there are requests for the block pending.
3495
if (next && (hash_requests= block->hash_link->requests))
3497
/* Copy values from the 'next' block and its hash_link. */
3498
next_status= next->status;
3499
next_hash_link= next->hash_link;
3500
next_diskpos= next_hash_link->diskpos;
3501
next_file= next_hash_link->file;
3502
assert(next == next_hash_link->block);
3505
free_block(keycache, block);
3507
If we had to wait and the state of the 'next' block
3508
changed, break the inner loop. 'next' may no longer be
3509
part of the current chain.
3511
We do not want to break the loop after every free_block(),
3512
not even only after waits. The chain might be quite long
3513
and contain blocks for many files. Traversing it again and
3514
again to find more blocks for this file could become quite
3517
if (next && hash_requests &&
3518
((next_status != next->status) ||
3519
(next_hash_link != next->hash_link) ||
3520
(next_file != next_hash_link->file) ||
3521
(next_diskpos != next_hash_link->diskpos) ||
3522
(next != next_hash_link->block)))
3527
last_in_switch= block;
3530
} /* end for block in file_blocks */
3534
If any clean block has been found, we may have waited for it to
3535
become free. In this case it could be possible that another clean
3536
block became dirty. This is possible if the write request existed
3537
before the flush started (BLOCK_FOR_UPDATE). Re-check the hashes.
3543
To avoid an infinite loop, wait until one of the blocks marked
3544
for update is updated.
3546
if (last_for_update)
3548
/* We did not wait. Block must not have changed status. */
3549
assert(last_for_update->status & BLOCK_FOR_UPDATE);
3550
wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
3551
&keycache->cache_lock);
3556
To avoid an infinite loop wait until one of the blocks marked
3557
for eviction is switched.
3561
/* We did not wait. Block must not have changed status. */
3562
assert(last_in_switch->status & (BLOCK_IN_EVICTION |
3565
wait_on_queue(&last_in_switch->wqueue[COND_FOR_SAVED],
3566
&keycache->cache_lock);
3570
} /* if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) */
3572
} /* if (keycache->disk_blocks > 0 */
3575
if (cache != cache_buff)
3576
my_free((uchar*) cache, MYF(0));
3578
errno=last_errno; /* Return first error */
3579
return(last_errno != 0);
3584
438
Flush all blocks for a file to disk
3598
452
int flush_key_blocks(KEY_CACHE *keycache,
3599
File file, enum flush_type type)
3603
if (!keycache->key_cache_inited)
3606
keycache_pthread_mutex_lock(&keycache->cache_lock);
3607
/* While waiting for lock, keycache could have been ended. */
3608
if (keycache->disk_blocks > 0)
3610
inc_counter_for_resize_op(keycache);
3611
res= flush_key_blocks_int(keycache, file, type);
3612
dec_counter_for_resize_op(keycache);
3614
keycache_pthread_mutex_unlock(&keycache->cache_lock);
3620
Flush all blocks in the key cache to disk.
3623
flush_all_key_blocks()
3624
keycache pointer to key cache root structure
3628
Flushing of the whole key cache is done in two phases.
3630
1. Flush all changed blocks, waiting for them if necessary. Loop
3631
until there is no changed block left in the cache.
3633
2. Free all clean blocks. Normally this means free all blocks. The
3634
changed blocks were flushed in phase 1 and became clean. However we
3635
may need to wait for blocks that are read by other threads. While we
3636
wait, a clean block could become changed if that operation started
3637
before the resize operation started. To be safe we must restart at
3640
When we can run through the changed_blocks and file_blocks hashes
3641
without finding a block any more, then we are done.
3643
Note that we hold keycache->cache_lock all the time unless we need
3644
to wait for something.
3651
static int flush_all_key_blocks(KEY_CACHE *keycache)
3660
safe_mutex_assert_owner(&keycache->cache_lock);
3664
Phase1: Flush all changed blocks, waiting for them if necessary.
3665
Loop until there is no changed block left in the cache.
3670
/* Step over the whole changed_blocks hash array. */
3671
for (idx= 0; idx < CHANGED_BLOCKS_HASH; idx++)
3674
If an array element is non-empty, use the first block from its
3675
chain to find a file for flush. All changed blocks for this
3676
file are flushed. So the same block will not appear at this
3677
place again with the next iteration. New writes for blocks are
3678
not accepted during the flush. If multiple files share the
3679
same hash bucket, one of them will be flushed per iteration
3680
of the outer loop of phase 1.
3682
if ((block= keycache->changed_blocks[idx]))
3686
Flush dirty blocks but do not free them yet. They can be used
3687
for reading until all other blocks are flushed too.
3689
if (flush_key_blocks_int(keycache, block->hash_link->file,
3698
Phase 2: Free all clean blocks. Normally this means free all
3699
blocks. The changed blocks were flushed in phase 1 and became
3700
clean. However we may need to wait for blocks that are read by
3701
other threads. While we wait, a clean block could become changed
3702
if that operation started before the resize operation started. To
3703
be safe we must restart at phase 1.
3708
/* Step over the whole file_blocks hash array. */
3709
for (idx= 0; idx < CHANGED_BLOCKS_HASH; idx++)
3712
If an array element is non-empty, use the first block from its
3713
chain to find a file for flush. All blocks for this file are
3714
freed. So the same block will not appear at this place again
3715
with the next iteration. If multiple files share the
3716
same hash bucket, one of them will be flushed per iteration
3717
of the outer loop of phase 2.
3719
if ((block= keycache->file_blocks[idx]))
3723
if (flush_key_blocks_int(keycache, block->hash_link->file,
3732
If any clean block has been found, we may have waited for it to
3733
become free. In this case it could be possible that another clean
3734
block became dirty. This is possible if the write request existed
3735
before the resize started (BLOCK_FOR_UPDATE). Re-check the hashes.
3737
} while (total_found);
3743
Reset the counters of a key cache.
3746
reset_key_cache_counters()
3747
name the name of a key cache
3748
key_cache pointer to the key kache to be reset
3751
This procedure is used by process_key_caches() to reset the counters of all
3752
currently used key caches, both the default one and the named ones.
3755
0 on success (always because it can't fail)
3758
int reset_key_cache_counters(const char *name __attribute__((unused)),
3759
KEY_CACHE *key_cache)
3761
if (!key_cache->key_cache_inited)
3765
key_cache->global_blocks_changed= 0; /* Key_blocks_not_flushed */
3766
key_cache->global_cache_r_requests= 0; /* Key_read_requests */
3767
key_cache->global_cache_read= 0; /* Key_reads */
3768
key_cache->global_cache_w_requests= 0; /* Key_write_requests */
3769
key_cache->global_cache_write= 0; /* Key_writes */
3773
#if defined(KEYCACHE_TIMEOUT)
3775
#define KEYCACHE_DUMP_FILE "keycache_dump.txt"
3776
#define MAX_QUEUE_LEN 100
3779
static void keycache_dump(KEY_CACHE *keycache)
3781
FILE *keycache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w");
3782
struct st_my_thread_var *last;
3783
struct st_my_thread_var *thread;
3785
HASH_LINK *hash_link;
3786
KEYCACHE_PAGE *page;
3789
fprintf(keycache_dump_file, "thread:%u\n", thread->id);
3792
thread=last=waiting_for_hash_link.last_thread;
3793
fprintf(keycache_dump_file, "queue of threads waiting for hash link\n");
3797
thread=thread->next;
3798
page= (KEYCACHE_PAGE *) thread->opt_info;
3799
fprintf(keycache_dump_file,
3800
"thread:%u, (file,filepos)=(%u,%lu)\n",
3801
thread->id,(uint) page->file,(uint32_t) page->filepos);
3802
if (++i == MAX_QUEUE_LEN)
3805
while (thread != last);
3808
thread=last=waiting_for_block.last_thread;
3809
fprintf(keycache_dump_file, "queue of threads waiting for block\n");
3813
thread=thread->next;
3814
hash_link= (HASH_LINK *) thread->opt_info;
3815
fprintf(keycache_dump_file,
3816
"thread:%u hash_link:%u (file,filepos)=(%u,%u)\n",
3817
thread->id, (uint) HASH_LINK_NUMBER(hash_link),
3818
(uint) hash_link->file,(uint32_t) hash_link->diskpos);
3819
if (++i == MAX_QUEUE_LEN)
3822
while (thread != last);
3824
for (i=0 ; i< keycache->blocks_used ; i++)
3827
block= &keycache->block_root[i];
3828
hash_link= block->hash_link;
3829
fprintf(keycache_dump_file,
3830
"block:%u hash_link:%d status:%x #requests=%u waiting_for_readers:%d\n",
3831
i, (int) (hash_link ? HASH_LINK_NUMBER(hash_link) : -1),
3832
block->status, block->requests, block->condvar ? 1 : 0);
3833
for (j=0 ; j < 2; j++)
3835
KEYCACHE_WQUEUE *wqueue=&block->wqueue[j];
3836
thread= last= wqueue->last_thread;
3837
fprintf(keycache_dump_file, "queue #%d\n", j);
3842
thread=thread->next;
3843
fprintf(keycache_dump_file,
3844
"thread:%u\n", thread->id);
3845
if (++i == MAX_QUEUE_LEN)
3848
while (thread != last);
3852
fprintf(keycache_dump_file, "LRU chain:");
3853
block= keycache= used_last;
3858
block= block->next_used;
3859
fprintf(keycache_dump_file,
3860
"block:%u, ", BLOCK_NUMBER(block));
3862
while (block != keycache->used_last);
3864
fprintf(keycache_dump_file, "\n");
3866
fclose(keycache_dump_file);
3869
static int keycache_pthread_cond_wait(pthread_cond_t *cond,
3870
pthread_mutex_t *mutex)
3873
struct timeval now; /* time when we started waiting */
3874
struct timespec timeout; /* timeout value for the wait function */
3877
/* Get current time */
3878
gettimeofday(&now, &tz);
3879
/* Prepare timeout value */
3880
timeout.tv_sec= now.tv_sec + KEYCACHE_TIMEOUT;
3882
timeval uses microseconds.
3883
timespec uses nanoseconds.
3884
1 nanosecond = 1000 micro seconds
3886
timeout.tv_nsec= now.tv_usec * 1000;
3887
rc= pthread_cond_timedwait(cond, mutex, &timeout);
3888
if (rc == ETIMEDOUT || rc == ETIME)
3893
assert(rc != ETIMEDOUT);
3896
#endif /* defined(KEYCACHE_TIMEOUT) */
453
int file, enum flush_type type)
457
assert (!keycache->key_cache_inited);