/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/*
  These functions handle keyblock caching for ISAM and MyISAM tables.

  One cache can handle many files.
  It must contain buffers of the same blocksize.
  init_key_cache() should be used to init the cache handler.

  The free list (free_block_list) is a stack-like structure.
  When a block is freed by free_block(), it is pushed onto the stack.
  When a new block is required, we first try to pop one from the stack.
  If the stack is empty, we try to take a never-used block from the pool.
  If the pool is empty too, a block is taken from the LRU ring and, if
  necessary, flushed to disk. This is handled in find_key_block().
  With the new free list, the blocks can have three temperatures:
  hot, warm and cold (which is free). This is remembered in the block header
  by the enum BLOCK_TEMPERATURE temperature variable. Remembering the
  temperature is necessary to correctly count the number of warm blocks,
  which is required to decide when blocks are allowed to become hot. Whenever
  a block is inserted into another (sub-)chain, we take the old and new
  temperature into account to decide if we got one more or one less warm block.
  blocks_unused is the sum of never-used blocks in the pool and of currently
  free blocks. blocks_used is the number of blocks fetched from the pool and
  as such gives the maximum number of in-use blocks at any time.

  Key Cache Locking
  =================

  All key cache locking is done with a single mutex per key cache:
  keycache->cache_lock. This mutex is locked almost all the time
  when executing code in this file (mf_keycache.c).
  However, it is released for I/O and some copy operations.

  The cache_lock is also released when waiting for some event. Waiting
  and signalling is done via condition variables. In most cases the
  thread waits on its thread->suspend condition variable. Every thread
  has a my_thread_var structure, which contains this variable and a
  '*next' and '**prev' pointer. These pointers are used to insert the
  thread into a wait queue.

  A thread can wait for one block and thus be in one wait queue at a
  time only.

  Before starting to wait on its condition variable with
  pthread_cond_wait(), the thread adds itself to a specific wait queue
  with link_into_queue() (double linked with '*next' + '**prev') or
  wait_on_queue() (single linked with '*next').

  Another thread, when releasing a resource, looks up the waiting thread
  in the related wait queue. It sends a signal with
  pthread_cond_signal() to the waiting thread.

  NOTE: Depending on the particular wait situation, either the sending
  thread removes the waiting thread from the wait queue with
  unlink_from_queue() or release_whole_queue() respectively, or the waiting
  thread removes itself.

  There is one exception to this locking scheme when one thread wants
  to reuse a block for some other address. This works by first marking
  the block reserved (status= BLOCK_IN_SWITCH) and then waiting for all
  threads that are reading the block to finish. Each block has a
  reference to a condition variable (condvar). It holds a reference to
  the thread->suspend condition variable of the waiting thread (if such
  a thread exists). When that thread is signaled, the reference is
  cleared. The number of readers of a block is registered in
  block->hash_link->requests. See wait_for_readers() / remove_reader()
  for details. This is similar to the above, but it clearly means that
  only one thread can wait for a particular block. There is no queue in
  this case. Strangely enough, block->condvar is used for waiting for the
  assigned hash_link only. More precisely, it is used to wait for all
  requests to be unregistered from the assigned hash_link.

  The resize_queue serves two purposes:
  1. Threads that want to do a resize wait there if in_resize is set.
     This is not used in the server. The server refuses a second resize
     request if one is already active. keycache->in_init is used for the
     synchronization. See set_var.cc.
  2. Threads that want to access blocks during a resize wait here during
     the re-initialization phase.
  When the resize is done, all threads on the queue are signalled.
  Hypothetical resizers can compete for resizing, and read/write
  requests will restart to request blocks from the freshly resized
  cache. If the cache has been resized too small, it is disabled and
  'can_be_used' is false. In this case read/write requests bypass the
  cache. Since they increment and decrement 'cnt_for_resize_op', the
  next resizer can wait on the queue 'waiting_for_resize_cnt' until all
  I/O has finished.
*/
#include "config.h"
#include "drizzled/my_error.h"
#include "drizzled/internal/my_sys.h"
#include "keycache.h"
#include "drizzled/internal/m_string.h"
#include "drizzled/internal/my_bit.h"
#include <errno.h>
#include <stdarg.h>

using namespace drizzled;

static void change_key_cache_param(KEY_CACHE *keycache, uint32_t division_limit,
                            uint32_t age_threshold);
/*
  Some compilation flags have been added specifically for this module
  to control the following:
  - not to let a thread yield control when reading directly
    from the key cache, which might improve performance in many cases;
    to enable this add:
    #define SERIALIZED_READ_FROM_CACHE
  - to set an upper bound for the number of threads simultaneously
    using the key cache; this setting helps to determine an optimal
    size for the hash table and improves performance when the number of
    blocks in the key cache is much less than the number of threads
    accessing it;
    to set this number equal to <N> add
      #define MAX_THREADS <N>
  - to substitute calls of pthread_cond_wait for calls of
    pthread_cond_timedwait (wait with timeout set up);
    this setting should be used only when you want to trap a deadlock
    situation, which theoretically should not happen;
    to set the timeout equal to <T> seconds add
      #define KEYCACHE_TIMEOUT <T>

  Example of the settings:
    #define SERIALIZED_READ_FROM_CACHE
    #define MAX_THREADS   100
    #define KEYCACHE_TIMEOUT  1
*/

#define STRUCT_PTR(TYPE, MEMBER, a)                                           \
          (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER))
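
/*
  STRUCT_PTR recovers a pointer to an enclosing structure from a pointer
  to one of its members (the same idea as the Linux kernel's container_of
  macro). A minimal sketch of the intended use, mirroring the call in
  unlink_from_queue() below:

    wqueue->last_thread=
      STRUCT_PTR(internal::st_my_thread_var, next, thread->prev);

  Here thread->prev points at the 'next' member of the preceding queue
  element, so subtracting offsetof(internal::st_my_thread_var, next)
  yields the preceding element itself.
*/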
/* types of condition variables */
#define  COND_FOR_REQUESTED 0
#define  COND_FOR_SAVED     1

typedef pthread_cond_t KEYCACHE_CONDVAR;

/* descriptor of the page in the key cache block buffer */
struct st_keycache_page
{
  int file;               /* file to which the page belongs        */
  internal::my_off_t filepos;       /* position of the page in the file   */
};

/* element in the chain of a hash table bucket */
struct st_hash_link
{
  struct st_hash_link *next, **prev; /* to connect links in the same bucket  */
  struct st_block_link *block;       /* reference to the block for the page: */
  int file;                         /* from such a file                     */
  internal::my_off_t diskpos;                  /* with such an offset                  */
  uint32_t requests;                     /* number of requests for the page      */
};

/* simple states of a block */
#define BLOCK_ERROR           1 /* an error occurred when performing file i/o */
#define BLOCK_READ            2 /* file block is in the block buffer         */
#define BLOCK_IN_SWITCH       4 /* block is preparing to read new page       */
#define BLOCK_REASSIGNED      8 /* blk does not accept requests for old page */
#define BLOCK_IN_FLUSH       16 /* block is selected for flush               */
#define BLOCK_CHANGED        32 /* block buffer contains a dirty page        */
#define BLOCK_IN_USE         64 /* block is not free                         */
#define BLOCK_IN_EVICTION   128 /* block is selected for eviction            */
#define BLOCK_IN_FLUSHWRITE 256 /* block is in write to file                 */
#define BLOCK_FOR_UPDATE    512 /* block is selected for buffer modification */

/* page status, returned by find_key_block */
#define PAGE_READ               0
#define PAGE_TO_BE_READ         1
#define PAGE_WAIT_TO_BE_READ    2

/* block temperature determines in which (sub-)chain the block currently is */
enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , BLOCK_WARM , BLOCK_HOT };

/* key cache block */
struct st_block_link
{
  struct st_block_link
    *next_used, **prev_used;   /* to connect links in the LRU chain (ring)   */
  struct st_block_link
    *next_changed, **prev_changed; /* for lists of file dirty/clean blocks   */
  struct st_hash_link *hash_link; /* backward ptr to referring hash_link     */
  KEYCACHE_WQUEUE wqueue[2]; /* queues on waiting requests for new/old pages */
  uint32_t requests;          /* number of requests for the block                */
  unsigned char *buffer;           /* buffer for the block page                       */
  uint32_t offset;            /* beginning of modified data in the buffer        */
  uint32_t length;            /* end of data in the buffer                       */
  uint32_t status;            /* state of the block                              */
  enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */
  uint32_t hits_left;         /* number of hits left until promotion             */
  uint64_t last_hit_time; /* timestamp of the last hit                      */
  KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event    */
};

KEY_CACHE dflt_key_cache_var;
KEY_CACHE *dflt_key_cache= &dflt_key_cache_var;

#define FLUSH_CACHE         2000            /* sort this many blocks at once */

static int flush_all_key_blocks(KEY_CACHE *keycache);
static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
                          pthread_mutex_t *mutex);
static void release_whole_queue(KEYCACHE_WQUEUE *wqueue);
static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block);

#define KEYCACHE_HASH(f, pos)                                                 \
(((uint32_t) ((pos) / keycache->key_cache_block_size) +                          \
                                     (uint32_t) (f)) & (keycache->hash_entries-1))
#define FILE_HASH(f)                 ((uint) (f) & (CHANGED_BLOCKS_HASH-1))
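
/*
  Both macros rely on their table sizes being powers of two, so the
  bitwise AND with (size - 1) is a cheap modulo. A small worked example
  with hypothetical numbers: with key_cache_block_size == 1024,
  hash_entries == 512 and a page at file offset 3072 in file descriptor 7,

    KEYCACHE_HASH(7, 3072) = ((3072 / 1024) + 7) & 511 = 10

  keycache->hash_entries is forced to a power of two in init_key_cache()
  via next_power(); CHANGED_BLOCKS_HASH is expected to be a power of two
  as well.
*/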

#ifdef KEYCACHE_TIMEOUT
static int keycache_pthread_cond_wait(pthread_cond_t *cond,
                                      pthread_mutex_t *mutex);
#else
#define  keycache_pthread_cond_wait pthread_cond_wait
#endif

#define keycache_pthread_mutex_lock pthread_mutex_lock
#define keycache_pthread_mutex_unlock pthread_mutex_unlock
#define keycache_pthread_cond_signal pthread_cond_signal

static inline uint32_t next_power(uint32_t value)
{
  return my_round_up_to_next_power(value) << 1;
}

/*
  Initialize a key cache

  SYNOPSIS
    init_key_cache()
    keycache			pointer to a key cache data structure
    key_cache_block_size	size of blocks to keep cached data
    use_mem			total memory to use for the key cache
    division_limit		division limit (may be zero)
    age_threshold		age threshold (may be zero)

  RETURN VALUE
    number of blocks in the key cache, if successful,
    0 - otherwise.

  NOTES.
    If keycache->key_cache_inited != 0 we assume that the key cache
    is already initialized.  This is for now used by myisamchk, but shouldn't
    be something that a program should rely on!

    It's assumed that no two threads call this function simultaneously
    referring to the same key cache handle.

*/

int init_key_cache(KEY_CACHE *keycache, uint32_t key_cache_block_size,
		   size_t use_mem, uint32_t division_limit,
		   uint32_t age_threshold)
{
  uint32_t blocks, hash_links;
  size_t length;
  int error;
  assert(key_cache_block_size >= 512);

  if (keycache->key_cache_inited && keycache->disk_blocks > 0)
  {
    return(0);
  }

  keycache->global_cache_w_requests= keycache->global_cache_r_requests= 0;
  keycache->global_cache_read= keycache->global_cache_write= 0;
  keycache->disk_blocks= -1;
  if (! keycache->key_cache_inited)
  {
    keycache->key_cache_inited= 1;
    /*
      Initialize these variables once only.
      Their value must survive re-initialization during resizing.
    */
    keycache->in_resize= 0;
    keycache->resize_in_flush= 0;
    keycache->cnt_for_resize_op= 0;
    keycache->waiting_for_resize_cnt.last_thread= NULL;
    keycache->in_init= 0;
    pthread_mutex_init(&keycache->cache_lock, MY_MUTEX_INIT_FAST);
    keycache->resize_queue.last_thread= NULL;
  }

  keycache->key_cache_mem_size= use_mem;
  keycache->key_cache_block_size= key_cache_block_size;

  blocks= (uint32_t) (use_mem / (sizeof(BLOCK_LINK) + 2 * sizeof(HASH_LINK) +
                              sizeof(HASH_LINK*) * 5/4 + key_cache_block_size));
  /* It doesn't make sense to have too few blocks (less than 8) */
  if (blocks >= 8)
  {
    for ( ; ; )
    {
      /* Set my_hash_entries to the next bigger 2 power */
      if ((keycache->hash_entries= next_power(blocks)) < blocks * 5/4)
        keycache->hash_entries<<= 1;
      hash_links= 2 * blocks;
#if defined(MAX_THREADS)
      if (hash_links < MAX_THREADS + blocks - 1)
        hash_links= MAX_THREADS + blocks - 1;
#endif
      while ((length= (ALIGN_SIZE(blocks * sizeof(BLOCK_LINK)) +
		       ALIGN_SIZE(hash_links * sizeof(HASH_LINK)) +
		       ALIGN_SIZE(sizeof(HASH_LINK*) *
                                  keycache->hash_entries))) +
	     ((size_t) blocks * keycache->key_cache_block_size) > use_mem)
        blocks--;
      /* Allocate memory for cache page buffers */
      if ((keycache->block_mem= (unsigned char *)malloc((size_t) blocks * keycache->key_cache_block_size)))
      {
        /*
	  Allocate memory for blocks, hash_links and hash entries;
	  For each block 2 hash links are allocated
        */
        if ((keycache->block_root= (BLOCK_LINK*) malloc(length)))
          break;
        free(keycache->block_mem);
        keycache->block_mem= 0;
      }
      if (blocks < 8)
      {
        errno= ENOMEM;
        my_error(EE_OUTOFMEMORY, MYF(0), blocks * keycache->key_cache_block_size);
        goto err;
      }
      blocks= blocks / 4*3;
    }
    keycache->blocks_unused= blocks;
    keycache->disk_blocks= (int) blocks;
    keycache->hash_links= hash_links;
    keycache->hash_root= (HASH_LINK**) ((char*) keycache->block_root +
				        ALIGN_SIZE(blocks*sizeof(BLOCK_LINK)));
    keycache->hash_link_root= (HASH_LINK*) ((char*) keycache->hash_root +
				            ALIGN_SIZE((sizeof(HASH_LINK*) *
							keycache->hash_entries)));
    memset(keycache->block_root, 0,
           keycache->disk_blocks * sizeof(BLOCK_LINK));
    memset(keycache->hash_root, 0,
           keycache->hash_entries * sizeof(HASH_LINK*));
    memset(keycache->hash_link_root, 0,
           keycache->hash_links * sizeof(HASH_LINK));
    keycache->hash_links_used= 0;
    keycache->free_hash_list= NULL;
    keycache->blocks_used= keycache->blocks_changed= 0;

    keycache->global_blocks_changed= 0;
    keycache->blocks_available=0;		/* For debugging */

    /* The LRU chain is empty after initialization */
    keycache->used_last= NULL;
    keycache->used_ins= NULL;
    keycache->free_block_list= NULL;
    keycache->keycache_time= 0;
    keycache->warm_blocks= 0;
    keycache->min_warm_blocks= (division_limit ?
				blocks * division_limit / 100 + 1 :
				blocks);
    keycache->age_threshold= (age_threshold ?
			      blocks * age_threshold / 100 :
			      blocks);

    keycache->can_be_used= 1;

    keycache->waiting_for_hash_link.last_thread= NULL;
    keycache->waiting_for_block.last_thread= NULL;
    memset(keycache->changed_blocks, 0,
           sizeof(keycache->changed_blocks[0]) * CHANGED_BLOCKS_HASH);
    memset(keycache->file_blocks, 0,
           sizeof(keycache->file_blocks[0]) * CHANGED_BLOCKS_HASH);
  }
  else
  {
    /* myisam_key_buffer_size is specified too small. Disable the cache. */
    keycache->can_be_used= 0;
  }

  keycache->blocks= keycache->disk_blocks > 0 ? keycache->disk_blocks : 0;
  return((int) keycache->disk_blocks);

err:
  error= errno;
  keycache->disk_blocks= 0;
  keycache->blocks=  0;
  if (keycache->block_mem)
  {
    free(keycache->block_mem);
    keycache->block_mem= NULL;
  }
  if (keycache->block_root)
  {
    free((unsigned char*) keycache->block_root);
    keycache->block_root= NULL;
  }
  errno= error;
  keycache->can_be_used= 0;
  return(0);
}
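
/*
  A minimal sketch of the cache lifecycle, assuming a hypothetical caller
  that owns a KEY_CACHE object (the variable names and sizes below are
  illustrative only, not part of this file):

    KEY_CACHE cache;
    memset(&cache, 0, sizeof(cache));
    if (!init_key_cache(&cache, 1024, 8*1024*1024, 100, 300))
      ... handle failure; cache.can_be_used is 0 ...
    ... use the cache via the read/write/flush functions below ...
    resize_key_cache(&cache, 1024, 16*1024*1024, 100, 300);
    end_key_cache(&cache, 1);        -- cleanup == 1 also frees the mutex

  The division_limit and age_threshold arguments are percentages of the
  number of blocks; see change_key_cache_param() below.
*/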

/*
  Resize a key cache

  SYNOPSIS
    resize_key_cache()
    keycache			pointer to a key cache data structure
    key_cache_block_size        size of blocks to keep cached data
    use_mem			total memory to use for the new key cache
    division_limit		new division limit (if not zero)
    age_threshold		new age threshold (if not zero)

  RETURN VALUE
    number of blocks in the key cache, if successful,
    0 - otherwise.

  NOTES.
    The function first compares the memory size and the block size parameters
    with the key cache values.

    If they differ, the function frees the memory allocated for the
    old key cache blocks by calling the end_key_cache function and
    then rebuilds the key cache with new blocks by calling
    init_key_cache.

    The function starts the operation only when all other threads
    performing operations with the key cache let it proceed
    (when cnt_for_resize=0).
*/

int resize_key_cache(KEY_CACHE *keycache, uint32_t key_cache_block_size,
		     size_t use_mem, uint32_t division_limit,
		     uint32_t age_threshold)
{
  int blocks;

  if (!keycache->key_cache_inited)
    return(keycache->disk_blocks);

  if (key_cache_block_size == keycache->key_cache_block_size &&
      use_mem == keycache->key_cache_mem_size)
  {
    change_key_cache_param(keycache, division_limit, age_threshold);
    return(keycache->disk_blocks);
  }

  keycache_pthread_mutex_lock(&keycache->cache_lock);

  /*
    We may need to wait for another thread which is doing a resize
    already. This cannot happen in the MySQL server though. It allows
    one resizer only. In set_var.cc keycache->in_init is used to block
    multiple attempts.
  */
  while (keycache->in_resize)
  {
    wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
  }

  /*
    Mark the operation in progress. This blocks other threads from doing
    a resize in parallel. It prohibits new blocks from entering the cache.
    Read/write requests can bypass the cache during the flush phase.
  */
  keycache->in_resize= 1;

  /* Need to flush only if keycache is enabled. */
  if (keycache->can_be_used)
  {
    /* Start the flush phase. */
    keycache->resize_in_flush= 1;

    if (flush_all_key_blocks(keycache))
    {
      /* TODO: if this happens, we should write a warning in the log file ! */
      keycache->resize_in_flush= 0;
      blocks= 0;
      keycache->can_be_used= 0;
      goto finish;
    }

    /* End the flush phase. */
    keycache->resize_in_flush= 0;
  }

  /*
    Some direct read/write operations (bypassing the cache) may still be
    unfinished. Wait until they are done. If the key cache can be used,
    direct I/O is done in increments of key_cache_block_size. That is,
    every block is checked to see if it is in the cache. We need to wait
    for pending I/O before re-initializing the cache, because we may change
    the block size. Otherwise they could check for blocks at file
    positions where the new block division has none. We also want to
    wait for I/O to finish when (if) the cache was disabled. It must not
    run in parallel with normal cache operation.
  */
  while (keycache->cnt_for_resize_op)
    wait_on_queue(&keycache->waiting_for_resize_cnt, &keycache->cache_lock);

  /*
    Free old cache structures, allocate new structures, and initialize
    them. Note that the cache_lock mutex and the resize_queue are left
    untouched. We do not lose the cache_lock and will release it only at
    the end of this function.
  */
  end_key_cache(keycache, 0);			/* Don't free mutex */
  /* The following will work even if use_mem is 0 */
  blocks= init_key_cache(keycache, key_cache_block_size, use_mem,
			 division_limit, age_threshold);

finish:
  /*
    Mark the resize finished. This allows other threads to start a
    resize or to request new cache blocks.
  */
  keycache->in_resize= 0;

  /* Signal waiting threads. */
  release_whole_queue(&keycache->resize_queue);

  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  return(blocks);
}


/*
  Increment the counter blocking the resize key cache operation
*/
static inline void inc_counter_for_resize_op(KEY_CACHE *keycache)
{
  keycache->cnt_for_resize_op++;
}


/*
  Decrement the counter blocking the resize key cache operation;
  signal the operation to proceed when the counter reaches zero
*/
static inline void dec_counter_for_resize_op(KEY_CACHE *keycache)
{
  if (!--keycache->cnt_for_resize_op)
    release_whole_queue(&keycache->waiting_for_resize_cnt);
}

/*
  Change the key cache parameters

  SYNOPSIS
    change_key_cache_param()
    keycache			pointer to a key cache data structure
    division_limit		new division limit (if not zero)
    age_threshold		new age threshold (if not zero)

  RETURN VALUE
    none

  NOTES.
    Presently the function resets the key cache parameters
    concerning the midpoint insertion strategy - division_limit and
    age_threshold.
*/

static void change_key_cache_param(KEY_CACHE *keycache, uint32_t division_limit,
			    uint32_t age_threshold)
{
  keycache_pthread_mutex_lock(&keycache->cache_lock);
  if (division_limit)
    keycache->min_warm_blocks= (keycache->disk_blocks *
				division_limit / 100 + 1);
  if (age_threshold)
    keycache->age_threshold=   (keycache->disk_blocks *
				age_threshold / 100);
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  return;
}
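
/*
  Worked example with hypothetical numbers, for illustration only:
  with disk_blocks == 1000, division_limit == 10 and age_threshold == 300,
  the assignments above give

    min_warm_blocks = 1000 * 10  / 100 + 1 = 101
    age_threshold   = 1000 * 300 / 100     = 3000

  so at least ~10% of the blocks stay in the warm sub-chain, and a block
  at the insertion point of the hot sub-chain is demoted to warm once
  keycache->keycache_time has advanced by more than 3000 since its last
  hit (see unreg_request()). Both parameters are percentages of the
  number of blocks, as in init_key_cache().
*/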


/*
  Remove key_cache from memory

  SYNOPSIS
    end_key_cache()
    keycache		key cache handle
    cleanup		Complete free (also free the mutex for the key cache)

  RETURN VALUE
    none
*/

void end_key_cache(KEY_CACHE *keycache, bool cleanup)
{
  if (!keycache->key_cache_inited)
    return;

  if (keycache->disk_blocks > 0)
  {
    if (keycache->block_mem)
    {
      free(keycache->block_mem);
      keycache->block_mem= NULL;
      free((unsigned char*) keycache->block_root);
      keycache->block_root= NULL;
    }
    keycache->disk_blocks= -1;
    /* Reset blocks_changed to be safe if flush_all_key_blocks is called */
    keycache->blocks_changed= 0;
  }

  if (cleanup)
  {
    pthread_mutex_destroy(&keycache->cache_lock);
    keycache->key_cache_inited= keycache->can_be_used= 0;
  }
  return;
} /* end_key_cache */


/*
  Link a thread into the double-linked queue of waiting threads.

  SYNOPSIS
    link_into_queue()
      wqueue              pointer to the queue structure
      thread              pointer to the thread to be added to the queue

  RETURN VALUE
    none

  NOTES.
    The queue is represented by a circular list of thread structures.
    The list is double-linked of the type (**prev,*next), accessed by
    a pointer to the last element.
*/

static void link_into_queue(KEYCACHE_WQUEUE *wqueue,
                            internal::st_my_thread_var *thread)
{
  internal::st_my_thread_var *last;

  assert(!thread->next && !thread->prev);
  if (! (last= wqueue->last_thread))
  {
    /* Queue is empty */
    thread->next= thread;
    thread->prev= &thread->next;
  }
  else
  {
    thread->prev= last->next->prev;
    last->next->prev= &thread->next;
    thread->next= last->next;
    last->next= thread;
  }
  wqueue->last_thread= thread;
}

/*
  Unlink a thread from the double-linked queue of waiting threads

  SYNOPSIS
    unlink_from_queue()
      wqueue              pointer to the queue structure
      thread              pointer to the thread to be removed from the queue

  RETURN VALUE
    none

  NOTES.
    See NOTES for link_into_queue
*/

static void unlink_from_queue(KEYCACHE_WQUEUE *wqueue,
                                     internal::st_my_thread_var *thread)
{
  assert(thread->next && thread->prev);
  if (thread->next == thread)
    /* The queue contains only one member */
    wqueue->last_thread= NULL;
  else
  {
    thread->next->prev= thread->prev;
    *thread->prev=thread->next;
    if (wqueue->last_thread == thread)
      wqueue->last_thread= STRUCT_PTR(internal::st_my_thread_var, next,
                                      thread->prev);
  }
  thread->next= NULL;
  thread->prev= NULL;
}


/*
  Add a thread to the single-linked queue of waiting threads

  SYNOPSIS
    wait_on_queue()
      wqueue            Pointer to the queue structure.
      mutex             Cache_lock to re-acquire after waking up.

  RETURN VALUE
    none

  NOTES.
    The queue is represented by a circular list of thread structures.
    The list is single-linked of the type (*next), accessed by a pointer
    to the last element.

    The function protects against stray signals by verifying that the
    current thread is unlinked from the queue when awaking. However,
    since several threads can wait for the same event, it might be
    necessary for the caller of the function to check again whether the
    condition it is waiting for is indeed satisfied.
*/

static void wait_on_queue(KEYCACHE_WQUEUE *wqueue,
                          pthread_mutex_t *mutex)
{
  internal::st_my_thread_var *last;
  internal::st_my_thread_var *thread= my_thread_var;

  /* Add to queue. */
  assert(!thread->next);
  assert(!thread->prev); /* Not required, but must be true anyway. */
  if (! (last= wqueue->last_thread))
    thread->next= thread;
  else
  {
    thread->next= last->next;
    last->next= thread;
  }
  wqueue->last_thread= thread;

  /*
    Wait until thread is removed from queue by the signalling thread.
    The loop protects against stray signals.
  */
  do
  {
    keycache_pthread_cond_wait(&thread->suspend, mutex);
  }
  while (thread->next);
}


/*
  Remove all threads from the queue, signalling them to proceed

  SYNOPSIS
    release_whole_queue()
      wqueue            pointer to the queue structure

  RETURN VALUE
    none

  NOTES.
    See notes for wait_on_queue().
    When removed from the queue, each thread is signalled via its condition
    variable thread->suspend.
*/

static void release_whole_queue(KEYCACHE_WQUEUE *wqueue)
{
  internal::st_my_thread_var *last;
  internal::st_my_thread_var *next;
  internal::st_my_thread_var *thread;

  /* Queue may be empty. */
  if (!(last= wqueue->last_thread))
    return;

  next= last->next;
  do
  {
    thread=next;
    /* Signal the thread. */
    keycache_pthread_cond_signal(&thread->suspend);
    /* Take thread from queue. */
    next=thread->next;
    thread->next= NULL;
  }
  while (thread != last);

  /* Now queue is definitely empty. */
  wqueue->last_thread= NULL;
}
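
/*
  A minimal sketch of how the wait queue is used together with the
  release function (illustration only; the real call sites are elsewhere
  in this file, and 'some_queue' and the predicate are hypothetical):

    Waiting side (cache_lock is held on entry and on return):

      while (!condition_i_am_waiting_for)
        wait_on_queue(&keycache->some_queue, &keycache->cache_lock);

    Releasing side (also under cache_lock):

      make_condition_true();
      release_whole_queue(&keycache->some_queue);

  wait_on_queue() drops cache_lock inside pthread_cond_wait() and
  re-acquires it before returning; the do/while loop on thread->next is
  what protects against stray signals.
*/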


/*
  Unlink a block from the chain of dirty/clean blocks
*/
static void unlink_changed(BLOCK_LINK *block)
{
  assert(block->prev_changed && *block->prev_changed == block);
  if (block->next_changed)
    block->next_changed->prev_changed= block->prev_changed;
  *block->prev_changed= block->next_changed;
  block->next_changed= NULL;
  block->prev_changed= NULL;
}


/*
  Link a block into the chain of dirty/clean blocks
*/

static void link_changed(BLOCK_LINK *block, BLOCK_LINK **phead)
{
  assert(!block->next_changed);
  assert(!block->prev_changed);
  block->prev_changed= phead;
  if ((block->next_changed= *phead))
    (*phead)->prev_changed= &block->next_changed;
  *phead= block;
}


/*
  Link a block into the chain of clean blocks of a file.

  SYNOPSIS
    link_to_file_list()
      keycache		Key cache handle
      block             Block to relink
      file              File to be linked to
      unlink            Whether to unlink first

  DESCRIPTION
    Unlink a block from whichever chain it is linked in, if it's
    asked for, and link it to the chain of clean blocks of the
    specified file.

  NOTE
    Please never set/clear BLOCK_CHANGED outside of
    link_to_file_list() or link_to_changed_list().
    You would risk damaging the correct counting of changed blocks
    and finding blocks in the wrong hash.

  RETURN
    void
*/

static void link_to_file_list(KEY_CACHE *keycache,
                              BLOCK_LINK *block, int file,
                              bool unlink_block)
{
  assert(block->status & BLOCK_IN_USE);
  assert(block->hash_link && block->hash_link->block == block);
  assert(block->hash_link->file == file);
  if (unlink_block)
    unlink_changed(block);
  link_changed(block, &keycache->file_blocks[FILE_HASH(file)]);
  if (block->status & BLOCK_CHANGED)
  {
    block->status&= ~BLOCK_CHANGED;
    keycache->blocks_changed--;
    keycache->global_blocks_changed--;
  }
}


/*
  Re-link a block from the clean chain to the dirty chain of a file.

  SYNOPSIS
    link_to_changed_list()
      keycache		key cache handle
      block             block to relink

  DESCRIPTION
    Unlink a block from the chain of clean blocks of a file
    and link it to the chain of dirty blocks of the same file.

  NOTE
    Please never set/clear BLOCK_CHANGED outside of
    link_to_file_list() or link_to_changed_list().
    You would risk damaging the correct counting of changed blocks
    and finding blocks in the wrong hash.

  RETURN
    void
*/

static void link_to_changed_list(KEY_CACHE *keycache,
                                 BLOCK_LINK *block)
{
  assert(block->status & BLOCK_IN_USE);
  assert(!(block->status & BLOCK_CHANGED));
  assert(block->hash_link && block->hash_link->block == block);

  unlink_changed(block);
  link_changed(block,
               &keycache->changed_blocks[FILE_HASH(block->hash_link->file)]);
  block->status|=BLOCK_CHANGED;
  keycache->blocks_changed++;
  keycache->global_blocks_changed++;
}


/*
  Link a block to the LRU chain at the beginning or at the end of
  one of two parts.

  SYNOPSIS
    link_block()
      keycache            pointer to a key cache data structure
      block               pointer to the block to link to the LRU chain
      hot                 <-> to link the block into the hot subchain
      at_end              <-> to link the block at the end of the subchain

  RETURN VALUE
    none

  NOTES.
    The LRU ring is represented by a circular list of block structures.
    The list is double-linked of the (**prev,*next) type.
    The LRU ring is divided into two parts - hot and warm.
    There are two pointers to access the last blocks of these two
    parts. The beginning of the warm part follows right after the
    end of the hot part.
    Only blocks of the warm part can be used for eviction.
    The first block from the beginning of this subchain is always
    taken for eviction (keycache->used_last->next)

    LRU chain:       +------+   H O T    +------+
                +----| end  |----...<----| beg  |----+
                |    +------+last        +------+    |
                v<-link in latest hot (new end)      |
                |     link in latest warm (new end)->^
                |    +------+  W A R M   +------+    |
                +----| beg  |---->...----| end  |----+
                     +------+            +------+ins
                  first for eviction

    It is also possible that the block is selected for eviction and thus
    not linked in the LRU ring.
*/

static void link_block(KEY_CACHE *keycache, BLOCK_LINK *block, bool hot,
                       bool at_end)
{
  BLOCK_LINK *ins;
  BLOCK_LINK **pins;

  assert((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
  assert(block->hash_link); /*backptr to block NULL from free_block()*/
  assert(!block->requests);
  assert(block->prev_changed && *block->prev_changed == block);
  assert(!block->next_used);
  assert(!block->prev_used);
  if (!hot && keycache->waiting_for_block.last_thread)
  {
    /* Signal that in the LRU warm sub-chain an available block has appeared */
    internal::st_my_thread_var *last_thread=
                               keycache->waiting_for_block.last_thread;
    internal::st_my_thread_var *first_thread= last_thread->next;
    internal::st_my_thread_var *next_thread= first_thread;
    HASH_LINK *hash_link= (HASH_LINK *) first_thread->opt_info;
    internal::st_my_thread_var *thread;
    do
    {
      thread= next_thread;
      next_thread= thread->next;
      /*
         We notify all threads that are asking for the same page as the
         first thread in the queue
      */
      if ((HASH_LINK *) thread->opt_info == hash_link)
      {
        keycache_pthread_cond_signal(&thread->suspend);
        unlink_from_queue(&keycache->waiting_for_block, thread);
        block->requests++;
      }
    }
    while (thread != last_thread);
    hash_link->block= block;
    /*
      NOTE: We assigned the block to the hash_link and signalled the
      requesting thread(s). But it is possible that other threads run
      first. These threads see the hash_link assigned to a block which
      is assigned to another hash_link and not marked BLOCK_IN_SWITCH.
      This can be a problem for functions that do not select the block
      via its hash_link: flush and free. They only see a block which
      is in a "normal" state and don't know that it will be evicted soon.

      We cannot set BLOCK_IN_SWITCH here because only one of the
      requesting threads must handle the eviction. All others must wait
      for it to complete. If we set the flag here, the threads would not
      know who is in charge of the eviction. Without the flag, the first
      thread takes the stick and sets the flag.

      But we need to note in the block that it has been selected for
      eviction. It must not be freed. The evicting thread will not
      expect the block in the free list. Before freeing we could also
      check if block->requests > 1. But I think including another flag
      in the check of block->status is slightly more efficient and
      probably easier to read.
    */
    block->status|= BLOCK_IN_EVICTION;
    return;
  }
  pins= hot ? &keycache->used_ins : &keycache->used_last;
  ins= *pins;
  if (ins)
  {
    ins->next_used->prev_used= &block->next_used;
    block->next_used= ins->next_used;
    block->prev_used= &ins->next_used;
    ins->next_used= block;
    if (at_end)
      *pins= block;
  }
  else
  {
    /* The LRU ring is empty. Let the block point to itself. */
    keycache->used_last= keycache->used_ins= block->next_used= block;
    block->prev_used= &block->next_used;
  }
}


/*
  Unlink a block from the LRU chain

  SYNOPSIS
    unlink_block()
      keycache            pointer to a key cache data structure
      block               pointer to the block to unlink from the LRU chain

  RETURN VALUE
    none

  NOTES.
    See NOTES for link_block
*/

static void unlink_block(KEY_CACHE *keycache, BLOCK_LINK *block)
{
  assert((block->status & ~BLOCK_CHANGED) == (BLOCK_READ | BLOCK_IN_USE));
  assert(block->hash_link); /*backptr to block NULL from free_block()*/
  assert(!block->requests);
  assert(block->prev_changed && *block->prev_changed == block);
  assert(block->next_used && block->prev_used &&
              (block->next_used->prev_used == &block->next_used) &&
              (*block->prev_used == block));
  if (block->next_used == block)
    /* The list contains only one member */
    keycache->used_last= keycache->used_ins= NULL;
  else
  {
    block->next_used->prev_used= block->prev_used;
    *block->prev_used= block->next_used;
    if (keycache->used_last == block)
      keycache->used_last= STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
    if (keycache->used_ins == block)
      keycache->used_ins=STRUCT_PTR(BLOCK_LINK, next_used, block->prev_used);
  }
  block->next_used= NULL;
  block->prev_used= NULL;
}


/*
  Register requests for a block.

  SYNOPSIS
    reg_requests()
      keycache          Pointer to a key cache data structure.
      block             Pointer to the block to register a request on.
      count             Number of requests. Always 1.

  NOTE
    The first request unlinks the block from the LRU ring. This means
    that it is protected against eviction.

  RETURN
    void
*/
static void reg_requests(KEY_CACHE *keycache, BLOCK_LINK *block, int count)
{
  assert(block->status & BLOCK_IN_USE);
  assert(block->hash_link);

  if (!block->requests)
    unlink_block(keycache, block);
  block->requests+=count;
}


/*
  Unregister a request for a block,
  linking it to the LRU chain if it's the last request

  SYNOPSIS
    unreg_request()
    keycache            pointer to a key cache data structure
    block               pointer to the block to link to the LRU chain
    at_end              <-> to link the block at the end of the LRU chain

  RETURN VALUE
    none

  NOTES.
    Every linking to the LRU ring decrements by one a special block
    counter (if it's positive). If the at_end parameter is true the block is
    added either at the end of the warm sub-chain or at the end of the hot
    sub-chain.
    It is added to the hot subchain if its counter is zero and the number of
    blocks in the warm sub-chain is not less than some low limit (determined
    by the division_limit parameter). Otherwise the block is added to the
    warm sub-chain. If the at_end parameter is false the block is always
    added at the beginning of the warm sub-chain.
    Thus a warm block can be promoted to the hot sub-chain when its counter
    becomes zero for the first time.
    At the same time the block at the very beginning of the hot subchain
    might be moved to the beginning of the warm subchain if it stays untouched
    for too long a time (this time is determined by the parameter
    age_threshold).

    It is also possible that the block is selected for eviction and thus
    not linked in the LRU ring.
*/

static void unreg_request(KEY_CACHE *keycache,
                          BLOCK_LINK *block, int at_end)
{
  assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
  assert(block->hash_link); /*backptr to block NULL from free_block()*/
  assert(block->requests);
  assert(block->prev_changed && *block->prev_changed == block);
  assert(!block->next_used);
  assert(!block->prev_used);
  if (! --block->requests)
  {
    bool hot;
    if (block->hits_left)
      block->hits_left--;
    hot= !block->hits_left && at_end &&
      keycache->warm_blocks > keycache->min_warm_blocks;
    if (hot)
    {
      if (block->temperature == BLOCK_WARM)
        keycache->warm_blocks--;
      block->temperature= BLOCK_HOT;
    }
    link_block(keycache, block, hot, (bool)at_end);
    block->last_hit_time= keycache->keycache_time;
    keycache->keycache_time++;
    /*
      At this place, the block might be in the LRU ring or not. If an
      evicter was waiting for a block, it was selected for eviction and
      not linked in the LRU ring.
    */

    /*
      Check if we should link a hot block to the warm block sub-chain.
      It is possible that we select the same block as above. But it can
      also be another block. In any case a block from the LRU ring is
      selected. In other words it works even if the above block was
      selected for eviction and not linked in the LRU ring. Since this
      happens only if the LRU ring is empty, the block selected below
      would be NULL and the rest of the function skipped.
    */
    block= keycache->used_ins;
    if (block && keycache->keycache_time - block->last_hit_time >
	keycache->age_threshold)
    {
      unlink_block(keycache, block);
      link_block(keycache, block, 0, 0);
      if (block->temperature != BLOCK_WARM)
      {
        keycache->warm_blocks++;
        block->temperature= BLOCK_WARM;
      }
    }
  }
}

/*
  Remove a reader of the page in the block
*/

static void remove_reader(BLOCK_LINK *block)
{
  assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
  assert(block->hash_link && block->hash_link->block == block);
  assert(block->prev_changed && *block->prev_changed == block);
  assert(!block->next_used);
  assert(!block->prev_used);
  assert(block->hash_link->requests);
  if (! --block->hash_link->requests && block->condvar)
    keycache_pthread_cond_signal(block->condvar);
}


/*
  Wait until the last reader of the page in the block
  signals its termination
*/

static void wait_for_readers(KEY_CACHE *keycache,
                             BLOCK_LINK *block)
{
  internal::st_my_thread_var *thread= my_thread_var;
  assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
  assert(!(block->status & (BLOCK_ERROR | BLOCK_IN_FLUSH |
                                 BLOCK_CHANGED)));
  assert(block->hash_link);
  assert(block->hash_link->block == block);
  /* Linked in file_blocks or changed_blocks hash. */
  assert(block->prev_changed && *block->prev_changed == block);
  /* Not linked in LRU ring. */
  assert(!block->next_used);
  assert(!block->prev_used);
  while (block->hash_link->requests)
  {
    /* There must be no other waiter. We have no queue here. */
    assert(!block->condvar);
    block->condvar= &thread->suspend;
    keycache_pthread_cond_wait(&thread->suspend, &keycache->cache_lock);
    block->condvar= NULL;
  }
}
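
/*
  A minimal sketch of the single-waiter protocol above (illustration
  only; the real call sites appear later in this file):

    Reader side, for each request registered on the hash_link:

      ... read from block->buffer ...
      remove_reader(block);                  -- may signal block->condvar

    Evicting side, which wants to reuse the block for another page:

      block->status|= BLOCK_IN_SWITCH;
      wait_for_readers(keycache, block);     -- sleeps on thread->suspend
                                                until hash_link->requests
                                                drops to zero

  Because block->condvar can hold only one thread->suspend reference,
  at most one thread may wait for a given block this way.
*/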


/*
  Add a hash link to a bucket in the hash_table
*/

static inline void link_hash(HASH_LINK **start, HASH_LINK *hash_link)
{
  if (*start)
    (*start)->prev= &hash_link->next;
  hash_link->next= *start;
  hash_link->prev= start;
  *start= hash_link;
}


/*
  Remove a hash link from the hash table
*/

static void unlink_hash(KEY_CACHE *keycache, HASH_LINK *hash_link)
{
  assert(hash_link->requests == 0);
  if ((*hash_link->prev= hash_link->next))
    hash_link->next->prev= hash_link->prev;
  hash_link->block= NULL;
  if (keycache->waiting_for_hash_link.last_thread)
  {
    /* Signal that a free hash link has appeared */
    internal::st_my_thread_var *last_thread=
                               keycache->waiting_for_hash_link.last_thread;
    internal::st_my_thread_var *first_thread= last_thread->next;
    internal::st_my_thread_var *next_thread= first_thread;
    KEYCACHE_PAGE *first_page= (KEYCACHE_PAGE *) (first_thread->opt_info);
    internal::st_my_thread_var *thread;

    hash_link->file= first_page->file;
    hash_link->diskpos= first_page->filepos;
    do
    {
      KEYCACHE_PAGE *page;
      thread= next_thread;
      page= (KEYCACHE_PAGE *) thread->opt_info;
      next_thread= thread->next;
      /*
         We notify all threads that are asking for the same page as the
         first thread in the queue
      */
      if (page->file == hash_link->file && page->filepos == hash_link->diskpos)
      {
        keycache_pthread_cond_signal(&thread->suspend);
        unlink_from_queue(&keycache->waiting_for_hash_link, thread);
      }
    }
    while (thread != last_thread);
    link_hash(&keycache->hash_root[KEYCACHE_HASH(hash_link->file,
					         hash_link->diskpos)],
              hash_link);
    return;
  }
  hash_link->next= keycache->free_hash_list;
  keycache->free_hash_list= hash_link;
}


/*
  Get the hash link for a page
*/

static HASH_LINK *get_hash_link(KEY_CACHE *keycache,
                                int file, internal::my_off_t filepos)
{
  register HASH_LINK *hash_link, **start;

restart:
  /*
     Find the bucket in the hash table for the pair (file, filepos);
     start contains the head of the bucket list,
     hash_link points to the first member of the list
  */
  hash_link= *(start= &keycache->hash_root[KEYCACHE_HASH(file, filepos)]);
  /* Look for an element for the pair (file, filepos) in the bucket chain */
  while (hash_link &&
         (hash_link->diskpos != filepos || hash_link->file != file))
  {
    hash_link= hash_link->next;
  }
  if (! hash_link)
  {
    /* There is no hash link in the hash table for the pair (file, filepos) */
    if (keycache->free_hash_list)
    {
      hash_link= keycache->free_hash_list;
      keycache->free_hash_list= hash_link->next;
    }
    else if (keycache->hash_links_used < keycache->hash_links)
    {
      hash_link= &keycache->hash_link_root[keycache->hash_links_used++];
    }
    else
    {
      /* Wait for a free hash link */
      internal::st_my_thread_var *thread= my_thread_var;
      KEYCACHE_PAGE page;
      page.file= file;
      page.filepos= filepos;
      thread->opt_info= (void *) &page;
      link_into_queue(&keycache->waiting_for_hash_link, thread);
      keycache_pthread_cond_wait(&thread->suspend,
                                 &keycache->cache_lock);
      thread->opt_info= NULL;
      goto restart;
    }
    hash_link->file= file;
    hash_link->diskpos= filepos;
    link_hash(start, hash_link);
  }
  /* Register the request for the page */
  hash_link->requests++;
1358
  return hash_link;
1359
}
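/*
  Illustrative sketch only, not part of the original source: the
  wait-then-restart idiom that get_hash_link() uses when no free hash
  link is available, reduced to a hypothetical resource pool.  The key
  point is that after waking up the thread re-checks the whole situation
  ("goto restart"), because the world may have changed while it slept.
*/
#include <pthread.h>

struct example_pool
{
  pthread_mutex_t lock;
  pthread_cond_t  freed;            /* signalled when a resource is returned */
  int free_count;
};

static struct example_pool the_pool=
{
  PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 16
};

static void example_acquire(void)
{
  pthread_mutex_lock(&the_pool.lock);
restart:
  if (the_pool.free_count == 0)
  {
    /* Nothing free: sleep until someone releases, then re-validate. */
    pthread_cond_wait(&the_pool.freed, &the_pool.lock);
    goto restart;
  }
  the_pool.free_count--;
  pthread_mutex_unlock(&the_pool.lock);
}

static void example_release(void)
{
  pthread_mutex_lock(&the_pool.lock);
  the_pool.free_count++;
  pthread_cond_signal(&the_pool.freed);
  pthread_mutex_unlock(&the_pool.lock);
}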
1360
1361
1362
/*
1363
  Get a block for the file page requested by a keycache read/write operation;
1364
  if the page is not in the cache, return a free block; if there is none,
1365
  return the LRU block after saving its buffer if the page is dirty.
1366
1367
  SYNOPSIS
1368
1369
    find_key_block()
1370
      keycache            pointer to a key cache data structure
1371
      file                handler for the file to read page from
1372
      filepos             position of the page in the file
1373
      init_hits_left      how to initialize the block counter for the page
1374
      wrmode              <-> get for writing
1375
      page_st        out  {PAGE_READ,PAGE_TO_BE_READ,PAGE_WAIT_TO_BE_READ}
1376
1377
  RETURN VALUE
1378
    Pointer to the found block if successful, 0 - otherwise
1379
1380
  NOTES.
1381
    For the page from file positioned at filepos the function checks whether
1382
    the page is in the key cache specified by the first parameter.
1383
    If this is the case it immediately returns the block.
1384
    If not, the function first chooses a block for this page. If there are
1385
    no unused blocks in the key cache yet, the function takes the block
1386
    at the very beginning of the warm sub-chain. It saves the page in that
1387
    block if it's dirty before returning the pointer to it.
1388
    The function returns in the page_st parameter the following values:
1389
      PAGE_READ            - if the page is already in the block,
1390
      PAGE_TO_BE_READ      - if it is yet to be read by the current thread
1391
      PAGE_WAIT_TO_BE_READ - if it is to be read by another thread
1392
    If an error occurs, the BLOCK_ERROR bit is set in the block status.
1393
    It might happen that there are no blocks in the LRU chain (in the warm
1394
    part) - all blocks are unlinked for some read/write operations. Then the
1395
    function waits until the first of these operations links a block back.
1396
*/
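/*
  Illustrative sketch only, not part of the original source: a condensed
  caller of find_key_block() that shows how the page_st result is
  consumed.  It ignores resize bookkeeping, block alignment and partial
  reads; the real, complete version of this logic is key_cache_read()
  further below.  The function name example_fetch_page is hypothetical.
*/
static int example_fetch_page(KEY_CACHE *keycache, int file,
                              internal::my_off_t filepos, int level,
                              unsigned char *dst, uint32_t length)
{
  int page_st;
  uint32_t status;
  BLOCK_LINK *block;

  keycache_pthread_mutex_lock(&keycache->cache_lock);
  block= find_key_block(keycache, file, filepos, level, 0, &page_st);
  if (!block)
  {
    /* Happens only during resize: the caller must bypass the cache. */
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
    return 1;
  }
  if (page_st != PAGE_READ)
  {
    /*
      PAGE_TO_BE_READ: we are the primary reader and do the disk read.
      PAGE_WAIT_TO_BE_READ: another thread reads it; read_block() just waits.
    */
    read_block(keycache, block, keycache->key_cache_block_size, length,
               (page_st == PAGE_TO_BE_READ));
  }
  if (!((status= block->status) & BLOCK_ERROR))
    memcpy(dst, block->buffer, (size_t) length);
  remove_reader(block);
  /* Last request gone: the block may be linked back into the LRU ring. */
  unreg_request(keycache, block, 1);
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
  return (status & BLOCK_ERROR) ? 1 : 0;
}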
1397
1398
static BLOCK_LINK *find_key_block(KEY_CACHE *keycache,
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
1399
                                  int file, internal::my_off_t filepos,
1 by brian
clean slate
1400
                                  int init_hits_left,
1401
                                  int wrmode, int *page_st)
1402
{
1403
  HASH_LINK *hash_link;
1404
  BLOCK_LINK *block;
1405
  int error= 0;
1406
  int page_status;
1407
1408
restart:
1409
  /*
1410
    If the flush phase of a resize operation fails, the cache is left
1411
    unusable. This will be detected only after "goto restart".
1412
  */
1413
  if (!keycache->can_be_used)
51.1.122 by Jay Pipes
Round 2 - removal custom KEYCACHE_DEBUG and THREAD_TRACE from keycache
1414
    return(0);
1 by brian
clean slate
1415
1416
  /*
1417
    Find the hash_link for the requested file block (file, filepos). We
1418
    do always get a hash_link here. It has registered our request so
1419
    that no other thread can use it for another file block until we
1420
    release the request (which is done by remove_reader() usually). The
1421
    hash_link can have a block assigned to it or not. If there is a
1422
    block, it may be assigned to this hash_link or not. In cases where a
1423
    block is evicted from the cache, it is taken from the LRU ring and
1424
    referenced by the new hash_link. But the block can still be assigned
1425
    to its old hash_link for some time if it needs to be flushed first,
1426
    or if there are other threads still reading it.
1427
1428
    Summary:
1429
      hash_link is always returned.
1430
      hash_link->block can be:
1431
      - NULL or
1432
      - not assigned to this hash_link or
1433
      - assigned to this hash_link. If assigned, the block can have
1434
        - invalid data (when freshly assigned) or
1435
        - valid data. Valid data can be
1436
          - changed over the file contents (dirty) or
1437
          - not changed (clean).
1438
  */
1439
  hash_link= get_hash_link(keycache, file, filepos);
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1440
  assert((hash_link->file == file) && (hash_link->diskpos == filepos));
1 by brian
clean slate
1441
1442
  page_status= -1;
1443
  if ((block= hash_link->block) &&
1444
      block->hash_link == hash_link && (block->status & BLOCK_READ))
1445
  {
1446
    /* Assigned block with valid (changed or unchanged) contents. */
1447
    page_status= PAGE_READ;
1448
  }
1449
  /*
1450
    else (page_status == -1)
1451
      - block == NULL or
1452
      - block not assigned to this hash_link or
1453
      - block assigned but not yet read from file (invalid data).
1454
  */
1455
1456
  if (keycache->in_resize)
1457
  {
1458
    /* This is a request during a resize operation */
1459
1460
    if (!block)
1461
    {
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
1462
      internal::st_my_thread_var *thread;
1 by brian
clean slate
1463
1464
      /*
1465
        The file block is not in the cache. We don't need it in the
1466
        cache: we are going to read or write directly to file. Cancel
1467
        the request. We can simply decrement hash_link->requests because
1468
        we did not release cache_lock since increasing it. So no other
1469
        thread can wait for our request to become released.
1470
      */
1471
      if (hash_link->requests == 1)
1472
      {
1473
        /*
1474
          We are the only one to request this hash_link (this file/pos).
1475
          Free the hash_link.
1476
        */
1477
        hash_link->requests--;
1478
        unlink_hash(keycache, hash_link);
51.1.122 by Jay Pipes
Round 2 - removal custom KEYCACHE_DEBUG and THREAD_TRACE from keycache
1479
        return(0);
1 by brian
clean slate
1480
      }
1481
1482
      /*
1483
        More requests on the hash_link. Someone tries to evict a block
1484
        for this hash_link (could have started before resizing started).
1485
        This means that the LRU ring is empty. Otherwise a block could
1486
        be assigned immediately. Behave like a thread that wants to
1487
        evict a block for this file/pos. Add to the queue of threads
1488
        waiting for a block. Wait until there is one assigned.
1489
1490
        Refresh the request on the hash-link so that it cannot be reused
1491
        for another file/pos.
1492
      */
1493
      thread= my_thread_var;
1494
      thread->opt_info= (void *) hash_link;
1495
      link_into_queue(&keycache->waiting_for_block, thread);
1496
      do
1497
      {
1498
        keycache_pthread_cond_wait(&thread->suspend,
1499
                                   &keycache->cache_lock);
1500
      } while (thread->next);
1501
      thread->opt_info= NULL;
1502
      /*
1503
        A block should now be assigned to the hash_link. But it may
1504
        still need to be evicted. Anyway, we should re-check the
1505
        situation. page_status must be set correctly.
1506
      */
1507
      hash_link->requests--;
1508
      goto restart;
1509
    } /* end of if (!block) */
1510
1511
    /*
1512
      There is a block for this file/pos in the cache. Register a
1513
      request on it. This unlinks it from the LRU ring (if it is there)
1514
      and hence protects it against eviction (if not already in
1515
      eviction). We need this for returning the block to the caller, for
1516
      calling remove_reader() (for debugging purposes), and for calling
1517
      free_block(). The only case where we don't need the request is if
1518
      the block is in eviction. In that case we have to unregister the
1519
      request later.
1520
    */
1521
    reg_requests(keycache, block, 1);
1522
1523
    if (page_status != PAGE_READ)
1524
    {
1525
      /*
1526
        - block not assigned to this hash_link or
1527
        - block assigned but not yet read from file (invalid data).
1528
1529
        This must be a block in eviction. It will be read soon. We need
1530
        to wait here until this has happened. Otherwise the caller could
1531
        access a wrong block or a block which is in read. While waiting
1532
        we cannot lose the hash_link nor the block. We have registered a request
1533
        on the hash_link. Anything can happen to the block except changes
1534
        in the hash_link -> block relationship. In other words:
1535
        anything can happen to the block except being freed or another
1536
        completed eviction.
1537
1538
        Note that we behave like a secondary requestor here. We just
1539
        cannot return with PAGE_WAIT_TO_BE_READ. That would work only for
1540
        read requests and for writes on dirty blocks that are not in
1541
        flush. Waiting here on COND_FOR_REQUESTED works in all
1542
        situations.
1543
      */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1544
      assert(((block->hash_link != hash_link) &&
1 by brian
clean slate
1545
                   (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
1546
                  ((block->hash_link == hash_link) &&
1547
                   !(block->status & BLOCK_READ)));
1548
      wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
1549
      /*
1550
        Here we can trust that the block has been assigned to this
1551
        hash_link (block->hash_link == hash_link) and read into the
1552
        buffer (BLOCK_READ). The worst thing possible here is that the
1553
        block is in free (BLOCK_REASSIGNED). But the block is still
1554
        assigned to the hash_link. The freeing thread waits until we
1555
        release our request on the hash_link. The block cannot be in
1556
        eviction again because we registered a request on it before
1557
        starting to wait.
1558
      */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1559
      assert(block->hash_link == hash_link);
1560
      assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1561
      assert(!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)));
1 by brian
clean slate
1562
    }
1563
    /*
1564
      The block is in the cache. Assigned to the hash_link. Valid data.
1565
      Note that in case of page_st == PAGE_READ, the block can be marked
1566
      for eviction. In any case it can be marked for freeing.
1567
    */
1568
1569
    if (!wrmode)
1570
    {
1571
      /* A reader can just read the block. */
1572
      *page_st= PAGE_READ;
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1573
      assert((hash_link->file == file) &&
1 by brian
clean slate
1574
                  (hash_link->diskpos == filepos) &&
1575
                  (block->hash_link == hash_link));
51.1.122 by Jay Pipes
Round 2 - removal custom KEYCACHE_DEBUG and THREAD_TRACE from keycache
1576
      return(block);
1 by brian
clean slate
1577
    }
1578
1579
    /*
1580
      This is a writer. No two writers for the same block can exist.
1581
      This must be assured by locks outside of the key cache.
1582
    */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1583
    assert(!(block->status & BLOCK_FOR_UPDATE));
1 by brian
clean slate
1584
1585
    while (block->status & BLOCK_IN_FLUSH)
1586
    {
1587
      /*
1588
        Wait until the block is flushed to file. Do not release the
1589
        request on the hash_link yet to prevent that the block is freed
1590
        or reassigned while we wait. While we wait, several things can
1591
        happen to the block, including another flush. But the block
1592
        cannot be reassigned to another hash_link until we release our
1593
        request on it. But it can be marked BLOCK_REASSIGNED from free
1594
        or eviction, while they wait for us to release the hash_link.
1595
      */
1596
      wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
1597
      /*
1598
        If the flush phase failed, the resize could have finished while
1599
        we waited here.
1600
      */
1601
      if (!keycache->in_resize)
1602
      {
1603
        remove_reader(block);
1604
        unreg_request(keycache, block, 1);
1605
        goto restart;
1606
      }
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1607
      assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1608
      assert(!(block->status & BLOCK_FOR_UPDATE));
1609
      assert(block->hash_link == hash_link);
1 by brian
clean slate
1610
    }
1611
1612
    if (block->status & BLOCK_CHANGED)
1613
    {
1614
      /*
1615
        We want to write a block with changed contents. If the cache
1616
        block size is bigger than the callers block size (e.g. MyISAM),
1617
        the caller may replace part of the block only. Changes of the
1618
        other part of the block must be preserved. Since the block has
1619
        not yet been selected for flush, we can still add our changes.
1620
      */
1621
      *page_st= PAGE_READ;
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1622
      assert((hash_link->file == file) &&
1 by brian
clean slate
1623
                  (hash_link->diskpos == filepos) &&
1624
                  (block->hash_link == hash_link));
51.1.122 by Jay Pipes
Round 2 - removal custom KEYCACHE_DEBUG and THREAD_TRACE from keycache
1625
      return(block);
1 by brian
clean slate
1626
    }
1627
1628
    /*
1629
      This is a write request for a clean block. We do not want to have
1630
      new dirty blocks in the cache while resizing. We will free the
1631
      block and write directly to file. If the block is in eviction or
1632
      in free, we just let it go.
1633
1634
      Unregister from the hash_link. This must be done before freeing
1635
      the block. And it must be done if not freeing the block. Because
1636
      we could have waited above, we need to call remove_reader(). Other
1637
      threads could wait for us to release our request on the hash_link.
1638
    */
1639
    remove_reader(block);
1640
1641
    /* If the block is not in eviction and not in free, we can free it. */
1642
    if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1643
                           BLOCK_REASSIGNED)))
1644
    {
1645
      /*
1646
        Free block as we are going to write directly to file.
1647
        Although we have an exclusive lock for the updated key part,
1648
        the control can be yielded by the current thread as we might
1649
        have unfinished readers of other key parts in the block
1650
        buffer. Still we are guaranteed not to have any readers
1651
        of the key part we are writing into until the block is
1652
        removed from the cache as we set the BLOCK_REASSIGNED
1653
        flag (see the code below that handles reading requests).
1654
      */
1655
      free_block(keycache, block);
1656
    }
1657
    else
1658
    {
1659
      /*
1660
        The block will be evicted/freed soon. Don't touch it in any way.
1661
        Unregister the request that we registered above.
1662
      */
1663
      unreg_request(keycache, block, 1);
1664
1665
      /*
1666
        The block is still assigned to the hash_link (the file/pos that
1667
        we are going to write to). Wait until the eviction/free is
1668
        complete. Otherwise the direct write could complete before all
1669
        readers are done with the block. So they could read outdated
1670
        data.
1671
1672
        Since we released our request on the hash_link, it can be reused
1673
        for another file/pos. Hence we cannot just check for
1674
        block->hash_link == hash_link. As long as the resize is
1675
        proceeding the block cannot be reassigned to the same file/pos
1676
        again. So we can terminate the loop when the block is no longer
1677
        assigned to this file/pos.
1678
      */
1679
      do
1680
      {
1681
        wait_on_queue(&block->wqueue[COND_FOR_SAVED],
1682
                      &keycache->cache_lock);
1683
        /*
1684
          If the flush phase failed, the resize could have finished
1685
          while we waited here.
1686
        */
1687
        if (!keycache->in_resize)
1688
          goto restart;
1689
      } while (block->hash_link &&
1690
               (block->hash_link->file == file) &&
1691
               (block->hash_link->diskpos == filepos));
1692
    }
51.1.122 by Jay Pipes
Round 2 - removal custom KEYCACHE_DEBUG and THREAD_TRACE from keycache
1693
    return(0);
1 by brian
clean slate
1694
  }
1695
1696
  if (page_status == PAGE_READ &&
1697
      (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
1698
                        BLOCK_REASSIGNED)))
1699
  {
1700
    /*
1701
      This is a request for a block to be removed from cache. The block
1702
      is assigned to this hash_link and contains valid data, but is
1703
      marked for eviction or to be freed. Possible reasons why it has
1704
      not yet been evicted/freed can be a flush before reassignment
1705
      (BLOCK_IN_SWITCH), readers of the block have not finished yet
1706
      (BLOCK_REASSIGNED), or the evicting thread has not yet awoken after
1707
      the block was selected for it (BLOCK_IN_EVICTION).
1708
1709
       Only reading requests can proceed until the old dirty page is flushed,
1710
       all others are to be suspended, then resubmitted
1711
    */
1712
    if (!wrmode && !(block->status & BLOCK_REASSIGNED))
1713
    {
1714
      /*
1715
        This is a read request and the block not yet reassigned. We can
1716
        register our request and proceed. This unlinks the block from
1717
        the LRU ring and protects it against eviction.
1718
      */
1719
      reg_requests(keycache, block, 1);
1720
    }
1721
    else
1722
    {
1723
      /*
1724
        Either this is a write request for a block that is in eviction
1725
        or in free. We must not use it any more. Instead we must evict
1726
        another block. But we cannot do this before the eviction/free is
1727
        done. Otherwise we would find the same hash_link + block again
1728
        and again.
1729
1730
        Or this is a read request for a block in eviction/free that does
1731
        not require a flush, but waits for readers to finish with the
1732
        block. We do not read this block to let the eviction/free happen
1733
        as soon as possible. Again we must wait so that we don't find
1734
        the same hash_link + block again and again.
1735
      */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1736
      assert(hash_link->requests);
1 by brian
clean slate
1737
      hash_link->requests--;
1738
      wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
1739
      /*
1740
        The block is no longer assigned to this hash_link.
1741
        Get another one.
1742
      */
1743
      goto restart;
1744
    }
1745
  }
1746
  else
1747
  {
1748
    /*
1749
      This is a request for a new block or for a block not to be removed.
1750
      Either
1751
      - block == NULL or
1752
      - block not assigned to this hash_link or
1753
      - block assigned but not yet read from file,
1754
      or
1755
      - block assigned with valid (changed or unchanged) data and
1756
      - it will not be reassigned/freed.
1757
    */
1758
    if (! block)
1759
    {
1760
      /* No block is assigned to the hash_link yet. */
1761
      if (keycache->blocks_unused)
1762
      {
1763
        if (keycache->free_block_list)
1764
        {
1765
          /* There is a block in the free list. */
1766
          block= keycache->free_block_list;
1767
          keycache->free_block_list= block->next_used;
1768
          block->next_used= NULL;
1769
        }
1770
        else
1771
        {
1772
          /* There are some never used blocks, take first of them */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1773
          assert(keycache->blocks_used <
303 by Brian Aker
First pass in removing ulong from MyISAM
1774
                      (uint32_t) keycache->disk_blocks);
1 by brian
clean slate
1775
          block= &keycache->block_root[keycache->blocks_used];
1776
          block->buffer= ADD_TO_PTR(keycache->block_mem,
303 by Brian Aker
First pass in removing ulong from MyISAM
1777
                                    ((uint32_t) keycache->blocks_used*
1 by brian
clean slate
1778
                                     keycache->key_cache_block_size),
481 by Brian Aker
Remove all of uchar.
1779
                                    unsigned char*);
1 by brian
clean slate
1780
          keycache->blocks_used++;
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1781
          assert(!block->next_used);
1 by brian
clean slate
1782
        }
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1783
        assert(!block->prev_used);
1784
        assert(!block->next_changed);
1785
        assert(!block->prev_changed);
1786
        assert(!block->hash_link);
1787
        assert(!block->status);
1788
        assert(!block->requests);
1 by brian
clean slate
1789
        keycache->blocks_unused--;
1790
        block->status= BLOCK_IN_USE;
1791
        block->length= 0;
1792
        block->offset= keycache->key_cache_block_size;
1793
        block->requests= 1;
1794
        block->temperature= BLOCK_COLD;
1795
        block->hits_left= init_hits_left;
1796
        block->last_hit_time= 0;
1797
        block->hash_link= hash_link;
1798
        hash_link->block= block;
1799
        link_to_file_list(keycache, block, file, 0);
1800
        page_status= PAGE_TO_BE_READ;
1801
      }
1802
      else
1803
      {
1804
	/*
1805
          There are no free blocks and no never used blocks, use a block
1806
          from the LRU ring.
1807
        */
1808
1809
        if (! keycache->used_last)
1810
        {
1811
          /*
1812
            The LRU ring is empty. Wait until a new block is added to
1813
            it. Several threads might wait here for the same hash_link,
1814
            all of them must get the same block. While waiting for a
1815
            block, after a block is selected for this hash_link, other
1816
            threads can run first before this one awakes. During this
1817
            time interval other threads find this hash_link pointing to
1818
            the block, which is still assigned to another hash_link. In
1819
            this case the block is not marked BLOCK_IN_SWITCH yet, but
1820
            it is marked BLOCK_IN_EVICTION.
1821
          */
1822
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
1823
          internal::st_my_thread_var *thread= my_thread_var;
1 by brian
clean slate
1824
          thread->opt_info= (void *) hash_link;
1825
          link_into_queue(&keycache->waiting_for_block, thread);
1826
          do
1827
          {
1828
            keycache_pthread_cond_wait(&thread->suspend,
1829
                                       &keycache->cache_lock);
1830
          }
1831
          while (thread->next);
1832
          thread->opt_info= NULL;
1833
          /* Assert that block has a request registered. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1834
          assert(hash_link->block->requests);
1 by brian
clean slate
1835
          /* Assert that block is not in LRU ring. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1836
          assert(!hash_link->block->next_used);
1837
          assert(!hash_link->block->prev_used);
1 by brian
clean slate
1838
        }
1839
        /*
1840
          If we waited above, hash_link->block has been assigned by
1841
          link_block(). Otherwise it is still NULL. In the latter case
1842
          we need to grab a block from the LRU ring ourselves.
1843
        */
1844
        block= hash_link->block;
1845
        if (! block)
1846
        {
1847
          /* Select the last block from the LRU ring. */
1848
          block= keycache->used_last->next_used;
1849
          block->hits_left= init_hits_left;
1850
          block->last_hit_time= 0;
1851
          hash_link->block= block;
1852
          /*
1853
            Register a request on the block. This unlinks it from the
1854
            LRU ring and protects it against eviction.
1855
          */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1856
          assert(!block->requests);
1 by brian
clean slate
1857
          reg_requests(keycache, block,1);
1858
          /*
1859
            We do not need to set block->status|= BLOCK_IN_EVICTION here
1860
            because we will set block->status|= BLOCK_IN_SWITCH
1861
            immediately without releasing the lock in between. This does
1862
            also support debugging. When looking at the block, one can
1863
            see if the block has been selected by link_block() after the
1864
            LRU ring was empty, or if it was grabbed directly from the
1865
            LRU ring in this branch.
1866
          */
1867
        }
1868
1869
        /*
1870
          If we had to wait above, there is a small chance that another
1871
          thread grabbed this block for the same file block already. But
1872
          in most cases the first condition is true.
1873
        */
1874
        if (block->hash_link != hash_link &&
1875
	    ! (block->status & BLOCK_IN_SWITCH) )
1876
        {
1877
	  /* this is a primary request for a new page */
1878
          block->status|= BLOCK_IN_SWITCH;
1879
1880
          if (block->status & BLOCK_CHANGED)
1881
          {
1882
	    /* The block contains a dirty page - push it out of the cache */
1883
1884
            if (block->status & BLOCK_IN_FLUSH)
1885
            {
1886
              /*
1887
                The block is marked for flush. If we do not wait here,
1888
                it could happen that we write the block, reassign it to
1889
                another file block, then, before the new owner can read
1890
                the new file block, the flusher writes the cache block
1891
                (which still has the old contents) to the new file block!
1892
              */
1893
              wait_on_queue(&block->wqueue[COND_FOR_SAVED],
1894
                            &keycache->cache_lock);
1895
              /*
1896
                The block is marked BLOCK_IN_SWITCH. It should be left
1897
                alone except for reading. No free, no write.
1898
              */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1899
              assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1900
              assert(!(block->status & (BLOCK_REASSIGNED |
1 by brian
clean slate
1901
                                             BLOCK_CHANGED |
1902
                                             BLOCK_FOR_UPDATE)));
1903
            }
1904
            else
1905
            {
1906
              block->status|= BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE;
1907
              /*
1908
                BLOCK_IN_EVICTION may be true or not. Other flags must
1909
                have a fixed value.
1910
              */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1911
              assert((block->status & ~BLOCK_IN_EVICTION) ==
1 by brian
clean slate
1912
                          (BLOCK_READ | BLOCK_IN_SWITCH |
1913
                           BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
1914
                           BLOCK_CHANGED | BLOCK_IN_USE));
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1915
              assert(block->hash_link);
1 by brian
clean slate
1916
1917
              keycache_pthread_mutex_unlock(&keycache->cache_lock);
1918
              /*
1919
                The call is thread safe because only the current
1920
                thread might change the block->hash_link value
1921
              */
32 by Brian Aker
More cleanup on pread()
1922
              error= (pwrite(block->hash_link->file,
1923
                             block->buffer+block->offset,
1924
                             block->length - block->offset,
1925
                             block->hash_link->diskpos+ block->offset) == 0);
1 by brian
clean slate
1926
              keycache_pthread_mutex_lock(&keycache->cache_lock);
1927
1928
              /* Block status must not have changed. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1929
              assert((block->status & ~BLOCK_IN_EVICTION) ==
1 by brian
clean slate
1930
                          (BLOCK_READ | BLOCK_IN_SWITCH |
1931
                           BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE |
51.1.123 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1932
                           BLOCK_CHANGED | BLOCK_IN_USE));
1 by brian
clean slate
1933
              keycache->global_cache_write++;
1934
            }
1935
          }
1936
1937
          block->status|= BLOCK_REASSIGNED;
1938
          /*
1939
            The block comes from the LRU ring. It must have a hash_link
1940
            assigned.
1941
          */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1942
          assert(block->hash_link);
1 by brian
clean slate
1943
          if (block->hash_link)
1944
          {
1945
            /*
1946
              All pending requests for this page must be resubmitted.
1947
              This must be done before waiting for readers. They could
1948
              wait for the flush to complete. And we must also do it
1949
              after the wait. Flushers might try to free the block while
1950
              we wait. They would wait until the reassignment is
1951
              complete. Also the block status must reflect the correct
1952
              situation: The block is not changed nor in flush any more.
1953
              Note that we must not change the BLOCK_CHANGED flag
1954
              outside of link_to_file_list() so that it is always in the
1955
              correct queue and the *blocks_changed counters are
1956
              correct.
1957
            */
1958
            block->status&= ~(BLOCK_IN_FLUSH | BLOCK_IN_FLUSHWRITE);
1959
            link_to_file_list(keycache, block, block->hash_link->file, 1);
1960
            release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
1961
            /*
1962
              The block is still assigned to its old hash_link.
1963
	      Wait until all pending read requests
1964
	      for this page are executed
1965
	      (we could have avoided this waiting, if we had read
1966
	      a page in the cache in a sweep, without yielding control)
1967
            */
1968
            wait_for_readers(keycache, block);
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1969
            assert(block->hash_link && block->hash_link->block == block &&
1 by brian
clean slate
1970
                        block->prev_changed);
1971
            /* The reader must not have been a writer. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1972
            assert(!(block->status & BLOCK_CHANGED));
1 by brian
clean slate
1973
1974
            /* Wake flushers that might have found the block in between. */
1975
            release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
1976
1977
            /* Remove the hash link for the old file block from the hash. */
1978
            unlink_hash(keycache, block->hash_link);
1979
1980
            /*
1981
              For sanity checks link_to_file_list() asserts that block
1982
              and hash_link refer to each other. Hence we need to assign
1983
              the hash_link first, but then we would not know if it was
1984
              linked before. Hence we would not know whether to unlink it. So
163 by Brian Aker
Merge Monty's code.
1985
              unlink it here and call link_to_file_list(..., false).
1 by brian
clean slate
1986
            */
1987
            unlink_changed(block);
1988
          }
1989
          block->status= error ? BLOCK_ERROR : BLOCK_IN_USE ;
1990
          block->length= 0;
1991
          block->offset= keycache->key_cache_block_size;
1992
          block->hash_link= hash_link;
1993
          link_to_file_list(keycache, block, file, 0);
1994
          page_status= PAGE_TO_BE_READ;
1995
51.1.123 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
1996
          assert(block->hash_link->block == block);
1997
          assert(hash_link->block->hash_link == hash_link);
1 by brian
clean slate
1998
        }
1999
        else
2000
        {
2001
          /*
2002
            Either (block->hash_link == hash_link),
2003
	    or     (block->status & BLOCK_IN_SWITCH).
2004
2005
            This is for secondary requests for a new file block only.
2006
            Either it is already assigned to the new hash_link meanwhile
2007
            (if we had to wait due to empty LRU), or it is already in
2008
            eviction by another thread. Since this block has been
2009
            grabbed from the LRU ring and attached to this hash_link,
2010
            another thread cannot grab the same block from the LRU ring
2011
            anymore. If the block is in eviction already, it must become
2012
            attached to the same hash_link and as such destined for the
2013
            same file block.
2014
          */
2015
          page_status= (((block->hash_link == hash_link) &&
2016
                         (block->status & BLOCK_READ)) ?
2017
                        PAGE_READ : PAGE_WAIT_TO_BE_READ);
2018
        }
2019
      }
2020
    }
2021
    else
2022
    {
2023
      /*
2024
        Block is not NULL. This hash_link points to a block.
2025
        Either
2026
        - block not assigned to this hash_link (yet) or
2027
        - block assigned but not yet read from file,
2028
        or
2029
        - block assigned with valid (changed or unchanged) data and
2030
        - it will not be reassigned/freed.
2031
2032
        The first condition means hash_link points to a block in
2033
        eviction. This is not necessarily marked by BLOCK_IN_SWITCH yet.
2034
        But then it is marked BLOCK_IN_EVICTION. See the NOTE in
2035
        link_block(). In both cases it is destined for this hash_link
2036
        and its file block address. When this hash_link got its block
2037
        address, the block was removed from the LRU ring and cannot be
2038
        selected for eviction (for another hash_link) again.
2039
2040
        Register a request on the block. This is another protection
2041
        against eviction.
2042
      */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2043
      assert(((block->hash_link != hash_link) &&
1 by brian
clean slate
2044
                   (block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))) ||
2045
                  ((block->hash_link == hash_link) &&
2046
                   !(block->status & BLOCK_READ)) ||
2047
                  ((block->status & BLOCK_READ) &&
2048
                   !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH))));
2049
      reg_requests(keycache, block, 1);
2050
      page_status= (((block->hash_link == hash_link) &&
2051
                     (block->status & BLOCK_READ)) ?
2052
                    PAGE_READ : PAGE_WAIT_TO_BE_READ);
2053
    }
2054
  }
2055
51.1.123 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2056
  assert(page_status != -1);
1 by brian
clean slate
2057
  /* Same assert basically, but be very sure. */
51.1.123 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2058
  assert(block);
1 by brian
clean slate
2059
  /* Assert that block has a request and is not in LRU ring. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2060
  assert(block->requests);
2061
  assert(!block->next_used);
2062
  assert(!block->prev_used);
1 by brian
clean slate
2063
  /* Assert that we return the correct block. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2064
  assert((page_status == PAGE_WAIT_TO_BE_READ) ||
1 by brian
clean slate
2065
              ((block->hash_link->file == file) &&
2066
               (block->hash_link->diskpos == filepos)));
2067
  *page_st=page_status;
2068
51.1.122 by Jay Pipes
Round 2 - removal custom KEYCACHE_DEBUG and THREAD_TRACE from keycache
2069
  return(block);
1 by brian
clean slate
2070
}
2071
2072
2073
/*
2074
  Read into a key cache block buffer from disk.
2075
2076
  SYNOPSIS
2077
2078
    read_block()
2079
      keycache            pointer to a key cache data structure
2080
      block               block to which buffer the data is to be read
2081
      read_length         size of data to be read
2082
      min_length          at least so much data must be read
2083
      primary             <-> the current thread will read the data
2084
2085
  RETURN VALUE
2086
    None
2087
2088
  NOTES.
2089
    The function either reads the page data from file into the block buffer,
2090
    or waits until another thread reads it. What page to read is determined
2091
    by the block parameter - a reference to a hash link for this page.
2092
    If an error occurs, the BLOCK_ERROR bit is set in the block status.
2093
    We do not report an error when the size of the successfully read
2094
    portion is less than read_length, but not less than min_length.
2095
*/
2096
2097
static void read_block(KEY_CACHE *keycache,
482 by Brian Aker
Remove uint.
2098
                       BLOCK_LINK *block, uint32_t read_length,
2099
                       uint32_t min_length, bool primary)
1 by brian
clean slate
2100
{
482 by Brian Aker
Remove uint.
2101
  uint32_t got_length;
1 by brian
clean slate
2102
2103
  /* On entry cache_lock is locked */
2104
2105
  if (primary)
2106
  {
2107
    /*
2108
      This code is executed only by threads that submitted primary
2109
      requests. Until block->status contains BLOCK_READ, all other
2110
      requests for the block become secondary requests. For a primary
2111
      request the block must be properly initialized.
2112
    */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2113
    assert(((block->status & ~BLOCK_FOR_UPDATE) == BLOCK_IN_USE));
2114
    assert((block->length == 0));
2115
    assert((block->offset == keycache->key_cache_block_size));
2116
    assert((block->requests > 0));
1 by brian
clean slate
2117
2118
    keycache->global_cache_read++;
2119
    /* Page is not in buffer yet, is to be read from disk */
2120
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
2121
    /*
2122
      Here other threads may step in and register as secondary readers.
2123
      They will register in block->wqueue[COND_FOR_REQUESTED].
2124
    */
32 by Brian Aker
More cleanup on pread()
2125
    got_length= pread(block->hash_link->file, block->buffer, read_length, block->hash_link->diskpos);
1 by brian
clean slate
2126
    keycache_pthread_mutex_lock(&keycache->cache_lock);
2127
    /*
2128
      The block can now have been marked for free (in case of
2129
      FLUSH_RELEASE). Otherwise the state must be unchanged.
2130
    */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2131
    assert(((block->status & ~(BLOCK_REASSIGNED |
51.1.123 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2132
                                    BLOCK_FOR_UPDATE)) == BLOCK_IN_USE));
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2133
    assert((block->length == 0));
2134
    assert((block->offset == keycache->key_cache_block_size));
2135
    assert((block->requests > 0));
1 by brian
clean slate
2136
2137
    if (got_length < min_length)
2138
      block->status|= BLOCK_ERROR;
2139
    else
2140
    {
2141
      block->status|= BLOCK_READ;
2142
      block->length= got_length;
2143
      /*
2144
        Do not set block->offset here. If this block is marked
2145
        BLOCK_CHANGED later, we want to flush only the modified part. So
2146
        only a writer may set block->offset down from
2147
        keycache->key_cache_block_size.
2148
      */
2149
    }
2150
    /* Signal that all pending requests for this page now can be processed */
2151
    release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2152
  }
2153
  else
2154
  {
2155
    /*
2156
      This code is executed only by threads that submitted secondary
2157
      requests. At this point it could happen that the cache block is
2158
      not yet assigned to the hash_link for the requested file block.
2159
      But at awake from the wait this should be the case. Unfortunately
2160
      we cannot assert this here because we do not know the hash_link
2161
      for the requested file block nor the file and position. So we have
2162
      to assert this in the caller.
2163
    */
2164
    wait_on_queue(&block->wqueue[COND_FOR_REQUESTED], &keycache->cache_lock);
2165
  }
2166
}
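/*
  Illustrative sketch only, not part of the original source: the
  primary/secondary protocol of read_block(), reduced to a standalone
  pthread example with hypothetical names (cached_page, fetch_page).
  The first thread to request a missing page becomes the primary reader
  and does the I/O with the mutex released; everyone else waits and is
  released by a broadcast once the page is marked as read.
*/
#include <pthread.h>
#include <stdbool.h>

struct cached_page
{
  pthread_mutex_t lock;
  pthread_cond_t  ready;     /* plays the role of wqueue[COND_FOR_REQUESTED] */
  bool in_progress;          /* a primary reader is doing the disk read      */
  bool valid;                /* plays the role of the BLOCK_READ status bit  */
};

static void fetch_page(struct cached_page *p, void (*disk_read)(void))
{
  pthread_mutex_lock(&p->lock);
  if (!p->valid && !p->in_progress)
  {
    /* Primary request: read the page ourselves, lock released meanwhile. */
    p->in_progress= true;
    pthread_mutex_unlock(&p->lock);
    disk_read();
    pthread_mutex_lock(&p->lock);
    p->valid= true;
    p->in_progress= false;
    pthread_cond_broadcast(&p->ready);    /* release all secondary waiters */
  }
  else
  {
    /* Secondary request: wait until the primary reader has finished. */
    while (!p->valid)
      pthread_cond_wait(&p->ready, &p->lock);
  }
  pthread_mutex_unlock(&p->lock);
}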
2167
2168
2169
/*
2170
  Read a block of data from a cached file into a buffer;
2171
2172
  SYNOPSIS
2173
2174
    key_cache_read()
2175
      keycache            pointer to a key cache data structure
2176
      file                handler for the file for the block of data to be read
2177
      filepos             position of the block of data in the file
2178
      level               determines the weight of the data
2179
      buff                buffer to where the data must be placed
2180
      length              length of the buffer
2181
      block_length        length of the block in the key cache buffer
2182
      return_buffer       return pointer to the key cache buffer with the data
2183
2184
  RETURN VALUE
2185
    Returns the address where the data is placed if successful, 0 - otherwise.
2186
2187
  NOTES.
2188
    The function ensures that a block of data of size length from file
2189
    positioned at filepos is in the buffers for some key cache blocks.
2190
    Then the function either copies the data into the buffer buff, or,
163 by Brian Aker
Merge Monty's code.
2191
    if return_buffer is true, it just returns the pointer to the key cache
1 by brian
clean slate
2192
    buffer with the data.
2193
    Filepos must be a multiple of 'block_length', but it doesn't
2194
    have to be a multiple of key_cache_block_size;
2195
*/
2196
481 by Brian Aker
Remove all of uchar.
2197
unsigned char *key_cache_read(KEY_CACHE *keycache,
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
2198
                      int file, internal::my_off_t filepos, int level,
482 by Brian Aker
Remove uint.
2199
                      unsigned char *buff, uint32_t length,
779.3.1 by Monty Taylor
More cleanup.
2200
                      uint32_t block_length,
2201
                      int return_buffer)
1 by brian
clean slate
2202
{
779.3.1 by Monty Taylor
More cleanup.
2203
  (void)block_length;
2204
  (void)return_buffer;
281 by Brian Aker
Converted myisam away from my_bool
2205
  bool locked_and_incremented= false;
1 by brian
clean slate
2206
  int error=0;
481 by Brian Aker
Remove all of uchar.
2207
  unsigned char *start= buff;
1 by brian
clean slate
2208
2209
  if (keycache->key_cache_inited)
2210
  {
2211
    /* Key cache is used */
2212
    register BLOCK_LINK *block;
482 by Brian Aker
Remove uint.
2213
    uint32_t read_length;
2214
    uint32_t offset;
2215
    uint32_t status;
1 by brian
clean slate
2216
    int page_st;
2217
2218
    /*
2219
      Once the key cache is initialized, we use the cache_lock to
2220
      reliably distinguish the cases of normal operation, resizing, and
2221
      disabled cache. We always increment and decrement
2222
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
2223
    */
2224
    keycache_pthread_mutex_lock(&keycache->cache_lock);
2225
    /*
2226
      Cache resizing has two phases: Flushing and re-initializing. In
2227
      the flush phase read requests are allowed to bypass the cache for
2228
      blocks not in the cache. find_key_block() returns NULL in this
2229
      case.
2230
2231
      After the flush phase new I/O requests must wait until the
2232
      re-initialization is done. The re-initialization can be done only
2233
      if no I/O request is in progress. The reason is that
2234
      key_cache_block_size can change. With enabled cache, I/O is done
2235
      in chunks of key_cache_block_size. Every chunk tries to use a
2236
      cache block first. If the block size changes in the middle, a
2237
      block could be missed and old data could be read.
2238
    */
2239
    while (keycache->in_resize && !keycache->resize_in_flush)
2240
      wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
2241
    /* Register the I/O for the next resize. */
2242
    inc_counter_for_resize_op(keycache);
163 by Brian Aker
Merge Monty's code.
2243
    locked_and_incremented= true;
1 by brian
clean slate
2244
    /* Requested data may not always be aligned to cache blocks. */
2245
    offset= (uint) (filepos % keycache->key_cache_block_size);
2246
    /* Read data in key_cache_block_size increments */
2247
    do
2248
    {
2249
      /* Cache could be disabled in a later iteration. */
2250
2251
      if (!keycache->can_be_used)
2252
	goto no_key_cache;
2253
      /* Start reading at the beginning of the cache block. */
2254
      filepos-= offset;
2255
      /* Do not read beyond the end of the cache block. */
2256
      read_length= length;
2257
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
51.1.123 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2258
      assert(read_length > 0);
1 by brian
clean slate
2259
2260
      /* Request the cache block that matches file/pos. */
2261
      keycache->global_cache_r_requests++;
2262
      block=find_key_block(keycache, file, filepos, level, 0, &page_st);
2263
      if (!block)
2264
      {
2265
        /*
2266
          This happens only for requests submitted during key cache
2267
          resize. The block is not in the cache and shall not go in.
2268
          Read directly from file.
2269
        */
2270
        keycache->global_cache_read++;
2271
        keycache_pthread_mutex_unlock(&keycache->cache_lock);
481 by Brian Aker
Remove all of uchar.
2272
        error= (pread(file, (unsigned char*) buff, read_length, filepos + offset) == 0);
1 by brian
clean slate
2273
        keycache_pthread_mutex_lock(&keycache->cache_lock);
2274
        goto next_block;
2275
      }
2276
      if (!(block->status & BLOCK_ERROR))
2277
      {
2278
        if (page_st != PAGE_READ)
2279
        {
2280
          /* The requested page is to be read into the block buffer */
2281
          read_block(keycache, block,
2282
                     keycache->key_cache_block_size, read_length+offset,
281 by Brian Aker
Converted myisam away from my_bool
2283
                     (bool)(page_st == PAGE_TO_BE_READ));
1 by brian
clean slate
2284
          /*
2285
            A secondary request must now have the block assigned to the
2286
            requested file block. It does not hurt to check it for
2287
            primary requests too.
2288
          */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2289
          assert(keycache->can_be_used);
2290
          assert(block->hash_link->file == file);
2291
          assert(block->hash_link->diskpos == filepos);
2292
          assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1 by brian
clean slate
2293
        }
2294
        else if (block->length < read_length + offset)
2295
        {
2296
          /*
2297
            Impossible if nothing goes wrong:
2298
            this could only happen if we are using a file with
2299
            small key blocks and are trying to read outside the file
2300
          */
1241.9.57 by Monty Taylor
Oy. Bigger change than I normally like - but this stuff is all intertwined.
2301
          errno= -1;
1 by brian
clean slate
2302
          block->status|= BLOCK_ERROR;
2303
        }
2304
      }
2305
2306
      /* block status may have added BLOCK_ERROR in the above 'if'. */
2307
      if (!((status= block->status) & BLOCK_ERROR))
2308
      {
2309
        {
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2310
          assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1 by brian
clean slate
2311
#if !defined(SERIALIZED_READ_FROM_CACHE)
2312
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
2313
#endif
2314
2315
          /* Copy data from the cache buffer */
266.1.22 by Monty Taylor
Removed bmove512.
2316
          memcpy(buff, block->buffer+offset, (size_t) read_length);
1 by brian
clean slate
2317
2318
#if !defined(SERIALIZED_READ_FROM_CACHE)
2319
          keycache_pthread_mutex_lock(&keycache->cache_lock);
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2320
          assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1 by brian
clean slate
2321
#endif
2322
        }
2323
      }
2324
2325
      remove_reader(block);
2326
2327
      /*
2328
         Link the block into the LRU ring if it's the last submitted
2329
         request for the block. This enables eviction for the block.
2330
           */
2331
      unreg_request(keycache, block, 1);
2332
2333
      if (status & BLOCK_ERROR)
2334
      {
2335
        error= 1;
2336
        break;
2337
      }
2338
2339
  next_block:
2340
      buff+= read_length;
2341
      filepos+= read_length+offset;
2342
      offset= 0;
2343
2344
    } while ((length-= read_length));
2345
    goto end;
2346
  }
2347
2348
no_key_cache:
2349
  /* Key cache is not used */
2350
2351
  keycache->global_cache_r_requests++;
2352
  keycache->global_cache_read++;
2353
2354
  if (locked_and_incremented)
2355
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
665.3.1 by Moriyoshi Koizumi
* Fixing "too much" my_pread(,,,,MY_NABP) to pread() conversion.
2356
  if (!pread(file, (unsigned char*) buff, length, filepos))
1 by brian
clean slate
2357
    error= 1;
2358
  if (locked_and_incremented)
2359
    keycache_pthread_mutex_lock(&keycache->cache_lock);
2360
2361
end:
2362
  if (locked_and_incremented)
2363
  {
2364
    dec_counter_for_resize_op(keycache);
2365
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
2366
  }
481 by Brian Aker
Remove all of uchar.
2367
  return(error ? (unsigned char*) 0 : start);
1 by brian
clean slate
2368
}
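/*
  Illustrative sketch only, not part of the original source: how an
  index-file reader might call key_cache_read().  The wrapper name
  example_read_key_block and the level value 3 are hypothetical
  placeholders; key_cache_read() returns the destination address on
  success and 0 on failure.
*/
static int example_read_key_block(KEY_CACHE *keycache, int index_file,
                                  internal::my_off_t pos,
                                  unsigned char *keybuff, uint32_t block_len)
{
  /* block_length and return_buffer are ignored by this implementation. */
  unsigned char *res= key_cache_read(keycache, index_file, pos, /* level */ 3,
                                     keybuff, block_len, block_len, 0);
  return res == 0;        /* non-zero means the read failed */
}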
2369
2370
2371
/*
2372
  Insert a block of file data from a buffer into key cache
2373
2374
  SYNOPSIS
2375
    key_cache_insert()
2376
    keycache            pointer to a key cache data structure
2377
    file                handler for the file to insert data from
2378
    filepos             position of the block of data in the file to insert
2379
    level               determines the weight of the data
2380
    buff                buffer to read data from
2381
    length              length of the data in the buffer
2382
2383
  NOTES
2384
    This is used by MyISAM to move all blocks from an index file to the key
2385
    cache.
2386
2387
  RETURN VALUE
2388
    0 if successful, 1 - otherwise.
2389
*/
2390
2391
int key_cache_insert(KEY_CACHE *keycache,
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
2392
                     int file, internal::my_off_t filepos, int level,
482 by Brian Aker
Remove uint.
2393
                     unsigned char *buff, uint32_t length)
1 by brian
clean slate
2394
{
2395
  int error= 0;
2396
2397
  if (keycache->key_cache_inited)
2398
  {
2399
    /* Key cache is used */
2400
    register BLOCK_LINK *block;
482 by Brian Aker
Remove uint.
2401
    uint32_t read_length;
2402
    uint32_t offset;
1 by brian
clean slate
2403
    int page_st;
281 by Brian Aker
Converted myisam away from my_bool
2404
    bool locked_and_incremented= false;
1 by brian
clean slate
2405
2406
    /*
2407
      Once the keycache is initialized, we use the cache_lock to
2408
      reliably distinguish the cases of normal operation, resizing, and
2409
      disabled cache. We always increment and decrement
2410
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
2411
    */
2412
    keycache_pthread_mutex_lock(&keycache->cache_lock);
2413
    /*
2414
      We do not load index data into a disabled cache nor into an
2415
      ongoing resize.
2416
    */
2417
    if (!keycache->can_be_used || keycache->in_resize)
2418
	goto no_key_cache;
2419
    /* Register the pseudo I/O for the next resize. */
2420
    inc_counter_for_resize_op(keycache);
163 by Brian Aker
Merge Monty's code.
2421
    locked_and_incremented= true;
1 by brian
clean slate
2422
    /* Loaded data may not always be aligned to cache blocks. */
2423
    offset= (uint) (filepos % keycache->key_cache_block_size);
2424
    /* Load data in key_cache_block_size increments. */
2425
    do
2426
    {
2427
      /* Cache could be disabled or resizing in a later iteration. */
2428
      if (!keycache->can_be_used || keycache->in_resize)
2429
	goto no_key_cache;
2430
      /* Start loading at the beginning of the cache block. */
2431
      filepos-= offset;
2432
      /* Do not load beyond the end of the cache block. */
2433
      read_length= length;
2434
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
51.1.123 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2435
      assert(read_length > 0);
1 by brian
clean slate
2436
2437
      /* The block has been read by the caller already. */
2438
      keycache->global_cache_read++;
2439
      /* Request the cache block that matches file/pos. */
2440
      keycache->global_cache_r_requests++;
2441
      block= find_key_block(keycache, file, filepos, level, 0, &page_st);
2442
      if (!block)
2443
      {
2444
        /*
2445
          This happens only for requests submitted during key cache
2446
          resize. The block is not in the cache and shall not go in.
2447
          Stop loading index data.
2448
        */
2449
        goto no_key_cache;
2450
      }
2451
      if (!(block->status & BLOCK_ERROR))
2452
      {
2453
        if ((page_st == PAGE_WAIT_TO_BE_READ) ||
2454
            ((page_st == PAGE_TO_BE_READ) &&
2455
             (offset || (read_length < keycache->key_cache_block_size))))
2456
        {
2457
          /*
2458
            Either
2459
2460
            this is a secondary request for a block to be read into the
2461
            cache. The block is in eviction. It is not yet assigned to
2462
            the requested file block (It does not point to the right
2463
            hash_link). So we cannot call remove_reader() on the block.
2464
            And we cannot access the hash_link directly here. We need to
2465
            wait until the assignment is complete. read_block() executes
163 by Brian Aker
Merge Monty's code.
2466
            the correct wait when called with primary == false.
1 by brian
clean slate
2467
2468
            Or
2469
2470
            this is a primary request for a block to be read into the
2471
            cache and the supplied data does not fill the whole block.
2472
2473
            This function is called on behalf of a LOAD INDEX INTO CACHE
2474
            statement, which is a read-only task and allows other
2475
            readers. It is possible that a parallel running reader tries
2476
            to access this block. If it needs more data than has been
2477
            supplied here, it would report an error. To be sure that we
2478
            have all data in the block that is available in the file, we
2479
            read the block ourselves.
2480
2481
            Though re-reading what the caller has already read is an
2482
            expensive operation, we need to do this for correctness.
2483
          */
2484
          read_block(keycache, block, keycache->key_cache_block_size,
2485
                     read_length + offset, (page_st == PAGE_TO_BE_READ));
2486
          /*
2487
            A secondary request must now have the block assigned to the
2488
            requested file block. It does not hurt to check it for
2489
            primary requests too.
2490
          */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2491
          assert(keycache->can_be_used);
2492
          assert(block->hash_link->file == file);
2493
          assert(block->hash_link->diskpos == filepos);
2494
          assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1 by brian
clean slate
2495
        }
2496
        else if (page_st == PAGE_TO_BE_READ)
2497
        {
2498
          /*
2499
            This is a new block in the cache. If we come here, we have
2500
            data for the whole block.
2501
          */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2502
          assert(block->hash_link->requests);
2503
          assert(block->status & BLOCK_IN_USE);
2504
          assert((page_st == PAGE_TO_BE_READ) ||
1 by brian
clean slate
2505
                      (block->status & BLOCK_READ));
2506
2507
#if !defined(SERIALIZED_READ_FROM_CACHE)
2508
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
2509
          /*
2510
            Here other threads may step in and register as secondary readers.
2511
            They will register in block->wqueue[COND_FOR_REQUESTED].
2512
          */
2513
#endif
2514
2515
          /* Copy data from buff */
266.1.22 by Monty Taylor
Removed bmove512.
2516
          memcpy(block->buffer+offset, buff, (size_t) read_length);
1 by brian
clean slate
2517
2518
#if !defined(SERIALIZED_READ_FROM_CACHE)
2519
          keycache_pthread_mutex_lock(&keycache->cache_lock);
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2520
          assert(block->status & BLOCK_IN_USE);
2521
          assert((page_st == PAGE_TO_BE_READ) ||
1 by brian
clean slate
2522
                      (block->status & BLOCK_READ));
2523
#endif
2524
          /*
2525
            After the data is in the buffer, we can declare the block
2526
            valid. Now other threads do not need to register as
2527
            secondary readers any more. They can immediately access the
2528
            block.
2529
          */
2530
          block->status|= BLOCK_READ;
2531
          block->length= read_length+offset;
2532
          /*
2533
            Do not set block->offset here. If this block is marked
2534
            BLOCK_CHANGED later, we want to flush only the modified part. So
2535
            only a writer may set block->offset down from
2536
            keycache->key_cache_block_size.
2537
          */
2538
          /* Signal all pending requests. */
2539
          release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2540
        }
2541
        else
2542
        {
2543
          /*
2544
            page_st == PAGE_READ. The block is in the buffer. All data
2545
            must already be present. Blocks are always read with all
2546
            data available on file. Assert that the block does not have
2547
            less content than the preloader supplies. If the caller has
2548
            data beyond block->length, it means that a file write has
2549
            been done while this block was in cache and not extended
2550
            with the new data. If the condition is met, we can simply
2551
            ignore the block.
2552
          */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2553
          assert((page_st == PAGE_READ) &&
1 by brian
clean slate
2554
                      (read_length + offset <= block->length));
2555
        }
2556
2557
        /*
2558
          A secondary request must now have the block assigned to the
2559
          requested file block. It does not hurt to check it for primary
2560
          requests too.
2561
        */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2562
        assert(block->hash_link->file == file);
2563
        assert(block->hash_link->diskpos == filepos);
2564
        assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1 by brian
clean slate
2565
      } /* end of if (!(block->status & BLOCK_ERROR)) */
2566
2567
2568
      remove_reader(block);
2569
2570
      /*
2571
         Link the block into the LRU ring if it's the last submitted
2572
         request for the block. This enables eviction for the block.
2573
      */
2574
      unreg_request(keycache, block, 1);
2575
2576
      error= (block->status & BLOCK_ERROR);
2577
2578
      if (error)
2579
        break;
2580
2581
      buff+= read_length;
2582
      filepos+= read_length+offset;
2583
      offset= 0;
2584
2585
    } while ((length-= read_length));
2586
2587
  no_key_cache:
2588
    if (locked_and_incremented)
2589
      dec_counter_for_resize_op(keycache);
2590
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
2591
  }
51.1.122 by Jay Pipes
Round 2 - removal custom KEYCACHE_DEBUG and THREAD_TRACE from keycache
2592
  return(error);
1 by brian
clean slate
2593
}
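
/*
  Illustrative usage sketch (not part of the original source): a preloader
  in the spirit of LOAD INDEX INTO CACHE reads an index file in chunks and
  hands each chunk to key_cache_insert() above. The guard macro, function
  name, chunk size, weight and error handling are hypothetical; only the
  key_cache_insert() call follows the interface documented above.
*/
#ifdef KEYCACHE_USAGE_SKETCHES            /* hypothetical guard, never defined */
static int preload_index_sketch(KEY_CACHE *keycache, int file,
                                internal::my_off_t file_length)
{
  unsigned char chunk[8 * 1024];          /* hypothetical read-ahead buffer */
  internal::my_off_t pos= 0;

  while (pos < file_length)
  {
    size_t to_read= sizeof(chunk);
    if (file_length - pos < (internal::my_off_t) to_read)
      to_read= (size_t) (file_length - pos);
    /* The caller must have read the data itself; see the NOTES above. */
    if (pread(file, chunk, to_read, pos) != (ssize_t) to_read)
      return 1;
    /* Feed the freshly read chunk into the key cache (weight 3 is arbitrary). */
    if (key_cache_insert(keycache, file, pos, 3, chunk, (uint32_t) to_read))
      return 1;
    pos+= to_read;
  }
  return 0;                               /* 0 on success, as documented above */
}
#endif /* KEYCACHE_USAGE_SKETCHES */
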
2594
2595
2596
/*
2597
  Write a buffer into a cached file.
2598
2599
  SYNOPSIS
2600
2601
    key_cache_write()
2602
      keycache            pointer to a key cache data structure
2603
      file                handler for the file to write data to
2604
      filepos             position in the file to write data to
2605
      level               determines the weight of the data
2606
      buff                buffer with the data
2607
      length              length of the buffer
2608
      dont_write          if it is 0 then all dirty pages involved in writing
2609
                          should have been flushed from key cache
2610
2611
  RETURN VALUE
2612
    0 on success, 1 otherwise.
2613
2614
  NOTES.
2615
    The function copies the data of size length from buff into buffers
2616
    for key cache blocks that are assigned to contain the portion of
2617
    the file starting with position filepos.
163 by Brian Aker
Merge Monty's code.
2618
    It ensures that this data is flushed to the file if dont_write is false.
1 by brian
clean slate
2619
    Filepos must be a multiple of 'block_length', but it doesn't
2620
    have to be a multiple of key_cache_block_size.
2621
163 by Brian Aker
Merge Monty's code.
2622
    dont_write is always true in the server (info->lock_type is never F_UNLCK).
1 by brian
clean slate
2623
*/
2624
2625
int key_cache_write(KEY_CACHE *keycache,
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
2626
                    int file, internal::my_off_t filepos, int level,
482 by Brian Aker
Remove uint.
2627
                    unsigned char *buff, uint32_t length,
779.3.1 by Monty Taylor
More cleanup.
2628
                    uint32_t block_length,
1 by brian
clean slate
2629
                    int dont_write)
2630
{
779.3.1 by Monty Taylor
More cleanup.
2631
  (void)block_length;
281 by Brian Aker
Converted myisam away from my_bool
2632
  bool locked_and_incremented= false;
1 by brian
clean slate
2633
  int error=0;
2634
2635
  if (!dont_write)
2636
  {
2637
    /* Not used in the server. */
2638
    /* Force writing from buff into disk. */
2639
    keycache->global_cache_w_requests++;
2640
    keycache->global_cache_write++;
32 by Brian Aker
More cleanup on pread()
2641
    if (pwrite(file, buff, length, filepos) == 0)
51.1.122 by Jay Pipes
Round 2 - removal custom KEYCACHE_DEBUG and THREAD_TRACE from keycache
2642
      return(1);
1 by brian
clean slate
2643
  }
2644
2645
  if (keycache->key_cache_inited)
2646
  {
2647
    /* Key cache is used */
2648
    register BLOCK_LINK *block;
482 by Brian Aker
Remove uint.
2649
    uint32_t read_length;
2650
    uint32_t offset;
1 by brian
clean slate
2651
    int page_st;
2652
2653
    /*
2654
      Once the key cache is initialized, we use the cache_lock to
2655
      reliably distinguish the cases of normal operation, resizing, and
2656
      disabled cache. We always increment and decrement
2657
      'cnt_for_resize_op' so that a resizer can wait for pending I/O.
2658
    */
2659
    keycache_pthread_mutex_lock(&keycache->cache_lock);
2660
    /*
2661
      Cache resizing has two phases: Flushing and re-initializing. In
2662
      the flush phase write requests can modify dirty blocks that are
2663
      not yet in flush. Otherwise they are allowed to bypass the cache.
2664
      find_key_block() returns NULL in both cases (clean blocks and
2665
      non-cached blocks).
2666
2667
      After the flush phase new I/O requests must wait until the
2668
      re-initialization is done. The re-initialization can be done only
2669
      if no I/O request is in progress. The reason is that
2670
      key_cache_block_size can change. With enabled cache I/O is done in
2671
      chunks of key_cache_block_size. Every chunk tries to use a cache
2672
      block first. If the block size changes in the middle, a block
2673
      could be missed and data could be written below a cached block.
2674
    */
2675
    while (keycache->in_resize && !keycache->resize_in_flush)
2676
      wait_on_queue(&keycache->resize_queue, &keycache->cache_lock);
2677
    /* Register the I/O for the next resize. */
2678
    inc_counter_for_resize_op(keycache);
163 by Brian Aker
Merge Monty's code.
2679
    locked_and_incremented= true;
1 by brian
clean slate
2680
    /* Requested data may not always be aligned to cache blocks. */
2681
    offset= (uint) (filepos % keycache->key_cache_block_size);
2682
    /* Write data in key_cache_block_size increments. */
2683
    do
2684
    {
2685
      /* Cache could be disabled in a later iteration. */
2686
      if (!keycache->can_be_used)
2687
	goto no_key_cache;
2688
      /* Start writing at the beginning of the cache block. */
2689
      filepos-= offset;
2690
      /* Do not write beyond the end of the cache block. */
2691
      read_length= length;
2692
      set_if_smaller(read_length, keycache->key_cache_block_size-offset);
51.1.123 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2693
      assert(read_length > 0);
1 by brian
clean slate
2694
2695
      /* Request the cache block that matches file/pos. */
2696
      keycache->global_cache_w_requests++;
2697
      block= find_key_block(keycache, file, filepos, level, 1, &page_st);
2698
      if (!block)
2699
      {
2700
        /*
2701
          This happens only for requests submitted during key cache
2702
          resize. The block is not in the cache and shall not go in.
2703
          Write directly to file.
2704
        */
2705
        if (dont_write)
2706
        {
2707
          /* Used in the server. */
2708
          keycache->global_cache_write++;
2709
          keycache_pthread_mutex_unlock(&keycache->cache_lock);
481 by Brian Aker
Remove all of uchar.
2710
          if (pwrite(file, (unsigned char*) buff, read_length, filepos + offset) == 0)
1 by brian
clean slate
2711
            error=1;
2712
          keycache_pthread_mutex_lock(&keycache->cache_lock);
2713
        }
2714
        goto next_block;
2715
      }
2716
      /*
2717
        Prevent block from flushing and from being selected to be
2718
        freed. This must be set when we release the cache_lock.
2719
        However, we must not set the status of the block before it is
2720
        assigned to this file/pos.
2721
      */
2722
      if (page_st != PAGE_WAIT_TO_BE_READ)
2723
        block->status|= BLOCK_FOR_UPDATE;
2724
      /*
2725
        We must read the file block first if it is not yet in the cache
2726
        and we do not replace all of its contents.
2727
2728
        In cases where the cache block is big enough to contain (parts
2729
        of) index blocks of different indexes, our request can be
2730
        secondary (PAGE_WAIT_TO_BE_READ). In this case another thread is
2731
        reading the file block. If the read completes after us, it
2732
        overwrites our new contents with the old contents. So we have to
2733
        wait for the other thread to complete the read of this block.
2734
        read_block() takes care of the wait.
2735
      */
2736
      if (!(block->status & BLOCK_ERROR) &&
2737
          ((page_st == PAGE_TO_BE_READ &&
2738
            (offset || read_length < keycache->key_cache_block_size)) ||
2739
           (page_st == PAGE_WAIT_TO_BE_READ)))
2740
      {
2741
        read_block(keycache, block,
2742
                   offset + read_length >= keycache->key_cache_block_size?
2743
                   offset : keycache->key_cache_block_size,
2744
                   offset, (page_st == PAGE_TO_BE_READ));
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2745
        assert(keycache->can_be_used);
2746
        assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1 by brian
clean slate
2747
        /*
2748
          Prevent block from flushing and from being selected to be
2749
          freed. This must be set when we release the cache_lock.
2750
          Here we set it in case we could not set it above.
2751
        */
2752
        block->status|= BLOCK_FOR_UPDATE;
2753
      }
2754
      /*
2755
        The block should always be assigned to the requested file block
2756
        here. It need not be BLOCK_READ when overwriting the whole block.
2757
      */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2758
      assert(block->hash_link->file == file);
2759
      assert(block->hash_link->diskpos == filepos);
2760
      assert(block->status & BLOCK_IN_USE);
2761
      assert((page_st == PAGE_TO_BE_READ) || (block->status & BLOCK_READ));
1 by brian
clean slate
2762
      /*
2763
        The block to be written must not be marked BLOCK_REASSIGNED.
2764
        Otherwise it could be freed in dirty state or reused without
2765
        another flush during eviction. It must also not be in flush.
2766
        Otherwise the old contents may have been flushed already and
2767
        the flusher could clear BLOCK_CHANGED without flushing the
2768
        new changes again.
2769
      */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2770
      assert(!(block->status & BLOCK_REASSIGNED));
1 by brian
clean slate
2771
2772
      while (block->status & BLOCK_IN_FLUSHWRITE)
2773
      {
2774
        /*
2775
          Another thread is flushing the block. It was dirty already.
2776
          Wait until the block is flushed to file. Otherwise we could
2777
          modify the buffer contents just while it is written to file.
2778
          Unpredictable file block contents would be the result.
2779
          While we wait, several things can happen to the block,
2780
          including another flush. But the block cannot be reassigned to
2781
          another hash_link until we release our request on it.
2782
        */
2783
        wait_on_queue(&block->wqueue[COND_FOR_SAVED], &keycache->cache_lock);
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2784
        assert(keycache->can_be_used);
2785
        assert(block->status & (BLOCK_READ | BLOCK_IN_USE));
1 by brian
clean slate
2786
        /* Still must not be marked for free. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2787
        assert(!(block->status & BLOCK_REASSIGNED));
2788
        assert(block->hash_link && (block->hash_link->block == block));
1 by brian
clean slate
2789
      }
2790
2791
      /*
2792
        We could perhaps release the cache_lock during access of the
2793
        data like in the other functions. Locks outside of the key cache
2794
        assure that readers and a writer do not access the same range of
2795
        data. Parallel accesses should happen only if the cache block
2796
        contains multiple index block(fragment)s. So different parts of
2797
        the buffer would be read/written. An attempt to flush during
2798
        memcpy() is prevented with BLOCK_FOR_UPDATE.
2799
      */
2800
      if (!(block->status & BLOCK_ERROR))
2801
      {
2802
#if !defined(SERIALIZED_READ_FROM_CACHE)
2803
        keycache_pthread_mutex_unlock(&keycache->cache_lock);
2804
#endif
266.1.22 by Monty Taylor
Removed bmove512.
2805
        memcpy(block->buffer+offset, buff, (size_t) read_length);
1 by brian
clean slate
2806
2807
#if !defined(SERIALIZED_READ_FROM_CACHE)
2808
        keycache_pthread_mutex_lock(&keycache->cache_lock);
2809
#endif
2810
      }
2811
2812
      if (!dont_write)
2813
      {
2814
	/* Not used in the server. buff has been written to disk at start. */
2815
        if ((block->status & BLOCK_CHANGED) &&
2816
            (!offset && read_length >= keycache->key_cache_block_size))
2817
             link_to_file_list(keycache, block, block->hash_link->file, 1);
2818
      }
2819
      else if (! (block->status & BLOCK_CHANGED))
2820
        link_to_changed_list(keycache, block);
2821
      block->status|=BLOCK_READ;
2822
      /*
2823
        Allow block to be selected to be freed. Since it is marked
2824
        BLOCK_CHANGED too, it won't be selected to be freed without
2825
        a flush.
2826
      */
2827
      block->status&= ~BLOCK_FOR_UPDATE;
2828
      set_if_smaller(block->offset, offset);
2829
      set_if_bigger(block->length, read_length+offset);
2830
2831
      /* Threads may be waiting for the changes to be complete. */
2832
      release_whole_queue(&block->wqueue[COND_FOR_REQUESTED]);
2833
2834
      /*
2835
        If only a part of the cache block is to be replaced, and the
2836
        rest has been read from file, then the cache lock has been
2837
        released for I/O, and it is possible that another thread
2838
        wants to evict or free the block and waits for it to be
2839
        released. So we must not just decrement hash_link->requests, but
2840
        also wake a waiting thread.
2841
      */
2842
      remove_reader(block);
2843
2844
      /*
2845
         Link the block into the LRU ring if it's the last submitted
2846
         request for the block. This enables eviction for the block.
2847
      */
2848
      unreg_request(keycache, block, 1);
2849
2850
      if (block->status & BLOCK_ERROR)
2851
      {
2852
        error= 1;
2853
        break;
2854
      }
2855
2856
    next_block:
2857
      buff+= read_length;
2858
      filepos+= read_length+offset;
2859
      offset= 0;
2860
2861
    } while ((length-= read_length));
2862
    goto end;
2863
  }
2864
2865
no_key_cache:
2866
  /* Key cache is not used */
2867
  if (dont_write)
2868
  {
2869
    /* Used in the server. */
2870
    keycache->global_cache_w_requests++;
2871
    keycache->global_cache_write++;
2872
    if (locked_and_incremented)
2873
      keycache_pthread_mutex_unlock(&keycache->cache_lock);
481 by Brian Aker
Remove all of uchar.
2874
    if (pwrite(file, (unsigned char*) buff, length, filepos) == 0)
1 by brian
clean slate
2875
      error=1;
2876
    if (locked_and_incremented)
2877
      keycache_pthread_mutex_lock(&keycache->cache_lock);
2878
  }
2879
2880
end:
2881
  if (locked_and_incremented)
2882
  {
2883
    dec_counter_for_resize_op(keycache);
2884
    keycache_pthread_mutex_unlock(&keycache->cache_lock);
2885
  }
51.1.122 by Jay Pipes
Round 2 - removal custom KEYCACHE_DEBUG and THREAD_TRACE from keycache
2886
  return(error);
1 by brian
clean slate
2887
}
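
/*
  Illustrative usage sketch (not part of the original source): writing one
  modified index page through the cache the way the server does it, with
  dont_write != 0, so the page only becomes dirty in the cache and is
  flushed later. The guard macro, function name and weight are hypothetical;
  block_length is ignored by key_cache_write() above.
*/
#ifdef KEYCACHE_USAGE_SKETCHES            /* hypothetical guard, never defined */
static int write_index_page_sketch(KEY_CACHE *keycache, int file,
                                   internal::my_off_t page_pos,
                                   unsigned char *page, uint32_t page_length)
{
  /*
    dont_write=1: do not force the data to disk here; the cache block is
    marked BLOCK_CHANGED instead and written out by a later flush.
  */
  return key_cache_write(keycache, file, page_pos, 3 /* level */,
                         page, page_length, page_length /* block_length */,
                         1 /* dont_write */);
}
#endif /* KEYCACHE_USAGE_SKETCHES */
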
2888
2889
2890
/*
2891
  Free block.
2892
2893
  SYNOPSIS
2894
    free_block()
2895
      keycache          Pointer to a key cache data structure
2896
      block             Pointer to the block to free
2897
2898
  DESCRIPTION
2899
    Remove reference to block from hash table.
2900
    Remove block from the chain of clean blocks.
2901
    Add block to the free list.
2902
2903
  NOTE
2904
    Block must not be free (status == 0).
2905
    Block must not be in free_block_list.
2906
    Block must not be in the LRU ring.
2907
    Block must not be in eviction (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH).
2908
    Block must not be marked to be freed (BLOCK_REASSIGNED).
2909
    Block must not be in flush (BLOCK_IN_FLUSH).
2910
    Block must not be dirty (BLOCK_CHANGED).
2911
    Block must not be in changed_blocks (dirty) hash.
2912
    Block must be in file_blocks (clean) hash.
2913
    Block must refer to a hash_link.
2914
    Block must have a request registered on it.
2915
*/
2916
2917
static void free_block(KEY_CACHE *keycache, BLOCK_LINK *block)
2918
{
2919
  /*
2920
    Assert that the block is not free already. And that it is in a clean
2921
    state. Note that the block might just be assigned to a hash_link and
2922
    not yet read (BLOCK_READ may not be set here). In this case a reader
2923
    is registered in the hash_link and free_block() will wait for it
2924
    below.
2925
  */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2926
  assert((block->status & BLOCK_IN_USE) &&
1 by brian
clean slate
2927
              !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
2928
                                 BLOCK_REASSIGNED | BLOCK_IN_FLUSH |
2929
                                 BLOCK_CHANGED | BLOCK_FOR_UPDATE)));
2930
  /* Assert that the block is in a file_blocks chain. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2931
  assert(block->prev_changed && *block->prev_changed == block);
1 by brian
clean slate
2932
  /* Assert that the block is not in the LRU ring. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2933
  assert(!block->next_used && !block->prev_used);
1 by brian
clean slate
2934
  /*
2935
    IMHO the below condition (if()) makes no sense. I can't see how it
2936
    could be possible that free_block() is entered with a NULL hash_link
2937
    pointer. The only place where it can become NULL is in free_block()
2938
    (or before its first use ever, but for those blocks free_block() is
2939
    not called). I don't remove the conditional as it cannot harm, but
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2940
    place an assert to confirm my hypothesis. Eventually the
1 by brian
clean slate
2941
    condition (if()) can be removed.
2942
  */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2943
  assert(block->hash_link && block->hash_link->block == block);
1 by brian
clean slate
2944
  if (block->hash_link)
2945
  {
2946
    /*
2947
      While waiting for readers to finish, new readers might request the
2948
      block. But since we set block->status|= BLOCK_REASSIGNED, they
2949
      will wait on block->wqueue[COND_FOR_SAVED]. They must be signalled
2950
      later.
2951
    */
2952
    block->status|= BLOCK_REASSIGNED;
2953
    wait_for_readers(keycache, block);
2954
    /*
2955
      The block must not have been freed by another thread. Repeat some
2956
      checks. An additional requirement is that it must be read now
2957
      (BLOCK_READ).
2958
    */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2959
    assert(block->hash_link && block->hash_link->block == block);
2960
    assert((block->status & (BLOCK_READ | BLOCK_IN_USE |
1 by brian
clean slate
2961
                                  BLOCK_REASSIGNED)) &&
2962
                !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
2963
                                   BLOCK_IN_FLUSH | BLOCK_CHANGED |
2964
                                   BLOCK_FOR_UPDATE)));
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2965
    assert(block->prev_changed && *block->prev_changed == block);
2966
    assert(!block->prev_used);
1 by brian
clean slate
2967
    /*
2968
      Unset BLOCK_REASSIGNED again. If we hand the block to an evicting
2969
      thread (through unreg_request() below), other threads must not see
2970
      this flag. They could become confused.
2971
    */
2972
    block->status&= ~BLOCK_REASSIGNED;
2973
    /*
2974
      Do not release the hash_link until the block is off all lists.
2975
      At least not if we hand it over for eviction in unreg_request().
2976
    */
2977
  }
2978
2979
  /*
2980
    Unregister the block request and link the block into the LRU ring.
2981
    This enables eviction for the block. If the LRU ring was empty and
2982
    threads are waiting for a block, then the block will be handed over
2983
    for eviction immediately. Otherwise we will unlink it from the LRU
2984
    ring again, without releasing the lock in between. So decrementing
2985
    the request counter and updating statistics are the only relevant
2986
    operations in this case. Assert that there are no other requests
2987
    registered.
2988
  */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
2989
  assert(block->requests == 1);
1 by brian
clean slate
2990
  unreg_request(keycache, block, 0);
2991
  /*
2992
    Note that even without releasing the cache lock it is possible that
2993
    the block is immediately selected for eviction by link_block() and
2994
    thus not added to the LRU ring. In this case we must not touch the
2995
    block any more.
2996
  */
2997
  if (block->status & BLOCK_IN_EVICTION)
2998
    return;
2999
3000
  /* Here the block must be in the LRU ring. Unlink it again. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
3001
  assert(block->next_used && block->prev_used &&
1 by brian
clean slate
3002
              *block->prev_used == block);
3003
  unlink_block(keycache, block);
3004
  if (block->temperature == BLOCK_WARM)
3005
    keycache->warm_blocks--;
3006
  block->temperature= BLOCK_COLD;
3007
3008
  /* Remove from file_blocks hash. */
3009
  unlink_changed(block);
3010
3011
  /* Remove reference to block from hash table. */
3012
  unlink_hash(keycache, block->hash_link);
3013
  block->hash_link= NULL;
3014
3015
  block->status= 0;
3016
  block->length= 0;
3017
  block->offset= keycache->key_cache_block_size;
3018
3019
  /* Enforced by unlink_changed(), but just to be sure. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
3020
  assert(!block->next_changed && !block->prev_changed);
1 by brian
clean slate
3021
  /* Enforced by unlink_block(): not in LRU ring nor in free_block_list. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
3022
  assert(!block->next_used && !block->prev_used);
1 by brian
clean slate
3023
  /* Insert the free block in the free list. */
3024
  block->next_used= keycache->free_block_list;
3025
  keycache->free_block_list= block;
3026
  /* Keep track of the number of currently unused blocks. */
3027
  keycache->blocks_unused++;
3028
3029
  /* All pending requests for this page must be resubmitted. */
3030
  release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
3031
}
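
/*
  Illustrative sketch (not part of the original source): the free list that
  free_block() pushes onto at the end of the function above is a simple
  LIFO stack threaded through the blocks' next_used pointers. A minimal
  standalone model of that push, plus the matching pop, looks like this;
  the guard macro and the node type are hypothetical.
*/
#ifdef KEYCACHE_USAGE_SKETCHES            /* hypothetical guard, never defined */
struct free_node_sketch
{
  struct free_node_sketch *next_used;     /* reused as the stack link */
};

static void free_list_push_sketch(struct free_node_sketch **free_list,
                                  struct free_node_sketch *block)
{
  /* Same pattern as free_block(): push the block on top of the stack. */
  block->next_used= *free_list;
  *free_list= block;
}

static struct free_node_sketch *free_list_pop_sketch(struct free_node_sketch **free_list)
{
  /* Pop the most recently freed block, if any. */
  struct free_node_sketch *block= *free_list;
  if (block)
    *free_list= block->next_used;
  return block;
}
#endif /* KEYCACHE_USAGE_SKETCHES */
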
3032
3033
3034
static int cmp_sec_link(BLOCK_LINK **a, BLOCK_LINK **b)
3035
{
3036
  return (((*a)->hash_link->diskpos < (*b)->hash_link->diskpos) ? -1 :
3037
      ((*a)->hash_link->diskpos > (*b)->hash_link->diskpos) ? 1 : 0);
3038
}
3039
3040
3041
/*
3042
  Flush a portion of changed blocks to disk,
3043
  free used blocks if requested
3044
*/
3045
3046
static int flush_cached_blocks(KEY_CACHE *keycache,
1241.9.1 by Monty Taylor
Removed global.h. Fixed all the headers.
3047
                               int file, BLOCK_LINK **cache,
1 by brian
clean slate
3048
                               BLOCK_LINK **end,
3049
                               enum flush_type type)
3050
{
3051
  int error;
3052
  int last_errno= 0;
482 by Brian Aker
Remove uint.
3053
  uint32_t count= (uint) (end-cache);
1 by brian
clean slate
3054
3055
  /* Don't lock the cache during the flush */
3056
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
3057
  /*
3058
     As all blocks referred to in 'cache' are marked by BLOCK_IN_FLUSH
3059
     we are guaranteed no thread will change them
3060
  */
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
3061
  internal::my_qsort((unsigned char*) cache, count, sizeof(*cache), (qsort_cmp) cmp_sec_link);
1 by brian
clean slate
3062
3063
  keycache_pthread_mutex_lock(&keycache->cache_lock);
3064
  /*
3065
    Note: Do not break the loop. We have registered a request on every
3066
    block in 'cache'. These must be unregistered by free_block() or
3067
    unreg_request().
3068
  */
3069
  for ( ; cache != end ; cache++)
3070
  {
3071
    BLOCK_LINK *block= *cache;
3072
    /*
3073
      If the block contents are going to be changed, we abandon the flush
3074
      for this block. flush_key_blocks_int() will restart its search and
3075
      handle the block properly.
3076
    */
3077
    if (!(block->status & BLOCK_FOR_UPDATE))
3078
    {
3079
      /* Blocks coming here must have a certain status. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
3080
      assert(block->hash_link);
3081
      assert(block->hash_link->block == block);
3082
      assert(block->hash_link->file == file);
3083
      assert((block->status & ~BLOCK_IN_EVICTION) ==
1 by brian
clean slate
3084
                  (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
3085
      block->status|= BLOCK_IN_FLUSHWRITE;
3086
      keycache_pthread_mutex_unlock(&keycache->cache_lock);
32 by Brian Aker
More cleanup on pread()
3087
      error= (pwrite(file,
3088
                     block->buffer+block->offset,
3089
                     block->length - block->offset,
3090
                     block->hash_link->diskpos+ block->offset) == 0);
1 by brian
clean slate
3091
      keycache_pthread_mutex_lock(&keycache->cache_lock);
3092
      keycache->global_cache_write++;
3093
      if (error)
3094
      {
3095
        block->status|= BLOCK_ERROR;
3096
        if (!last_errno)
3097
          last_errno= errno ? errno : -1;
3098
      }
3099
      block->status&= ~BLOCK_IN_FLUSHWRITE;
3100
      /* Block must not have changed status except BLOCK_FOR_UPDATE. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
3101
      assert(block->hash_link);
3102
      assert(block->hash_link->block == block);
3103
      assert(block->hash_link->file == file);
3104
      assert((block->status & ~(BLOCK_FOR_UPDATE | BLOCK_IN_EVICTION)) ==
1 by brian
clean slate
3105
                  (BLOCK_READ | BLOCK_IN_FLUSH | BLOCK_CHANGED | BLOCK_IN_USE));
3106
      /*
3107
        Set correct status and link in right queue for free or later use.
3108
        free_block() must not see BLOCK_CHANGED and it may need to wait
3109
        for readers of the block. These should not see the block in the
3110
        wrong hash. If not freeing the block, we need to have it in the
3111
        right queue anyway.
3112
      */
3113
      link_to_file_list(keycache, block, file, 1);
3114
3115
    }
3116
    block->status&= ~BLOCK_IN_FLUSH;
3117
    /*
3118
      Let any waiting requests to write to the block page proceed.
3119
      It might happen only during an operation to resize the key cache.
3120
    */
3121
    release_whole_queue(&block->wqueue[COND_FOR_SAVED]);
3122
    /* type will never be FLUSH_IGNORE_CHANGED here */
3123
    if (!(type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE) &&
3124
        !(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3125
                           BLOCK_FOR_UPDATE)))
3126
    {
3127
      /*
3128
        Note that a request has been registered against the block in
3129
        flush_key_blocks_int().
3130
      */
3131
      free_block(keycache, block);
3132
    }
3133
    else
3134
    {
3135
      /*
3136
        Link the block into the LRU ring if it's the last submitted
3137
        request for the block. This enables eviction for the block.
3138
        Note that a request has been registered against the block in
3139
        flush_key_blocks_int().
3140
      */
3141
      unreg_request(keycache, block, 1);
3142
    }
3143
3144
  } /* end of for ( ; cache != end ; cache++) */
3145
  return last_errno;
3146
}
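
/*
  Illustrative sketch (not part of the original source): flush_cached_blocks()
  above sorts the collected blocks by their disk position before issuing the
  write burst, so the pwrite() calls proceed in ascending offset order. The
  same idea with plain qsort() and a hypothetical record type; the guard
  macro and all names below are hypothetical.
*/
#ifdef KEYCACHE_USAGE_SKETCHES            /* hypothetical guard, never defined */
#include <stdlib.h>

struct pending_write_sketch
{
  off_t diskpos;                           /* where the block goes on disk */
};

static int cmp_pending_write_sketch(const void *a, const void *b)
{
  off_t pa= ((const struct pending_write_sketch *) a)->diskpos;
  off_t pb= ((const struct pending_write_sketch *) b)->diskpos;
  return (pa < pb) ? -1 : (pa > pb) ? 1 : 0;
}

static void sort_write_burst_sketch(struct pending_write_sketch *burst,
                                    size_t count)
{
  /* Ascending disk order keeps the flush mostly sequential on disk. */
  qsort(burst, count, sizeof(*burst), cmp_pending_write_sketch);
}
#endif /* KEYCACHE_USAGE_SKETCHES */
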
3147
3148
3149
/*
3150
  Flush all key blocks for a file to disk, but don't take any mutex locks.
3151
3152
  SYNOPSIS
3153
    flush_key_blocks_int()
3154
      keycache            pointer to a key cache data structure
3155
      file                handler for the file to flush to
3156
      flush_type          type of the flush
3157
3158
  NOTES
3159
    This function doesn't take any mutex locks because it needs to be called both
3160
    from flush_key_blocks and flush_all_key_blocks (the latter one does the
3161
    mutex lock in the resize_key_cache() function).
3162
3163
    We only care about changed blocks that exist when the function is
3164
    entered. We do not guarantee that all changed blocks of the file are
3165
    flushed if more blocks change while this function is running.
3166
3167
  RETURN
3168
    0   ok
3169
    1  error
3170
*/
3171
3172
static int flush_key_blocks_int(KEY_CACHE *keycache,
1241.9.1 by Monty Taylor
Removed global.h. Fixed all the headers.
3173
				int file, enum flush_type type)
1 by brian
clean slate
3174
{
3175
  BLOCK_LINK *cache_buff[FLUSH_CACHE],**cache;
3176
  int last_errno= 0;
3177
  int last_errcnt= 0;
3178
3179
  cache= cache_buff;
3180
  if (keycache->disk_blocks > 0 &&
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
3181
      (!internal::my_disable_flush_key_blocks || type != FLUSH_KEEP))
1 by brian
clean slate
3182
  {
3183
    /* Key cache exists and flush is not disabled */
3184
    int error= 0;
482 by Brian Aker
Remove uint.
3185
    uint32_t count= FLUSH_CACHE;
1 by brian
clean slate
3186
    BLOCK_LINK **pos,**end;
3187
    BLOCK_LINK *first_in_switch= NULL;
3188
    BLOCK_LINK *last_in_flush;
3189
    BLOCK_LINK *last_for_update;
779.3.10 by Monty Taylor
Turned on -Wshadow.
3190
    BLOCK_LINK *last_in_switch;
1 by brian
clean slate
3191
    BLOCK_LINK *block, *next;
3192
3193
    if (type != FLUSH_IGNORE_CHANGED)
3194
    {
3195
      /*
3196
         Count how many key blocks we have to cache to be able
3197
         to flush all dirty pages with minimum seek moves
3198
      */
3199
      count= 0;
3200
      for (block= keycache->changed_blocks[FILE_HASH(file)] ;
3201
           block ;
3202
           block= block->next_changed)
3203
      {
3204
        if ((block->hash_link->file == file) &&
3205
            !(block->status & BLOCK_IN_FLUSH))
3206
        {
3207
          count++;
51.1.123 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
3208
          assert(count<= keycache->blocks_used);
1 by brian
clean slate
3209
        }
3210
      }
3211
      /*
3212
        Allocate a new buffer only if it is bigger than the one we have.
3213
        Assure that we always have some entries for the case that new
3214
        changed blocks appear while we need to wait for something.
3215
      */
3216
      if ((count > FLUSH_CACHE) &&
656.1.25 by Monty Taylor
Removed my_malloc stuff from storage/
3217
          !(cache= (BLOCK_LINK**) malloc(sizeof(BLOCK_LINK*)*count)))
1 by brian
clean slate
3218
        cache= cache_buff;
3219
      /*
3220
        After a restart there could be more changed blocks than now.
3221
        So we should not let count become smaller than the fixed buffer.
3222
      */
3223
      if (cache == cache_buff)
3224
        count= FLUSH_CACHE;
3225
    }
3226
3227
    /* Retrieve the blocks and write them to a buffer to be flushed */
3228
restart:
3229
    last_in_flush= NULL;
3230
    last_for_update= NULL;
3231
    end= (pos= cache)+count;
3232
    for (block= keycache->changed_blocks[FILE_HASH(file)] ;
3233
         block ;
3234
         block= next)
3235
    {
3236
      next= block->next_changed;
3237
      if (block->hash_link->file == file)
3238
      {
3239
        if (!(block->status & (BLOCK_IN_FLUSH | BLOCK_FOR_UPDATE)))
3240
        {
3241
          /*
3242
            Note: The special handling of BLOCK_IN_SWITCH is obsolete
3243
            since we set BLOCK_IN_FLUSH if the eviction includes a
3244
            flush. It can be removed in a later version.
3245
          */
3246
          if (!(block->status & BLOCK_IN_SWITCH))
3247
          {
3248
            /*
3249
              We care only for the blocks for which flushing was not
3250
              initiated by another thread and which are not in eviction.
3251
              Registering a request on the block unlinks it from the LRU
3252
              ring and protects against eviction.
3253
            */
3254
            reg_requests(keycache, block, 1);
3255
            if (type != FLUSH_IGNORE_CHANGED)
3256
            {
3257
              /* It's not a temporary file */
3258
              if (pos == end)
3259
              {
3260
                /*
3261
                  This should happen relatively seldom. Remove the
3262
                  request because we won't do anything with the block
3263
                  but restart and pick it again in the next iteration.
3264
                */
3265
                unreg_request(keycache, block, 0);
3266
                /*
3267
                  This happens only if there is not enough
3268
                  memory for the big block
3269
                */
3270
                if ((error= flush_cached_blocks(keycache, file, cache,
3271
                                                end,type)))
3272
                {
3273
                  /* Do not loop infinitely trying to flush in vain. */
3274
                  if ((last_errno == error) && (++last_errcnt > 5))
3275
                    goto err;
3276
                  last_errno= error;
3277
                }
3278
                /*
3279
                  Restart the scan as some other thread might have changed
3280
                  the changed blocks chain: the blocks that were in switch
3281
                  state before the flush started have to be excluded
3282
                */
3283
                goto restart;
3284
              }
3285
              /*
3286
                Mark the block with BLOCK_IN_FLUSH in order not to let
3287
                other threads use it for new pages and interfere with
3288
                our sequence of flushing dirty file pages. We must not
3289
                set this flag before actually putting the block on the
3290
                write burst array called 'cache'.
3291
              */
3292
              block->status|= BLOCK_IN_FLUSH;
3293
              /* Add block to the array for a write burst. */
3294
              *pos++= block;
3295
            }
3296
            else
3297
            {
3298
              /* It's a temporary file */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
3299
              assert(!(block->status & BLOCK_REASSIGNED));
1 by brian
clean slate
3300
3301
              /*
3302
                free_block() must not be called with BLOCK_CHANGED. Note
3303
                that we must not change the BLOCK_CHANGED flag outside of
3304
                link_to_file_list() so that it is always in the correct
3305
                queue and the *blocks_changed counters are correct.
3306
              */
3307
              link_to_file_list(keycache, block, file, 1);
3308
              if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH)))
3309
              {
3310
                /* A request has been registered against the block above. */
3311
                free_block(keycache, block);
3312
              }
3313
              else
3314
              {
3315
                /*
3316
                  Link the block into the LRU ring if it's the last
3317
                  submitted request for the block. This enables eviction
3318
                  for the block. A request has been registered against
3319
                  the block above.
3320
                */
3321
                unreg_request(keycache, block, 1);
3322
              }
3323
            }
3324
          }
3325
          else
3326
          {
3327
            /*
3328
              Link the block into a list of blocks 'in switch'.
3329
3330
              WARNING: Here we introduce a place where a changed block
3331
              is not in the changed_blocks hash! This is acceptable for
3332
              a BLOCK_IN_SWITCH. Never try this for another situation.
3333
              Other parts of the key cache code rely on changed blocks
3334
              being in the changed_blocks hash.
3335
            */
3336
            unlink_changed(block);
3337
            link_changed(block, &first_in_switch);
3338
          }
3339
        }
3340
        else if (type != FLUSH_KEEP)
3341
        {
3342
          /*
3343
            During the normal flush at end of statement (FLUSH_KEEP) we
3344
            do not need to ensure that blocks in flush or update by
3345
            other threads are flushed. They will be flushed by them
3346
            later. In all other cases we must assure that we do not have
3347
            any changed block of this file in the cache when this
3348
            function returns.
3349
          */
3350
          if (block->status & BLOCK_IN_FLUSH)
3351
          {
3352
            /* Remember the last block found to be in flush. */
3353
            last_in_flush= block;
3354
          }
3355
          else
3356
          {
3357
            /* Remember the last block found to be selected for update. */
3358
            last_for_update= block;
3359
          }
3360
        }
3361
      }
3362
    }
3363
    if (pos != cache)
3364
    {
3365
      if ((error= flush_cached_blocks(keycache, file, cache, pos, type)))
3366
      {
3367
        /* Do not loop infinitely trying to flush in vain. */
3368
        if ((last_errno == error) && (++last_errcnt > 5))
3369
          goto err;
3370
        last_errno= error;
3371
      }
3372
      /*
3373
        Do not restart here during the normal flush at end of statement
3374
        (FLUSH_KEEP). We have now flushed at least all blocks that were
3375
        changed when entering this function. In all other cases we must
3376
        assure that we do not have any changed block of this file in the
3377
        cache when this function returns.
3378
      */
3379
      if (type != FLUSH_KEEP)
3380
        goto restart;
3381
    }
3382
    if (last_in_flush)
3383
    {
3384
      /*
3385
        There are no blocks to be flushed by this thread, but blocks in
3386
        flush by other threads. Wait until one of the blocks is flushed.
3387
        Re-check the condition for last_in_flush. We may have unlocked
3388
        the cache_lock in flush_cached_blocks(). The state of the block
3389
        could have changed.
3390
      */
3391
      if (last_in_flush->status & BLOCK_IN_FLUSH)
3392
        wait_on_queue(&last_in_flush->wqueue[COND_FOR_SAVED],
3393
                      &keycache->cache_lock);
3394
      /* Be sure not to lose a block. They may be flushed in random order. */
3395
      goto restart;
3396
    }
3397
    if (last_for_update)
3398
    {
3399
      /*
3400
        There are no blocks to be flushed by this thread, but blocks for
3401
        update by other threads. Wait until one of the blocks is updated.
3402
        Re-check the condition for last_for_update. We may have unlocked
3403
        the cache_lock in flush_cached_blocks(). The state of the block
3404
        could have changed.
3405
      */
3406
      if (last_for_update->status & BLOCK_FOR_UPDATE)
3407
        wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
3408
                      &keycache->cache_lock);
3409
      /* The block is now changed. Flush it. */
3410
      goto restart;
3411
    }
3412
3413
    /*
3414
      Wait until the list of blocks in switch is empty. The threads that
3415
      are switching these blocks will relink them to clean file chains
3416
      while we wait and thus empty the 'first_in_switch' chain.
3417
    */
3418
    while (first_in_switch)
3419
    {
3420
      wait_on_queue(&first_in_switch->wqueue[COND_FOR_SAVED],
3421
                    &keycache->cache_lock);
3422
      /*
3423
        Do not restart here. We have flushed all blocks that were
3424
        changed when entering this function and were not marked for
3425
        eviction. Other threads have now flushed all remaining blocks in
3426
        the course of their eviction.
3427
      */
3428
    }
3429
3430
    if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE))
3431
    {
779.3.10 by Monty Taylor
Turned on -Wshadow.
3432
      last_for_update= NULL;
3433
      last_in_switch= NULL;
482 by Brian Aker
Remove uint.
3434
      uint32_t total_found= 0;
3435
      uint32_t found;
1 by brian
clean slate
3436
3437
      /*
3438
        Finally free all clean blocks for this file.
3439
        During resize this may be run by two threads in parallel.
3440
      */
3441
      do
3442
      {
3443
        found= 0;
3444
        for (block= keycache->file_blocks[FILE_HASH(file)] ;
3445
             block ;
3446
             block= next)
3447
        {
3448
          /* Remember the next block. After freeing we cannot get at it. */
3449
          next= block->next_changed;
3450
3451
          /* Changed blocks cannot appear in the file_blocks hash. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
3452
          assert(!(block->status & BLOCK_CHANGED));
1 by brian
clean slate
3453
          if (block->hash_link->file == file)
3454
          {
3455
            /* We must skip blocks that will be changed. */
3456
            if (block->status & BLOCK_FOR_UPDATE)
3457
            {
3458
              last_for_update= block;
3459
              continue;
3460
            }
3461
3462
            /*
3463
              We must not free blocks in eviction (BLOCK_IN_EVICTION |
3464
              BLOCK_IN_SWITCH) or blocks intended to be freed
3465
              (BLOCK_REASSIGNED).
3466
            */
3467
            if (!(block->status & (BLOCK_IN_EVICTION | BLOCK_IN_SWITCH |
3468
                                   BLOCK_REASSIGNED)))
3469
            {
236.2.2 by rbradfor
Using correct coding standards for variable initialization
3470
              struct st_hash_link *next_hash_link= NULL;
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
3471
              internal::my_off_t            next_diskpos= 0;
1241.9.1 by Monty Taylor
Removed global.h. Fixed all the headers.
3472
              int                next_file= 0;
482 by Brian Aker
Remove uint.
3473
              uint32_t                next_status= 0;
3474
              uint32_t                hash_requests= 0;
1 by brian
clean slate
3475
3476
              total_found++;
3477
              found++;
51.1.123 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
3478
              assert(found <= keycache->blocks_used);
1 by brian
clean slate
3479
3480
              /*
3481
                Register a request. This unlinks the block from the LRU
3482
                ring and protects it against eviction. This is required
3483
                by free_block().
3484
              */
3485
              reg_requests(keycache, block, 1);
3486
3487
              /*
3488
                free_block() may need to wait for readers of the block.
3489
                This is the moment where the other thread can move the
3490
                'next' block from the chain. free_block() needs to wait
3491
                if there are requests for the block pending.
3492
              */
3493
              if (next && (hash_requests= block->hash_link->requests))
3494
              {
3495
                /* Copy values from the 'next' block and its hash_link. */
3496
                next_status=    next->status;
3497
                next_hash_link= next->hash_link;
3498
                next_diskpos=   next_hash_link->diskpos;
3499
                next_file=      next_hash_link->file;
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
3500
                assert(next == next_hash_link->block);
1 by brian
clean slate
3501
              }
3502
3503
              free_block(keycache, block);
3504
              /*
3505
                If we had to wait and the state of the 'next' block
3506
                changed, break the inner loop. 'next' may no longer be
3507
                part of the current chain.
3508
3509
                We do not want to break the loop after every free_block(),
3510
                not even only after waits. The chain might be quite long
3511
                and contain blocks for many files. Traversing it again and
3512
                again to find more blocks for this file could become quite
3513
                inefficient.
3514
              */
3515
              if (next && hash_requests &&
3516
                  ((next_status    != next->status) ||
3517
                   (next_hash_link != next->hash_link) ||
3518
                   (next_file      != next_hash_link->file) ||
3519
                   (next_diskpos   != next_hash_link->diskpos) ||
3520
                   (next           != next_hash_link->block)))
3521
                break;
3522
            }
3523
            else
3524
            {
3525
              last_in_switch= block;
3526
            }
3527
          }
3528
        } /* end for block in file_blocks */
3529
      } while (found);
3530
3531
      /*
3532
        If any clean block has been found, we may have waited for it to
3533
        become free. In this case it could be possible that another clean
3534
        block became dirty. This is possible if the write request existed
3535
        before the flush started (BLOCK_FOR_UPDATE). Re-check the hashes.
3536
      */
3537
      if (total_found)
3538
        goto restart;
3539
3540
      /*
3541
        To avoid an infinite loop, wait until one of the blocks marked
3542
        for update is updated.
3543
      */
3544
      if (last_for_update)
3545
      {
3546
        /* We did not wait. Block must not have changed status. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
3547
        assert(last_for_update->status & BLOCK_FOR_UPDATE);
1 by brian
clean slate
3548
        wait_on_queue(&last_for_update->wqueue[COND_FOR_REQUESTED],
3549
                      &keycache->cache_lock);
3550
        goto restart;
3551
      }
3552
3553
      /*
3554
        To avoid an infinite loop, wait until one of the blocks marked
3555
        for eviction is switched.
3556
      */
3557
      if (last_in_switch)
3558
      {
3559
        /* We did not wait. Block must not have changed status. */
51.1.124 by Jay Pipes
Round 3 - removal custom KEYCACHE_DEBUG_LOG and fail_test code
3560
        assert(last_in_switch->status & (BLOCK_IN_EVICTION |
1 by brian
clean slate
3561
                                              BLOCK_IN_SWITCH |
3562
                                              BLOCK_REASSIGNED));
3563
        wait_on_queue(&last_in_switch->wqueue[COND_FOR_SAVED],
3564
                      &keycache->cache_lock);
3565
        goto restart;
3566
      }
3567
3568
    } /* if (! (type == FLUSH_KEEP || type == FLUSH_FORCE_WRITE)) */
3569
3570
  } /* if (keycache->disk_blocks > 0 */
3571
3572
err:
3573
  if (cache != cache_buff)
481 by Brian Aker
Remove all of uchar.
3574
    free((unsigned char*) cache);
1 by brian
clean slate
3575
  if (last_errno)
3576
    errno=last_errno;                /* Return first error */
51.1.122 by Jay Pipes
Round 2 - removal custom KEYCACHE_DEBUG and THREAD_TRACE from keycache
3577
  return(last_errno != 0);
1 by brian
clean slate
3578
}
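
/*
  Illustrative sketch (not part of the original source): flush_key_blocks_int()
  above finds the dirty blocks of one file by walking the single
  changed_blocks[] bucket selected by FILE_HASH(file); blocks of other files
  that hash into the same bucket are simply skipped. A standalone model of
  that per-file counting pass, with hypothetical types and a hypothetical
  stand-in for FILE_HASH(), looks like this.
*/
#ifdef KEYCACHE_USAGE_SKETCHES            /* hypothetical guard, never defined */
struct changed_block_sketch
{
  int file;                                /* file the block belongs to */
  struct changed_block_sketch *next_changed;
};

static unsigned count_changed_for_file_sketch(struct changed_block_sketch **buckets,
                                              unsigned n_buckets, int file)
{
  unsigned count= 0;
  /* Hypothetical hash: any function of the file descriptor will do here. */
  struct changed_block_sketch *block= buckets[(unsigned) file % n_buckets];
  for ( ; block ; block= block->next_changed)
    if (block->file == file)
      count++;
  return count;
}
#endif /* KEYCACHE_USAGE_SKETCHES */
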
3579
3580
3581
/*
3582
  Flush all blocks for a file to disk
3583
3584
  SYNOPSIS
3585
3586
    flush_key_blocks()
3587
      keycache            pointer to a key cache data structure
3588
      file                handler for the file to flush to
3589
      flush_type          type of the flush
3590
3591
  RETURN
3592
    0   ok
3593
    1  error
3594
*/
3595
3596
int flush_key_blocks(KEY_CACHE *keycache,
1241.9.1 by Monty Taylor
Removed global.h. Fixed all the headers.
3597
                     int file, enum flush_type type)
1 by brian
clean slate
3598
{
3599
  int res= 0;
3600
3601
  if (!keycache->key_cache_inited)
51.1.122 by Jay Pipes
Round 2 - removal custom KEYCACHE_DEBUG and THREAD_TRACE from keycache
3602
    return(0);
1 by brian
clean slate
3603
3604
  keycache_pthread_mutex_lock(&keycache->cache_lock);
3605
  /* While waiting for lock, keycache could have been ended. */
3606
  if (keycache->disk_blocks > 0)
3607
  {
3608
    inc_counter_for_resize_op(keycache);
3609
    res= flush_key_blocks_int(keycache, file, type);
3610
    dec_counter_for_resize_op(keycache);
3611
  }
3612
  keycache_pthread_mutex_unlock(&keycache->cache_lock);
51.1.122 by Jay Pipes
Round 2 - removal custom KEYCACHE_DEBUG and THREAD_TRACE from keycache
3613
  return(res);
1 by brian
clean slate
3614
}
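
/*
  Illustrative usage sketch (not part of the original source): a caller that
  wants all dirty key blocks of one index file on disk while keeping the
  blocks cached for further reads can call flush_key_blocks() above with
  FLUSH_FORCE_WRITE; the routine takes the cache_lock itself, so no external
  locking is needed. The guard macro and function name are hypothetical.
*/
#ifdef KEYCACHE_USAGE_SKETCHES            /* hypothetical guard, never defined */
static int sync_index_file_sketch(KEY_CACHE *keycache, int index_file)
{
  /* Returns 0 on success, 1 on error, exactly like flush_key_blocks(). */
  return flush_key_blocks(keycache, index_file, FLUSH_FORCE_WRITE);
}
#endif /* KEYCACHE_USAGE_SKETCHES */
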


/*
  Flush all blocks in the key cache to disk.

  SYNOPSIS
    flush_all_key_blocks()
      keycache                  pointer to key cache root structure

  DESCRIPTION
    Flushing of the whole key cache is done in two phases.

    1. Flush all changed blocks, waiting for them if necessary. Loop
    until there is no changed block left in the cache.

    2. Free all clean blocks. Normally this means free all blocks. The
    changed blocks were flushed in phase 1 and became clean. However we
    may need to wait for blocks that are read by other threads. While we
    wait, a clean block could become changed if that operation started
    before the resize operation started. To be safe we must restart at
    phase 1.

    When we can run through the changed_blocks and file_blocks hashes
    without finding a block any more, then we are done.

    Note that we hold keycache->cache_lock all the time unless we need
    to wait for something.

  RETURN
    0           OK
    != 0        Error
*/

static int flush_all_key_blocks(KEY_CACHE *keycache)
{
  BLOCK_LINK    *block;
  uint32_t      total_found;
  uint32_t      found;
  uint32_t      idx;

  do
  {
    safe_mutex_assert_owner(&keycache->cache_lock);
    total_found= 0;

    /*
      Phase 1: Flush all changed blocks, waiting for them if necessary.
      Loop until there is no changed block left in the cache.
    */
    do
    {
      found= 0;
      /* Step over the whole changed_blocks hash array. */
      for (idx= 0; idx < CHANGED_BLOCKS_HASH; idx++)
      {
        /*
          If an array element is non-empty, use the first block from its
          chain to find a file for flush. All changed blocks for this
          file are flushed. So the same block will not appear at this
          place again with the next iteration. New writes for blocks are
          not accepted during the flush. If multiple files share the
          same hash bucket, one of them will be flushed per iteration
          of the outer loop of phase 1.
        */
        if ((block= keycache->changed_blocks[idx]))
        {
          found++;
          /*
            Flush dirty blocks but do not free them yet. They can be used
            for reading until all other blocks are flushed too.
          */
          if (flush_key_blocks_int(keycache, block->hash_link->file,
                                   FLUSH_FORCE_WRITE))
            return(1);
        }
      }

    } while (found);

    /*
      Phase 2: Free all clean blocks. Normally this means free all
      blocks. The changed blocks were flushed in phase 1 and became
      clean. However we may need to wait for blocks that are read by
      other threads. While we wait, a clean block could become changed
      if that operation started before the resize operation started. To
      be safe we must restart at phase 1.
    */
    do
    {
      found= 0;
      /* Step over the whole file_blocks hash array. */
      for (idx= 0; idx < CHANGED_BLOCKS_HASH; idx++)
      {
        /*
          If an array element is non-empty, use the first block from its
          chain to find a file for flush. All blocks for this file are
          freed. So the same block will not appear at this place again
          with the next iteration. If multiple files share the same hash
          bucket, one of them will be flushed per iteration of the outer
          loop of phase 2.
        */
        if ((block= keycache->file_blocks[idx]))
        {
          total_found++;
          found++;
          if (flush_key_blocks_int(keycache, block->hash_link->file,
                                   FLUSH_RELEASE))
            return(1);
        }
      }

    } while (found);

    /*
      If any clean block has been found, we may have waited for it to
      become free. In this case it could be possible that another clean
      block became dirty. This is possible if the write request existed
      before the resize started (BLOCK_FOR_UPDATE). Re-check the hashes.
    */
  } while (total_found);
  return(0);
}
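
/*
  Condensed shape of the two-phase loop above, for readers skimming the
  function (a paraphrase of the code, not additional behaviour):

    do
    {
      total_found= 0;
      do { flush changed blocks, one file per bucket hit   } while (found);  // phase 1
      do { free clean blocks, possibly waiting for readers } while (found);  // phase 2
    } while (total_found);   // if phase 2 waited, dirty blocks may exist again
*/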


/*
  Reset the counters of a key cache.

  SYNOPSIS
    reset_key_cache_counters()

  DESCRIPTION
    This procedure is used by process_key_caches() to reset the key_cache.

  RETURN
    none (the procedure cannot fail)
*/

void reset_key_cache_counters()
{
  dflt_key_cache->global_blocks_changed= 0;   /* Key_blocks_not_flushed */
  dflt_key_cache->global_cache_r_requests= 0; /* Key_read_requests */
  dflt_key_cache->global_cache_read= 0;       /* Key_reads */
  dflt_key_cache->global_cache_w_requests= 0; /* Key_write_requests */
  dflt_key_cache->global_cache_write= 0;      /* Key_writes */
}
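
/*
  Usage example (an illustrative sketch): clearing the statistics before a
  benchmark run and deriving a rough miss rate from the global counters of
  the default cache afterwards:

    reset_key_cache_counters();
    run_benchmark();                          // hypothetical workload
    if (dflt_key_cache->global_cache_r_requests)
    {
      double miss_rate=
        (double) dflt_key_cache->global_cache_read /      // Key_reads
        (double) dflt_key_cache->global_cache_r_requests; // Key_read_requests
      report(miss_rate);                      // hypothetical reporting helper
    }
*/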


#if defined(KEYCACHE_TIMEOUT)

static inline
unsigned int hash_link_number(HASH_LINK *hash_link, KEY_CACHE *keycache)
{
  return ((unsigned int) (((char*)hash_link-(char *) keycache->hash_link_root)/
                          sizeof(HASH_LINK)));
}

static inline
unsigned int block_number(BLOCK_LINK *block, KEY_CACHE *keycache)
{
  return ((unsigned int) (((char*)block-(char *)keycache->block_root)/
                          sizeof(BLOCK_LINK)));
}

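/*
  These helpers recover an element's array index from its pointer: the byte
  distance to the array base divided by the element size. The same idiom on
  a plain array, as a minimal sketch (the variables here are hypothetical):

    BLOCK_LINK *base= keycache->block_root;
    BLOCK_LINK *elem= &base[17];
    unsigned int idx= (unsigned int)
      (((char*) elem - (char*) base) / sizeof(BLOCK_LINK));   // idx == 17
*/
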

#define KEYCACHE_DUMP_FILE  "keycache_dump.txt"
#define MAX_QUEUE_LEN  100


static void keycache_dump(KEY_CACHE *keycache)
{
  FILE *keycache_dump_file=fopen(KEYCACHE_DUMP_FILE, "w");
  internal::st_my_thread_var *last;
  internal::st_my_thread_var *thread;
  BLOCK_LINK *block;
  HASH_LINK *hash_link;
  KEYCACHE_PAGE *page;
  uint32_t i;

  /*
    Note: this diagnostic code is compiled only with KEYCACHE_TIMEOUT.
    'thread' is printed here before it is assigned below; it would need to
    be initialized to the calling thread's my_thread_var for this line to
    report the dumping thread.
  */
  fprintf(keycache_dump_file, "thread:%u\n", thread->id);

  i=0;
  thread=last=waiting_for_hash_link.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for hash link\n");
  if (thread)
    do
    {
      thread=thread->next;
      page= (KEYCACHE_PAGE *) thread->opt_info;
      fprintf(keycache_dump_file,
              "thread:%u, (file,filepos)=(%u,%lu)\n",
              thread->id,(uint) page->file,(uint32_t) page->filepos);
      if (++i == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);

  i=0;
  thread=last=waiting_for_block.last_thread;
  fprintf(keycache_dump_file, "queue of threads waiting for block\n");
  if (thread)
    do
    {
      thread=thread->next;
      hash_link= (HASH_LINK *) thread->opt_info;
      fprintf(keycache_dump_file,
              "thread:%u hash_link:%u (file,filepos)=(%u,%u)\n",
              thread->id, (uint) hash_link_number(hash_link, keycache),
              (uint) hash_link->file,(uint32_t) hash_link->diskpos);
      if (++i == MAX_QUEUE_LEN)
        break;
    }
    while (thread != last);

  for (i=0 ; i< keycache->blocks_used ; i++)
  {
    int j;
    block= &keycache->block_root[i];
    hash_link= block->hash_link;
    fprintf(keycache_dump_file,
            "block:%u hash_link:%d status:%x #requests=%u "
            "waiting_for_readers:%d\n",
            i, (int) (hash_link ? hash_link_number(hash_link, keycache) : -1),
            block->status, block->requests, block->condvar ? 1 : 0);
    for (j=0 ; j < 2; j++)
    {
      KEYCACHE_WQUEUE *wqueue=&block->wqueue[j];
      thread= last= wqueue->last_thread;
      fprintf(keycache_dump_file, "queue #%d\n", j);
      if (thread)
      {
        do
        {
          thread=thread->next;
          fprintf(keycache_dump_file,
                  "thread:%u\n", thread->id);
          if (++i == MAX_QUEUE_LEN)
            break;
        }
        while (thread != last);
      }
    }
  }
  fprintf(keycache_dump_file, "LRU chain:");
  block= keycache->used_last;
  if (block)
  {
    do
    {
      block= block->next_used;
      fprintf(keycache_dump_file,
              "block:%u, ", block_number(block, keycache));
    }
    while (block != keycache->used_last);
  }
  fprintf(keycache_dump_file, "\n");

  fclose(keycache_dump_file);
}


static int keycache_pthread_cond_wait(pthread_cond_t *cond,
                                      pthread_mutex_t *mutex)
{
  int rc;
  struct timeval  now;            /* time when we started waiting        */
  struct timespec timeout;        /* timeout value for the wait function */
  struct timezone tz;

  /* Get current time */
  gettimeofday(&now, &tz);
  /* Prepare timeout value */
  timeout.tv_sec= now.tv_sec + KEYCACHE_TIMEOUT;
  /*
    timeval uses microseconds.
    timespec uses nanoseconds.
    1 microsecond = 1000 nanoseconds.
  */
  timeout.tv_nsec= now.tv_usec * 1000;
  rc= pthread_cond_timedwait(cond, mutex, &timeout);
  if (rc == ETIMEDOUT || rc == ETIME)
  {
    /*
      No cache handle is available in this wrapper; the default key cache
      is assumed for the dump.
    */
    keycache_dump(dflt_key_cache);
  }

  assert(rc != ETIMEDOUT);
  return rc;
}
#endif /* defined(KEYCACHE_TIMEOUT) */
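
/*
  The wrapper above turns the relative KEYCACHE_TIMEOUT into the absolute
  deadline that pthread_cond_timedwait() expects. A minimal self-contained
  sketch of the same technique, assuming the caller owns its own mutex,
  condition variable and 'ready' flag (all hypothetical names):

    struct timeval  now;
    struct timespec deadline;

    gettimeofday(&now, NULL);
    deadline.tv_sec=  now.tv_sec + KEYCACHE_TIMEOUT;   // seconds part
    deadline.tv_nsec= now.tv_usec * 1000;              // usec -> nsec

    pthread_mutex_lock(&mutex);
    int rc= 0;
    while (!ready && rc != ETIMEDOUT)
      rc= pthread_cond_timedwait(&cond, &mutex, &deadline);
    pthread_mutex_unlock(&mutex);
*/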