~drizzle-trunk/drizzle/development

Viewing changes to plugin/pbxt/src/cache_xt.cc

Remove dead memset call.

 
1
/* Copyright (c) 2005 PrimeBase Technologies GmbH, Germany
 
2
 *
 
3
 * PrimeBase XT
 
4
 *
 
5
 * This program is free software; you can redistribute it and/or modify
 
6
 * it under the terms of the GNU General Public License as published by
 
7
 * the Free Software Foundation; either version 2 of the License, or
 
8
 * (at your option) any later version.
 
9
 *
 
10
 * This program is distributed in the hope that it will be useful,
 
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
13
 * GNU General Public License for more details.
 
14
 *
 
15
 * You should have received a copy of the GNU General Public License
 
16
 * along with this program; if not, write to the Free Software
 
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 
18
 *
 
19
 * 2005-05-24   Paul McCullagh
 
20
 *
 
21
 * H&G2JCtL
 
22
 */
 
23
 
 
24
#include "xt_config.h"
 
25
 
 
26
#ifdef DRIZZLED
 
27
#include <bitset>
 
28
#endif
 
29
 
 
30
#ifndef XT_WIN
 
31
#include <unistd.h>
 
32
#endif
 
33
 
 
34
#include <stdio.h>
 
35
#include <time.h>
 
36
 
 
37
#include "pthread_xt.h"
 
38
#include "thread_xt.h"
 
39
#include "filesys_xt.h"
 
40
#include "cache_xt.h"
 
41
#include "table_xt.h"
 
42
#include "trace_xt.h"
 
43
#include "util_xt.h"
 
44
 
 
45
#define XT_TIME_DIFF(start, now) (\
 
46
        ((xtWord4) (now) < (xtWord4) (start)) ? \
 
47
        ((xtWord4) 0XFFFFFFFF - ((xtWord4) (start) - (xtWord4) (now))) : \
 
48
        ((xtWord4) (now) - (xtWord4) (start)))
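
/* XT_TIME_DIFF computes (now - start) as an unsigned 32-bit count, with an
 * explicit branch for the case where the counter has wrapped between the
 * two samples. A worked example (illustrative values only):
 *
 *   start = 0xFFFFFFF0   (sampled just before the wrap)
 *   now   = 0x00000010   (sampled just after the wrap)
 *
 *   now < start, so the macro yields
 *     0xFFFFFFFF - (0xFFFFFFF0 - 0x00000010) = 0x1F
 *
 * i.e. roughly the 0x20 ticks that actually elapsed.
 */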
 
49
 
 
50
/*
 
51
 * -----------------------------------------------------------------------
 
52
 * D I S K   C A C H E
 
53
 */
 
54
 
 
55
#define IDX_CAC_SEGMENT_COUNT           ((off_t) 1 << XT_INDEX_CACHE_SEGMENT_SHIFTS)
 
56
#define IDX_CAC_SEGMENT_MASK            (IDX_CAC_SEGMENT_COUNT - 1)
 
57
 
 
58
#ifdef XT_NO_ATOMICS
 
59
#define IDX_CAC_USE_PTHREAD_RW
 
60
#else
 
61
//#define IDX_CAC_USE_PTHREAD_RW
 
62
#define IDX_CAC_USE_XSMUTEX
 
63
//#define IDX_USE_SPINXSLOCK
 
64
#endif
 
65
 
 
66
#if defined(IDX_CAC_USE_PTHREAD_RW)
 
67
#define IDX_CAC_LOCK_TYPE                               xt_rwlock_type
 
68
#define IDX_CAC_INIT_LOCK(s, i)                 xt_init_rwlock_with_autoname(s, &(i)->cs_lock)
 
69
#define IDX_CAC_FREE_LOCK(s, i)                 xt_free_rwlock(&(i)->cs_lock)   
 
70
#define IDX_CAC_READ_LOCK(i, o)                 xt_slock_rwlock_ns(&(i)->cs_lock)
 
71
#define IDX_CAC_WRITE_LOCK(i, o)                xt_xlock_rwlock_ns(&(i)->cs_lock)
 
72
#define IDX_CAC_UNLOCK(i, o)                    xt_unlock_rwlock_ns(&(i)->cs_lock)
 
73
#elif defined(IDX_CAC_USE_XSMUTEX)
 
74
#define IDX_CAC_LOCK_TYPE                               XTMutexXSLockRec
 
75
#define IDX_CAC_INIT_LOCK(s, i)                 xt_xsmutex_init_with_autoname(s, &(i)->cs_lock)
 
76
#define IDX_CAC_FREE_LOCK(s, i)                 xt_xsmutex_free(s, &(i)->cs_lock)       
 
77
#define IDX_CAC_READ_LOCK(i, o)                 xt_xsmutex_slock(&(i)->cs_lock, (o)->t_id)
 
78
#define IDX_CAC_WRITE_LOCK(i, o)                xt_xsmutex_xlock(&(i)->cs_lock, (o)->t_id)
 
79
#define IDX_CAC_UNLOCK(i, o)                    xt_xsmutex_unlock(&(i)->cs_lock, (o)->t_id)
 
80
#elif defined(IDX_CAC_USE_SPINXSLOCK)
 
81
#define IDX_CAC_LOCK_TYPE                               XTSpinXSLockRec
 
82
#define IDX_CAC_INIT_LOCK(s, i)                 xt_spinxslock_init_with_autoname(s, &(i)->cs_lock)
 
83
#define IDX_CAC_FREE_LOCK(s, i)                 xt_spinxslock_free(s, &(i)->cs_lock)    
 
84
#define IDX_CAC_READ_LOCK(i, s)                 xt_spinxslock_slock(&(i)->cs_lock, (s)->t_id)
 
85
#define IDX_CAC_WRITE_LOCK(i, s)                xt_spinxslock_xlock(&(i)->cs_lock, FALSE, (s)->t_id)
 
86
#define IDX_CAC_UNLOCK(i, s)                    xt_spinxslock_unlock(&(i)->cs_lock, (s)->t_id)
 
87
#else
 
88
#error Please define the lock type
 
89
#endif
 
90
 
 
91
#ifdef XT_NO_ATOMICS
 
92
#define ID_HANDLE_USE_PTHREAD_RW
 
93
#else
 
94
//#define ID_HANDLE_USE_PTHREAD_RW
 
95
#define ID_HANDLE_USE_SPINLOCK
 
96
#endif
 
97
 
 
98
#if defined(ID_HANDLE_USE_PTHREAD_RW)
 
99
#define ID_HANDLE_LOCK_TYPE                             xt_mutex_type
 
100
#define ID_HANDLE_INIT_LOCK(s, i)               xt_init_mutex_with_autoname(s, i)
 
101
#define ID_HANDLE_FREE_LOCK(s, i)               xt_free_mutex(i)        
 
102
#define ID_HANDLE_LOCK(i)                               xt_lock_mutex_ns(i)
 
103
#define ID_HANDLE_UNLOCK(i)                             xt_unlock_mutex_ns(i)
 
104
#elif defined(ID_HANDLE_USE_SPINLOCK)
 
105
#define ID_HANDLE_LOCK_TYPE                             XTSpinLockRec
 
106
#define ID_HANDLE_INIT_LOCK(s, i)               xt_spinlock_init_with_autoname(s, i)
 
107
#define ID_HANDLE_FREE_LOCK(s, i)               xt_spinlock_free(s, i)  
 
108
#define ID_HANDLE_LOCK(i)                               xt_spinlock_lock(i)
 
109
#define ID_HANDLE_UNLOCK(i)                             xt_spinlock_unlock(i)
 
110
#endif
 
111
 
 
112
#define XT_HANDLE_SLOTS                                 37
 
113
 
 
114
/*
 
115
#ifdef DEBUG
 
116
#define XT_INIT_HANDLE_COUNT                    0
 
117
#define XT_INIT_HANDLE_BLOCKS                   0
 
118
#else
 
119
#define XT_INIT_HANDLE_COUNT                    40
 
120
#define XT_INIT_HANDLE_BLOCKS                   10
 
121
#endif
 
122
*/
 
123
 
 
124
/* A disk cache segment. The cache is divided into a number of segments
 
125
 * to improve concurrency.
 
126
 */
 
127
typedef struct DcSegment {
 
128
        IDX_CAC_LOCK_TYPE       cs_lock;                                                /* The cache segment lock. */
 
129
        XTIndBlockPtr           *cs_hash_table;
 
130
} DcSegmentRec, *DcSegmentPtr;
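
/* How a block is located in the cache (this mirrors the calculation used by
 * ind_cac_fetch() and ind_free_block() below, shown here as a summary):
 *
 *   hash_idx = XT_NODE_ID(address) + (file_id * 223);
 *   seg      = &ind_cac_globals.cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
 *   hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % ind_cac_globals.cg_hash_size;
 *   block    = seg->cs_hash_table[hash_idx];   // then walk the cb_next chain
 *
 * The low bits of the combined address select one of the independently
 * locked segments, and the remaining bits select a hash chain within that
 * segment, so lookups that land in different segments never contend on the
 * same cs_lock.
 */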
 
131
 
 
132
typedef struct DcHandleSlot {
 
133
        ID_HANDLE_LOCK_TYPE     hs_handles_lock;
 
134
        XTIndHandleBlockPtr     hs_free_blocks;
 
135
        XTIndHandlePtr          hs_free_handles;
 
136
        XTIndHandlePtr          hs_used_handles;
 
137
} DcHandleSlotRec, *DcHandleSlotPtr;
 
138
 
 
139
typedef struct DcGlobals {
 
140
        xt_mutex_type           cg_lock;                                                /* The public cache lock. */
 
141
        DcSegmentRec            cg_segment[IDX_CAC_SEGMENT_COUNT];
 
142
        XTIndBlockPtr           cg_blocks;
 
143
#ifdef XT_USE_DIRECT_IO_ON_INDEX
 
144
        xtWord1                         *cg_buffer;
 
145
#endif
 
146
        XTIndBlockPtr           cg_free_list;
 
147
        xtWord4                         cg_free_count;
 
148
        xtWord4                         cg_ru_now;                                              /* A counter as described by Jim Starkey (my thanks) */
 
149
        XTIndBlockPtr           cg_lru_block;
 
150
        XTIndBlockPtr           cg_mru_block;
 
151
        xtWord4                         cg_hash_size;
 
152
        xtWord4                         cg_block_count;
 
153
        xtWord4                         cg_max_free;
 
154
#ifdef DEBUG_CHECK_IND_CACHE
 
155
        u_int                           cg_reserved_by_ots;                             /* Number of blocks reserved by open tables. */
 
156
        u_int                           cg_read_count;                                  /* Number of blocks being read. */
 
157
#endif
 
158
 
 
159
        /* Index cache handles: */
 
160
        DcHandleSlotRec         cg_handle_slot[XT_HANDLE_SLOTS];
 
161
} DcGlobalsRec;
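
/* Note on cg_ru_now and the LRU list (see ind_cac_fetch() below): each time
 * a block is made the most recently used, cg_ru_now is incremented and the
 * new value is stored in the block's cb_ru_time. On a cache hit the block
 * is only moved to the MRU end of the list when
 *
 *   XT_TIME_DIFF(block->cb_ru_time, cg_ru_now) > (cg_block_count >> 1)
 *
 * i.e. when it has not been touched for at least half a cache "generation".
 * Blocks that are already hot stay where they are, which avoids taking the
 * global cg_lock on every hit.
 */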
 
162
 
 
163
static DcGlobalsRec     ind_cac_globals;
 
164
 
 
165
#ifdef XT_USE_MYSYS
 
166
#ifdef xtPublic
 
167
#undef xtPublic
 
168
#endif
 
169
#include "my_global.h"
 
170
#include "my_sys.h"
 
171
#include "keycache.h"
 
172
KEY_CACHE my_cache;
 
173
#undef  pthread_rwlock_rdlock
 
174
#undef  pthread_rwlock_wrlock
 
175
#undef  pthread_rwlock_try_wrlock
 
176
#undef  pthread_rwlock_unlock
 
177
#undef  pthread_mutex_lock
 
178
#undef  pthread_mutex_unlock
 
179
#undef  pthread_cond_wait
 
180
#undef  pthread_cond_broadcast
 
181
#undef  xt_mutex_type
 
182
#define xtPublic
 
183
#endif
 
184
 
 
185
/*
 
186
 * -----------------------------------------------------------------------
 
187
 * INDEX CACHE HANDLES
 
188
 */
 
189
 
 
190
static XTIndHandlePtr ind_alloc_handle()
 
191
{
 
192
        XTIndHandlePtr handle;
 
193
 
 
194
        if (!(handle = (XTIndHandlePtr) xt_calloc_ns(sizeof(XTIndHandleRec))))
 
195
                return NULL;
 
196
        xt_spinlock_init_with_autoname(NULL, &handle->ih_lock);
 
197
        return handle;
 
198
}
 
199
 
 
200
static void ind_free_handle(XTIndHandlePtr handle)
 
201
{
 
202
        xt_spinlock_free(NULL, &handle->ih_lock);
 
203
        xt_free_ns(handle);
 
204
}
 
205
 
 
206
static void ind_handle_exit(XTThreadPtr self)
 
207
{
 
208
        DcHandleSlotPtr         hs;
 
209
        XTIndHandlePtr          handle;
 
210
        XTIndHandleBlockPtr     hptr;
 
211
 
 
212
        for (int i=0; i<XT_HANDLE_SLOTS; i++) {
 
213
                hs = &ind_cac_globals.cg_handle_slot[i];
 
214
 
 
215
                while (hs->hs_used_handles) {
 
216
                        handle = hs->hs_used_handles;
 
217
                        xt_ind_release_handle(handle, FALSE, self);
 
218
                }
 
219
 
 
220
                while (hs->hs_free_blocks) {
 
221
                        hptr = hs->hs_free_blocks;
 
222
                        hs->hs_free_blocks = hptr->hb_next;
 
223
                        xt_free(self, hptr);
 
224
                }
 
225
 
 
226
                while (hs->hs_free_handles) {
 
227
                        handle = hs->hs_free_handles;
 
228
                        hs->hs_free_handles = handle->ih_next;
 
229
                        ind_free_handle(handle);
 
230
                }
 
231
 
 
232
                ID_HANDLE_FREE_LOCK(self, &hs->hs_handles_lock);
 
233
        }
 
234
}
 
235
 
 
236
static void ind_handle_init(XTThreadPtr self)
 
237
{
 
238
        DcHandleSlotPtr         hs;
 
239
 
 
240
        for (int i=0; i<XT_HANDLE_SLOTS; i++) {
 
241
                hs = &ind_cac_globals.cg_handle_slot[i];
 
242
                memset(hs, 0, sizeof(DcHandleSlotRec));
 
243
                ID_HANDLE_INIT_LOCK(self, &hs->hs_handles_lock);
 
244
        }
 
245
}
 
246
 
 
247
//#define CHECK_HANDLE_STRUCTS
 
248
 
 
249
#ifdef CHECK_HANDLE_STRUCTS
 
250
static int gdummy = 0;
 
251
 
 
252
static void ic_stop_here()
 
253
{
 
254
        gdummy = gdummy + 1;
 
255
        printf("Nooo %d!\n", gdummy);
 
256
}
 
257
 
 
258
static void ic_check_handle_structs()
 
259
{
 
260
        XTIndHandlePtr          handle, phandle;
 
261
        XTIndHandleBlockPtr     hptr, phptr;
 
262
        int                                     count = 0;
 
263
        int                                     ctest;
 
264
 
 
265
        phandle = NULL;
 
266
        handle = ind_cac_globals.cg_used_handles;
 
267
        while (handle) {
 
268
                if (handle == phandle)
 
269
                        ic_stop_here();
 
270
                if (handle->ih_prev != phandle)
 
271
                        ic_stop_here();
 
272
                if (handle->ih_cache_reference) {
 
273
                        ctest = handle->x.ih_cache_block->cb_handle_count;
 
274
                        if (ctest == 0 || ctest > 100)
 
275
                                ic_stop_here();
 
276
                }
 
277
                else {
 
278
                        ctest = handle->x.ih_handle_block->hb_ref_count;
 
279
                        if (ctest == 0 || ctest > 100)
 
280
                                ic_stop_here();
 
281
                }
 
282
                phandle = handle;
 
283
                handle = handle->ih_next;
 
284
                count++;
 
285
                if (count > 1000)
 
286
                        ic_stop_here();
 
287
        }
 
288
 
 
289
        count = 0;
 
290
        hptr = ind_cac_globals.cg_free_blocks;
 
291
        while (hptr) {
 
292
                if (hptr == phptr)
 
293
                        ic_stop_here();
 
294
                phptr = hptr;
 
295
                hptr = hptr->hb_next;
 
296
                count++;
 
297
                if (count > 1000)
 
298
                        ic_stop_here();
 
299
        }
 
300
 
 
301
        count = 0;
 
302
        handle = ind_cac_globals.cg_free_handles;
 
303
        while (handle) {
 
304
                if (handle == phandle)
 
305
                        ic_stop_here();
 
306
                phandle = handle;
 
307
                handle = handle->ih_next;
 
308
                count++;
 
309
                if (count > 1000)
 
310
                        ic_stop_here();
 
311
        }
 
312
}
 
313
#endif
 
314
 
 
315
/*
 
316
 * Get a handle to the index block.
 
317
 * This function is called by index scanners (readers).
 
318
 */
 
319
xtPublic XTIndHandlePtr xt_ind_get_handle(XTOpenTablePtr ot, XTIndexPtr ind, XTIndReferencePtr iref)
 
320
{
 
321
        DcHandleSlotPtr hs;
 
322
        XTIndHandlePtr  handle;
 
323
 
 
324
        hs = &ind_cac_globals.cg_handle_slot[iref->ir_block->cb_address % XT_HANDLE_SLOTS];
 
325
 
 
326
        ASSERT_NS(iref->ir_xlock == FALSE);
 
327
        ASSERT_NS(iref->ir_updated == FALSE);
 
328
        ID_HANDLE_LOCK(&hs->hs_handles_lock);
 
329
#ifdef CHECK_HANDLE_STRUCTS
 
330
        ic_check_handle_structs();
 
331
#endif
 
332
        if ((handle = hs->hs_free_handles))
 
333
                hs->hs_free_handles = handle->ih_next;
 
334
        else {
 
335
                if (!(handle = ind_alloc_handle())) {
 
336
                        ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
 
337
                        xt_ind_release(ot, ind, XT_UNLOCK_READ, iref);
 
338
                        return NULL;
 
339
                }
 
340
        }
 
341
        if (hs->hs_used_handles)
 
342
                hs->hs_used_handles->ih_prev = handle;
 
343
        handle->ih_next = hs->hs_used_handles;
 
344
        handle->ih_prev = NULL;
 
345
        handle->ih_address = iref->ir_block->cb_address;
 
346
        handle->ih_cache_reference = TRUE;
 
347
        handle->x.ih_cache_block = iref->ir_block;
 
348
        handle->ih_branch = iref->ir_branch;
 
349
        /* {HANDLE-COUNT-USAGE}
 
350
         * This is safe because:
 
351
         *
 
352
         * I have an Slock on the cache block, and I have
 
353
         * at least an Slock on the index.
 
354
         * So this excludes anyone who is reading 
 
355
         * cb_handle_count in the index.
 
356
         * (all cache block writers, and the freer).
 
357
         *
 
358
         * The increment is safe because I have the list
 
359
         * lock (hs_handles_lock), which is required by anyone else
 
360
         * who increments or decrements this value.
 
361
         */
 
362
        iref->ir_block->cb_handle_count++;
 
363
        hs->hs_used_handles = handle;
 
364
#ifdef CHECK_HANDLE_STRUCTS
 
365
        ic_check_handle_structs();
 
366
#endif
 
367
        ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
 
368
        xt_ind_release(ot, ind, XT_UNLOCK_READ, iref);
 
369
        return handle;
 
370
}
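
/* A typical reader sequence over the handle functions might look like the
 * sketch below (illustrative only; the real callers are in the index
 * scanning code, not in this file):
 *
 *   XTIndHandlePtr h;
 *
 *   if ((h = xt_ind_get_handle(ot, ind, &iref))) {  // releases the block S-lock
 *       xt_ind_lock_handle(h);                      // pin ih_branch while reading
 *       ... read h->ih_branch ...
 *       xt_ind_unlock_handle(h);
 *       xt_ind_release_handle(h, FALSE, ot->ot_thread);
 *   }
 */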
 
371
 
 
372
xtPublic void xt_ind_release_handle(XTIndHandlePtr handle, xtBool have_lock, XTThreadPtr thread)
 
373
{
 
374
        DcHandleSlotPtr hs;
 
375
        XTIndBlockPtr   block = NULL;
 
376
        u_int                   hash_idx = 0;
 
377
        DcSegmentPtr    seg = NULL;
 
378
        XTIndBlockPtr   xblock;
 
379
 
 
380
        (void) thread; /*DRIZZLED*/
 
381
 
 
382
        /* The lock order is:
 
383
         * 1. Cache segment (cs_lock) - This is only taken by ind_free_block()!
 
384
         * 1. S/Slock cache block (cb_lock)
 
385
         * 2. List lock (hs_handles_lock).
 
386
         * 3. Handle lock (ih_lock)
 
387
         */
 
388
        if (!have_lock)
 
389
                xt_spinlock_lock(&handle->ih_lock);
 
390
 
 
391
        /* Get the lock on the cache page if required: */
 
392
        if (handle->ih_cache_reference) {
 
393
                u_int                   file_id;
 
394
                xtIndexNodeID   address;
 
395
 
 
396
                block = handle->x.ih_cache_block;
 
397
 
 
398
                file_id = block->cb_file_id;
 
399
                address = block->cb_address;
 
400
                hash_idx = XT_NODE_ID(address) + (file_id * 223);
 
401
                seg = &ind_cac_globals.cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
 
402
                hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % ind_cac_globals.cg_hash_size;
 
403
        }
 
404
 
 
405
        xt_spinlock_unlock(&handle->ih_lock);
 
406
 
 
407
        /* Because of the lock order, I have to release the
 
408
         * handle before I get a lock on the cache block.
 
409
         *
 
410
         * But, by doing this, this cache block may be gone!
 
411
         */
 
412
        if (block) {
 
413
                IDX_CAC_READ_LOCK(seg, thread);
 
414
                xblock = seg->cs_hash_table[hash_idx];
 
415
                while (xblock) {
 
416
                        if (block == xblock) {
 
417
                                /* Found the block... 
 
418
                                 * {HANDLE-COUNT-SLOCK}
 
419
                                 * 04.05.2009, changed to slock.
 
420
                                 * The xlock causes too much contention
 
421
                                 * on the cache block for read only loads.
 
422
                                 *
 
423
                                 * Is it safe?
 
424
                                 * See below...
 
425
                                 */
 
426
                                XT_IPAGE_READ_LOCK(&block->cb_lock);
 
427
                                goto block_found;
 
428
                        }
 
429
                        xblock = xblock->cb_next;
 
430
                }
 
431
                block = NULL;
 
432
                block_found:
 
433
                IDX_CAC_UNLOCK(seg, thread);
 
434
        }
 
435
 
 
436
        hs = &ind_cac_globals.cg_handle_slot[handle->ih_address % XT_HANDLE_SLOTS];
 
437
 
 
438
        ID_HANDLE_LOCK(&hs->hs_handles_lock);
 
439
#ifdef CHECK_HANDLE_STRUCTS
 
440
        ic_check_handle_structs();
 
441
#endif
 
442
 
 
443
        /* I don't need to lock the handle because I have locked
 
444
         * the list, and no other thread can change the
 
445
         * handle without first getting a lock on the list.
 
446
         *
 
447
         * In addition, the caller is the only owner of the
 
448
         * handle, and the only thread with an independent
 
449
         * reference to the handle.
 
450
         * All other access occur over the list.
 
451
         */
 
452
 
 
453
        /* Remove the reference to the cache or a handle block: */
 
454
        if (handle->ih_cache_reference) {
 
455
                ASSERT_NS(block == handle->x.ih_cache_block);
 
456
                ASSERT_NS(block && block->cb_handle_count > 0);
 
457
                /* {HANDLE-COUNT-USAGE}
 
458
                 * This is safe here because I have excluded
 
459
                 * all readers by taking an Xlock on the
 
460
                 * cache block (CHANGED - see below).
 
461
                 *
 
462
                 * {HANDLE-COUNT-SLOCK}
 
463
                 * 04.05.2009, changed to slock.
 
464
                 * Should be OK, because:
 
465
         * I have a lock on the list lock (hs_handles_lock),
 
466
                 * which prevents concurrent updates to cb_handle_count.
 
467
                 *
 
468
         * I also have a read lock on the cache block
 
469
                 * but not a lock on the index. As a result, we cannot
 
470
         * exclude all index writers (and readers of 
 
471
         * cb_handle_count).
 
472
                 */
 
473
                block->cb_handle_count--;
 
474
        }
 
475
        else {
 
476
                XTIndHandleBlockPtr     hptr = handle->x.ih_handle_block;
 
477
 
 
478
                ASSERT_NS(!handle->ih_cache_reference);
 
479
                ASSERT_NS(hptr->hb_ref_count > 0);
 
480
                hptr->hb_ref_count--;
 
481
                if (!hptr->hb_ref_count) {
 
482
                        /* Put it back on the free list: */
 
483
                        hptr->hb_next = hs->hs_free_blocks;
 
484
                        hs->hs_free_blocks = hptr;
 
485
                }
 
486
        }
 
487
 
 
488
        /* Unlink the handle: */
 
489
        if (handle->ih_next)
 
490
                handle->ih_next->ih_prev = handle->ih_prev;
 
491
        if (handle->ih_prev)
 
492
                handle->ih_prev->ih_next = handle->ih_next;
 
493
        if (hs->hs_used_handles == handle)
 
494
                hs->hs_used_handles = handle->ih_next;
 
495
 
 
496
        /* Put it on the free list: */
 
497
        handle->ih_next = hs->hs_free_handles;
 
498
        hs->hs_free_handles = handle;
 
499
 
 
500
#ifdef CHECK_HANDLE_STRUCTS
 
501
        ic_check_handle_structs();
 
502
#endif
 
503
        ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
 
504
 
 
505
        if (block)
 
506
                XT_IPAGE_UNLOCK(&block->cb_lock, FALSE);
 
507
}
 
508
 
 
509
/* Call this function before a referenced cache block is modified!
 
510
 * This function is called by index updaters.
 
511
 */
 
512
xtPublic xtBool xt_ind_copy_on_write(XTIndReferencePtr iref)
 
513
{
 
514
        DcHandleSlotPtr         hs;
 
515
        XTIndHandleBlockPtr     hptr;
 
516
        u_int                           branch_size;
 
517
        XTIndHandlePtr          handle;
 
518
        u_int                           i = 0;
 
519
 
 
520
        hs = &ind_cac_globals.cg_handle_slot[iref->ir_block->cb_address % XT_HANDLE_SLOTS];
 
521
 
 
522
        ID_HANDLE_LOCK(&hs->hs_handles_lock);
 
523
 
 
524
        /* {HANDLE-COUNT-USAGE}
 
525
         * This is only called by updaters of this index block, or
 
526
         * the freer, which holds an Xlock on the index block.
 
527
         * These are all mutually exclusive for the index block.
 
528
         *
 
529
         * {HANDLE-COUNT-SLOCK}
 
530
         * Do this check again, after we have the list lock (hs_handles_lock).
 
531
         * There is a small chance that the count has changed since we last
 
532
         * checked, because xt_ind_release_handle() only holds
 
533
         * an slock on the index page.
 
534
         *
 
535
         * An updater can sometimes have an Xlock on the index and an Slock
 
536
         * on the cache block. In this case xt_ind_release_handle()
 
537
         * could have run through.
 
538
         */
 
539
        if (!iref->ir_block->cb_handle_count) {
 
540
                ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
 
541
                return OK;
 
542
        }
 
543
 
 
544
#ifdef CHECK_HANDLE_STRUCTS
 
545
        ic_check_handle_structs();
 
546
#endif
 
547
        if ((hptr = hs->hs_free_blocks))
 
548
                hs->hs_free_blocks = hptr->hb_next;
 
549
        else {
 
550
                if (!(hptr = (XTIndHandleBlockPtr) xt_malloc_ns(sizeof(XTIndHandleBlockRec)))) {
 
551
                        ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
 
552
                        return FAILED;
 
553
                }
 
554
        }
 
555
 
 
556
        branch_size = XT_GET_INDEX_BLOCK_LEN(XT_GET_DISK_2(iref->ir_branch->tb_size_2));
 
557
        memcpy(&hptr->hb_branch, iref->ir_branch, branch_size);
 
558
        hptr->hb_ref_count = iref->ir_block->cb_handle_count;
 
559
 
 
560
        handle = hs->hs_used_handles;
 
561
        while (handle) {
 
562
                if (handle->ih_branch == iref->ir_branch) {
 
563
                        i++;
 
564
                        xt_spinlock_lock(&handle->ih_lock);
 
565
                        ASSERT_NS(handle->ih_cache_reference);
 
566
                        handle->ih_cache_reference = FALSE;
 
567
                        handle->x.ih_handle_block = hptr;
 
568
                        handle->ih_branch = &hptr->hb_branch;
 
569
                        xt_spinlock_unlock(&handle->ih_lock);
 
570
#ifndef DEBUG
 
571
                        if (i == hptr->hb_ref_count)
 
572
                                break;
 
573
#endif
 
574
                }
 
575
                handle = handle->ih_next;
 
576
        }
 
577
#ifdef DEBUG
 
578
        ASSERT_NS(hptr->hb_ref_count == i);
 
579
#endif
 
580
        /* {HANDLE-COUNT-USAGE}
 
581
         * It is safe to modify cb_handle_count when I have the
 
582
         * list lock, and I have excluded all readers!
 
583
         */
 
584
        iref->ir_block->cb_handle_count = 0;
 
585
#ifdef CHECK_HANDLE_STRUCTS
 
586
        ic_check_handle_structs();
 
587
#endif
 
588
        ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
 
589
 
 
590
        return OK;
 
591
}
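
/* Putting the {HANDLE-COUNT-*} pieces together: while a block sits in the
 * cache, every handle returned by xt_ind_get_handle() points directly at
 * the cached branch and bumps cb_handle_count. Before the block is modified
 * or freed, xt_ind_copy_on_write() detaches all of those handles in one go:
 * it copies the branch into an XTIndHandleBlockRec (taken from the slot's
 * free list, or malloc'd), points each matching handle at that private copy,
 * sets hb_ref_count to the old cb_handle_count, and drops cb_handle_count to
 * zero. Readers then finish against the copy, and xt_ind_release_handle()
 * returns the copy to the slot's free list once hb_ref_count reaches zero.
 */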
 
592
 
 
593
xtPublic void xt_ind_lock_handle(XTIndHandlePtr handle)
 
594
{
 
595
        xt_spinlock_lock(&handle->ih_lock);
 
596
}
 
597
 
 
598
xtPublic void xt_ind_unlock_handle(XTIndHandlePtr handle)
 
599
{
 
600
        xt_spinlock_unlock(&handle->ih_lock);
 
601
}
 
602
 
 
603
/*
 
604
 * -----------------------------------------------------------------------
 
605
 * INIT/EXIT
 
606
 */
 
607
 
 
608
/*
 
609
 * Initialize the disk cache.
 
610
 */
 
611
xtPublic void xt_ind_init(XTThreadPtr self, size_t cache_size)
 
612
{
 
613
        XTIndBlockPtr   block;
 
614
 
 
615
#ifdef XT_USE_MYSYS
 
616
        init_key_cache(&my_cache, 1024, cache_size, 100, 300);
 
617
#endif
 
618
        /* Memory is devoted to the page data alone; I no longer count the size of the directory,
 
619
         * or the page overhead: */
 
620
        ind_cac_globals.cg_block_count = cache_size / XT_INDEX_PAGE_SIZE;
 
621
        ind_cac_globals.cg_hash_size = ind_cac_globals.cg_block_count / (IDX_CAC_SEGMENT_COUNT >> 1);
 
622
        ind_cac_globals.cg_max_free = ind_cac_globals.cg_block_count / 10;
 
623
        if (ind_cac_globals.cg_max_free < 8)
 
624
                ind_cac_globals.cg_max_free = 8;
 
625
        if (ind_cac_globals.cg_max_free > 128)
 
626
                ind_cac_globals.cg_max_free = 128;
 
627
 
 
628
        try_(a) {
 
629
                for (u_int i=0; i<IDX_CAC_SEGMENT_COUNT; i++) {
 
630
                        ind_cac_globals.cg_segment[i].cs_hash_table = (XTIndBlockPtr *) xt_calloc(self, ind_cac_globals.cg_hash_size * sizeof(XTIndBlockPtr));
 
631
                        IDX_CAC_INIT_LOCK(self, &ind_cac_globals.cg_segment[i]);
 
632
                }
 
633
 
 
634
                block = (XTIndBlockPtr) xt_malloc(self, ind_cac_globals.cg_block_count * sizeof(XTIndBlockRec));
 
635
                ind_cac_globals.cg_blocks = block;
 
636
                xt_init_mutex_with_autoname(self, &ind_cac_globals.cg_lock);
 
637
#ifdef XT_USE_DIRECT_IO_ON_INDEX
 
638
                xtWord1 *buffer;
 
639
#ifdef XT_WIN
 
640
                size_t  psize = 512;
 
641
#else
 
642
                size_t  psize = getpagesize();
 
643
#endif
 
644
                size_t  diff;
 
645
 
 
646
                buffer = (xtWord1 *) xt_malloc(self, (ind_cac_globals.cg_block_count * XT_INDEX_PAGE_SIZE));
 
647
                diff = (size_t) buffer % psize;
 
648
                if (diff != 0) {
 
649
                        xt_free(self, buffer);
 
650
                        buffer = (xtWord1 *) xt_malloc(self, (ind_cac_globals.cg_block_count * XT_INDEX_PAGE_SIZE) + psize);
 
651
                        diff = (size_t) buffer % psize;
 
652
                        if (diff != 0)
 
653
                                diff = psize - diff;
 
654
                }
 
655
                ind_cac_globals.cg_buffer = buffer;
 
656
                buffer += diff;
 
657
#endif
 
658
 
 
659
                for (u_int i=0; i<ind_cac_globals.cg_block_count; i++) {
 
660
                        XT_IPAGE_INIT_LOCK(self, &block->cb_lock);
 
661
                        block->cb_state = IDX_CAC_BLOCK_FREE;
 
662
                        block->cb_next = ind_cac_globals.cg_free_list;
 
663
#ifdef XT_USE_DIRECT_IO_ON_INDEX
 
664
                        block->cb_data = buffer;
 
665
                        buffer += XT_INDEX_PAGE_SIZE;
 
666
#endif
 
667
                        ind_cac_globals.cg_free_list = block;
 
668
                        block++;
 
669
                }
 
670
                ind_cac_globals.cg_free_count = ind_cac_globals.cg_block_count;
 
671
#ifdef DEBUG_CHECK_IND_CACHE
 
672
                ind_cac_globals.cg_reserved_by_ots = 0;
 
673
#endif
 
674
                ind_handle_init(self);
 
675
        }
 
676
        catch_(a) {
 
677
                xt_ind_exit(self);
 
678
                throw_();
 
679
        }
 
680
        cont_(a);
 
681
}
 
682
 
 
683
xtPublic void xt_ind_exit(XTThreadPtr self)
 
684
{
 
685
#ifdef XT_USE_MYSYS
 
686
        end_key_cache(&my_cache, 1);
 
687
#endif
 
688
        for (u_int i=0; i<IDX_CAC_SEGMENT_COUNT; i++) {
 
689
                if (ind_cac_globals.cg_segment[i].cs_hash_table) {
 
690
                        xt_free(self, ind_cac_globals.cg_segment[i].cs_hash_table);
 
691
                        ind_cac_globals.cg_segment[i].cs_hash_table = NULL;
 
692
                        IDX_CAC_FREE_LOCK(self, &ind_cac_globals.cg_segment[i]);
 
693
                }
 
694
        }
 
695
 
 
696
        /* Must be done before freeing the blocks! */
 
697
        ind_handle_exit(self);
 
698
 
 
699
        if (ind_cac_globals.cg_blocks) {
 
700
                xt_free(self, ind_cac_globals.cg_blocks);
 
701
                ind_cac_globals.cg_blocks = NULL;
 
702
                xt_free_mutex(&ind_cac_globals.cg_lock);
 
703
        }
 
704
#ifdef XT_USE_DIRECT_IO_ON_INDEX
 
705
        if (ind_cac_globals.cg_buffer) {
 
706
                xt_free(self, ind_cac_globals.cg_buffer);
 
707
                ind_cac_globals.cg_buffer = NULL;
 
708
        }
 
709
#endif
 
710
 
 
711
        memset(&ind_cac_globals, 0, sizeof(ind_cac_globals));
 
712
}
 
713
 
 
714
xtPublic xtInt8 xt_ind_get_usage()
 
715
{
 
716
        xtInt8 size = 0;
 
717
 
 
718
        size = (xtInt8) (ind_cac_globals.cg_block_count - ind_cac_globals.cg_free_count) * (xtInt8) XT_INDEX_PAGE_SIZE;
 
719
        return size;
 
720
}
 
721
 
 
722
xtPublic xtInt8 xt_ind_get_size()
 
723
{
 
724
        xtInt8 size = 0;
 
725
 
 
726
        size = (xtInt8) ind_cac_globals.cg_block_count * (xtInt8) XT_INDEX_PAGE_SIZE;
 
727
        return size;
 
728
}
 
729
 
 
730
xtPublic u_int xt_ind_get_blocks()
 
731
{
 
732
        return ind_cac_globals.cg_block_count;
 
733
}
 
734
 
 
735
/*
 
736
 * -----------------------------------------------------------------------
 
737
 * INDEX CHECKING
 
738
 */
 
739
 
 
740
xtPublic void xt_ind_check_cache(XTIndexPtr ind)
 
741
{
 
742
        XTIndBlockPtr   block;
 
743
        u_int                   free_count, inuse_count, clean_count;
 
744
        xtBool                  check_count = FALSE;
 
745
 
 
746
        if (ind == (XTIndex *) 1) {
 
747
                ind = NULL;
 
748
                check_count = TRUE;
 
749
        }
 
750
 
 
751
        // Check the dirty list:
 
752
        if (ind) {
 
753
                u_int cnt = 0;
 
754
 
 
755
                block = ind->mi_dirty_list;
 
756
                while (block) {
 
757
                        cnt++;
 
758
                        ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_DIRTY);
 
759
                        block = block->cb_dirty_next;
 
760
                }
 
761
                ASSERT_NS(ind->mi_dirty_blocks == cnt);
 
762
        }
 
763
 
 
764
        xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
 
765
 
 
766
        // Check the free list:
 
767
        free_count = 0;
 
768
        block = ind_cac_globals.cg_free_list;
 
769
        while (block) {
 
770
                free_count++;
 
771
                ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_FREE);
 
772
                block = block->cb_next;
 
773
        }
 
774
        ASSERT_NS(ind_cac_globals.cg_free_count == free_count);
 
775
 
 
776
        /* Check the LRU list: */
 
777
        XTIndBlockPtr list_block, plist_block;
 
778
        
 
779
        plist_block = NULL;
 
780
        list_block = ind_cac_globals.cg_lru_block;
 
781
        if (list_block) {
 
782
                ASSERT_NS(ind_cac_globals.cg_mru_block != NULL);
 
783
                ASSERT_NS(ind_cac_globals.cg_mru_block->cb_mr_used == NULL);
 
784
                ASSERT_NS(list_block->cb_lr_used == NULL);
 
785
                inuse_count = 0;
 
786
                clean_count = 0;
 
787
                while (list_block) {
 
788
                        inuse_count++;
 
789
                        ASSERT_NS(IDX_CAC_NOT_FREE(list_block->cb_state));
 
790
                        if (list_block->cb_state == IDX_CAC_BLOCK_CLEAN)
 
791
                                clean_count++;
 
792
                        ASSERT_NS(block != list_block);
 
793
                        ASSERT_NS(list_block->cb_lr_used == plist_block);
 
794
                        plist_block = list_block;
 
795
                        list_block = list_block->cb_mr_used;
 
796
                }
 
797
                ASSERT_NS(ind_cac_globals.cg_mru_block == plist_block);
 
798
        }
 
799
        else {
 
800
                inuse_count = 0;
 
801
                clean_count = 0;
 
802
                ASSERT_NS(ind_cac_globals.cg_mru_block == NULL);
 
803
        }
 
804
 
 
805
#ifdef DEBUG_CHECK_IND_CACHE
 
806
        ASSERT_NS(free_count + inuse_count + ind_cac_globals.cg_reserved_by_ots + ind_cac_globals.cg_read_count == ind_cac_globals.cg_block_count);
 
807
#endif
 
808
        xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
 
809
        if (check_count) {
 
810
                /* We have just flushed; check how much is now free/clean. */
 
811
                if (free_count + clean_count < 10) {
 
812
                        /* This could be a problem: */
 
813
                        printf("Cache very low!\n");
 
814
                }
 
815
        }
 
816
}
 
817
 
 
818
/*
 
819
 * -----------------------------------------------------------------------
 
820
 * FREEING INDEX CACHE
 
821
 */
 
822
 
 
823
/*
 
824
 * This function returns TRUE if the block is freed.
 
825
 * This function returns FALSE if the block cannot be found, or the
 
826
 * block is not clean.
 
827
 *
 
828
 * We also return FALSE if we cannot copy the block to the handle
 
829
 * (if this is required). This will be due to out-of-memory!
 
830
 */
 
831
static xtBool ind_free_block(XTOpenTablePtr ot, XTIndBlockPtr block)
 
832
{
 
833
        XTIndBlockPtr   xblock, pxblock;
 
834
        u_int                   hash_idx;
 
835
        u_int                   file_id;
 
836
        xtIndexNodeID   address;
 
837
        DcSegmentPtr    seg;
 
838
 
 
839
        (void) ot; /*DRIZZLED*/
 
840
 
 
841
#ifdef DEBUG_CHECK_IND_CACHE
 
842
        xt_ind_check_cache(NULL);
 
843
#endif
 
844
        file_id = block->cb_file_id;
 
845
        address = block->cb_address;
 
846
 
 
847
        hash_idx = XT_NODE_ID(address) + (file_id * 223);
 
848
        seg = &ind_cac_globals.cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
 
849
        hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % ind_cac_globals.cg_hash_size;
 
850
 
 
851
        IDX_CAC_WRITE_LOCK(seg, ot->ot_thread);
 
852
 
 
853
        pxblock = NULL;
 
854
        xblock = seg->cs_hash_table[hash_idx];
 
855
        while (xblock) {
 
856
                if (block == xblock) {
 
857
                        /* Found the block... */
 
858
                        /* It is possible that a thread enters this code holding a
 
859
                         * lock on a page. This can cause a deadlock:
 
860
                         *
 
861
                         * #0   0x91faa2ce in semaphore_wait_signal_trap
 
862
                         * #1   0x91fb1da5 in pthread_mutex_lock
 
863
                         * #2   0x00e2ec13 in xt_p_mutex_lock at pthread_xt.cc:544
 
864
                         * #3   0x00e6c30a in xt_xsmutex_xlock at lock_xt.cc:1547
 
865
                         * #4   0x00dee402 in ind_free_block at cache_xt.cc:879
 
866
                         * #5   0x00dee76a in ind_cac_free_lru_blocks at cache_xt.cc:1033
 
867
                         * #6   0x00def8d1 in xt_ind_reserve at cache_xt.cc:1513
 
868
                         * #7   0x00e22118 in xt_idx_insert at index_xt.cc:2047
 
869
                         * #8   0x00e4d7ee in xt_tab_new_record at table_xt.cc:4702
 
870
                         * #9   0x00e0ff0b in ha_pbxt::write_row at ha_pbxt.cc:2340
 
871
                         * #10  0x0023a00f in handler::ha_write_row at handler.cc:4570
 
872
                         * #11  0x001a32c8 in write_record at sql_insert.cc:1568
 
873
                         * #12  0x001ab635 in mysql_insert at sql_insert.cc:812
 
874
                         * #13  0x0010e068 in mysql_execute_command at sql_parse.cc:3066
 
875
                         * #14  0x0011480d in mysql_parse at sql_parse.cc:5787
 
876
                         * #15  0x00115afb in dispatch_command at sql_parse.cc:1200
 
877
                         * #16  0x00116de2 in do_command at sql_parse.cc:857
 
878
                         * #17  0x00101ee4 in handle_one_connection at sql_connect.cc:1115
 
879
                         * #18  0x91fdb155 in _pthread_start
 
880
                         * #19  0x91fdb012 in thread_start
 
881
                         * 
 
882
                         * #0   0x91fb146e in __semwait_signal
 
883
                         * #1   0x91fb12ef in nanosleep$UNIX2003
 
884
                         * #2   0x91fb1236 in usleep$UNIX2003
 
885
                         * #3   0x00e52112 in xt_yield at thread_xt.cc:1274
 
886
                         * #4   0x00e6c0eb in xt_spinxslock_xlock at lock_xt.cc:1456
 
887
                         * #5   0x00dee444 in ind_free_block at cache_xt.cc:886
 
888
                         * #6   0x00dee76a in ind_cac_free_lru_blocks at cache_xt.cc:1033
 
889
                         * #7   0x00deeaf0 in ind_cac_fetch at cache_xt.cc:1130
 
890
                         * #8   0x00def604 in xt_ind_fetch at cache_xt.cc:1386
 
891
                         * #9   0x00e2159a in xt_idx_update_row_id at index_xt.cc:2489
 
892
                         * #10  0x00e603c8 in xn_sw_clean_indices at xaction_xt.cc:1932
 
893
                         * #11  0x00e606d4 in xn_sw_cleanup_variation at xaction_xt.cc:2056
 
894
                         * #12  0x00e60e29 in xn_sw_cleanup_xact at xaction_xt.cc:2276
 
895
                         * #13  0x00e615ed in xn_sw_main at xaction_xt.cc:2433
 
896
                         * #14  0x00e61919 in xn_sw_run_thread at xaction_xt.cc:2564
 
897
                         * #15  0x00e53f80 in thr_main at thread_xt.cc:1017
 
898
                         * #16  0x91fdb155 in _pthread_start
 
899
                         * #17  0x91fdb012 in thread_start
 
900
                         *
 
901
                         * So we back off if a lock is held!
 
902
                         */
 
903
                        if (!XT_IPAGE_WRITE_TRY_LOCK(&block->cb_lock, ot->ot_thread->t_id)) {
 
904
                                IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
905
#ifdef DEBUG_CHECK_IND_CACHE
 
906
                                xt_ind_check_cache(NULL);
 
907
#endif
 
908
                                return FALSE;
 
909
                        }
 
910
                        if (block->cb_state != IDX_CAC_BLOCK_CLEAN) {
 
911
                                /* This block cannot be freed: */
 
912
                                XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
913
                                IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
914
#ifdef DEBUG_CHECK_IND_CACHE
 
915
                                xt_ind_check_cache(NULL);
 
916
#endif
 
917
                                return FALSE;
 
918
                        }
 
919
                        
 
920
                        goto free_the_block;
 
921
                }
 
922
                pxblock = xblock;
 
923
                xblock = xblock->cb_next;
 
924
        }
 
925
 
 
926
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
927
 
 
928
        /* Not found (this can happen if the block was freed by another thread) */
 
929
#ifdef DEBUG_CHECK_IND_CACHE
 
930
        xt_ind_check_cache(NULL);
 
931
#endif
 
932
        return FALSE;
 
933
 
 
934
        free_the_block:
 
935
 
 
936
        /* If the block is referenced by a handle, then we
 
937
         * have to copy the data to the handle before we
 
938
         * free the page:
 
939
         */
 
940
        /* {HANDLE-COUNT-USAGE}
 
941
         * This access is safe because:
 
942
         *
 
943
         * We have an Xlock on the cache block, which excludes
 
944
         * all other writers that want to change the cache block
 
945
         * and also all readers of the cache block, because
 
946
         * they all have at least an Slock on the cache block.
 
947
         */
 
948
        if (block->cb_handle_count) {
 
949
                XTIndReferenceRec       iref;
 
950
                
 
951
                iref.ir_xlock = TRUE;
 
952
                iref.ir_updated = FALSE;
 
953
                iref.ir_block = block;
 
954
                iref.ir_branch = (XTIdxBranchDPtr) block->cb_data;
 
955
                if (!xt_ind_copy_on_write(&iref)) {
 
956
                        XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
957
                        return FALSE;
 
958
                }
 
959
        }
 
960
 
 
961
        /* Block is clean, remove from the hash table: */
 
962
        if (pxblock)
 
963
                pxblock->cb_next = block->cb_next;
 
964
        else
 
965
                seg->cs_hash_table[hash_idx] = block->cb_next;
 
966
 
 
967
        xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
 
968
 
 
969
        /* Remove from the MRU list: */
 
970
        if (ind_cac_globals.cg_lru_block == block)
 
971
                ind_cac_globals.cg_lru_block = block->cb_mr_used;
 
972
        if (ind_cac_globals.cg_mru_block == block)
 
973
                ind_cac_globals.cg_mru_block = block->cb_lr_used;
 
974
        
 
975
        /* Note, I am updating blocks for which I have no lock
 
976
         * here. But I think this is OK because I have a lock
 
977
         * for the MRU list.
 
978
         */
 
979
        if (block->cb_lr_used)
 
980
                block->cb_lr_used->cb_mr_used = block->cb_mr_used;
 
981
        if (block->cb_mr_used)
 
982
                block->cb_mr_used->cb_lr_used = block->cb_lr_used;
 
983
 
 
984
        /* The block is now free: */
 
985
        block->cb_next = ind_cac_globals.cg_free_list;
 
986
        ind_cac_globals.cg_free_list = block;
 
987
        ind_cac_globals.cg_free_count++;
 
988
        ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
 
989
        block->cb_state = IDX_CAC_BLOCK_FREE;
 
990
        IDX_TRACE("%d- f%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(block->cb_data));
 
991
 
 
992
        /* Unlock BEFORE the block is reused! */
 
993
        XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
994
 
 
995
        xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
 
996
 
 
997
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
998
 
 
999
#ifdef DEBUG_CHECK_IND_CACHE
 
1000
        xt_ind_check_cache(NULL);
 
1001
#endif
 
1002
        return TRUE;
 
1003
}
 
1004
 
 
1005
#define IND_CACHE_MAX_BLOCKS_TO_FREE            100
 
1006
 
 
1007
/*
 
1008
 * Return the number of blocks freed.
 
1009
 *
 
1010
 * The idea is to grab a list of blocks to free.
 
1011
 * The list consists of the LRU blocks that are
 
1012
 * clean.
 
1013
 *
 
1014
 * Free as many as possible (up to max of blocks_required)
 
1015
 * from the list, even if LRU position has changed
 
1016
 * (or we have a race if there are too few blocks).
 
1017
 * However, if the block cannot be found, or is dirty
 
1018
 * we must skip it.
 
1019
 *
 
1020
 * Repeat until we find no blocks for the list, or
 
1021
 * we have freed 'blocks_required'.
 
1022
 *
 
1023
 * 'not_this' is a block that must not be freed because
 
1024
 * it is locked by the calling thread!
 
1025
 */
 
1026
static u_int ind_cac_free_lru_blocks(XTOpenTablePtr ot, u_int blocks_required, XTIdxBranchDPtr not_this)
 
1027
{
 
1028
        register DcGlobalsRec   *dcg = &ind_cac_globals;
 
1029
        XTIndBlockPtr                   to_free[IND_CACHE_MAX_BLOCKS_TO_FREE];
 
1030
        int                                             count;
 
1031
        XTIndBlockPtr                   block;
 
1032
        u_int                                   blocks_freed = 0;
 
1033
        XTIndBlockPtr                   locked_block;
 
1034
 
 
1035
#ifdef XT_USE_DIRECT_IO_ON_INDEX
 
1036
#error This will not work!
 
1037
#endif
 
1038
        locked_block = (XTIndBlockPtr) ((xtWord1 *) not_this - offsetof(XTIndBlockRec, cb_data));
 
1039
 
 
1040
        retry:
 
1041
        xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
 
1042
        block = dcg->cg_lru_block;
 
1043
        count = 0;
 
1044
        while (block && count < IND_CACHE_MAX_BLOCKS_TO_FREE) {
 
1045
                if (block != locked_block && block->cb_state == IDX_CAC_BLOCK_CLEAN) {
 
1046
                        to_free[count] = block;
 
1047
                        count++;
 
1048
                }
 
1049
                block = block->cb_mr_used;
 
1050
        }
 
1051
        xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
 
1052
 
 
1053
        if (!count)
 
1054
                return blocks_freed;
 
1055
 
 
1056
        for (int i=0; i<count; i++) {
 
1057
                if (ind_free_block(ot, to_free[i]))
 
1058
                        blocks_freed++;
 
1059
                if (blocks_freed >= blocks_required &&
 
1060
                        ind_cac_globals.cg_free_count >= ind_cac_globals.cg_max_free + blocks_required)
 
1061
                return blocks_freed;
 
1062
        }
 
1063
 
 
1064
        goto retry;
 
1065
}
 
1066
 
 
1067
/*
 
1068
 * -----------------------------------------------------------------------
 
1069
 * MAIN CACHE FUNCTIONS
 
1070
 */
 
1071
 
 
1072
/*
 
1073
 * Fetch the block. Note, if we are about to write the block
 
1074
 * then there is no need to read it from disk!
 
1075
 */
 
1076
static XTIndBlockPtr ind_cac_fetch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, DcSegmentPtr *ret_seg, xtBool read_data)
 
1077
{
 
1078
        register XTOpenFilePtr  file = ot->ot_ind_file;
 
1079
        register XTIndBlockPtr  block, new_block;
 
1080
        register DcSegmentPtr   seg;
 
1081
        register u_int                  hash_idx;
 
1082
        register DcGlobalsRec   *dcg = &ind_cac_globals;
 
1083
        size_t                                  red_size;
 
1084
 
 
1085
#ifdef DEBUG_CHECK_IND_CACHE
 
1086
        xt_ind_check_cache(NULL);
 
1087
#endif
 
1088
        /* Address, plus file ID multiplied by my favorite prime number! */
 
1089
        hash_idx = XT_NODE_ID(address) + (file->fr_id * 223);
 
1090
        seg = &dcg->cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
 
1091
        hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % dcg->cg_hash_size;
 
1092
 
 
1093
        IDX_CAC_READ_LOCK(seg, ot->ot_thread);
 
1094
        block = seg->cs_hash_table[hash_idx];
 
1095
        while (block) {
 
1096
                if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
 
1097
                        ASSERT_NS(block->cb_state != IDX_CAC_BLOCK_FREE);
 
1098
 
 
1099
                        /* Check how recently this page has been used: */
 
1100
                        if (XT_TIME_DIFF(block->cb_ru_time, dcg->cg_ru_now) > (dcg->cg_block_count >> 1)) {
 
1101
                                xt_lock_mutex_ns(&dcg->cg_lock);
 
1102
 
 
1103
                                /* Move to the front of the MRU list: */
 
1104
                                block->cb_ru_time = ++dcg->cg_ru_now;
 
1105
                                if (dcg->cg_mru_block != block) {
 
1106
                                        /* Remove from the MRU list: */
 
1107
                                        if (dcg->cg_lru_block == block)
 
1108
                                                dcg->cg_lru_block = block->cb_mr_used;
 
1109
                                        if (block->cb_lr_used)
 
1110
                                                block->cb_lr_used->cb_mr_used = block->cb_mr_used;
 
1111
                                        if (block->cb_mr_used)
 
1112
                                                block->cb_mr_used->cb_lr_used = block->cb_lr_used;
 
1113
 
 
1114
                                        /* Make the block the most recently used: */
 
1115
                                        if ((block->cb_lr_used = dcg->cg_mru_block))
 
1116
                                                dcg->cg_mru_block->cb_mr_used = block;
 
1117
                                        block->cb_mr_used = NULL;
 
1118
                                        dcg->cg_mru_block = block;
 
1119
                                        if (!dcg->cg_lru_block)
 
1120
                                                dcg->cg_lru_block = block;
 
1121
                                }
 
1122
 
 
1123
                                xt_unlock_mutex_ns(&dcg->cg_lock);
 
1124
                        }
 
1125
                
 
1126
                        *ret_seg = seg;
 
1127
#ifdef DEBUG_CHECK_IND_CACHE
 
1128
                        xt_ind_check_cache(NULL);
 
1129
#endif
 
1130
                        ot->ot_thread->st_statistics.st_ind_cache_hit++;
 
1131
                        return block;
 
1132
                }
 
1133
                block = block->cb_next;
 
1134
        }
 
1135
        
 
1136
        /* Block not found... */
 
1137
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1138
 
 
1139
        /* Check the open table reserve list first: */
 
1140
        if ((new_block = ot->ot_ind_res_bufs)) {
 
1141
                ot->ot_ind_res_bufs = new_block->cb_next;
 
1142
                ot->ot_ind_res_count--;
 
1143
#ifdef DEBUG_CHECK_IND_CACHE
 
1144
                xt_lock_mutex_ns(&dcg->cg_lock);
 
1145
                dcg->cg_reserved_by_ots--;
 
1146
                dcg->cg_read_count++;
 
1147
                xt_unlock_mutex_ns(&dcg->cg_lock);
 
1148
#endif
 
1149
                goto use_free_block;
 
1150
        }
 
1151
 
 
1152
        free_some_blocks:
 
1153
        if (!dcg->cg_free_list) {
 
1154
                if (!ind_cac_free_lru_blocks(ot, 1, NULL)) {
 
1155
                        if (!dcg->cg_free_list) {
 
1156
                                xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_INDEX_CACHE);
 
1157
#ifdef DEBUG_CHECK_IND_CACHE
 
1158
                                xt_ind_check_cache(NULL);
 
1159
#endif
 
1160
                                return NULL;
 
1161
                        }
 
1162
                }
 
1163
        }
 
1164
 
 
1165
        /* Get a free block: */
 
1166
        xt_lock_mutex_ns(&dcg->cg_lock);
 
1167
        if (!(new_block = dcg->cg_free_list)) {
 
1168
                xt_unlock_mutex_ns(&dcg->cg_lock);
 
1169
                goto free_some_blocks;
 
1170
        }
 
1171
        ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_FREE);
 
1172
        dcg->cg_free_list = new_block->cb_next;
 
1173
        dcg->cg_free_count--;
 
1174
#ifdef DEBUG_CHECK_IND_CACHE
 
1175
        dcg->cg_read_count++;
 
1176
#endif
 
1177
        xt_unlock_mutex_ns(&dcg->cg_lock);
 
1178
 
 
1179
        use_free_block:
 
1180
        new_block->cb_address = address;
 
1181
        new_block->cb_file_id = file->fr_id;
 
1182
        ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_FREE);
 
1183
        new_block->cb_state = IDX_CAC_BLOCK_CLEAN;
 
1184
        new_block->cb_handle_count = 0;
 
1185
        new_block->cp_del_count = 0;
 
1186
        new_block->cb_dirty_next = NULL;
 
1187
        new_block->cb_dirty_prev = NULL;
 
1188
#ifdef IND_OPT_DATA_WRITTEN
 
1189
        new_block->cb_header = FALSE;
 
1190
        new_block->cb_min_pos = 0xFFFF;
 
1191
        new_block->cb_max_pos = 0;
 
1192
#endif
 
1193
 
 
1194
        if (read_data) {
 
1195
                if (!xt_pread_file(file, xt_ind_node_to_offset(ot->ot_table, address), XT_INDEX_PAGE_SIZE, 0, new_block->cb_data, &red_size, &ot->ot_thread->st_statistics.st_ind, ot->ot_thread)) {
 
1196
                        xt_lock_mutex_ns(&dcg->cg_lock);
 
1197
                        new_block->cb_next = dcg->cg_free_list;
 
1198
                        dcg->cg_free_list = new_block;
 
1199
                        dcg->cg_free_count++;
 
1200
#ifdef DEBUG_CHECK_IND_CACHE
 
1201
                        dcg->cg_read_count--;
 
1202
#endif
 
1203
                        ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_CLEAN);
 
1204
                        new_block->cb_state = IDX_CAC_BLOCK_FREE;
 
1205
                        IDX_TRACE("%d- F%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
 
1206
                        xt_unlock_mutex_ns(&dcg->cg_lock);
 
1207
#ifdef DEBUG_CHECK_IND_CACHE
 
1208
                        xt_ind_check_cache(NULL);
 
1209
#endif
 
1210
                        return NULL;
 
1211
                }
 
1212
                IDX_TRACE("%d- R%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
 
1213
                ot->ot_thread->st_statistics.st_ind_cache_miss++;
 
1214
        }
 
1215
        else
 
1216
                red_size = 0;
 
1217
        // PMC - I don't think this is required! memset(new_block->cb_data + red_size, 0, XT_INDEX_PAGE_SIZE - red_size);
 
1218
 
 
1219
        IDX_CAC_WRITE_LOCK(seg, ot->ot_thread);
 
1220
        block = seg->cs_hash_table[hash_idx];
 
1221
        while (block) {
 
1222
                if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
 
1223
                        /* Oops, someone else was faster! */
 
1224
                        xt_lock_mutex_ns(&dcg->cg_lock);
 
1225
                        new_block->cb_next = dcg->cg_free_list;
 
1226
                        dcg->cg_free_list = new_block;
 
1227
                        dcg->cg_free_count++;
 
1228
#ifdef DEBUG_CHECK_IND_CACHE
 
1229
                        dcg->cg_read_count--;
 
1230
#endif
 
1231
                        ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_CLEAN);
 
1232
                        new_block->cb_state = IDX_CAC_BLOCK_FREE;
 
1233
                        IDX_TRACE("%d- F%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
 
1234
                        xt_unlock_mutex_ns(&dcg->cg_lock);
 
1235
                        goto done_ok;
 
1236
                }
 
1237
                block = block->cb_next;
 
1238
        }
 
1239
        block = new_block;
 
1240
 
 
1241
        /* Make the block the most recently used: */
 
1242
        xt_lock_mutex_ns(&dcg->cg_lock);
 
1243
        block->cb_ru_time = ++dcg->cg_ru_now;
 
1244
        if ((block->cb_lr_used = dcg->cg_mru_block))
 
1245
                dcg->cg_mru_block->cb_mr_used = block;
 
1246
        block->cb_mr_used = NULL;
 
1247
        dcg->cg_mru_block = block;
 
1248
        if (!dcg->cg_lru_block)
 
1249
                dcg->cg_lru_block = block;
 
1250
#ifdef DEBUG_CHECK_IND_CACHE
 
1251
        dcg->cg_read_count--;
 
1252
#endif
 
1253
        xt_unlock_mutex_ns(&dcg->cg_lock);
 
1254
 
 
1255
        /* {LAZY-DEL-INDEX-ITEMS}
 
1256
         * Conditionally count the number of deleted entries in the index:
 
1257
         * We do this before other threads can read the block.
 
1258
         */
 
1259
        if (ind->mi_lazy_delete && read_data)
 
1260
                xt_ind_count_deleted_items(ot->ot_table, ind, block);
 
1261
 
 
1262
        /* Add to the hash table: */
 
1263
        block->cb_next = seg->cs_hash_table[hash_idx];
 
1264
        seg->cs_hash_table[hash_idx] = block;
 
1265
 
 
1266
        done_ok:
 
1267
        *ret_seg = seg;
 
1268
#ifdef DEBUG_CHECK_IND_CACHE
 
1269
        xt_ind_check_cache(NULL);
 
1270
#endif
 
1271
        return block;
 
1272
}
 
1273
 
 
1274
static xtBool ind_cac_get(XTOpenTablePtr ot, xtIndexNodeID address, DcSegmentPtr *ret_seg, XTIndBlockPtr *ret_block)
 
1275
{
 
1276
        register XTOpenFilePtr  file = ot->ot_ind_file;
 
1277
        register XTIndBlockPtr  block;
 
1278
        register DcSegmentPtr   seg;
 
1279
        register u_int                  hash_idx;
 
1280
        register DcGlobalsRec   *dcg = &ind_cac_globals;
 
1281
 
 
1282
        hash_idx = XT_NODE_ID(address) + (file->fr_id * 223);
 
1283
        seg = &dcg->cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
 
1284
        hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % dcg->cg_hash_size;
 
1285
 
 
1286
        IDX_CAC_READ_LOCK(seg, ot->ot_thread);
 
1287
        block = seg->cs_hash_table[hash_idx];
 
1288
        while (block) {
 
1289
                if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
 
1290
                        ASSERT_NS(block->cb_state != IDX_CAC_BLOCK_FREE);
 
1291
 
 
1292
                        *ret_seg = seg;
 
1293
                        *ret_block = block;
 
1294
                        return OK;
 
1295
                }
 
1296
                block = block->cb_next;
 
1297
        }
 
1298
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1299
        
 
1300
        /* Block not found: */
 
1301
        *ret_seg = NULL;
 
1302
        *ret_block = NULL;
 
1303
        return OK;
 
1304
}
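ind_cac_get() (like ind_cac_fetch() above) derives two things from one hash: which lock segment covers the chain, and which bucket inside that segment to scan, so lookups in different segments never contend on the same lock. A compilable sketch of that two-level addressing follows; the 223 multiplier is taken from the code above, while kSegmentShifts and kHashSize are illustrative values rather than the configured ones.

#include <cstdint>
#include <cstdio>

// Illustrative sizes; the real ones come from XT_INDEX_CACHE_SEGMENT_SHIFTS
// and the computed cg_hash_size.
static const unsigned kSegmentShifts = 3;
static const unsigned kSegmentMask   = (1u << kSegmentShifts) - 1;
static const unsigned kHashSize      = 1024;

struct CacheSlot { unsigned segment; unsigned bucket; };

// Low bits pick the segment (and its lock); the remaining bits pick the
// hash bucket within that segment.
static CacheSlot locate(uint32_t node_id, uint32_t file_id) {
	uint32_t h = node_id + file_id * 223;
	CacheSlot s;
	s.segment = h & kSegmentMask;
	s.bucket  = (h >> kSegmentShifts) % kHashSize;
	return s;
}

int main() {
	CacheSlot s = locate(4711, 2);
	printf("segment=%u bucket=%u\n", s.segment, s.bucket);
	return 0;
}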
 
1305
 
 
1306
xtPublic xtBool xt_ind_write(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, size_t size, xtWord1 *data)
 
1307
{
 
1308
        XTIndBlockPtr   block;
 
1309
        DcSegmentPtr    seg;
 
1310
 
 
1311
        if (!(block = ind_cac_fetch(ot, ind, address, &seg, FALSE)))
 
1312
                return FAILED;
 
1313
 
 
1314
        XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
 
1315
        if (block->cb_state == IDX_CAC_BLOCK_FLUSHING) {
 
1316
                if (!ot->ot_table->tab_ind_flush_ilog->il_write_block(ot, block)) {
 
1317
                        XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
1318
                        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1319
                        return FAILED;
 
1320
                }
 
1321
        }
 
1322
#ifdef IND_OPT_DATA_WRITTEN
 
1323
        block->cb_header = TRUE;
 
1324
        block->cb_min_pos = 0;
 
1325
        if (size-XT_INDEX_PAGE_HEAD_SIZE > block->cb_max_pos)
 
1326
                block->cb_max_pos = size-XT_INDEX_PAGE_HEAD_SIZE;
 
1327
        ASSERT_NS(block->cb_max_pos <= XT_INDEX_PAGE_SIZE-XT_INDEX_PAGE_HEAD_SIZE);
 
1328
        ASSERT_NS(block->cb_min_pos < block->cb_max_pos);
 
1329
#endif
 
1330
        ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
 
1331
        memcpy(block->cb_data, data, size);
 
1332
        if (block->cb_state != IDX_CAC_BLOCK_DIRTY) {
 
1333
                TRACK_BLOCK_WRITE(offset);
 
1334
                xt_spinlock_lock(&ind->mi_dirty_lock);
 
1335
                if ((block->cb_dirty_next = ind->mi_dirty_list))
 
1336
                        ind->mi_dirty_list->cb_dirty_prev = block;
 
1337
                block->cb_dirty_prev = NULL;
 
1338
                ind->mi_dirty_list = block;
 
1339
                ind->mi_dirty_blocks++;
 
1340
                xt_spinlock_unlock(&ind->mi_dirty_lock);
 
1341
                if (block->cb_state != IDX_CAC_BLOCK_LOGGED) {
 
1342
                        ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
 
1343
                        ot->ot_thread->st_statistics.st_ind_cache_dirty++;
 
1344
                }
 
1345
                block->cb_state = IDX_CAC_BLOCK_DIRTY;
 
1346
        }
 
1347
        XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
1348
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1349
#ifdef XT_TRACK_INDEX_UPDATES
 
1350
        ot->ot_ind_changed++;
 
1351
#endif
 
1352
        return OK;
 
1353
}
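When a clean or logged block is modified, xt_ind_write() pushes it onto the head of the index's dirty list under mi_dirty_lock. Reduced to a self-contained sketch, with Block and DirtyList as hypothetical stand-ins for XTIndBlockRec and the mi_dirty_* fields:

#include <cstddef>

struct Block {
	Block *dirty_next;
	Block *dirty_prev;
};

struct DirtyList {
	Block  *head;    // mi_dirty_list
	size_t  count;   // mi_dirty_blocks
};

// Head insertion into the doubly linked dirty list, as done when a block
// first becomes dirty; the caller holds the list's spinlock.
static void push_dirty(DirtyList *list, Block *b) {
	if ((b->dirty_next = list->head))
		list->head->dirty_prev = b;
	b->dirty_prev = NULL;
	list->head = b;
	list->count++;
}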
 
1354
 
 
1355
/*
 
1356
 * Update the cache, if in RAM.
 
1357
 */
 
1358
xtPublic xtBool xt_ind_write_cache(XTOpenTablePtr ot, xtIndexNodeID address, size_t size, xtWord1 *data)
 
1359
{
 
1360
        XTIndBlockPtr   block;
 
1361
        DcSegmentPtr    seg;
 
1362
 
 
1363
        if (!ind_cac_get(ot, address, &seg, &block))
 
1364
                return FAILED;
 
1365
 
 
1366
        if (block) {
 
1367
                XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
 
1368
                /* This should only be done to pages that are free, which
 
1369
                 * are not on the dirty list, so they must be clean!
 
1370
                 */
 
1371
                ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
 
1372
                memcpy(block->cb_data, data, size);
 
1373
 
 
1374
                XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
1375
                IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1376
        }
 
1377
 
 
1378
        return OK;
 
1379
}
 
1380
 
 
1381
xtPublic xtBool xt_ind_get(XTOpenTablePtr ot, xtIndexNodeID address, XTIndReferencePtr iref)
 
1382
{
 
1383
        XTIndBlockPtr   block;
 
1384
        DcSegmentPtr    seg;
 
1385
 
 
1386
        if (!ind_cac_get(ot, address, &seg, &block))
 
1387
                return FAILED;
 
1388
 
 
1389
        if (block) {
 
1390
                XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
 
1391
                ASSERT_NS(IDX_CAC_NOT_FREE(block->cb_state));
 
1392
                IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1393
                iref->ir_block = block;
 
1394
                iref->ir_branch = (XTIdxBranchDPtr) block->cb_data;
 
1395
        }
 
1396
        else {
 
1397
                iref->ir_block = NULL;
 
1398
                iref->ir_branch = NULL;
 
1399
        }
 
1400
        iref->ir_xlock = TRUE;
 
1401
        iref->ir_updated = FALSE;
 
1402
 
 
1403
        return OK;
 
1404
}
 
1405
 
 
1406
/* 
 
1407
 * Note, this function may only be called if the block has
 
1408
 * been freed.
 
1409
 */
 
1410
xtPublic xtBool xt_ind_free_block(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address)
 
1411
{
 
1412
        XTIndBlockPtr   block;
 
1413
        DcSegmentPtr    seg;
 
1414
 
 
1415
        if (!ind_cac_get(ot, address, &seg, &block))
 
1416
                return FAILED;
 
1417
        if (block) {
 
1418
                XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
 
1419
 
 
1420
                if (block->cb_state == IDX_CAC_BLOCK_FLUSHING) {
 
1421
                        if (!ot->ot_table->tab_ind_flush_ilog->il_write_block(ot, block)) {
 
1422
                                XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
1423
                                IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1424
                                return FAILED;
 
1425
                        }
 
1426
                }
 
1427
 
 
1428
                /* {PAGE-NO-IN-INDEX-FILE}
 
1429
                 * This is the one exception to the rule that a block
 
1430
                 * that is in the IDX_CAC_BLOCK_LOGGED state may be released
 
1431
                 * from the cache!
 
1432
                 */
 
1433
                ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
 
1434
 
 
1435
                if (block->cb_state == IDX_CAC_BLOCK_DIRTY) {
 
1436
                        /* Take the block off the dirty list: */
 
1437
                        xt_spinlock_lock(&ind->mi_dirty_lock);
 
1438
                        if (block->cb_dirty_next)
 
1439
                                block->cb_dirty_next->cb_dirty_prev = block->cb_dirty_prev;
 
1440
                        if (block->cb_dirty_prev)
 
1441
                                block->cb_dirty_prev->cb_dirty_next = block->cb_dirty_next;
 
1442
                        if (ind->mi_dirty_list == block)
 
1443
                                ind->mi_dirty_list = block->cb_dirty_next;
 
1444
                        ind->mi_dirty_blocks--;
 
1445
                        xt_spinlock_unlock(&ind->mi_dirty_lock);
 
1446
                        block->cb_state = IDX_CAC_BLOCK_CLEAN;
 
1447
                        ot->ot_thread->st_statistics.st_ind_cache_dirty--;
 
1448
#ifdef IND_OPT_DATA_WRITTEN
 
1449
                        block->cb_header = FALSE;
 
1450
                        block->cb_min_pos = 0xFFFF;
 
1451
                        block->cb_max_pos = 0;
 
1452
#endif
 
1453
                }
 
1454
                XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
1455
 
 
1456
                IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1457
        }
 
1458
 
 
1459
        return OK;
 
1460
}
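Under IND_OPT_DATA_WRITTEN the block carries a written byte range (cb_min_pos/cb_max_pos) so the flusher only has to write the touched part of a page; xt_ind_free_block() above resets it to the empty range (min 0xFFFF, max 0) once the block is clean again. The following is a small hypothetical sketch of that kind of range tracking, not the exact helpers used by the engine.

#include <algorithm>
#include <cstdint>

// Hypothetical mirror of the cb_min_pos/cb_max_pos bookkeeping.
struct WrittenRange {
	uint16_t min_pos;
	uint16_t max_pos;
};

// Empty range: min above any valid offset, max below any valid offset.
static void reset_range(WrittenRange *r) {
	r->min_pos = 0xFFFF;
	r->max_pos = 0;
}

// Widen the range to cover a write of `len` bytes at `offset`.
static void note_write(WrittenRange *r, uint16_t offset, uint16_t len) {
	r->min_pos = std::min(r->min_pos, offset);
	r->max_pos = std::max(r->max_pos, uint16_t(offset + len));
}

// Bytes the flusher would have to write for this page.
static uint16_t bytes_to_flush(const WrittenRange *r) {
	return r->max_pos > r->min_pos ? uint16_t(r->max_pos - r->min_pos) : 0;
}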
 
1461
 
 
1462
xtPublic xtBool xt_ind_read_bytes(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, size_t size, xtWord1 *data)
 
1463
{
 
1464
        XTIndBlockPtr   block;
 
1465
        DcSegmentPtr    seg;
 
1466
 
 
1467
        if (!(block = ind_cac_fetch(ot, ind, address, &seg, TRUE)))
 
1468
                return FAILED;
 
1469
 
 
1470
        XT_IPAGE_READ_LOCK(&block->cb_lock);
 
1471
        memcpy(data, block->cb_data, size);
 
1472
        XT_IPAGE_UNLOCK(&block->cb_lock, FALSE);
 
1473
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1474
        return OK;
 
1475
}
 
1476
 
 
1477
xtPublic xtBool xt_ind_fetch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, XTPageLockType ltype, XTIndReferencePtr iref)
 
1478
{
 
1479
        register XTIndBlockPtr  block;
 
1480
        DcSegmentPtr                    seg;
 
1481
        xtWord2                                 branch_size;
 
1482
        u_int                                   rec_size;
 
1483
        xtBool                                  xlock = FALSE;
 
1484
 
 
1485
#ifdef DEBUG
 
1486
        ASSERT_NS(iref->ir_xlock == 2);
 
1487
        ASSERT_NS(iref->ir_updated == 2);
 
1488
#endif
 
1489
        if (!(block = ind_cac_fetch(ot, ind, address, &seg, TRUE)))
 
1490
                return FAILED;
 
1491
 
 
1492
        branch_size = XT_GET_DISK_2(((XTIdxBranchDPtr) block->cb_data)->tb_size_2);
 
1493
        rec_size = XT_GET_INDEX_BLOCK_LEN(branch_size);
 
1494
        if (rec_size < 2 || rec_size > XT_INDEX_PAGE_SIZE)
 
1495
                goto failed_corrupt;
 
1496
        if (ind->mi_fix_key) {
 
1497
                rec_size -= 2;
 
1498
                if (XT_IS_NODE(branch_size)) {
 
1499
                        if (rec_size != 0) {
 
1500
                                if (rec_size < XT_NODE_REF_SIZE)
 
1501
                                        goto failed_corrupt;
 
1502
                                rec_size -= XT_NODE_REF_SIZE;
 
1503
                                if ((rec_size % (ind->mi_key_size + XT_RECORD_REF_SIZE + XT_NODE_REF_SIZE)) != 0)
 
1504
                                        goto failed_corrupt;
 
1505
                        }
 
1506
                }
 
1507
                else {
 
1508
                        if ((rec_size % (ind->mi_key_size + XT_RECORD_REF_SIZE)) != 0)
 
1509
                                goto failed_corrupt;
 
1510
                }
 
1511
        }
 
1512
 
 
1513
        switch (ltype) {
 
1514
                case XT_LOCK_READ:
 
1515
                        break;
 
1516
                case XT_LOCK_WRITE:
 
1517
                        xlock = TRUE;
 
1518
                        break;
 
1519
                case XT_XLOCK_LEAF:
 
1520
                        if (!XT_IS_NODE(branch_size))
 
1521
                                xlock = TRUE;
 
1522
                        break;
 
1523
                case XT_XLOCK_DEL_LEAF:
 
1524
                        if (!XT_IS_NODE(branch_size)) {
 
1525
                                if (ot->ot_table->tab_dic.dic_no_lazy_delete)
 
1526
                                        xlock = TRUE;
 
1527
                                else {
 
1528
                                        /*
 
1529
                                         * {LAZY-DEL-INDEX-ITEMS}
 
1530
                                         *
 
1531
                                         * We are fetching a page for delete purposes.
 
1532
                                         * We decide here whether we plan to do a lazy delete,
 
1533
                                         * or whether we plan to compact the node.
 
1534
                                         *
 
1535
                                         * A lazy delete just requires a shared lock.
 
1536
                                         *
 
1537
                                         */
 
1538
                                        if (ind->mi_lazy_delete) {
 
1539
                                                /* If the number of deleted items is greater than
 
1540
                                                 * half of the number of items that can fit in the
 
1541
                                                 * page, then we will compact the node.
 
1542
                                                 */
 
1543
                                                if (!xt_idx_lazy_delete_on_leaf(ind, block, XT_GET_INDEX_BLOCK_LEN(branch_size)))
 
1544
                                                        xlock = TRUE;
 
1545
                                        }
 
1546
                                        else
 
1547
                                                xlock = TRUE;
 
1548
                                }
 
1549
                        }
 
1550
                        break;
 
1551
        }
 
1552
 
 
1553
        if ((iref->ir_xlock = xlock))
 
1554
                XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
 
1555
        else
 
1556
                XT_IPAGE_READ_LOCK(&block->cb_lock);
 
1557
 
 
1558
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1559
 
 
1560
        /* {DIRECT-IO}
 
1561
         * Direct I/O requires that the buffer is 512 byte aligned.
 
1562
         * To do this, cb_data is turned into a pointer, instead
 
1563
         * of an array.
 
1564
         * As a result, we need to pass a pointer to both the
 
1565
         * cache block and the cache block data:
 
1566
         */
 
1567
        iref->ir_updated = FALSE;
 
1568
        iref->ir_block = block;
 
1569
        iref->ir_branch = (XTIdxBranchDPtr) block->cb_data;
 
1570
        return OK;
 
1571
 
 
1572
        failed_corrupt:
 
1573
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1574
        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name);
 
1575
        return FAILED;
 
1576
}
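For fixed-size keys, xt_ind_fetch() treats a page as corrupt when its used length cannot be split into whole entries: after the 2-byte size word, a leaf must hold a multiple of (key + record reference), and an interior node additionally carries one extra node reference. A compilable sketch of that arithmetic; kRecordRefSize and kNodeRefSize are illustrative stand-ins for XT_RECORD_REF_SIZE and XT_NODE_REF_SIZE, not their real values.

#include <cstdio>

static const unsigned kRecordRefSize = 8;   // illustrative
static const unsigned kNodeRefSize   = 4;   // illustrative

// Mirrors the fixed-key sanity check in xt_ind_fetch(): after the 2-byte
// size word, a leaf must divide into whole (key + record ref) entries,
// and a node must additionally account for one extra node reference.
static bool branch_size_ok(unsigned rec_size, unsigned key_size, bool is_node) {
	if (rec_size < 2)
		return false;
	rec_size -= 2;
	if (is_node) {
		if (rec_size == 0)
			return true;
		if (rec_size < kNodeRefSize)
			return false;
		rec_size -= kNodeRefSize;
		return rec_size % (key_size + kRecordRefSize + kNodeRefSize) == 0;
	}
	return rec_size % (key_size + kRecordRefSize) == 0;
}

int main() {
	// A leaf with three 12-byte keys: 2 + 3 * (12 + 8) = 62 bytes.
	printf("leaf ok: %d\n", branch_size_ok(62, 12, false));
	// The same byte count read as a node does not divide evenly.
	printf("node ok: %d\n", branch_size_ok(62, 12, true));
	return 0;
}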
 
1577
 
 
1578
xtPublic xtBool xt_ind_release(XTOpenTablePtr ot, XTIndexPtr ind, XTPageUnlockType XT_NDEBUG_UNUSED(utype), XTIndReferencePtr iref)
 
1579
{
 
1580
        register XTIndBlockPtr  block;
 
1581
 
 
1582
        block = iref->ir_block;
 
1583
 
 
1584
#ifdef DEBUG
 
1585
        ASSERT_NS(iref->ir_xlock != 2);
 
1586
        ASSERT_NS(iref->ir_updated != 2);
 
1587
        if (iref->ir_updated)
 
1588
                ASSERT_NS(utype == XT_UNLOCK_R_UPDATE || utype == XT_UNLOCK_W_UPDATE);
 
1589
        else
 
1590
                ASSERT_NS(utype == XT_UNLOCK_READ || utype == XT_UNLOCK_WRITE);
 
1591
        if (iref->ir_xlock)
 
1592
                ASSERT_NS(utype == XT_UNLOCK_WRITE || utype == XT_UNLOCK_W_UPDATE);
 
1593
        else
 
1594
                ASSERT_NS(utype == XT_UNLOCK_READ || utype == XT_UNLOCK_R_UPDATE);
 
1595
#endif
 
1596
        if (iref->ir_updated) {
 
1597
#ifdef DEBUG
 
1598
#ifdef IND_OPT_DATA_WRITTEN
 
1599
                xtWord2 branch_size;
 
1600
                u_int   rec_size;
 
1601
 
 
1602
                branch_size = XT_GET_DISK_2(((XTIdxBranchDPtr) block->cb_data)->tb_size_2);
 
1603
                rec_size = XT_GET_INDEX_BLOCK_LEN(branch_size);
 
1604
 
 
1605
                ASSERT_NS(block->cb_min_pos <= rec_size-2);
 
1606
                ASSERT_NS(block->cb_min_pos <= block->cb_max_pos);
 
1607
                ASSERT_NS(block->cb_max_pos <= rec_size-2);
 
1608
                ASSERT_NS(block->cb_max_pos <= XT_INDEX_PAGE_SIZE-2);
 
1609
#endif
 
1610
#endif
 
1611
                /* The page was updated: */
 
1612
                ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
 
1613
                if (block->cb_state != IDX_CAC_BLOCK_DIRTY) {
 
1614
                        TRACK_BLOCK_WRITE(offset);
 
1615
                        xt_spinlock_lock(&ind->mi_dirty_lock);
 
1616
                        if ((block->cb_dirty_next = ind->mi_dirty_list))
 
1617
                                ind->mi_dirty_list->cb_dirty_prev = block;
 
1618
                        block->cb_dirty_prev = NULL;
 
1619
                        ind->mi_dirty_list = block;
 
1620
                        ind->mi_dirty_blocks++;
 
1621
                        xt_spinlock_unlock(&ind->mi_dirty_lock);
 
1622
                        if (block->cb_state != IDX_CAC_BLOCK_LOGGED) {
 
1623
                                ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
 
1624
                                ot->ot_thread->st_statistics.st_ind_cache_dirty++;
 
1625
                        }
 
1626
                        block->cb_state = IDX_CAC_BLOCK_DIRTY;
 
1627
                }
 
1628
        }
 
1629
 
 
1630
        XT_IPAGE_UNLOCK(&block->cb_lock, iref->ir_xlock);
 
1631
#ifdef DEBUG
 
1632
        iref->ir_xlock = 2;
 
1633
        iref->ir_updated = 2;
 
1634
#endif
 
1635
        return OK;
 
1636
}
 
1637
 
 
1638
xtPublic xtBool xt_ind_reserve(XTOpenTablePtr ot, u_int count, XTIdxBranchDPtr not_this)
 
1639
{
 
1640
        register XTIndBlockPtr  block;
 
1641
        register DcGlobalsRec   *dcg = &ind_cac_globals;
 
1642
 
 
1643
#ifdef XT_TRACK_INDEX_UPDATES
 
1644
        ot->ot_ind_reserved = count;
 
1645
        ot->ot_ind_reads = 0;
 
1646
#endif
 
1647
#ifdef DEBUG_CHECK_IND_CACHE
 
1648
        xt_ind_check_cache(NULL);
 
1649
#endif
 
1650
        while (ot->ot_ind_res_count < count) {
 
1651
                if (!dcg->cg_free_list) {
 
1652
                        if (!ind_cac_free_lru_blocks(ot, count - ot->ot_ind_res_count, not_this)) {
 
1653
                                if (!dcg->cg_free_list) {
 
1654
                                        xt_ind_free_reserved(ot);
 
1655
                                        xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_INDEX_CACHE);
 
1656
#ifdef DEBUG_CHECK_IND_CACHE
 
1657
                                        xt_ind_check_cache(NULL);
 
1658
#endif
 
1659
                                        return FAILED;
 
1660
                                }
 
1661
                        }
 
1662
                }
 
1663
 
 
1664
                /* Get a free block: */
 
1665
                xt_lock_mutex_ns(&dcg->cg_lock);
 
1666
                while (ot->ot_ind_res_count < count && (block = dcg->cg_free_list)) {
 
1667
                        ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_FREE);
 
1668
                        dcg->cg_free_list = block->cb_next;
 
1669
                        dcg->cg_free_count--;
 
1670
                        block->cb_next = ot->ot_ind_res_bufs;
 
1671
                        ot->ot_ind_res_bufs = block;
 
1672
                        ot->ot_ind_res_count++;
 
1673
#ifdef DEBUG_CHECK_IND_CACHE
 
1674
                        dcg->cg_reserved_by_ots++;
 
1675
#endif
 
1676
                }
 
1677
                xt_unlock_mutex_ns(&dcg->cg_lock);
 
1678
        }
 
1679
#ifdef DEBUG_CHECK_IND_CACHE
 
1680
        xt_ind_check_cache(NULL);
 
1681
#endif
 
1682
        return OK;
 
1683
}
 
1684
 
 
1685
xtPublic void xt_ind_free_reserved(XTOpenTablePtr ot)
 
1686
{
 
1687
#ifdef DEBUG_CHECK_IND_CACHE
 
1688
        xt_ind_check_cache(NULL);
 
1689
#endif
 
1690
        if (ot->ot_ind_res_bufs) {
 
1691
                register XTIndBlockPtr  block, fblock;
 
1692
                register DcGlobalsRec   *dcg = &ind_cac_globals;
 
1693
 
 
1694
                xt_lock_mutex_ns(&dcg->cg_lock);
 
1695
                block = ot->ot_ind_res_bufs;
 
1696
                while (block) {
 
1697
                        fblock = block;
 
1698
                        block = block->cb_next;
 
1699
 
 
1700
                        fblock->cb_next = dcg->cg_free_list;
 
1701
                        dcg->cg_free_list = fblock;
 
1702
#ifdef DEBUG_CHECK_IND_CACHE
 
1703
                        dcg->cg_reserved_by_ots--;
 
1704
#endif
 
1705
                        dcg->cg_free_count++;
 
1706
                }
 
1707
                xt_unlock_mutex_ns(&dcg->cg_lock);
 
1708
                ot->ot_ind_res_bufs = NULL;
 
1709
                ot->ot_ind_res_count = 0;
 
1710
        }
 
1711
#ifdef DEBUG_CHECK_IND_CACHE
 
1712
        xt_ind_check_cache(NULL);
 
1713
#endif
 
1714
}
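xt_ind_reserve() pre-claims blocks from the shared free list into a per-handle reserve so a subsequent index update cannot run out of cache mid-operation, and xt_ind_free_reserved() hands whatever is left back. A simplified, self-contained sketch of that claim/return pattern; FreeList and ReserveSet are hypothetical types, and the real code additionally evicts LRU blocks when the free list runs dry and holds cg_lock around each transfer.

#include <cstddef>

struct Block { Block *next; };

struct FreeList   { Block *head; size_t count; };   // cg_free_list / cg_free_count
struct ReserveSet { Block *head; size_t count; };   // ot_ind_res_bufs / ot_ind_res_count

// Move up to `want` blocks from the shared free list into a private
// reserve; returns how many are now reserved.
static size_t reserve_blocks(FreeList *fl, ReserveSet *rs, size_t want) {
	Block *b;
	while (rs->count < want && (b = fl->head)) {
		fl->head = b->next;
		fl->count--;
		b->next = rs->head;
		rs->head = b;
		rs->count++;
	}
	return rs->count;
}

// Return everything in the reserve back to the shared free list.
static void free_reserved(FreeList *fl, ReserveSet *rs) {
	while (Block *b = rs->head) {
		rs->head = b->next;
		b->next = fl->head;
		fl->head = b;
		fl->count++;
	}
	rs->count = 0;
}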
 
1715
 
 
1716
xtPublic void xt_ind_unreserve(XTOpenTablePtr ot)
 
1717
{
 
1718
        if (!ind_cac_globals.cg_free_list)
 
1719
                xt_ind_free_reserved(ot);
 
1720
}
 
1721