~drizzle-trunk/drizzle/development

Viewing changes to plugin/pbxt/src/cache_xt.cc

lp:drizzle + pbxt 1.1 + test results

 
1
/* Copyright (c) 2005 PrimeBase Technologies GmbH, Germany
 
2
 *
 
3
 * PrimeBase XT
 
4
 *
 
5
 * This program is free software; you can redistribute it and/or modify
 
6
 * it under the terms of the GNU General Public License as published by
 
7
 * the Free Software Foundation; either version 2 of the License, or
 
8
 * (at your option) any later version.
 
9
 *
 
10
 * This program is distributed in the hope that it will be useful,
 
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
13
 * GNU General Public License for more details.
 
14
 *
 
15
 * You should have received a copy of the GNU General Public License
 
16
 * along with this program; if not, write to the Free Software
 
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 
18
 *
 
19
 * 2005-05-24   Paul McCullagh
 
20
 *
 
21
 * H&G2JCtL
 
22
 */
 
23
 
 
24
#include "xt_config.h"
 
25
 
 
26
#ifdef DRIZZLED
 
27
#include <bitset>
 
28
#endif
 
29
 
 
30
#ifndef XT_WIN
 
31
#include <unistd.h>
 
32
#endif
 
33
 
 
34
#include <stdio.h>
 
35
#include <time.h>
 
36
 
 
37
#include "pthread_xt.h"
 
38
#include "thread_xt.h"
 
39
#include "filesys_xt.h"
 
40
#include "cache_xt.h"
 
41
#include "table_xt.h"
 
42
#include "trace_xt.h"
 
43
#include "util_xt.h"
 
44
 
 
45
#define XT_TIME_DIFF(start, now) (\
 
46
        ((xtWord4) (now) < (xtWord4) (start)) ? \
 
47
        ((xtWord4) 0XFFFFFFFF - ((xtWord4) (start) - (xtWord4) (now))) : \
 
48
        ((xtWord4) (now) - (xtWord4) (start)))
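
The XT_TIME_DIFF macro above is what ind_cac_fetch() later uses to compare a block's cb_ru_time against the global cg_ru_now counter while tolerating 32-bit wraparound. Below is a minimal standalone sketch of the same arithmetic with standard types and hypothetical names; it is illustrative only and not part of cache_xt.cc:

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: the same wraparound handling as XT_TIME_DIFF. */
static uint32_t time_diff(uint32_t start, uint32_t now)
{
	/* If 'now' has wrapped around 0xFFFFFFFF, measure the distance across the wrap. */
	return (now < start) ? (0xFFFFFFFF - (start - now)) : (now - start);
}

int main()
{
	printf("%u\n", time_diff(10, 50));           /* 40 */
	printf("%u\n", time_diff(0xFFFFFFF0u, 16));  /* 31: one short of the exact modular
	                                              * difference, close enough for an LRU check */
	return 0;
}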
 
49
 
 
50
/*
 
51
 * -----------------------------------------------------------------------
 
52
 * D I S K   C A C H E
 
53
 */
 
54
 
 
55
#define IDX_CAC_SEGMENT_COUNT           ((off_t) 1 << XT_INDEX_CACHE_SEGMENT_SHIFTS)
 
56
#define IDX_CAC_SEGMENT_MASK            (IDX_CAC_SEGMENT_COUNT - 1)
 
57
 
 
58
#ifdef XT_NO_ATOMICS
 
59
#define IDX_CAC_USE_PTHREAD_RW
 
60
#else
 
61
//#define IDX_CAC_USE_PTHREAD_RW
 
62
#define IDX_CAC_USE_XSMUTEX
 
63
//#define IDX_USE_SPINXSLOCK
 
64
#endif
 
65
 
 
66
#if defined(IDX_CAC_USE_PTHREAD_RW)
 
67
#define IDX_CAC_LOCK_TYPE                               xt_rwlock_type
 
68
#define IDX_CAC_INIT_LOCK(s, i)                 xt_init_rwlock_with_autoname(s, &(i)->cs_lock)
 
69
#define IDX_CAC_FREE_LOCK(s, i)                 xt_free_rwlock(&(i)->cs_lock)   
 
70
#define IDX_CAC_READ_LOCK(i, o)                 xt_slock_rwlock_ns(&(i)->cs_lock)
 
71
#define IDX_CAC_WRITE_LOCK(i, o)                xt_xlock_rwlock_ns(&(i)->cs_lock)
 
72
#define IDX_CAC_UNLOCK(i, o)                    xt_unlock_rwlock_ns(&(i)->cs_lock)
 
73
#elif defined(IDX_CAC_USE_XSMUTEX)
 
74
#define IDX_CAC_LOCK_TYPE                               XTMutexXSLockRec
 
75
#define IDX_CAC_INIT_LOCK(s, i)                 xt_xsmutex_init_with_autoname(s, &(i)->cs_lock)
 
76
#define IDX_CAC_FREE_LOCK(s, i)                 xt_xsmutex_free(s, &(i)->cs_lock)       
 
77
#define IDX_CAC_READ_LOCK(i, o)                 xt_xsmutex_slock(&(i)->cs_lock, (o)->t_id)
 
78
#define IDX_CAC_WRITE_LOCK(i, o)                xt_xsmutex_xlock(&(i)->cs_lock, (o)->t_id)
 
79
#define IDX_CAC_UNLOCK(i, o)                    xt_xsmutex_unlock(&(i)->cs_lock, (o)->t_id)
 
80
#elif defined(IDX_CAC_USE_SPINXSLOCK)
 
81
#define IDX_CAC_LOCK_TYPE                               XTSpinXSLockRec
 
82
#define IDX_CAC_INIT_LOCK(s, i)                 xt_spinxslock_init_with_autoname(s, &(i)->cs_lock)
 
83
#define IDX_CAC_FREE_LOCK(s, i)                 xt_spinxslock_free(s, &(i)->cs_lock)    
 
84
#define IDX_CAC_READ_LOCK(i, s)                 xt_spinxslock_slock(&(i)->cs_lock, (s)->t_id)
 
85
#define IDX_CAC_WRITE_LOCK(i, s)                xt_spinxslock_xlock(&(i)->cs_lock, FALSE, (s)->t_id)
 
86
#define IDX_CAC_UNLOCK(i, s)                    xt_spinxslock_unlock(&(i)->cs_lock, (s)->t_id)
 
87
#else
 
88
#error Please define the lock type
 
89
#endif
 
90
 
 
91
#ifdef XT_NO_ATOMICS
 
92
#define ID_HANDLE_USE_PTHREAD_RW
 
93
#else
 
94
//#define ID_HANDLE_USE_PTHREAD_RW
 
95
#define ID_HANDLE_USE_SPINLOCK
 
96
#endif
 
97
 
 
98
#if defined(ID_HANDLE_USE_PTHREAD_RW)
 
99
#define ID_HANDLE_LOCK_TYPE                             xt_mutex_type
 
100
#define ID_HANDLE_INIT_LOCK(s, i)               xt_init_mutex_with_autoname(s, i)
 
101
#define ID_HANDLE_FREE_LOCK(s, i)               xt_free_mutex(i)        
 
102
#define ID_HANDLE_LOCK(i)                               xt_lock_mutex_ns(i)
 
103
#define ID_HANDLE_UNLOCK(i)                             xt_unlock_mutex_ns(i)
 
104
#elif defined(ID_HANDLE_USE_SPINLOCK)
 
105
#define ID_HANDLE_LOCK_TYPE                             XTSpinLockRec
 
106
#define ID_HANDLE_INIT_LOCK(s, i)               xt_spinlock_init_with_autoname(s, i)
 
107
#define ID_HANDLE_FREE_LOCK(s, i)               xt_spinlock_free(s, i)  
 
108
#define ID_HANDLE_LOCK(i)                               xt_spinlock_lock(i)
 
109
#define ID_HANDLE_UNLOCK(i)                             xt_spinlock_unlock(i)
 
110
#endif
 
111
 
 
112
#define XT_HANDLE_SLOTS                                 37
 
113
 
 
114
/*
 
115
#ifdef DEBUG
 
116
#define XT_INIT_HANDLE_COUNT                    0
 
117
#define XT_INIT_HANDLE_BLOCKS                   0
 
118
#else
 
119
#define XT_INIT_HANDLE_COUNT                    40
 
120
#define XT_INIT_HANDLE_BLOCKS                   10
 
121
#endif
 
122
*/
 
123
 
 
124
/* A disk cache segment. The cache is divided into a number of segments
 
125
 * to improve concurrency.
 
126
 */
 
127
typedef struct DcSegment {
 
128
        IDX_CAC_LOCK_TYPE       cs_lock;                                                /* The cache segment lock. */
 
129
        XTIndBlockPtr           *cs_hash_table;
 
130
} DcSegmentRec, *DcSegmentPtr;
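
The comment above describes splitting the cache into segments for concurrency; the functions further down (ind_free_block(), ind_cac_fetch(), ind_cac_get()) derive both the segment and the hash bucket from the node address and file id. Here is a small self-contained sketch of that mapping, with hypothetical constants standing in for XT_INDEX_CACHE_SEGMENT_SHIFTS and cg_hash_size; illustrative only, not part of cache_xt.cc:

#include <stdint.h>
#include <stdio.h>

#define SEGMENT_SHIFTS  3                         /* hypothetical: 2^3 = 8 segments */
#define SEGMENT_COUNT   (1u << SEGMENT_SHIFTS)
#define SEGMENT_MASK    (SEGMENT_COUNT - 1)

/* Illustrative only: map a (node id, file id) pair to a segment and a hash chain. */
static void locate(uint32_t node_id, uint32_t file_id, uint32_t hash_size,
                   uint32_t *seg_idx, uint32_t *bucket)
{
	uint32_t h = node_id + file_id * 223;         /* same prime multiplier as the code */
	*seg_idx = h & SEGMENT_MASK;                  /* low bits choose the segment (and its lock) */
	*bucket  = (h >> SEGMENT_SHIFTS) % hash_size; /* remaining bits choose the chain in that segment */
}

int main()
{
	uint32_t s, b;
	locate(1027, 2, 4096, &s, &b);
	printf("segment=%u bucket=%u\n", s, b);
	return 0;
}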
 
131
 
 
132
typedef struct DcHandleSlot {
 
133
        ID_HANDLE_LOCK_TYPE     hs_handles_lock;
 
134
        XTIndHandleBlockPtr     hs_free_blocks;
 
135
        XTIndHandlePtr          hs_free_handles;
 
136
        XTIndHandlePtr          hs_used_handles;
 
137
} DcHandleSlotRec, *DcHandleSlotPtr;
 
138
 
 
139
typedef struct DcGlobals {
 
140
        xt_mutex_type           cg_lock;                                                /* The public cache lock. */
 
141
        DcSegmentRec            cg_segment[IDX_CAC_SEGMENT_COUNT];
 
142
        XTIndBlockPtr           cg_blocks;
 
143
#ifdef XT_USE_DIRECT_IO_ON_INDEX
 
144
        xtWord1                         *cg_buffer;
 
145
#endif
 
146
        XTIndBlockPtr           cg_free_list;
 
147
        xtWord4                         cg_free_count;
 
148
        xtWord4                         cg_ru_now;                                              /* A counter as described by Jim Starkey (my thanks) */
 
149
        XTIndBlockPtr           cg_lru_block;
 
150
        XTIndBlockPtr           cg_mru_block;
 
151
        xtWord4                         cg_hash_size;
 
152
        xtWord4                         cg_block_count;
 
153
        xtWord4                         cg_max_free;
 
154
#ifdef DEBUG_CHECK_IND_CACHE
 
155
        u_int                           cg_reserved_by_ots;                             /* Number of blocks reserved by open tables. */
 
156
        u_int                           cg_read_count;                                  /* Number of blocks being read. */
 
157
#endif
 
158
 
 
159
        /* Index cache handles: */
 
160
        DcHandleSlotRec         cg_handle_slot[XT_HANDLE_SLOTS];
 
161
} DcGlobalsRec;
 
162
 
 
163
static DcGlobalsRec     ind_cac_globals;
 
164
 
 
165
#ifdef XT_USE_MYSYS
 
166
#ifdef xtPublic
 
167
#undef xtPublic
 
168
#endif
 
169
#include "my_global.h"
 
170
#include "my_sys.h"
 
171
#include "keycache.h"
 
172
KEY_CACHE my_cache;
 
173
#undef  pthread_rwlock_rdlock
 
174
#undef  pthread_rwlock_wrlock
 
175
#undef  pthread_rwlock_try_wrlock
 
176
#undef  pthread_rwlock_unlock
 
177
#undef  pthread_mutex_lock
 
178
#undef  pthread_mutex_unlock
 
179
#undef  pthread_cond_wait
 
180
#undef  pthread_cond_broadcast
 
181
#undef  xt_mutex_type
 
182
#define xtPublic
 
183
#endif
 
184
 
 
185
/*
 
186
 * -----------------------------------------------------------------------
 
187
 * INDEX CACHE HANDLES
 
188
 */
 
189
 
 
190
static XTIndHandlePtr ind_alloc_handle()
 
191
{
 
192
        XTIndHandlePtr handle;
 
193
 
 
194
        if (!(handle = (XTIndHandlePtr) xt_calloc_ns(sizeof(XTIndHandleRec))))
 
195
                return NULL;
 
196
        xt_spinlock_init_with_autoname(NULL, &handle->ih_lock);
 
197
        return handle;
 
198
}
 
199
 
 
200
static void ind_free_handle(XTIndHandlePtr handle)
 
201
{
 
202
        xt_spinlock_free(NULL, &handle->ih_lock);
 
203
        xt_free_ns(handle);
 
204
}
 
205
 
 
206
static void ind_handle_exit(XTThreadPtr self)
 
207
{
 
208
        DcHandleSlotPtr         hs;
 
209
        XTIndHandlePtr          handle;
 
210
        XTIndHandleBlockPtr     hptr;
 
211
 
 
212
        for (int i=0; i<XT_HANDLE_SLOTS; i++) {
 
213
                hs = &ind_cac_globals.cg_handle_slot[i];
 
214
 
 
215
                while (hs->hs_used_handles) {
 
216
                        handle = hs->hs_used_handles;
 
217
                        xt_ind_release_handle(handle, FALSE, self);
 
218
                }
 
219
 
 
220
                while (hs->hs_free_blocks) {
 
221
                        hptr = hs->hs_free_blocks;
 
222
                        hs->hs_free_blocks = hptr->hb_next;
 
223
                        xt_free(self, hptr);
 
224
                }
 
225
 
 
226
                while (hs->hs_free_handles) {
 
227
                        handle = hs->hs_free_handles;
 
228
                        hs->hs_free_handles = handle->ih_next;
 
229
                        ind_free_handle(handle);
 
230
                }
 
231
 
 
232
                ID_HANDLE_FREE_LOCK(self, &hs->hs_handles_lock);
 
233
        }
 
234
}
 
235
 
 
236
static void ind_handle_init(XTThreadPtr self)
 
237
{
 
238
        DcHandleSlotPtr         hs;
 
239
 
 
240
        for (int i=0; i<XT_HANDLE_SLOTS; i++) {
 
241
                hs = &ind_cac_globals.cg_handle_slot[i];
 
242
                memset(hs, 0, sizeof(DcHandleSlotRec));
 
243
                ID_HANDLE_INIT_LOCK(self, &hs->hs_handles_lock);
 
244
        }
 
245
}
 
246
 
 
247
//#define CHECK_HANDLE_STRUCTS
 
248
 
 
249
#ifdef CHECK_HANDLE_STRUCTS
 
250
static int gdummy = 0;
 
251
 
 
252
static void ic_stop_here()
 
253
{
 
254
        gdummy = gdummy + 1;
 
255
        printf("Nooo %d!\n", gdummy);
 
256
}
 
257
 
 
258
static void ic_check_handle_structs()
 
259
{
 
260
        XTIndHandlePtr          handle, phandle;
 
261
        XTIndHandleBlockPtr     hptr, phptr;
 
262
        int                                     count = 0;
 
263
        int                                     ctest;
 
264
 
 
265
        phandle = NULL;
 
266
        handle = ind_cac_globals.cg_used_handles;
 
267
        while (handle) {
 
268
                if (handle == phandle)
 
269
                        ic_stop_here();
 
270
                if (handle->ih_prev != phandle)
 
271
                        ic_stop_here();
 
272
                if (handle->ih_cache_reference) {
 
273
                        ctest = handle->x.ih_cache_block->cb_handle_count;
 
274
                        if (ctest == 0 || ctest > 100)
 
275
                                ic_stop_here();
 
276
                }
 
277
                else {
 
278
                        ctest = handle->x.ih_handle_block->hb_ref_count;
 
279
                        if (ctest == 0 || ctest > 100)
 
280
                                ic_stop_here();
 
281
                }
 
282
                phandle = handle;
 
283
                handle = handle->ih_next;
 
284
                count++;
 
285
                if (count > 1000)
 
286
                        ic_stop_here();
 
287
        }
 
288
 
 
289
        count = 0;
 
290
        hptr = ind_cac_globals.cg_free_blocks;
 
291
        while (hptr) {
 
292
                if (hptr == phptr)
 
293
                        ic_stop_here();
 
294
                phptr = hptr;
 
295
                hptr = hptr->hb_next;
 
296
                count++;
 
297
                if (count > 1000)
 
298
                        ic_stop_here();
 
299
        }
 
300
 
 
301
        count = 0;
 
302
        handle = ind_cac_globals.cg_free_handles;
 
303
        while (handle) {
 
304
                if (handle == phandle)
 
305
                        ic_stop_here();
 
306
                phandle = handle;
 
307
                handle = handle->ih_next;
 
308
                count++;
 
309
                if (count > 1000)
 
310
                        ic_stop_here();
 
311
        }
 
312
}
 
313
#endif
 
314
 
 
315
/*
 
316
 * Get a handle to the index block.
 
317
 * This function is called by index scanners (readers).
 
318
 */
 
319
xtPublic XTIndHandlePtr xt_ind_get_handle(XTOpenTablePtr ot, XTIndexPtr ind, XTIndReferencePtr iref)
 
320
{
 
321
        DcHandleSlotPtr hs;
 
322
        XTIndHandlePtr  handle;
 
323
 
 
324
        hs = &ind_cac_globals.cg_handle_slot[iref->ir_block->cb_address % XT_HANDLE_SLOTS];
 
325
 
 
326
        ASSERT_NS(iref->ir_xlock == FALSE);
 
327
        ASSERT_NS(iref->ir_updated == FALSE);
 
328
        ID_HANDLE_LOCK(&hs->hs_handles_lock);
 
329
#ifdef CHECK_HANDLE_STRUCTS
 
330
        ic_check_handle_structs();
 
331
#endif
 
332
        if ((handle = hs->hs_free_handles))
 
333
                hs->hs_free_handles = handle->ih_next;
 
334
        else {
 
335
                if (!(handle = ind_alloc_handle())) {
 
336
                        ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
 
337
                        xt_ind_release(ot, ind, XT_UNLOCK_READ, iref);
 
338
                        return NULL;
 
339
                }
 
340
        }
 
341
        if (hs->hs_used_handles)
 
342
                hs->hs_used_handles->ih_prev = handle;
 
343
        handle->ih_next = hs->hs_used_handles;
 
344
        handle->ih_prev = NULL;
 
345
        handle->ih_address = iref->ir_block->cb_address;
 
346
        handle->ih_cache_reference = TRUE;
 
347
        handle->x.ih_cache_block = iref->ir_block;
 
348
        handle->ih_branch = iref->ir_branch;
 
349
        /* {HANDLE-COUNT-USAGE}
 
350
         * This is safe because:
 
351
         *
 
352
         * I have an Slock on the cache block, and I have
 
353
         * at least an Slock on the index.
 
354
         * So this excludes anyone who is reading 
 
355
         * cb_handle_count in the index.
 
356
         * (all cache block writers, and the freer).
 
357
         *
 
358
         * The increment is safe because I have the list
 
359
         * lock (hs_handles_lock), which is required by anyone else
 
360
         * who increments or decrements this value.
 
361
         */
 
362
        iref->ir_block->cb_handle_count++;
 
363
        hs->hs_used_handles = handle;
 
364
#ifdef CHECK_HANDLE_STRUCTS
 
365
        ic_check_handle_structs();
 
366
#endif
 
367
        ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
 
368
        xt_ind_release(ot, ind, XT_UNLOCK_READ, iref);
 
369
        return handle;
 
370
}
 
371
 
 
372
xtPublic void xt_ind_release_handle(XTIndHandlePtr handle, xtBool have_lock, XTThreadPtr thread)
 
373
{
 
374
        DcHandleSlotPtr hs;
 
375
        XTIndBlockPtr   block = NULL;
 
376
        u_int                   hash_idx = 0;
 
377
        DcSegmentPtr    seg = NULL;
 
378
        XTIndBlockPtr   xblock;
 
379
 
 
380
        /* The lock order is:
 
381
         * 1. Cache segment (cs_lock) - This is only taken by ind_free_block()!
 
382
         * 1. S/Slock cache block (cb_lock)
 
383
         * 2. List lock (cg_handles_lock).
 
384
         * 3. Handle lock (ih_lock)
 
385
         */
 
386
        if (!have_lock)
 
387
                xt_spinlock_lock(&handle->ih_lock);
 
388
 
 
389
        /* Get the lock on the cache page if required: */
 
390
        if (handle->ih_cache_reference) {
 
391
                u_int                   file_id;
 
392
                xtIndexNodeID   address;
 
393
 
 
394
                block = handle->x.ih_cache_block;
 
395
 
 
396
                file_id = block->cb_file_id;
 
397
                address = block->cb_address;
 
398
                hash_idx = XT_NODE_ID(address) + (file_id * 223);
 
399
                seg = &ind_cac_globals.cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
 
400
                hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % ind_cac_globals.cg_hash_size;
 
401
        }
 
402
 
 
403
        xt_spinlock_unlock(&handle->ih_lock);
 
404
 
 
405
        /* Because of the lock order, I have to release the
 
406
         * handle before I get a lock on the cache block.
 
407
         *
 
408
         * But, by doing this, this cache block may be gone!
 
409
         */
 
410
        if (block) {
 
411
                IDX_CAC_READ_LOCK(seg, thread);
 
412
                xblock = seg->cs_hash_table[hash_idx];
 
413
                while (xblock) {
 
414
                        if (block == xblock) {
 
415
                                /* Found the block... 
 
416
                                 * {HANDLE-COUNT-SLOCK}
 
417
                                 * 04.05.2009, changed to slock.
 
418
                                 * The xlock causes too much contention
 
419
                                 * on the cache block for read only loads.
 
420
                                 *
 
421
                                 * Is it safe?
 
422
                                 * See below...
 
423
                                 */
 
424
                                XT_IPAGE_READ_LOCK(&block->cb_lock);
 
425
                                goto block_found;
 
426
                        }
 
427
                        xblock = xblock->cb_next;
 
428
                }
 
429
                block = NULL;
 
430
                block_found:
 
431
                IDX_CAC_UNLOCK(seg, thread);
 
432
        }
 
433
 
 
434
        hs = &ind_cac_globals.cg_handle_slot[handle->ih_address % XT_HANDLE_SLOTS];
 
435
 
 
436
        ID_HANDLE_LOCK(&hs->hs_handles_lock);
 
437
#ifdef CHECK_HANDLE_STRUCTS
 
438
        ic_check_handle_structs();
 
439
#endif
 
440
 
 
441
        /* I don't need to lock the handle because I have locked
 
442
         * the list, and no other thread can change the
 
443
         * handle without first getting a lock on the list.
 
444
         *
 
445
         * In addition, the caller is the only owner of the
 
446
         * handle, and the only thread with an independent
 
447
         * reference to the handle.
 
448
         * All other access occurs via the list.
 
449
         */
 
450
 
 
451
        /* Remove the reference to the cache or a handle block: */
 
452
        if (handle->ih_cache_reference) {
 
453
                ASSERT_NS(block == handle->x.ih_cache_block);
 
454
                ASSERT_NS(block && block->cb_handle_count > 0);
 
455
                /* {HANDLE-COUNT-USAGE}
 
456
                 * This is safe here because I have excluded
 
457
                 * all readers by taking an Xlock on the
 
458
                 * cache block (CHANGED - see below).
 
459
                 *
 
460
                 * {HANDLE-COUNT-SLOCK}
 
461
                 * 04.05.2009, changed to slock.
 
462
                 * Should be OK, because:
 
463
                 * I have a lock on the list lock (hs_handles_lock),
 
464
                 * which prevents concurrent updates to cb_handle_count.
 
465
                 *
 
466
                 * I also have a read lock on the cache block
 
467
                 * but not a lock on the index. As a result, we cannot
 
468
                 * exclude all index writers (and readers of
 
469
                 * cb_handle_count).
 
470
                 */
 
471
                block->cb_handle_count--;
 
472
        }
 
473
        else {
 
474
                XTIndHandleBlockPtr     hptr = handle->x.ih_handle_block;
 
475
 
 
476
                ASSERT_NS(!handle->ih_cache_reference);
 
477
                ASSERT_NS(hptr->hb_ref_count > 0);
 
478
                hptr->hb_ref_count--;
 
479
                if (!hptr->hb_ref_count) {
 
480
                        /* Put it back on the free list: */
 
481
                        hptr->hb_next = hs->hs_free_blocks;
 
482
                        hs->hs_free_blocks = hptr;
 
483
                }
 
484
        }
 
485
 
 
486
        /* Unlink the handle: */
 
487
        if (handle->ih_next)
 
488
                handle->ih_next->ih_prev = handle->ih_prev;
 
489
        if (handle->ih_prev)
 
490
                handle->ih_prev->ih_next = handle->ih_next;
 
491
        if (hs->hs_used_handles == handle)
 
492
                hs->hs_used_handles = handle->ih_next;
 
493
 
 
494
        /* Put it on the free list: */
 
495
        handle->ih_next = hs->hs_free_handles;
 
496
        hs->hs_free_handles = handle;
 
497
 
 
498
#ifdef CHECK_HANDLE_STRUCTS
 
499
        ic_check_handle_structs();
 
500
#endif
 
501
        ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
 
502
 
 
503
        if (block)
 
504
                XT_IPAGE_UNLOCK(&block->cb_lock, FALSE);
 
505
}
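
Both xt_ind_get_handle() and xt_ind_release_handle() above maintain, per slot, a doubly linked used-list and a singly linked free-list under the slot lock. The following is a reduced, self-contained sketch of just that list bookkeeping, with hypothetical names and no locking; illustrative only, not part of cache_xt.cc:

#include <stdio.h>

struct hdl { struct hdl *prev, *next; int id; };

static struct hdl *used_list;
static struct hdl *free_list;

static void push_used(struct hdl *h)       /* like xt_ind_get_handle(): link at the head */
{
	if (used_list)
		used_list->prev = h;
	h->next = used_list;
	h->prev = NULL;
	used_list = h;
}

static void release(struct hdl *h)         /* like xt_ind_release_handle(): unlink, recycle */
{
	if (h->next)
		h->next->prev = h->prev;
	if (h->prev)
		h->prev->next = h->next;
	if (used_list == h)
		used_list = h->next;
	h->next = free_list;                   /* the free-list reuses only the next pointer */
	free_list = h;
}

int main()
{
	struct hdl a = { NULL, NULL, 1 }, b = { NULL, NULL, 2 };
	push_used(&a);
	push_used(&b);
	release(&a);
	printf("head=%d free=%d\n", used_list->id, free_list->id);  /* head=2 free=1 */
	return 0;
}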
 
506
 
 
507
/* Call this function before a referenced cache block is modified!
 
508
 * This function is called by index updaters.
 
509
 */
 
510
xtPublic xtBool xt_ind_copy_on_write(XTIndReferencePtr iref)
 
511
{
 
512
        DcHandleSlotPtr         hs;
 
513
        XTIndHandleBlockPtr     hptr;
 
514
        u_int                           branch_size;
 
515
        XTIndHandlePtr          handle;
 
516
        u_int                           i = 0;
 
517
 
 
518
        hs = &ind_cac_globals.cg_handle_slot[iref->ir_block->cb_address % XT_HANDLE_SLOTS];
 
519
 
 
520
        ID_HANDLE_LOCK(&hs->hs_handles_lock);
 
521
 
 
522
        /* {HANDLE-COUNT-USAGE}
 
523
         * This is only called by updaters of this index block, or
 
524
         * the freer which holds an Xlock on the index block.
 
525
         * These are all mutually exclusive for the index block.
 
526
         *
 
527
         * {HANDLE-COUNT-SLOCK}
 
528
         * Do this check again, after we have the list lock (hs_handles_lock).
 
529
         * There is a small chance that the count has changed since we last
 
530
         * checked, because xt_ind_release_handle() only holds
 
531
         * an slock on the index page.
 
532
         *
 
533
         * An updater can sometimes have an XLOCK on the index and an slock
 
534
         * on the cache block. In this case xt_ind_release_handle()
 
535
         * could have run through.
 
536
         */
 
537
        if (!iref->ir_block->cb_handle_count) {
 
538
                ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
 
539
                return OK;
 
540
        }
 
541
 
 
542
#ifdef CHECK_HANDLE_STRUCTS
 
543
        ic_check_handle_structs();
 
544
#endif
 
545
        if ((hptr = hs->hs_free_blocks))
 
546
                hs->hs_free_blocks = hptr->hb_next;
 
547
        else {
 
548
                if (!(hptr = (XTIndHandleBlockPtr) xt_malloc_ns(sizeof(XTIndHandleBlockRec)))) {
 
549
                        ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
 
550
                        return FAILED;
 
551
                }
 
552
        }
 
553
 
 
554
        branch_size = XT_GET_INDEX_BLOCK_LEN(XT_GET_DISK_2(iref->ir_branch->tb_size_2));
 
555
        memcpy(&hptr->hb_branch, iref->ir_branch, branch_size);
 
556
        hptr->hb_ref_count = iref->ir_block->cb_handle_count;
 
557
 
 
558
        handle = hs->hs_used_handles;
 
559
        while (handle) {
 
560
                if (handle->ih_branch == iref->ir_branch) {
 
561
                        i++;
 
562
                        xt_spinlock_lock(&handle->ih_lock);
 
563
                        ASSERT_NS(handle->ih_cache_reference);
 
564
                        handle->ih_cache_reference = FALSE;
 
565
                        handle->x.ih_handle_block = hptr;
 
566
                        handle->ih_branch = &hptr->hb_branch;
 
567
                        xt_spinlock_unlock(&handle->ih_lock);
 
568
#ifndef DEBUG
 
569
                        if (i == hptr->hb_ref_count)
 
570
                                break;
 
571
#endif
 
572
                }
 
573
                handle = handle->ih_next;
 
574
        }
 
575
#ifdef DEBUG
 
576
        ASSERT_NS(hptr->hb_ref_count == i);
 
577
#endif
 
578
        /* {HANDLE-COUNT-USAGE}
 
579
         * It is safe to modify cb_handle_count when I have the
 
580
         * list lock, and I have excluded all readers!
 
581
         */
 
582
        iref->ir_block->cb_handle_count = 0;
 
583
#ifdef CHECK_HANDLE_STRUCTS
 
584
        ic_check_handle_structs();
 
585
#endif
 
586
        ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
 
587
 
 
588
        return OK;
 
589
}
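
xt_ind_copy_on_write() above copies the branch into a private handle block, repoints every handle that still references the cache block at that copy, and transfers cb_handle_count into the copy's hb_ref_count, so the cache block can then be rewritten or freed. A reduced standalone sketch of that idea, with hypothetical names; illustrative only, not part of cache_xt.cc:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

struct snapshot { int ref_count; char data[64]; };
struct handle   { int uses_cache; struct snapshot *snap; const char *view; };

/* Illustrative only: detach readers onto a private copy before the page is modified. */
static struct snapshot *detach_readers(struct handle *handles, int n, const char *cache_page)
{
	struct snapshot *s = (struct snapshot *) malloc(sizeof(struct snapshot));
	if (!s)
		return NULL;
	memcpy(s->data, cache_page, sizeof(s->data));   /* copy the page image once */
	s->ref_count = 0;
	for (int i = 0; i < n; i++) {
		if (handles[i].uses_cache) {                /* repoint each reader at the copy */
			handles[i].uses_cache = 0;
			handles[i].snap = s;
			handles[i].view = s->data;
			s->ref_count++;
		}
	}
	return s;                                       /* the cache page may now be rewritten */
}

int main()
{
	char page[64] = "old page image";
	struct handle h[2] = { { 1, NULL, page }, { 1, NULL, page } };
	struct snapshot *s = detach_readers(h, 2, page);

	strcpy(page, "new page image");                 /* the writer modifies the cache page */
	printf("reader sees: %s (refs=%d)\n", h[0].view, s ? s->ref_count : 0);
	free(s);
	return 0;
}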
 
590
 
 
591
xtPublic void xt_ind_lock_handle(XTIndHandlePtr handle)
 
592
{
 
593
        xt_spinlock_lock(&handle->ih_lock);
 
594
}
 
595
 
 
596
xtPublic void xt_ind_unlock_handle(XTIndHandlePtr handle)
 
597
{
 
598
        xt_spinlock_unlock(&handle->ih_lock);
 
599
}
 
600
 
 
601
/*
 
602
 * -----------------------------------------------------------------------
 
603
 * INIT/EXIT
 
604
 */
 
605
 
 
606
/*
 
607
 * Initialize the disk cache.
 
608
 */
 
609
xtPublic void xt_ind_init(XTThreadPtr self, size_t cache_size)
 
610
{
 
611
        XTIndBlockPtr   block;
 
612
 
 
613
#ifdef XT_USE_MYSYS
 
614
        init_key_cache(&my_cache, 1024, cache_size, 100, 300);
 
615
#endif
 
616
        /* Memory is devoted to the page data alone; I no longer count the size of the directory,
 
617
         * or the page overhead: */
 
618
        ind_cac_globals.cg_block_count = cache_size / XT_INDEX_PAGE_SIZE;
 
619
        ind_cac_globals.cg_hash_size = ind_cac_globals.cg_block_count / (IDX_CAC_SEGMENT_COUNT >> 1);
 
620
        ind_cac_globals.cg_max_free = ind_cac_globals.cg_block_count / 10;
 
621
        if (ind_cac_globals.cg_max_free < 8)
 
622
                ind_cac_globals.cg_max_free = 8;
 
623
        if (ind_cac_globals.cg_max_free > 128)
 
624
                ind_cac_globals.cg_max_free = 128;
 
625
 
 
626
        try_(a) {
 
627
                for (u_int i=0; i<IDX_CAC_SEGMENT_COUNT; i++) {
 
628
                        ind_cac_globals.cg_segment[i].cs_hash_table = (XTIndBlockPtr *) xt_calloc(self, ind_cac_globals.cg_hash_size * sizeof(XTIndBlockPtr));
 
629
                        IDX_CAC_INIT_LOCK(self, &ind_cac_globals.cg_segment[i]);
 
630
                }
 
631
 
 
632
                block = (XTIndBlockPtr) xt_malloc(self, ind_cac_globals.cg_block_count * sizeof(XTIndBlockRec));
 
633
                ind_cac_globals.cg_blocks = block;
 
634
                xt_init_mutex_with_autoname(self, &ind_cac_globals.cg_lock);
 
635
#ifdef XT_USE_DIRECT_IO_ON_INDEX
 
636
                xtWord1 *buffer;
 
637
#ifdef XT_WIN
 
638
                size_t  psize = 512;
 
639
#else
 
640
                size_t  psize = getpagesize();
 
641
#endif
 
642
                size_t  diff;
 
643
 
 
644
                buffer = (xtWord1 *) xt_malloc(self, (ind_cac_globals.cg_block_count * XT_INDEX_PAGE_SIZE));
 
645
                diff = (size_t) buffer % psize;
 
646
                if (diff != 0) {
 
647
                        xt_free(self, buffer);
 
648
                        buffer = (xtWord1 *) xt_malloc(self, (ind_cac_globals.cg_block_count * XT_INDEX_PAGE_SIZE) + psize);
 
649
                        diff = (size_t) buffer % psize;
 
650
                        if (diff != 0)
 
651
                                diff = psize - diff;
 
652
                }
 
653
                ind_cac_globals.cg_buffer = buffer;
 
654
                buffer += diff;
 
655
#endif
 
656
 
 
657
                for (u_int i=0; i<ind_cac_globals.cg_block_count; i++) {
 
658
                        XT_IPAGE_INIT_LOCK(self, &block->cb_lock);
 
659
                        block->cb_state = IDX_CAC_BLOCK_FREE;
 
660
                        block->cb_next = ind_cac_globals.cg_free_list;
 
661
#ifdef XT_USE_DIRECT_IO_ON_INDEX
 
662
                        block->cb_data = buffer;
 
663
                        buffer += XT_INDEX_PAGE_SIZE;
 
664
#endif
 
665
                        ind_cac_globals.cg_free_list = block;
 
666
                        block++;
 
667
                }
 
668
                ind_cac_globals.cg_free_count = ind_cac_globals.cg_block_count;
 
669
#ifdef DEBUG_CHECK_IND_CACHE
 
670
                ind_cac_globals.cg_reserved_by_ots = 0;
 
671
#endif
 
672
                ind_handle_init(self);
 
673
        }
 
674
        catch_(a) {
 
675
                xt_ind_exit(self);
 
676
                throw_();
 
677
        }
 
678
        cont_(a);
 
679
}
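
When XT_USE_DIRECT_IO_ON_INDEX is defined, xt_ind_init() above over-allocates the page buffer and advances the working pointer so that each cb_data starts on a page boundary, while keeping the original pointer in cg_buffer so it can be freed later. A standalone sketch of the same alignment trick, with hypothetical sizes; illustrative only, not part of cache_xt.cc:

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

int main()
{
	size_t psize = 4096;                            /* stand-in for getpagesize() */
	size_t want  = 16 * 4096;                       /* a few cache pages worth of data */
	unsigned char *raw = (unsigned char *) malloc(want + psize);  /* over-allocate by one page */
	if (!raw)
		return 1;

	size_t diff = (size_t) ((uintptr_t) raw % psize);
	unsigned char *aligned = diff ? raw + (psize - diff) : raw;   /* advance to the boundary */

	printf("raw=%p aligned=%p\n", (void *) raw, (void *) aligned);
	free(raw);                                      /* free the original pointer, as cg_buffer is kept for this */
	return 0;
}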
 
680
 
 
681
xtPublic void xt_ind_exit(XTThreadPtr self)
 
682
{
 
683
#ifdef XT_USE_MYSYS
 
684
        end_key_cache(&my_cache, 1);
 
685
#endif
 
686
        for (u_int i=0; i<IDX_CAC_SEGMENT_COUNT; i++) {
 
687
                if (ind_cac_globals.cg_segment[i].cs_hash_table) {
 
688
                        xt_free(self, ind_cac_globals.cg_segment[i].cs_hash_table);
 
689
                        ind_cac_globals.cg_segment[i].cs_hash_table = NULL;
 
690
                        IDX_CAC_FREE_LOCK(self, &ind_cac_globals.cg_segment[i]);
 
691
                }
 
692
        }
 
693
 
 
694
        /* Must be done before freeing the blocks! */
 
695
        ind_handle_exit(self);
 
696
 
 
697
        if (ind_cac_globals.cg_blocks) {
 
698
                xt_free(self, ind_cac_globals.cg_blocks);
 
699
                ind_cac_globals.cg_blocks = NULL;
 
700
                xt_free_mutex(&ind_cac_globals.cg_lock);
 
701
        }
 
702
#ifdef XT_USE_DIRECT_IO_ON_INDEX
 
703
        if (ind_cac_globals.cg_buffer) {
 
704
                xt_free(self, ind_cac_globals.cg_buffer);
 
705
                ind_cac_globals.cg_buffer = NULL;
 
706
        }
 
707
#endif
 
708
 
 
709
        memset(&ind_cac_globals, 0, sizeof(ind_cac_globals));
 
710
}
 
711
 
 
712
xtPublic xtInt8 xt_ind_get_usage()
 
713
{
 
714
        xtInt8 size = 0;
 
715
 
 
716
        size = (xtInt8) (ind_cac_globals.cg_block_count - ind_cac_globals.cg_free_count) * (xtInt8) XT_INDEX_PAGE_SIZE;
 
717
        return size;
 
718
}
 
719
 
 
720
xtPublic xtInt8 xt_ind_get_size()
 
721
{
 
722
        xtInt8 size = 0;
 
723
 
 
724
        size = (xtInt8) ind_cac_globals.cg_block_count * (xtInt8) XT_INDEX_PAGE_SIZE;
 
725
        return size;
 
726
}
 
727
 
 
728
xtPublic u_int xt_ind_get_blocks()
 
729
{
 
730
        return ind_cac_globals.cg_block_count;
 
731
}
 
732
 
 
733
/*
 
734
 * -----------------------------------------------------------------------
 
735
 * INDEX CHECKING
 
736
 */
 
737
 
 
738
xtPublic void xt_ind_check_cache(XTIndexPtr ind)
 
739
{
 
740
        XTIndBlockPtr   block;
 
741
        u_int                   free_count, inuse_count, clean_count;
 
742
        xtBool                  check_count = FALSE;
 
743
 
 
744
        if (ind == (XTIndex *) 1) {
 
745
                ind = NULL;
 
746
                check_count = TRUE;
 
747
        }
 
748
 
 
749
        // Check the dirty list:
 
750
        if (ind) {
 
751
                u_int cnt = 0;
 
752
 
 
753
                block = ind->mi_dirty_list;
 
754
                while (block) {
 
755
                        cnt++;
 
756
                        ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_DIRTY);
 
757
                        block = block->cb_dirty_next;
 
758
                }
 
759
                ASSERT_NS(ind->mi_dirty_blocks == cnt);
 
760
        }
 
761
 
 
762
        xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
 
763
 
 
764
        // Check the free list:
 
765
        free_count = 0;
 
766
        block = ind_cac_globals.cg_free_list;
 
767
        while (block) {
 
768
                free_count++;
 
769
                ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_FREE);
 
770
                block = block->cb_next;
 
771
        }
 
772
        ASSERT_NS(ind_cac_globals.cg_free_count == free_count);
 
773
 
 
774
        /* Check the LRU list: */
 
775
        XTIndBlockPtr list_block, plist_block;
 
776
        
 
777
        plist_block = NULL;
 
778
        list_block = ind_cac_globals.cg_lru_block;
 
779
        if (list_block) {
 
780
                ASSERT_NS(ind_cac_globals.cg_mru_block != NULL);
 
781
                ASSERT_NS(ind_cac_globals.cg_mru_block->cb_mr_used == NULL);
 
782
                ASSERT_NS(list_block->cb_lr_used == NULL);
 
783
                inuse_count = 0;
 
784
                clean_count = 0;
 
785
                while (list_block) {
 
786
                        inuse_count++;
 
787
                        ASSERT_NS(IDX_CAC_NOT_FREE(list_block->cb_state));
 
788
                        if (list_block->cb_state == IDX_CAC_BLOCK_CLEAN)
 
789
                                clean_count++;
 
790
                        ASSERT_NS(block != list_block);
 
791
                        ASSERT_NS(list_block->cb_lr_used == plist_block);
 
792
                        plist_block = list_block;
 
793
                        list_block = list_block->cb_mr_used;
 
794
                }
 
795
                ASSERT_NS(ind_cac_globals.cg_mru_block == plist_block);
 
796
        }
 
797
        else {
 
798
                inuse_count = 0;
 
799
                clean_count = 0;
 
800
                ASSERT_NS(ind_cac_globals.cg_mru_block == NULL);
 
801
        }
 
802
 
 
803
#ifdef DEBUG_CHECK_IND_CACHE
 
804
        ASSERT_NS(free_count + inuse_count + ind_cac_globals.cg_reserved_by_ots + ind_cac_globals.cg_read_count == ind_cac_globals.cg_block_count);
 
805
#endif
 
806
        xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
 
807
        if (check_count) {
 
808
                /* We have just flushed, check how much is now free/clean. */
 
809
                if (free_count + clean_count < 10) {
 
810
                        /* This could be a problem: */
 
811
                        printf("Cache very low!\n");
 
812
                }
 
813
        }
 
814
}
 
815
 
 
816
/*
 
817
 * -----------------------------------------------------------------------
 
818
 * FREEING INDEX CACHE
 
819
 */
 
820
 
 
821
/*
 
822
 * This function returns TRUE if the block is freed.
 
823
 * This function returns FALSE if the block cannot be found, or the
 
824
 * block is not clean.
 
825
 *
 
826
 * We also return FALSE if we cannot copy the block to the handle
 
827
 * (if this is required). This will be due to out-of-memory!
 
828
 */
 
829
static xtBool ind_free_block(XTOpenTablePtr ot, XTIndBlockPtr block)
 
830
{
 
831
        XTIndBlockPtr   xblock, pxblock;
 
832
        u_int                   hash_idx;
 
833
        u_int                   file_id;
 
834
        xtIndexNodeID   address;
 
835
        DcSegmentPtr    seg;
 
836
 
 
837
#ifdef DEBUG_CHECK_IND_CACHE
 
838
        xt_ind_check_cache(NULL);
 
839
#endif
 
840
        file_id = block->cb_file_id;
 
841
        address = block->cb_address;
 
842
 
 
843
        hash_idx = XT_NODE_ID(address) + (file_id * 223);
 
844
        seg = &ind_cac_globals.cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
 
845
        hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % ind_cac_globals.cg_hash_size;
 
846
 
 
847
        IDX_CAC_WRITE_LOCK(seg, ot->ot_thread);
 
848
 
 
849
        pxblock = NULL;
 
850
        xblock = seg->cs_hash_table[hash_idx];
 
851
        while (xblock) {
 
852
                if (block == xblock) {
 
853
                        /* Found the block... */
 
854
                        /* It is possible that a thread enters this code holding a
 
855
                         * lock on a page. This can cause a deadlock:
 
856
                         *
 
857
                         * #0   0x91faa2ce in semaphore_wait_signal_trap
 
858
                         * #1   0x91fb1da5 in pthread_mutex_lock
 
859
                         * #2   0x00e2ec13 in xt_p_mutex_lock at pthread_xt.cc:544
 
860
                         * #3   0x00e6c30a in xt_xsmutex_xlock at lock_xt.cc:1547
 
861
                         * #4   0x00dee402 in ind_free_block at cache_xt.cc:879
 
862
                         * #5   0x00dee76a in ind_cac_free_lru_blocks at cache_xt.cc:1033
 
863
                         * #6   0x00def8d1 in xt_ind_reserve at cache_xt.cc:1513
 
864
                         * #7   0x00e22118 in xt_idx_insert at index_xt.cc:2047
 
865
                         * #8   0x00e4d7ee in xt_tab_new_record at table_xt.cc:4702
 
866
                         * #9   0x00e0ff0b in ha_pbxt::write_row at ha_pbxt.cc:2340
 
867
                         * #10  0x0023a00f in handler::ha_write_row at handler.cc:4570
 
868
                         * #11  0x001a32c8 in write_record at sql_insert.cc:1568
 
869
                         * #12  0x001ab635 in mysql_insert at sql_insert.cc:812
 
870
                         * #13  0x0010e068 in mysql_execute_command at sql_parse.cc:3066
 
871
                         * #14  0x0011480d in mysql_parse at sql_parse.cc:5787
 
872
                         * #15  0x00115afb in dispatch_command at sql_parse.cc:1200
 
873
                         * #16  0x00116de2 in do_command at sql_parse.cc:857
 
874
                         * #17  0x00101ee4 in handle_one_connection at sql_connect.cc:1115
 
875
                         * #18  0x91fdb155 in _pthread_start
 
876
                         * #19  0x91fdb012 in thread_start
 
877
                         * 
 
878
                         * #0   0x91fb146e in __semwait_signal
 
879
                         * #1   0x91fb12ef in nanosleep$UNIX2003
 
880
                         * #2   0x91fb1236 in usleep$UNIX2003
 
881
                         * #3   0x00e52112 in xt_yield at thread_xt.cc:1274
 
882
                         * #4   0x00e6c0eb in xt_spinxslock_xlock at lock_xt.cc:1456
 
883
                         * #5   0x00dee444 in ind_free_block at cache_xt.cc:886
 
884
                         * #6   0x00dee76a in ind_cac_free_lru_blocks at cache_xt.cc:1033
 
885
                         * #7   0x00deeaf0 in ind_cac_fetch at cache_xt.cc:1130
 
886
                         * #8   0x00def604 in xt_ind_fetch at cache_xt.cc:1386
 
887
                         * #9   0x00e2159a in xt_idx_update_row_id at index_xt.cc:2489
 
888
                         * #10  0x00e603c8 in xn_sw_clean_indices at xaction_xt.cc:1932
 
889
                         * #11  0x00e606d4 in xn_sw_cleanup_variation at xaction_xt.cc:2056
 
890
                         * #12  0x00e60e29 in xn_sw_cleanup_xact at xaction_xt.cc:2276
 
891
                         * #13  0x00e615ed in xn_sw_main at xaction_xt.cc:2433
 
892
                         * #14  0x00e61919 in xn_sw_run_thread at xaction_xt.cc:2564
 
893
                         * #15  0x00e53f80 in thr_main at thread_xt.cc:1017
 
894
                         * #16  0x91fdb155 in _pthread_start
 
895
                         * #17  0x91fdb012 in thread_start
 
896
                         *
 
897
                         * So we back off if a lock is held!
 
898
                         */
 
899
                        if (!XT_IPAGE_WRITE_TRY_LOCK(&block->cb_lock, ot->ot_thread->t_id)) {
 
900
                                IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
901
#ifdef DEBUG_CHECK_IND_CACHE
 
902
                                xt_ind_check_cache(NULL);
 
903
#endif
 
904
                                return FALSE;
 
905
                        }
 
906
                        if (block->cb_state != IDX_CAC_BLOCK_CLEAN) {
 
907
                                /* This block cannot be freed: */
 
908
                                XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
909
                                IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
910
#ifdef DEBUG_CHECK_IND_CACHE
 
911
                                xt_ind_check_cache(NULL);
 
912
#endif
 
913
                                return FALSE;
 
914
                        }
 
915
                        
 
916
                        goto free_the_block;
 
917
                }
 
918
                pxblock = xblock;
 
919
                xblock = xblock->cb_next;
 
920
        }
 
921
 
 
922
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
923
 
 
924
        /* Not found (this can happen if the block was freed by another thread) */
 
925
#ifdef DEBUG_CHECK_IND_CACHE
 
926
        xt_ind_check_cache(NULL);
 
927
#endif
 
928
        return FALSE;
 
929
 
 
930
        free_the_block:
 
931
 
 
932
        /* If the block is referenced by a handle, then we
 
933
         * have to copy the data to the handle before we
 
934
         * free the page:
 
935
         */
 
936
        /* {HANDLE-COUNT-USAGE}
 
937
         * This access is safe because:
 
938
         *
 
939
         * We have an Xlock on the cache block, which excludes
 
940
         * all other writers that want to change the cache block
 
941
         * and also all readers of the cache block, because
 
942
         * they all have at least an Slock on the cache block.
 
943
         */
 
944
        if (block->cb_handle_count) {
 
945
                XTIndReferenceRec       iref;
 
946
                
 
947
                iref.ir_xlock = TRUE;
 
948
                iref.ir_updated = FALSE;
 
949
                iref.ir_block = block;
 
950
                iref.ir_branch = (XTIdxBranchDPtr) block->cb_data;
 
951
                if (!xt_ind_copy_on_write(&iref)) {
 
952
                        XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
953
                        return FALSE;
 
954
                }
 
955
        }
 
956
 
 
957
        /* Block is clean, remove from the hash table: */
 
958
        if (pxblock)
 
959
                pxblock->cb_next = block->cb_next;
 
960
        else
 
961
                seg->cs_hash_table[hash_idx] = block->cb_next;
 
962
 
 
963
        xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
 
964
 
 
965
        /* Remove from the MRU list: */
 
966
        if (ind_cac_globals.cg_lru_block == block)
 
967
                ind_cac_globals.cg_lru_block = block->cb_mr_used;
 
968
        if (ind_cac_globals.cg_mru_block == block)
 
969
                ind_cac_globals.cg_mru_block = block->cb_lr_used;
 
970
        
 
971
        /* Note, I am updating blocks for which I have no lock
 
972
         * here. But I think this is OK because I have a lock
 
973
         * for the MRU list.
 
974
         */
 
975
        if (block->cb_lr_used)
 
976
                block->cb_lr_used->cb_mr_used = block->cb_mr_used;
 
977
        if (block->cb_mr_used)
 
978
                block->cb_mr_used->cb_lr_used = block->cb_lr_used;
 
979
 
 
980
        /* The block is now free: */
 
981
        block->cb_next = ind_cac_globals.cg_free_list;
 
982
        ind_cac_globals.cg_free_list = block;
 
983
        ind_cac_globals.cg_free_count++;
 
984
        ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
 
985
        block->cb_state = IDX_CAC_BLOCK_FREE;
 
986
        IDX_TRACE("%d- f%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(block->cb_data));
 
987
 
 
988
        /* Unlock BEFORE the block is reused! */
 
989
        XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
990
 
 
991
        xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
 
992
 
 
993
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
994
 
 
995
#ifdef DEBUG_CHECK_IND_CACHE
 
996
        xt_ind_check_cache(NULL);
 
997
#endif
 
998
        return TRUE;
 
999
}
 
1000
 
 
1001
#define IND_CACHE_MAX_BLOCKS_TO_FREE            100
 
1002
 
 
1003
/*
 
1004
 * Return the number of blocks freed.
 
1005
 *
 
1006
 * The idea is to grab a list of blocks to free.
 
1007
 * The list consists of the LRU blocks that are
 
1008
 * clean.
 
1009
 *
 
1010
 * Free as many as possible (up to max of blocks_required)
 
1011
 * from the list, even if LRU position has changed
 
1012
 * (or we have a race if there are too few blocks).
 
1013
 * However, if the block cannot be found, or is dirty,
 
1014
 * we must skip it.
 
1015
 *
 
1016
 * Repeat until we find no blocks for the list, or
 
1017
 * we have freed 'blocks_required'.
 
1018
 *
 
1019
 * 'not_this' is a block that must not be freed because
 
1020
 * it is locked by the calling thread!
 
1021
 */
 
1022
static u_int ind_cac_free_lru_blocks(XTOpenTablePtr ot, u_int blocks_required, XTIdxBranchDPtr not_this)
 
1023
{
 
1024
        register DcGlobalsRec   *dcg = &ind_cac_globals;
 
1025
        XTIndBlockPtr                   to_free[IND_CACHE_MAX_BLOCKS_TO_FREE];
 
1026
        int                                             count;
 
1027
        XTIndBlockPtr                   block;
 
1028
        u_int                                   blocks_freed = 0;
 
1029
        XTIndBlockPtr                   locked_block;
 
1030
 
 
1031
#ifdef XT_USE_DIRECT_IO_ON_INDEX
 
1032
#error This will not work!
 
1033
#endif
 
1034
        locked_block = (XTIndBlockPtr) ((xtWord1 *) not_this - offsetof(XTIndBlockRec, cb_data));
 
1035
 
 
1036
        retry:
 
1037
        xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
 
1038
        block = dcg->cg_lru_block;
 
1039
        count = 0;
 
1040
        while (block && count < IND_CACHE_MAX_BLOCKS_TO_FREE) {
 
1041
                if (block != locked_block && block->cb_state == IDX_CAC_BLOCK_CLEAN) {
 
1042
                        to_free[count] = block;
 
1043
                        count++;
 
1044
                }
 
1045
                block = block->cb_mr_used;
 
1046
        }
 
1047
        xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
 
1048
 
 
1049
        if (!count)
 
1050
                return blocks_freed;
 
1051
 
 
1052
        for (int i=0; i<count; i++) {
 
1053
                if (ind_free_block(ot, to_free[i]))
 
1054
                        blocks_freed++;
 
1055
                if (blocks_freed >= blocks_required &&
 
1056
                        ind_cac_globals.cg_free_count >= ind_cac_globals.cg_max_free + blocks_required)
 
1057
                return blocks_freed;
 
1058
        }
 
1059
 
 
1060
        goto retry;
 
1061
}
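
ind_cac_free_lru_blocks() above works in two phases: it snapshots up to IND_CACHE_MAX_BLOCKS_TO_FREE clean LRU candidates while holding cg_lock, then tries to free them after dropping it, retrying while candidates remain. Below is a reduced single-threaded sketch of that collect-then-free loop, with hypothetical names; illustrative only, not part of cache_xt.cc:

#include <stdio.h>

#define MAX_BATCH 4

struct page { int clean; int freed; };

/* Phase 1: snapshot up to MAX_BATCH clean candidates (done under cg_lock in the real code). */
static int collect_clean(struct page *lru, int n, struct page **out)
{
	int count = 0;
	for (int i = 0; i < n && count < MAX_BATCH; i++) {
		if (lru[i].clean && !lru[i].freed)
			out[count++] = &lru[i];
	}
	return count;
}

/* Phase 2: free from the snapshot with the list lock dropped; a candidate can change state
 * in the meantime, so a free may simply be skipped (as ind_free_block() may return FALSE). */
static int free_batch(struct page **batch, int count, int required)
{
	int freed = 0;
	for (int i = 0; i < count && freed < required; i++) {
		if (batch[i]->clean) {
			batch[i]->freed = 1;
			batch[i]->clean = 0;
			freed++;
		}
	}
	return freed;
}

int main()
{
	struct page lru[6] = { {1,0}, {0,0}, {1,0}, {1,0}, {0,0}, {1,0} };
	struct page *batch[MAX_BATCH];
	int need = 2, got = 0;

	while (got < need) {                    /* the 'retry' loop in ind_cac_free_lru_blocks() */
		int n = collect_clean(lru, 6, batch);
		if (!n)
			break;
		got += free_batch(batch, n, need - got);
	}
	printf("freed %d blocks\n", got);       /* freed 2 blocks */
	return 0;
}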
 
1062
 
 
1063
/*
 
1064
 * -----------------------------------------------------------------------
 
1065
 * MAIN CACHE FUNCTIONS
 
1066
 */
 
1067
 
 
1068
/*
 
1069
 * Fetch the block. Note, if we are about to write the block
 
1070
 * then there is no need to read it from disk!
 
1071
 */
 
1072
static XTIndBlockPtr ind_cac_fetch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, DcSegmentPtr *ret_seg, xtBool read_data)
 
1073
{
 
1074
        register XTOpenFilePtr  file = ot->ot_ind_file;
 
1075
        register XTIndBlockPtr  block, new_block;
 
1076
        register DcSegmentPtr   seg;
 
1077
        register u_int                  hash_idx;
 
1078
        register DcGlobalsRec   *dcg = &ind_cac_globals;
 
1079
        size_t                                  red_size;
 
1080
 
 
1081
#ifdef DEBUG_CHECK_IND_CACHE
 
1082
        xt_ind_check_cache(NULL);
 
1083
#endif
 
1084
        /* Address, plus file ID multiplied by my favorite prime number! */
 
1085
        hash_idx = XT_NODE_ID(address) + (file->fr_id * 223);
 
1086
        seg = &dcg->cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
 
1087
        hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % dcg->cg_hash_size;
 
1088
 
 
1089
        IDX_CAC_READ_LOCK(seg, ot->ot_thread);
 
1090
        block = seg->cs_hash_table[hash_idx];
 
1091
        while (block) {
 
1092
                if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
 
1093
                        ASSERT_NS(block->cb_state != IDX_CAC_BLOCK_FREE);
 
1094
 
 
1095
                        /* Check how recently this page has been used: */
 
1096
                        if (XT_TIME_DIFF(block->cb_ru_time, dcg->cg_ru_now) > (dcg->cg_block_count >> 1)) {
 
1097
                                xt_lock_mutex_ns(&dcg->cg_lock);
 
1098
 
 
1099
                                /* Move to the front of the MRU list: */
 
1100
                                block->cb_ru_time = ++dcg->cg_ru_now;
 
1101
                                if (dcg->cg_mru_block != block) {
 
1102
                                        /* Remove from the MRU list: */
 
1103
                                        if (dcg->cg_lru_block == block)
 
1104
                                                dcg->cg_lru_block = block->cb_mr_used;
 
1105
                                        if (block->cb_lr_used)
 
1106
                                                block->cb_lr_used->cb_mr_used = block->cb_mr_used;
 
1107
                                        if (block->cb_mr_used)
 
1108
                                                block->cb_mr_used->cb_lr_used = block->cb_lr_used;
 
1109
 
 
1110
                                        /* Make the block the most recently used: */
 
1111
                                        if ((block->cb_lr_used = dcg->cg_mru_block))
 
1112
                                                dcg->cg_mru_block->cb_mr_used = block;
 
1113
                                        block->cb_mr_used = NULL;
 
1114
                                        dcg->cg_mru_block = block;
 
1115
                                        if (!dcg->cg_lru_block)
 
1116
                                                dcg->cg_lru_block = block;
 
1117
                                }
 
1118
 
 
1119
                                xt_unlock_mutex_ns(&dcg->cg_lock);
 
1120
                        }
 
1121
                
 
1122
                        *ret_seg = seg;
 
1123
#ifdef DEBUG_CHECK_IND_CACHE
 
1124
                        xt_ind_check_cache(NULL);
 
1125
#endif
 
1126
                        ot->ot_thread->st_statistics.st_ind_cache_hit++;
 
1127
                        return block;
 
1128
                }
 
1129
                block = block->cb_next;
 
1130
        }
 
1131
        
 
1132
        /* Block not found... */
 
1133
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1134
 
 
1135
        /* Check the open table reserve list first: */
 
1136
        if ((new_block = ot->ot_ind_res_bufs)) {
 
1137
                ot->ot_ind_res_bufs = new_block->cb_next;
 
1138
                ot->ot_ind_res_count--;
 
1139
#ifdef DEBUG_CHECK_IND_CACHE
 
1140
                xt_lock_mutex_ns(&dcg->cg_lock);
 
1141
                dcg->cg_reserved_by_ots--;
 
1142
                dcg->cg_read_count++;
 
1143
                xt_unlock_mutex_ns(&dcg->cg_lock);
 
1144
#endif
 
1145
                goto use_free_block;
 
1146
        }
 
1147
 
 
1148
        free_some_blocks:
 
1149
        if (!dcg->cg_free_list) {
 
1150
                if (!ind_cac_free_lru_blocks(ot, 1, NULL)) {
 
1151
                        if (!dcg->cg_free_list) {
 
1152
                                xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_INDEX_CACHE);
 
1153
#ifdef DEBUG_CHECK_IND_CACHE
 
1154
                                xt_ind_check_cache(NULL);
 
1155
#endif
 
1156
                                return NULL;
 
1157
                        }
 
1158
                }
 
1159
        }
 
1160
 
 
1161
        /* Get a free block: */
 
1162
        xt_lock_mutex_ns(&dcg->cg_lock);
 
1163
        if (!(new_block = dcg->cg_free_list)) {
 
1164
                xt_unlock_mutex_ns(&dcg->cg_lock);
 
1165
                goto free_some_blocks;
 
1166
        }
 
1167
        ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_FREE);
 
1168
        dcg->cg_free_list = new_block->cb_next;
 
1169
        dcg->cg_free_count--;
 
1170
#ifdef DEBUG_CHECK_IND_CACHE
 
1171
        dcg->cg_read_count++;
 
1172
#endif
 
1173
        xt_unlock_mutex_ns(&dcg->cg_lock);
 
1174
 
 
1175
        use_free_block:
 
1176
        new_block->cb_address = address;
 
1177
        new_block->cb_file_id = file->fr_id;
 
1178
        ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_FREE);
 
1179
        new_block->cb_state = IDX_CAC_BLOCK_CLEAN;
 
1180
        new_block->cb_handle_count = 0;
 
1181
        new_block->cp_del_count = 0;
 
1182
        new_block->cb_dirty_next = NULL;
 
1183
        new_block->cb_dirty_prev = NULL;
 
1184
#ifdef IND_OPT_DATA_WRITTEN
 
1185
        new_block->cb_header = FALSE;
 
1186
        new_block->cb_min_pos = 0xFFFF;
 
1187
        new_block->cb_max_pos = 0;
 
1188
#endif
 
1189
 
 
1190
        if (read_data) {
 
1191
                if (!xt_pread_file(file, xt_ind_node_to_offset(ot->ot_table, address), XT_INDEX_PAGE_SIZE, 0, new_block->cb_data, &red_size, &ot->ot_thread->st_statistics.st_ind, ot->ot_thread)) {
 
1192
                        xt_lock_mutex_ns(&dcg->cg_lock);
 
1193
                        new_block->cb_next = dcg->cg_free_list;
 
1194
                        dcg->cg_free_list = new_block;
 
1195
                        dcg->cg_free_count++;
 
1196
#ifdef DEBUG_CHECK_IND_CACHE
 
1197
                        dcg->cg_read_count--;
 
1198
#endif
 
1199
                        ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_CLEAN);
 
1200
                        new_block->cb_state = IDX_CAC_BLOCK_FREE;
 
1201
                        IDX_TRACE("%d- F%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
 
1202
                        xt_unlock_mutex_ns(&dcg->cg_lock);
 
1203
#ifdef DEBUG_CHECK_IND_CACHE
 
1204
                        xt_ind_check_cache(NULL);
 
1205
#endif
 
1206
                        return NULL;
 
1207
                }
 
1208
                IDX_TRACE("%d- R%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
 
1209
                ot->ot_thread->st_statistics.st_ind_cache_miss++;
 
1210
        }
 
1211
        else
 
1212
                red_size = 0;
 
1213
        // PMC - I don't think this is required! memset(new_block->cb_data + red_size, 0, XT_INDEX_PAGE_SIZE - red_size);
 
1214
 
 
1215
        IDX_CAC_WRITE_LOCK(seg, ot->ot_thread);
 
1216
        block = seg->cs_hash_table[hash_idx];
 
1217
        while (block) {
 
1218
                if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
 
1219
                        /* Oops, someone else was faster! */
 
1220
                        xt_lock_mutex_ns(&dcg->cg_lock);
 
1221
                        new_block->cb_next = dcg->cg_free_list;
 
1222
                        dcg->cg_free_list = new_block;
 
1223
                        dcg->cg_free_count++;
 
1224
#ifdef DEBUG_CHECK_IND_CACHE
 
1225
                        dcg->cg_read_count--;
 
1226
#endif
 
1227
                        ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_CLEAN);
 
1228
                        new_block->cb_state = IDX_CAC_BLOCK_FREE;
 
1229
                        IDX_TRACE("%d- F%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
 
1230
                        xt_unlock_mutex_ns(&dcg->cg_lock);
 
1231
                        goto done_ok;
 
1232
                }
 
1233
                block = block->cb_next;
 
1234
        }
 
1235
        block = new_block;
 
1236
 
 
1237
        /* Make the block the most recently used: */
 
1238
        xt_lock_mutex_ns(&dcg->cg_lock);
 
1239
        block->cb_ru_time = ++dcg->cg_ru_now;
 
1240
        if ((block->cb_lr_used = dcg->cg_mru_block))
 
1241
                dcg->cg_mru_block->cb_mr_used = block;
 
1242
        block->cb_mr_used = NULL;
 
1243
        dcg->cg_mru_block = block;
 
1244
        if (!dcg->cg_lru_block)
 
1245
                dcg->cg_lru_block = block;
 
1246
#ifdef DEBUG_CHECK_IND_CACHE
 
1247
        dcg->cg_read_count--;
 
1248
#endif
 
1249
        xt_unlock_mutex_ns(&dcg->cg_lock);
 
1250
 
 
1251
        /* {LAZY-DEL-INDEX-ITEMS}
 
1252
         * Conditionally count the number of deleted entries in the index:
 
1253
         * We do this before other threads can read the block.
 
1254
         */
 
1255
        if (ind->mi_lazy_delete && read_data)
 
1256
                xt_ind_count_deleted_items(ot->ot_table, ind, block);
 
1257
 
 
1258
        /* Add to the hash table: */
 
1259
        block->cb_next = seg->cs_hash_table[hash_idx];
 
1260
        seg->cs_hash_table[hash_idx] = block;
 
1261
 
 
1262
        done_ok:
 
1263
        *ret_seg = seg;
 
1264
#ifdef DEBUG_CHECK_IND_CACHE
 
1265
        xt_ind_check_cache(NULL);
 
1266
#endif
 
1267
        return block;
 
1268
}
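
After a cache miss, ind_cac_fetch() above prepares a block with no segment lock held, so it must search the hash chain again under the write lock before linking the block in ("someone else was faster"), returning the prepared block to the free list if it lost the race. A reduced sketch of that double check, with hypothetical names and the lock points marked only as comments; illustrative only, not part of cache_xt.cc:

#include <stdlib.h>
#include <stdio.h>

struct node { int key; struct node *next; };

static struct node *find(struct node *head, int key)
{
	for (struct node *n = head; n; n = n->next)
		if (n->key == key)
			return n;
	return NULL;
}

static struct node *fetch(struct node **bucket, int key)
{
	struct node *n;

	/* read-lock the segment */
	if ((n = find(*bucket, key)))
		return n;                           /* cache hit */
	/* unlock, then prepare a new block (the real code may read it from disk here) */
	if (!(n = (struct node *) malloc(sizeof(struct node))))
		return NULL;
	n->key = key;
	/* write-lock the segment and check again: another thread may have inserted it */
	struct node *other = find(*bucket, key);
	if (other) {
		free(n);                            /* lost the race: give the prepared block back */
		return other;
	}
	n->next = *bucket;                      /* won the race: link the new block into the chain */
	*bucket = n;
	return n;
}

int main()
{
	struct node *bucket = NULL;
	fetch(&bucket, 7);
	printf("%s\n", find(bucket, 7) ? "cached" : "missing");
	return 0;
}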
 
1269
 
 
1270
static xtBool ind_cac_get(XTOpenTablePtr ot, xtIndexNodeID address, DcSegmentPtr *ret_seg, XTIndBlockPtr *ret_block)
 
1271
{
 
1272
        register XTOpenFilePtr  file = ot->ot_ind_file;
 
1273
        register XTIndBlockPtr  block;
 
1274
        register DcSegmentPtr   seg;
 
1275
        register u_int                  hash_idx;
 
1276
        register DcGlobalsRec   *dcg = &ind_cac_globals;
 
1277
 
 
1278
        hash_idx = XT_NODE_ID(address) + (file->fr_id * 223);
 
1279
        seg = &dcg->cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
 
1280
        hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % dcg->cg_hash_size;
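        /* Two-level lookup: the low XT_INDEX_CACHE_SEGMENT_SHIFTS bits of the
         * combined hash select the segment (and with it the lock to take),
         * while the remaining bits, modulo cg_hash_size, select the bucket
         * chain within that segment. The multiplier 223 is presumably just a
         * small prime used to spread the blocks of different files across the
         * buckets. A sketch of the mapping:
         *
         *   h      = node_id + file_id * 223;
         *   seg    = h & IDX_CAC_SEGMENT_MASK;
         *   bucket = (h >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % cg_hash_size;
         */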
 
1281
 
 
1282
        IDX_CAC_READ_LOCK(seg, ot->ot_thread);
 
1283
        block = seg->cs_hash_table[hash_idx];
 
1284
        while (block) {
 
1285
                if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
 
1286
                        ASSERT_NS(block->cb_state != IDX_CAC_BLOCK_FREE);
 
1287
 
 
1288
                        *ret_seg = seg;
 
1289
                        *ret_block = block;
 
1290
                        return OK;
 
1291
                }
 
1292
                block = block->cb_next;
 
1293
        }
 
1294
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1295
        
 
1296
        /* Block not found: */
 
1297
        *ret_seg = NULL;
 
1298
        *ret_block = NULL;
 
1299
        return OK;
 
1300
}
 
1301
 
 
1302
xtPublic xtBool xt_ind_write(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, size_t size, xtWord1 *data)
 
1303
{
 
1304
        XTIndBlockPtr   block;
 
1305
        DcSegmentPtr    seg;
 
1306
 
 
1307
        if (!(block = ind_cac_fetch(ot, ind, address, &seg, FALSE)))
 
1308
                return FAILED;
 
1309
 
 
1310
        XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
 
1311
        if (block->cb_state == IDX_CAC_BLOCK_FLUSHING) {
 
1312
                if (!ot->ot_table->tab_ind_flush_ilog->il_write_block(ot, block)) {
 
1313
                        XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
1314
                        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1315
                        return FAILED;
 
1316
                }
 
1317
        }
 
1318
#ifdef IND_OPT_DATA_WRITTEN
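        /* With IND_OPT_DATA_WRITTEN the block records which byte range has been
         * modified since it was last clean: cb_header for the page header and
         * cb_min_pos..cb_max_pos for the data part (offsets taken relative to
         * the end of the page header). Presumably this lets the flusher write
         * only the changed portion of the page; a full-page write like this one
         * simply widens the range to cover everything that is copied below.
         */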
 
1319
        block->cb_header = TRUE;
 
1320
        block->cb_min_pos = 0;
 
1321
        if (size-XT_INDEX_PAGE_HEAD_SIZE > block->cb_max_pos)
 
1322
                block->cb_max_pos = size-XT_INDEX_PAGE_HEAD_SIZE;
 
1323
        ASSERT_NS(block->cb_max_pos <= XT_INDEX_PAGE_SIZE-XT_INDEX_PAGE_HEAD_SIZE);
 
1324
        ASSERT_NS(block->cb_min_pos < block->cb_max_pos);
 
1325
#endif
 
1326
        ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
 
1327
        memcpy(block->cb_data, data, size);
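        /* If the block was not already dirty, push it onto the front of the
         * index's doubly-linked dirty list (protected by the mi_dirty_lock
         * spinlock) so that it can be found when the index is flushed.
         */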
 
1328
        if (block->cb_state != IDX_CAC_BLOCK_DIRTY) {
 
1329
                TRACK_BLOCK_WRITE(offset);
 
1330
                xt_spinlock_lock(&ind->mi_dirty_lock);
 
1331
                if ((block->cb_dirty_next = ind->mi_dirty_list))
 
1332
                        ind->mi_dirty_list->cb_dirty_prev = block;
 
1333
                block->cb_dirty_prev = NULL;
 
1334
                ind->mi_dirty_list = block;
 
1335
                ind->mi_dirty_blocks++;
 
1336
                xt_spinlock_unlock(&ind->mi_dirty_lock);
 
1337
                if (block->cb_state != IDX_CAC_BLOCK_LOGGED) {
 
1338
                        ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
 
1339
                        ot->ot_thread->st_statistics.st_ind_cache_dirty++;
 
1340
                }
 
1341
                block->cb_state = IDX_CAC_BLOCK_DIRTY;
 
1342
        }
 
1343
        XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
1344
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1345
#ifdef XT_TRACK_INDEX_UPDATES
 
1346
        ot->ot_ind_changed++;
 
1347
#endif
 
1348
        return OK;
 
1349
}
 
1350
 
 
1351
/*
 
1352
 * Update the cache, if in RAM.
 
1353
 */
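/* Note that ind_cac_get() only looks the node up in the cache; it never reads
 * from disk. If the node is not cached this function does nothing and still
 * returns OK, which is the "if in RAM" behaviour described above. A minimal
 * usage sketch (address and page_buffer are caller-supplied, hypothetical
 * names):
 *
 *   if (!xt_ind_write_cache(ot, address, XT_INDEX_PAGE_SIZE, page_buffer))
 *       return FAILED;
 */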
 
1354
xtPublic xtBool xt_ind_write_cache(XTOpenTablePtr ot, xtIndexNodeID address, size_t size, xtWord1 *data)
 
1355
{
 
1356
        XTIndBlockPtr   block;
 
1357
        DcSegmentPtr    seg;
 
1358
 
 
1359
        if (!ind_cac_get(ot, address, &seg, &block))
 
1360
                return FAILED;
 
1361
 
 
1362
        if (block) {
 
1363
                XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
 
1364
                /* This should only be done to pages that are free, which
 
1365
                 * are not on the dirty list, so they must be clean!
 
1366
                 */
 
1367
                ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
 
1368
                memcpy(block->cb_data, data, size);
 
1369
 
 
1370
                XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
1371
                IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1372
        }
 
1373
 
 
1374
        return OK;
 
1375
}
 
1376
 
 
1377
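/* Look a node up in the cache without reading it from disk. If the node is
 * cached, the block is returned exclusively (write) locked in iref; if it is
 * not cached, ir_block and ir_branch are set to NULL. Both cases return OK.
 */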
xtPublic xtBool xt_ind_get(XTOpenTablePtr ot, xtIndexNodeID address, XTIndReferencePtr iref)
 
1378
{
 
1379
        XTIndBlockPtr   block;
 
1380
        DcSegmentPtr    seg;
 
1381
 
 
1382
        if (!ind_cac_get(ot, address, &seg, &block))
 
1383
                return FAILED;
 
1384
 
 
1385
        if (block) {
 
1386
                XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
 
1387
                ASSERT_NS(IDX_CAC_NOT_FREE(block->cb_state));
 
1388
                IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1389
                iref->ir_block = block;
 
1390
                iref->ir_branch = (XTIdxBranchDPtr) block->cb_data;
 
1391
        }
 
1392
        else {
 
1393
                iref->ir_block = NULL;
 
1394
                iref->ir_branch = NULL;
 
1395
        }
 
1396
        iref->ir_xlock = TRUE;
 
1397
        iref->ir_updated = FALSE;
 
1398
 
 
1399
        return OK;
 
1400
}
 
1401
 
 
1402
/* 
 
1403
 * Note, this function may only be called if the block has
 
1404
 * been freed.
 
1405
 */
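/* If the block is cached and dirty it is taken off the index's dirty list and
 * marked clean again: the page has been freed in the index file, so its old
 * contents no longer need to be flushed. A block in the FLUSHING state is
 * first written to the index log, as in xt_ind_write().
 */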
 
1406
xtPublic xtBool xt_ind_free_block(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address)
 
1407
{
 
1408
        XTIndBlockPtr   block;
 
1409
        DcSegmentPtr    seg;
 
1410
 
 
1411
        if (!ind_cac_get(ot, address, &seg, &block))
 
1412
                return FAILED;
 
1413
        if (block) {
 
1414
                XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
 
1415
 
 
1416
                if (block->cb_state == IDX_CAC_BLOCK_FLUSHING) {
 
1417
                        if (!ot->ot_table->tab_ind_flush_ilog->il_write_block(ot, block)) {
 
1418
                                XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
1419
                                IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1420
                                return FAILED;
 
1421
                        }
 
1422
                }
 
1423
 
 
1424
                /* {PAGE-NO-IN-INDEX-FILE}
 
1425
                 * This is the one exception to the rule that a block
 
1426
                 * that is in the IDX_CAC_BLOCK_LOGGED state may be released
 
1427
                 * from the cache!
 
1428
                 */
 
1429
                ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
 
1430
 
 
1431
                if (block->cb_state == IDX_CAC_BLOCK_DIRTY) {
 
1432
                        /* Take the block off the dirty list: */
 
1433
                        xt_spinlock_lock(&ind->mi_dirty_lock);
 
1434
                        if (block->cb_dirty_next)
 
1435
                                block->cb_dirty_next->cb_dirty_prev = block->cb_dirty_prev;
 
1436
                        if (block->cb_dirty_prev)
 
1437
                                block->cb_dirty_prev->cb_dirty_next = block->cb_dirty_next;
 
1438
                        if (ind->mi_dirty_list == block)
 
1439
                                ind->mi_dirty_list = block->cb_dirty_next;
 
1440
                        ind->mi_dirty_blocks--;
 
1441
                        xt_spinlock_unlock(&ind->mi_dirty_lock);
 
1442
                        block->cb_state = IDX_CAC_BLOCK_CLEAN;
 
1443
                        ot->ot_thread->st_statistics.st_ind_cache_dirty--;
 
1444
#ifdef IND_OPT_DATA_WRITTEN
 
1445
                        block->cb_header = FALSE;
 
1446
                        block->cb_min_pos = 0xFFFF;
 
1447
                        block->cb_max_pos = 0;
 
1448
#endif
 
1449
                }
 
1450
                XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
 
1451
 
 
1452
                IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1453
        }
 
1454
 
 
1455
        return OK;
 
1456
}
 
1457
 
 
1458
xtPublic xtBool xt_ind_read_bytes(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, size_t size, xtWord1 *data)
 
1459
{
 
1460
        XTIndBlockPtr   block;
 
1461
        DcSegmentPtr    seg;
 
1462
 
 
1463
        if (!(block = ind_cac_fetch(ot, ind, address, &seg, TRUE)))
 
1464
                return FAILED;
 
1465
 
 
1466
        XT_IPAGE_READ_LOCK(&block->cb_lock);
 
1467
        memcpy(data, block->cb_data, size);
 
1468
        XT_IPAGE_UNLOCK(&block->cb_lock, FALSE);
 
1469
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1470
        return OK;
 
1471
}
 
1472
 
 
1473
xtPublic xtBool xt_ind_fetch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, XTPageLockType ltype, XTIndReferencePtr iref)
 
1474
{
 
1475
        register XTIndBlockPtr  block;
 
1476
        DcSegmentPtr                    seg;
 
1477
        xtWord2                                 branch_size;
 
1478
        u_int                                   rec_size;
 
1479
        xtBool                                  xlock = FALSE;
 
1480
 
 
1481
#ifdef DEBUG
 
1482
        ASSERT_NS(iref->ir_xlock == 2);
 
1483
        ASSERT_NS(iref->ir_updated == 2);
 
1484
#endif
 
1485
        if (!(block = ind_cac_fetch(ot, ind, address, &seg, TRUE)))
 
1486
                return FAILED;
 
1487
 
 
1488
        branch_size = XT_GET_DISK_2(((XTIdxBranchDPtr) block->cb_data)->tb_size_2);
 
1489
        rec_size = XT_GET_INDEX_BLOCK_LEN(branch_size);
 
1490
        if (rec_size < 2 || rec_size > XT_INDEX_PAGE_SIZE)
 
1491
                goto failed_corrupt;
 
1492
        if (ind->mi_fix_key) {
 
1493
                rec_size -= 2;
 
1494
                if (XT_IS_NODE(branch_size)) {
 
1495
                        if (rec_size != 0) {
 
1496
                                if (rec_size < XT_NODE_REF_SIZE)
 
1497
                                        goto failed_corrupt;
 
1498
                                rec_size -= XT_NODE_REF_SIZE;
 
1499
                                if ((rec_size % (ind->mi_key_size + XT_RECORD_REF_SIZE + XT_NODE_REF_SIZE)) != 0)
 
1500
                                        goto failed_corrupt;
 
1501
                        }
 
1502
                }
 
1503
                else {
 
1504
                        if ((rec_size % (ind->mi_key_size + XT_RECORD_REF_SIZE)) != 0)
 
1505
                                goto failed_corrupt;
 
1506
                }
 
1507
        }
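        /* In other words, for fixed-size keys the branch length (minus the
         * 2-byte size word) must be an exact multiple of one entry: key plus
         * record reference in a leaf, with an additional node reference per
         * entry, plus one leading node reference, in an internal node.
         * Anything else means the page is corrupt.
         */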
 
1508
 
 
1509
        switch (ltype) {
 
1510
                case XT_LOCK_READ:
 
1511
                        break;
 
1512
                case XT_LOCK_WRITE:
 
1513
                        xlock = TRUE;
 
1514
                        break;
 
1515
                case XT_XLOCK_LEAF:
 
1516
                        if (!XT_IS_NODE(branch_size))
 
1517
                                xlock = TRUE;
 
1518
                        break;
 
1519
                case XT_XLOCK_DEL_LEAF:
 
1520
                        if (!XT_IS_NODE(branch_size)) {
 
1521
                                if (ot->ot_table->tab_dic.dic_no_lazy_delete)
 
1522
                                        xlock = TRUE;
 
1523
                                else {
 
1524
                                        /*
 
1525
                                         * {LAZY-DEL-INDEX-ITEMS}
 
1526
                                         *
 
1527
                                          * We are fetching a page for delete purposes.
 
1528
                                          * We decide here if we plan to do a lazy delete,
 
1529
                                          * or if we plan to compact the node.
 
1530
                                         *
 
1531
                                         * A lazy delete just requires a shared lock.
 
1532
                                         *
 
1533
                                         */
 
1534
                                        if (ind->mi_lazy_delete) {
 
1535
                                                /* If the number of deleted items is greater than
 
1536
                                                  * half of the number of items that can fit in the
 
1537
                                                 * page, then we will compact the node.
 
1538
                                                 */
 
1539
                                                if (!xt_idx_lazy_delete_on_leaf(ind, block, XT_GET_INDEX_BLOCK_LEN(branch_size)))
 
1540
                                                        xlock = TRUE;
 
1541
                                        }
 
1542
                                        else
 
1543
                                                xlock = TRUE;
 
1544
                                }
 
1545
                        }
 
1546
                        break;
 
1547
        }
 
1548
 
 
1549
        if ((iref->ir_xlock = xlock))
 
1550
                XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
 
1551
        else
 
1552
                XT_IPAGE_READ_LOCK(&block->cb_lock);
 
1553
 
 
1554
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1555
 
 
1556
        /* {DIRECT-IO}
 
1557
         * Direct I/O requires that the buffer is 512-byte aligned.
 
1558
         * To do this, cb_data is turned into a pointer, instead
 
1559
         * of an array.
 
1560
         * As a result, we need to pass a pointer to both the
 
1561
         * cache block and the cache block data:
 
1562
         */
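        /* A minimal sketch of the kind of aligned allocation this implies for
         * cb_data (illustration only; the cache's actual allocation code lives
         * elsewhere):
         *
         *   void *buf;
         *   if (posix_memalign(&buf, 512, XT_INDEX_PAGE_SIZE) != 0)
         *       return FAILED;
         *   block->cb_data = (xtWord1 *) buf;
         */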
 
1563
        iref->ir_updated = FALSE;
 
1564
        iref->ir_block = block;
 
1565
        iref->ir_branch = (XTIdxBranchDPtr) block->cb_data;
 
1566
        return OK;
 
1567
 
 
1568
        failed_corrupt:
 
1569
        IDX_CAC_UNLOCK(seg, ot->ot_thread);
 
1570
        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name);
 
1571
        return FAILED;
 
1572
}
 
1573
 
 
1574
xtPublic xtBool xt_ind_release(XTOpenTablePtr ot, XTIndexPtr ind, XTPageUnlockType XT_NDEBUG_UNUSED(utype), XTIndReferencePtr iref)
 
1575
{
 
1576
        register XTIndBlockPtr  block;
 
1577
 
 
1578
        block = iref->ir_block;
 
1579
 
 
1580
#ifdef DEBUG
 
1581
        ASSERT_NS(iref->ir_xlock != 2);
 
1582
        ASSERT_NS(iref->ir_updated != 2);
 
1583
        if (iref->ir_updated)
 
1584
                ASSERT_NS(utype == XT_UNLOCK_R_UPDATE || utype == XT_UNLOCK_W_UPDATE);
 
1585
        else
 
1586
                ASSERT_NS(utype == XT_UNLOCK_READ || utype == XT_UNLOCK_WRITE);
 
1587
        if (iref->ir_xlock)
 
1588
                ASSERT_NS(utype == XT_UNLOCK_WRITE || utype == XT_UNLOCK_W_UPDATE);
 
1589
        else
 
1590
                ASSERT_NS(utype == XT_UNLOCK_READ || utype == XT_UNLOCK_R_UPDATE);
 
1591
#endif
 
1592
        if (iref->ir_updated) {
 
1593
#ifdef DEBUG
 
1594
#ifdef IND_OPT_DATA_WRITTEN
 
1595
                xtWord2 branch_size;
 
1596
                u_int   rec_size;
 
1597
 
 
1598
                branch_size = XT_GET_DISK_2(((XTIdxBranchDPtr) block->cb_data)->tb_size_2);
 
1599
                rec_size = XT_GET_INDEX_BLOCK_LEN(branch_size);
 
1600
 
 
1601
                ASSERT_NS(block->cb_min_pos <= rec_size-2);
 
1602
                ASSERT_NS(block->cb_min_pos <= block->cb_max_pos);
 
1603
                ASSERT_NS(block->cb_max_pos <= rec_size-2);
 
1604
                ASSERT_NS(block->cb_max_pos <= XT_INDEX_PAGE_SIZE-2);
 
1605
#endif
 
1606
#endif
 
1607
                /* The page was updated: */
 
1608
                ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
 
1609
                if (block->cb_state != IDX_CAC_BLOCK_DIRTY) {
 
1610
                        TRACK_BLOCK_WRITE(offset);
 
1611
                        xt_spinlock_lock(&ind->mi_dirty_lock);
 
1612
                        if ((block->cb_dirty_next = ind->mi_dirty_list))
 
1613
                                ind->mi_dirty_list->cb_dirty_prev = block;
 
1614
                        block->cb_dirty_prev = NULL;
 
1615
                        ind->mi_dirty_list = block;
 
1616
                        ind->mi_dirty_blocks++;
 
1617
                        xt_spinlock_unlock(&ind->mi_dirty_lock);
 
1618
                        if (block->cb_state != IDX_CAC_BLOCK_LOGGED) {
 
1619
                                ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
 
1620
                                ot->ot_thread->st_statistics.st_ind_cache_dirty++;
 
1621
                        }
 
1622
                        block->cb_state = IDX_CAC_BLOCK_DIRTY;
 
1623
                }
 
1624
        }
 
1625
 
 
1626
        XT_IPAGE_UNLOCK(&block->cb_lock, iref->ir_xlock);
 
1627
#ifdef DEBUG
 
1628
        iref->ir_xlock = 2;
 
1629
        iref->ir_updated = 2;
 
1630
#endif
 
1631
        return OK;
 
1632
}
 
1633
 
 
1634
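/* Reserve index cache blocks for the calling operation: move up to 'count'
 * blocks from the global free list onto the open table's private reserve list
 * (ot_ind_res_bufs), freeing least-recently-used blocks when the free list
 * runs dry. Fails with XT_ERR_NO_INDEX_CACHE if not enough blocks can be
 * freed. 'not_this' is presumably a branch that must not be evicted in the
 * process. Hypothetical usage sketch:
 *
 *   if (!xt_ind_reserve(ot, 3, NULL))
 *       return FAILED;
 */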
xtPublic xtBool xt_ind_reserve(XTOpenTablePtr ot, u_int count, XTIdxBranchDPtr not_this)
 
1635
{
 
1636
        register XTIndBlockPtr  block;
 
1637
        register DcGlobalsRec   *dcg = &ind_cac_globals;
 
1638
 
 
1639
#ifdef XT_TRACK_INDEX_UPDATES
 
1640
        ot->ot_ind_reserved = count;
 
1641
        ot->ot_ind_reads = 0;
 
1642
#endif
 
1643
#ifdef DEBUG_CHECK_IND_CACHE
 
1644
        xt_ind_check_cache(NULL);
 
1645
#endif
 
1646
        while (ot->ot_ind_res_count < count) {
 
1647
                if (!dcg->cg_free_list) {
 
1648
                        if (!ind_cac_free_lru_blocks(ot, count - ot->ot_ind_res_count, not_this)) {
 
1649
                                if (!dcg->cg_free_list) {
 
1650
                                        xt_ind_free_reserved(ot);
 
1651
                                        xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_INDEX_CACHE);
 
1652
#ifdef DEBUG_CHECK_IND_CACHE
 
1653
                                        xt_ind_check_cache(NULL);
 
1654
#endif
 
1655
                                        return FAILED;
 
1656
                                }
 
1657
                        }
 
1658
                }
 
1659
 
 
1660
                /* Get a free block: */
 
1661
                xt_lock_mutex_ns(&dcg->cg_lock);
 
1662
                while (ot->ot_ind_res_count < count && (block = dcg->cg_free_list)) {
 
1663
                        ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_FREE);
 
1664
                        dcg->cg_free_list = block->cb_next;
 
1665
                        dcg->cg_free_count--;
 
1666
                        block->cb_next = ot->ot_ind_res_bufs;
 
1667
                        ot->ot_ind_res_bufs = block;
 
1668
                        ot->ot_ind_res_count++;
 
1669
#ifdef DEBUG_CHECK_IND_CACHE
 
1670
                        dcg->cg_reserved_by_ots++;
 
1671
#endif
 
1672
                }
 
1673
                xt_unlock_mutex_ns(&dcg->cg_lock);
 
1674
        }
 
1675
#ifdef DEBUG_CHECK_IND_CACHE
 
1676
        xt_ind_check_cache(NULL);
 
1677
#endif
 
1678
        return OK;
 
1679
}
 
1680
 
 
1681
xtPublic void xt_ind_free_reserved(XTOpenTablePtr ot)
 
1682
{
 
1683
#ifdef DEBUG_CHECK_IND_CACHE
 
1684
        xt_ind_check_cache(NULL);
 
1685
#endif
 
1686
        if (ot->ot_ind_res_bufs) {
 
1687
                register XTIndBlockPtr  block, fblock;
 
1688
                register DcGlobalsRec   *dcg = &ind_cac_globals;
 
1689
 
 
1690
                xt_lock_mutex_ns(&dcg->cg_lock);
 
1691
                block = ot->ot_ind_res_bufs;
 
1692
                while (block) {
 
1693
                        fblock = block;
 
1694
                        block = block->cb_next;
 
1695
 
 
1696
                        fblock->cb_next = dcg->cg_free_list;
 
1697
                        dcg->cg_free_list = fblock;
 
1698
#ifdef DEBUG_CHECK_IND_CACHE
 
1699
                        dcg->cg_reserved_by_ots--;
 
1700
#endif
 
1701
                        dcg->cg_free_count++;
 
1702
                }
 
1703
                xt_unlock_mutex_ns(&dcg->cg_lock);
 
1704
                ot->ot_ind_res_bufs = NULL;
 
1705
                ot->ot_ind_res_count = 0;
 
1706
        }
 
1707
#ifdef DEBUG_CHECK_IND_CACHE
 
1708
        xt_ind_check_cache(NULL);
 
1709
#endif
 
1710
}
 
1711
 
 
1712
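/* Return reserved blocks to the global free list, but only when the free list
 * is empty, presumably because that is the point at which other threads
 * actually need them; otherwise the reservation is kept for reuse.
 */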
xtPublic void xt_ind_unreserve(XTOpenTablePtr ot)
 
1713
{
 
1714
        if (!ind_cac_globals.cg_free_list)
 
1715
                xt_ind_free_reserved(ot);
 
1716
}
 
1717