/* Copyright (C) 2005 PrimeBase Technologies GmbH, Germany
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * 2005-05-24	Paul McCullagh
 */

#include "xt_config.h"

#include "pthread_xt.h"
#include "thread_xt.h"
#include "filesys_xt.h"
#define XT_TIME_DIFF(start, now) (\
	((xtWord4) (now) < (xtWord4) (start)) ? \
	((xtWord4) 0XFFFFFFFF - ((xtWord4) (start) - (xtWord4) (now))) : \
	((xtWord4) (now) - (xtWord4) (start)))
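/* Illustrative example (not from the original source): the subtraction is
 * wrap-around safe. With start = 0xFFFFFFF0 and now = 0x00000010 the counter
 * has wrapped, and the macro returns 0xFFFFFFFF - (0xFFFFFFF0 - 0x10) = 0x1F,
 * i.e. roughly the 0x20 ticks that really elapsed, instead of a huge value.
 * The LRU aging check in ind_cac_fetch() below relies on this behaviour.
 */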
/*
 * -----------------------------------------------------------------------
 */

#define IDX_CAC_SEGMENT_COUNT		((off_t) 1 << XT_INDEX_CACHE_SEGMENT_SHIFTS)
#define IDX_CAC_SEGMENT_MASK		(IDX_CAC_SEGMENT_COUNT - 1)
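/* Sketch of how these values are meant to be combined (it mirrors the lookup
 * code further down in this file): the low bits of the hash value select the
 * cache segment, the remaining bits select the slot in that segment's hash
 * table:
 *
 *	hash_idx = XT_NODE_ID(address) + (file_id * 223);
 *	seg      = &ind_cac_globals.cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
 *	slot     = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % ind_cac_globals.cg_hash_size;
 */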
#ifdef XT_NO_ATOMICS
#define IDX_CAC_USE_PTHREAD_RW
#else
//#define IDX_CAC_USE_PTHREAD_RW
#define IDX_CAC_USE_XSMUTEX
//#define IDX_USE_SPINXSLOCK
#endif
#if defined(IDX_CAC_USE_PTHREAD_RW)

#define IDX_CAC_LOCK_TYPE				xt_rwlock_type
#define IDX_CAC_INIT_LOCK(s, i)			xt_init_rwlock_with_autoname(s, &(i)->cs_lock)
#define IDX_CAC_FREE_LOCK(s, i)			xt_free_rwlock(&(i)->cs_lock)
#define IDX_CAC_READ_LOCK(i, o)			xt_slock_rwlock_ns(&(i)->cs_lock)
#define IDX_CAC_WRITE_LOCK(i, o)		xt_xlock_rwlock_ns(&(i)->cs_lock)
#define IDX_CAC_UNLOCK(i, o)			xt_unlock_rwlock_ns(&(i)->cs_lock)

#elif defined(IDX_CAC_USE_XSMUTEX)

#define IDX_CAC_LOCK_TYPE				XTMutexXSLockRec
#define IDX_CAC_INIT_LOCK(s, i)			xt_xsmutex_init_with_autoname(s, &(i)->cs_lock)
#define IDX_CAC_FREE_LOCK(s, i)			xt_xsmutex_free(s, &(i)->cs_lock)
#define IDX_CAC_READ_LOCK(i, o)			xt_xsmutex_slock(&(i)->cs_lock, (o)->t_id)
#define IDX_CAC_WRITE_LOCK(i, o)		xt_xsmutex_xlock(&(i)->cs_lock, (o)->t_id)
#define IDX_CAC_UNLOCK(i, o)			xt_xsmutex_unlock(&(i)->cs_lock, (o)->t_id)

#elif defined(IDX_CAC_USE_SPINXSLOCK)

#define IDX_CAC_LOCK_TYPE				XTSpinXSLockRec
#define IDX_CAC_INIT_LOCK(s, i)			xt_spinxslock_init_with_autoname(s, &(i)->cs_lock)
#define IDX_CAC_FREE_LOCK(s, i)			xt_spinxslock_free(s, &(i)->cs_lock)
#define IDX_CAC_READ_LOCK(i, s)			xt_spinxslock_slock(&(i)->cs_lock, (s)->t_id)
#define IDX_CAC_WRITE_LOCK(i, s)		xt_spinxslock_xlock(&(i)->cs_lock, FALSE, (s)->t_id)
#define IDX_CAC_UNLOCK(i, s)			xt_spinxslock_unlock(&(i)->cs_lock, (s)->t_id)

#else
#error Please define the lock type
#endif
#ifdef XT_NO_ATOMICS
#define ID_HANDLE_USE_PTHREAD_RW
#else
//#define ID_HANDLE_USE_PTHREAD_RW
#define ID_HANDLE_USE_SPINLOCK
#endif
#if defined(ID_HANDLE_USE_PTHREAD_RW)

#define ID_HANDLE_LOCK_TYPE				xt_mutex_type
#define ID_HANDLE_INIT_LOCK(s, i)		xt_init_mutex_with_autoname(s, i)
#define ID_HANDLE_FREE_LOCK(s, i)		xt_free_mutex(i)
#define ID_HANDLE_LOCK(i)				xt_lock_mutex_ns(i)
#define ID_HANDLE_UNLOCK(i)				xt_unlock_mutex_ns(i)

#elif defined(ID_HANDLE_USE_SPINLOCK)

#define ID_HANDLE_LOCK_TYPE				XTSpinLockRec
#define ID_HANDLE_INIT_LOCK(s, i)		xt_spinlock_init_with_autoname(s, i)
#define ID_HANDLE_FREE_LOCK(s, i)		xt_spinlock_free(s, i)
#define ID_HANDLE_LOCK(i)				xt_spinlock_lock(i)
#define ID_HANDLE_UNLOCK(i)				xt_spinlock_unlock(i)

#endif
#define XT_HANDLE_SLOTS			37

#ifdef DEBUG
#define XT_INIT_HANDLE_COUNT	0
#define XT_INIT_HANDLE_BLOCKS	0
#else
#define XT_INIT_HANDLE_COUNT	40
#define XT_INIT_HANDLE_BLOCKS	10
#endif
/* A disk cache segment. The cache is divided into a number of segments
 * to improve concurrency.
 */
typedef struct DcSegment {
	IDX_CAC_LOCK_TYPE	cs_lock;			/* The cache segment lock. */
	XTIndBlockPtr		*cs_hash_table;
} DcSegmentRec, *DcSegmentPtr;

typedef struct DcHandleSlot {
	ID_HANDLE_LOCK_TYPE	hs_handles_lock;
	XTIndHandleBlockPtr	hs_free_blocks;
	XTIndHandlePtr		hs_free_handles;
	XTIndHandlePtr		hs_used_handles;
} DcHandleSlotRec, *DcHandleSlotPtr;
typedef struct DcGlobals {
	xt_mutex_type		cg_lock;			/* The public cache lock. */
	DcSegmentRec		cg_segment[IDX_CAC_SEGMENT_COUNT];
	XTIndBlockPtr		cg_blocks;
#ifdef XT_USE_DIRECT_IO_ON_INDEX
	xtWord1				*cg_buffer;
#endif
	XTIndBlockPtr		cg_free_list;
	xtWord4				cg_free_count;
	xtWord4				cg_ru_now;			/* A counter as described by Jim Starkey (my thanks). */
	XTIndBlockPtr		cg_lru_block;
	XTIndBlockPtr		cg_mru_block;
	xtWord4				cg_hash_size;
	xtWord4				cg_block_count;
	xtWord4				cg_max_free;
#ifdef DEBUG_CHECK_IND_CACHE
	u_int				cg_reserved_by_ots;	/* Number of blocks reserved by open tables. */
	u_int				cg_read_count;		/* Number of blocks being read. */
#endif

	/* Index cache handles: */
	DcHandleSlotRec		cg_handle_slot[XT_HANDLE_SLOTS];
} DcGlobalsRec, *DcGlobalsPtr;

static DcGlobalsRec		ind_cac_globals;
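/* How the cg_ru_now "clock" is used (a descriptive note summarizing
 * ind_cac_fetch() below): every time a block is touched it is stamped with
 * ++cg_ru_now, and a block is only relinked to the MRU end of the list when
 * XT_TIME_DIFF(cb_ru_time, cg_ru_now) exceeds half the total block count.
 * This avoids relinking hot blocks on every single access.
 */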
#include "my_global.h"
171
#include "keycache.h"
173
#undef pthread_rwlock_rdlock
174
#undef pthread_rwlock_wrlock
175
#undef pthread_rwlock_try_wrlock
176
#undef pthread_rwlock_unlock
177
#undef pthread_mutex_lock
178
#undef pthread_mutex_unlock
179
#undef pthread_cond_wait
180
#undef pthread_cond_broadcast
186
/*
 * -----------------------------------------------------------------------
 * INDEX CACHE HANDLES
 */

static XTIndHandlePtr ind_alloc_handle()
{
	XTIndHandlePtr handle;

	if (!(handle = (XTIndHandlePtr) xt_calloc_ns(sizeof(XTIndHandleRec))))
		return NULL;
	xt_spinlock_init_with_autoname(NULL, &handle->ih_lock);
	return handle;
}

static void ind_free_handle(XTIndHandlePtr handle)
{
	xt_spinlock_free(NULL, &handle->ih_lock);
	xt_free_ns(handle);
}
static void ind_handle_exit(XTThreadPtr self)
{
	DcHandleSlotPtr		hs;
	XTIndHandlePtr		handle;
	XTIndHandleBlockPtr	hptr;

	for (int i=0; i<XT_HANDLE_SLOTS; i++) {
		hs = &ind_cac_globals.cg_handle_slot[i];

		while (hs->hs_used_handles) {
			handle = hs->hs_used_handles;
			xt_ind_release_handle(handle, FALSE, self);
		}

		while (hs->hs_free_blocks) {
			hptr = hs->hs_free_blocks;
			hs->hs_free_blocks = hptr->hb_next;
			xt_free(self, hptr);
		}

		while (hs->hs_free_handles) {
			handle = hs->hs_free_handles;
			hs->hs_free_handles = handle->ih_next;
			ind_free_handle(handle);
		}

		ID_HANDLE_FREE_LOCK(self, &hs->hs_handles_lock);
	}
}
static void ind_handle_init(XTThreadPtr self)
{
	DcHandleSlotPtr hs;

	for (int i=0; i<XT_HANDLE_SLOTS; i++) {
		hs = &ind_cac_globals.cg_handle_slot[i];
		memset(hs, 0, sizeof(DcHandleSlotRec));
		ID_HANDLE_INIT_LOCK(self, &hs->hs_handles_lock);
	}
}
//#define CHECK_HANDLE_STRUCTS

#ifdef CHECK_HANDLE_STRUCTS
static int gdummy = 0;

static void ic_stop_here()
{
	printf("Nooo %d!\n", gdummy);
}

static void ic_check_handle_structs()
{
	XTIndHandlePtr		handle, phandle;
	XTIndHandleBlockPtr	hptr;
	int					ctest;

	phandle = NULL;
	handle = ind_cac_globals.cg_used_handles;
	while (handle) {
		if (handle == phandle)
			ic_stop_here();
		if (handle->ih_prev != phandle)
			ic_stop_here();
		if (handle->ih_cache_reference) {
			ctest = handle->x.ih_cache_block->cb_handle_count;
			if (ctest == 0 || ctest > 100)
				ic_stop_here();
		}
		else {
			ctest = handle->x.ih_handle_block->hb_ref_count;
			if (ctest == 0 || ctest > 100)
				ic_stop_here();
		}
		phandle = handle;
		handle = handle->ih_next;
	}
	hptr = ind_cac_globals.cg_free_blocks;
	while (hptr)
		hptr = hptr->hb_next;
	phandle = NULL;
	handle = ind_cac_globals.cg_free_handles;
	while (handle) {
		if (handle == phandle)
			ic_stop_here();
		phandle = handle;
		handle = handle->ih_next;
	}
}
#endif
/*
 * Get a handle to the index block.
 * This function is called by index scanners (readers).
 */
xtPublic XTIndHandlePtr xt_ind_get_handle(XTOpenTablePtr ot, XTIndexPtr ind, XTIndReferencePtr iref)
{
	DcHandleSlotPtr	hs;
	XTIndHandlePtr	handle;

	hs = &ind_cac_globals.cg_handle_slot[iref->ir_block->cb_address % XT_HANDLE_SLOTS];

	ASSERT_NS(iref->ir_xlock == FALSE);
	ASSERT_NS(iref->ir_updated == FALSE);
	ID_HANDLE_LOCK(&hs->hs_handles_lock);
#ifdef CHECK_HANDLE_STRUCTS
	ic_check_handle_structs();
#endif
	if ((handle = hs->hs_free_handles))
		hs->hs_free_handles = handle->ih_next;
	else {
		if (!(handle = ind_alloc_handle())) {
			ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
			xt_ind_release(ot, ind, XT_UNLOCK_READ, iref);
			return NULL;
		}
	}
	if (hs->hs_used_handles)
		hs->hs_used_handles->ih_prev = handle;
	handle->ih_next = hs->hs_used_handles;
	handle->ih_prev = NULL;
	handle->ih_address = iref->ir_block->cb_address;
	handle->ih_cache_reference = TRUE;
	handle->x.ih_cache_block = iref->ir_block;
	handle->ih_branch = iref->ir_branch;
	/* {HANDLE-COUNT-USAGE}
	 * This is safe because:
	 *
	 * I have an Slock on the cache block, and I have
	 * at least an Slock on the index.
	 * So this excludes anyone who is reading
	 * cb_handle_count in the index
	 * (all cache block writers, and the freer).
	 *
	 * The increment is safe because I have the list
	 * lock (hs_handles_lock), which is required by anyone else
	 * who increments or decrements this value.
	 */
	iref->ir_block->cb_handle_count++;
	hs->hs_used_handles = handle;
#ifdef CHECK_HANDLE_STRUCTS
	ic_check_handle_structs();
#endif
	ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
	xt_ind_release(ot, ind, XT_UNLOCK_READ, iref);
	return handle;
}
xtPublic void xt_ind_release_handle(XTIndHandlePtr handle, xtBool have_lock, XTThreadPtr thread)
{
	DcHandleSlotPtr	hs;
	XTIndBlockPtr	block = NULL;
	u_int			hash_idx = 0;
	DcSegmentPtr	seg = NULL;
	XTIndBlockPtr	xblock;

	(void) thread; /*DRIZZLED*/

	/* The lock order is:
	 * 1. Cache segment (cs_lock) - This is only by ind_free_block()!
	 * 1. S/Slock cache block (cb_lock)
	 * 2. List lock (cg_handles_lock).
	 * 3. Handle lock (ih_lock)
	 */
	if (!have_lock)
		xt_spinlock_lock(&handle->ih_lock);

	/* Get the lock on the cache page if required: */
	if (handle->ih_cache_reference) {
		u_int			file_id;
		xtIndexNodeID	address;

		block = handle->x.ih_cache_block;

		file_id = block->cb_file_id;
		address = block->cb_address;
		hash_idx = XT_NODE_ID(address) + (file_id * 223);
		seg = &ind_cac_globals.cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
		hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % ind_cac_globals.cg_hash_size;
	}

	xt_spinlock_unlock(&handle->ih_lock);

	/* Because of the lock order, I have to release the
	 * handle before I get a lock on the cache block.
	 *
	 * But, by doing this, this cache block may be gone!
	 */
	if (block) {
		IDX_CAC_READ_LOCK(seg, thread);
		xblock = seg->cs_hash_table[hash_idx];
		while (xblock) {
			if (block == xblock) {
				/* Found the block...
				 * {HANDLE-COUNT-SLOCK}
				 * 04.05.2009, changed to slock.
				 * The xlock causes too much contention
				 * on the cache block for read only loads.
				 */
				XT_IPAGE_READ_LOCK(&block->cb_lock);
				break;
			}
			xblock = xblock->cb_next;
		}
		if (!xblock)
			/* The block was freed before we could lock it: */
			block = NULL;
		IDX_CAC_UNLOCK(seg, thread);
	}

	hs = &ind_cac_globals.cg_handle_slot[handle->ih_address % XT_HANDLE_SLOTS];

	ID_HANDLE_LOCK(&hs->hs_handles_lock);
#ifdef CHECK_HANDLE_STRUCTS
	ic_check_handle_structs();
#endif

	/* I don't need to lock the handle because I have locked
	 * the list, and no other thread can change the
	 * handle without first getting a lock on the list.
	 *
	 * In addition, the caller is the only owner of the
	 * handle, and the only thread with an independent
	 * reference to the handle.
	 * All other access occurs over the list.
	 */

	/* Remove the reference to the cache or a handle block: */
	if (handle->ih_cache_reference) {
		ASSERT_NS(block == handle->x.ih_cache_block);
		ASSERT_NS(block && block->cb_handle_count > 0);
		/* {HANDLE-COUNT-USAGE}
		 * This is safe here because I have excluded
		 * all readers by taking an Xlock on the
		 * cache block (CHANGED - see below).
		 *
		 * {HANDLE-COUNT-SLOCK}
		 * 04.05.2009, changed to slock.
		 * Should be OK, because:
		 * I have a lock on the list lock (hs_handles_lock),
		 * which prevents concurrent updates to cb_handle_count.
		 *
		 * I also have a read lock on the cache block,
		 * but not a lock on the index. As a result, we cannot
		 * exclude all index writers (and readers of
		 * the cache block).
		 */
		block->cb_handle_count--;
	}
	else {
		XTIndHandleBlockPtr	hptr = handle->x.ih_handle_block;

		ASSERT_NS(!handle->ih_cache_reference);
		ASSERT_NS(hptr->hb_ref_count > 0);
		hptr->hb_ref_count--;
		if (!hptr->hb_ref_count) {
			/* Put it back on the free list: */
			hptr->hb_next = hs->hs_free_blocks;
			hs->hs_free_blocks = hptr;
		}
	}

	/* Unlink the handle: */
	if (handle->ih_next)
		handle->ih_next->ih_prev = handle->ih_prev;
	if (handle->ih_prev)
		handle->ih_prev->ih_next = handle->ih_next;
	if (hs->hs_used_handles == handle)
		hs->hs_used_handles = handle->ih_next;

	/* Put it on the free list: */
	handle->ih_next = hs->hs_free_handles;
	hs->hs_free_handles = handle;

#ifdef CHECK_HANDLE_STRUCTS
	ic_check_handle_structs();
#endif
	ID_HANDLE_UNLOCK(&hs->hs_handles_lock);

	if (block)
		XT_IPAGE_UNLOCK(&block->cb_lock, FALSE);
}
/* Call this function before a referenced cache block is modified!
 * This function is called by index updaters.
 */
xtPublic xtBool xt_ind_copy_on_write(XTIndReferencePtr iref)
{
	DcHandleSlotPtr		hs;
	XTIndHandleBlockPtr	hptr;
	u_int				branch_size;
	XTIndHandlePtr		handle;
	u_int				i = 0;

	hs = &ind_cac_globals.cg_handle_slot[iref->ir_block->cb_address % XT_HANDLE_SLOTS];

	ID_HANDLE_LOCK(&hs->hs_handles_lock);

	/* {HANDLE-COUNT-USAGE}
	 * This is only called by updaters of this index block, or
	 * the freer, which holds an Xlock on the index block.
	 * These are all mutually exclusive for the index block.
	 *
	 * {HANDLE-COUNT-SLOCK}
	 * Do this check again, after we have the list lock (hs_handles_lock).
	 * There is a small chance that the count has changed since we last
	 * checked, because xt_ind_release_handle() only holds
	 * an slock on the index page.
	 *
	 * An updater can sometimes have an Xlock on the index and an slock
	 * on the cache block. In this case xt_ind_release_handle()
	 * could have run through.
	 */
	if (!iref->ir_block->cb_handle_count) {
		ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
		return OK;
	}

#ifdef CHECK_HANDLE_STRUCTS
	ic_check_handle_structs();
#endif
	if ((hptr = hs->hs_free_blocks))
		hs->hs_free_blocks = hptr->hb_next;
	else {
		if (!(hptr = (XTIndHandleBlockPtr) xt_malloc_ns(sizeof(XTIndHandleBlockRec)))) {
			ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
			return FAILED;
		}
	}

	branch_size = XT_GET_INDEX_BLOCK_LEN(XT_GET_DISK_2(iref->ir_branch->tb_size_2));
	memcpy(&hptr->hb_branch, iref->ir_branch, branch_size);
	hptr->hb_ref_count = iref->ir_block->cb_handle_count;

	handle = hs->hs_used_handles;
	while (handle) {
		if (handle->ih_branch == iref->ir_branch) {
			i++;
			xt_spinlock_lock(&handle->ih_lock);
			ASSERT_NS(handle->ih_cache_reference);
			handle->ih_cache_reference = FALSE;
			handle->x.ih_handle_block = hptr;
			handle->ih_branch = &hptr->hb_branch;
			xt_spinlock_unlock(&handle->ih_lock);
			if (i == hptr->hb_ref_count)
				break;
		}
		handle = handle->ih_next;
	}

	ASSERT_NS(hptr->hb_ref_count == i);

	/* {HANDLE-COUNT-USAGE}
	 * It is safe to modify cb_handle_count when I have the
	 * list lock, and I have excluded all readers!
	 */
	iref->ir_block->cb_handle_count = 0;
#ifdef CHECK_HANDLE_STRUCTS
	ic_check_handle_structs();
#endif
	ID_HANDLE_UNLOCK(&hs->hs_handles_lock);

	return OK;
}
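/* In short (a descriptive note, not from the original source): when an updater
 * is about to modify a cached page that readers still reference through
 * handles, the current branch image is copied into a heap-allocated
 * XTIndHandleBlockRec and every handle that points at the page is switched
 * over to that copy. Readers therefore keep a stable snapshot while the cache
 * block itself is changed underneath them.
 */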
xtPublic void xt_ind_lock_handle(XTIndHandlePtr handle)
{
	xt_spinlock_lock(&handle->ih_lock);
}

xtPublic void xt_ind_unlock_handle(XTIndHandlePtr handle)
{
	xt_spinlock_unlock(&handle->ih_lock);
}
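/* Typical reader usage of the handle functions (an illustrative sketch only;
 * the real callers are the index scan routines in index_xt.cc):
 *
 *	XTIndHandlePtr h;
 *
 *	if ((h = xt_ind_get_handle(ot, ind, &iref))) {
 *		xt_ind_lock_handle(h);
 *		// ... read key data via h->ih_branch ...
 *		xt_ind_unlock_handle(h);
 *		xt_ind_release_handle(h, FALSE, ot->ot_thread);
 *	}
 */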
/*
 * -----------------------------------------------------------------------
 */

/*
 * Initialize the disk cache.
 */
xtPublic void xt_ind_init(XTThreadPtr self, size_t cache_size)
{
	XTIndBlockPtr	block;

	init_key_cache(&my_cache, 1024, cache_size, 100, 300);

	/* Memory is devoted to the page data alone, I no longer count the size of the directory,
	 * or the page overhead: */
	ind_cac_globals.cg_block_count = cache_size / XT_INDEX_PAGE_SIZE;
	ind_cac_globals.cg_hash_size = ind_cac_globals.cg_block_count / (IDX_CAC_SEGMENT_COUNT >> 1);
	ind_cac_globals.cg_max_free = ind_cac_globals.cg_block_count / 10;
	if (ind_cac_globals.cg_max_free < 8)
		ind_cac_globals.cg_max_free = 8;
	if (ind_cac_globals.cg_max_free > 128)
		ind_cac_globals.cg_max_free = 128;

	for (u_int i=0; i<IDX_CAC_SEGMENT_COUNT; i++) {
		ind_cac_globals.cg_segment[i].cs_hash_table = (XTIndBlockPtr *) xt_calloc(self, ind_cac_globals.cg_hash_size * sizeof(XTIndBlockPtr));
		IDX_CAC_INIT_LOCK(self, &ind_cac_globals.cg_segment[i]);
	}

	block = (XTIndBlockPtr) xt_malloc(self, ind_cac_globals.cg_block_count * sizeof(XTIndBlockRec));
	ind_cac_globals.cg_blocks = block;
	xt_init_mutex_with_autoname(self, &ind_cac_globals.cg_lock);

#ifdef XT_USE_DIRECT_IO_ON_INDEX
	/* Direct I/O requires a page aligned buffer: */
	xtWord1	*buffer;
	size_t	psize = getpagesize();
	size_t	diff;

	buffer = (xtWord1 *) xt_malloc(self, (ind_cac_globals.cg_block_count * XT_INDEX_PAGE_SIZE));
	diff = (size_t) buffer % psize;
	if (diff != 0) {
		xt_free(self, buffer);
		buffer = (xtWord1 *) xt_malloc(self, (ind_cac_globals.cg_block_count * XT_INDEX_PAGE_SIZE) + psize);
		diff = (size_t) buffer % psize;
		if (diff != 0)
			diff = psize - diff;
	}
	ind_cac_globals.cg_buffer = buffer;
	buffer += diff;
#endif

	for (u_int i=0; i<ind_cac_globals.cg_block_count; i++) {
		XT_IPAGE_INIT_LOCK(self, &block->cb_lock);
		block->cb_state = IDX_CAC_BLOCK_FREE;
		block->cb_next = ind_cac_globals.cg_free_list;
#ifdef XT_USE_DIRECT_IO_ON_INDEX
		block->cb_data = buffer;
		buffer += XT_INDEX_PAGE_SIZE;
#endif
#ifdef CHECK_BLOCK_TRAILERS
		XT_SET_DISK_4(block->cp_check, 0xDEADBEEF);
#endif
		ind_cac_globals.cg_free_list = block;
		block++;
	}
	ind_cac_globals.cg_free_count = ind_cac_globals.cg_block_count;
#ifdef DEBUG_CHECK_IND_CACHE
	ind_cac_globals.cg_reserved_by_ots = 0;
#endif
	ind_handle_init(self);
}
#ifdef CHECK_BLOCK_TRAILERS
xtPublic void check_block_trailers()
{
	XTIndBlockPtr	block;

	block = ind_cac_globals.cg_blocks;
	for (u_int i=0; i<ind_cac_globals.cg_block_count; i++) {
		ASSERT_NS(XT_GET_DISK_4(block->cp_check) == 0xDEADBEEF);
		block++;
	}
}
#endif
xtPublic void xt_ind_exit(XTThreadPtr self)
{
	end_key_cache(&my_cache, 1);

	for (u_int i=0; i<IDX_CAC_SEGMENT_COUNT; i++) {
		if (ind_cac_globals.cg_segment[i].cs_hash_table) {
			xt_free(self, ind_cac_globals.cg_segment[i].cs_hash_table);
			ind_cac_globals.cg_segment[i].cs_hash_table = NULL;
			IDX_CAC_FREE_LOCK(self, &ind_cac_globals.cg_segment[i]);
		}
	}

	/* Must be done before freeing the blocks! */
	ind_handle_exit(self);

	if (ind_cac_globals.cg_blocks) {
		xt_free(self, ind_cac_globals.cg_blocks);
		ind_cac_globals.cg_blocks = NULL;
		xt_free_mutex(&ind_cac_globals.cg_lock);
	}
#ifdef XT_USE_DIRECT_IO_ON_INDEX
	if (ind_cac_globals.cg_buffer) {
		xt_free(self, ind_cac_globals.cg_buffer);
		ind_cac_globals.cg_buffer = NULL;
	}
#endif

	memset(&ind_cac_globals, 0, sizeof(ind_cac_globals));
}
xtPublic xtInt8 xt_ind_get_usage()
{
	return (xtInt8) (ind_cac_globals.cg_block_count - ind_cac_globals.cg_free_count) * (xtInt8) XT_INDEX_PAGE_SIZE;
}

xtPublic xtInt8 xt_ind_get_size()
{
	return (xtInt8) ind_cac_globals.cg_block_count * (xtInt8) XT_INDEX_PAGE_SIZE;
}

xtPublic u_int xt_ind_get_blocks()
{
	return ind_cac_globals.cg_block_count;
}
/*
 * -----------------------------------------------------------------------
 */

xtPublic void xt_ind_check_cache(XTIndexPtr ind)
{
	XTIndBlockPtr	block;
	u_int			free_count, inuse_count, clean_count;
	xtBool			check_count = FALSE;

	if (ind == (XTIndex *) 1) {
		ind = NULL;
		check_count = TRUE;
	}

	// Check the dirty list:
	if (ind) {
		u_int cnt = 0;

		block = ind->mi_dirty_list;
		while (block) {
			cnt++;
			ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_DIRTY);
			block = block->cb_dirty_next;
		}
		ASSERT_NS(ind->mi_dirty_blocks == cnt);
	}

	xt_lock_mutex_ns(&ind_cac_globals.cg_lock);

	// Check the free list:
	free_count = 0;
	block = ind_cac_globals.cg_free_list;
	while (block) {
		free_count++;
		ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_FREE);
		block = block->cb_next;
	}
	ASSERT_NS(ind_cac_globals.cg_free_count == free_count);

	/* Check the LRU list: */
	XTIndBlockPtr list_block, plist_block;

	plist_block = NULL;
	inuse_count = 0;
	clean_count = 0;
	list_block = ind_cac_globals.cg_lru_block;
	if (list_block) {
		ASSERT_NS(ind_cac_globals.cg_mru_block != NULL);
		ASSERT_NS(ind_cac_globals.cg_mru_block->cb_mr_used == NULL);
		ASSERT_NS(list_block->cb_lr_used == NULL);
		while (list_block) {
			inuse_count++;
			ASSERT_NS(IDX_CAC_NOT_FREE(list_block->cb_state));
			if (list_block->cb_state == IDX_CAC_BLOCK_CLEAN)
				clean_count++;
			ASSERT_NS(block != list_block);
			ASSERT_NS(list_block->cb_lr_used == plist_block);
			plist_block = list_block;
			list_block = list_block->cb_mr_used;
		}
		ASSERT_NS(ind_cac_globals.cg_mru_block == plist_block);
	}
	else
		ASSERT_NS(ind_cac_globals.cg_mru_block == NULL);

#ifdef DEBUG_CHECK_IND_CACHE
	ASSERT_NS(free_count + inuse_count + ind_cac_globals.cg_reserved_by_ots + ind_cac_globals.cg_read_count == ind_cac_globals.cg_block_count);
#endif
	xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);

	if (check_count) {
		/* We have just flushed, check how much is now free/clean. */
		if (free_count + clean_count < 10) {
			/* This could be a problem: */
			printf("Cache very low!\n");
		}
	}
}
/*
 * -----------------------------------------------------------------------
 * FREEING INDEX CACHE
 */

/*
 * This function returns TRUE if the block is freed.
 * This function returns FALSE if the block cannot be found, or the
 * block is not clean.
 *
 * We also return FALSE if we cannot copy the block to the handle
 * (if this is required). This will be due to out-of-memory!
 */
static xtBool ind_free_block(XTOpenTablePtr ot, XTIndBlockPtr block)
{
	XTIndBlockPtr	xblock, pxblock;
	u_int			hash_idx;
	u_int			file_id;
	xtIndexNodeID	address;
	DcSegmentPtr	seg;

	(void) ot; /*DRIZZLED*/

#ifdef DEBUG_CHECK_IND_CACHE
	xt_ind_check_cache(NULL);
#endif
	file_id = block->cb_file_id;
	address = block->cb_address;

	hash_idx = XT_NODE_ID(address) + (file_id * 223);
	seg = &ind_cac_globals.cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
	hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % ind_cac_globals.cg_hash_size;

	IDX_CAC_WRITE_LOCK(seg, ot->ot_thread);

	pxblock = NULL;
	xblock = seg->cs_hash_table[hash_idx];
	while (xblock) {
		if (block == xblock) {
			/* Found the block... */
			/* It is possible that a thread enters this code holding a
			 * lock on a page. This can cause a deadlock:
			 *
			 * #0	0x91faa2ce in semaphore_wait_signal_trap
			 * #1	0x91fb1da5 in pthread_mutex_lock
			 * #2	0x00e2ec13 in xt_p_mutex_lock at pthread_xt.cc:544
			 * #3	0x00e6c30a in xt_xsmutex_xlock at lock_xt.cc:1547
			 * #4	0x00dee402 in ind_free_block at cache_xt.cc:879
			 * #5	0x00dee76a in ind_cac_free_lru_blocks at cache_xt.cc:1033
			 * #6	0x00def8d1 in xt_ind_reserve at cache_xt.cc:1513
			 * #7	0x00e22118 in xt_idx_insert at index_xt.cc:2047
			 * #8	0x00e4d7ee in xt_tab_new_record at table_xt.cc:4702
			 * #9	0x00e0ff0b in ha_pbxt::write_row at ha_pbxt.cc:2340
			 * #10	0x0023a00f in handler::ha_write_row at handler.cc:4570
			 * #11	0x001a32c8 in write_record at sql_insert.cc:1568
			 * #12	0x001ab635 in insert_query at sql_insert.cc:812
			 * #13	0x0010e068 in mysql_execute_command at sql_parse.cc:3066
			 * #14	0x0011480d in mysql_parse at sql_parse.cc:5787
			 * #15	0x00115afb in dispatch_command at sql_parse.cc:1200
			 * #16	0x00116de2 in do_command at sql_parse.cc:857
			 * #17	0x00101ee4 in handle_one_connection at sql_connect.cc:1115
			 * #18	0x91fdb155 in _pthread_start
			 * #19	0x91fdb012 in thread_start
			 *
			 * #0	0x91fb146e in __semwait_signal
			 * #1	0x91fb12ef in nanosleep$UNIX2003
			 * #2	0x91fb1236 in usleep$UNIX2003
			 * #3	0x00e52112 in xt_yield at thread_xt.cc:1274
			 * #4	0x00e6c0eb in xt_spinxslock_xlock at lock_xt.cc:1456
			 * #5	0x00dee444 in ind_free_block at cache_xt.cc:886
			 * #6	0x00dee76a in ind_cac_free_lru_blocks at cache_xt.cc:1033
			 * #7	0x00deeaf0 in ind_cac_fetch at cache_xt.cc:1130
			 * #8	0x00def604 in xt_ind_fetch at cache_xt.cc:1386
			 * #9	0x00e2159a in xt_idx_update_row_id at index_xt.cc:2489
			 * #10	0x00e603c8 in xn_sw_clean_indices at xaction_xt.cc:1932
			 * #11	0x00e606d4 in xn_sw_cleanup_variation at xaction_xt.cc:2056
			 * #12	0x00e60e29 in xn_sw_cleanup_xact at xaction_xt.cc:2276
			 * #13	0x00e615ed in xn_sw_main at xaction_xt.cc:2433
			 * #14	0x00e61919 in xn_sw_run_thread at xaction_xt.cc:2564
			 * #15	0x00e53f80 in thr_main at thread_xt.cc:1017
			 * #16	0x91fdb155 in _pthread_start
			 * #17	0x91fdb012 in thread_start
			 *
			 * So we back off if a lock is held!
			 */
			if (!XT_IPAGE_WRITE_TRY_LOCK(&block->cb_lock, ot->ot_thread->t_id)) {
				IDX_CAC_UNLOCK(seg, ot->ot_thread);
#ifdef DEBUG_CHECK_IND_CACHE
				xt_ind_check_cache(NULL);
#endif
				return FALSE;
			}
			if (block->cb_state != IDX_CAC_BLOCK_CLEAN) {
				/* This block cannot be freed: */
				XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
				IDX_CAC_UNLOCK(seg, ot->ot_thread);
#ifdef DEBUG_CHECK_IND_CACHE
				xt_ind_check_cache(NULL);
#endif
				return FALSE;
			}
			goto free_the_block;
		}
		pxblock = xblock;
		xblock = xblock->cb_next;
	}

	IDX_CAC_UNLOCK(seg, ot->ot_thread);

	/* Not found (this can happen, if block was freed by another thread) */
#ifdef DEBUG_CHECK_IND_CACHE
	xt_ind_check_cache(NULL);
#endif
	return FALSE;

	free_the_block:

	/* If the block is referenced by a handle, then we
	 * have to copy the data to the handle before we
	 * free the block:
	 */
	/* {HANDLE-COUNT-USAGE}
	 * This access is safe because:
	 *
	 * We have an Xlock on the cache block, which excludes
	 * all other writers that want to change the cache block
	 * and also all readers of the cache block, because
	 * they all have at least an Slock on the cache block.
	 */
	if (block->cb_handle_count) {
		XTIndReferenceRec	iref;

		iref.ir_xlock = TRUE;
		iref.ir_updated = FALSE;
		iref.ir_block = block;
		iref.ir_branch = (XTIdxBranchDPtr) block->cb_data;
		if (!xt_ind_copy_on_write(&iref)) {
			XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
			IDX_CAC_UNLOCK(seg, ot->ot_thread);
			return FALSE;
		}
	}

	/* Block is clean, remove from the hash table: */
	if (pxblock)
		pxblock->cb_next = block->cb_next;
	else
		seg->cs_hash_table[hash_idx] = block->cb_next;

	xt_lock_mutex_ns(&ind_cac_globals.cg_lock);

	/* Remove from the MRU list: */
	if (ind_cac_globals.cg_lru_block == block)
		ind_cac_globals.cg_lru_block = block->cb_mr_used;
	if (ind_cac_globals.cg_mru_block == block)
		ind_cac_globals.cg_mru_block = block->cb_lr_used;

	/* Note, I am updating blocks for which I have no lock
	 * here. But I think this is OK because I have a lock
	 * on the LRU list (cg_lock):
	 */
	if (block->cb_lr_used)
		block->cb_lr_used->cb_mr_used = block->cb_mr_used;
	if (block->cb_mr_used)
		block->cb_mr_used->cb_lr_used = block->cb_lr_used;

	/* The block is now free: */
	block->cb_next = ind_cac_globals.cg_free_list;
	ind_cac_globals.cg_free_list = block;
	ind_cac_globals.cg_free_count++;
	ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
	block->cb_state = IDX_CAC_BLOCK_FREE;
	IDX_TRACE("%d- f%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(block->cb_data));

	/* Unlock BEFORE the block is reused! */
	XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);

	xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);

	IDX_CAC_UNLOCK(seg, ot->ot_thread);

#ifdef DEBUG_CHECK_IND_CACHE
	xt_ind_check_cache(NULL);
#endif
	return TRUE;
}
#define IND_CACHE_MAX_BLOCKS_TO_FREE	100

/*
 * Return the number of blocks freed.
 *
 * The idea is to grab a list of blocks to free.
 * The list consists of the LRU blocks that are
 * clean.
 *
 * Free as many as possible (up to max of blocks_required)
 * from the list, even if LRU position has changed
 * (or we have a race if there are too few blocks).
 * However, if the block cannot be found, or is dirty
 * we must skip it.
 *
 * Repeat until we find no blocks for the list, or
 * we have freed 'blocks_required'.
 *
 * 'not_this' is a block that must not be freed because
 * it is locked by the calling thread!
 */
static u_int ind_cac_free_lru_blocks(XTOpenTablePtr ot, u_int blocks_required, XTIdxBranchDPtr not_this)
{
	register DcGlobalsRec	*dcg = &ind_cac_globals;
	XTIndBlockPtr			to_free[IND_CACHE_MAX_BLOCKS_TO_FREE];
	int						count;
	XTIndBlockPtr			block;
	u_int					blocks_freed = 0;
	XTIndBlockPtr			locked_block;

#ifdef XT_USE_DIRECT_IO_ON_INDEX
#error This will not work!
#endif
	locked_block = (XTIndBlockPtr) ((xtWord1 *) not_this - offsetof(XTIndBlockRec, cb_data));

	retry:
	xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
	block = dcg->cg_lru_block;
	count = 0;
	while (block && count < IND_CACHE_MAX_BLOCKS_TO_FREE) {
		if (block != locked_block && block->cb_state == IDX_CAC_BLOCK_CLEAN) {
			to_free[count] = block;
			count++;
		}
		block = block->cb_mr_used;
	}
	xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);

	if (!count)
		return blocks_freed;

	for (int i=0; i<count; i++) {
		if (ind_free_block(ot, to_free[i]))
			blocks_freed++;
		if (blocks_freed >= blocks_required &&
			ind_cac_globals.cg_free_count >= ind_cac_globals.cg_max_free + blocks_required)
			return blocks_freed;
	}
	goto retry;
}
/*
 * -----------------------------------------------------------------------
 * MAIN CACHE FUNCTIONS
 */

/*
 * Fetch the block. Note, if we are about to write the block
 * then there is no need to read it from disk!
 */
static XTIndBlockPtr ind_cac_fetch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, DcSegmentPtr *ret_seg, xtBool read_data)
{
	register XTOpenFilePtr	file = ot->ot_ind_file;
	register XTIndBlockPtr	block, new_block;
	register DcSegmentPtr	seg;
	register u_int			hash_idx;
	register DcGlobalsRec	*dcg = &ind_cac_globals;
	size_t					red_size;

#ifdef DEBUG_CHECK_IND_CACHE
	xt_ind_check_cache(NULL);
#endif
	/* Address, plus file ID multiplied by my favorite prime number! */
	hash_idx = XT_NODE_ID(address) + (file->fr_id * 223);
	seg = &dcg->cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
	hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % dcg->cg_hash_size;

	IDX_CAC_READ_LOCK(seg, ot->ot_thread);
	block = seg->cs_hash_table[hash_idx];
	while (block) {
		if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
			ASSERT_NS(block->cb_state != IDX_CAC_BLOCK_FREE);

			/* Check how recently this page has been used: */
			if (XT_TIME_DIFF(block->cb_ru_time, dcg->cg_ru_now) > (dcg->cg_block_count >> 1)) {
				xt_lock_mutex_ns(&dcg->cg_lock);

				/* Move to the front of the MRU list: */
				block->cb_ru_time = ++dcg->cg_ru_now;
				if (dcg->cg_mru_block != block) {
					/* Remove from the MRU list: */
					if (dcg->cg_lru_block == block)
						dcg->cg_lru_block = block->cb_mr_used;
					if (block->cb_lr_used)
						block->cb_lr_used->cb_mr_used = block->cb_mr_used;
					if (block->cb_mr_used)
						block->cb_mr_used->cb_lr_used = block->cb_lr_used;

					/* Make the block the most recently used: */
					if ((block->cb_lr_used = dcg->cg_mru_block))
						dcg->cg_mru_block->cb_mr_used = block;
					block->cb_mr_used = NULL;
					dcg->cg_mru_block = block;
					if (!dcg->cg_lru_block)
						dcg->cg_lru_block = block;
				}

				xt_unlock_mutex_ns(&dcg->cg_lock);
			}

			*ret_seg = seg;
#ifdef DEBUG_CHECK_IND_CACHE
			xt_ind_check_cache(NULL);
#endif
			ot->ot_thread->st_statistics.st_ind_cache_hit++;
			return block;
		}
		block = block->cb_next;
	}

	/* Block not found... */
	IDX_CAC_UNLOCK(seg, ot->ot_thread);

	/* Check the open table reserve list first: */
	if ((new_block = ot->ot_ind_res_bufs)) {
		ot->ot_ind_res_bufs = new_block->cb_next;
		ot->ot_ind_res_count--;
#ifdef DEBUG_CHECK_IND_CACHE
		xt_lock_mutex_ns(&dcg->cg_lock);
		dcg->cg_reserved_by_ots--;
		dcg->cg_read_count++;
		xt_unlock_mutex_ns(&dcg->cg_lock);
#endif
		goto use_free_block;
	}

	free_some_blocks:
	if (!dcg->cg_free_list) {
		if (!ind_cac_free_lru_blocks(ot, 1, NULL)) {
			if (!dcg->cg_free_list) {
				xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_INDEX_CACHE);
#ifdef DEBUG_CHECK_IND_CACHE
				xt_ind_check_cache(NULL);
#endif
				return NULL;
			}
		}
	}

	/* Get a free block: */
	xt_lock_mutex_ns(&dcg->cg_lock);
	if (!(new_block = dcg->cg_free_list)) {
		xt_unlock_mutex_ns(&dcg->cg_lock);
		goto free_some_blocks;
	}
	ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_FREE);
	dcg->cg_free_list = new_block->cb_next;
	dcg->cg_free_count--;
#ifdef DEBUG_CHECK_IND_CACHE
	dcg->cg_read_count++;
#endif
	xt_unlock_mutex_ns(&dcg->cg_lock);

	use_free_block:
	new_block->cb_address = address;
	new_block->cb_file_id = file->fr_id;
	ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_FREE);
	new_block->cb_state = IDX_CAC_BLOCK_CLEAN;
	new_block->cb_handle_count = 0;
	new_block->cp_del_count = 0;
	new_block->cb_dirty_next = NULL;
	new_block->cb_dirty_prev = NULL;
#ifdef IND_OPT_DATA_WRITTEN
	new_block->cb_header = FALSE;
	new_block->cb_min_pos = 0xFFFF;
	new_block->cb_max_pos = 0;
#endif

	if (read_data) {
		if (!xt_pread_file(file, xt_ind_node_to_offset(ot->ot_table, address), XT_INDEX_PAGE_SIZE, 0, new_block->cb_data, &red_size, &ot->ot_thread->st_statistics.st_ind, ot->ot_thread)) {
			xt_lock_mutex_ns(&dcg->cg_lock);
			new_block->cb_next = dcg->cg_free_list;
			dcg->cg_free_list = new_block;
			dcg->cg_free_count++;
#ifdef DEBUG_CHECK_IND_CACHE
			dcg->cg_read_count--;
#endif
			ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_CLEAN);
			new_block->cb_state = IDX_CAC_BLOCK_FREE;
			IDX_TRACE("%d- F%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
			xt_unlock_mutex_ns(&dcg->cg_lock);
#ifdef DEBUG_CHECK_IND_CACHE
			xt_ind_check_cache(NULL);
#endif
			return NULL;
		}
		IDX_TRACE("%d- R%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
		ot->ot_thread->st_statistics.st_ind_cache_miss++;
	}
	else
		red_size = 0;
	// PMC - I don't think this is required! memset(new_block->cb_data + red_size, 0, XT_INDEX_PAGE_SIZE - red_size);

	IDX_CAC_WRITE_LOCK(seg, ot->ot_thread);
	block = seg->cs_hash_table[hash_idx];
	while (block) {
		if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
			/* Oops, someone else was faster! */
			xt_lock_mutex_ns(&dcg->cg_lock);
			new_block->cb_next = dcg->cg_free_list;
			dcg->cg_free_list = new_block;
			dcg->cg_free_count++;
#ifdef DEBUG_CHECK_IND_CACHE
			dcg->cg_read_count--;
#endif
			ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_CLEAN);
			new_block->cb_state = IDX_CAC_BLOCK_FREE;
			IDX_TRACE("%d- F%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
			xt_unlock_mutex_ns(&dcg->cg_lock);
			goto done_ok;
		}
		block = block->cb_next;
	}
	block = new_block;

	/* Make the block the most recently used: */
	xt_lock_mutex_ns(&dcg->cg_lock);
	block->cb_ru_time = ++dcg->cg_ru_now;
	if ((block->cb_lr_used = dcg->cg_mru_block))
		dcg->cg_mru_block->cb_mr_used = block;
	block->cb_mr_used = NULL;
	dcg->cg_mru_block = block;
	if (!dcg->cg_lru_block)
		dcg->cg_lru_block = block;
#ifdef DEBUG_CHECK_IND_CACHE
	dcg->cg_read_count--;
#endif
	xt_unlock_mutex_ns(&dcg->cg_lock);

	/* {LAZY-DEL-INDEX-ITEMS}
	 * Conditionally count the number of deleted entries in the index:
	 * We do this before other threads can read the block.
	 */
	if (ind && ind->mi_lazy_delete && read_data)
		xt_ind_count_deleted_items(ot->ot_table, ind, block);

	/* Add to the hash table: */
	block->cb_next = seg->cs_hash_table[hash_idx];
	seg->cs_hash_table[hash_idx] = block;

	done_ok:
	*ret_seg = seg;
#ifdef DEBUG_CHECK_IND_CACHE
	xt_ind_check_cache(NULL);
#endif
	return block;
}
static xtBool ind_cac_get(XTOpenTablePtr ot, xtIndexNodeID address, DcSegmentPtr *ret_seg, XTIndBlockPtr *ret_block)
{
	register XTOpenFilePtr	file = ot->ot_ind_file;
	register XTIndBlockPtr	block;
	register DcSegmentPtr	seg;
	register u_int			hash_idx;
	register DcGlobalsRec	*dcg = &ind_cac_globals;

	hash_idx = XT_NODE_ID(address) + (file->fr_id * 223);
	seg = &dcg->cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
	hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % dcg->cg_hash_size;

	IDX_CAC_READ_LOCK(seg, ot->ot_thread);
	block = seg->cs_hash_table[hash_idx];
	while (block) {
		if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
			ASSERT_NS(block->cb_state != IDX_CAC_BLOCK_FREE);

			*ret_seg = seg;
			*ret_block = block;
			return OK;
		}
		block = block->cb_next;
	}
	IDX_CAC_UNLOCK(seg, ot->ot_thread);

	/* Block not found: */
	*ret_seg = NULL;
	*ret_block = NULL;
	return OK;
}
xtPublic xtBool xt_ind_write(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, size_t size, xtWord1 *data)
{
	XTIndBlockPtr	block;
	DcSegmentPtr	seg;

	if (!(block = ind_cac_fetch(ot, ind, address, &seg, FALSE)))
		return FAILED;

	XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
	if (block->cb_state == IDX_CAC_BLOCK_FLUSHING) {
		if (!ot->ot_table->tab_ind_flush_ilog->il_write_block(ot, block)) {
			XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
			IDX_CAC_UNLOCK(seg, ot->ot_thread);
			return FAILED;
		}
	}
#ifdef IND_OPT_DATA_WRITTEN
	block->cb_header = TRUE;
	block->cb_min_pos = 0;
	if (size-XT_INDEX_PAGE_HEAD_SIZE > block->cb_max_pos)
		block->cb_max_pos = size-XT_INDEX_PAGE_HEAD_SIZE;
	ASSERT_NS(block->cb_max_pos <= XT_INDEX_PAGE_SIZE-XT_INDEX_PAGE_HEAD_SIZE);
	ASSERT_NS(block->cb_min_pos < block->cb_max_pos);
#endif
	ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
	memcpy(block->cb_data, data, size);
	if (block->cb_state != IDX_CAC_BLOCK_DIRTY) {
		TRACK_BLOCK_WRITE(offset);
		xt_spinlock_lock(&ind->mi_dirty_lock);
		if ((block->cb_dirty_next = ind->mi_dirty_list))
			ind->mi_dirty_list->cb_dirty_prev = block;
		block->cb_dirty_prev = NULL;
		ind->mi_dirty_list = block;
		ind->mi_dirty_blocks++;
		xt_spinlock_unlock(&ind->mi_dirty_lock);
		if (block->cb_state != IDX_CAC_BLOCK_LOGGED) {
			ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
			ot->ot_thread->st_statistics.st_ind_cache_dirty++;
		}
		block->cb_state = IDX_CAC_BLOCK_DIRTY;
	}
	XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
	IDX_CAC_UNLOCK(seg, ot->ot_thread);
#ifdef XT_TRACK_INDEX_UPDATES
	ot->ot_ind_changed++;
#endif
#ifdef CHECK_BLOCK_TRAILERS
	check_block_trailers();
#endif
	return OK;
}
/*
 * Update the cache, if in RAM.
 */
xtPublic xtBool xt_ind_write_cache(XTOpenTablePtr ot, xtIndexNodeID address, size_t size, xtWord1 *data)
{
	XTIndBlockPtr	block;
	DcSegmentPtr	seg;

	if (!ind_cac_get(ot, address, &seg, &block))
		return FAILED;

	if (block) {
		XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
		/* This should only be done to pages that are free, which
		 * are not on the dirty list, so they must be clean!
		 */
		ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
		memcpy(block->cb_data, data, size);

		XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
		IDX_CAC_UNLOCK(seg, ot->ot_thread);
	}

	return OK;
}
xtPublic xtBool xt_ind_get(XTOpenTablePtr ot, xtIndexNodeID address, XTIndReferencePtr iref)
{
	XTIndBlockPtr	block;
	DcSegmentPtr	seg;

	if (!ind_cac_get(ot, address, &seg, &block))
		return FAILED;

	if (block) {
		XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
		ASSERT_NS(IDX_CAC_NOT_FREE(block->cb_state));
		IDX_CAC_UNLOCK(seg, ot->ot_thread);
		iref->ir_block = block;
		iref->ir_branch = (XTIdxBranchDPtr) block->cb_data;
	}
	else {
		iref->ir_block = NULL;
		iref->ir_branch = NULL;
	}
	iref->ir_xlock = TRUE;
	iref->ir_updated = FALSE;

	return OK;
}
/*
 * Note, this function may only be called if the block has
 * been freed.
 */
xtPublic xtBool xt_ind_free_block(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address)
{
	XTIndBlockPtr	block;
	DcSegmentPtr	seg;

	if (!ind_cac_get(ot, address, &seg, &block))
		return FAILED;
	if (block) {
		XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);

		if (block->cb_state == IDX_CAC_BLOCK_FLUSHING) {
			if (!ot->ot_table->tab_ind_flush_ilog->il_write_block(ot, block)) {
				XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
				IDX_CAC_UNLOCK(seg, ot->ot_thread);
				return FAILED;
			}
		}

		/* {PAGE-NO-IN-INDEX-FILE}
		 * This is the one exception to the rule that a block
		 * that is in the IDX_CAC_BLOCK_LOGGED state may be released.
		 */
		ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));

		if (block->cb_state == IDX_CAC_BLOCK_DIRTY) {
			/* Take the block off the dirty list: */
			xt_spinlock_lock(&ind->mi_dirty_lock);
			if (block->cb_dirty_next)
				block->cb_dirty_next->cb_dirty_prev = block->cb_dirty_prev;
			if (block->cb_dirty_prev)
				block->cb_dirty_prev->cb_dirty_next = block->cb_dirty_next;
			if (ind->mi_dirty_list == block)
				ind->mi_dirty_list = block->cb_dirty_next;
			ind->mi_dirty_blocks--;
			xt_spinlock_unlock(&ind->mi_dirty_lock);
			block->cb_state = IDX_CAC_BLOCK_CLEAN;
			ot->ot_thread->st_statistics.st_ind_cache_dirty--;
#ifdef IND_OPT_DATA_WRITTEN
			block->cb_header = FALSE;
			block->cb_min_pos = 0xFFFF;
			block->cb_max_pos = 0;
#endif
		}

		XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);

		IDX_CAC_UNLOCK(seg, ot->ot_thread);
	}

	return OK;
}
xtPublic xtBool xt_ind_read_bytes(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, size_t size, xtWord1 *data)
{
	XTIndBlockPtr	block;
	DcSegmentPtr	seg;

	if (!(block = ind_cac_fetch(ot, ind, address, &seg, TRUE)))
		return FAILED;

	XT_IPAGE_READ_LOCK(&block->cb_lock);
	memcpy(data, block->cb_data, size);
	XT_IPAGE_UNLOCK(&block->cb_lock, FALSE);
	IDX_CAC_UNLOCK(seg, ot->ot_thread);
	return OK;
}
xtPublic xtBool xt_ind_fetch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, XTPageLockType ltype, XTIndReferencePtr iref)
{
	register XTIndBlockPtr	block;
	DcSegmentPtr			seg;
	xtWord2					branch_size;
	u_int					rec_size;
	xtBool					xlock = FALSE;

#ifdef DEBUG
	ASSERT_NS(iref->ir_xlock == 2);
	ASSERT_NS(iref->ir_updated == 2);
#endif
	if (!(block = ind_cac_fetch(ot, ind, address, &seg, TRUE)))
		return FAILED;

	branch_size = XT_GET_DISK_2(((XTIdxBranchDPtr) block->cb_data)->tb_size_2);
	rec_size = XT_GET_INDEX_BLOCK_LEN(branch_size);
	if (rec_size < 2 || rec_size > XT_INDEX_PAGE_SIZE)
		goto failed_corrupt;
	if (ind->mi_fix_key) {
		rec_size -= 2;
		if (XT_IS_NODE(branch_size)) {
			if (rec_size != 0) {
				if (rec_size < XT_NODE_REF_SIZE)
					goto failed_corrupt;
				rec_size -= XT_NODE_REF_SIZE;
				if ((rec_size % (ind->mi_key_size + XT_RECORD_REF_SIZE + XT_NODE_REF_SIZE)) != 0)
					goto failed_corrupt;
			}
		}
		else {
			if ((rec_size % (ind->mi_key_size + XT_RECORD_REF_SIZE)) != 0)
				goto failed_corrupt;
		}
	}

	switch (ltype) {
		case XT_LOCK_READ:
			break;
		case XT_LOCK_WRITE:
			xlock = TRUE;
			break;
		case XT_XLOCK_LEAF:
			if (!XT_IS_NODE(branch_size))
				xlock = TRUE;
			break;
		case XT_XLOCK_DEL_LEAF:
			if (!XT_IS_NODE(branch_size)) {
				if (ot->ot_table->tab_dic.dic_no_lazy_delete)
					xlock = TRUE;
				else {
					/*
					 * {LAZY-DEL-INDEX-ITEMS}
					 *
					 * We are fetching a page for delete purposes.
					 * We decide here if we plan to do a lazy delete,
					 * or if we plan to compact the node.
					 *
					 * A lazy delete just requires a shared lock.
					 */
					if (ind->mi_lazy_delete) {
						/* If the number of deleted items is greater than
						 * half of the number of items that can fit in the
						 * page, then we will compact the node.
						 */
						if (!xt_idx_lazy_delete_on_leaf(ind, block, XT_GET_INDEX_BLOCK_LEN(branch_size)))
							xlock = TRUE;
					}
					else
						xlock = TRUE;
				}
			}
			break;
	}

	if ((iref->ir_xlock = xlock))
		XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
	else
		XT_IPAGE_READ_LOCK(&block->cb_lock);

	IDX_CAC_UNLOCK(seg, ot->ot_thread);

	/* Direct I/O requires that the buffer is 512 byte aligned.
	 * To do this, cb_data is turned into a pointer, instead
	 * of an array.
	 * As a result, we need to pass a pointer to both the
	 * cache block and the cache block data:
	 */
	iref->ir_updated = FALSE;
	iref->ir_block = block;
	iref->ir_branch = (XTIdxBranchDPtr) block->cb_data;
	return OK;

	failed_corrupt:
	IDX_CAC_UNLOCK(seg, ot->ot_thread);
	xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name);
	return FAILED;
}
xtPublic xtBool xt_ind_release(XTOpenTablePtr ot, XTIndexPtr ind, XTPageUnlockType XT_NDEBUG_UNUSED(utype), XTIndReferencePtr iref)
{
	register XTIndBlockPtr	block;

	block = iref->ir_block;

#ifdef DEBUG
	ASSERT_NS(iref->ir_xlock != 2);
	ASSERT_NS(iref->ir_updated != 2);
	if (iref->ir_updated)
		ASSERT_NS(utype == XT_UNLOCK_R_UPDATE || utype == XT_UNLOCK_W_UPDATE);
	else
		ASSERT_NS(utype == XT_UNLOCK_READ || utype == XT_UNLOCK_WRITE);
	if (iref->ir_xlock)
		ASSERT_NS(utype == XT_UNLOCK_WRITE || utype == XT_UNLOCK_W_UPDATE);
	else
		ASSERT_NS(utype == XT_UNLOCK_READ || utype == XT_UNLOCK_R_UPDATE);
#endif
	if (iref->ir_updated) {
#ifdef DEBUG
#ifdef IND_OPT_DATA_WRITTEN
		xtWord2	branch_size;
		u_int	rec_size;

		branch_size = XT_GET_DISK_2(((XTIdxBranchDPtr) block->cb_data)->tb_size_2);
		rec_size = XT_GET_INDEX_BLOCK_LEN(branch_size);

		ASSERT_NS(block->cb_min_pos <= rec_size-2);
		ASSERT_NS(block->cb_min_pos <= block->cb_max_pos);
		ASSERT_NS(block->cb_max_pos <= rec_size-2);
		ASSERT_NS(block->cb_max_pos <= XT_INDEX_PAGE_SIZE-2);
#endif
#endif
		/* The page was updated: */
		ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
		if (block->cb_state != IDX_CAC_BLOCK_DIRTY) {
			TRACK_BLOCK_WRITE(offset);
			xt_spinlock_lock(&ind->mi_dirty_lock);
			if ((block->cb_dirty_next = ind->mi_dirty_list))
				ind->mi_dirty_list->cb_dirty_prev = block;
			block->cb_dirty_prev = NULL;
			ind->mi_dirty_list = block;
			ind->mi_dirty_blocks++;
			xt_spinlock_unlock(&ind->mi_dirty_lock);
			if (block->cb_state != IDX_CAC_BLOCK_LOGGED) {
				ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
				ot->ot_thread->st_statistics.st_ind_cache_dirty++;
			}
			block->cb_state = IDX_CAC_BLOCK_DIRTY;
		}
	}

	XT_IPAGE_UNLOCK(&block->cb_lock, iref->ir_xlock);
#ifdef DEBUG
	iref->ir_xlock = 2;
	iref->ir_updated = 2;
#endif
	return OK;
}
xtPublic xtBool xt_ind_reserve(XTOpenTablePtr ot, u_int count, XTIdxBranchDPtr not_this)
{
	register XTIndBlockPtr	block;
	register DcGlobalsRec	*dcg = &ind_cac_globals;

#ifdef XT_TRACK_INDEX_UPDATES
	ot->ot_ind_reserved = count;
	ot->ot_ind_reads = 0;
#endif
#ifdef DEBUG_CHECK_IND_CACHE
	xt_ind_check_cache(NULL);
#endif
	while (ot->ot_ind_res_count < count) {
		if (!dcg->cg_free_list) {
			if (!ind_cac_free_lru_blocks(ot, count - ot->ot_ind_res_count, not_this)) {
				if (!dcg->cg_free_list) {
					xt_ind_free_reserved(ot);
					xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_INDEX_CACHE);
#ifdef DEBUG_CHECK_IND_CACHE
					xt_ind_check_cache(NULL);
#endif
					return FAILED;
				}
			}
		}

		/* Get a free block: */
		xt_lock_mutex_ns(&dcg->cg_lock);
		while (ot->ot_ind_res_count < count && (block = dcg->cg_free_list)) {
			ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_FREE);
			dcg->cg_free_list = block->cb_next;
			dcg->cg_free_count--;
			block->cb_next = ot->ot_ind_res_bufs;
			ot->ot_ind_res_bufs = block;
			ot->ot_ind_res_count++;
#ifdef DEBUG_CHECK_IND_CACHE
			dcg->cg_reserved_by_ots++;
#endif
		}
		xt_unlock_mutex_ns(&dcg->cg_lock);
	}
#ifdef DEBUG_CHECK_IND_CACHE
	xt_ind_check_cache(NULL);
#endif
	return OK;
}
xtPublic void xt_ind_free_reserved(XTOpenTablePtr ot)
{
#ifdef DEBUG_CHECK_IND_CACHE
	xt_ind_check_cache(NULL);
#endif
	if (ot->ot_ind_res_bufs) {
		register XTIndBlockPtr	block, fblock;
		register DcGlobalsRec	*dcg = &ind_cac_globals;

		xt_lock_mutex_ns(&dcg->cg_lock);
		block = ot->ot_ind_res_bufs;
		while (block) {
			fblock = block;
			block = block->cb_next;

			fblock->cb_next = dcg->cg_free_list;
			dcg->cg_free_list = fblock;
#ifdef DEBUG_CHECK_IND_CACHE
			dcg->cg_reserved_by_ots--;
#endif
			dcg->cg_free_count++;
		}
		xt_unlock_mutex_ns(&dcg->cg_lock);
		ot->ot_ind_res_bufs = NULL;
		ot->ot_ind_res_count = 0;
	}
#ifdef DEBUG_CHECK_IND_CACHE
	xt_ind_check_cache(NULL);
#endif
}
xtPublic void xt_ind_unreserve(XTOpenTablePtr ot)
{
	if (!ind_cac_globals.cg_free_list)
		xt_ind_free_reserved(ot);
}