/* Copyright (c) 2005 PrimeBase Technologies GmbH, Germany
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 * 2005-05-24 Paul McCullagh
#include "xt_config.h"
#include "pthread_xt.h"
#include "thread_xt.h"
#include "filesys_xt.h"
#define XT_TIME_DIFF(start, now) (\
 ((xtWord4) (now) < (xtWord4) (start)) ? \
 ((xtWord4) 0XFFFFFFFF - ((xtWord4) (start) - (xtWord4) (now))) : \
((xtWord4) (now) - (xtWord4) (start)))
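/* For illustration, a worked example of what XT_TIME_DIFF() computes (the
 * values below are hypothetical, not from a real run). The macro is used with
 * 32-bit usage counters such as cg_ru_now, which may wrap around:
 *
 *   xtWord4 start = 100, now = 250;
 *   XT_TIME_DIFF(start, now);         // 150, the plain difference
 *
 *   start = 0xFFFFFFF0; now = 0x10;   // the counter has wrapped
 *   XT_TIME_DIFF(start, now);         // 0xFFFFFFFF - (start - now) = 0x1F
 *
 * In the wrapped case the result is one less than the exact modulo-2^32
 * difference (0x20), which is close enough for the recently-used check in
 * ind_cac_fetch() below.
 */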
 * -----------------------------------------------------------------------
#define IDX_CAC_SEGMENT_COUNT ((off_t) 1 << XT_INDEX_CACHE_SEGMENT_SHIFTS)
#define IDX_CAC_SEGMENT_MASK (IDX_CAC_SEGMENT_COUNT - 1)
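/* A minimal sketch of how these constants are used on the lookup path (this
 * mirrors ind_cac_fetch() and ind_cac_get() below; the local names are only
 * illustrative):
 *
 *   u_int        h      = XT_NODE_ID(address) + (file_id * 223);
 *   DcSegmentPtr seg    = &ind_cac_globals.cg_segment[h & IDX_CAC_SEGMENT_MASK];
 *   u_int        bucket = (h >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % ind_cac_globals.cg_hash_size;
 *
 * The low bits of the hash select one of IDX_CAC_SEGMENT_COUNT segments (a
 * power of two, so the mask works), and the remaining bits select a chain in
 * that segment's hash table. Each segment has its own lock, so lookups in
 * different segments do not contend.
 */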
#define IDX_CAC_USE_PTHREAD_RW
//#define IDX_CAC_USE_PTHREAD_RW
#define IDX_CAC_USE_XSMUTEX
//#define IDX_CAC_USE_SPINXSLOCK
#if defined(IDX_CAC_USE_PTHREAD_RW)
#define IDX_CAC_LOCK_TYPE xt_rwlock_type
#define IDX_CAC_INIT_LOCK(s, i) xt_init_rwlock_with_autoname(s, &(i)->cs_lock)
#define IDX_CAC_FREE_LOCK(s, i) xt_free_rwlock(&(i)->cs_lock)
#define IDX_CAC_READ_LOCK(i, o) xt_slock_rwlock_ns(&(i)->cs_lock)
#define IDX_CAC_WRITE_LOCK(i, o) xt_xlock_rwlock_ns(&(i)->cs_lock)
#define IDX_CAC_UNLOCK(i, o) xt_unlock_rwlock_ns(&(i)->cs_lock)
#elif defined(IDX_CAC_USE_XSMUTEX)
#define IDX_CAC_LOCK_TYPE XTMutexXSLockRec
#define IDX_CAC_INIT_LOCK(s, i) xt_xsmutex_init_with_autoname(s, &(i)->cs_lock)
#define IDX_CAC_FREE_LOCK(s, i) xt_xsmutex_free(s, &(i)->cs_lock)
#define IDX_CAC_READ_LOCK(i, o) xt_xsmutex_slock(&(i)->cs_lock, (o)->t_id)
#define IDX_CAC_WRITE_LOCK(i, o) xt_xsmutex_xlock(&(i)->cs_lock, (o)->t_id)
#define IDX_CAC_UNLOCK(i, o) xt_xsmutex_unlock(&(i)->cs_lock, (o)->t_id)
#elif defined(IDX_CAC_USE_SPINXSLOCK)
#define IDX_CAC_LOCK_TYPE XTSpinXSLockRec
#define IDX_CAC_INIT_LOCK(s, i) xt_spinxslock_init_with_autoname(s, &(i)->cs_lock)
#define IDX_CAC_FREE_LOCK(s, i) xt_spinxslock_free(s, &(i)->cs_lock)
#define IDX_CAC_READ_LOCK(i, s) xt_spinxslock_slock(&(i)->cs_lock, (s)->t_id)
#define IDX_CAC_WRITE_LOCK(i, s) xt_spinxslock_xlock(&(i)->cs_lock, FALSE, (s)->t_id)
#define IDX_CAC_UNLOCK(i, s) xt_spinxslock_unlock(&(i)->cs_lock, (s)->t_id)
#error Please define the lock type
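/* Whichever implementation is selected above, the macros are used the same
 * way throughout this file: a shared lock for probing a segment's hash
 * chains, and an exclusive lock when a block is added to or removed from a
 * segment. A sketch of the pattern:
 *
 *   IDX_CAC_READ_LOCK(seg, ot->ot_thread);    // probe seg->cs_hash_table[...]
 *   ...
 *   IDX_CAC_UNLOCK(seg, ot->ot_thread);
 *
 *   IDX_CAC_WRITE_LOCK(seg, ot->ot_thread);   // insert or unlink a block
 *   ...
 *   IDX_CAC_UNLOCK(seg, ot->ot_thread);
 */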
#define ID_HANDLE_USE_PTHREAD_RW
//#define ID_HANDLE_USE_PTHREAD_RW
#define ID_HANDLE_USE_SPINLOCK
#if defined(ID_HANDLE_USE_PTHREAD_RW)
#define ID_HANDLE_LOCK_TYPE xt_mutex_type
#define ID_HANDLE_INIT_LOCK(s, i) xt_init_mutex_with_autoname(s, i)
#define ID_HANDLE_FREE_LOCK(s, i) xt_free_mutex(i)
#define ID_HANDLE_LOCK(i) xt_lock_mutex_ns(i)
#define ID_HANDLE_UNLOCK(i) xt_unlock_mutex_ns(i)
#elif defined(ID_HANDLE_USE_SPINLOCK)
#define ID_HANDLE_LOCK_TYPE XTSpinLockRec
#define ID_HANDLE_INIT_LOCK(s, i) xt_spinlock_init_with_autoname(s, i)
#define ID_HANDLE_FREE_LOCK(s, i) xt_spinlock_free(s, i)
#define ID_HANDLE_LOCK(i) xt_spinlock_lock(i)
#define ID_HANDLE_UNLOCK(i) xt_spinlock_unlock(i)
#define XT_HANDLE_SLOTS 37
#define XT_INIT_HANDLE_COUNT 0
#define XT_INIT_HANDLE_BLOCKS 0
#define XT_INIT_HANDLE_COUNT 40
#define XT_INIT_HANDLE_BLOCKS 10
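/* Index handles are spread over XT_HANDLE_SLOTS independent lists, each with
 * its own lock, to reduce contention on the handle lists. The slot is derived
 * from the cache block address, as in xt_ind_get_handle() below:
 *
 *   hs = &ind_cac_globals.cg_handle_slot[iref->ir_block->cb_address % XT_HANDLE_SLOTS];
 *
 * 37 is prime, which helps spread block addresses evenly over the slots.
 */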
/* A disk cache segment. The cache is divided into a number of segments
 * to improve concurrency.
typedef struct DcSegment {
IDX_CAC_LOCK_TYPE cs_lock; /* The cache segment lock. */
XTIndBlockPtr *cs_hash_table;
} DcSegmentRec, *DcSegmentPtr;
typedef struct DcHandleSlot {
ID_HANDLE_LOCK_TYPE hs_handles_lock;
XTIndHandleBlockPtr hs_free_blocks;
XTIndHandlePtr hs_free_handles;
XTIndHandlePtr hs_used_handles;
} DcHandleSlotRec, *DcHandleSlotPtr;
typedef struct DcGlobals {
xt_mutex_type cg_lock; /* The public cache lock. */
DcSegmentRec cg_segment[IDX_CAC_SEGMENT_COUNT];
XTIndBlockPtr cg_blocks;
#ifdef XT_USE_DIRECT_IO_ON_INDEX
XTIndBlockPtr cg_free_list;
xtWord4 cg_free_count;
xtWord4 cg_ru_now; /* A counter as described by Jim Starkey (my thanks) */
XTIndBlockPtr cg_lru_block;
XTIndBlockPtr cg_mru_block;
xtWord4 cg_hash_size;
xtWord4 cg_block_count;
#ifdef DEBUG_CHECK_IND_CACHE
u_int cg_reserved_by_ots; /* Number of blocks reserved by open tables. */
u_int cg_read_count; /* Number of blocks being read. */
/* Index cache handles: */
DcHandleSlotRec cg_handle_slot[XT_HANDLE_SLOTS];
static DcGlobalsRec ind_cac_globals;
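/* The cg_ru_now counter implements the aging scheme credited to Jim Starkey
 * above: a block is stamped with the current counter value when it is
 * promoted, and it only has to be promoted again (and cg_lock taken) once its
 * stamp is more than half the cache size old. A sketch of the check, as it
 * appears in ind_cac_fetch() below:
 *
 *   if (XT_TIME_DIFF(block->cb_ru_time, dcg->cg_ru_now) > (dcg->cg_block_count >> 1)) {
 *       xt_lock_mutex_ns(&dcg->cg_lock);
 *       block->cb_ru_time = ++dcg->cg_ru_now;
 *       // ... relink the block at the MRU end of the list ...
 *       xt_unlock_mutex_ns(&dcg->cg_lock);
 *   }
 */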
#include "my_global.h"
#include "keycache.h"
#undef pthread_rwlock_rdlock
#undef pthread_rwlock_wrlock
#undef pthread_rwlock_try_wrlock
#undef pthread_rwlock_unlock
#undef pthread_mutex_lock
#undef pthread_mutex_unlock
#undef pthread_cond_wait
#undef pthread_cond_broadcast
 * -----------------------------------------------------------------------
 * INDEX CACHE HANDLES
static XTIndHandlePtr ind_alloc_handle()
XTIndHandlePtr handle;
if (!(handle = (XTIndHandlePtr) xt_calloc_ns(sizeof(XTIndHandleRec))))
xt_spinlock_init_with_autoname(NULL, &handle->ih_lock);
static void ind_free_handle(XTIndHandlePtr handle)
xt_spinlock_free(NULL, &handle->ih_lock);
static void ind_handle_exit(XTThreadPtr self)
XTIndHandlePtr handle;
XTIndHandleBlockPtr hptr;
for (int i=0; i<XT_HANDLE_SLOTS; i++) {
hs = &ind_cac_globals.cg_handle_slot[i];
while (hs->hs_used_handles) {
handle = hs->hs_used_handles;
xt_ind_release_handle(handle, FALSE, self);
while (hs->hs_free_blocks) {
hptr = hs->hs_free_blocks;
hs->hs_free_blocks = hptr->hb_next;
while (hs->hs_free_handles) {
handle = hs->hs_free_handles;
hs->hs_free_handles = handle->ih_next;
ind_free_handle(handle);
ID_HANDLE_FREE_LOCK(self, &hs->hs_handles_lock);
static void ind_handle_init(XTThreadPtr self)
for (int i=0; i<XT_HANDLE_SLOTS; i++) {
hs = &ind_cac_globals.cg_handle_slot[i];
memset(hs, 0, sizeof(DcHandleSlotRec));
ID_HANDLE_INIT_LOCK(self, &hs->hs_handles_lock);
//#define CHECK_HANDLE_STRUCTS
#ifdef CHECK_HANDLE_STRUCTS
static int gdummy = 0;
static void ic_stop_here()
printf("Nooo %d!\n", gdummy);
static void ic_check_handle_structs()
XTIndHandlePtr handle, phandle;
XTIndHandleBlockPtr hptr, phptr;
handle = ind_cac_globals.cg_used_handles;
if (handle == phandle)
if (handle->ih_prev != phandle)
if (handle->ih_cache_reference) {
ctest = handle->x.ih_cache_block->cb_handle_count;
if (ctest == 0 || ctest > 100)
ctest = handle->x.ih_handle_block->hb_ref_count;
if (ctest == 0 || ctest > 100)
handle = handle->ih_next;
hptr = ind_cac_globals.cg_free_blocks;
hptr = hptr->hb_next;
handle = ind_cac_globals.cg_free_handles;
if (handle == phandle)
handle = handle->ih_next;
* Get a handle to the index block.
 * This function is called by index scanners (readers).
xtPublic XTIndHandlePtr xt_ind_get_handle(XTOpenTablePtr ot, XTIndexPtr ind, XTIndReferencePtr iref)
XTIndHandlePtr handle;
hs = &ind_cac_globals.cg_handle_slot[iref->ir_block->cb_address % XT_HANDLE_SLOTS];
ASSERT_NS(iref->ir_xlock == FALSE);
ASSERT_NS(iref->ir_updated == FALSE);
ID_HANDLE_LOCK(&hs->hs_handles_lock);
#ifdef CHECK_HANDLE_STRUCTS
ic_check_handle_structs();
if ((handle = hs->hs_free_handles))
hs->hs_free_handles = handle->ih_next;
if (!(handle = ind_alloc_handle())) {
ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
xt_ind_release(ot, ind, XT_UNLOCK_READ, iref);
if (hs->hs_used_handles)
hs->hs_used_handles->ih_prev = handle;
handle->ih_next = hs->hs_used_handles;
handle->ih_prev = NULL;
handle->ih_address = iref->ir_block->cb_address;
handle->ih_cache_reference = TRUE;
handle->x.ih_cache_block = iref->ir_block;
handle->ih_branch = iref->ir_branch;
/* {HANDLE-COUNT-USAGE}
 * This is safe because:
 * I have an Slock on the cache block, and I have
 * at least an Slock on the index.
 * So this excludes anyone who is reading
 * cb_handle_count in the index.
 * (all cache block writers, and the freer).
 * The increment is safe because I have the list
 * lock (hs_handles_lock), which is required by anyone else
 * who increments or decrements this value.
iref->ir_block->cb_handle_count++;
hs->hs_used_handles = handle;
#ifdef CHECK_HANDLE_STRUCTS
ic_check_handle_structs();
ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
xt_ind_release(ot, ind, XT_UNLOCK_READ, iref);
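/* A sketch of how an index scanner is expected to use the handle API (the
 * caller shown here is hypothetical; the real scanners live elsewhere in the
 * engine). Note that xt_ind_get_handle() releases the iref itself, on both
 * the success and the failure path:
 *
 *   XTIndHandlePtr handle;
 *
 *   if (!(handle = xt_ind_get_handle(ot, ind, &iref)))
 *       return FAILED;
 *   xt_ind_lock_handle(handle);                           // pin ih_branch while reading
 *   // ... read from handle->ih_branch ...
 *   xt_ind_unlock_handle(handle);
 *   xt_ind_release_handle(handle, FALSE, ot->ot_thread);  // drops cb_handle_count
 */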
xtPublic void xt_ind_release_handle(XTIndHandlePtr handle, xtBool have_lock, XTThreadPtr thread)
XTIndBlockPtr block = NULL;
DcSegmentPtr seg = NULL;
XTIndBlockPtr xblock;
(void) thread; /*DRIZZLED*/
/* The lock order is:
 * 1. Cache segment (cs_lock) - This is only taken by ind_free_block()!
 * 1. S/Slock cache block (cb_lock)
 * 2. List lock (cg_handles_lock).
 * 3. Handle lock (ih_lock)
xt_spinlock_lock(&handle->ih_lock);
/* Get the lock on the cache page if required: */
if (handle->ih_cache_reference) {
xtIndexNodeID address;
block = handle->x.ih_cache_block;
file_id = block->cb_file_id;
address = block->cb_address;
hash_idx = XT_NODE_ID(address) + (file_id * 223);
seg = &ind_cac_globals.cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % ind_cac_globals.cg_hash_size;
xt_spinlock_unlock(&handle->ih_lock);
/* Because of the lock order, I have to release the
 * handle before I get a lock on the cache block.
 * But, by doing this, this cache block may be gone!
IDX_CAC_READ_LOCK(seg, thread);
xblock = seg->cs_hash_table[hash_idx];
if (block == xblock) {
/* Found the block...
 * {HANDLE-COUNT-SLOCK}
 * 04.05.2009, changed to slock.
 * The xlock causes too much contention
 * on the cache block for read only loads.
XT_IPAGE_READ_LOCK(&block->cb_lock);
xblock = xblock->cb_next;
IDX_CAC_UNLOCK(seg, thread);
hs = &ind_cac_globals.cg_handle_slot[handle->ih_address % XT_HANDLE_SLOTS];
ID_HANDLE_LOCK(&hs->hs_handles_lock);
#ifdef CHECK_HANDLE_STRUCTS
ic_check_handle_structs();
/* I don't need to lock the handle because I have locked
 * the list, and no other thread can change the
 * handle without first getting a lock on the list.
 * In addition, the caller is the only owner of the
 * handle, and the only thread with an independent
 * reference to the handle.
 * All other access occurs via the list.
/* Remove the reference to the cache or a handle block: */
if (handle->ih_cache_reference) {
ASSERT_NS(block == handle->x.ih_cache_block);
ASSERT_NS(block && block->cb_handle_count > 0);
/* {HANDLE-COUNT-USAGE}
 * This is safe here because I have excluded
 * all readers by taking an Xlock on the
 * cache block (CHANGED - see below).
 * {HANDLE-COUNT-SLOCK}
 * 04.05.2009, changed to slock.
 * Should be OK, because:
 * I have a lock on the list lock (hs_handles_lock),
 * which prevents concurrent updates to cb_handle_count.
 * I also have a read lock on the cache block
 * but not a lock on the index. As a result, we cannot
 * exclude all index writers (and readers of
block->cb_handle_count--;
XTIndHandleBlockPtr hptr = handle->x.ih_handle_block;
ASSERT_NS(!handle->ih_cache_reference);
ASSERT_NS(hptr->hb_ref_count > 0);
hptr->hb_ref_count--;
if (!hptr->hb_ref_count) {
/* Put it back on the free list: */
hptr->hb_next = hs->hs_free_blocks;
hs->hs_free_blocks = hptr;
/* Unlink the handle: */
handle->ih_next->ih_prev = handle->ih_prev;
handle->ih_prev->ih_next = handle->ih_next;
if (hs->hs_used_handles == handle)
hs->hs_used_handles = handle->ih_next;
/* Put it on the free list: */
handle->ih_next = hs->hs_free_handles;
hs->hs_free_handles = handle;
#ifdef CHECK_HANDLE_STRUCTS
ic_check_handle_structs();
ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
XT_IPAGE_UNLOCK(&block->cb_lock, FALSE);
/* Call this function before a referenced cache block is modified!
* This function is called by index updaters.
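/* The idea, in outline (the implementation follows): if any handles still
 * reference the cache block, the branch data is copied into a separately
 * allocated XTIndHandleBlockRec, every affected handle is re-pointed at that
 * private copy, and cb_handle_count drops to zero. After that the cache block
 * can be modified or freed without invalidating what the readers hold. A
 * sketch of the re-pointing step:
 *
 *   memcpy(&hptr->hb_branch, iref->ir_branch, branch_size);
 *   hptr->hb_ref_count = iref->ir_block->cb_handle_count;
 *   // for each handle on hs->hs_used_handles with ih_branch == iref->ir_branch:
 *   xt_spinlock_lock(&handle->ih_lock);
 *   handle->ih_cache_reference = FALSE;
 *   handle->x.ih_handle_block = hptr;
 *   handle->ih_branch = &hptr->hb_branch;
 *   xt_spinlock_unlock(&handle->ih_lock);
 *   // afterwards:
 *   iref->ir_block->cb_handle_count = 0;
 */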
xtPublic xtBool xt_ind_copy_on_write(XTIndReferencePtr iref)
XTIndHandleBlockPtr hptr;
XTIndHandlePtr handle;
hs = &ind_cac_globals.cg_handle_slot[iref->ir_block->cb_address % XT_HANDLE_SLOTS];
ID_HANDLE_LOCK(&hs->hs_handles_lock);
/* {HANDLE-COUNT-USAGE}
 * This is only called by updaters of this index block, or
 * the freer, which holds an Xlock on the index block.
 * These are all mutually exclusive for the index block.
 * {HANDLE-COUNT-SLOCK}
 * Do this check again, after we have the list lock (hs_handles_lock).
 * There is a small chance that the count has changed since we last
 * checked, because xt_ind_release_handle() only holds
 * an slock on the index page.
 * An updater can sometimes have an Xlock on the index and an slock
 * on the cache block. In this case xt_ind_release_handle()
 * could have run through.
if (!iref->ir_block->cb_handle_count) {
ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
#ifdef CHECK_HANDLE_STRUCTS
ic_check_handle_structs();
if ((hptr = hs->hs_free_blocks))
hs->hs_free_blocks = hptr->hb_next;
if (!(hptr = (XTIndHandleBlockPtr) xt_malloc_ns(sizeof(XTIndHandleBlockRec)))) {
ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
branch_size = XT_GET_INDEX_BLOCK_LEN(XT_GET_DISK_2(iref->ir_branch->tb_size_2));
memcpy(&hptr->hb_branch, iref->ir_branch, branch_size);
hptr->hb_ref_count = iref->ir_block->cb_handle_count;
handle = hs->hs_used_handles;
if (handle->ih_branch == iref->ir_branch) {
xt_spinlock_lock(&handle->ih_lock);
ASSERT_NS(handle->ih_cache_reference);
handle->ih_cache_reference = FALSE;
handle->x.ih_handle_block = hptr;
handle->ih_branch = &hptr->hb_branch;
xt_spinlock_unlock(&handle->ih_lock);
if (i == hptr->hb_ref_count)
handle = handle->ih_next;
ASSERT_NS(hptr->hb_ref_count == i);
/* {HANDLE-COUNT-USAGE}
 * It is safe to modify cb_handle_count when I have the
 * list lock, and I have excluded all readers!
iref->ir_block->cb_handle_count = 0;
#ifdef CHECK_HANDLE_STRUCTS
ic_check_handle_structs();
ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
xtPublic void xt_ind_lock_handle(XTIndHandlePtr handle)
xt_spinlock_lock(&handle->ih_lock);
xtPublic void xt_ind_unlock_handle(XTIndHandlePtr handle)
xt_spinlock_unlock(&handle->ih_lock);
* -----------------------------------------------------------------------
 * Initialize the disk cache.
xtPublic void xt_ind_init(XTThreadPtr self, size_t cache_size)
init_key_cache(&my_cache, 1024, cache_size, 100, 300);
/* Memory is devoted to the page data alone; I no longer count the size of the directory
 * or the page overhead: */
ind_cac_globals.cg_block_count = cache_size / XT_INDEX_PAGE_SIZE;
ind_cac_globals.cg_hash_size = ind_cac_globals.cg_block_count / (IDX_CAC_SEGMENT_COUNT >> 1);
ind_cac_globals.cg_max_free = ind_cac_globals.cg_block_count / 10;
if (ind_cac_globals.cg_max_free < 8)
ind_cac_globals.cg_max_free = 8;
if (ind_cac_globals.cg_max_free > 128)
ind_cac_globals.cg_max_free = 128;
for (u_int i=0; i<IDX_CAC_SEGMENT_COUNT; i++) {
ind_cac_globals.cg_segment[i].cs_hash_table = (XTIndBlockPtr *) xt_calloc(self, ind_cac_globals.cg_hash_size * sizeof(XTIndBlockPtr));
IDX_CAC_INIT_LOCK(self, &ind_cac_globals.cg_segment[i]);
block = (XTIndBlockPtr) xt_malloc(self, ind_cac_globals.cg_block_count * sizeof(XTIndBlockRec));
ind_cac_globals.cg_blocks = block;
xt_init_mutex_with_autoname(self, &ind_cac_globals.cg_lock);
#ifdef XT_USE_DIRECT_IO_ON_INDEX
size_t psize = getpagesize();
buffer = (xtWord1 *) xt_malloc(self, (ind_cac_globals.cg_block_count * XT_INDEX_PAGE_SIZE));
diff = (size_t) buffer % psize;
xt_free(self, buffer);
buffer = (xtWord1 *) xt_malloc(self, (ind_cac_globals.cg_block_count * XT_INDEX_PAGE_SIZE) + psize);
diff = (size_t) buffer % psize;
ind_cac_globals.cg_buffer = buffer;
for (u_int i=0; i<ind_cac_globals.cg_block_count; i++) {
XT_IPAGE_INIT_LOCK(self, &block->cb_lock);
block->cb_state = IDX_CAC_BLOCK_FREE;
block->cb_next = ind_cac_globals.cg_free_list;
#ifdef XT_USE_DIRECT_IO_ON_INDEX
block->cb_data = buffer;
buffer += XT_INDEX_PAGE_SIZE;
ind_cac_globals.cg_free_list = block;
ind_cac_globals.cg_free_count = ind_cac_globals.cg_block_count;
#ifdef DEBUG_CHECK_IND_CACHE
ind_cac_globals.cg_reserved_by_ots = 0;
ind_handle_init(self);
xtPublic void xt_ind_exit(XTThreadPtr self)
end_key_cache(&my_cache, 1);
for (u_int i=0; i<IDX_CAC_SEGMENT_COUNT; i++) {
if (ind_cac_globals.cg_segment[i].cs_hash_table) {
xt_free(self, ind_cac_globals.cg_segment[i].cs_hash_table);
ind_cac_globals.cg_segment[i].cs_hash_table = NULL;
IDX_CAC_FREE_LOCK(self, &ind_cac_globals.cg_segment[i]);
/* Must be done before freeing the blocks! */
ind_handle_exit(self);
if (ind_cac_globals.cg_blocks) {
xt_free(self, ind_cac_globals.cg_blocks);
ind_cac_globals.cg_blocks = NULL;
xt_free_mutex(&ind_cac_globals.cg_lock);
#ifdef XT_USE_DIRECT_IO_ON_INDEX
if (ind_cac_globals.cg_buffer) {
xt_free(self, ind_cac_globals.cg_buffer);
ind_cac_globals.cg_buffer = NULL;
memset(&ind_cac_globals, 0, sizeof(ind_cac_globals));
xtPublic xtInt8 xt_ind_get_usage()
size = (xtInt8) (ind_cac_globals.cg_block_count - ind_cac_globals.cg_free_count) * (xtInt8) XT_INDEX_PAGE_SIZE;
xtPublic xtInt8 xt_ind_get_size()
size = (xtInt8) ind_cac_globals.cg_block_count * (xtInt8) XT_INDEX_PAGE_SIZE;
xtPublic u_int xt_ind_get_blocks()
return ind_cac_globals.cg_block_count;
 * -----------------------------------------------------------------------
xtPublic void xt_ind_check_cache(XTIndexPtr ind)
u_int free_count, inuse_count, clean_count;
xtBool check_count = FALSE;
if (ind == (XTIndex *) 1) {
// Check the dirty list:
block = ind->mi_dirty_list;
ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_DIRTY);
block = block->cb_dirty_next;
ASSERT_NS(ind->mi_dirty_blocks == cnt);
xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
// Check the free list:
block = ind_cac_globals.cg_free_list;
ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_FREE);
block = block->cb_next;
ASSERT_NS(ind_cac_globals.cg_free_count == free_count);
/* Check the LRU list: */
XTIndBlockPtr list_block, plist_block;
list_block = ind_cac_globals.cg_lru_block;
ASSERT_NS(ind_cac_globals.cg_mru_block != NULL);
ASSERT_NS(ind_cac_globals.cg_mru_block->cb_mr_used == NULL);
ASSERT_NS(list_block->cb_lr_used == NULL);
ASSERT_NS(IDX_CAC_NOT_FREE(list_block->cb_state));
if (list_block->cb_state == IDX_CAC_BLOCK_CLEAN)
ASSERT_NS(block != list_block);
ASSERT_NS(list_block->cb_lr_used == plist_block);
plist_block = list_block;
list_block = list_block->cb_mr_used;
ASSERT_NS(ind_cac_globals.cg_mru_block == plist_block);
ASSERT_NS(ind_cac_globals.cg_mru_block == NULL);
#ifdef DEBUG_CHECK_IND_CACHE
ASSERT_NS(free_count + inuse_count + ind_cac_globals.cg_reserved_by_ots + ind_cac_globals.cg_read_count == ind_cac_globals.cg_block_count);
xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
/* We have just flushed, check how much is now free/clean. */
if (free_count + clean_count < 10) {
/* This could be a problem: */
printf("Cache very low!\n");
* -----------------------------------------------------------------------
 * FREEING INDEX CACHE
 * This function returns TRUE if the block is freed.
 * This function returns FALSE if the block cannot be found, or the
 * block is not clean.
 * We also return FALSE if we cannot copy the block to the handle
 * (if this is required). This will be due to out-of-memory!
static xtBool ind_free_block(XTOpenTablePtr ot, XTIndBlockPtr block)
XTIndBlockPtr xblock, pxblock;
xtIndexNodeID address;
(void) ot; /*DRIZZLED*/
#ifdef DEBUG_CHECK_IND_CACHE
xt_ind_check_cache(NULL);
file_id = block->cb_file_id;
address = block->cb_address;
hash_idx = XT_NODE_ID(address) + (file_id * 223);
seg = &ind_cac_globals.cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % ind_cac_globals.cg_hash_size;
IDX_CAC_WRITE_LOCK(seg, ot->ot_thread);
xblock = seg->cs_hash_table[hash_idx];
if (block == xblock) {
/* Found the block... */
/* It is possible that a thread enters this code holding a
 * lock on a page. This can cause a deadlock:
 * #0 0x91faa2ce in semaphore_wait_signal_trap
 * #1 0x91fb1da5 in pthread_mutex_lock
 * #2 0x00e2ec13 in xt_p_mutex_lock at pthread_xt.cc:544
 * #3 0x00e6c30a in xt_xsmutex_xlock at lock_xt.cc:1547
 * #4 0x00dee402 in ind_free_block at cache_xt.cc:879
 * #5 0x00dee76a in ind_cac_free_lru_blocks at cache_xt.cc:1033
 * #6 0x00def8d1 in xt_ind_reserve at cache_xt.cc:1513
 * #7 0x00e22118 in xt_idx_insert at index_xt.cc:2047
 * #8 0x00e4d7ee in xt_tab_new_record at table_xt.cc:4702
 * #9 0x00e0ff0b in ha_pbxt::write_row at ha_pbxt.cc:2340
 * #10 0x0023a00f in handler::ha_write_row at handler.cc:4570
 * #11 0x001a32c8 in write_record at sql_insert.cc:1568
 * #12 0x001ab635 in mysql_insert at sql_insert.cc:812
 * #13 0x0010e068 in mysql_execute_command at sql_parse.cc:3066
 * #14 0x0011480d in mysql_parse at sql_parse.cc:5787
 * #15 0x00115afb in dispatch_command at sql_parse.cc:1200
 * #16 0x00116de2 in do_command at sql_parse.cc:857
 * #17 0x00101ee4 in handle_one_connection at sql_connect.cc:1115
 * #18 0x91fdb155 in _pthread_start
 * #19 0x91fdb012 in thread_start
 * #0 0x91fb146e in __semwait_signal
 * #1 0x91fb12ef in nanosleep$UNIX2003
 * #2 0x91fb1236 in usleep$UNIX2003
 * #3 0x00e52112 in xt_yield at thread_xt.cc:1274
 * #4 0x00e6c0eb in xt_spinxslock_xlock at lock_xt.cc:1456
 * #5 0x00dee444 in ind_free_block at cache_xt.cc:886
 * #6 0x00dee76a in ind_cac_free_lru_blocks at cache_xt.cc:1033
 * #7 0x00deeaf0 in ind_cac_fetch at cache_xt.cc:1130
 * #8 0x00def604 in xt_ind_fetch at cache_xt.cc:1386
 * #9 0x00e2159a in xt_idx_update_row_id at index_xt.cc:2489
 * #10 0x00e603c8 in xn_sw_clean_indices at xaction_xt.cc:1932
 * #11 0x00e606d4 in xn_sw_cleanup_variation at xaction_xt.cc:2056
 * #12 0x00e60e29 in xn_sw_cleanup_xact at xaction_xt.cc:2276
 * #13 0x00e615ed in xn_sw_main at xaction_xt.cc:2433
 * #14 0x00e61919 in xn_sw_run_thread at xaction_xt.cc:2564
 * #15 0x00e53f80 in thr_main at thread_xt.cc:1017
 * #16 0x91fdb155 in _pthread_start
 * #17 0x91fdb012 in thread_start
 * So we back off if a lock is held!
if (!XT_IPAGE_WRITE_TRY_LOCK(&block->cb_lock, ot->ot_thread->t_id)) {
IDX_CAC_UNLOCK(seg, ot->ot_thread);
#ifdef DEBUG_CHECK_IND_CACHE
xt_ind_check_cache(NULL);
if (block->cb_state != IDX_CAC_BLOCK_CLEAN) {
/* This block cannot be freed: */
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
IDX_CAC_UNLOCK(seg, ot->ot_thread);
#ifdef DEBUG_CHECK_IND_CACHE
xt_ind_check_cache(NULL);
xblock = xblock->cb_next;
IDX_CAC_UNLOCK(seg, ot->ot_thread);
/* Not found (this can happen if the block was freed by another thread) */
#ifdef DEBUG_CHECK_IND_CACHE
xt_ind_check_cache(NULL);
/* If the block is referenced by a handle, then we
 * have to copy the data to the handle before we
/* {HANDLE-COUNT-USAGE}
 * This access is safe because:
 * We have an Xlock on the cache block, which excludes
 * all other writers that want to change the cache block
 * and also all readers of the cache block, because
 * they all have at least an Slock on the cache block.
if (block->cb_handle_count) {
XTIndReferenceRec iref;
iref.ir_xlock = TRUE;
iref.ir_updated = FALSE;
iref.ir_block = block;
iref.ir_branch = (XTIdxBranchDPtr) block->cb_data;
if (!xt_ind_copy_on_write(&iref)) {
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
/* Block is clean, remove from the hash table: */
pxblock->cb_next = block->cb_next;
seg->cs_hash_table[hash_idx] = block->cb_next;
xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
/* Remove from the MRU list: */
if (ind_cac_globals.cg_lru_block == block)
ind_cac_globals.cg_lru_block = block->cb_mr_used;
if (ind_cac_globals.cg_mru_block == block)
ind_cac_globals.cg_mru_block = block->cb_lr_used;
/* Note, I am updating blocks for which I have no lock
 * here. But I think this is OK because I have a lock
if (block->cb_lr_used)
block->cb_lr_used->cb_mr_used = block->cb_mr_used;
if (block->cb_mr_used)
block->cb_mr_used->cb_lr_used = block->cb_lr_used;
/* The block is now free: */
block->cb_next = ind_cac_globals.cg_free_list;
ind_cac_globals.cg_free_list = block;
ind_cac_globals.cg_free_count++;
ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
block->cb_state = IDX_CAC_BLOCK_FREE;
IDX_TRACE("%d- f%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(block->cb_data));
/* Unlock BEFORE the block is reused! */
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
IDX_CAC_UNLOCK(seg, ot->ot_thread);
#ifdef DEBUG_CHECK_IND_CACHE
xt_ind_check_cache(NULL);
#define IND_CACHE_MAX_BLOCKS_TO_FREE 100
 * Return the number of blocks freed.
 * The idea is to grab a list of blocks to free.
 * The list consists of the LRU blocks that are
 * Free as many as possible (up to a maximum of blocks_required)
 * from the list, even if LRU position has changed
 * (or we have a race if there are too few blocks).
 * However, if the block cannot be found, or is dirty
 * Repeat until we find no blocks for the list, or
 * we have freed 'blocks_required'.
 * 'not_this' is a block that must not be freed because
* it is locked by the calling thread!
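/* In outline, the function below works in two phases, so that ind_free_block()
 * is never called while cg_lock is held:
 *
 *   xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
 *   // walk from cg_lru_block, collecting up to IND_CACHE_MAX_BLOCKS_TO_FREE
 *   // clean blocks (skipping 'not_this') into to_free[]
 *   xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
 *
 *   // then, without the lock: for each candidate,
 *   //   if (ind_free_block(ot, to_free[i])) blocks_freed++;
 *   // stopping early once blocks_freed >= blocks_required and cg_free_count
 *   // has reached cg_max_free + blocks_required
 */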
static u_int ind_cac_free_lru_blocks(XTOpenTablePtr ot, u_int blocks_required, XTIdxBranchDPtr not_this)
register DcGlobalsRec *dcg = &ind_cac_globals;
XTIndBlockPtr to_free[IND_CACHE_MAX_BLOCKS_TO_FREE];
XTIndBlockPtr block;
u_int blocks_freed = 0;
XTIndBlockPtr locked_block;
#ifdef XT_USE_DIRECT_IO_ON_INDEX
#error This will not work!
locked_block = (XTIndBlockPtr) ((xtWord1 *) not_this - offsetof(XTIndBlockRec, cb_data));
xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
block = dcg->cg_lru_block;
while (block && count < IND_CACHE_MAX_BLOCKS_TO_FREE) {
if (block != locked_block && block->cb_state == IDX_CAC_BLOCK_CLEAN) {
to_free[count] = block;
block = block->cb_mr_used;
xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
return blocks_freed;
for (int i=0; i<count; i++) {
if (ind_free_block(ot, to_free[i]))
if (blocks_freed >= blocks_required &&
ind_cac_globals.cg_free_count >= ind_cac_globals.cg_max_free + blocks_required)
return blocks_freed;
 * -----------------------------------------------------------------------
 * MAIN CACHE FUNCTIONS
 * Fetch the block. Note, if we are about to write the block
* then there is no need to read it from disk!
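/* The read_data flag distinguishes the callers below: xt_ind_write() passes
 * FALSE because it is about to overwrite the whole page anyway, while
 * xt_ind_fetch() and xt_ind_read_bytes() pass TRUE so that a cache miss is
 * filled from the index file. For example:
 *
 *   block = ind_cac_fetch(ot, ind, address, &seg, FALSE);   // xt_ind_write()
 *   block = ind_cac_fetch(ot, ind, address, &seg, TRUE);    // xt_ind_fetch()
 */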
static XTIndBlockPtr ind_cac_fetch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, DcSegmentPtr *ret_seg, xtBool read_data)
register XTOpenFilePtr file = ot->ot_ind_file;
register XTIndBlockPtr block, new_block;
register DcSegmentPtr seg;
register u_int hash_idx;
register DcGlobalsRec *dcg = &ind_cac_globals;
#ifdef DEBUG_CHECK_IND_CACHE
xt_ind_check_cache(NULL);
/* Address, plus file ID multiplied by my favorite prime number! */
hash_idx = XT_NODE_ID(address) + (file->fr_id * 223);
seg = &dcg->cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % dcg->cg_hash_size;
IDX_CAC_READ_LOCK(seg, ot->ot_thread);
block = seg->cs_hash_table[hash_idx];
if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
ASSERT_NS(block->cb_state != IDX_CAC_BLOCK_FREE);
/* Check how recently this page has been used: */
if (XT_TIME_DIFF(block->cb_ru_time, dcg->cg_ru_now) > (dcg->cg_block_count >> 1)) {
xt_lock_mutex_ns(&dcg->cg_lock);
/* Move to the front of the MRU list: */
block->cb_ru_time = ++dcg->cg_ru_now;
if (dcg->cg_mru_block != block) {
/* Remove from the MRU list: */
if (dcg->cg_lru_block == block)
dcg->cg_lru_block = block->cb_mr_used;
if (block->cb_lr_used)
block->cb_lr_used->cb_mr_used = block->cb_mr_used;
if (block->cb_mr_used)
block->cb_mr_used->cb_lr_used = block->cb_lr_used;
/* Make the block the most recently used: */
if ((block->cb_lr_used = dcg->cg_mru_block))
dcg->cg_mru_block->cb_mr_used = block;
block->cb_mr_used = NULL;
dcg->cg_mru_block = block;
if (!dcg->cg_lru_block)
dcg->cg_lru_block = block;
xt_unlock_mutex_ns(&dcg->cg_lock);
#ifdef DEBUG_CHECK_IND_CACHE
xt_ind_check_cache(NULL);
ot->ot_thread->st_statistics.st_ind_cache_hit++;
block = block->cb_next;
/* Block not found... */
1137
IDX_CAC_UNLOCK(seg, ot->ot_thread);
1139
/* Check the open table reserve list first: */
1140
if ((new_block = ot->ot_ind_res_bufs)) {
1141
ot->ot_ind_res_bufs = new_block->cb_next;
1142
ot->ot_ind_res_count--;
1143
#ifdef DEBUG_CHECK_IND_CACHE
1144
xt_lock_mutex_ns(&dcg->cg_lock);
1145
dcg->cg_reserved_by_ots--;
1146
dcg->cg_read_count++;
1147
xt_unlock_mutex_ns(&dcg->cg_lock);
1149
goto use_free_block;
1153
if (!dcg->cg_free_list) {
1154
if (!ind_cac_free_lru_blocks(ot, 1, NULL)) {
1155
if (!dcg->cg_free_list) {
1156
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_INDEX_CACHE);
1157
#ifdef DEBUG_CHECK_IND_CACHE
1158
xt_ind_check_cache(NULL);
1165
/* Get a free block: */
1166
xt_lock_mutex_ns(&dcg->cg_lock);
1167
if (!(new_block = dcg->cg_free_list)) {
1168
xt_unlock_mutex_ns(&dcg->cg_lock);
1169
goto free_some_blocks;
1171
ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_FREE);
1172
dcg->cg_free_list = new_block->cb_next;
1173
dcg->cg_free_count--;
1174
#ifdef DEBUG_CHECK_IND_CACHE
1175
dcg->cg_read_count++;
1177
xt_unlock_mutex_ns(&dcg->cg_lock);
1180
new_block->cb_address = address;
1181
new_block->cb_file_id = file->fr_id;
1182
ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_FREE);
1183
new_block->cb_state = IDX_CAC_BLOCK_CLEAN;
1184
new_block->cb_handle_count = 0;
1185
new_block->cp_del_count = 0;
1186
new_block->cb_dirty_next = NULL;
1187
new_block->cb_dirty_prev = NULL;
1188
#ifdef IND_OPT_DATA_WRITTEN
1189
new_block->cb_header = FALSE;
1190
new_block->cb_min_pos = 0xFFFF;
1191
new_block->cb_max_pos = 0;
1195
if (!xt_pread_file(file, xt_ind_node_to_offset(ot->ot_table, address), XT_INDEX_PAGE_SIZE, 0, new_block->cb_data, &red_size, &ot->ot_thread->st_statistics.st_ind, ot->ot_thread)) {
1196
xt_lock_mutex_ns(&dcg->cg_lock);
1197
new_block->cb_next = dcg->cg_free_list;
1198
dcg->cg_free_list = new_block;
1199
dcg->cg_free_count++;
1200
#ifdef DEBUG_CHECK_IND_CACHE
1201
dcg->cg_read_count--;
1203
ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_CLEAN);
1204
new_block->cb_state = IDX_CAC_BLOCK_FREE;
1205
IDX_TRACE("%d- F%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
1206
xt_unlock_mutex_ns(&dcg->cg_lock);
1207
#ifdef DEBUG_CHECK_IND_CACHE
1208
xt_ind_check_cache(NULL);
1212
IDX_TRACE("%d- R%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
1213
ot->ot_thread->st_statistics.st_ind_cache_miss++;
1217
// PMC - I don't think this is required! memset(new_block->cb_data + red_size, 0, XT_INDEX_PAGE_SIZE - red_size);
1219
IDX_CAC_WRITE_LOCK(seg, ot->ot_thread);
1220
block = seg->cs_hash_table[hash_idx];
1222
if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
1223
/* Oops, someone else was faster! */
1224
xt_lock_mutex_ns(&dcg->cg_lock);
1225
new_block->cb_next = dcg->cg_free_list;
1226
dcg->cg_free_list = new_block;
1227
dcg->cg_free_count++;
1228
#ifdef DEBUG_CHECK_IND_CACHE
1229
dcg->cg_read_count--;
1231
ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_CLEAN);
1232
new_block->cb_state = IDX_CAC_BLOCK_FREE;
1233
IDX_TRACE("%d- F%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
1234
xt_unlock_mutex_ns(&dcg->cg_lock);
1237
block = block->cb_next;
1241
/* Make the block the most recently used: */
1242
xt_lock_mutex_ns(&dcg->cg_lock);
1243
block->cb_ru_time = ++dcg->cg_ru_now;
1244
if ((block->cb_lr_used = dcg->cg_mru_block))
1245
dcg->cg_mru_block->cb_mr_used = block;
1246
block->cb_mr_used = NULL;
1247
dcg->cg_mru_block = block;
1248
if (!dcg->cg_lru_block)
1249
dcg->cg_lru_block = block;
1250
#ifdef DEBUG_CHECK_IND_CACHE
1251
dcg->cg_read_count--;
1253
xt_unlock_mutex_ns(&dcg->cg_lock);
1255
/* {LAZY-DEL-INDEX-ITEMS}
1256
* Conditionally count the number of deleted entries in the index:
1257
* We do this before other threads can read the block.
1259
if (ind->mi_lazy_delete && read_data)
1260
xt_ind_count_deleted_items(ot->ot_table, ind, block);
1262
/* Add to the hash table: */
1263
block->cb_next = seg->cs_hash_table[hash_idx];
1264
seg->cs_hash_table[hash_idx] = block;
1268
#ifdef DEBUG_CHECK_IND_CACHE
1269
xt_ind_check_cache(NULL);
1274
static xtBool ind_cac_get(XTOpenTablePtr ot, xtIndexNodeID address, DcSegmentPtr *ret_seg, XTIndBlockPtr *ret_block)
register XTOpenFilePtr file = ot->ot_ind_file;
register XTIndBlockPtr block;
register DcSegmentPtr seg;
register u_int hash_idx;
register DcGlobalsRec *dcg = &ind_cac_globals;
hash_idx = XT_NODE_ID(address) + (file->fr_id * 223);
seg = &dcg->cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % dcg->cg_hash_size;
IDX_CAC_READ_LOCK(seg, ot->ot_thread);
block = seg->cs_hash_table[hash_idx];
if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
ASSERT_NS(block->cb_state != IDX_CAC_BLOCK_FREE);
block = block->cb_next;
IDX_CAC_UNLOCK(seg, ot->ot_thread);
/* Block not found: */
xtPublic xtBool xt_ind_write(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, size_t size, xtWord1 *data)
XTIndBlockPtr block;
if (!(block = ind_cac_fetch(ot, ind, address, &seg, FALSE)))
XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
if (block->cb_state == IDX_CAC_BLOCK_FLUSHING) {
if (!ot->ot_table->tab_ind_flush_ilog->il_write_block(ot, block)) {
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
IDX_CAC_UNLOCK(seg, ot->ot_thread);
#ifdef IND_OPT_DATA_WRITTEN
block->cb_header = TRUE;
block->cb_min_pos = 0;
if (size-XT_INDEX_PAGE_HEAD_SIZE > block->cb_max_pos)
block->cb_max_pos = size-XT_INDEX_PAGE_HEAD_SIZE;
ASSERT_NS(block->cb_max_pos <= XT_INDEX_PAGE_SIZE-XT_INDEX_PAGE_HEAD_SIZE);
ASSERT_NS(block->cb_min_pos < block->cb_max_pos);
ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
memcpy(block->cb_data, data, size);
if (block->cb_state != IDX_CAC_BLOCK_DIRTY) {
TRACK_BLOCK_WRITE(offset);
xt_spinlock_lock(&ind->mi_dirty_lock);
if ((block->cb_dirty_next = ind->mi_dirty_list))
ind->mi_dirty_list->cb_dirty_prev = block;
block->cb_dirty_prev = NULL;
ind->mi_dirty_list = block;
ind->mi_dirty_blocks++;
xt_spinlock_unlock(&ind->mi_dirty_lock);
if (block->cb_state != IDX_CAC_BLOCK_LOGGED) {
ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
ot->ot_thread->st_statistics.st_ind_cache_dirty++;
block->cb_state = IDX_CAC_BLOCK_DIRTY;
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
IDX_CAC_UNLOCK(seg, ot->ot_thread);
#ifdef XT_TRACK_INDEX_UPDATES
ot->ot_ind_changed++;
 * Update the cache, if in RAM.
xtPublic xtBool xt_ind_write_cache(XTOpenTablePtr ot, xtIndexNodeID address, size_t size, xtWord1 *data)
XTIndBlockPtr block;
if (!ind_cac_get(ot, address, &seg, &block))
XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
/* This should only be done to pages that are free, which
 * are not on the dirty list, so they must be clean!
ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
memcpy(block->cb_data, data, size);
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
IDX_CAC_UNLOCK(seg, ot->ot_thread);
xtPublic xtBool xt_ind_get(XTOpenTablePtr ot, xtIndexNodeID address, XTIndReferencePtr iref)
XTIndBlockPtr block;
if (!ind_cac_get(ot, address, &seg, &block))
XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
ASSERT_NS(IDX_CAC_NOT_FREE(block->cb_state));
IDX_CAC_UNLOCK(seg, ot->ot_thread);
iref->ir_block = block;
iref->ir_branch = (XTIdxBranchDPtr) block->cb_data;
iref->ir_block = NULL;
iref->ir_branch = NULL;
iref->ir_xlock = TRUE;
iref->ir_updated = FALSE;
 * Note, this function may only be called if the block has
xtPublic xtBool xt_ind_free_block(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address)
XTIndBlockPtr block;
if (!ind_cac_get(ot, address, &seg, &block))
XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
if (block->cb_state == IDX_CAC_BLOCK_FLUSHING) {
if (!ot->ot_table->tab_ind_flush_ilog->il_write_block(ot, block)) {
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
IDX_CAC_UNLOCK(seg, ot->ot_thread);
/* {PAGE-NO-IN-INDEX-FILE}
 * This is the one exception to the rule that a block
 * that is in the IDX_CAC_BLOCK_LOGGED state may be released
ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
if (block->cb_state == IDX_CAC_BLOCK_DIRTY) {
/* Take the block off the dirty list: */
xt_spinlock_lock(&ind->mi_dirty_lock);
if (block->cb_dirty_next)
block->cb_dirty_next->cb_dirty_prev = block->cb_dirty_prev;
if (block->cb_dirty_prev)
block->cb_dirty_prev->cb_dirty_next = block->cb_dirty_next;
if (ind->mi_dirty_list == block)
ind->mi_dirty_list = block->cb_dirty_next;
ind->mi_dirty_blocks--;
xt_spinlock_unlock(&ind->mi_dirty_lock);
block->cb_state = IDX_CAC_BLOCK_CLEAN;
ot->ot_thread->st_statistics.st_ind_cache_dirty--;
#ifdef IND_OPT_DATA_WRITTEN
block->cb_header = FALSE;
block->cb_min_pos = 0xFFFF;
block->cb_max_pos = 0;
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
IDX_CAC_UNLOCK(seg, ot->ot_thread);
xtPublic xtBool xt_ind_read_bytes(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, size_t size, xtWord1 *data)
XTIndBlockPtr block;
if (!(block = ind_cac_fetch(ot, ind, address, &seg, TRUE)))
XT_IPAGE_READ_LOCK(&block->cb_lock);
memcpy(data, block->cb_data, size);
XT_IPAGE_UNLOCK(&block->cb_lock, FALSE);
IDX_CAC_UNLOCK(seg, ot->ot_thread);
xtPublic xtBool xt_ind_fetch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, XTPageLockType ltype, XTIndReferencePtr iref)
register XTIndBlockPtr block;
xtWord2 branch_size;
xtBool xlock = FALSE;
ASSERT_NS(iref->ir_xlock == 2);
ASSERT_NS(iref->ir_updated == 2);
if (!(block = ind_cac_fetch(ot, ind, address, &seg, TRUE)))
branch_size = XT_GET_DISK_2(((XTIdxBranchDPtr) block->cb_data)->tb_size_2);
rec_size = XT_GET_INDEX_BLOCK_LEN(branch_size);
if (rec_size < 2 || rec_size > XT_INDEX_PAGE_SIZE)
goto failed_corrupt;
if (ind->mi_fix_key) {
if (XT_IS_NODE(branch_size)) {
if (rec_size != 0) {
if (rec_size < XT_NODE_REF_SIZE)
goto failed_corrupt;
rec_size -= XT_NODE_REF_SIZE;
if ((rec_size % (ind->mi_key_size + XT_RECORD_REF_SIZE + XT_NODE_REF_SIZE)) != 0)
goto failed_corrupt;
if ((rec_size % (ind->mi_key_size + XT_RECORD_REF_SIZE)) != 0)
goto failed_corrupt;
if (!XT_IS_NODE(branch_size))
case XT_XLOCK_DEL_LEAF:
if (!XT_IS_NODE(branch_size)) {
if (ot->ot_table->tab_dic.dic_no_lazy_delete)
 * {LAZY-DEL-INDEX-ITEMS}
 * We are fetching a page for deletion purposes.
 * We decide here if we plan to do a lazy delete,
 * or if we plan to compact the node.
 * A lazy delete just requires a shared lock.
if (ind->mi_lazy_delete) {
/* If the number of deleted items is greater than
 * half of the number of items that can fit in the
 * page, then we will compact the node.
if (!xt_idx_lazy_delete_on_leaf(ind, block, XT_GET_INDEX_BLOCK_LEN(branch_size)))
if ((iref->ir_xlock = xlock))
XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
XT_IPAGE_READ_LOCK(&block->cb_lock);
IDX_CAC_UNLOCK(seg, ot->ot_thread);
 * Direct I/O requires that the buffer is 512-byte aligned.
 * To do this, cb_data is turned into a pointer, instead
 * As a result, we need to pass a pointer to both the
 * cache block and the cache block data:
iref->ir_updated = FALSE;
iref->ir_block = block;
iref->ir_branch = (XTIdxBranchDPtr) block->cb_data;
IDX_CAC_UNLOCK(seg, ot->ot_thread);
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name);
xtPublic xtBool xt_ind_release(XTOpenTablePtr ot, XTIndexPtr ind, XTPageUnlockType XT_NDEBUG_UNUSED(utype), XTIndReferencePtr iref)
register XTIndBlockPtr block;
block = iref->ir_block;
ASSERT_NS(iref->ir_xlock != 2);
ASSERT_NS(iref->ir_updated != 2);
if (iref->ir_updated)
ASSERT_NS(utype == XT_UNLOCK_R_UPDATE || utype == XT_UNLOCK_W_UPDATE);
ASSERT_NS(utype == XT_UNLOCK_READ || utype == XT_UNLOCK_WRITE);
ASSERT_NS(utype == XT_UNLOCK_WRITE || utype == XT_UNLOCK_W_UPDATE);
ASSERT_NS(utype == XT_UNLOCK_READ || utype == XT_UNLOCK_R_UPDATE);
if (iref->ir_updated) {
#ifdef IND_OPT_DATA_WRITTEN
xtWord2 branch_size;
branch_size = XT_GET_DISK_2(((XTIdxBranchDPtr) block->cb_data)->tb_size_2);
rec_size = XT_GET_INDEX_BLOCK_LEN(branch_size);
ASSERT_NS(block->cb_min_pos <= rec_size-2);
ASSERT_NS(block->cb_min_pos <= block->cb_max_pos);
ASSERT_NS(block->cb_max_pos <= rec_size-2);
ASSERT_NS(block->cb_max_pos <= XT_INDEX_PAGE_SIZE-2);
/* The page was updated: */
ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
if (block->cb_state != IDX_CAC_BLOCK_DIRTY) {
TRACK_BLOCK_WRITE(offset);
xt_spinlock_lock(&ind->mi_dirty_lock);
if ((block->cb_dirty_next = ind->mi_dirty_list))
ind->mi_dirty_list->cb_dirty_prev = block;
block->cb_dirty_prev = NULL;
ind->mi_dirty_list = block;
ind->mi_dirty_blocks++;
xt_spinlock_unlock(&ind->mi_dirty_lock);
if (block->cb_state != IDX_CAC_BLOCK_LOGGED) {
ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
ot->ot_thread->st_statistics.st_ind_cache_dirty++;
block->cb_state = IDX_CAC_BLOCK_DIRTY;
XT_IPAGE_UNLOCK(&block->cb_lock, iref->ir_xlock);
iref->ir_updated = 2;
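/* A sketch of the usual fetch/update/release cycle built on xt_ind_fetch()
 * and xt_ind_release() (illustrative only; the real callers are the index
 * update routines). The reference is expected to arrive "poisoned" to 2 in
 * debug builds, which is what the asserts in xt_ind_fetch() and the
 * ir_updated = 2 above suggest:
 *
 *   XTIndReferenceRec iref;
 *
 *   iref.ir_xlock = 2;
 *   iref.ir_updated = 2;
 *   if (!xt_ind_fetch(ot, ind, address, ltype, &iref))   // ltype: an XTPageLockType,
 *       return FAILED;                                   // e.g. XT_XLOCK_DEL_LEAF above
 *   // ... modify iref.ir_branch ...
 *   iref.ir_updated = TRUE;             // so release puts the block on the dirty list
 *   xt_ind_release(ot, ind, XT_UNLOCK_W_UPDATE, &iref);
 */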
xtPublic xtBool xt_ind_reserve(XTOpenTablePtr ot, u_int count, XTIdxBranchDPtr not_this)
register XTIndBlockPtr block;
register DcGlobalsRec *dcg = &ind_cac_globals;
#ifdef XT_TRACK_INDEX_UPDATES
ot->ot_ind_reserved = count;
ot->ot_ind_reads = 0;
#ifdef DEBUG_CHECK_IND_CACHE
xt_ind_check_cache(NULL);
while (ot->ot_ind_res_count < count) {
if (!dcg->cg_free_list) {
if (!ind_cac_free_lru_blocks(ot, count - ot->ot_ind_res_count, not_this)) {
if (!dcg->cg_free_list) {
xt_ind_free_reserved(ot);
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_INDEX_CACHE);
#ifdef DEBUG_CHECK_IND_CACHE
xt_ind_check_cache(NULL);
/* Get a free block: */
xt_lock_mutex_ns(&dcg->cg_lock);
while (ot->ot_ind_res_count < count && (block = dcg->cg_free_list)) {
ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_FREE);
dcg->cg_free_list = block->cb_next;
dcg->cg_free_count--;
block->cb_next = ot->ot_ind_res_bufs;
ot->ot_ind_res_bufs = block;
ot->ot_ind_res_count++;
#ifdef DEBUG_CHECK_IND_CACHE
dcg->cg_reserved_by_ots++;
xt_unlock_mutex_ns(&dcg->cg_lock);
#ifdef DEBUG_CHECK_IND_CACHE
xt_ind_check_cache(NULL);
xtPublic void xt_ind_free_reserved(XTOpenTablePtr ot)
#ifdef DEBUG_CHECK_IND_CACHE
xt_ind_check_cache(NULL);
if (ot->ot_ind_res_bufs) {
register XTIndBlockPtr block, fblock;
register DcGlobalsRec *dcg = &ind_cac_globals;
xt_lock_mutex_ns(&dcg->cg_lock);
block = ot->ot_ind_res_bufs;
block = block->cb_next;
fblock->cb_next = dcg->cg_free_list;
dcg->cg_free_list = fblock;
#ifdef DEBUG_CHECK_IND_CACHE
dcg->cg_reserved_by_ots--;
dcg->cg_free_count++;
xt_unlock_mutex_ns(&dcg->cg_lock);
ot->ot_ind_res_bufs = NULL;
ot->ot_ind_res_count = 0;
#ifdef DEBUG_CHECK_IND_CACHE
xt_ind_check_cache(NULL);
xtPublic void xt_ind_unreserve(XTOpenTablePtr ot)
if (!ind_cac_globals.cg_free_list)
xt_ind_free_reserved(ot);
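/* A sketch of how the reserve calls are used around an index update (this
 * matches the first stack trace above, where xt_idx_insert() calls
 * xt_ind_reserve(); the exact call sites are outside this file):
 *
 *   if (!xt_ind_reserve(ot, blocks_needed, NULL))   // blocks_needed is illustrative
 *       return FAILED;
 *   // ... perform the index update, which may fetch pages ...
 *   xt_ind_unreserve(ot);
 *
 * ind_cac_fetch() takes pages from ot->ot_ind_res_bufs before touching the
 * global free list, so a reserving thread cannot run out of cache in the
 * middle of a structure change. xt_ind_unreserve() only hands the unused
 * reserved pages back (via xt_ind_free_reserved()) when the global free list
 * is empty.
 */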