/* Copyright (c) 2005 PrimeBase Technologies GmbH, Germany
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * 2005-05-24	Paul McCullagh
 */
24
#include "xt_config.h"
37
#include "pthread_xt.h"
38
#include "thread_xt.h"
39
#include "filesys_xt.h"
45
/* Compute (now - start) as an unsigned 32-bit tick difference,
 * compensating for wrap-around of the 32-bit counter.
 * NOTE(review): when the counter has wrapped the result is one tick
 * short of the true modular difference (0xFFFFFFFF - d instead of
 * 2^32 - d); callers only use this for coarse LRU aging, so the
 * historical behavior is preserved here. */
#define XT_TIME_DIFF(start, now) (\
	((xtWord4) (now) < (xtWord4) (start)) ? \
	((xtWord4) 0XFFFFFFFF - ((xtWord4) (start) - (xtWord4) (now))) : \
	((xtWord4) (now) - (xtWord4) (start)))
51
* -----------------------------------------------------------------------
55
#define IDX_CAC_SEGMENT_COUNT ((off_t) 1 << XT_INDEX_CACHE_SEGMENT_SHIFTS)
56
#define IDX_CAC_SEGMENT_MASK (IDX_CAC_SEGMENT_COUNT - 1)
59
#define IDX_CAC_USE_PTHREAD_RW
61
//#define IDX_CAC_USE_PTHREAD_RW
62
#define IDX_CAC_USE_XSMUTEX
63
//#define IDX_USE_SPINXSLOCK
66
#if defined(IDX_CAC_USE_PTHREAD_RW)
67
#define IDX_CAC_LOCK_TYPE xt_rwlock_type
68
#define IDX_CAC_INIT_LOCK(s, i) xt_init_rwlock_with_autoname(s, &(i)->cs_lock)
69
#define IDX_CAC_FREE_LOCK(s, i) xt_free_rwlock(&(i)->cs_lock)
70
#define IDX_CAC_READ_LOCK(i, o) xt_slock_rwlock_ns(&(i)->cs_lock)
71
#define IDX_CAC_WRITE_LOCK(i, o) xt_xlock_rwlock_ns(&(i)->cs_lock)
72
#define IDX_CAC_UNLOCK(i, o) xt_unlock_rwlock_ns(&(i)->cs_lock)
73
#elif defined(IDX_CAC_USE_XSMUTEX)
74
#define IDX_CAC_LOCK_TYPE XTMutexXSLockRec
75
#define IDX_CAC_INIT_LOCK(s, i) xt_xsmutex_init_with_autoname(s, &(i)->cs_lock)
76
#define IDX_CAC_FREE_LOCK(s, i) xt_xsmutex_free(s, &(i)->cs_lock)
77
#define IDX_CAC_READ_LOCK(i, o) xt_xsmutex_slock(&(i)->cs_lock, (o)->t_id)
78
#define IDX_CAC_WRITE_LOCK(i, o) xt_xsmutex_xlock(&(i)->cs_lock, (o)->t_id)
79
#define IDX_CAC_UNLOCK(i, o) xt_xsmutex_unlock(&(i)->cs_lock, (o)->t_id)
80
#elif defined(IDX_CAC_USE_SPINXSLOCK)
81
#define IDX_CAC_LOCK_TYPE XTSpinXSLockRec
82
#define IDX_CAC_INIT_LOCK(s, i) xt_spinxslock_init_with_autoname(s, &(i)->cs_lock)
83
#define IDX_CAC_FREE_LOCK(s, i) xt_spinxslock_free(s, &(i)->cs_lock)
84
#define IDX_CAC_READ_LOCK(i, s) xt_spinxslock_slock(&(i)->cs_lock, (s)->t_id)
85
#define IDX_CAC_WRITE_LOCK(i, s) xt_spinxslock_xlock(&(i)->cs_lock, FALSE, (s)->t_id)
86
#define IDX_CAC_UNLOCK(i, s) xt_spinxslock_unlock(&(i)->cs_lock, (s)->t_id)
88
#error Please define the lock type
92
#define ID_HANDLE_USE_PTHREAD_RW
94
//#define ID_HANDLE_USE_PTHREAD_RW
95
#define ID_HANDLE_USE_SPINLOCK
98
#if defined(ID_HANDLE_USE_PTHREAD_RW)
99
#define ID_HANDLE_LOCK_TYPE xt_mutex_type
100
#define ID_HANDLE_INIT_LOCK(s, i) xt_init_mutex_with_autoname(s, i)
101
#define ID_HANDLE_FREE_LOCK(s, i) xt_free_mutex(i)
102
#define ID_HANDLE_LOCK(i) xt_lock_mutex_ns(i)
103
#define ID_HANDLE_UNLOCK(i) xt_unlock_mutex_ns(i)
104
#elif defined(ID_HANDLE_USE_SPINLOCK)
105
#define ID_HANDLE_LOCK_TYPE XTSpinLockRec
106
#define ID_HANDLE_INIT_LOCK(s, i) xt_spinlock_init_with_autoname(s, i)
107
#define ID_HANDLE_FREE_LOCK(s, i) xt_spinlock_free(s, i)
108
#define ID_HANDLE_LOCK(i) xt_spinlock_lock(i)
109
#define ID_HANDLE_UNLOCK(i) xt_spinlock_unlock(i)
112
#define XT_HANDLE_SLOTS 37
116
#define XT_INIT_HANDLE_COUNT 0
117
#define XT_INIT_HANDLE_BLOCKS 0
119
#define XT_INIT_HANDLE_COUNT 40
120
#define XT_INIT_HANDLE_BLOCKS 10
124
/* A disk cache segment. The cache is divided into a number of segments
125
* to improve concurrency.
127
typedef struct DcSegment {
128
IDX_CAC_LOCK_TYPE cs_lock; /* The cache segment lock. */
129
XTIndBlockPtr *cs_hash_table;
130
} DcSegmentRec, *DcSegmentPtr;
132
typedef struct DcHandleSlot {
133
ID_HANDLE_LOCK_TYPE hs_handles_lock;
134
XTIndHandleBlockPtr hs_free_blocks;
135
XTIndHandlePtr hs_free_handles;
136
XTIndHandlePtr hs_used_handles;
137
} DcHandleSlotRec, *DcHandleSlotPtr;
139
typedef struct DcGlobals {
140
xt_mutex_type cg_lock; /* The public cache lock. */
141
DcSegmentRec cg_segment[IDX_CAC_SEGMENT_COUNT];
142
XTIndBlockPtr cg_blocks;
143
#ifdef XT_USE_DIRECT_IO_ON_INDEX
146
XTIndBlockPtr cg_free_list;
147
xtWord4 cg_free_count;
148
xtWord4 cg_ru_now; /* A counter as described by Jim Starkey (my thanks) */
149
XTIndBlockPtr cg_lru_block;
150
XTIndBlockPtr cg_mru_block;
151
xtWord4 cg_hash_size;
152
xtWord4 cg_block_count;
154
#ifdef DEBUG_CHECK_IND_CACHE
155
u_int cg_reserved_by_ots; /* Number of blocks reserved by open tables. */
156
u_int cg_read_count; /* Number of blocks being read. */
159
/* Index cache handles: */
160
DcHandleSlotRec cg_handle_slot[XT_HANDLE_SLOTS];
163
static DcGlobalsRec ind_cac_globals;
169
#include "my_global.h"
171
#include "keycache.h"
173
#undef pthread_rwlock_rdlock
174
#undef pthread_rwlock_wrlock
175
#undef pthread_rwlock_try_wrlock
176
#undef pthread_rwlock_unlock
177
#undef pthread_mutex_lock
178
#undef pthread_mutex_unlock
179
#undef pthread_cond_wait
180
#undef pthread_cond_broadcast
186
* -----------------------------------------------------------------------
187
* INDEX CACHE HANDLES
190
static XTIndHandlePtr ind_alloc_handle()
192
XTIndHandlePtr handle;
194
if (!(handle = (XTIndHandlePtr) xt_calloc_ns(sizeof(XTIndHandleRec))))
196
xt_spinlock_init_with_autoname(NULL, &handle->ih_lock);
200
/* Destroy a handle created by ind_alloc_handle():
 * release its spinlock, then free the handle memory itself. */
static void ind_free_handle(XTIndHandlePtr handle)
{
	xt_spinlock_free(NULL, &handle->ih_lock);
	/* NOTE(review): pairs with the xt_calloc_ns() in ind_alloc_handle();
	 * without this free every released handle would leak. */
	xt_free_ns(handle);
}
206
/* Free all index-cache handle resources at shutdown.
 * Must be called before the cache blocks themselves are freed, because
 * releasing a still-used handle dereferences its cache block. */
static void ind_handle_exit(XTThreadPtr self)
{
	DcHandleSlotPtr		hs;
	XTIndHandlePtr		handle;
	XTIndHandleBlockPtr	hptr;

	for (int i=0; i<XT_HANDLE_SLOTS; i++) {
		hs = &ind_cac_globals.cg_handle_slot[i];

		/* Release any handles still in use: */
		while (hs->hs_used_handles) {
			handle = hs->hs_used_handles;
			xt_ind_release_handle(handle, FALSE, self);
		}

		/* Free the handle blocks on the free list: */
		while (hs->hs_free_blocks) {
			hptr = hs->hs_free_blocks;
			hs->hs_free_blocks = hptr->hb_next;
			/* NOTE(review): handle blocks are heap-allocated
			 * (xt_malloc_ns in xt_ind_copy_on_write), so they
			 * must be freed here -- confirm against original. */
			xt_free(self, hptr);
		}

		/* Free the handles on the free list: */
		while (hs->hs_free_handles) {
			handle = hs->hs_free_handles;
			hs->hs_free_handles = handle->ih_next;
			ind_free_handle(handle);
		}

		ID_HANDLE_FREE_LOCK(self, &hs->hs_handles_lock);
	}
}
236
/* Initialize all handle slots: zero each slot and create its list lock. */
static void ind_handle_init(XTThreadPtr self)
{
	DcHandleSlotPtr hs;

	for (int i=0; i<XT_HANDLE_SLOTS; i++) {
		hs = &ind_cac_globals.cg_handle_slot[i];
		memset(hs, 0, sizeof(DcHandleSlotRec));
		ID_HANDLE_INIT_LOCK(self, &hs->hs_handles_lock);
	}
}
247
//#define CHECK_HANDLE_STRUCTS
249
#ifdef CHECK_HANDLE_STRUCTS
250
static int gdummy = 0;
252
static void ic_stop_here()
255
printf("Nooo %d!\n", gdummy);
258
static void ic_check_handle_structs()
260
XTIndHandlePtr handle, phandle;
261
XTIndHandleBlockPtr hptr, phptr;
266
handle = ind_cac_globals.cg_used_handles;
268
if (handle == phandle)
270
if (handle->ih_prev != phandle)
272
if (handle->ih_cache_reference) {
273
ctest = handle->x.ih_cache_block->cb_handle_count;
274
if (ctest == 0 || ctest > 100)
278
ctest = handle->x.ih_handle_block->hb_ref_count;
279
if (ctest == 0 || ctest > 100)
283
handle = handle->ih_next;
290
hptr = ind_cac_globals.cg_free_blocks;
295
hptr = hptr->hb_next;
302
handle = ind_cac_globals.cg_free_handles;
304
if (handle == phandle)
307
handle = handle->ih_next;
316
* Get a handle to the index block.
317
* This function is called by index scanners (readers).
319
xtPublic XTIndHandlePtr xt_ind_get_handle(XTOpenTablePtr ot, XTIndexPtr ind, XTIndReferencePtr iref)
322
XTIndHandlePtr handle;
324
hs = &ind_cac_globals.cg_handle_slot[iref->ir_block->cb_address % XT_HANDLE_SLOTS];
326
ASSERT_NS(iref->ir_xlock == FALSE);
327
ASSERT_NS(iref->ir_updated == FALSE);
328
ID_HANDLE_LOCK(&hs->hs_handles_lock);
329
#ifdef CHECK_HANDLE_STRUCTS
330
ic_check_handle_structs();
332
if ((handle = hs->hs_free_handles))
333
hs->hs_free_handles = handle->ih_next;
335
if (!(handle = ind_alloc_handle())) {
336
ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
337
xt_ind_release(ot, ind, XT_UNLOCK_READ, iref);
341
if (hs->hs_used_handles)
342
hs->hs_used_handles->ih_prev = handle;
343
handle->ih_next = hs->hs_used_handles;
344
handle->ih_prev = NULL;
345
handle->ih_address = iref->ir_block->cb_address;
346
handle->ih_cache_reference = TRUE;
347
handle->x.ih_cache_block = iref->ir_block;
348
handle->ih_branch = iref->ir_branch;
349
/* {HANDLE-COUNT-USAGE}
350
* This is safe because:
352
* I have an Slock on the cache block, and I have
353
* at least an Slock on the index.
354
* So this excludes anyone who is reading
355
* cb_handle_count in the index.
356
* (all cache block writers, and the freeer).
358
* The increment is safe because I have the list
359
* lock (hs_handles_lock), which is required by anyone else
360
* who increments or decrements this value.
362
iref->ir_block->cb_handle_count++;
363
hs->hs_used_handles = handle;
364
#ifdef CHECK_HANDLE_STRUCTS
365
ic_check_handle_structs();
367
ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
368
xt_ind_release(ot, ind, XT_UNLOCK_READ, iref);
372
xtPublic void xt_ind_release_handle(XTIndHandlePtr handle, xtBool have_lock, XTThreadPtr thread)
375
XTIndBlockPtr block = NULL;
377
DcSegmentPtr seg = NULL;
378
XTIndBlockPtr xblock;
380
/* The lock order is:
381
* 1. Cache segment (cs_lock) - This is only by ind_free_block()!
382
* 1. S/Slock cache block (cb_lock)
383
* 2. List lock (cg_handles_lock).
384
* 3. Handle lock (ih_lock)
387
xt_spinlock_lock(&handle->ih_lock);
389
/* Get the lock on the cache page if required: */
390
if (handle->ih_cache_reference) {
392
xtIndexNodeID address;
394
block = handle->x.ih_cache_block;
396
file_id = block->cb_file_id;
397
address = block->cb_address;
398
hash_idx = XT_NODE_ID(address) + (file_id * 223);
399
seg = &ind_cac_globals.cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
400
hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % ind_cac_globals.cg_hash_size;
403
xt_spinlock_unlock(&handle->ih_lock);
405
/* Because of the lock order, I have to release the
406
* handle before I get a lock on the cache block.
408
* But, by doing this, this cache block may be gone!
411
IDX_CAC_READ_LOCK(seg, thread);
412
xblock = seg->cs_hash_table[hash_idx];
414
if (block == xblock) {
415
/* Found the block...
416
* {HANDLE-COUNT-SLOCK}
417
* 04.05.2009, changed to slock.
418
* The xlock causes too much contention
419
* on the cache block for read only loads.
424
XT_IPAGE_READ_LOCK(&block->cb_lock);
427
xblock = xblock->cb_next;
431
IDX_CAC_UNLOCK(seg, thread);
434
hs = &ind_cac_globals.cg_handle_slot[handle->ih_address % XT_HANDLE_SLOTS];
436
ID_HANDLE_LOCK(&hs->hs_handles_lock);
437
#ifdef CHECK_HANDLE_STRUCTS
438
ic_check_handle_structs();
441
/* I don't need to lock the handle because I have locked
442
* the list, and no other thread can change the
443
* handle without first getting a lock on the list.
445
* In addition, the caller is the only owner of the
446
* handle, and the only thread with an independent
447
* reference to the handle.
448
* All other access occur over the list.
451
/* Remove the reference to the cache or a handle block: */
452
if (handle->ih_cache_reference) {
453
ASSERT_NS(block == handle->x.ih_cache_block);
454
ASSERT_NS(block && block->cb_handle_count > 0);
455
/* {HANDLE-COUNT-USAGE}
456
* This is safe here because I have excluded
457
* all readers by taking an Xlock on the
458
* cache block (CHANGED - see below).
460
* {HANDLE-COUNT-SLOCK}
461
* 04.05.2009, changed to slock.
462
* Should be OK, because:
463
* A have a lock on the list lock (hs_handles_lock),
464
* which prevents concurrent updates to cb_handle_count.
466
* I have also have a read lock on the cache block
467
* but not a lock on the index. As a result, we cannot
468
* excluded all index writers (and readers of
471
block->cb_handle_count--;
474
XTIndHandleBlockPtr hptr = handle->x.ih_handle_block;
476
ASSERT_NS(!handle->ih_cache_reference);
477
ASSERT_NS(hptr->hb_ref_count > 0);
478
hptr->hb_ref_count--;
479
if (!hptr->hb_ref_count) {
480
/* Put it back on the free list: */
481
hptr->hb_next = hs->hs_free_blocks;
482
hs->hs_free_blocks = hptr;
486
/* Unlink the handle: */
488
handle->ih_next->ih_prev = handle->ih_prev;
490
handle->ih_prev->ih_next = handle->ih_next;
491
if (hs->hs_used_handles == handle)
492
hs->hs_used_handles = handle->ih_next;
494
/* Put it on the free list: */
495
handle->ih_next = hs->hs_free_handles;
496
hs->hs_free_handles = handle;
498
#ifdef CHECK_HANDLE_STRUCTS
499
ic_check_handle_structs();
501
ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
504
XT_IPAGE_UNLOCK(&block->cb_lock, FALSE);
507
/* Call this function before a referenced cache block is modified!
508
* This function is called by index updaters.
510
xtPublic xtBool xt_ind_copy_on_write(XTIndReferencePtr iref)
513
XTIndHandleBlockPtr hptr;
515
XTIndHandlePtr handle;
518
hs = &ind_cac_globals.cg_handle_slot[iref->ir_block->cb_address % XT_HANDLE_SLOTS];
520
ID_HANDLE_LOCK(&hs->hs_handles_lock);
522
/* {HANDLE-COUNT-USAGE}
523
* This is only called by updaters of this index block, or
524
* the free which holds an Xlock on the index block.
525
* These are all mutually exclusive for the index block.
527
* {HANDLE-COUNT-SLOCK}
528
* Do this check again, after we have the list lock (hs_handles_lock).
529
* There is a small chance that the count has changed, since we last
530
* checked because xt_ind_release_handle() only holds
531
* an slock on the index page.
533
* An updater can sometimes have a XLOCK on the index and an slock
534
* on the cache block. In this case xt_ind_release_handle()
535
* could have run through.
537
if (!iref->ir_block->cb_handle_count) {
538
ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
542
#ifdef CHECK_HANDLE_STRUCTS
543
ic_check_handle_structs();
545
if ((hptr = hs->hs_free_blocks))
546
hs->hs_free_blocks = hptr->hb_next;
548
if (!(hptr = (XTIndHandleBlockPtr) xt_malloc_ns(sizeof(XTIndHandleBlockRec)))) {
549
ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
554
branch_size = XT_GET_INDEX_BLOCK_LEN(XT_GET_DISK_2(iref->ir_branch->tb_size_2));
555
memcpy(&hptr->hb_branch, iref->ir_branch, branch_size);
556
hptr->hb_ref_count = iref->ir_block->cb_handle_count;
558
handle = hs->hs_used_handles;
560
if (handle->ih_branch == iref->ir_branch) {
562
xt_spinlock_lock(&handle->ih_lock);
563
ASSERT_NS(handle->ih_cache_reference);
564
handle->ih_cache_reference = FALSE;
565
handle->x.ih_handle_block = hptr;
566
handle->ih_branch = &hptr->hb_branch;
567
xt_spinlock_unlock(&handle->ih_lock);
569
if (i == hptr->hb_ref_count)
573
handle = handle->ih_next;
576
ASSERT_NS(hptr->hb_ref_count == i);
578
/* {HANDLE-COUNT-USAGE}
579
* It is safe to modify cb_handle_count when I have the
580
* list lock, and I have excluded all readers!
582
iref->ir_block->cb_handle_count = 0;
583
#ifdef CHECK_HANDLE_STRUCTS
584
ic_check_handle_structs();
586
ID_HANDLE_UNLOCK(&hs->hs_handles_lock);
591
/* Acquire the handle's private spinlock (ih_lock).
 * Counterpart of xt_ind_unlock_handle(). */
xtPublic void xt_ind_lock_handle(XTIndHandlePtr handle)
{
	xt_spinlock_lock(&handle->ih_lock);
}
596
/* Release the handle's private spinlock (ih_lock).
 * Counterpart of xt_ind_lock_handle(). */
xtPublic void xt_ind_unlock_handle(XTIndHandlePtr handle)
{
	xt_spinlock_unlock(&handle->ih_lock);
}
602
* -----------------------------------------------------------------------
607
* Initialize the disk cache.
609
xtPublic void xt_ind_init(XTThreadPtr self, size_t cache_size)
614
init_key_cache(&my_cache, 1024, cache_size, 100, 300);
616
/* Memory is devoted to the page data alone, I no longer count the size of the directory,
617
* or the page overhead: */
618
ind_cac_globals.cg_block_count = cache_size / XT_INDEX_PAGE_SIZE;
619
ind_cac_globals.cg_hash_size = ind_cac_globals.cg_block_count / (IDX_CAC_SEGMENT_COUNT >> 1);
620
ind_cac_globals.cg_max_free = ind_cac_globals.cg_block_count / 10;
621
if (ind_cac_globals.cg_max_free < 8)
622
ind_cac_globals.cg_max_free = 8;
623
if (ind_cac_globals.cg_max_free > 128)
624
ind_cac_globals.cg_max_free = 128;
627
for (u_int i=0; i<IDX_CAC_SEGMENT_COUNT; i++) {
628
ind_cac_globals.cg_segment[i].cs_hash_table = (XTIndBlockPtr *) xt_calloc(self, ind_cac_globals.cg_hash_size * sizeof(XTIndBlockPtr));
629
IDX_CAC_INIT_LOCK(self, &ind_cac_globals.cg_segment[i]);
632
block = (XTIndBlockPtr) xt_malloc(self, ind_cac_globals.cg_block_count * sizeof(XTIndBlockRec));
633
ind_cac_globals.cg_blocks = block;
634
xt_init_mutex_with_autoname(self, &ind_cac_globals.cg_lock);
635
#ifdef XT_USE_DIRECT_IO_ON_INDEX
640
size_t psize = getpagesize();
644
buffer = (xtWord1 *) xt_malloc(self, (ind_cac_globals.cg_block_count * XT_INDEX_PAGE_SIZE));
645
diff = (size_t) buffer % psize;
647
xt_free(self, buffer);
648
buffer = (xtWord1 *) xt_malloc(self, (ind_cac_globals.cg_block_count * XT_INDEX_PAGE_SIZE) + psize);
649
diff = (size_t) buffer % psize;
653
ind_cac_globals.cg_buffer = buffer;
657
for (u_int i=0; i<ind_cac_globals.cg_block_count; i++) {
658
XT_IPAGE_INIT_LOCK(self, &block->cb_lock);
659
block->cb_state = IDX_CAC_BLOCK_FREE;
660
block->cb_next = ind_cac_globals.cg_free_list;
661
#ifdef XT_USE_DIRECT_IO_ON_INDEX
662
block->cb_data = buffer;
663
buffer += XT_INDEX_PAGE_SIZE;
665
ind_cac_globals.cg_free_list = block;
668
ind_cac_globals.cg_free_count = ind_cac_globals.cg_block_count;
669
#ifdef DEBUG_CHECK_IND_CACHE
670
ind_cac_globals.cg_reserved_by_ots = 0;
672
ind_handle_init(self);
681
xtPublic void xt_ind_exit(XTThreadPtr self)
684
end_key_cache(&my_cache, 1);
686
for (u_int i=0; i<IDX_CAC_SEGMENT_COUNT; i++) {
687
if (ind_cac_globals.cg_segment[i].cs_hash_table) {
688
xt_free(self, ind_cac_globals.cg_segment[i].cs_hash_table);
689
ind_cac_globals.cg_segment[i].cs_hash_table = NULL;
690
IDX_CAC_FREE_LOCK(self, &ind_cac_globals.cg_segment[i]);
694
/* Must be done before freeing the blocks! */
695
ind_handle_exit(self);
697
if (ind_cac_globals.cg_blocks) {
698
xt_free(self, ind_cac_globals.cg_blocks);
699
ind_cac_globals.cg_blocks = NULL;
700
xt_free_mutex(&ind_cac_globals.cg_lock);
702
#ifdef XT_USE_DIRECT_IO_ON_INDEX
703
if (ind_cac_globals.cg_buffer) {
704
xt_free(self, ind_cac_globals.cg_buffer);
705
ind_cac_globals.cg_buffer = NULL;
709
memset(&ind_cac_globals, 0, sizeof(ind_cac_globals));
712
/* Return the number of bytes of index cache currently in use:
 * (total blocks - free blocks) * page size. */
xtPublic xtInt8 xt_ind_get_usage()
{
	xtInt8 size;

	size = (xtInt8) (ind_cac_globals.cg_block_count - ind_cac_globals.cg_free_count) * (xtInt8) XT_INDEX_PAGE_SIZE;
	return size;
}
720
/* Return the total configured size of the index cache in bytes
 * (all blocks, free or in use). */
xtPublic xtInt8 xt_ind_get_size()
{
	xtInt8 size;

	size = (xtInt8) ind_cac_globals.cg_block_count * (xtInt8) XT_INDEX_PAGE_SIZE;
	return size;
}
728
/* Return the total number of cache blocks (free and in use). */
xtPublic u_int xt_ind_get_blocks()
{
	return ind_cac_globals.cg_block_count;
}
734
* -----------------------------------------------------------------------
738
xtPublic void xt_ind_check_cache(XTIndexPtr ind)
741
u_int free_count, inuse_count, clean_count;
742
xtBool check_count = FALSE;
744
if (ind == (XTIndex *) 1) {
749
// Check the dirty list:
753
block = ind->mi_dirty_list;
756
ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_DIRTY);
757
block = block->cb_dirty_next;
759
ASSERT_NS(ind->mi_dirty_blocks == cnt);
762
xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
764
// Check the free list:
766
block = ind_cac_globals.cg_free_list;
769
ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_FREE);
770
block = block->cb_next;
772
ASSERT_NS(ind_cac_globals.cg_free_count == free_count);
774
/* Check the LRU list: */
775
XTIndBlockPtr list_block, plist_block;
778
list_block = ind_cac_globals.cg_lru_block;
780
ASSERT_NS(ind_cac_globals.cg_mru_block != NULL);
781
ASSERT_NS(ind_cac_globals.cg_mru_block->cb_mr_used == NULL);
782
ASSERT_NS(list_block->cb_lr_used == NULL);
787
ASSERT_NS(IDX_CAC_NOT_FREE(list_block->cb_state));
788
if (list_block->cb_state == IDX_CAC_BLOCK_CLEAN)
790
ASSERT_NS(block != list_block);
791
ASSERT_NS(list_block->cb_lr_used == plist_block);
792
plist_block = list_block;
793
list_block = list_block->cb_mr_used;
795
ASSERT_NS(ind_cac_globals.cg_mru_block == plist_block);
800
ASSERT_NS(ind_cac_globals.cg_mru_block == NULL);
803
#ifdef DEBUG_CHECK_IND_CACHE
804
ASSERT_NS(free_count + inuse_count + ind_cac_globals.cg_reserved_by_ots + ind_cac_globals.cg_read_count == ind_cac_globals.cg_block_count);
806
xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
808
/* We have just flushed, check how much is now free/clean. */
809
if (free_count + clean_count < 10) {
810
/* This could be a problem: */
811
printf("Cache very low!\n");
817
* -----------------------------------------------------------------------
818
* FREEING INDEX CACHE
822
* This function return TRUE if the block is freed.
823
* This function returns FALSE if the block cannot be found, or the
824
* block is not clean.
826
* We also return FALSE if we cannot copy the block to the handle
827
* (if this is required). This will be due to out-of-memory!
829
static xtBool ind_free_block(XTOpenTablePtr ot, XTIndBlockPtr block)
831
XTIndBlockPtr xblock, pxblock;
834
xtIndexNodeID address;
837
#ifdef DEBUG_CHECK_IND_CACHE
838
xt_ind_check_cache(NULL);
840
file_id = block->cb_file_id;
841
address = block->cb_address;
843
hash_idx = XT_NODE_ID(address) + (file_id * 223);
844
seg = &ind_cac_globals.cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
845
hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % ind_cac_globals.cg_hash_size;
847
IDX_CAC_WRITE_LOCK(seg, ot->ot_thread);
850
xblock = seg->cs_hash_table[hash_idx];
852
if (block == xblock) {
853
/* Found the block... */
854
/* It is possible that a thread enters this code holding a
855
* lock on a page. This can cause a deadlock:
857
* #0 0x91faa2ce in semaphore_wait_signal_trap
858
* #1 0x91fb1da5 in pthread_mutex_lock
859
* #2 0x00e2ec13 in xt_p_mutex_lock at pthread_xt.cc:544
860
* #3 0x00e6c30a in xt_xsmutex_xlock at lock_xt.cc:1547
861
* #4 0x00dee402 in ind_free_block at cache_xt.cc:879
862
* #5 0x00dee76a in ind_cac_free_lru_blocks at cache_xt.cc:1033
863
* #6 0x00def8d1 in xt_ind_reserve at cache_xt.cc:1513
864
* #7 0x00e22118 in xt_idx_insert at index_xt.cc:2047
865
* #8 0x00e4d7ee in xt_tab_new_record at table_xt.cc:4702
866
* #9 0x00e0ff0b in ha_pbxt::write_row at ha_pbxt.cc:2340
867
* #10 0x0023a00f in handler::ha_write_row at handler.cc:4570
868
* #11 0x001a32c8 in write_record at sql_insert.cc:1568
869
* #12 0x001ab635 in mysql_insert at sql_insert.cc:812
870
* #13 0x0010e068 in mysql_execute_command at sql_parse.cc:3066
871
* #14 0x0011480d in mysql_parse at sql_parse.cc:5787
872
* #15 0x00115afb in dispatch_command at sql_parse.cc:1200
873
* #16 0x00116de2 in do_command at sql_parse.cc:857
874
* #17 0x00101ee4 in handle_one_connection at sql_connect.cc:1115
875
* #18 0x91fdb155 in _pthread_start
876
* #19 0x91fdb012 in thread_start
878
* #0 0x91fb146e in __semwait_signal
879
* #1 0x91fb12ef in nanosleep$UNIX2003
880
* #2 0x91fb1236 in usleep$UNIX2003
881
* #3 0x00e52112 in xt_yield at thread_xt.cc:1274
882
* #4 0x00e6c0eb in xt_spinxslock_xlock at lock_xt.cc:1456
883
* #5 0x00dee444 in ind_free_block at cache_xt.cc:886
884
* #6 0x00dee76a in ind_cac_free_lru_blocks at cache_xt.cc:1033
885
* #7 0x00deeaf0 in ind_cac_fetch at cache_xt.cc:1130
886
* #8 0x00def604 in xt_ind_fetch at cache_xt.cc:1386
887
* #9 0x00e2159a in xt_idx_update_row_id at index_xt.cc:2489
888
* #10 0x00e603c8 in xn_sw_clean_indices at xaction_xt.cc:1932
889
* #11 0x00e606d4 in xn_sw_cleanup_variation at xaction_xt.cc:2056
890
* #12 0x00e60e29 in xn_sw_cleanup_xact at xaction_xt.cc:2276
891
* #13 0x00e615ed in xn_sw_main at xaction_xt.cc:2433
892
* #14 0x00e61919 in xn_sw_run_thread at xaction_xt.cc:2564
893
* #15 0x00e53f80 in thr_main at thread_xt.cc:1017
894
* #16 0x91fdb155 in _pthread_start
895
* #17 0x91fdb012 in thread_start
897
* So we back off if a lock is held!
899
if (!XT_IPAGE_WRITE_TRY_LOCK(&block->cb_lock, ot->ot_thread->t_id)) {
900
IDX_CAC_UNLOCK(seg, ot->ot_thread);
901
#ifdef DEBUG_CHECK_IND_CACHE
902
xt_ind_check_cache(NULL);
906
if (block->cb_state != IDX_CAC_BLOCK_CLEAN) {
907
/* This block cannot be freeed: */
908
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
909
IDX_CAC_UNLOCK(seg, ot->ot_thread);
910
#ifdef DEBUG_CHECK_IND_CACHE
911
xt_ind_check_cache(NULL);
919
xblock = xblock->cb_next;
922
IDX_CAC_UNLOCK(seg, ot->ot_thread);
924
/* Not found (this can happen, if block was freed by another thread) */
925
#ifdef DEBUG_CHECK_IND_CACHE
926
xt_ind_check_cache(NULL);
932
/* If the block is reference by a handle, then we
933
* have to copy the data to the handle before we
936
/* {HANDLE-COUNT-USAGE}
937
* This access is safe because:
939
* We have an Xlock on the cache block, which excludes
940
* all other writers that want to change the cache block
941
* and also all readers of the cache block, because
942
* they all have at least an Slock on the cache block.
944
if (block->cb_handle_count) {
945
XTIndReferenceRec iref;
947
iref.ir_xlock = TRUE;
948
iref.ir_updated = FALSE;
949
iref.ir_block = block;
950
iref.ir_branch = (XTIdxBranchDPtr) block->cb_data;
951
if (!xt_ind_copy_on_write(&iref)) {
952
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
957
/* Block is clean, remove from the hash table: */
959
pxblock->cb_next = block->cb_next;
961
seg->cs_hash_table[hash_idx] = block->cb_next;
963
xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
965
/* Remove from the MRU list: */
966
if (ind_cac_globals.cg_lru_block == block)
967
ind_cac_globals.cg_lru_block = block->cb_mr_used;
968
if (ind_cac_globals.cg_mru_block == block)
969
ind_cac_globals.cg_mru_block = block->cb_lr_used;
971
/* Note, I am updating blocks for which I have no lock
972
* here. But I think this is OK because I have a lock
975
if (block->cb_lr_used)
976
block->cb_lr_used->cb_mr_used = block->cb_mr_used;
977
if (block->cb_mr_used)
978
block->cb_mr_used->cb_lr_used = block->cb_lr_used;
980
/* The block is now free: */
981
block->cb_next = ind_cac_globals.cg_free_list;
982
ind_cac_globals.cg_free_list = block;
983
ind_cac_globals.cg_free_count++;
984
ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
985
block->cb_state = IDX_CAC_BLOCK_FREE;
986
IDX_TRACE("%d- f%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(block->cb_data));
988
/* Unlock BEFORE the block is reused! */
989
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
991
xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
993
IDX_CAC_UNLOCK(seg, ot->ot_thread);
995
#ifdef DEBUG_CHECK_IND_CACHE
996
xt_ind_check_cache(NULL);
1001
#define IND_CACHE_MAX_BLOCKS_TO_FREE 100
1004
* Return the number of blocks freed.
1006
* The idea is to grab a list of blocks to free.
1007
* The list consists of the LRU blocks that are
1010
* Free as many as possible (up to max of blocks_required)
1011
* from the list, even if LRU position has changed
1012
* (or we have a race if there are too few blocks).
1013
* However, if the block cannot be found, or is dirty
1016
* Repeat until we find no blocks for the list, or
1017
* we have freed 'blocks_required'.
1019
* 'not_this' is a block that must not be freed because
1020
* it is locked by the calling thread!
1022
static u_int ind_cac_free_lru_blocks(XTOpenTablePtr ot, u_int blocks_required, XTIdxBranchDPtr not_this)
1024
register DcGlobalsRec *dcg = &ind_cac_globals;
1025
XTIndBlockPtr to_free[IND_CACHE_MAX_BLOCKS_TO_FREE];
1027
XTIndBlockPtr block;
1028
u_int blocks_freed = 0;
1029
XTIndBlockPtr locked_block;
1031
#ifdef XT_USE_DIRECT_IO_ON_INDEX
1032
#error This will not work!
1034
locked_block = (XTIndBlockPtr) ((xtWord1 *) not_this - offsetof(XTIndBlockRec, cb_data));
1037
xt_lock_mutex_ns(&ind_cac_globals.cg_lock);
1038
block = dcg->cg_lru_block;
1040
while (block && count < IND_CACHE_MAX_BLOCKS_TO_FREE) {
1041
if (block != locked_block && block->cb_state == IDX_CAC_BLOCK_CLEAN) {
1042
to_free[count] = block;
1045
block = block->cb_mr_used;
1047
xt_unlock_mutex_ns(&ind_cac_globals.cg_lock);
1050
return blocks_freed;
1052
for (int i=0; i<count; i++) {
1053
if (ind_free_block(ot, to_free[i]))
1055
if (blocks_freed >= blocks_required &&
1056
ind_cac_globals.cg_free_count >= ind_cac_globals.cg_max_free + blocks_required)
1057
return blocks_freed;
1064
* -----------------------------------------------------------------------
1065
* MAIN CACHE FUNCTIONS
1069
* Fetch the block. Note, if we are about to write the block
1070
* then there is no need to read it from disk!
1072
static XTIndBlockPtr ind_cac_fetch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, DcSegmentPtr *ret_seg, xtBool read_data)
1074
register XTOpenFilePtr file = ot->ot_ind_file;
1075
register XTIndBlockPtr block, new_block;
1076
register DcSegmentPtr seg;
1077
register u_int hash_idx;
1078
register DcGlobalsRec *dcg = &ind_cac_globals;
1081
#ifdef DEBUG_CHECK_IND_CACHE
1082
xt_ind_check_cache(NULL);
1084
/* Address, plus file ID multiplied by my favorite prime number! */
1085
hash_idx = XT_NODE_ID(address) + (file->fr_id * 223);
1086
seg = &dcg->cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
1087
hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % dcg->cg_hash_size;
1089
IDX_CAC_READ_LOCK(seg, ot->ot_thread);
1090
block = seg->cs_hash_table[hash_idx];
1092
if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
1093
ASSERT_NS(block->cb_state != IDX_CAC_BLOCK_FREE);
1095
/* Check how recently this page has been used: */
1096
if (XT_TIME_DIFF(block->cb_ru_time, dcg->cg_ru_now) > (dcg->cg_block_count >> 1)) {
1097
xt_lock_mutex_ns(&dcg->cg_lock);
1099
/* Move to the front of the MRU list: */
1100
block->cb_ru_time = ++dcg->cg_ru_now;
1101
if (dcg->cg_mru_block != block) {
1102
/* Remove from the MRU list: */
1103
if (dcg->cg_lru_block == block)
1104
dcg->cg_lru_block = block->cb_mr_used;
1105
if (block->cb_lr_used)
1106
block->cb_lr_used->cb_mr_used = block->cb_mr_used;
1107
if (block->cb_mr_used)
1108
block->cb_mr_used->cb_lr_used = block->cb_lr_used;
1110
/* Make the block the most recently used: */
1111
if ((block->cb_lr_used = dcg->cg_mru_block))
1112
dcg->cg_mru_block->cb_mr_used = block;
1113
block->cb_mr_used = NULL;
1114
dcg->cg_mru_block = block;
1115
if (!dcg->cg_lru_block)
1116
dcg->cg_lru_block = block;
1119
xt_unlock_mutex_ns(&dcg->cg_lock);
1123
#ifdef DEBUG_CHECK_IND_CACHE
1124
xt_ind_check_cache(NULL);
1126
ot->ot_thread->st_statistics.st_ind_cache_hit++;
1129
block = block->cb_next;
1132
/* Block not found... */
1133
IDX_CAC_UNLOCK(seg, ot->ot_thread);
1135
/* Check the open table reserve list first: */
1136
if ((new_block = ot->ot_ind_res_bufs)) {
1137
ot->ot_ind_res_bufs = new_block->cb_next;
1138
ot->ot_ind_res_count--;
1139
#ifdef DEBUG_CHECK_IND_CACHE
1140
xt_lock_mutex_ns(&dcg->cg_lock);
1141
dcg->cg_reserved_by_ots--;
1142
dcg->cg_read_count++;
1143
xt_unlock_mutex_ns(&dcg->cg_lock);
1145
goto use_free_block;
1149
if (!dcg->cg_free_list) {
1150
if (!ind_cac_free_lru_blocks(ot, 1, NULL)) {
1151
if (!dcg->cg_free_list) {
1152
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_INDEX_CACHE);
1153
#ifdef DEBUG_CHECK_IND_CACHE
1154
xt_ind_check_cache(NULL);
1161
/* Get a free block: */
1162
xt_lock_mutex_ns(&dcg->cg_lock);
1163
if (!(new_block = dcg->cg_free_list)) {
1164
xt_unlock_mutex_ns(&dcg->cg_lock);
1165
goto free_some_blocks;
1167
ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_FREE);
1168
dcg->cg_free_list = new_block->cb_next;
1169
dcg->cg_free_count--;
1170
#ifdef DEBUG_CHECK_IND_CACHE
1171
dcg->cg_read_count++;
1173
xt_unlock_mutex_ns(&dcg->cg_lock);
1176
new_block->cb_address = address;
1177
new_block->cb_file_id = file->fr_id;
1178
ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_FREE);
1179
new_block->cb_state = IDX_CAC_BLOCK_CLEAN;
1180
new_block->cb_handle_count = 0;
1181
new_block->cp_del_count = 0;
1182
new_block->cb_dirty_next = NULL;
1183
new_block->cb_dirty_prev = NULL;
1184
#ifdef IND_OPT_DATA_WRITTEN
1185
new_block->cb_header = FALSE;
1186
new_block->cb_min_pos = 0xFFFF;
1187
new_block->cb_max_pos = 0;
1191
if (!xt_pread_file(file, xt_ind_node_to_offset(ot->ot_table, address), XT_INDEX_PAGE_SIZE, 0, new_block->cb_data, &red_size, &ot->ot_thread->st_statistics.st_ind, ot->ot_thread)) {
1192
xt_lock_mutex_ns(&dcg->cg_lock);
1193
new_block->cb_next = dcg->cg_free_list;
1194
dcg->cg_free_list = new_block;
1195
dcg->cg_free_count++;
1196
#ifdef DEBUG_CHECK_IND_CACHE
1197
dcg->cg_read_count--;
1199
ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_CLEAN);
1200
new_block->cb_state = IDX_CAC_BLOCK_FREE;
1201
IDX_TRACE("%d- F%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
1202
xt_unlock_mutex_ns(&dcg->cg_lock);
1203
#ifdef DEBUG_CHECK_IND_CACHE
1204
xt_ind_check_cache(NULL);
1208
IDX_TRACE("%d- R%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
1209
ot->ot_thread->st_statistics.st_ind_cache_miss++;
1213
// PMC - I don't think this is required! memset(new_block->cb_data + red_size, 0, XT_INDEX_PAGE_SIZE - red_size);
1215
IDX_CAC_WRITE_LOCK(seg, ot->ot_thread);
1216
block = seg->cs_hash_table[hash_idx];
1218
if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
1219
/* Oops, someone else was faster! */
1220
xt_lock_mutex_ns(&dcg->cg_lock);
1221
new_block->cb_next = dcg->cg_free_list;
1222
dcg->cg_free_list = new_block;
1223
dcg->cg_free_count++;
1224
#ifdef DEBUG_CHECK_IND_CACHE
1225
dcg->cg_read_count--;
1227
ASSERT_NS(new_block->cb_state == IDX_CAC_BLOCK_CLEAN);
1228
new_block->cb_state = IDX_CAC_BLOCK_FREE;
1229
IDX_TRACE("%d- F%x\n", (int) XT_NODE_ID(address), (int) XT_GET_DISK_2(new_block->cb_data));
1230
xt_unlock_mutex_ns(&dcg->cg_lock);
1233
block = block->cb_next;
1237
/* Make the block the most recently used: */
1238
xt_lock_mutex_ns(&dcg->cg_lock);
1239
block->cb_ru_time = ++dcg->cg_ru_now;
1240
if ((block->cb_lr_used = dcg->cg_mru_block))
1241
dcg->cg_mru_block->cb_mr_used = block;
1242
block->cb_mr_used = NULL;
1243
dcg->cg_mru_block = block;
1244
if (!dcg->cg_lru_block)
1245
dcg->cg_lru_block = block;
1246
#ifdef DEBUG_CHECK_IND_CACHE
1247
dcg->cg_read_count--;
1249
xt_unlock_mutex_ns(&dcg->cg_lock);
1251
/* {LAZY-DEL-INDEX-ITEMS}
1252
* Conditionally count the number of deleted entries in the index:
1253
* We do this before other threads can read the block.
1255
if (ind->mi_lazy_delete && read_data)
1256
xt_ind_count_deleted_items(ot->ot_table, ind, block);
1258
/* Add to the hash table: */
1259
block->cb_next = seg->cs_hash_table[hash_idx];
1260
seg->cs_hash_table[hash_idx] = block;
1264
#ifdef DEBUG_CHECK_IND_CACHE
1265
xt_ind_check_cache(NULL);
1270
/*
 * Look up an index page in the cache WITHOUT reading it from disk on a miss.
 * On a hit the segment lock is held on return (caller releases via
 * IDX_CAC_UNLOCK) and *ret_seg / *ret_block are presumably set by the
 * missing hit-path lines.
 *
 * NOTE(review): this region is a damaged extraction — the bare numeric
 * lines are leftover source line numbers, and gaps in that numbering
 * (e.g. 1286->1292) show that statements, braces and return paths are
 * missing. Do not assume the visible code is complete; recover the
 * original file before modifying logic.
 */
static xtBool ind_cac_get(XTOpenTablePtr ot, xtIndexNodeID address, DcSegmentPtr *ret_seg, XTIndBlockPtr *ret_block)
1272
register XTOpenFilePtr file = ot->ot_ind_file;
1273
register XTIndBlockPtr block;
1274
register DcSegmentPtr seg;
1275
register u_int hash_idx;
1276
register DcGlobalsRec *dcg = &ind_cac_globals;
1278
/* Hash on node id and file id; low bits pick the segment,
 * remaining bits pick the bucket within the segment: */
hash_idx = XT_NODE_ID(address) + (file->fr_id * 223);
1279
seg = &dcg->cg_segment[hash_idx & IDX_CAC_SEGMENT_MASK];
1280
hash_idx = (hash_idx >> XT_INDEX_CACHE_SEGMENT_SHIFTS) % dcg->cg_hash_size;
1282
IDX_CAC_READ_LOCK(seg, ot->ot_thread);
1283
block = seg->cs_hash_table[hash_idx];
1285
/* Walk the bucket chain looking for an exact (node, file) match.
 * NOTE(review): the loop header enclosing this body is among the
 * missing lines. */
if (XT_NODE_ID(block->cb_address) == XT_NODE_ID(address) && block->cb_file_id == file->fr_id) {
1286
ASSERT_NS(block->cb_state != IDX_CAC_BLOCK_FREE);
1292
block = block->cb_next;
1294
IDX_CAC_UNLOCK(seg, ot->ot_thread);
1296
/* Block not found: */
1302
/*
 * Write a full index page image into the cache and mark the block dirty.
 * Fetches (or loads) the page via ind_cac_fetch(), takes the page write
 * lock, copies `size` bytes from `data`, and links the block onto the
 * index's dirty list if it was not dirty already.
 *
 * NOTE(review): damaged extraction — bare numeric lines are leftover
 * source line numbers; gaps in them (e.g. 1314->1318) indicate missing
 * statements/braces (error returns, #endif lines, final `return`).
 */
xtPublic xtBool xt_ind_write(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, size_t size, xtWord1 *data)
1304
XTIndBlockPtr block;
1307
if (!(block = ind_cac_fetch(ot, ind, address, &seg, FALSE)))
1310
XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
1311
/* A block being flushed must first have its current image secured
 * in the index log before it may be overwritten: */
if (block->cb_state == IDX_CAC_BLOCK_FLUSHING) {
1312
if (!ot->ot_table->tab_ind_flush_ilog->il_write_block(ot, block)) {
1313
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
1314
IDX_CAC_UNLOCK(seg, ot->ot_thread);
1318
#ifdef IND_OPT_DATA_WRITTEN
/* Track the modified byte range so only changed bytes need flushing: */
1319
block->cb_header = TRUE;
1320
block->cb_min_pos = 0;
1321
if (size-XT_INDEX_PAGE_HEAD_SIZE > block->cb_max_pos)
1322
block->cb_max_pos = size-XT_INDEX_PAGE_HEAD_SIZE;
1323
ASSERT_NS(block->cb_max_pos <= XT_INDEX_PAGE_SIZE-XT_INDEX_PAGE_HEAD_SIZE);
1324
ASSERT_NS(block->cb_min_pos < block->cb_max_pos);
1326
ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
1327
memcpy(block->cb_data, data, size);
1328
/* First modification of a clean/logged page: push onto the dirty list. */
if (block->cb_state != IDX_CAC_BLOCK_DIRTY) {
1329
TRACK_BLOCK_WRITE(offset);
1330
xt_spinlock_lock(&ind->mi_dirty_lock);
1331
if ((block->cb_dirty_next = ind->mi_dirty_list))
1332
ind->mi_dirty_list->cb_dirty_prev = block;
1333
block->cb_dirty_prev = NULL;
1334
ind->mi_dirty_list = block;
1335
ind->mi_dirty_blocks++;
1336
xt_spinlock_unlock(&ind->mi_dirty_lock);
1337
if (block->cb_state != IDX_CAC_BLOCK_LOGGED) {
1338
ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
1339
ot->ot_thread->st_statistics.st_ind_cache_dirty++;
1341
block->cb_state = IDX_CAC_BLOCK_DIRTY;
1343
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
1344
IDX_CAC_UNLOCK(seg, ot->ot_thread);
1345
#ifdef XT_TRACK_INDEX_UPDATES
1346
ot->ot_ind_changed++;
1352
* Update the cache, if in RAM.
1354
/*
 * Update a page image in the cache ONLY if it is already resident
 * (ind_cac_get does no disk read). Used for pages that must be clean:
 * the assert below enforces IDX_CAC_BLOCK_CLEAN, so the dirty list is
 * never touched here.
 *
 * NOTE(review): damaged extraction — bare numeric lines are leftover
 * source line numbers; early-return and closing lines are missing.
 */
xtPublic xtBool xt_ind_write_cache(XTOpenTablePtr ot, xtIndexNodeID address, size_t size, xtWord1 *data)
1356
XTIndBlockPtr block;
1359
if (!ind_cac_get(ot, address, &seg, &block))
1363
XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
1364
/* This should only be done to pages that are free, which
1365
* are not on the dirty list, so they must be clean!
1367
ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
1368
memcpy(block->cb_data, data, size);
1370
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
1371
IDX_CAC_UNLOCK(seg, ot->ot_thread);
1377
/*
 * Fetch a cache-resident page (no disk read) and return a write-locked
 * reference in `iref`. On a cache miss, ir_block/ir_branch are set to
 * NULL. ir_xlock is TRUE: the caller owns the exclusive page lock and
 * must release via xt_ind_release().
 *
 * NOTE(review): damaged extraction — bare numeric lines are leftover
 * source line numbers; the hit/miss branch structure and final return
 * are among the missing lines.
 */
xtPublic xtBool xt_ind_get(XTOpenTablePtr ot, xtIndexNodeID address, XTIndReferencePtr iref)
1379
XTIndBlockPtr block;
1382
if (!ind_cac_get(ot, address, &seg, &block))
1386
XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
1387
ASSERT_NS(IDX_CAC_NOT_FREE(block->cb_state));
1388
IDX_CAC_UNLOCK(seg, ot->ot_thread);
1389
iref->ir_block = block;
1390
iref->ir_branch = (XTIdxBranchDPtr) block->cb_data;
1393
/* Miss path: hand back an empty reference. */
iref->ir_block = NULL;
1394
iref->ir_branch = NULL;
1396
iref->ir_xlock = TRUE;
1397
iref->ir_updated = FALSE;
1403
* Note, this function may only be called if the block has
1406
/*
 * Release an index page that is being removed from the index file.
 * If the block is dirty it is unlinked from the index's dirty list and
 * reverted to CLEAN (its contents will never be flushed); a FLUSHING
 * block first has its image secured via the index flush log.
 *
 * NOTE(review): damaged extraction — bare numeric lines are leftover
 * source line numbers; error returns, #endif lines and the final
 * return are among the missing lines.
 */
xtPublic xtBool xt_ind_free_block(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address)
1408
XTIndBlockPtr block;
1411
if (!ind_cac_get(ot, address, &seg, &block))
1414
XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
1416
if (block->cb_state == IDX_CAC_BLOCK_FLUSHING) {
1417
if (!ot->ot_table->tab_ind_flush_ilog->il_write_block(ot, block)) {
1418
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
1419
IDX_CAC_UNLOCK(seg, ot->ot_thread);
1424
/* {PAGE-NO-IN-INDEX-FILE}
1425
* This is the one exeption to the rule that a block
1426
* that is in the IDX_CAC_BLOCK_LOGGED may be released
1429
ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
1431
if (block->cb_state == IDX_CAC_BLOCK_DIRTY) {
1432
/* Take the block off the dirty list: */
1433
xt_spinlock_lock(&ind->mi_dirty_lock);
1434
if (block->cb_dirty_next)
1435
block->cb_dirty_next->cb_dirty_prev = block->cb_dirty_prev;
1436
if (block->cb_dirty_prev)
1437
block->cb_dirty_prev->cb_dirty_next = block->cb_dirty_next;
1438
if (ind->mi_dirty_list == block)
1439
ind->mi_dirty_list = block->cb_dirty_next;
1440
ind->mi_dirty_blocks--;
1441
xt_spinlock_unlock(&ind->mi_dirty_lock);
1442
block->cb_state = IDX_CAC_BLOCK_CLEAN;
1443
ot->ot_thread->st_statistics.st_ind_cache_dirty--;
1444
#ifdef IND_OPT_DATA_WRITTEN
/* Reset the written-range tracking for the now-clean block: */
1445
block->cb_header = FALSE;
1446
block->cb_min_pos = 0xFFFF;
1447
block->cb_max_pos = 0;
1450
XT_IPAGE_UNLOCK(&block->cb_lock, TRUE);
1452
IDX_CAC_UNLOCK(seg, ot->ot_thread);
1458
/*
 * Copy the first `size` bytes of an index page out of the cache,
 * loading the page from disk if required (ind_cac_fetch with
 * read_data = TRUE). Only a shared page lock is needed for the copy.
 *
 * NOTE(review): damaged extraction — bare numeric lines are leftover
 * source line numbers; the error return and final return are missing.
 */
xtPublic xtBool xt_ind_read_bytes(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, size_t size, xtWord1 *data)
1460
XTIndBlockPtr block;
1463
if (!(block = ind_cac_fetch(ot, ind, address, &seg, TRUE)))
1466
XT_IPAGE_READ_LOCK(&block->cb_lock);
1467
memcpy(data, block->cb_data, size);
1468
XT_IPAGE_UNLOCK(&block->cb_lock, FALSE);
1469
IDX_CAC_UNLOCK(seg, ot->ot_thread);
1473
/*
 * Fetch an index page for use, validating its on-disk branch header
 * and taking a read or write page lock depending on `ltype` (a switch
 * on ltype is among the missing lines). On corruption, jumps to
 * failed_corrupt which registers XT_ERR_INDEX_CORRUPTED.
 *
 * NOTE(review): damaged extraction — bare numeric lines are leftover
 * source line numbers; the switch statement, several closing braces,
 * the `failed_corrupt:` label itself and return paths are missing.
 */
xtPublic xtBool xt_ind_fetch(XTOpenTablePtr ot, XTIndexPtr ind, xtIndexNodeID address, XTPageLockType ltype, XTIndReferencePtr iref)
1475
register XTIndBlockPtr block;
1477
xtWord2 branch_size;
1479
xtBool xlock = FALSE;
1482
ASSERT_NS(iref->ir_xlock == 2);
1483
ASSERT_NS(iref->ir_xlock == 2);
1485
if (!(block = ind_cac_fetch(ot, ind, address, &seg, TRUE)))
1488
/* Sanity-check the branch header before trusting the page: */
branch_size = XT_GET_DISK_2(((XTIdxBranchDPtr) block->cb_data)->tb_size_2);
1489
rec_size = XT_GET_INDEX_BLOCK_LEN(branch_size);
1490
if (rec_size < 2 || rec_size > XT_INDEX_PAGE_SIZE)
1491
goto failed_corrupt;
1492
/* Fixed-size keys: the record area must be an exact multiple of
 * the per-entry size (node entries additionally carry a node ref). */
if (ind->mi_fix_key) {
1494
if (XT_IS_NODE(branch_size)) {
1495
if (rec_size != 0) {
1496
if (rec_size < XT_NODE_REF_SIZE)
1497
goto failed_corrupt;
1498
rec_size -= XT_NODE_REF_SIZE;
1499
if ((rec_size % (ind->mi_key_size + XT_RECORD_REF_SIZE + XT_NODE_REF_SIZE)) != 0)
1500
goto failed_corrupt;
1504
if ((rec_size % (ind->mi_key_size + XT_RECORD_REF_SIZE)) != 0)
1505
goto failed_corrupt;
1516
if (!XT_IS_NODE(branch_size))
1519
case XT_XLOCK_DEL_LEAF:
1520
if (!XT_IS_NODE(branch_size)) {
1521
if (ot->ot_table->tab_dic.dic_no_lazy_delete)
1525
* {LAZY-DEL-INDEX-ITEMS}
1527
* We are fetch a page for delete purpose.
1528
* we decide here if we plan to do a lazy delete,
1529
* Or if we plan to compact the node.
1531
* A lazy delete just requires a shared lock.
1534
if (ind->mi_lazy_delete) {
1535
/* If the number of deleted items is greater than
1536
* half of the number of times that can fit in the
1537
* page, then we will compact the node.
1539
if (!xt_idx_lazy_delete_on_leaf(ind, block, XT_GET_INDEX_BLOCK_LEN(branch_size)))
1549
/* Take the page lock in the mode decided above: */
if ((iref->ir_xlock = xlock))
1550
XT_IPAGE_WRITE_LOCK(&block->cb_lock, ot->ot_thread->t_id);
1552
XT_IPAGE_READ_LOCK(&block->cb_lock);
1554
IDX_CAC_UNLOCK(seg, ot->ot_thread);
1557
* Direct I/O requires that the buffer is 512 byte aligned.
1558
* To do this, cb_data is turned into a pointer, instead
1560
* As a result, we need to pass a pointer to both the
1561
* cache block and the cache block data:
1563
iref->ir_updated = FALSE;
1564
iref->ir_block = block;
1565
iref->ir_branch = (XTIdxBranchDPtr) block->cb_data;
1569
/* failed_corrupt path (label line missing in this extraction): */
IDX_CAC_UNLOCK(seg, ot->ot_thread);
1570
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, ot->ot_table->tab_name);
1574
/*
 * Release a page reference obtained via xt_ind_fetch/xt_ind_get.
 * If the caller updated the page (iref->ir_updated), the block is
 * pushed onto the index's dirty list (if not already dirty) before the
 * page lock is dropped. The debug asserts cross-check `utype` against
 * the lock mode and updated flag recorded in the reference.
 *
 * NOTE(review): damaged extraction — bare numeric lines are leftover
 * source line numbers; switch/#ifdef structure around the asserts and
 * the final return are among the missing lines.
 */
xtPublic xtBool xt_ind_release(XTOpenTablePtr ot, XTIndexPtr ind, XTPageUnlockType XT_NDEBUG_UNUSED(utype), XTIndReferencePtr iref)
1576
register XTIndBlockPtr block;
1578
block = iref->ir_block;
1581
ASSERT_NS(iref->ir_xlock != 2);
1582
ASSERT_NS(iref->ir_updated != 2);
1583
if (iref->ir_updated)
1584
ASSERT_NS(utype == XT_UNLOCK_R_UPDATE || utype == XT_UNLOCK_W_UPDATE);
1586
ASSERT_NS(utype == XT_UNLOCK_READ || utype == XT_UNLOCK_WRITE);
1588
ASSERT_NS(utype == XT_UNLOCK_WRITE || utype == XT_UNLOCK_W_UPDATE);
1590
ASSERT_NS(utype == XT_UNLOCK_READ || utype == XT_UNLOCK_R_UPDATE);
1592
if (iref->ir_updated) {
1594
#ifdef IND_OPT_DATA_WRITTEN
/* Debug-only: verify the recorded modified-byte range is inside
 * the branch record area. */
1595
xtWord2 branch_size;
1598
branch_size = XT_GET_DISK_2(((XTIdxBranchDPtr) block->cb_data)->tb_size_2);
1599
rec_size = XT_GET_INDEX_BLOCK_LEN(branch_size);
1601
ASSERT_NS(block->cb_min_pos <= rec_size-2);
1602
ASSERT_NS(block->cb_min_pos <= block->cb_max_pos);
1603
ASSERT_NS(block->cb_max_pos <= rec_size-2);
1604
ASSERT_NS(block->cb_max_pos <= XT_INDEX_PAGE_SIZE-2);
1607
/* The page was update: */
1608
ASSERT_NS(IDX_CAC_MODIFYABLE(block->cb_state));
1609
if (block->cb_state != IDX_CAC_BLOCK_DIRTY) {
1610
TRACK_BLOCK_WRITE(offset);
1611
xt_spinlock_lock(&ind->mi_dirty_lock);
1612
if ((block->cb_dirty_next = ind->mi_dirty_list))
1613
ind->mi_dirty_list->cb_dirty_prev = block;
1614
block->cb_dirty_prev = NULL;
1615
ind->mi_dirty_list = block;
1616
ind->mi_dirty_blocks++;
1617
xt_spinlock_unlock(&ind->mi_dirty_lock);
1618
if (block->cb_state != IDX_CAC_BLOCK_LOGGED) {
1619
ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_CLEAN);
1620
ot->ot_thread->st_statistics.st_ind_cache_dirty++;
1622
block->cb_state = IDX_CAC_BLOCK_DIRTY;
1626
XT_IPAGE_UNLOCK(&block->cb_lock, iref->ir_xlock);
1629
/* Poison the reference so double-release is caught by the asserts. */
iref->ir_updated = 2;
1634
/*
 * Reserve `count` free cache blocks on the open table's private list
 * (ot_ind_res_bufs) so a later index operation cannot run out of cache
 * mid-way. Frees LRU blocks as needed, skipping the page `not_this`.
 * On failure, already-reserved blocks are returned via
 * xt_ind_free_reserved() and XT_ERR_NO_INDEX_CACHE is registered.
 *
 * NOTE(review): damaged extraction — bare numeric lines are leftover
 * source line numbers; #endif lines, error returns and the final
 * return are among the missing lines.
 */
xtPublic xtBool xt_ind_reserve(XTOpenTablePtr ot, u_int count, XTIdxBranchDPtr not_this)
1636
register XTIndBlockPtr block;
1637
register DcGlobalsRec *dcg = &ind_cac_globals;
1639
#ifdef XT_TRACK_INDEX_UPDATES
1640
ot->ot_ind_reserved = count;
1641
ot->ot_ind_reads = 0;
1643
#ifdef DEBUG_CHECK_IND_CACHE
1644
xt_ind_check_cache(NULL);
1646
while (ot->ot_ind_res_count < count) {
1647
if (!dcg->cg_free_list) {
1648
/* No free blocks: evict least-recently-used blocks first. */
if (!ind_cac_free_lru_blocks(ot, count - ot->ot_ind_res_count, not_this)) {
1649
if (!dcg->cg_free_list) {
1650
xt_ind_free_reserved(ot);
1651
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_INDEX_CACHE);
1652
#ifdef DEBUG_CHECK_IND_CACHE
1653
xt_ind_check_cache(NULL);
1660
/* Get a free block: */
1661
xt_lock_mutex_ns(&dcg->cg_lock);
1662
while (ot->ot_ind_res_count < count && (block = dcg->cg_free_list)) {
1663
ASSERT_NS(block->cb_state == IDX_CAC_BLOCK_FREE);
1664
/* Move the block from the global free list to this table's
 * private reserve list: */
dcg->cg_free_list = block->cb_next;
1665
dcg->cg_free_count--;
1666
block->cb_next = ot->ot_ind_res_bufs;
1667
ot->ot_ind_res_bufs = block;
1668
ot->ot_ind_res_count++;
1669
#ifdef DEBUG_CHECK_IND_CACHE
1670
dcg->cg_reserved_by_ots++;
1673
xt_unlock_mutex_ns(&dcg->cg_lock);
1675
#ifdef DEBUG_CHECK_IND_CACHE
1676
xt_ind_check_cache(NULL);
1681
/*
 * Return all blocks on the open table's private reserve list
 * (ot_ind_res_bufs) to the global free list under cg_lock, then reset
 * the table's reserve bookkeeping to empty.
 *
 * NOTE(review): damaged extraction — bare numeric lines are leftover
 * source line numbers; the loop header around the fblock transfer and
 * several braces/#endif lines are missing.
 */
xtPublic void xt_ind_free_reserved(XTOpenTablePtr ot)
1683
#ifdef DEBUG_CHECK_IND_CACHE
1684
xt_ind_check_cache(NULL);
1686
if (ot->ot_ind_res_bufs) {
1687
register XTIndBlockPtr block, fblock;
1688
register DcGlobalsRec *dcg = &ind_cac_globals;
1690
xt_lock_mutex_ns(&dcg->cg_lock);
1691
block = ot->ot_ind_res_bufs;
1694
/* NOTE(review): the loop that walks the reserve list and assigns
 * `fblock` is among the missing lines. */
block = block->cb_next;
1696
fblock->cb_next = dcg->cg_free_list;
1697
dcg->cg_free_list = fblock;
1698
#ifdef DEBUG_CHECK_IND_CACHE
1699
dcg->cg_reserved_by_ots--;
1701
dcg->cg_free_count++;
1703
xt_unlock_mutex_ns(&dcg->cg_lock);
1704
ot->ot_ind_res_bufs = NULL;
1705
ot->ot_ind_res_count = 0;
1707
#ifdef DEBUG_CHECK_IND_CACHE
1708
xt_ind_check_cache(NULL);
1712
xtPublic void xt_ind_unreserve(XTOpenTablePtr ot)
1714
if (!ind_cac_globals.cg_free_list)
1715
xt_ind_free_reserved(ot);