1
/* Copyright (C) 2005 PrimeBase Technologies GmbH
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
* 2005-09-30 Paul McCullagh
23
#ifndef __xt_index_h__
24
#define __xt_index_h__
27
#include <drizzled/definitions.h>
28
#include <drizzled/field.h>
29
using drizzled::Field;
31
#include <mysql_version.h>
32
#include <my_bitmap.h>
36
#include "thread_xt.h"
37
#include "linklist_xt.h"
38
#include "datalog_xt.h"
39
#include "datadic_xt.h"
41
#ifndef MYSQL_VERSION_ID
42
#error MYSQL_VERSION_ID must be defined!
45
//#define PRINT_IND_FLUSH_STATS
47
/* Define this to gather data on what area
48
* of an index page is being written.
50
#define IND_OPT_DATA_WRITTEN
52
#ifdef IND_OPT_DATA_WRITTEN
53
/* This is a debug switch that compares
54
* the contents of a cached page
55
* to the data just written to disk
57
* It requires information on what parts
58
* of the cache page have been changed.
60
//#define CHECK_IF_WRITE_WAS_OK
63
#ifdef IND_OPT_DATA_WRITTEN
64
/* Write only those parts of an index
65
* page that have been modified.
67
#define IND_WRITE_MIN_DATA
71
* Define this in order to completely write the
72
* end of an index page (i.e. unused space),
73
* if the next page to be written is the
74
* one that follows this page.
75
* Note that this does not work optimally
76
* with the option to write minimum data.
78
//#define IND_FILL_BLOCK_TO_NEXT
80
/* Define this if all writes to the index
81
* file should be in block sizes and on
84
//#define IND_WRITE_IN_BLOCK_SIZES
86
/* This is the block size used to write the index: */
87
#define IND_WRITE_BLOCK_SIZE XT_BLOCK_SIZE_FOR_DIRECT_IO
88
//#define IND_WRITE_BLOCK_SIZE (1024*1)
91
* Define this to skew the split of nodes
92
* when adding to the end of an index.
94
#define IND_SKEW_SPLIT_ON_APPEND
97
* The maximum amount of data to write before
98
* flushing the index file.
100
#define IND_FLUSH_THRESHOLD (512 * 1024 * 1024)
113
* When a transaction is rolled back, the index entries are not
114
* garbage collected!! Instead, the index entries are deleted
115
* when the data record is garbage collected.
117
* When an index record is written, and this record replaces
118
* some other record (i.e. a node is updated). The new record
119
* references its predecessor.
121
* On cleanup (rollback or commit), the predecessor records
122
* are garbage collected.
124
* NOTE: It is possible to lose memory if a crash occurs during
125
* index modification. This can occur if a node is split and
126
* we crash between writing the 2 new records.
131
* These flags influence the way the compare and search
134
* The low-order 16 bits are reserved for the caller
135
* (i.e. MySQL specific stuff).
137
#define XT_SEARCH_WHOLE_KEY 0x10000000 /* This flag is used to search for an insertion point, or to find
138
* a particular slot that has already been inserted into the
139
* index. The compare includes the handle of the variation.
141
#define XT_SEARCH_AFTER_KEY 0x20000000 /* This flag searches for the position just after the given key.
142
* Even if the key is not found, success is possible if there
143
* is a value in the index that would be after the search key.
145
* If this flag is not set then we search for the first
146
* occurrence of the key in the index. If not found we
147
* take the position just after the search key.
149
#define XT_SEARCH_FIRST_FLAG 0x40000000 /* Use this flag to find the first position in the index.
150
* When set, the actual key value is ignored.
152
#define XT_SEARCH_AFTER_LAST_FLAG 0x80000000 /* Search out the position after the last in the index.
153
* When set, the actual key value is ignored.
156
#define XT_INDEX_MAX_KEY_SIZE_MAX 2048 /* These are allocated on the stack, so this is the maximum! */
158
#define XT_INDEX_MAX_KEY_SIZE ((XT_INDEX_PAGE_SIZE >> 1) > XT_INDEX_MAX_KEY_SIZE_MAX ? XT_INDEX_MAX_KEY_SIZE_MAX : (XT_INDEX_PAGE_SIZE >> 1))
160
#define XT_IS_NODE_BIT 0x8000
162
#define XT_IS_NODE(x) ((x) & XT_IS_NODE_BIT)
164
#define XT_NODE_REF_SIZE 4
165
#define XT_GET_NODE_REF(t, x) XT_RET_NODE_ID(XT_GET_DISK_4(x))
166
#define XT_SET_NODE_REF(t, x, y) XT_SET_DISK_4((x), XT_NODE_ID(y))
168
#define XT_MAX_RECORD_REF_SIZE 8
170
#define XT_INDEX_PAGE_HEAD_SIZE offsetof(XTIdxBranchDRec, tb_data)
171
#define XT_INDEX_PAGE_DATA_SIZE (XT_INDEX_PAGE_SIZE - 2) // XT_INDEX_PAGE_HEAD_SIZE == 2!
173
#define XT_MAKE_LEAF_SIZE(x) ((x) + XT_INDEX_PAGE_HEAD_SIZE)
175
#define XT_MAKE_NODE_SIZE(x) (((x) + XT_INDEX_PAGE_HEAD_SIZE) | XT_IS_NODE_BIT)
177
#define XT_MAKE_BRANCH_SIZE(x, y) (((x) + XT_INDEX_PAGE_HEAD_SIZE) | ((y) ? XT_IS_NODE_BIT : 0))
179
#define XT_GET_INDEX_BLOCK_LEN(x) ((x) & 0x7FFF)
181
#define XT_GET_BRANCH_DATA_SIZE(x) (XT_GET_INDEX_BLOCK_LEN(x) - XT_INDEX_PAGE_HEAD_SIZE)
183
#define XT_DIRTY_BLOCK_LIST_SIZE 4096
185
/*
 * Fixed part of the index header. All members are XTDiskValueN values; the
 * trailing digit of each member name matches N. tp_data is a variable-length
 * tail (XT_VAR_LENGTH) that holds the index roots.
 */
typedef struct XTIndexHead {
186
XTDiskValue4 tp_format_offset_4; /* The offset of the format part of the header. */
188
XTDiskValue4 tp_header_size_4; /* The size of the header. */
189
XTDiskValue6 tp_not_used_6; /* Not used, as the name says. */
191
XTDiskValue6 tp_ind_eof_6; /* NOTE(review): presumably the end-of-file position of the index -- confirm. */
192
XTDiskValue6 tp_ind_free_6; /* NOTE(review): presumably refers to the free page list -- confirm. */
194
/* The index roots follow. Each root is if_node_ref_size_1 in size (see XTIndexFormat). */
195
xtWord1 tp_data[XT_VAR_LENGTH];
196
} XTIndexHeadDRec, *XTIndexHeadDPtr;
198
/*
 * Format description of an index: structure size, table/index version
 * numbers and the sizes of the references stored in the index.
 * NOTE(review): presumably this is the "format part" that
 * XTIndexHead.tp_format_offset_4 points to -- confirm.
 */
typedef struct XTIndexFormat {
199
XTDiskValue4 if_format_size_4; /* The size of this structure (index format). */
200
XTDiskValue2 if_tab_version_2; /* The table version number. */
201
XTDiskValue2 if_ind_version_2; /* The index version number. */
202
XTDiskValue1 if_node_ref_size_1; /* The size of index node references in the indexes (default 4 bytes). */
203
XTDiskValue1 if_rec_ref_size_1; /* The size of record references in the indexes (default 4 bytes). */
204
XTDiskValue4 if_page_size_4; /* NOTE(review): presumably the index page size (cf. XT_INDEX_PAGE_SIZE) -- confirm. */
205
} XTIndexFormatDRec, *XTIndexFormatDPtr;
207
/*
 * One index page (branch): a size word followed by the page data.
 * XT_INDEX_PAGE_HEAD_SIZE is defined as offsetof(XTIdxBranchDRec, tb_data)
 * and XT_INDEX_PAGE_DATA_SIZE as XT_INDEX_PAGE_SIZE - 2, so head and data
 * together span XT_INDEX_PAGE_SIZE bytes provided XTDiskValue2 occupies
 * exactly 2 bytes.
 * NOTE(review): the XT_MAKE_NODE_SIZE / XT_IS_NODE macros suggest that
 * tb_size_2 also carries XT_IS_NODE_BIT (0x8000) on top of the length, which
 * XT_GET_INDEX_BLOCK_LEN masks off with 0x7FFF -- confirm.
 */
typedef struct XTIdxBranch {
208
XTDiskValue2 tb_size_2; /* No of bytes used below. */
210
/* We need enough space for 2 buffers when splitting! */
211
xtWord1 tb_data[XT_INDEX_PAGE_DATA_SIZE];
212
} XTIdxBranchDRec, *XTIdxBranchDPtr;
214
/*
 * Describes the position of one item inside a searched branch. It is
 * embedded in XTIdxResultRec as sr_item.
 */
typedef struct XTIdxItem {
215
u_int i_total_size; /* Size of the data in the searched branch (excludes 2 byte header). */
216
u_int i_item_size; /* Size of the item at this position. */
217
u_int i_node_ref_size; /* NOTE(review): presumably the size of the node reference belonging to the item -- confirm. */
218
u_int i_item_offset; /* Item offset. */
219
} XTIdxItemRec, *XTIdxItemPtr;
221
/*
 * Result of scanning a branch. This is the "result" argument of the
 * XTScanBranchFunc, XTPrevItemFunc and XTLastItemFunc callbacks.
 * NOTE(review): xt_get_res_record_ref() in this file assigns
 * result->sr_row_id, but no sr_row_id member is visible below; a line may be
 * missing from this copy of the struct -- confirm against the full file.
 */
typedef struct XTIdxResult {
222
xtBool sr_found; /* TRUE if the key was found. */
223
xtBool sr_duplicate; /* TRUE if the duplicate was found. */
224
xtBool sr_last_item; /* TRUE if the last item was found. */
225
xtRecordID sr_rec_id; /* Reference to the record of the found key. */
227
xtIndexNodeID sr_branch; /* Branch to follow when searching a node. */
228
XTIdxItemRec sr_item; /* Position of the item in the branch (see XTIdxItemRec). */
229
} XTIdxResultRec, *XTIdxResultPtr;
231
typedef struct XTIdxKeyValue {
233
xtRecordID sv_rec_id;
237
} XTIdxKeyValueRec, *XTIdxKeyValuePtr;
239
/*
 * A search key together with its key buffer. sk_key_buf holds
 * XT_INDEX_MAX_KEY_SIZE bytes, i.e. half an index page capped at
 * XT_INDEX_MAX_KEY_SIZE_MAX (2048); the cap exists because these records are
 * allocated on the stack (see the comment on XT_INDEX_MAX_KEY_SIZE_MAX).
 */
typedef struct XTIdxSearchKey {
240
xtBool sk_on_key; /* TRUE if we are positioned on the search key. */
241
XTIdxKeyValueRec sk_key_value; /* The value of the search key. */
242
xtWord1 sk_key_buf[XT_INDEX_MAX_KEY_SIZE]; /* NOTE(review): presumably the storage sk_key_value refers to -- confirm. */
243
} XTIdxSearchKeyRec, *XTIdxSearchKeyPtr;
245
typedef void (*XTScanBranchFunc)(struct XTTable *tab, struct XTIndex *ind, XTIdxBranchDPtr branch, register XTIdxKeyValuePtr value, register XTIdxResultRec *result);
246
typedef void (*XTPrevItemFunc)(struct XTTable *tab, struct XTIndex *ind, XTIdxBranchDPtr branch, register XTIdxResultRec *result);
247
typedef void (*XTLastItemFunc)(struct XTTable *tab, struct XTIndex *ind, XTIdxBranchDPtr branch, register XTIdxResultRec *result);
249
typedef int (*XTSimpleCompFunc)(struct XTIndex *ind, u_int key_length, xtWord1 *key_value, xtWord1 *b_value);
254
struct charset_info_st;
256
using drizzled::charset_info_st;
258
struct charset_info_st;
261
typedef struct XTIndexSeg /* Key-portion */
263
u_int col_idx; /* The table column index of this component. */
264
u_int is_recs_in_range; /* Value returned by records_in_range(). */
265
u_int is_selectivity; /* The number of unique values per mi_select_total. */
266
xtWord1 type; /* Type of key (for sort) */
268
xtWord1 null_bit; /* bitmask to test for NULL */
269
xtWord1 bit_start,bit_end; /* if bit field */
270
xtWord1 bit_pos,bit_length; /* (not used in 4.1) */
272
xtWord2 length; /* Keylength */
273
xtWord4 start; /* Start of key in record */
274
xtWord4 null_pos; /* position to NULL indicator */
275
MX_CONST_CHARSET_INFO *charset;
276
} XTIndexSegRec, *XTIndexSegPtr;
278
/*
 * A list of free index pages. Lists are chained through fl_next_list, and
 * the page IDs are stored in the variable-length tail fl_page_id
 * (XT_VAR_LENGTH). XTIndex.mi_free_list points to such a list.
 */
typedef struct XTIndFreeList {
279
struct XTIndFreeList *fl_next_list; /* List of free pages for this index. */
280
u_int fl_start; /* Start for allocating from the front of the list. */
281
u_int fl_free_count; /* Total items in the free list. */
282
xtIndexNodeID fl_page_id[XT_VAR_LENGTH]; /* List of page ID's of the free pages. */
283
} XTIndFreeListRec, *XTIndFreeListPtr;
285
/*
 * One link in a chain of dirty-block arrays. Each link holds up to
 * XT_DIRTY_BLOCK_LIST_SIZE (4096) block pointers; XTIndDirtyList chains
 * these links via dl_block_lists.
 */
typedef struct XTIndDirtyBlocks {
286
struct XTIndDirtyBlocks *db_next; /* Next link in the chain. */
287
struct XTIndBlock *db_blocks[XT_DIRTY_BLOCK_LIST_SIZE]; /* Pointers to the dirty blocks. */
288
} XTIndDirtyBlocksRec, *XTIndDirtyBlocksPtr;
290
typedef struct XTIndDirtyListItor {
293
XTIndDirtyBlocksPtr dli_list;
300
} XTIndDirtyListItorRec, *XTIndDirtyListItorPtr;
302
typedef struct XTIndDirtyList {
303
u_int dl_total_blocks; /* Count of the dirty blocks. */
304
u_int dl_list_usage; /* The number of elements used in the first block list: */
305
XTIndDirtyBlocksPtr dl_block_lists; /* A chain of dirty blocks. */
307
xtBool dl_add_block(struct XTIndBlock *block);
308
void dl_sort_blocks();
311
inline struct XTIndBlock *dl_next_block(XTIndDirtyListItorPtr it) {
312
struct XTIndBlock *block;
314
if (it->dli_i == it->dli_count) {
316
it->dli_count = XT_DIRTY_BLOCK_LIST_SIZE;
317
it->dli_list = it->dli_list->db_next;
320
it->dli_count = dl_list_usage;
321
it->dli_list = dl_block_lists;
328
block = it->dli_list->db_blocks[it->dli_i];
332
} XTIndDirtyListRec, *XTIndDirtyListPtr;
335
* XT_INDEX_USE_PTHREAD_RW:
336
* The standard pthread RW lock is currently the fastest for INSERTs
337
* in 32 threads on smalltab: runTest(SMALL_INSERT_TEST, 32, dbUrl)
340
* XT_INDEX_USE_RWMUTEX:
341
* But the RW mutex is a close second, if not just as fast.
342
* If it is at least as fast, then it is better because read lock
343
* overhead is then zero.
345
* If definitely does get in the way of the
347
/* XT_INDEX_USE_PTHREAD_RW:
348
* But this is clearly better on Linux. 216682 instead of 169259
349
* payment transactions (DBT2 in non-conflict transactions,
350
* using only the customer table).
353
* The story continues. I have now fixed a bug in RW MUTEX that
354
* may have been slowing things down (see {RACE-WR_MUTEX}).
356
* So we will need to test "customer payment" again.
359
* Latest test show that RW mutex is slightly faster:
360
* 127460 to 123574 payment transactions.
364
#define XT_INDEX_USE_PTHREAD_RW
366
//#define XT_INDEX_USE_PTHREAD_RW
367
#define XT_TAB_ROW_USE_XSMUTEX
368
//#define XT_INDEX_SPINXSLOCK
371
/*
 * Index lock abstraction. One of three implementations is selected by
 * XT_INDEX_USE_PTHREAD_RW, XT_TAB_ROW_USE_XSMUTEX or XT_INDEX_SPINXSLOCK.
 * Every variant defines the same macro set, operating on the mi_rwlock
 * member of the index (i):
 *   XT_INDEX_LOCK_TYPE        - the type of mi_rwlock
 *   XT_INDEX_INIT_LOCK(s, i)  - initialise the lock
 *   XT_INDEX_FREE_LOCK(s, i)  - free the lock
 *   XT_INDEX_READ_LOCK(i, o)  - take the shared lock
 *   XT_INDEX_WRITE_LOCK(i, o) - take the exclusive lock
 *   XT_INDEX_UNLOCK(i, o)     - release the lock
 *   XT_INDEX_HAVE_XLOCK(i, o) - compares the lock's recorded exclusive
 *                               locker with (o)->ot_thread->t_id
 * In the non-pthread variants (o) must provide ot_thread->t_id.
 * NOTE(review): no "#else" before the "#error" and no closing "#endif" are
 * visible in this copy of the file -- confirm against the full source.
 */
#if defined(XT_INDEX_USE_PTHREAD_RW)
372
#define XT_INDEX_LOCK_TYPE xt_rwlock_type
373
#define XT_INDEX_INIT_LOCK(s, i) xt_init_rwlock_with_autoname(s, &(i)->mi_rwlock)
374
#define XT_INDEX_FREE_LOCK(s, i) xt_free_rwlock(&(i)->mi_rwlock)
375
/* This variant does not need (o); "(void) (o)" merely consumes the argument. */
#define XT_INDEX_READ_LOCK(i, o) do { xt_slock_rwlock_ns(&(i)->mi_rwlock); (void) (o); } while(0)
376
#define XT_INDEX_WRITE_LOCK(i, o) do { xt_xlock_rwlock_ns(&(i)->mi_rwlock); (void) (o); } while(0)
377
#define XT_INDEX_UNLOCK(i, o) do { xt_unlock_rwlock_ns(&(i)->mi_rwlock); (void) (o); } while(0)
378
/* Always TRUE: this variant performs no ownership check. */
#define XT_INDEX_HAVE_XLOCK(i, o) TRUE
379
#elif defined(XT_TAB_ROW_USE_XSMUTEX)
380
#define XT_INDEX_LOCK_TYPE XTMutexXSLockRec
381
#define XT_INDEX_INIT_LOCK(s, i) xt_xsmutex_init_with_autoname(s, &(i)->mi_rwlock)
382
#define XT_INDEX_FREE_LOCK(s, i) xt_xsmutex_free(s, &(i)->mi_rwlock)
383
#define XT_INDEX_READ_LOCK(i, o) xt_xsmutex_slock(&(i)->mi_rwlock, (o)->ot_thread->t_id)
384
#define XT_INDEX_WRITE_LOCK(i, o) xt_xsmutex_xlock(&(i)->mi_rwlock, (o)->ot_thread->t_id)
385
#define XT_INDEX_UNLOCK(i, o) xt_xsmutex_unlock(&(i)->mi_rwlock, (o)->ot_thread->t_id)
386
/* NOTE(review): this reads (i)->sxs_xlocker directly on the index, whereas
 * every other macro here (including the spin-lock XT_INDEX_HAVE_XLOCK) goes
 * through (i)->mi_rwlock, and XTIndex shows no sxs_xlocker member. It looks
 * like the mi_rwlock indirection is missing; verify the correct member name
 * of XTMutexXSLockRec before changing it.
 */
#define XT_INDEX_HAVE_XLOCK(i, o) ((i)->sxs_xlocker == (o)->ot_thread->t_id)
387
#elif defined(XT_INDEX_SPINXSLOCK)
388
#define XT_INDEX_LOCK_TYPE XTSpinXSLockRec
389
#define XT_INDEX_INIT_LOCK(s, i) xt_spinxslock_init_with_autoname(s, &(i)->mi_rwlock)
390
#define XT_INDEX_FREE_LOCK(s, i) xt_spinxslock_free(s, &(i)->mi_rwlock)
391
#define XT_INDEX_READ_LOCK(i, o) xt_spinxslock_slock(&(i)->mi_rwlock, (o)->ot_thread->t_id)
392
#define XT_INDEX_WRITE_LOCK(i, o) xt_spinxslock_xlock(&(i)->mi_rwlock, FALSE, (o)->ot_thread->t_id)
393
#define XT_INDEX_UNLOCK(i, o) xt_spinxslock_unlock(&(i)->mi_rwlock, (o)->ot_thread->t_id)
394
#define XT_INDEX_HAVE_XLOCK(i, o) ((i)->mi_rwlock.nrw_xlocker == (o)->ot_thread->t_id)
396
#error Please define the lock type
399
/* The R/W lock on the index is used as follows:
400
* Read Lock - used for operations on the index that are not of a structural nature.
401
* This includes any read operation and update operations that change an index
403
* Write lock - used to change the structure of the index. This includes adding
404
* and deleting pages.
406
/*
 * In-memory description of one index: its lock, root node, free and dirty
 * page lists, the branch scan/compare callbacks and the key segments.
 * Members are grouped by the lock that protects them (mi_rwlock or
 * mi_dirty_lock), followed by values described as index constants.
 */
typedef struct XTIndex {
407
u_int mi_index_no; /* The index number (used by MySQL). */
409
/* Protected by the mi_rwlock lock: */
410
XT_INDEX_LOCK_TYPE mi_rwlock; /* This lock protects the structure of the index.
411
* Read lock - structure may not change, but pages may change.
412
* Write lock - structure of index may be changed.
414
xtIndexNodeID mi_root; /* The index root node. */
415
XTIndFreeListPtr mi_free_list; /* List of free pages for this index. */
417
/* Protected by the mi_dirty_lock: */
418
XTSpinLockRec mi_dirty_lock; /* Spin lock protecting the dirty & free lists. */
419
struct XTIndBlock *mi_dirty_list; /* List of dirty pages for this index. */
420
u_int mi_dirty_blocks; /* Count of the dirty blocks. */
422
/* Index constants: */
425
u_int mi_max_items; /* The maximum number of items that can fit in a leaf node. */
426
xtBool mi_key_corrupted; /* Set to TRUE if a corrupted index key is detected. */
428
xtBool mi_lazy_delete; /* TRUE if index entries are "lazy deleted". */
429
u_int mi_single_type; /* Used when the index contains a single field. */
430
u_int mi_select_total; /* Base for XTIndexSegRec.is_selectivity ("unique values per mi_select_total"). */
431
XTScanBranchFunc mi_scan_branch; /* Callback: scan a branch for a key value. */
432
XTPrevItemFunc mi_prev_item; /* Callback of type XTPrevItemFunc. */
433
XTLastItemFunc mi_last_item; /* Callback of type XTLastItemFunc. */
434
XTSimpleCompFunc mi_simple_comp_key; /* Callback of type XTSimpleCompFunc (key comparison). */
/* NOTE(review): mi_col_map is declared twice below (uint64_t and MX_BITMAP).
 * Presumably the two forms are alternatives selected by a preprocessor
 * conditional that is not visible in this copy -- confirm.
 */
436
uint64_t mi_col_map; /* Bit-map of columns in the index. */
437
uint64_t mi_col_map_size; /* size of this bitmap. */
439
MX_BITMAP mi_col_map; /* Bit-map of columns in the index. */
441
u_int mi_subset_of; /* Indicates if this index is a complete subset of some other index. */
443
XTIndexSegRec mi_seg[200]; /* The key segments; the array is declared with a fixed bound of 200. */
444
} XTIndexRec, *XTIndexPtr;
446
#define XT_INDEX_OK 0
447
#define XT_INDEX_TOO_OLD 1
448
#define XT_INDEX_TOO_NEW 2
449
#define XT_INDEX_BAD_BLOCK 3
450
#define XT_INDEX_CORRUPTED 4
451
#define XT_INDEX_MISSING 5
452
#define XT_INDEX_NOT_RECOVERED 6
454
typedef void (*XTFreeDicFunc)(struct XTThread *self, struct XTDictionary *dic);
456
/*
 * Per-table dictionary information: record sizes, row-size statistics,
 * column counts, the columns needed for indexes and BLOBs, and the array of
 * index descriptions (dic_keys).
 */
typedef struct XTDictionary {
457
XTDDTable *dic_table; /* XT table information. */
459
/* Table binary information. */
460
u_int dic_mysql_buf_size; /* This is the size of the MySQL buffer (row size + null bytes). */
461
u_int dic_mysql_rec_size; /* This is the size of the fixed length MySQL row. */
462
u_int dic_rec_size; /* This is the size of the handle data file record. */
463
xtBool dic_rec_fixed; /* TRUE if the record has a fixed length size. */
464
u_int dic_tab_flags; /* Table flags, e.g. XT_TF_MEMORY_TABLE. */
465
xtWord8 dic_min_auto_inc; /* The minimum auto-increment value. */
466
xtWord8 dic_min_row_size; /* Row size statistics: minimum, ... */
467
xtWord8 dic_max_row_size; /* ... maximum, ... */
468
xtWord8 dic_ave_row_size; /* ... and average (as the names indicate). */
469
xtWord8 dic_def_ave_row_size; /* Defined row size set by the user. */
470
u_int dic_no_of_cols; /* Number of columns. */
471
u_int dic_fix_col_count; /* The number of columns always in the fixed part of a extended record. */
472
u_int dic_ind_cols_req; /* The number of columns required to build all indexes. */
473
xtWord8 dic_ind_rec_len; /* Length of the record part that is needed for all index columns! */
476
u_int dic_blob_cols_req; /* The number of the columns required to load all LONGBLOB columns. */
477
u_int dic_blob_count; /* NOTE(review): presumably the number of entries in dic_blob_cols -- confirm. */
478
Field **dic_blob_cols; /* Array of Field pointers for the BLOB columns. */
480
/* MySQL related information. NULL when no tables are open from MySQL side! */
481
xtBool dic_no_lazy_delete; /* FALSE if lazy delete is OK. */
482
u_int dic_disable_index; /* Non-zero if the index cannot be used. */
483
u_int dic_index_ver; /* The version of the index. */
485
XTIndexPtr *dic_keys; /* MySQL/PBXT key description */
486
xtWord1 dic_table_type; /* XT_TABLE_TYPE_VALUE, so far used only in Drizzled */
487
STRUCT_TABLE *dic_my_table; /* MySQL table */
488
} XTDictionaryRec, *XTDictionaryPtr;
490
#define XT_DT_LOG_HEAD 0
491
#define XT_DT_INDEX_PAGE 1
492
#define XT_DT_FREE_LIST 2
493
#define XT_DT_HEADER 3
494
#define XT_DT_SHORT_IND_PAGE 4
495
#define XT_DT_MOD_IND_PAGE 5
496
#define XT_DT_MOD_IND_PAGE_HEAD 6
497
#define XT_DT_SET_PAGE_HEAD 7
498
#define XT_DT_2_MOD_IND_PAGE 8
499
#define XT_DT_MOD_IND_PAGE_EOB 9
500
#define XT_DT_MOD_IND_PAGE_HEAD_EOB 10
501
#define XT_DT_2_MOD_IND_PAGE_EOB 11
503
/*
 * Index log header record (data type XT_DT_LOG_HEAD). It names the table
 * and records the total log size; a size of 0 marks the log as invalid.
 */
typedef struct XTIndLogHead {
504
xtWord1 ilh_data_type; /* XT_DT_LOG_HEAD */
505
XTDiskValue4 ilh_tab_id_4; /* ID of the table (per the member name). */
506
XTDiskValue4 ilh_log_eof_4; /* The entire size of the log (0 if invalid!) */
507
} XTIndLogHeadDRec, *XTIndLogHeadDPtr;
509
/*
 * Index log record carrying data for one page: a type byte, the page ID and
 * a variable-length payload (XT_VAR_LENGTH).
 * NOTE(review): presumably used with XT_DT_INDEX_PAGE, i.e. a complete
 * page -- confirm against the log writer.
 */
typedef struct XTIndPageData {
510
xtWord1 ild_data_type; /* One of the XT_DT_* record types. */
511
XTDiskValue4 ild_page_id_4; /* ID of the page the data belongs to. */
512
xtWord1 ild_data[XT_VAR_LENGTH]; /* The page data. */
513
} XTIndPageDataDRec, *XTIndPageDataDPtr;
515
/*
 * Index log record carrying header data: a type byte, a 2-byte size and a
 * variable-length payload (XT_VAR_LENGTH).
 * NOTE(review): presumably used with XT_DT_HEADER and written by
 * il_write_header() -- confirm.
 */
typedef struct XTIndHeadData {
516
xtWord1 ilh_data_type; /* One of the XT_DT_* record types. */
517
XTDiskValue2 ilh_head_size_2; /* Size of the header data that follows. */
518
xtWord1 ilh_data[XT_VAR_LENGTH]; /* The header data. */
519
} XTIndHeadDataDRec, *XTIndHeadDataDPtr;
521
/*
 * Index log record (XT_DT_SET_PAGE_HEAD) that carries only the 2-byte page
 * header of one page; it has no payload.
 */
typedef struct XTIndSetPageHeadData {
522
xtWord1 ild_data_type; /* XT_DT_SET_PAGE_HEAD */
523
XTDiskValue4 ild_page_id_4; /* ID of the page concerned. */
524
XTDiskValue2 ild_page_head_2; /* The page header (first 2 bytes) */
525
} XTIndSetPageHeadDataDRec, *XTIndSetPageHeadDataDPtr;
527
/*
 * Index log record (XT_DT_SHORT_IND_PAGE) carrying ild_size_2 bytes of data
 * for one page. Unlike XTIndModPageData it has no offset field.
 */
typedef struct XTIndShortPageData {
528
xtWord1 ild_data_type; /* XT_DT_SHORT_IND_PAGE */
529
XTDiskValue4 ild_page_id_4; /* ID of the page concerned. */
530
XTDiskValue2 ild_size_2; /* Size of the data. */
531
xtWord1 ild_data[XT_VAR_LENGTH]; /* The data (variable length). */
532
} XTIndShortPageDataDRec, *XTIndShortPageDataDPtr;
534
/*
 * Index log record (XT_DT_MOD_IND_PAGE) describing one modified region of a
 * page: ild_size_2 bytes of data at offset ild_offset_2.
 * NOTE(review): XT_DT_MOD_IND_PAGE_EOB presumably uses this layout as
 * well -- confirm.
 */
typedef struct XTIndModPageData {
535
xtWord1 ild_data_type; /* XT_DT_MOD_IND_PAGE */
536
XTDiskValue4 ild_page_id_4; /* ID of the page concerned. */
537
XTDiskValue2 ild_size_2; /* Size of the data. */
538
XTDiskValue2 ild_offset_2; /* Offset into the page. */
539
xtWord1 ild_data[XT_VAR_LENGTH]; /* The modified data (variable length). */
540
} XTIndModPageDataDRec, *XTIndModPageDataDPtr;
542
/*
 * Index log record (XT_DT_MOD_IND_PAGE_HEAD or
 * XT_DT_MOD_IND_PAGE_HEAD_EOB). It has the same fields as XTIndModPageData
 * plus the 2-byte page header.
 */
typedef struct XTIndModPageHeadData {
543
xtWord1 ild_data_type; /* XT_DT_MOD_IND_PAGE_HEAD/XT_DT_MOD_IND_PAGE_HEAD_EOB */
544
XTDiskValue4 ild_page_id_4; /* ID of the page concerned. */
545
XTDiskValue2 ild_size_2; /* Size of the data. */
546
XTDiskValue2 ild_offset_2; /* Offset into the page. */
547
XTDiskValue2 ild_page_head_2; /* The page header (first 2 bytes) */
548
xtWord1 ild_data[XT_VAR_LENGTH]; /* The modified data (variable length). */
549
} XTIndModPageHeadDataDRec, *XTIndModPageHeadDataDPtr;
551
/*
 * Index log record (XT_DT_2_MOD_IND_PAGE or XT_DT_2_MOD_IND_PAGE_EOB)
 * describing two modified regions of one page: the first starts at offset 0,
 * the second at dld_offset2_2.
 * NOTE(review): presumably dld_data holds the first block immediately
 * followed by the second -- confirm against the log writer/reader.
 */
typedef struct XTIndDoubleModPageData {
552
xtWord1 dld_data_type; /* XT_DT_2_MOD_IND_PAGE/XT_DT_2_MOD_IND_PAGE_EOB */
553
XTDiskValue4 dld_page_id_4; /* ID of the page concerned. */
554
XTDiskValue2 dld_size1_2; /* Size of the first data block, offset 0. */
555
XTDiskValue2 dld_offset2_2; /* Offset of second data block. */
556
XTDiskValue2 dld_size2_2; /* Size of the second data block. */
557
xtWord1 dld_data[XT_VAR_LENGTH]; /* The data of the blocks (variable length). */
558
} XTIndDoubleModPageDataDRec, *XTIndDoubleModPageDataDPtr;
560
/*
 * An index log. Logs belong to an XTIndexLogPool (il_pool) and are chained
 * within it through il_next_in_pool. The struct carries buffer bookkeeping
 * plus member functions for writing records and applying the log; their
 * definitions are not part of this header.
 * Several members appear to be missing from this copy (gaps in the original
 * line numbering), so the buffer itself is not visible here.
 */
typedef struct XTIndexLog {
561
struct XTIndexLogPool *il_pool; /* The pool this log belongs to. */
562
struct XTIndexLog *il_next_in_pool; /* Next log in the pool's list. */
564
xtLogID il_log_id; /* The ID of the data log. */
566
size_t il_buffer_size; /* NOTE(review): presumably the capacity of the log buffer -- confirm. */
571
size_t il_buffer_len; /* NOTE(review): presumably the number of buffered bytes -- confirm. */
572
off_t il_buffer_offset; /* NOTE(review): presumably the file offset of the buffered data -- confirm. */
573
XTSpinLockRec il_write_lock; /* Spin lock; what it guards is not visible in this header. */
574
size_t il_bytes_written; /* Byte counter (per the member name). */
576
xtBool il_reset(struct XTOpenTable *ot);
577
void il_close(xtBool delete_it);
579
xtBool il_data_written();
581
/* Writers: each takes the open table and the item to be logged. */
xtBool il_write_byte(struct XTOpenTable *ot, xtWord1 val);
582
xtBool il_write_word4(struct XTOpenTable *ot, xtWord4 value);
583
xtBool il_write_block(struct XTOpenTable *ot, struct XTIndBlock *block);
584
xtBool il_write_free_list(struct XTOpenTable *ot, u_int free_count, XTIndFreeListPtr free_list);
585
xtBool il_require_space(size_t bytes, XTThreadPtr thread);
586
xtBool il_write_header(struct XTOpenTable *ot, size_t head_size, xtWord1 *head_data);
587
xtBool il_flush(struct XTOpenTable *ot);
588
xtBool il_apply_log_write(struct XTOpenTable *ot);
589
xtBool il_apply_log_flush(struct XTOpenTable *ot);
590
inline xtBool il_pwrite_file(struct XTOpenTable *ot, off_t offs, size_t siz, void *dat);
591
inline xtBool il_flush_file(struct XTOpenTable *ot);
593
xtBool il_open_table(struct XTOpenTable **ot);
594
void il_close_table(struct XTOpenTable *ot);
595
} XTIndexLogRec, *XTIndexLogPtr;
597
/*
 * Pool of index logs belonging to one database. Logs are obtained with
 * ilp_get_log() and handed back with ilp_release_log(); the member function
 * definitions are not part of this header.
 */
typedef struct XTIndexLogPool {
598
struct XTDatabase *ilp_db; /* The database this pool belongs to. */
599
size_t ilp_log_buffer_size; /* Log buffer size; ilp_init() takes a log_buffer_size argument. */
601
XTIndexLogPtr ilp_log_pool; /* NOTE(review): presumably the head of the list linked via il_next_in_pool -- confirm. */
602
xt_mutex_type ilp_lock; /* The public pool lock. */
603
xtLogID ilp_next_log_id; /* NOTE(review): presumably the ID given to the next new log -- confirm. */
605
void ilp_init(struct XTThread *self, struct XTDatabase *db, size_t log_buffer_size);
606
void ilp_close(struct XTThread *self, xtBool lock);
607
void ilp_exit(struct XTThread *self);
608
void ilp_name(size_t size, char *path, xtLogID log_id);
610
xtBool ilp_open_log(XTIndexLogPtr *il, xtLogID log_id, xtBool excl, XTThreadPtr thread);
612
xtBool ilp_get_log(XTIndexLogPtr *il, XTThreadPtr thread);
613
void ilp_release_log(XTIndexLogPtr il);
614
} XTIndexLogPoolRec, *XTIndexLogPoolPtr;
616
class XTFlushIndexTask : public XTLockTask {
618
XTFlushIndexTask() : XTLockTask(),
620
fit_checkpoint(FALSE),
622
fit_blocks_flushed(0)
625
virtual xtBool tk_task(XTThreadPtr thread);
626
virtual void tk_reference();
627
virtual void tk_release();
629
struct XTTable *fit_table;
630
xtBool fit_checkpoint;
631
u_int fit_dirty_blocks; /* The number of dirty blocks being flushed! */
632
u_int fit_blocks_flushed;
635
/* A record reference consists of a record ID and a row ID: */
636
inline void xt_get_record_ref(register xtWord1 *item, xtRecordID *rec_id, xtRowID *row_id) {
637
*rec_id = XT_GET_DISK_4(item);
639
*row_id = XT_GET_DISK_4(item);
642
inline void xt_get_res_record_ref(register xtWord1 *item, register XTIdxResultRec *result) {
643
result->sr_rec_id = XT_GET_DISK_4(item);
645
result->sr_row_id = XT_GET_DISK_4(item);
648
inline void xt_set_record_ref(register xtWord1 *item, xtRecordID rec_id, xtRowID row_id) {
649
XT_SET_DISK_4(item, rec_id);
651
XT_SET_DISK_4(item, row_id);
654
inline void xt_set_val_record_ref(register xtWord1 *item, register XTIdxKeyValuePtr value) {
655
XT_SET_DISK_4(item, value->sv_rec_id);
657
XT_SET_DISK_4(item, value->sv_row_id);
660
xtBool xt_idx_insert(struct XTOpenTable *ot, struct XTIndex *ind, xtRowID row_id, xtRecordID rec_id, xtWord1 *rec_buf, xtWord1 *bef_buf, xtBool allow_dups);
661
xtBool xt_idx_delete(struct XTOpenTable *ot, struct XTIndex *ind, xtRecordID rec_id, xtWord1 *rec_buf);
662
xtBool xt_idx_update_row_id(struct XTOpenTable *ot, struct XTIndex *ind, xtRecordID rec_id, xtRowID row_id, xtWord1 *rec_buf);
663
void xt_idx_prep_key(struct XTIndex *ind, register XTIdxSearchKeyPtr search_key, int flags, xtWord1 *in_key_buf, size_t in_key_length);
664
xtBool xt_idx_research(struct XTOpenTable *ot, struct XTIndex *ind);
665
xtBool xt_idx_search(struct XTOpenTable *ot, struct XTIndex *ind, register XTIdxSearchKeyPtr search_key);
666
xtBool xt_idx_search_prev(struct XTOpenTable *ot, struct XTIndex *ind, register XTIdxSearchKeyPtr search_key);
667
xtBool xt_idx_next(register struct XTOpenTable *ot, register struct XTIndex *ind, register XTIdxSearchKeyPtr search_key);
668
xtBool xt_idx_prev(register struct XTOpenTable *ot, register struct XTIndex *ind, register XTIdxSearchKeyPtr search_key);
669
xtBool xt_idx_read(struct XTOpenTable *ot, struct XTIndex *ind, xtWord1 *rec_buf);
670
void xt_ind_set_index_selectivity(struct XTOpenTable *ot, XTThreadPtr thread);
671
void xt_check_indices(struct XTOpenTable *ot);
672
void xt_load_indices(XTThreadPtr self, struct XTOpenTable *ot);
673
void xt_ind_count_deleted_items(struct XTTable *ot, struct XTIndex *ind, struct XTIndBlock *block);
674
xtBool xt_async_flush_indices(struct XTTable *tab, xtBool notify_complete, xtBool notify_before_write, struct XTThread *thread);
675
xtBool xt_flush_indices(struct XTOpenTable *ot, off_t *bytes_flushed, xtBool have_table_lock, XTFlushIndexTask *ft);
676
void xt_ind_track_dump_block(struct XTTable *tab, xtIndexNodeID address);
678
#define XT_S_MODE_MATCH 0
679
#define XT_S_MODE_NEXT 1
680
#define XT_S_MODE_PREV 2
681
xtBool xt_idx_match_search(struct XTOpenTable *ot, struct XTIndex *ind, register XTIdxSearchKeyPtr search_key, xtWord1 *buf, int mode);
683
int xt_compare_2_int4(XTIndexPtr ind, uint key_length, xtWord1 *key_value, xtWord1 *b_value);
684
int xt_compare_3_int4(XTIndexPtr ind, uint key_length, xtWord1 *key_value, xtWord1 *b_value);
685
void xt_scan_branch_single(struct XTTable *tab, XTIndexPtr ind, XTIdxBranchDPtr branch, register XTIdxKeyValuePtr value, register XTIdxResultRec *result);
686
void xt_scan_branch_fix(struct XTTable *tab, XTIndexPtr ind, XTIdxBranchDPtr branch, register XTIdxKeyValuePtr value, register XTIdxResultRec *result);
687
void xt_scan_branch_fix_simple(struct XTTable *tab, XTIndexPtr ind, XTIdxBranchDPtr branch, register XTIdxKeyValuePtr value, register XTIdxResultRec *result);
688
void xt_scan_branch_var(struct XTTable *tab, XTIndexPtr ind, XTIdxBranchDPtr branch, register XTIdxKeyValuePtr value, register XTIdxResultRec *result);
690
void xt_prev_branch_item_fix(struct XTTable *tab, XTIndexPtr ind, XTIdxBranchDPtr branch, register XTIdxResultRec *result);
691
void xt_prev_branch_item_var(struct XTTable *tab, XTIndexPtr ind, XTIdxBranchDPtr branch, register XTIdxResultRec *result);
693
void xt_last_branch_item_fix(struct XTTable *tab, XTIndexPtr ind, XTIdxBranchDPtr branch, register XTIdxResultPtr result);
694
void xt_last_branch_item_var(struct XTTable *tab, XTIndexPtr ind, XTIdxBranchDPtr branch, register XTIdxResultPtr result);
695
xtBool xt_idx_lazy_delete_on_leaf(XTIndexPtr ind, struct XTIndBlock *block, xtWord2 branch_size);
697
//#define TRACK_ACTIVITY
698
#ifdef TRACK_ACTIVITY
700
#define TRACK_BLOCK_ALLOC(x) track_work(xt_ind_offset_to_node(tab, x), "A")
701
#define TRACK_BLOCK_FREE(x) track_work(xt_ind_offset_to_node(ot->ot_table, x), "-")
702
#define TRACK_BLOCK_SPLIT(x) track_work(xt_ind_offset_to_node(ot->ot_table, x), "/")
703
#define TRACK_BLOCK_WRITE(x) track_work(xt_ind_offset_to_node(ot->ot_table, x), "w")
704
#define TRACK_BLOCK_FLUSH_N(x) track_work(x, "F")
705
#define TRACK_BLOCK_TO_FLUSH(x) track_work(x, "f")
707
xtPublic void track_work(u_int block, char *what);
710
#define TRACK_BLOCK_ALLOC(x)
711
#define TRACK_BLOCK_FREE(x)
712
#define TRACK_BLOCK_SPLIT(x)
713
#define TRACK_BLOCK_WRITE(x)
714
#define TRACK_BLOCK_FLUSH_N(x)
715
#define TRACK_BLOCK_TO_FLUSH(x)