~drizzle-trunk/drizzle/development

1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
1
/* Copyright (c) 2005 PrimeBase Technologies GmbH
2
 *
3
 * PrimeBase XT
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
 *
19
 * 2005-02-08	Paul McCullagh
20
 *
21
 * H&G2JCtL
22
 */
23
#ifndef __xt_table_h__
24
#define __xt_table_h__
25
26
#include <time.h>
27
28
#include "datalog_xt.h"
29
#include "filesys_xt.h"
30
#include "hashtab_xt.h"
31
#include "index_xt.h"
32
#include "cache_xt.h"
33
#include "util_xt.h"
34
#include "heap_xt.h"
35
#include "tabcache_xt.h"
36
#include "xactlog_xt.h"
37
#include "lock_xt.h"
38
39
struct XTDatabase;
40
struct XTThread;
41
struct XTCache;
42
struct XTOpenTable;
43
struct XTTablePath;
44
45
/* Table version numbers. NOTE(review): presumably compared against
 * tf_tab_version_2 of the table format header -- confirm in the loader.
 */
#define XT_TAB_INCOMPATIBLE_VERSION	4
#define XT_TAB_CURRENT_VERSION		5

/* This version of the index does not have lazy
 * delete. The new version is compatible with
 * this and maintains the old format.
 */
#define XT_IND_NO_LAZY_DELETE		3
#define XT_IND_LAZY_DELETE_OK		4

/* The current index version depends on whether lazy delete is compiled in. */
#ifdef XT_USE_LAZY_DELETE
#define XT_IND_CURRENT_VERSION		XT_IND_LAZY_DELETE_OK
#else
#define XT_IND_CURRENT_VERSION		XT_IND_NO_LAZY_DELETE
#endif

#define XT_HEAD_BUFFER_SIZE			1024

/* Room for three identifier names plus 3 extra bytes. */
#define XT_TABLE_NAME_BUF_SIZE		(XT_IDENTIFIER_NAME_SIZE + XT_IDENTIFIER_NAME_SIZE + XT_IDENTIFIER_NAME_SIZE + 3)

/* Optional tracking switches; both are disabled, and can only be enabled in DEBUG builds. */
#ifdef DEBUG
//#define XT_TRACK_INDEX_UPDATES
//#define XT_TRACK_RETURNED_ROWS
#endif
68
69
/*
 * NOTE: Records may only be freed (placed on the free list), after
 * all currently running transactions have ended.
 * The reason is, running transactions may have references in memory
 * to these records (a sequential scan has a large buffer).
 * If the records are freed they may be re-used. This will
 * cause problems because the references will then refer to
 * new data.
 *
 * As a result, deleted records are first placed in the
 * REMOVED state. Later, when transactions have quit, they
 * are freed.
 */

/* Record type, stored in the low nibble of the record status byte. */
#define XT_TAB_STATUS_FREED			0x00			/* On the free list. */
#define XT_TAB_STATUS_DELETE		0x01			/* A transactional delete record (an "update" that indicates a delete). */
#define XT_TAB_STATUS_FIXED			0x02
#define XT_TAB_STATUS_VARIABLE		0x03			/* Uses one block, but has the variable format. */
#define XT_TAB_STATUS_EXT_DLOG		0x04			/* Variable format, and the trailing part of the record in the data log. */
#define XT_TAB_STATUS_EXT_HDATA		0x05			/* Variable format, and the trailing part of the record in the handle data file. */
#define XT_TAB_STATUS_DATA			0x06			/* A block of data with a next pointer (5 bytes overhead). */
#define XT_TAB_STATUS_END_DATA		0x07			/* A block of data without a next pointer (1 byte overhead). */
#define XT_TAB_STATUS_MASK			0x0F

#define XT_TAB_STATUS_CLEANED_BIT	0x80			/* This bit is set when the record is cleaned and committed. */

/* Record types combined with the cleaned bit. */
#define XT_TAB_STATUS_DEL_CLEAN		(XT_TAB_STATUS_DELETE | XT_TAB_STATUS_CLEANED_BIT)
#define XT_TAB_STATUS_FIX_CLEAN		(XT_TAB_STATUS_FIXED | XT_TAB_STATUS_CLEANED_BIT)
#define XT_TAB_STATUS_VAR_CLEAN		(XT_TAB_STATUS_VARIABLE | XT_TAB_STATUS_CLEANED_BIT)
#define XT_TAB_STATUS_EXT_CLEAN		(XT_TAB_STATUS_EXT_DLOG | XT_TAB_STATUS_CLEANED_BIT)

/* Status byte tests. XT_REC_IS_CLEAN() yields the masked bit (0 or 0x80),
 * the others yield a boolean comparison of the masked record type.
 */
#define XT_REC_IS_CLEAN(x)			((x) & XT_TAB_STATUS_CLEANED_BIT)
#define XT_REC_IS_FREE(x)			(((x) & XT_TAB_STATUS_MASK) == XT_TAB_STATUS_FREED)
#define XT_REC_IS_DELETE(x)			(((x) & XT_TAB_STATUS_MASK) == XT_TAB_STATUS_DELETE)
#define XT_REC_IS_FIXED(x)			(((x) & XT_TAB_STATUS_MASK) == XT_TAB_STATUS_FIXED)
#define XT_REC_IS_VARIABLE(x)		(((x) & XT_TAB_STATUS_MASK) == XT_TAB_STATUS_VARIABLE)
#define XT_REC_IS_EXT_DLOG(x)		(((x) & XT_TAB_STATUS_MASK) == XT_TAB_STATUS_EXT_DLOG)
#define XT_REC_IS_EXT_HDATA(x)		(((x) & XT_TAB_STATUS_MASK) == XT_TAB_STATUS_EXT_HDATA)
#define XT_REC_NOT_VALID(x)			(XT_REC_IS_FREE(x) || XT_REC_IS_DELETE(x))
107
108
/* Results for xt_use_table_by_id() (reported through its 'result' argument): */
#define XT_TAB_OK					0
#define XT_TAB_NOT_FOUND			1
#define XT_TAB_NO_DICTIONARY		2
#define XT_TAB_POOL_CLOSED			3				/* Cannot open table at the moment, the pool is closed. */
#define XT_TAB_FAILED				4
114
115
/* Select the lock implementation used for the per-table row locks.
 * Without atomics only the pthread read/write lock is available;
 * otherwise exactly one of the alternatives below is enabled.
 */
#ifdef XT_NO_ATOMICS
#define XT_TAB_ROW_USE_PTHREAD_RW
#else
//#define XT_TAB_ROW_USE_PTHREAD_RW
#define XT_TAB_ROW_USE_XSMUTEX
//#define XT_TAB_ROW_USE_SPINXSLOCK
//#define XT_TAB_ROW_USE_SPINLOCK
#endif

/* Map the generic row lock operations onto the selected implementation.
 * In the INIT/FREE macros 's' comes first, in the lock/unlock macros it
 * comes second; where 's' is used it is dereferenced for its t_id.
 */
#if defined(XT_TAB_ROW_USE_PTHREAD_RW)
#define XT_TAB_ROW_LOCK_TYPE			xt_rwlock_type
#define XT_TAB_ROW_INIT_LOCK(s, i)		xt_init_rwlock_with_autoname(s, i)
#define XT_TAB_ROW_FREE_LOCK(s, i)		xt_free_rwlock(i)
#define XT_TAB_ROW_READ_LOCK(i, s)		xt_slock_rwlock_ns(i)
#define XT_TAB_ROW_WRITE_LOCK(i, s)		xt_xlock_rwlock_ns(i)
#define XT_TAB_ROW_UNLOCK(i, s)			xt_unlock_rwlock_ns(i)
#elif defined(XT_TAB_ROW_USE_XSMUTEX)
#define XT_TAB_ROW_LOCK_TYPE			XTMutexXSLockRec
#define XT_TAB_ROW_INIT_LOCK(s, i)		xt_xsmutex_init_with_autoname(s, i)
#define XT_TAB_ROW_FREE_LOCK(s, i)		xt_xsmutex_free(s, i)
#define XT_TAB_ROW_READ_LOCK(i, s)		xt_xsmutex_slock(i, (s)->t_id)
#define XT_TAB_ROW_WRITE_LOCK(i, s)		xt_xsmutex_xlock(i, (s)->t_id)
#define XT_TAB_ROW_UNLOCK(i, s)			xt_xsmutex_unlock(i, (s)->t_id)
#elif defined(XT_TAB_ROW_USE_SPINXSLOCK)
#define XT_TAB_ROW_LOCK_TYPE			XTSpinXSLockRec
#define XT_TAB_ROW_INIT_LOCK(s, i)		xt_spinxslock_init_with_autoname(s, i)
#define XT_TAB_ROW_FREE_LOCK(s, i)		xt_spinxslock_free(s, i)
#define XT_TAB_ROW_READ_LOCK(i, s)		xt_spinxslock_slock(i, (s)->t_id)
#define XT_TAB_ROW_WRITE_LOCK(i, s)		xt_spinxslock_xlock(i, FALSE, (s)->t_id)
#define XT_TAB_ROW_UNLOCK(i, s)			xt_spinxslock_unlock(i, (s)->t_id)
#elif defined(XT_TAB_ROW_USE_SPINLOCK)
/* A plain spin lock: read and write locks are the same operation. */
#define XT_TAB_ROW_LOCK_TYPE			XTSpinLockRec
#define XT_TAB_ROW_INIT_LOCK(s, i)		xt_spinlock_init_with_autoname(s, i)
#define XT_TAB_ROW_FREE_LOCK(s, i)		xt_spinlock_free(s, i)
#define XT_TAB_ROW_READ_LOCK(i, s)		xt_spinlock_lock(i)
#define XT_TAB_ROW_WRITE_LOCK(i, s)		xt_spinlock_lock(i)
#define XT_TAB_ROW_UNLOCK(i, s)			xt_spinlock_unlock(i)
#else
#error Please define the lock type
#endif
155
156
/* ------- TABLE DATA FILE ------- */

#define XT_TAB_DATA_MAGIC		0x1234ABCD

#define XT_FORMAT_DEF_SPACE		512

/* Table flag bits, tested together by XT_IS_TEMP_TABLE(). */
#define XT_TF_REAL_TEMP_TABLE	1		/* A real temp table, created by the user. */
#define XT_TF_MEMORY_TABLE		2
#define XT_TF_DDL_TEMP_TABLE	4		/* A temp table created for DDL purposes. */

/* Non-zero if any of the three flags above is set in x (memory tables included). */
#define XT_IS_TEMP_TABLE(x)		((x) & (XT_TF_REAL_TEMP_TABLE | XT_TF_MEMORY_TABLE | XT_TF_DDL_TEMP_TABLE))

/* Table types. NOTE(review): presumably the values stored in te_type / td_tab_type -- confirm. */
#define XT_TABLE_TYPE_STANDARD  	1
#define XT_TABLE_TYPE_TEMPORARY 	2
#define XT_TABLE_TYPE_INTERNAL  	3
#define XT_TABLE_TYPE_FUNCTION  	4
172
173
/*
174
 * This header ensures that no record in the data file has the offset 0.
175
 */
176
typedef struct XTTableHead {
177
	XTDiskValue4			th_head_size_4;							/* The size of the table header. */
178
	XTDiskValue4			th_op_seq_4;
179
	XTDiskValue6			th_row_free_6;
180
	XTDiskValue6			th_row_eof_6;
181
	XTDiskValue6			th_row_fnum_6;
182
	XTDiskValue6			th_rec_free_6;
183
	XTDiskValue6			th_rec_eof_6;
184
	XTDiskValue6			th_rec_fnum_6;
185
} XTTableHeadDRec, *XTTableHeadDPtr;
186
187
typedef struct XTTableFormat {
188
	XTDiskValue4			tf_format_size_4;						/* The size of this structure (table format). */
189
	XTDiskValue4			tf_tab_head_size_4;						/* The offset of the first record in the data handle file. */
190
	XTDiskValue2			tf_tab_version_2;						/* The table version number. */
191
	XTDiskValue2			tf_tab_unused_2;						/* Unused, set to zero */
192
	XTDiskValue4			tf_rec_size_4;							/* The maximum size of records in the table. */
193
	XTDiskValue1			tf_rec_fixed_1;							/* Set to 1 if this table contains fixed length records. */
194
	XTDiskValue1			tf_reserved_1;
195
	XTDiskValue8			tf_min_auto_inc_8;						/* This is the minimum auto-increment value. */
196
	xtWord1					tf_reserved[64];						/* Reserved, set to 0. */
197
	char					tf_definition[XT_VAR_LENGTH];			/* A cstring, currently it only contains the foreign key information. */
198
} XTTableFormatDRec, *XTTableFormatDPtr;
199
200
/* Keep only the low 8 bits of x. */
#define XT_STAT_ID_MASK(x)	((x) & (u_int) 0xFF)
201
202
/* A record that fits completely in the data file record */
203
typedef struct XTTabRecHead {
204
	xtWord1					tr_rec_type_1;
205
	xtWord1					tr_stat_id_1;
206
	xtDiskRecordID4			tr_prev_rec_id_4;		/* The previous variation of this record. */
207
	XTDiskValue4			tr_xact_id_4;			/* The transaction ID. */
208
	XTDiskValue4			tr_row_id_4;			/* The row ID of this record. */
209
} XTTabRecHeadDRec, *XTTabRecHeadDPtr;
210
211
typedef struct XTTabRecFix {
212
	xtWord1					tr_rec_type_1;			/* XT_TAB_STATUS_FREED, XT_TAB_STATUS_DELETE,
213
													 * XT_TAB_STATUS_FIXED, XT_TAB_STATUS_VARIABLE */
214
	xtWord1					tr_stat_id_1;
215
	xtDiskRecordID4			tr_prev_rec_id_4;		/* The previous variation of this record. */
216
	XTDiskValue4			tr_xact_id_4;			/* The transaction ID. */
217
	XTDiskValue4			tr_row_id_4;			/* The row ID of this record. */
218
	xtWord1					rf_data[XT_VAR_LENGTH];	/* NOTE: This data is in RAW MySQL format. */
219
} XTTabRecFixDRec, *XTTabRecFixDPtr;
220
221
/* An extended record that overflows into the log file: */
222
typedef struct XTTabRecExt {
223
	xtWord1					tr_rec_type_1;			/* XT_TAB_STATUS_EXT_DLOG */
224
	xtWord1					tr_stat_id_1;
225
	xtDiskRecordID4			tr_prev_rec_id_4;		/* The previous variation of this record. */
226
	XTDiskValue4			tr_xact_id_4;			/* The transaction ID. */
227
	XTDiskValue4			tr_row_id_4;			/* The row ID of this record. */
228
	XTDiskValue2			re_log_id_2;			/* Reference to overflow area, log ID */
229
	XTDiskValue6			re_log_offs_6;			/* Reference to the overflow area, log offset */
230
	XTDiskValue4			re_log_dat_siz_4;		/* Size of the overflow data. */
231
	xtWord1					re_data[XT_VAR_LENGTH];	/* This data is in packed PBXT format. */
232
} XTTabRecExtDRec, *XTTabRecExtDPtr;
233
234
typedef struct XTTabRecExtHdat {
235
	xtWord1					tr_rec_type_1;			/* XT_TAB_STATUS_EXT_HDATA */
236
	xtWord1					tr_stat_id_1;
237
	xtDiskRecordID4			tr_prev_rec_id_4;		/* The previous variation of this record. */
238
	XTDiskValue4			tr_xact_id_4;			/* The transaction ID. */
239
	XTDiskValue4			tr_row_id_4;			/* The row ID of this record. */
240
	XTDiskValue4			eh_blk_rec_id_4;		/* The record ID of the next block. */
241
	XTDiskValue2			eh_blk_siz_2;			/* The total size of the data in the trailing blocks */
242
	xtWord1					eh_data[XT_VAR_LENGTH];	/* This data is in packed PBXT format. */
243
} XTTabRecExtHdatDRec, *XTTabRecExtHdatDPtr;
244
245
typedef struct XTTabRecData {
246
	xtWord1					tr_rec_type_1;			/* XT_TAB_STATUS_DATA */
247
	XTDiskValue4			rd_blk_rec_id_4;		/* The record ID of the next block. */
248
	xtWord1					rd_data[XT_VAR_LENGTH];	/* This data is in packed PBXT format. */
249
} XTTabRecDataDRec, *XTTabRecDataDPtr;
250
251
typedef struct XTTabRecEndDat {
252
	xtWord1					tr_rec_type_1;			/* XT_TAB_STATUS_END_DATA */
253
	xtWord1					ed_data[XT_VAR_LENGTH];	/* This data is in packed PBXT format. */
254
} XTTabRecEndDatDRec, *XTTabRecEndDatDPtr;
255
256
/* Header sizes of the fixed and the extended record, and their difference.
 * NOTE(review): the fixed size uses sizeof() while the extended size uses
 * offsetof(); these only agree with the data offsets if XTTabRecHeadDRec
 * has no trailing padding -- confirm.
 */
#define XT_REC_FIX_HEADER_SIZE		sizeof(XTTabRecHeadDRec)
#define XT_REC_EXT_HEADER_SIZE		offsetof(XTTabRecExtDRec, re_data)
#define XT_REC_FIX_EXT_HEADER_DIFF	(XT_REC_EXT_HEADER_SIZE - XT_REC_FIX_HEADER_SIZE)
259
260
typedef struct XTTabRecFree {
261
	xtWord1					rf_rec_type_1;
262
	xtWord1					rf_not_used_1;
263
	xtDiskRecordID4			rf_next_rec_id_4;		/* The next block on the free list. */
264
} XTTabRecFreeDRec, *XTTabRecFreeDPtr;
265
266
typedef struct XTTabRecInfo {
267
	XTTabRecFixDPtr			ri_fix_rec_buf;			/* This references the start of the buffer (set for all types of records) */
268
	XTTabRecExtDPtr			ri_ext_rec;				/* This is only set for extended records. */
269
	xtWord4					ri_rec_buf_size;
270
	XTactExtRecEntryDPtr	ri_log_buf;
271
	xtWord4					ri_log_data_size;		/* This size of the data in the log record. */
272
	xtRecordID				ri_rec_id;				/* The record ID. */
273
} XTTabRecInfoRec, *XTTabRecInfoPtr;
274
275
class XTFlushRecRowTask : public XTLockTask {
276
	public:
277
	XTFlushRecRowTask() : XTLockTask(),
278
		frt_table(NULL)
279
	{ }
280
281
	virtual xtBool	tk_task(XTThreadPtr thread);
282
	virtual void	tk_reference();
283
	virtual void	tk_release();
284
285
	struct XTTable		*frt_table;
286
};
287
288
/* ------- TABLE ROW FILE ------- */

#define XT_TAB_ROW_SHIFTS		2
#define XT_TAB_ROW_MAGIC		0x4567CDEF
//#define XT_TAB_ROW_FREE			0
//#define XT_TAB_ROW_IN_USE		1
294
295
/*
296
 * NOTE: The shift count assumes the size of a table row
297
 * reference is 8 bytes (XT_TAB_ROW_SHIFTS)
298
 */
299
typedef struct XTTabRowRef {
300
	XTDiskValue4			rr_ref_id_4;			/* 4-byte reference, could be a RowID or a RecordID
301
													 * If this row is free, then it is a RowID, which
302
													 * references the next free row.
303
													 * If it is in use, then it is a RecordID which
304
													 * points to the first record in the variation
305
													 * list for the row.
306
													 */
307
} XTTabRowRefDRec, *XTTabRowRefDPtr;
308
309
/*
310
 * This is the header for the row file. The size MUST be a
311
 * the same size as sizeof(XTTabRowRefDRec)
312
 */
313
typedef struct XTTabRowHead {
314
	XTDiskValue4			rh_magic_4;
315
} XTTabRowHeadDRec, *XTTabRowHeadDPtr;
316
317
/* ------- TABLE & OPEN TABLES & TABLE LISTING ------- */
318
319
#ifdef XT_SORT_REC_WRITES

/* One delayed record write; only compiled when XT_SORT_REC_WRITES is defined.
 * NOTE(review): dw_data is a size_t, presumably an offset into the
 * tab_rec_dw_data buffer of XTTable -- confirm.
 */
typedef struct XTDelayWrite {
	off_t					dw_rec_id;
	xtWord2					dw_offset;
	xtWord2					dw_size;
	size_t					dw_data;
} XTDelayWriteRec, *XTDelayWritePtr;

#endif
329
330
/* {TEMP-TABLES}
331
 * Temporary tables do not need to be flused,
332
 * and they also do not need to be recovered!
333
 * Currently this is determined by the name of the
334
 * table!
335
 */
336
typedef struct XTTable : public XTHeap {
337
	struct XTDatabase		*tab_db;			/* Heap pointer */
338
	XTPathStrPtr			tab_name;
339
	xtBool					tab_free_locks;
340
	xtTableID				tab_id;
341
342
	xtWord8					tab_auto_inc;							/* The last value returned as an auto-increment value {PRE-INC}. */
343
	XTSpinLockRec			tab_ainc_lock;							/* Lock for the auto-increment counter. */
344
345
	size_t					tab_index_format_offset;
346
	size_t					tab_index_header_size;
347
	size_t					tab_index_page_size;
348
	u_int					tab_index_block_shifts;
349
	XTIndexHeadDPtr			tab_index_head;
350
	size_t					tab_table_format_offset;
351
	size_t					tab_table_head_size;
352
	XTDictionaryRec			tab_dic;
353
	xt_mutex_type			tab_dic_field_lock;						/* Lock for setting field->ptr!. */
354
355
	XTRowLocksRec			tab_locks;								/* The locks held on this table. */
356
357
	XTTableSeqRec			tab_seq;								/* The table operation sequence. */
358
	XTTabCacheRec			tab_rows;
359
	XTTabCacheRec			tab_recs;
360
361
	/* Used to apply operations to the database in order. */
362
	XTSortedListPtr			tab_op_list;							/* The operation list. Operations to be applied. */
363
364
	/* Values that belong in the header when flushed! */
365
	xtBool1					tab_flush_pending;						/* TRUE if the table needs to be flushed */
366
	xtBool1					tab_op_seq_set;							/* TRUE if operation sequence has been set during recovery. */
367
	xtBool1					tab_recovery_not_done;					/* TRUE if recovery was not done. */
368
	xtBool1					tab_repair_pending;						/* TRUE if the table has been marked for repair */
369
	off_t					tab_bytes_to_flush;						/* Number of bytes of the record/row files to flush. */
370
371
	xtOpSeqNo				tab_head_op_seq;						/* The number of the operation last applied to the database. */
372
	xtRowID					tab_head_row_free_id;
373
	xtRowID					tab_head_row_eof_id;
374
	xtWord4					tab_head_row_fnum;
375
	xtRecordID				tab_head_rec_free_id;
376
	xtRecordID				tab_head_rec_eof_id;
377
	xtWord4					tab_head_rec_fnum;
378
379
	xtOpSeqNo				tab_co_op_seq;							/* The operation last applied by the compactor. */
380
	xtOpSeqNo				tab_wr_op_seq;							/* The operation last applied by the writer. */
381
	xtBool					tab_wr_wake_freeer;						/* Set to TRUE if the writer must wake the freeer. */
382
	xtOpSeqNo				tab_wake_freeer_op;						/* Set to the sequence number the freeer is waiting for. */
383
384
	XTSpinLockRec			tab_mem_lock;							/* A spin lock for the allocation of memory based extended records. */
385
	size_t					tab_mem_total;							/* Total amount of memory used by all memory based extended records. */
386
	size_t					tab_mem_ind_size;						/* The total size of the index. */
387
	size_t					tab_mem_ind_usage;						/* The total slots used in the index. */
388
	size_t					tab_mem_ind_free;						/* Offset of the next free slot in the index (0 if none). */
389
	xtWord1					**tab_mem_index;						/* An array of pointers to extended records. */
390
391
	XTFilePtr				tab_row_file;
392
	xtRowID					tab_row_eof_id;							/* Indicates the EOF of the table row file. */
393
	xtRowID					tab_row_free_id;						/* The start of the free list in the table row file. */
394
	xtWord4					tab_row_fnum;							/* The count of the number of free rows on the free list. */
395
	xt_mutex_type			tab_row_lock;							/* Lock for updating the EOF and free list. */
396
	XT_TAB_ROW_LOCK_TYPE	tab_row_rwlock[XT_ROW_RWLOCKS];			/* Used to lock a row during update. */
397
398
	xt_mutex_type			tab_rec_flush_lock;						/* Required while the record/row files are being flushed. */
399
	XTFlushRecRowTask		*tab_rec_flush_task;
400
	XTFilePtr				tab_rec_file;
401
#ifdef XT_REC_FLUSH_THRESHOLD
402
	u_int					tab_rec_wr_last_flush;					/* Byte output level of the writer at last flush. */
403
#endif
404
	xtRecordID				tab_rec_eof_id;							/* This value can only grow. */
405
	xtRecordID				tab_rec_free_id;
406
	xtWord4					tab_rec_fnum;							/* The count of the number of free rows on the free list. */
407
	xt_mutex_type			tab_rec_lock;							/* Lock for the free list. */
408
#ifdef XT_SORT_REC_WRITES
409
	xtOpSeqNo				tab_rec_dw_op_seq;
410
	XTSortedListPtr			tab_rec_dw_writes;
411
	size_t					tab_rec_dw_data_size;
412
	size_t					tab_rec_dw_data_usage;
413
	xtWord1					*tab_rec_dw_data;
414
#endif
415
416
	xt_mutex_type			tab_ind_stat_lock;						/* Aquired when calculating index statistics. */
417
	time_t					tab_ind_stat_calc_time;					/* Zero means the index stats have not be calculated, otherwize this is a time. */
418
419
	xt_mutex_type			tab_ind_flush_lock;						/* Required while the index file is being flushed. */
420
	XTFlushIndexTask		*tab_ind_flush_task;
421
	XTIndexLogPtr			tab_ind_flush_ilog;						/* The ilog used to flush the index file. */
422
	XTIndDirtyList			tab_ind_dirty_list;						/* A list of dirty blocks to be flushed for this index. */
423
	xtLogID					tab_ind_rec_log_id;						/* The point before which index entries have been written. */
424
	xtLogOffset				tab_ind_rec_log_offset;					/* The log offset of the write point. */
425
	XTFilePtr				tab_ind_file;
426
	xtIndexNodeID			tab_ind_eof;							/* This value can only grow. */
427
	xtIndexNodeID			tab_ind_free;							/* The start of the free page list of the index. */
428
	XTIndFreeListPtr		tab_ind_free_list;						/* A cache of the free list (if exists, don't go to disk!) */
429
	xt_mutex_type			tab_ind_lock;							/* Lock for reading and writing the index free list. */
430
#ifdef PRINT_IND_FLUSH_STATS
431
	u_int					tab_ind_write;
432
	xtWord8					tab_ind_flush_time;
433
	u_int					tab_ind_flush;
434
#endif
435
} XTTableHRec, *XTTableHPtr;		/* Heap pointer */
436
437
/* Used for an in-memory list of the tables, ordered by ID. */
438
typedef struct XTTableEntry {
439
	xtTableID				te_tab_id;
440
	char					*te_tab_name;
441
	struct XTTablePath		*te_tab_path;
442
	xtBool					te_heap_tab;
443
	XTTableHPtr				te_table;
444
	xtWord1					te_type;
445
} XTTableEntryRec, *XTTableEntryPtr;
446
447
typedef struct XTOpenTable {
448
	struct XTThread			*ot_thread;								/* The thread currently using this open table. */
449
	XTTableHPtr				ot_table;								/* PBXT table information. */
450
451
	struct XTOpenTable		*ot_otp_next_free;						/* Next free open table in the open table pool. */
452
	struct XTOpenTable		*ot_otp_mr_used;
453
	struct XTOpenTable		*ot_otp_lr_used;
454
	time_t					ot_otp_free_time;						/* The time this table was place on the free list. */
455
456
	//struct XTOpenTable	*ot_pool_next;							/* Next pointer for open table pool. */
457
458
	XT_ROW_REC_FILE_PTR		ot_rec_file;
459
	XT_ROW_REC_FILE_PTR		ot_row_file;
460
	XTOpenFilePtr			ot_ind_file;
461
	u_int					ot_err_index_no;						/* The number of the index on which the last error occurred */
462
463
	xtBool					ot_rec_fixed;							/* Cached from table for quick access. */
464
	size_t					ot_rec_size;							/* Cached from table for quick access. */
465
	
466
	char					ot_error_key[XT_IDENTIFIER_NAME_SIZE];
467
	struct XTOpenTable		*ot_prev_update;						/* The UPDATE statement stack! {UPDATE-STACK} */
468
	u_int					ot_update_id;							/* The update statement ID. */	
469
	xtBool					ot_for_update;							/* True if reading FOR UPDATE. */
470
	xtBool					ot_is_modify;							/* True if UPDATE or DELETE. */
471
	xtRowID					ot_temp_row_lock;						/* The temporary row lock set on this table. */
472
	u_int					ot_cols_req;							/* The number of columns required from the table. */
473
474
	/* GOTCHA: Separate buffers for reading and writing rows because
475
	 * of blob references, to this buffer, as in this test:
476
	 *
477
	 * drop table if exists t1;
478
	 * CREATE TABLE t1 (id MEDIUMINT NOT NULL, b1 BIT(8), vc TEXT, 
479
	 *                  bc CHAR(255), d DECIMAL(10,4) DEFAULT 0, 
480
	 *                  f FLOAT DEFAULT 0, total BIGINT UNSIGNED, 
481
	 *                  y YEAR, t DATE)
482
	 *                  PARTITION BY RANGE (YEAR(t)) 
483
	 *                 (PARTITION p1 VALUES LESS THAN (2005), 
484
	 *                  PARTITION p2 VALUES LESS THAN MAXVALUE);
485
	 *                
486
	 * INSERT INTO t1 VALUES(412,1,'eTesting MySQL databases is a cool ',
487
	 *                       'EEEMust make it bug free for the customer',
488
	 *                        654321.4321,15.21,0,1965,"2005-11-14");
489
	 * 
490
	 * UPDATE t1 SET b1 = 0, t="2006-02-22" WHERE id = 412;
491
	 * 
492
	 */
493
	size_t					ot_row_rbuf_size;						/* The current size of the read row buffer (resized dynamically). */
494
	xtWord1					*ot_row_rbuffer;						/* The row buffer for reading rows. */
495
	size_t					ot_row_wbuf_size;						/* The current size of the write row buffer (resized dynamically). */
496
	xtWord1					*ot_row_wbuffer;						/* The row buffer for writing rows. */
497
498
	/* Details of the current record: */
499
	xtRecordID				ot_curr_rec_id;							/* The offset of the current record. */
500
	xtRowID					ot_curr_row_id;							/* The row ID of the current record. */
501
	xtBool					ot_curr_updated;						/* TRUE if the current record was updated by the current transaction. */
502
503
	XTIndBlockPtr			ot_ind_res_bufs;						/* A list of reserved index buffers. */
504
	u_int					ot_ind_res_count;						/* The number of reserved buffers. */
505
#ifdef XT_TRACK_INDEX_UPDATES
506
	u_int					ot_ind_changed;
507
	u_int					ot_ind_reserved;
508
	u_int					ot_ind_reads;
509
#endif
510
#ifdef XT_TRACK_RETURNED_ROWS
511
	u_int					ot_rows_ret_max;
512
	u_int					ot_rows_ret_curr;
513
	xtRecordID				*ot_rows_returned;
514
#endif
515
	/* GOTCHA: Separate buffers for reading and writing the index are required
516
	 * because MySQL sometimes scans and updates an index with the same
517
	 * table handler.
518
	 */
519
	XTIdxItemRec			ot_ind_state;							/* Decribes the state of the index buffer. */
520
	XTIndHandlePtr			ot_ind_rhandle;							/* This handle references a block which is being used in a sequential scan. */
521
#ifdef CHECK_IF_WRITE_WAS_OK
522
	XTIdxBranchDRec			ot_ind_tmp_buf;							/* A temporary read buffer. */
523
#endif
524
	XTIdxBranchDRec			ot_ind_wbuf;							/* Buffer for the current index node for writing. */
525
	xtWord1					ot_ind_wbuf2[XT_INDEX_PAGE_SIZE];		/* Overflow for the write buffer when a node is too big. */
526
527
	/* Note: the fields below ot_ind_rbuf are not zero'ed out on creation
528
	 * of this structure!
529
	 */
530
	xtRecordID				ot_seq_rec_id;							/* Current position of a sequential scan. */
531
	xtRecordID				ot_seq_eof_id;							/* The EOF at the start of the sequential scan. */
532
	XTTabCachePagePtr		ot_seq_page;							/* If ot_seq_buffer is non-NULL, then a page has been locked! */
533
	xtWord1					*ot_seq_data;							/* Non-NULL if the data references memory mapped memory, or if it was
534
																	 * allocated if no memory mapping is being used.
535
																	 */
536
	xtBool					ot_on_page;
537
	size_t					ot_seq_offset;							/* Offset on the current page. */
538
} XTOpenTableRec, *XTOpenTablePtr;
539
540
#define XT_DATABASE_NAME_SIZE		XT_IDENTIFIER_NAME_SIZE

/* NOTE(review): presumably the values of XTTableDesc.td_type, selecting
 * which member of its union is in use -- confirm.
 */
#define XT_TD_FROM_DIRECTORY		1
#define XT_TD_FROM_TAB_FILE			2
544
545
typedef struct XTTableDesc {
546
	char					td_tab_name[XT_TABLE_NAME_SIZE+4];	// 4 extra for DEL# (tables being deleted)
547
	xtTableID				td_tab_id;
548
	xtBool					td_heap_tab;
549
	struct XTTablePath		*td_tab_path;						// The path of the table.
550
	struct XTDatabase		*td_db;
551
	xtWord1					td_tab_type;
552
	int						td_type;
553
	union {
554
		struct {
555
			u_int			td_path_idx;
556
			XTOpenDirPtr	td_open_dir;
557
		} y;
558
		struct {
559
			char			*td_table_info;
560
			char			*td_curr_ptr;
561
		} z;
562
	} x;
563
} XTTableDescRec, *XTTableDescPtr;
564
565
566
typedef struct XTFilesOfTable {
567
	int						ft_state;
568
	XTPathStrPtr			ft_tab_name;
569
	xtTableID				ft_tab_id;
570
	char					ft_file_path[PATH_MAX];
571
} XTFilesOfTableRec, *XTFilesOfTablePtr;
572
573
typedef struct XTRestrictItem {
574
	xtTableID				ri_tab_id;
575
	xtRecordID				ri_rec_id;
576
} XTRestrictItemRec, *XTRestrictItemPtr;
577
578
int					xt_tab_compare_names(const char *n1, const char *n2);
579
int					xt_tab_compare_paths(char *n1, char *n2);
580
void				xt_tab_init_db(struct XTThread *self, struct XTDatabase *db);
581
void				xt_tab_exit_db(struct XTThread *self, struct XTDatabase *db);
582
583
char				*xt_tab_file_to_name(size_t size, char *tab_name, char *file_name);
584
585
void				xt_create_table(struct XTThread *self, XTPathStrPtr name, XTDictionaryPtr dic);
586
XTTableHPtr			xt_use_table(struct XTThread *self, XTPathStrPtr name, xtBool no_load, xtBool missing_ok);
587
void				xt_sync_flush_table(struct XTThread *self, XTOpenTablePtr ot, int timeout);
588
xtBool				xt_async_flush_record_row(XTTableHPtr tab, xtBool notify_complete, XTThreadPtr thread);
589
xtBool				xt_flush_record_row(XTOpenTablePtr ot, off_t *bytes_flushed, xtBool have_table_loc);
590
void				xt_flush_table(struct XTThread *self, XTOpenTablePtr ot);
591
XTTableHPtr			xt_use_table_no_lock(XTThreadPtr self, struct XTDatabase *db, XTPathStrPtr name, xtBool no_load, xtBool missing_ok, XTDictionaryPtr dic);
592
XTTableHPtr			xt_use_table_by_id(XTThreadPtr self, struct XTDatabase *db, xtTableID tab_id, int *result);
593
XTTableHPtr			xt_use_table_by_id_ns(struct XTDatabase *db, xtTableID tab_id);
594
XTOpenTablePtr		xt_open_table(XTTableHPtr tab);
595
void				xt_close_table(XTOpenTablePtr ot, xtBool flush, xtBool have_table_lock);
596
void				xt_drop_table(struct XTThread *self, XTPathStrPtr name, xtBool drop_db);
597
void				xt_check_table(XTThreadPtr self, XTOpenTablePtr tab);
598
void				xt_rename_table(struct XTThread *self, XTPathStrPtr old_name, XTPathStrPtr new_name);
599
600
void				xt_describe_tables_init(struct XTThread *self, struct XTDatabase *db, XTTableDescPtr td);
601
xtBool				xt_describe_tables_next(struct XTThread *self, XTTableDescPtr td);
602
void				xt_describe_tables_exit(struct XTThread *self, XTTableDescPtr td);
603
604
xtBool				xt_table_exists(struct XTDatabase *db);
605
606
void				xt_enum_tables_init(u_int *edx);
607
XTTableEntryPtr		xt_enum_tables_next(struct XTThread *self, struct XTDatabase *db, u_int *edx);
608
609
void				xt_enum_files_of_tables_init(XTPathStrPtr tab_name, xtTableID tab_id, XTFilesOfTablePtr ft);
610
xtBool				xt_enum_files_of_tables_next(XTFilesOfTablePtr ft);
611
612
xtBool				xt_tab_seq_init(XTOpenTablePtr ot);
613
void				xt_tab_seq_reset(XTOpenTablePtr ot);
614
void				xt_tab_seq_exit(XTOpenTablePtr ot);
615
xtBool				xt_tab_seq_next(XTOpenTablePtr ot, xtWord1 *buffer, xtBool *eof);
616
void				xt_tab_seq_repeat(XTOpenTablePtr ot);
617
618
xtBool				xt_tab_new_record(XTOpenTablePtr ot, xtWord1 *buffer);
619
xtBool				xt_tab_delete_record(XTOpenTablePtr ot, xtWord1 *buffer);
620
xtBool				xt_tab_restrict_rows(XTBasicListPtr list, struct XTThread *thread);
621
xtBool				xt_tab_update_record(XTOpenTablePtr ot, xtWord1 *before_buf, xtWord1 *after_buf);
622
int					xt_tab_visible(XTOpenTablePtr ot);
623
int					xt_tab_read_record(register XTOpenTablePtr ot, xtWord1 *buffer);
624
int					xt_tab_dirty_read_record(register XTOpenTablePtr ot, xtWord1 *buffer);
625
void				xt_tab_load_row_pointers(XTThreadPtr self, XTOpenTablePtr ot);
626
void				xt_tab_load_table(struct XTThread *self, XTOpenTablePtr ot);
627
xtBool				xt_tab_load_record(register XTOpenTablePtr ot, xtRecordID rec_id, XTInfoBufferPtr rec_buf);
628
int					xt_tab_remove_record(XTOpenTablePtr ot, xtRecordID rec_id, xtWord1 *rec_data, xtRecordID *prev_var_rec_id, xtBool clean_delete, xtRowID row_id, xtXactID xn_id);
629
int					xt_tab_maybe_committed(XTOpenTablePtr ot, xtRecordID rec_id, xtXactID *xn_id, xtRowID *out_rowid, xtBool *out_updated);
630
void				xt_tab_store_header(XTOpenTablePtr ot, XTTableHeadDPtr rec_head);
631
xtBool				xt_tab_write_min_auto_inc(XTOpenTablePtr ot);
632
633
xtBool				xt_tab_get_row(register XTOpenTablePtr ot, xtRowID row_id, xtRecordID *var_rec_id);
634
xtBool				xt_tab_set_row(XTOpenTablePtr ot, u_int status, xtRowID row_id, xtRecordID var_rec_id);
635
xtBool				xt_tab_free_row(XTOpenTablePtr ot, XTTableHPtr tab, xtRowID row_id);
636
637
xtBool				xt_tab_load_ext_data(XTOpenTablePtr ot, xtRecordID load_rec_id, xtWord1 *buffer, u_int cols_req);
638
xtBool				xt_tab_put_rec_data(XTOpenTablePtr ot, xtRecordID rec_id, size_t size, xtWord1 *buffer, xtOpSeqNo *op_seq);
639
xtBool				xt_tab_put_eof_rec_data(XTOpenTablePtr ot, xtRecordID rec_id, size_t size, xtWord1 *buffer, xtOpSeqNo *op_seq);
640
xtBool				xt_tab_put_log_op_rec_data(XTOpenTablePtr ot, u_int status, xtRecordID free_rec_id, xtRecordID rec_id, size_t size, xtWord1 *buffer);
641
xtBool				xt_tab_put_log_rec_data(XTOpenTablePtr ot, u_int status, xtRecordID free_rec_id, xtRecordID rec_id, size_t size, xtWord1 *buffer, xtOpSeqNo *op_seq);
642
xtBool				xt_tab_get_rec_data(register XTOpenTablePtr ot, xtRecordID rec_id, size_t size, xtWord1 *buffer);
643
void				xt_tab_disable_index(XTTableHPtr tab, u_int ind_error);
644
void				xt_tab_set_index_error(XTTableHPtr tab);
645
646
XTFileType			xt_rec_file_type(xtBool heap_tab);
647
XTFileType			xt_row_file_type(xtBool heap_tab);
648
XTFileType			xt_ind_file_type(xtBool heap_tab);
649
650
void				xt_tab_make_table_name(XTPathStrPtr tab_path, char *table_name, size_t size);
651
xtBool				xt_tab_is_table_repair_pending(XTTableHPtr tab);
652
void				xt_tab_table_repaired(XTTableHPtr tab);
653
void				xt_tab_set_table_repair_pending(XTTableHPtr tab);
654
655
xtBool				xt_tab_get_ext_slot(XTTableHPtr tab, xtLogID *log_id, xtLogOffset *log_offset, size_t req_size);
656
xtBool				xt_tab_save_ext_record(XTTableHPtr tab, xtLogID log_id, xtLogOffset log_offset, size_t size, xtWord1 *data);
657
void				xt_tab_read_ext_record(XTTableHPtr tab, xtLogID log_id, xtLogOffset log_offset, size_t size, xtWord1 *data);
658
void				xt_tab_free_ext_slot(XTTableHPtr tab, xtLogID log_id, xtLogOffset log_offset, size_t size);
659
660
/* Convert a row ID to a byte offset, using the header size and record
 * size of tab->tab_rows. Row IDs are 1-based: ID 1 maps to the first
 * position after the header.
 * NOTE: unlike xt_rec_id_to_rec_offset(), an ID of 0 is not special-cased.
 */
inline off_t		xt_row_id_to_row_offset(XTTableHPtr tab, xtRowID row_id)
{
	off_t header_size = (off_t) tab->tab_rows.tci_header_size;
	off_t entry_size = (off_t) tab->tab_rows.tci_rec_size;

	return header_size + (off_t) (row_id - 1) * entry_size;
}
664
665
/* Convert a byte offset back to a 1-based row ID; the inverse of
 * xt_row_id_to_row_offset(). In DEBUG builds an offset that is not a
 * whole number of entries past the header is reported with printf(),
 * and the conversion still goes ahead.
 */
inline  xtRowID		xt_row_offset_row_id(XTTableHPtr tab, off_t rec_offs)
{
	off_t data_offs = rec_offs - (off_t) tab->tab_rows.tci_header_size;
	off_t entry_size = (off_t) tab->tab_rows.tci_rec_size;

#ifdef DEBUG
	if ((data_offs % entry_size) != 0) {
		printf("ERROR! Not a valid record offset!\n");
	}
#endif
	return (xtRowID) (data_offs / entry_size) + 1;
}
674
675
/* Convert a 1-based record ID to a byte offset, using the header size
 * and record size of tab->tab_recs. An ID of 0 maps to offset 0.
 */
inline off_t		xt_rec_id_to_rec_offset(XTTableHPtr tab, xtRefID ref_id)
{
	if (!ref_id)
		return (off_t) 0;

	off_t header_size = (off_t) tab->tab_recs.tci_header_size;
	off_t entry_size = (off_t) tab->tab_recs.tci_rec_size;

	return header_size + (off_t) (ref_id-1) * entry_size;
}
681
682
/* Convert a byte offset back to a 1-based record ID; the inverse of
 * xt_rec_id_to_rec_offset(). Offset 0 maps to ID 0. In DEBUG builds an
 * offset that is not a whole number of records past the header is
 * reported with printf(), and the conversion still goes ahead.
 */
inline  xtRefID		xt_rec_offset_rec_id(XTTableHPtr tab, off_t ref_offs)
{
	if (!ref_offs)
		return (xtRefID) 0;

	off_t data_offs = ref_offs - (off_t) tab->tab_recs.tci_header_size;
	off_t entry_size = (off_t) tab->tab_recs.tci_rec_size;

#ifdef DEBUG
	if ((data_offs % entry_size) != 0) {
		printf("ERROR! Not a valid record offset!\n");
	}
#endif

	return (xtRefID) (data_offs / entry_size)+1;
}
694
695
/* Convert a 1-based index node ID to a byte offset, using
 * tab_index_header_size and tab_index_page_size. A node ID of 0 maps
 * to offset 0.
 */
inline off_t		xt_ind_node_to_offset(XTTableHPtr tab, xtIndexNodeID node_id)
{
	if (!XT_NODE_ID(node_id))
		return (off_t) 0;

	off_t header_size = (off_t) tab->tab_index_header_size;
	off_t page_size = (off_t) tab->tab_index_page_size;

	return header_size + (off_t) (XT_NODE_ID(node_id)-1) * page_size;
}
701
702
/* Convert a byte offset back to a 1-based index node ID; the inverse of
 * xt_ind_node_to_offset(). Offset 0 maps to node ID 0. In DEBUG builds
 * an offset that is not a whole number of pages past the header is
 * reported with printf(), and the conversion still goes ahead.
 */
inline xtIndexNodeID xt_ind_offset_to_node(XTTableHPtr tab, off_t ind_offs)
{
	XT_NODE_TEMP;	/* Declared before any use of XT_RET_NODE_ID(), as in the other users of that macro. */

	if (!ind_offs)
		return XT_RET_NODE_ID(0);
#ifdef DEBUG
	if (((ind_offs - (off_t) tab->tab_index_header_size) % (off_t) tab->tab_index_page_size) != 0) {
		printf("ERROR! Not a valid index offset!\n");
	}
#endif

	return XT_RET_NODE_ID(((ind_offs - (off_t) tab->tab_index_header_size) / (off_t) tab->tab_index_page_size)+1);
}
716
717
/* Write 'size' bytes from 'data' at 'offset' of the open table's record
 * file. The call is passed the st_rec statistics of the thread that is
 * using the open table; the result of xt_pwrite_file() is returned as is.
 */
inline xtBool xt_tab_write_rec(XTOpenTablePtr ot, off_t offset, size_t size, xtWord1 *data)
{
	struct XTThread *thread = ot->ot_thread;

	return xt_pwrite_file(ot->ot_rec_file, offset, size, data, &thread->st_statistics.st_rec, thread);
}
721
722
/* Write 'size' bytes from 'data' at 'offset' of the open table's row
 * file; the result of xt_pwrite_file() is returned as is.
 * NOTE(review): the call is passed the same st_rec statistics as
 * xt_tab_write_rec() -- confirm that row writes are meant to be counted
 * there.
 */
inline xtBool xt_tab_write_row(XTOpenTablePtr ot, off_t offset, size_t size, xtWord1 *data)
{
	struct XTThread *thread = ot->ot_thread;

	return xt_pwrite_file(ot->ot_row_file, offset, size, data, &thread->st_statistics.st_rec, thread);
}
726
727
/* Grow the buffer of row buffer 'rb' to at least 'size' bytes: if
 * rb->rb_size is smaller than 'size', rb->x.rb_buffer is passed to
 * xt_realloc() and rb->rb_size is set to 'size'. The buffer never shrinks.
 *
 * The arguments are parenthesized so that expressions such as '&buf' or
 * 'a ? b : c' expand as intended. NOTE: 'rb' and 'size' are still
 * evaluated more than once, so do not pass expressions with side effects.
 * NOTE(review): the result of xt_realloc() is not checked here;
 * presumably it reports failure through 'thr' -- confirm.
 */
#define XT_RESIZE_ROW_BUFFER(thr, rb, size) \
	do { \
		if ((rb)->rb_size < (size)) { \
			xt_realloc(thr, (void **) &(rb)->x.rb_buffer, (size)); \
			(rb)->rb_size = (size); \
		} \
	} \
	while (0)
735
736
#endif
737