~drizzle-trunk/drizzle/development

1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
1
/* Copyright (c) 2005 PrimeBase Technologies GmbH
2
 *
3
 * Derived from ha_example.h
4
 * Copyright (C) 2003 MySQL AB
5
 *
6
 * PrimeBase XT
7
 *
8
 * This program is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * This program is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with this program; if not, write to the Free Software
20
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA	02111-1307	USA
21
 *
22
 * 2005-11-10	Paul McCullagh
23
 *
24
 */
25
26
#ifdef USE_PRAGMA_IMPLEMENTATION
27
#pragma implementation				// gcc: Class implementation
28
#endif
29
30
#include "xt_config.h"
31
32
#if defined(XT_WIN)
33
#include <windows.h>
34
#endif
35
36
#include <stdlib.h>
37
#include <time.h>
38
#include <ctype.h>
39
40
#ifdef DRIZZLED
41
#include <fcntl.h>
1455.3.2 by Vladimir Kolesnikov
create/insert/select/drop works
42
#include <drizzled/internal/my_sys.h>
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
43
#include <drizzled/common.h>
44
#include <drizzled/plugin.h>
45
#include <drizzled/field.h>
46
#include <drizzled/session.h>
47
#include <drizzled/data_home.h>
48
#include <drizzled/error.h>
49
#include <drizzled/table.h>
50
#include <drizzled/field/timestamp.h>
51
#include <drizzled/session.h>
52
53
#define my_strdup(a,b) strdup(a)
54
55
using namespace drizzled;
56
using namespace drizzled::plugin;
57
58
#define DEFAULT_FILE_EXTENSION ".dfe"
59
60
#else
61
#include "mysql_priv.h"
62
#include <mysql/plugin.h>
63
#endif
64
65
#include "ha_pbxt.h"
66
#include "ha_xtsys.h"
67
68
#include "strutil_xt.h"
69
#include "database_xt.h"
70
#include "cache_xt.h"
71
#include "trace_xt.h"
72
#include "heap_xt.h"
73
#include "myxt_xt.h"
74
#include "datadic_xt.h"
75
#ifdef PBMS_ENABLED
76
#include "pbms_enabled.h"
77
#endif
78
#include "tabcache_xt.h"
79
#include "systab_xt.h"
80
#include "xaction_xt.h"
81
#include "backup_xt.h"
82
83
#ifdef DEBUG
84
//#define XT_USE_SYS_PAR_DEBUG_SIZES
85
#define PBXT_HANDLER_TRACE
86
//#define PBXT_TRACE_RETURN
87
//#define XT_PRINT_INDEX_OPT
88
//#define XT_SHOW_DUMPS_TRACE
89
//#define XT_UNIT_TEST
90
//#define LOAD_TABLE_ON_OPEN
91
//#define CHECK_TABLE_LOADS
92
93
/* Enable to trace the statements executed by the engine: */
94
//#define TRACE_STATEMENTS
95
96
/* Enable to print the trace to the stdout, instead of
97
 * to the trace log.
98
 */
99
//#define PRINT_STATEMENTS
100
#endif
101
102
#ifndef DRIZZLED
103
static handler	*pbxt_create_handler(handlerton *hton, TABLE_SHARE *table, MEM_ROOT *mem_root);
104
static int		pbxt_init(void *p);
105
static int		pbxt_end(void *p);
106
static int		pbxt_panic(handlerton *hton, enum ha_panic_function flag);
107
static void		pbxt_drop_database(handlerton *hton, char *path);
108
static int		pbxt_close_connection(handlerton *hton, THD* thd);
109
static int		pbxt_commit(handlerton *hton, THD *thd, bool all);
110
static int		pbxt_rollback(handlerton *hton, THD *thd, bool all);
111
static int		pbxt_prepare(handlerton *hton, THD *thd, bool all);
112
static int		pbxt_recover(handlerton *hton, XID *xid_list, uint len);
113
static int		pbxt_commit_by_xid(handlerton *hton, XID *xid);
114
static int		pbxt_rollback_by_xid(handlerton *hton, XID *xid);
115
static int		pbxt_start_consistent_snapshot(handlerton *hton, THD *thd);
116
#endif
117
static void		ha_aquire_exclusive_use(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine);
118
static void		ha_release_exclusive_use(XTThreadPtr self, XTSharePtr share);
119
static void		ha_close_open_tables(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine);
120
121
#ifdef TRACE_STATEMENTS
122
123
#ifdef PRINT_STATEMENTS
124
#define STAT_TRACE(y, x)		printf("%s: %s\n", y ? y->t_name : "-unknown-", x)
125
#else
126
#define STAT_TRACE(y, x)		xt_ttraceq(y, x)
127
#endif
128
129
#else
130
131
#define STAT_TRACE(y, x)
132
133
#endif
134
135
#ifdef PBXT_HANDLER_TRACE
136
#define PBXT_ALLOW_PRINTING
137
138
#define XT_TRACE_CALL()				ha_trace_function(__FUNC__, NULL)
139
#define XT_TRACE_METHOD()			ha_trace_function(__FUNC__, pb_share->sh_table_path->ps_path)
140
141
#ifdef PBXT_TRACE_RETURN
142
#define XT_RETURN(x)				do { printf("%d\n", (int) (x)); return (x); } while (0)
143
#define XT_RETURN_VOID				do { printf("out\n"); return; } while (0)
144
#else
145
#define XT_RETURN(x)				return (x)
146
#define XT_RETURN_VOID				return
147
#endif
148
149
#else
150
151
#define XT_TRACE_CALL()
152
#define XT_TRACE_METHOD()
153
#define XT_RETURN(x)				return (x)
154
#define XT_RETURN_VOID				return
155
156
#endif
157
158
#ifdef PBXT_ALLOW_PRINTING
159
#define XT_PRINT0(y, x)				do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-"); } while (0)
160
#define XT_PRINT1(y, x, a)			do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-", a); } while (0)
161
#define XT_PRINT2(y, x, a, b)		do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-", a, b); } while (0)
162
#define XT_PRINT3(y, x, a, b, c)	do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-", a, b, c); } while (0)
163
#else
164
#define XT_PRINT0(y, x)
165
#define XT_PRINT1(y, x, a)
166
#define XT_PRINT2(y, x, a, b)
167
#define XT_PRINT3(y, x, a, b, c)
168
#endif
169
170
171
#define TS(x)					(x)->s
172
173
handlerton				*pbxt_hton;
174
bool					pbxt_inited = false;		// Variable for checking the init state of hash
175
xtBool					pbxt_ignore_case = true;
176
const char				*pbxt_extensions[]= { ".xtr", ".xtd", ".xtl", ".xti", ".xt", "", NULL };
177
#ifdef XT_CRASH_DEBUG
178
xtBool					pbxt_crash_debug = TRUE;
179
#else
180
xtBool					pbxt_crash_debug = FALSE;
181
#endif
182
183
184
/* Variables for pbxt share methods */
185
static xt_mutex_type	pbxt_database_mutex;		// Prevent a database from being opened while it is being dropped
186
static XTHashTabPtr		pbxt_share_tables;			// Hash used to track open tables
187
static char				*pbxt_index_cache_size;
188
static char				*pbxt_record_cache_size;
189
static char				*pbxt_log_cache_size;
190
static char				*pbxt_log_file_threshold;
191
static char				*pbxt_transaction_buffer_size;
192
static char				*pbxt_log_buffer_size;
193
static char				*pbxt_checkpoint_frequency;
194
static char				*pbxt_data_log_threshold;
195
static char				*pbxt_data_file_grow_size;
196
static char				*pbxt_row_file_grow_size;
197
static char				*pbxt_record_write_threshold;
198
static my_bool			pbxt_support_xa;
199
200
#ifndef DRIZZLED
201
// drizzle complains it's not used
202
static XTXactEnumXARec	pbxt_xa_enum;
203
#endif
204
205
#ifdef DEBUG
206
#define XT_SHARE_LOCK_WAIT		5000
207
#else
208
#define XT_SHARE_LOCK_WAIT		500
209
#endif
210
211
/* 
212
 * Lock timeout in 1/1000ths of a second
213
 */
214
#define XT_SHARE_LOCK_TIMEOUT	30000
215
216
/*
217
 * -----------------------------------------------------------------------
218
 * SYSTEM VARIABLES
219
 *
220
 */
221
 
222
//#define XT_FOR_TEAMDRIVE
223
224
/*
 * Name, default and limits of one system variable.
 * All values are kept as strings (for example "32MB").
 */
struct HAVarParams {
	const char		*vp_var;		/* Variable name. */
	const char		*vp_def;		/* Default value. */
	const char		*vp_min;		/* Minimum allowed value. */
	const char		*vp_max4;		/* Maximum allowed value on 32-bit processors. */
	const char		*vp_max8;		/* Maximum allowed value on 64-bit processors. */
};

typedef struct HAVarParams	HAVarParamsRec;
typedef struct HAVarParams	*HAVarParamsPtr;
231
232
#ifdef XT_USE_SYS_PAR_DEBUG_SIZES
233
static HAVarParamsRec vp_index_cache_size = { "pbxt_index_cache_size", "32MB", "8MB", "2GB", "2000GB" };
234
static HAVarParamsRec vp_record_cache_size = { "pbxt_record_cache_size", "32MB", "8MB", "2GB", "2000GB" };
235
static HAVarParamsRec vp_log_cache_size = { "pbxt_log_cache_size", "16MB", "4MB", "2GB", "2000GB" };
236
static HAVarParamsRec vp_checkpoint_frequency = { "pbxt_checkpoint_frequency", "1GB", "2MB", "2000GB", "2000GB" };
237
static HAVarParamsRec vp_log_file_threshold = { "pbxt_log_file_threshold", "32MB", "1MB", "2GB", "256TB" };
238
static HAVarParamsRec vp_transaction_buffer_size = { "pbxt_transaction_buffer_size", "1MB", "128K", "1GB", "24GB" };
239
static HAVarParamsRec vp_log_buffer_size = { "pbxt_log_buffer_size", "256K", "128K", "1GB", "24GB" };
240
static HAVarParamsRec vp_data_log_threshold = { "pbxt_data_log_threshold", "400K", "400K", "2GB", "256TB" };
241
static HAVarParamsRec vp_data_file_grow_size = { "pbxt_data_file_grow_size", "2MB", "128K", "1GB", "2GB" };
242
static HAVarParamsRec vp_row_file_grow_size = { "pbxt_row_file_grow_size", "256K", "32K", "1GB", "2GB" };
243
static HAVarParamsRec vp_record_write_threshold = { "pbxt_record_write_threshold", "4MB", "0", "2GB", "8GB" };
244
#define XT_DL_DEFAULT_XLOG_COUNT		3
245
#define XT_DL_DEFAULT_GARBAGE_LEVEL		10
246
#else
247
static HAVarParamsRec vp_index_cache_size = { "pbxt_index_cache_size", "32MB", "8MB", "2GB", "2000GB" };
248
static HAVarParamsRec vp_record_cache_size = { "pbxt_record_cache_size", "32MB", "8MB", "2GB", "2000GB" };
249
static HAVarParamsRec vp_log_cache_size = { "pbxt_log_cache_size", "16MB", "4MB", "2GB", "2000GB" };
250
static HAVarParamsRec vp_checkpoint_frequency = { "pbxt_checkpoint_frequency", "1GB", "2MB", "2000GB", "2000GB" };
251
static HAVarParamsRec vp_log_file_threshold = { "pbxt_log_file_threshold", "32MB", "1MB", "2GB", "256TB" };
252
static HAVarParamsRec vp_transaction_buffer_size = { "pbxt_transaction_buffer_size", "1MB", "128K", "1GB", "24GB" };
253
static HAVarParamsRec vp_log_buffer_size = { "pbxt_log_buffer_size", "256K", "128K", "1GB", "24GB" };
254
static HAVarParamsRec vp_data_log_threshold = { "pbxt_data_log_threshold", "64MB", "1MB", "2GB", "256TB" };
255
static HAVarParamsRec vp_data_file_grow_size = { "pbxt_data_file_grow_size", "2MB", "128K", "1GB", "2GB" };
256
static HAVarParamsRec vp_row_file_grow_size = { "pbxt_row_file_grow_size", "256K", "32K", "1GB", "2GB" };
257
static HAVarParamsRec vp_record_write_threshold = { "pbxt_record_write_threshold", "4MB", "0", "2GB", "8GB" };
258
#define XT_DL_DEFAULT_XLOG_COUNT		3
259
#define XT_DL_DEFAULT_GARBAGE_LEVEL		50
260
#endif
261
262
#define XT_AUTO_INCREMENT_DEF			0
263
#define XT_DL_DEFAULT_INDEX_DIRTY_LEVEL	80
264
265
#ifdef XT_MAC
266
#ifdef DEBUG
267
/* For debugging on the Mac, we check the re-use logs: */
268
#define XT_OFFLINE_LOG_FUNCTION_DEF		XT_RECYCLE_LOGS
269
#else
270
#define XT_OFFLINE_LOG_FUNCTION_DEF		XT_DELETE_LOGS
271
#endif
272
#else
273
#define XT_OFFLINE_LOG_FUNCTION_DEF		XT_RECYCLE_LOGS
274
#endif
275
276
/* TeamDrive, uses special auto-increment, and
277
 * we keep the logs for the moment:
278
 */
279
#ifdef XT_FOR_TEAMDRIVE
280
#undef XT_OFFLINE_LOG_FUNCTION_DEF
281
#define XT_OFFLINE_LOG_FUNCTION_DEF		XT_KEEP_LOGS
282
//#undef XT_AUTO_INCREMENT_DEF
283
//#define XT_AUTO_INCREMENT_DEF			1
284
#endif
285
286
#ifdef PBXT_HANDLER_TRACE
287
static void ha_trace_function(const char *function, char *table)
288
{
289
	char		func_buf[50], *ptr;
290
	XTThreadPtr	thread = xt_get_self(); 
291
292
	if ((ptr = const_cast<char *>(strchr(function, '(')))) {
293
		ptr--;
294
		while (ptr > function) {
295
			if (!(isalnum(*ptr) || *ptr == '_'))
296
				break;
297
			ptr--;
298
		}
299
		ptr++;
300
		xt_strcpy(50, func_buf, ptr);
301
		if ((ptr = strchr(func_buf, '(')))
302
			*ptr = 0;
303
	}
304
	else
305
		xt_strcpy(50, func_buf, function);
306
	if (table)
307
		printf("%s %s (%s)\n", thread ? thread->t_name : "-unknown-", func_buf, table);
308
	else
309
		printf("%s %s\n", thread ? thread->t_name : "-unknown-", func_buf);
310
}
311
#endif
312
313
/*
314
 * -----------------------------------------------------------------------
315
 * SHARED TABLE DATA
316
 *
317
 */
318
319
/*
 * Case-sensitive comparison of a key with a hash table entry.
 * 'key' is a path string, 'data' is a share (XTSharePtr).
 * The result is non-zero if the key equals the table path of the share.
 */
static xtBool ha_hash_comp(void *key, void *data)
{
	XTSharePtr	entry = (XTSharePtr) data;
	int			diff;

	diff = strcmp((char *) key, entry->sh_table_path->ps_path);
	return diff == 0;
}
325
326
/*
 * Case-sensitive hash function.
 * If 'is_key' is set then 'key_data' is a path string, otherwise it
 * is a share, and the table path of the share is hashed.
 */
static xtHashValue ha_hash(xtBool is_key, void *key_data)
{
	if (!is_key) {
		XTSharePtr entry = (XTSharePtr) key_data;

		return xt_ht_hash(entry->sh_table_path->ps_path);
	}
	return xt_ht_hash((char *) key_data);
}
334
335
/*
 * Case-insensitive comparison of a key with a hash table entry.
 * 'key' is a path string, 'data' is a share (XTSharePtr).
 * The result is non-zero if the key matches the table path of the share.
 */
static xtBool ha_hash_comp_ci(void *key, void *data)
{
	XTSharePtr	entry = (XTSharePtr) data;
	int			diff;

	diff = strcasecmp((char *) key, entry->sh_table_path->ps_path);
	return diff == 0;
}
341
342
/*
 * Case-insensitive hash function.
 * If 'is_key' is set then 'key_data' is a path string, otherwise it
 * is a share, and the table path of the share is hashed.
 */
static xtHashValue ha_hash_ci(xtBool is_key, void *key_data)
{
	if (!is_key) {
		XTSharePtr entry = (XTSharePtr) key_data;

		return xt_ht_casehash(entry->sh_table_path->ps_path);
	}
	return xt_ht_casehash((char *) key_data);
}
350
351
/*
 * Attach the table to the share, if this has not been done yet.
 * The work is done while holding the share's sh_ex_mutex.
 */
static void ha_open_share(XTThreadPtr self, XTShareRec *share)
{
	xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
	pushr_(xt_unlock_mutex, share->sh_ex_mutex);

	if (share->sh_table == NULL) {
		share->sh_table = xt_use_table(self, share->sh_table_path, FALSE, FALSE);

		/* Copy the references to the index information of the table: */
		share->sh_dic_key_count = share->sh_table->tab_dic.dic_key_count;
		share->sh_dic_keys = share->sh_table->tab_dic.dic_keys;

		share->sh_recalc_selectivity = FALSE;
	}

	freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
}
365
366
/*
 * Detach the table from the share (if one is attached), and
 * clear the index references of the share.
 */
static void ha_close_share(XTThreadPtr self, XTShareRec *share)
{
	XTTableHPtr open_tab = share->sh_table;

	if (open_tab != NULL) {
		/* Remember the auto-increment value, the share may be re-opened: */
		share->sh_min_auto_inc = open_tab->tab_auto_inc;

		xt_heap_release(self, open_tab);
		share->sh_table = NULL;
	}

	/* These are only references, so there is nothing to free: */
	share->sh_dic_keys = NULL;
	share->sh_dic_key_count = 0;
}
382
383
/*
 * Close the share and free everything it owns: the table path,
 * the lock and condition, the mutex and finally the share itself.
 * Each member is only freed if it is set.
 */
static void ha_cleanup_share(XTThreadPtr self, XTSharePtr share)
{
	ha_close_share(self, share);

	if (share->sh_table_path != NULL) {
		xt_free(self, share->sh_table_path);
		share->sh_table_path = NULL;
	}

	/* The thr_lock is deleted together with the condition: */
	if (share->sh_ex_cond != NULL) {
		thr_lock_delete(&share->sh_lock);
		xt_delete_cond(self, (xt_cond_type *) share->sh_ex_cond);
		share->sh_ex_cond = NULL;
	}

	if (share->sh_ex_mutex != NULL) {
		xt_delete_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
		share->sh_ex_mutex = NULL;
	}

	xt_free(self, share);
}
405
406
/*
 * Free a hash table entry. The entry ('data') is a share.
 */
static void ha_hash_free(XTThreadPtr self, void *data)
{
	ha_cleanup_share(self, (XTSharePtr) data);
}
412
413
/*
414
 * This structure contains information that is common to all handles.
415
 * (i.e. it is table specific).
416
 */
417
/*
 * Return the share for the given table path, with its use count
 * incremented. If there is no share for the path in pbxt_share_tables,
 * a new share is created and added to the hash table. In this case
 * the table is also opened if 'open_table' is set.
 */
static XTSharePtr ha_get_share(XTThreadPtr self, const char *table_path, bool open_table)
{
	XTShareRec	*share;

	enter_();
	xt_ht_lock(self, pbxt_share_tables);
	pushr_(xt_ht_unlock, pbxt_share_tables);

	share = (XTSharePtr) xt_ht_get(self, pbxt_share_tables, (void *) table_path);
	if (share == NULL) {
		/* No share yet, create one: */
		share = (XTSharePtr) xt_calloc(self, sizeof(XTShareRec));
		pushr_(ha_cleanup_share, share);

		share->sh_ex_mutex = (xt_mutex_type *) xt_new_mutex(self);
		share->sh_ex_cond = (xt_cond_type *) xt_new_cond(self);

		thr_lock_init(&share->sh_lock);

		share->sh_use_count = 0;
		share->sh_table_path = (XTPathStrPtr) xt_dup_string(self, table_path);

		if (open_table)
			ha_open_share(self, share);

		popr_(); // Discard ha_cleanup_share(share);

		xt_ht_put(self, pbxt_share_tables, share);
	}

	share->sh_use_count++;
	freer_(); // xt_ht_unlock(pbxt_share_tables)

	return_(share);
}
451
452
/*
453
 * Free shared information.
454
 */
455
/*
 * Give up one reference to the share. When the use count reaches
 * zero, the share is deleted from pbxt_share_tables.
 */
static void ha_unget_share(XTThreadPtr self, XTSharePtr share)
{
	xt_ht_lock(self, pbxt_share_tables);
	pushr_(xt_ht_unlock, pbxt_share_tables);

	share->sh_use_count--;
	if (share->sh_use_count == 0)
		xt_ht_del(self, pbxt_share_tables, share->sh_table_path);

	freer_(); // xt_ht_unlock(pbxt_share_tables)
}
465
466
/*
 * Give up one reference to the share. When the use count reaches
 * zero, the share is deleted from pbxt_share_tables.
 *
 * Returns TRUE if the share was deleted from the hash table,
 * otherwise FALSE.
 */
static xtBool ha_unget_share_removed(XTThreadPtr self, XTSharePtr share)
{
	xtBool was_last = FALSE;

	xt_ht_lock(self, pbxt_share_tables);
	pushr_(xt_ht_unlock, pbxt_share_tables);

	share->sh_use_count--;
	if (share->sh_use_count == 0) {
		xt_ht_del(self, pbxt_share_tables, share->sh_table_path);
		was_last = TRUE;
	}

	freer_(); // xt_ht_unlock(pbxt_share_tables)
	return was_last;
}
481
482
/*
 * Set up the transaction related state of the PBXT thread from the
 * settings of the server thread, and then wait for recovery
 * (XT_RECOVER_SWEPT).
 *
 * st_table_trans is only touched if 'set_table_trans' is set.
 */
static inline void thd_init_xact(THD *thd, XTThreadPtr self, bool set_table_trans)
{
	if (thd_tx_isolation(thd) <= ISO_READ_COMMITTED)
		self->st_xact_mode = XT_XACT_COMMITTED_READ;
	else
		self->st_xact_mode = XT_XACT_REPEATABLE_READ;

	self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
	self->st_auto_commit = (thd_test_options(thd,(OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) == 0;

#ifdef DRIZZLED
	if (set_table_trans)
		self->st_table_trans = FALSE;
#else
	if (set_table_trans)
		self->st_table_trans = thd_sql_command(thd) == SQLCOM_LOCK_TABLES;
#endif

	/* Reset the per-transaction flags: */
	self->st_abort_trans = FALSE;
	self->st_stat_ended = FALSE;
	self->st_stat_trans = FALSE;
	self->st_non_temp_updated = FALSE;

	XT_PRINT0(self, "xt_xn_begin\n");
	xt_xres_wait_for_recovery(self, XT_RECOVER_SWEPT);
}
501
502
/*
503
 * -----------------------------------------------------------------------
504
 * PUBLIC FUNCTIONS
505
 *
506
 */
507
508
/*
 * Release the exclusive use of the share, and then the
 * reference to the share. 'share' is an XTSharePtr.
 */
xtPublic void xt_ha_unlock_table(XTThreadPtr self, void *share)
{
	XTSharePtr locked_share = (XTSharePtr) share;

	ha_release_exclusive_use(self, locked_share);
	ha_unget_share(self, locked_share);
}
513
514
/*
 * Release the reference to the global database (pbxt_database),
 * if there is one.
 */
xtPublic void xt_ha_close_global_database(XTThreadPtr self)
{
	if (!pbxt_database)
		return;

	xt_heap_release(self, pbxt_database);
	pbxt_database = NULL;
}
521
522
/*
523
 * Open a PBXT database given the path of a table.
524
 * This function also returns the name of the table.
525
 *
526
 * We use the pbxt_database_mutex to lock this
527
 * operation to make sure it does not occur while
528
 * some other thread is doing a "closeall".
529
 */
530
xtPublic void xt_ha_open_database_of_table(XTThreadPtr self, XTPathStrPtr XT_UNUSED(table_path))
{
#ifdef XT_USE_GLOBAL_DB
	/* Nothing to do if this thread already has a database: */
	if (self->st_database)
		return;

	if (pbxt_database) {
		xt_use_database(self, pbxt_database, XT_FOR_USER);
		return;
	}

	xt_open_database(self, mysql_real_data_home, TRUE);
	/* {GLOBAL-DB}
	 * This can be done at the same time as the recovery thread,
	 * strictly speaking I need a lock.
	 */
	if (!pbxt_database) {
		pbxt_database = self->st_database;
		xt_heap_reference(self, pbxt_database);
	}
#else
	char db_path[PATH_MAX];

	/* The database path is the table path without the table name: */
	xt_strcpy(PATH_MAX, db_path, (char *) table_path);
	xt_remove_last_name_of_path(db_path);
	xt_remove_dir_char(db_path);

	if (self->st_database) {
		/* Check if this thread already has this database open: */
		if (xt_tab_compare_paths(self->st_database->db_name, xt_last_name_of_path(db_path)) == 0)
			return;
	}

	/* The database may not be changed while a transaction is running.
	 *
	 * PMC - This probably indicates something strange is happening:
	 *
	 * This sequence generates this error:
	 *
	 * delimiter |
	 *
	 * create temporary table t3 (id int)|
	 *
	 * create function f10() returns int
	 * begin
	 *   drop temporary table if exists t3;
	 *   create temporary table t3 (id int) engine=myisam;
	 *   insert into t3 select id from t4;
	 *   return (select count(*) from t3);
	 * end|
	 *
	 * select f10()|
	 *
	 * An error is generated because the same thread is used
	 * to open table t4 (at the start of the functions), and
	 * then to drop table t3. To drop t3 we need to
	 * switch the database, so we land up here!
	 *
	 * (The alternative, an auto-commit before changing the
	 * database, is disabled:
	 *   if (!xt_xn_commit(self))
	 *       throw_();
	 * )
	 */
	if (self->st_xact_data)
		xt_throw_xterr(XT_CONTEXT, XT_ERR_CANNOT_CHANGE_DB);

	xt_lock_mutex(self, &pbxt_database_mutex);
	pushr_(xt_unlock_mutex, &pbxt_database_mutex);
	xt_open_database(self, db_path, FALSE);
	freer_(); // xt_unlock_mutex(&pbxt_database_mutex);
#endif
}
597
598
/*
 * Return the PBXT thread attached to the given server thread.
 * If none is attached yet, a new PBXT thread named "user_<n>" is
 * created and attached.
 *
 * Returns NULL if the thread could not be created, in which case
 * xt_create_thread() was given 'e' to report the error.
 */
xtPublic XTThreadPtr xt_ha_set_current_thread(THD *thd, XTExceptionPtr e)
{
	XTThreadPtr	self;
	static int	ha_thread_count = 0;

#ifdef DRIZZLED
	if (!(self = (XTThreadPtr) *thd->getEngineData(pbxt_hton))) {
#else
	if (!(self = (XTThreadPtr) *thd_ha_data(thd, pbxt_hton))) {
#endif
//		const			Security_context *sctx;
		char			name[120];
		char			ha_id_str[50];
		/* The ID is a local (it was static before), so that a concurrent
		 * caller cannot change it between the increment and the sprintf().
		 * NOTE: the increment of the counter itself is still unprotected.
		 */
		int				ha_id;

		ha_id = ++ha_thread_count;
		sprintf(ha_id_str, "_%d", ha_id);
		xt_strcpy(120,name,"user"); // TODO: Fix this hack
/*
		sctx = &thd->main_security_ctx;

		if (sctx->user) {
			xt_strcpy(120, name, sctx->user);
			xt_strcat(120, name, "@");
		}
		else
			*name = 0;
		if (sctx->host)
			xt_strcat(120, name, sctx->host);
		else if (sctx->ip)
			xt_strcat(120, name, sctx->ip);
		else if (thd->proc_info)
			xt_strcat(120, name, (char *) thd->proc_info);
		else
			xt_strcat(120, name, "system");
*/
		xt_strcat(120, name, ha_id_str);
		if (!(self = xt_create_thread(name, FALSE, TRUE, e)))
			return NULL;

		self->st_xact_mode = XT_XACT_REPEATABLE_READ;

		/* Attach the new thread to the server thread: */
#ifdef DRIZZLED
		*thd->getEngineData(pbxt_hton) = (void *) self;
#else
		*thd_ha_data(thd, pbxt_hton) = (void *) self;
#endif
	}
	return self;
}
646
647
/*
 * Detach the PBXT thread from the given server thread (if one is
 * attached), and free it.
 */
xtPublic void xt_ha_close_connection(THD* thd)
{
	XTThreadPtr		self;

	/* Both variants only free the thread if one is attached. The
	 * Drizzle variant previously tested for the opposite, which
	 * called xt_free_thread(NULL) and never freed an attached thread.
	 */
#ifdef DRIZZLED
	if ((self = (XTThreadPtr) *thd->getEngineData(pbxt_hton))) {
		*thd->getEngineData(pbxt_hton) = NULL;
#else
	if ((self = (XTThreadPtr) *thd_ha_data(thd, pbxt_hton))) {
		*thd_ha_data(thd, pbxt_hton) = NULL;
#endif
		xt_free_thread(self);
	}
}
661
662
/*
 * Return the PBXT thread stored in the engine data of the given
 * server thread. Nothing is created here.
 */
xtPublic XTThreadPtr xt_ha_thd_to_self(THD *thd)
{
	XTThreadPtr attached;

#ifdef DRIZZLED
	attached = (XTThreadPtr) *thd->getEngineData(pbxt_hton);
#else
	attached = (XTThreadPtr) *thd_ha_data(thd, pbxt_hton);
#endif
	return attached;
}
670
1455.3.5 by Vladimir Kolesnikov
fixed fetch field count in select (all fields are selected for now)
671
#ifndef DRIZZLED
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
672
/* The first bit is 1. */
673
/*
 * Return the position of the highest bit set in the bitmap, counting
 * the first bit as 1. Returns 0 if no bit is set.
 */
static u_int ha_get_max_bit(MX_BITMAP *map)
{
#ifdef DRIZZLED
	uint32_t	cnt = map->numOfBitsInMap();
	uint32_t	max_bit = 0;

	for (uint32_t i = 0; i < cnt; i++)
		if (map->isBitSet(i))
			max_bit = i+1;

	return max_bit;
#else
	/* This variant works directly on the members of the bitmap
	 * structure (the accessor methods used in the Drizzle variant
	 * above are not mixed in here).
	 * The code handles the bitmap as words of 32 bits.
	 */
	my_bitmap_map	*data_ptr = map->bitmap;
	my_bitmap_map	*end_ptr = map->last_word_ptr;
	u_int		cnt = map->n_bits;
	my_bitmap_map	b;

	/* Search backwards for the last word that is not zero: */
	for (; end_ptr >= data_ptr; end_ptr--) {
		if ((b = *end_ptr)) {
			my_bitmap_map mask;

			if (end_ptr == map->last_word_ptr && map->last_word_mask)
				mask = map->last_word_mask >> 1;
			else
				mask = 0x80000000;
			/* Shift until the highest bit set reaches the mask: */
			while (!(b & mask)) {
				b = b << 1;
				/* Should not happen, but if it does, we hang! */
				if (!b)
					return map->n_bits;
				cnt--;
			}
			return cnt;
		}
		/* Skip this word, cnt is the number of bits in front of it: */
		if (end_ptr == map->last_word_ptr)
			cnt = ((cnt-1) / 32) * 32;
		else
			cnt -= 32;
	}
	return 0;
#endif
}
1455.3.5 by Vladimir Kolesnikov
fixed fetch field count in select (all fields are selected for now)
715
#endif
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
716
717
/*
718
 * -----------------------------------------------------------------------
719
 * SUPPORT FUNCTIONS
720
 *
721
 */
722
723
/*
724
 * In PBXT, as in MySQL: thread == connection.
725
 *
726
 * So we simply attach a PBXT thread to a MySQL thread.
727
 */
728
static XTThreadPtr ha_set_current_thread(THD *thd, int *err)
{
	XTExceptionRec	e;
	XTThreadPtr		self = xt_ha_set_current_thread(thd, &e);

	if (self)
		return self;

	/* Failed: log the exception, and pass the error code to the caller. */
	xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
	*err = e.e_xt_err;
	return NULL;
}
740
741
/*
 * Translate a PBXT error code into the error code returned to
 * the server. Returns 0 for XT_NO_ERR, and -1 for all errors
 * that have no translation.
 */
xtPublic int xt_ha_pbxt_to_mysql_error(int xt_err)
{
	int my_err = -1;			// Unknown error

	switch (xt_err) {
		case XT_NO_ERR:
			my_err = 0;
			break;
		case XT_ERR_DUPLICATE_KEY:
			my_err = HA_ERR_FOUND_DUPP_KEY;
			break;
		case XT_ERR_DEADLOCK:
			my_err = HA_ERR_LOCK_DEADLOCK;
			break;
		case XT_ERR_RECORD_CHANGED:
			/* HA_ERR_RECORD_CHANGED is the correct error for an optimistic
			 * lock failure. Note, however, that sysbench does not handle
			 * this error (it works if HA_ERR_LOCK_WAIT_TIMEOUT is
			 * returned instead).
			 */
			my_err = HA_ERR_RECORD_CHANGED;
			break;
		case XT_ERR_LOCK_TIMEOUT:
			my_err = HA_ERR_LOCK_WAIT_TIMEOUT;
			break;
		case XT_ERR_TABLE_IN_USE:
			my_err = HA_ERR_WRONG_COMMAND;
			break;
		case XT_ERR_TABLE_NOT_FOUND:
			my_err = HA_ERR_NO_SUCH_TABLE;
			break;
		case XT_ERR_TABLE_EXISTS:
			my_err = HA_ERR_TABLE_EXIST;
			break;
		case XT_ERR_CANNOT_CHANGE_DB:
			my_err = ER_TRG_IN_WRONG_SCHEMA;
			break;
		case XT_ERR_NO_REFERENCED_ROW:
		case XT_ERR_REF_TABLE_NOT_FOUND:
		case XT_ERR_REF_TYPE_WRONG:
			my_err = HA_ERR_NO_REFERENCED_ROW;
			break;
		case XT_ERR_ROW_IS_REFERENCED:
			my_err = HA_ERR_ROW_IS_REFERENCED;
			break;
		case XT_ERR_COLUMN_NOT_FOUND:
		case XT_ERR_COLUMN_IS_NOT_NULL:
		case XT_ERR_INCORRECT_NO_OF_COLS:
		case XT_ERR_FK_ON_TEMP_TABLE:
		case XT_ERR_FK_REF_TEMP_TABLE:
			my_err = HA_ERR_CANNOT_ADD_FOREIGN;
			break;
		case XT_ERR_DUPLICATE_FKEY:
			my_err = HA_ERR_FOREIGN_DUPLICATE_KEY;
			break;
		case XT_ERR_RECORD_DELETED:
			my_err = HA_ERR_RECORD_DELETED;
			break;
		default:
			break;
	}
	return my_err;
}
786
1510.1.5 by Paul McCullagh
Actually, the function name was just changed
787
/*
 * Handle the exception currently stored in the thread (self->t_exception),
 * and return the corresponding server error code.
 *
 * Depending on the error, the running transaction is aborted: either
 * rolled back at once, or marked (st_abort_trans) so that it is rolled
 * back later. Errors that are not handled explicitly are logged.
 */
xtPublic int xt_ha_pbxt_thread_error_for_mysql(THD *thd, const XTThreadPtr self, int ignore_dup_key)
{
	int		xt_err = self->t_exception.e_xt_err;
	xtBool	dup_key = FALSE;
	xtBool	abort_trans = FALSE;

	XT_PRINT2(self, "xt_ha_pbxt_thread_error_for_mysql xt_err=%d auto commit=%d\n", (int) xt_err, (int) self->st_auto_commit);

	/* Step 1: decide if the transaction must be aborted. */
	switch (xt_err) {
		case XT_NO_ERR:
			break;
		case XT_ERR_RECORD_CHANGED:
			/* MySQL also handles the locked error. NOTE: There is no automatic
			 * rollback!
			 */
			break;
		case XT_ERR_DUPLICATE_KEY:
		case XT_ERR_DUPLICATE_FKEY:
			/* Let MySQL call rollback as and when it wants to for duplicate
			 * key.
			 *
			 * In addition, we are not allowed to do an auto-rollback
			 * inside a sub-statement (function() or procedure())
			 * For example:
			 *
			 * delimiter |
			 *
			 * create table t3 (c1 char(1) primary key not null)|
			 *
			 * create function bug12379()
			 *   returns integer
			 * begin
			 *    insert into t3 values('X');
			 *    insert into t3 values('X');
			 *    return 0;
			 * end|
			 *
			 * --error 1062
			 * select bug12379()|
			 *
			 * Not doing an auto-rollback should solve this problem in the
			 * case of duplicate key (but not in others - like deadlock)!
			 * I don't think this situation is handled correctly by MySQL.
			 *
			 * So the transaction is only rolled back automatically if we
			 * are in auto-commit mode, and duplicate keys are not ignored.
			 */
			dup_key = TRUE;
			if (!ignore_dup_key && self->st_auto_commit)
				abort_trans = TRUE;
			break;
		case XT_ERR_DEADLOCK:
		case XT_ERR_NO_REFERENCED_ROW:
		case XT_ERR_ROW_IS_REFERENCED:
			abort_trans = TRUE;
			break;
		default:
			xt_log_exception(self, &self->t_exception, XT_LOG_DEFAULT);
			abort_trans = TRUE;
			break;
	}

	/* Step 2: abort the transaction (if there is one).
	 *
	 * PMC 2006-08-30: It should be that this is not necessary!
	 *
	 * It is only necessary to call ha_rollback() if the engine
	 * aborts the transaction.
	 *
	 * On the other hand, I shouldn't need to rollback the
	 * transaction because, if I return an error, MySQL
	 * should do it for me.
	 *
	 * Unfortunately, when auto-commit is off, MySQL does not
	 * rollback automatically (for example when a deadlock
	 * is provoked).
	 *
	 * And when we have a multi update we cannot rely on this
	 * either (see the comment on duplicate keys above).
	 */
	if (abort_trans && self->st_xact_data) {
		/*
		 * GOTCHA:
		 * A result of the "st_abort_trans = TRUE" below is that
		 * the following code results in an empty set.
		 * The reason is "ignore_dup_key" is not set so
		 * the duplicate key leads to an error which causes
		 * the transaction to be aborted.
		 * The delayed inserts are all executed in one transaction.
		 *
		 * CREATE TABLE t1 (
		 * c1 INT(11) NOT NULL AUTO_INCREMENT,
		 * c2 INT(11) DEFAULT NULL,
		 * PRIMARY KEY (c1)
		 * );
		 * SET insert_id= 14;
		 * INSERT DELAYED INTO t1 VALUES(NULL, 11), (NULL, 12);
		 * INSERT DELAYED INTO t1 VALUES(14, 91);
		 * INSERT DELAYED INTO t1 VALUES (NULL, 92), (NULL, 93);
		 * FLUSH TABLE t1;
		 * SELECT * FROM t1;
		 */
		if (self->st_lock_count != 0) {
			/* Locks are held on tables.
			 * Only rollback after locks are released.
			 *
			 * (Setting st_auto_commit = TRUE here as well should
			 * not be required, because MySQL is told to rollback
			 * below. Besides, it is a hack!)
			 */
			self->st_abort_trans = TRUE;
		}
		else {
			/* No table locks, must rollback immediately
			 * (there will be no possibility later)!
			 */
			XT_PRINT1(self, "xt_xn_rollback xt_err=%d\n", xt_err);
			if (!xt_xn_rollback(self))
				xt_log_exception(self, &self->t_exception, XT_LOG_DEFAULT);
		}

		/* Only tell MySQL to rollback if we automatically rollback.
		 * Note: calling this with (thd, FALSE), cause sp.test to fail.
		 */
		if (!dup_key && thd)
			thd_mark_transaction_to_rollback(thd, TRUE);
	}

	return xt_ha_pbxt_to_mysql_error(xt_err);
}
914
915
/*
 * Called for each thread by xt_ha_all_threads_close_database():
 * if 'other_thr' is using the database 'db', it is made to
 * give up the database.
 */
static void ha_conditional_close_database(XTThreadPtr self, XTThreadPtr other_thr, void *db)
{
	XTDatabaseHPtr target_db = (XTDatabaseHPtr) db;

	if (other_thr->st_database != target_db)
		return;
	xt_unuse_database(self, other_thr);
}
920
921
/*
922
 * This is only called from drop database, so we know that
923
 * no thread is actually using the database. This means that it
924
 * must be safe to close the database.
925
 */
926
xtPublic void xt_ha_all_threads_close_database(XTThreadPtr self, XTDatabaseHPtr db)
{
	/* pbxt_database_mutex is held for the duration of the call, so
	 * the database cannot be opened while it is closed for all threads:
	 */
	xt_lock_mutex(self, &pbxt_database_mutex);
	pushr_(xt_unlock_mutex, &pbxt_database_mutex);

	/* ha_conditional_close_database() is called for every thread: */
	xt_do_to_all_threads(self, ha_conditional_close_database, db);

	freer_(); // xt_unlock_mutex(&pbxt_database_mutex);
}
933
934
static int ha_log_pbxt_thread_error_for_mysql(int ignore_dup_key)
935
{
936
	return xt_ha_pbxt_thread_error_for_mysql(current_thd, myxt_get_self(), ignore_dup_key);
937
}
938
939
/*
940
 * -----------------------------------------------------------------------
941
 * STATIC HOOKS
942
 *
943
 */
944
/*
 * Resolve a byte-size setting and clamp it to its permitted range.
 *
 * The textual value is taken from *value if set, otherwise from the
 * environment variable vp->vp_var, otherwise from the default vp->vp_def.
 * The string finally chosen (including a substituted minimum or maximum)
 * is written back through 'value'. The upper limit is vp->vp_max8 when
 * size_t is 8 bytes wide and vp->vp_max4 otherwise.
 *
 * Returns the numeric value of the resulting setting.
 */
static xtWord8 ha_set_variable(char **value, HAVarParamsPtr vp)
{
	char	*max_str = (char *) (sizeof(size_t) == 8 ? vp->vp_max8 : vp->vp_max4);
	xtWord8	size;
	xtWord8	lower, upper;

	if (!*value) {
		*value = getenv(vp->vp_var);
		if (!*value)
			*value = (char *) vp->vp_def;
	}
	size = xt_byte_size_to_int8(*value);

	/* Raise to the minimum if required: */
	lower = (xtWord8) xt_byte_size_to_int8(vp->vp_min);
	if (size < lower) {
		*value = (char *) vp->vp_min;
		size = lower;
	}

	/* Lower to the maximum if required: */
	upper = (xtWord8) xt_byte_size_to_int8(max_str);
	if (size > upper) {
		*value = max_str;
		size = upper;
	}
	return size;
}
971
972
/*
 * Resolve all engine settings, publish them in the xt_db_* globals,
 * create the table-share hash table and start the engine sub-systems
 * (file system, installation lock, system tables, databases and caches).
 */
static void pbxt_call_init(XTThreadPtr self)
{
	xtInt8	index_cache_size;
	xtInt8	record_cache_size;
	xtInt8	log_cache_size;
	xtInt8	setting;

	xt_logf(XT_NT_INFO, "PrimeBase XT (PBXT) Engine %s loaded...\n", xt_get_version());
	xt_logf(XT_NT_INFO, "Paul McCullagh, PrimeBase Technologies GmbH, http://www.primebase.org\n");

	/* The cache sizes are consumed at the end, when the caches are set up. */
	index_cache_size = ha_set_variable(&pbxt_index_cache_size, &vp_index_cache_size);
	record_cache_size = ha_set_variable(&pbxt_record_cache_size, &vp_record_cache_size);
	log_cache_size = ha_set_variable(&pbxt_log_cache_size, &vp_log_cache_size);

	setting = ha_set_variable(&pbxt_log_file_threshold, &vp_log_file_threshold);
	xt_db_log_file_threshold = (xtLogOffset) setting;

	/* Both buffer sizes are aligned with xt_align_offset(..., 512). */
	setting = ha_set_variable(&pbxt_transaction_buffer_size, &vp_transaction_buffer_size);
	xt_db_transaction_buffer_size = (size_t) xt_align_offset(setting, 512);

	setting = ha_set_variable(&pbxt_log_buffer_size, &vp_log_buffer_size);
	xt_db_log_buffer_size = (size_t) xt_align_offset(setting, 512);

	setting = ha_set_variable(&pbxt_checkpoint_frequency, &vp_checkpoint_frequency);
	xt_db_checkpoint_frequency = (size_t) setting;

	setting = ha_set_variable(&pbxt_data_log_threshold, &vp_data_log_threshold);
	xt_db_data_log_threshold = (off_t) setting;

	setting = ha_set_variable(&pbxt_data_file_grow_size, &vp_data_file_grow_size);
	xt_db_data_file_grow_size = (size_t) setting;

	setting = ha_set_variable(&pbxt_row_file_grow_size, &vp_row_file_grow_size);
	xt_db_row_file_grow_size = (size_t) setting;

	setting = ha_set_variable(&pbxt_record_write_threshold, &vp_record_write_threshold);
	xt_db_record_write_threshold = (size_t) setting;

	/* Table names are compared ignoring case under Drizzle, and
	 * according to lower_case_table_names otherwise.
	 */
#ifdef DRIZZLED
	pbxt_ignore_case = TRUE;
#else
	pbxt_ignore_case = lower_case_table_names != 0;
#endif
	if (pbxt_ignore_case)
		pbxt_share_tables = xt_new_hashtable(self, ha_hash_comp_ci, ha_hash_ci, ha_hash_free, TRUE, FALSE);
	else
		pbxt_share_tables = xt_new_hashtable(self, ha_hash_comp, ha_hash, ha_hash_free, TRUE, FALSE);

	/* Start the sub-systems (pbxt_call_exit() stops them in reverse order): */
	xt_fs_init(self);
	xt_lock_installation(self, mysql_real_data_home);
	XTSystemTableShare::startUp(self);
	xt_init_databases(self);
	xt_ind_init(self, (size_t) index_cache_size);
	xt_tc_init(self, (size_t) record_cache_size);
	xt_xlog_init(self, (size_t) log_cache_size);
}
1028
1029
/*
 * Stop the engine sub-systems started by pbxt_call_init() and free
 * the table-share hash table.
 */
static void pbxt_call_exit(XTThreadPtr self)
{
	xt_logf(XT_NT_INFO, "PrimeBase XT Engine shutdown...\n");

#ifdef TRACE_STATEMENTS
	xt_dump_trace();
#endif
#ifdef XT_USE_GLOBAL_DB
	xt_ha_close_global_database(self);
#endif
	/* Debug and release builds stop the database threads identically. */
	xt_stop_database_threads(self, TRUE);

	/* First tell the freeer to quit ASAP, then stop it.
	 *
	 * {FREEER-HANG}
	 * The stop used to be conditional, because during startup the
	 * freeer would be hanging. This problem has been solved by MySQL!
	 */
	xt_quit_freeer(self);
	xt_stop_freeer(self);

	xt_exit_databases(self);
	XTSystemTableShare::shutDown(self);
	xt_xlog_exit(self);
	xt_tc_exit(self);
	xt_ind_exit(self);
	xt_unlock_installation(self, mysql_real_data_home);
	xt_fs_exit(self);

	if (!pbxt_share_tables)
		return;
	xt_free_hashtable(self, pbxt_share_tables);
	pbxt_share_tables = NULL;
}
1066
1067
/*
1068
 * Shutdown the PBXT sub-system.
1069
 */
1070
static void ha_exit(XTThreadPtr self)
{
	/* Recovery is terminated before anything else is taken down. */
	xt_xres_terminate_recovery(self);

	/* Wrap things up...
	 * Just in case the main thread has a database in use (for testing)?
	 */
	xt_unuse_database(self, self);

#ifdef PBMS_ENABLED
	/* Finalizing PBMS may cause the streaming engine to cleanup
	 * connections and tables belonging to this engine. That in turn may
	 * need some of the facilities shut down below (like
	 * xt_create_thread() called from pbxt_close_table()), so it has to
	 * come first.
	 */
	pbms_finalize();
#endif

	/* Sub-systems, then threading, memory and logging: */
	pbxt_call_exit(self);
	xt_exit_threading(self);
	xt_exit_memory();
	xt_exit_logging();

	xt_p_mutex_destroy(&pbxt_database_mutex);
	pbxt_inited = false;
}
1089
1090
/*
1091
 * Outout the PBXT status. Return FALSE on error.
1092
 */
1093
#ifdef DRIZZLED
1094
bool PBXTStorageEngine::show_status(Session *thd, stat_print_fn *stat_print, enum ha_stat_type)
1095
#else
1096
static bool pbxt_show_status(handlerton *XT_UNUSED(hton), THD* thd, 
1097
                          stat_print_fn* stat_print,
1098
                          enum ha_stat_type XT_UNUSED(stat_type))
1099
#endif
1100
{
1101
	XTThreadPtr			self;	
1102
	int					err = 0;
1103
	XTStringBufferRec	strbuf = { 0, 0, 0 };
1104
	bool				not_ok = FALSE;
1105
1106
	if (!(self = ha_set_current_thread(thd, &err)))
1107
		return FALSE;
1108
1109
#ifdef XT_SHOW_DUMPS_TRACE
1110
	//if (pbxt_database)
1111
	//	xt_dump_xlogs(pbxt_database, 0);
1112
	xt_trace("// %s - dump\n", xt_trace_clock_diff(NULL));
1113
	xt_dump_trace();
1114
#endif
1115
#ifdef XT_TRACK_CONNECTIONS
1116
	xt_dump_conn_tracking();
1117
#endif
1118
1119
#ifdef XT_UNIT_TEST
1120
	xt_unit_test_async_task(self);
1121
#endif
1122
1123
	try_(a) {
1124
		myxt_get_status(self, &strbuf);
1125
	}
1126
	catch_(a) {
1127
		not_ok = TRUE;
1128
	}
1129
	cont_(a);
1130
1131
	if (!not_ok) {
1132
		if (stat_print(thd, "PBXT", 4, "", 0, strbuf.sb_cstring, (uint) strbuf.sb_len))
1133
			not_ok = TRUE;
1134
	}
1135
	xt_sb_set_size(self, &strbuf, 0);
1136
1137
	return not_ok;
1138
}
1139
1140
/*
1141
 * Initialize the PBXT sub-system.
1142
 *
1143
 * return 1 on error, else 0.
1144
 */
1145
#ifdef DRIZZLED
1146
static int pbxt_init(Context &registry)
1147
#else
1148
static int pbxt_init(void *p)
1149
#endif
1150
{
1151
	int init_err = 0;
1152
1153
	XT_PRINT0(NULL, "pbxt_init\n");
1154
1155
	if (sizeof(xtWordPS) != sizeof(void *)) {
1156
		printf("PBXT: This won't work, I require that sizeof(xtWordPS) == sizeof(void *)!\n");
1157
		XT_RETURN(1);
1158
	}
1159
1160
	/* GOTCHA: This will "detect" if are loading the plug-in
1161
	 * with different --with-debug option to MySQL.
1162
	 *
1163
	 * In this case, you will get an error when loading the
1164
	 * library that some symbol was not found.
1165
	 */
1166
	void *dummy = my_malloc(100, MYF(0));
1167
	my_free((byte *) dummy, MYF(0));
1168
1169
 	if (!pbxt_inited) {
1170
		XTThreadPtr self = NULL;
1171
1172
 		xt_p_mutex_init_with_autoname(&pbxt_database_mutex, NULL);
1173
1174
#ifdef DRIZZLED
1175
		pbxt_hton= new PBXTStorageEngine(std::string("PBXT"));
1176
		registry.add(pbxt_hton);
1177
#else
1178
		pbxt_hton = (handlerton *) p;
1179
		pbxt_hton->state = SHOW_OPTION_YES;
1180
		pbxt_hton->db_type = DB_TYPE_PBXT; // Wow! I have my own!
1181
		pbxt_hton->close_connection = pbxt_close_connection; /* close_connection, cleanup thread related data. */
1182
		pbxt_hton->commit = pbxt_commit; /* commit */
1183
		pbxt_hton->rollback = pbxt_rollback; /* rollback */
1184
		if (pbxt_support_xa) {
1185
			pbxt_hton->prepare = pbxt_prepare;
1186
			pbxt_hton->recover = pbxt_recover;
1187
			pbxt_hton->commit_by_xid = pbxt_commit_by_xid;
1188
			pbxt_hton->rollback_by_xid = pbxt_rollback_by_xid;
1189
		}
1190
		else {
1191
			pbxt_hton->prepare = NULL;
1192
			pbxt_hton->recover = NULL;
1193
			pbxt_hton->commit_by_xid = NULL;
1194
			pbxt_hton->rollback_by_xid = NULL;
1195
		}
1196
		pbxt_hton->create = pbxt_create_handler; /* Create a new handler */
1197
		pbxt_hton->drop_database = pbxt_drop_database; /* Drop a database */
1198
		pbxt_hton->panic = pbxt_panic; /* Panic call */
1199
		pbxt_hton->show_status = pbxt_show_status;
1200
		pbxt_hton->flags = HTON_NO_FLAGS; /* HTON_CAN_RECREATE - Without this flags TRUNCATE uses delete_all_rows() */
1201
		pbxt_hton->slot = (uint)-1; /* assign invald value, so we know when it's inited later */
1202
		pbxt_hton->start_consistent_snapshot = pbxt_start_consistent_snapshot;
1203
#if defined(MYSQL_SUPPORTS_BACKUP) && defined(XT_ENABLE_ONLINE_BACKUP)
1204
		pbxt_hton->get_backup_engine = pbxt_backup_engine;
1205
#endif
1206
#endif
1207
		if (!xt_init_logging())					/* Initialize logging */
1208
			goto error_1;
1209
1210
#ifdef PBMS_ENABLED
1211
		PBMSResultRec result;
1212
		if (!pbms_initialize("PBXT", false, &result)) {
1213
			xt_logf(XT_NT_ERROR, "pbms_initialize() Error: %s", result.mr_message);
1214
			goto error_2;
1215
		}
1216
#endif
1217
1218
		if (!xt_init_memory())					/* Initialize memory */
1219
			goto error_3;
1220
1221
		self = xt_init_threading();				/* Create the main self: */
1222
		if (!self)
1223
			goto error_3;
1224
1225
 		pbxt_inited = true;
1226
1227
		try_(a) {
1228
			/* Initialize all systems */
1229
			pbxt_call_init(self);
1230
1231
			/* Conditional unit test: */
1232
#ifdef XT_UNIT_TEST
1233
			//xt_unit_test_create_threads(self);
1234
			//xt_unit_test_read_write_locks(self);
1235
			//xt_unit_test_mutex_locks(self);
1236
#endif
1237
1238
			/* {OPEN-DB-SWEEPER-WAIT}
1239
			 * I have to start the freeer before I open and recover the database
1240
			 * because it we run out of cache while waiting for the sweeper
1241
			 * we will hang!
1242
			 */
1243
			xt_start_freeer(self);
1244
1245
			/* This function is called with LOCK_plugin locked.
1246
			 * This prevents the opening of .frm files, which
1247
			 * is required for recovery.
1248
			 * Our solution is to start reovery in a thread
1249
			 * so that it can run after LOCK_plugin is released.
1250
			 */
1251
			xt_xres_start_database_recovery(self);
1252
		}
1253
		catch_(a) {
1254
			xt_log_exception(self, &self->t_exception, XT_LOG_DEFAULT);
1255
			init_err = 1;
1256
		}
1257
		cont_(a);
1258
1259
		if (init_err) {
1260
			/* {FREEER-HANG} The free-er will be hung in:
1261
				#0	0x91fc6a2e in semaphore_wait_signal_trap
1262
				#1	0x91fce505 in pthread_mutex_lock
1263
				#2	0x00489633 in safe_mutex_lock at thr_mutex.c:149
1264
				#3	0x002dfca9 in plugin_thdvar_init at sql_plugin.cc:2398
1265
				#4	0x000d6a12 in THD::init at sql_class.cc:715
1266
				#5	0x000de9d3 in THD::THD at sql_class.cc:597
1267
				#6	0x000debe1 in THD::THD at sql_class.cc:631
1268
				#7	0x00e207a4 in myxt_create_thread at myxt_xt.cc:2666
1269
				#8	0x00e3134b in tabc_fr_run_thread at tabcache_xt.cc:982
1270
				#9	0x00e422ca in xt_thread_main at thread_xt.cc:1006
1271
				#10	0x91ff7c55 in _pthread_start
1272
				#11	0x91ff7b12 in thread_start
1273
			 *
1274
			 * so it is not good trying to stop it here!
1275
			 *
1276
			 * With regard to this problem, see {OPEN-DB-SWEEPER-WAIT}
1277
			 * Due to this problem, I will probably have to hack
1278
			 * the mutex so that the freeer can get started...
1279
			 *
1280
			 * NOPE! problem has gone in 6.0.9. Also not a problem in
1281
			 * 5.1.29.
1282
			 */
1283
			
1284
			/* {OPEN-DB-SWEEPER-WAIT} 
1285
			 * I have to stop the freeer here because it was
1286
			 * started before opening the database.
1287
			 */
1288
1289
			/* {FREEER-HANG-ON-INIT-ERROR}
1290
			 * pbxt_init is called with LOCK_plugin and if it fails and tries to exit
1291
			 * the freeer here it hangs because the freeer calls THD::~THD which tries
1292
			 * to aquire the same lock and hangs. OTOH MySQL calls pbxt_end() after
1293
			 * an unsuccessful call to pbxt_init, so we defer cleaup, except 
1294
			 * releasing 'self'
1295
			 */
1296
			xt_free_thread(self);
1297
			goto error_3;
1298
		}
1299
		xt_free_thread(self);
1300
 	}
1301
	XT_RETURN(init_err);
1302
1303
	error_3:
1304
#ifdef PBMS_ENABLED
1305
	pbms_finalize();
1306
1307
	error_2:
1308
#endif
1309
1310
	error_1:
1311
	XT_RETURN(1);
1312
}
1313
1314
static int pbxt_end(void *)
1315
{
1316
	XTThreadPtr		self;
1317
	int				err = 0;
1318
1319
	XT_TRACE_CALL();
1320
1321
	if (pbxt_inited) {
1322
		XTExceptionRec	e;
1323
1324
		/* This flag also means "shutting down". */
1325
		pbxt_inited = false; 
1326
		self = xt_create_thread("TempForEnd", FALSE, TRUE, &e);
1327
		if (self) {
1328
			self->t_main = TRUE;
1329
			ha_exit(self);
1330
		}
1331
	}
1332
1333
	XT_RETURN(err);
1334
}
1335
1457.1.1 by Brian Aker
Merge PBXT.
1336
/* Destroying the storage engine object ends the engine via pbxt_end(). */
PBXTStorageEngine::~PBXTStorageEngine()
{
	(void) pbxt_end(NULL);
}
1340
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
1341
#ifndef DRIZZLED
1342
static int pbxt_panic(handlerton *hton, enum ha_panic_function flag)
1343
{
1344
	return pbxt_end(hton);
1345
}
1346
#endif
1347
1348
/*
1349
 * Kill the PBXT thread associated with the MySQL thread.
1350
 */
1351
#ifdef DRIZZLED
1352
int PBXTStorageEngine::close_connection(Session *thd)
1353
{
1354
	PBXTStorageEngine * const hton = this;
1355
#else
1356
static int pbxt_close_connection(handlerton *hton, THD* thd)
1357
{
1358
#endif
1359
	XTThreadPtr		self;
1360
1361
	XT_TRACE_CALL();
1362
#ifdef DRIZZLED
1363
	if ((self = (XTThreadPtr) *thd->getEngineData(hton))) {
1364
		*thd->getEngineData(pbxt_hton) = NULL;
1365
#else
1366
	if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
1367
                *thd_ha_data(thd, hton) = NULL;
1368
#endif
1369
		/* Required because freeing the thread could cause
1370
		 * free of database which could call xt_close_file_ns()!
1371
		 */
1372
		xt_set_self(self);
1373
		xt_free_thread(self);
1374
	}
1375
	return 0;
1376
}
1377
1378
/*
1379
 * Currently does nothing because it was all done
1380
 * when the last PBXT table was removed from the 
1381
 * database.
1382
 */
1383
#ifdef DRIZZLED
1384
void PBXTStorageEngine::drop_database(char *)
1385
#else
1386
static void pbxt_drop_database(handlerton *XT_UNUSED(hton), char *XT_UNUSED(path))
1387
#endif
1388
{
1389
	XT_TRACE_CALL();
1390
}
1391
1392
/*
1393
 * NOTES ON TRANSACTIONS:
1394
 *
1395
 * 1. If self->st_lock_count == 0 the transaction can be ended immediately.
1396
 *    If not, we must wait until the last lock is released on the last handler
1397
 *    to ensure that the tables are flushed before the transaction is
1398
 *    committed or aborted.
1399
 *
1400
 * 2. all (below) indicates, within a BEGIN/END (i.e. auto_commit off) whether
1401
 *    the statement or the entire transaction is being terminated.
1402
 *    We currently ignore statement termination.
1403
 * 
1404
 * 3. If in BEGIN/END we must call ha_rollback() if we abort the transaction
1405
 *    internally.
1406
 *
1407
 * NOTE ON CONSISTENT SNAPSHOTS:
1408
 * 
1409
 * PBXT itself doesn't need this function as its transaction mechanism provides
1410
 * consistent snapshots for all transactions by default. This function is needed
1411
 * only for multi-engine cases like this:
1412
 *
1413
 * CREATE TABLE t1 ... ENGINE=INNODB
1414
 * CREATE TABLE t2 ... ENGINE=PBXT
1415
 * START TRANSACTION WITH CONSISTENT SNAPSHOT
1416
 * SELECT * FROM t1 <-- at this point we need to know about the snapshot
1417
 */
1418
1419
#ifndef DRIZZLED
1420
static int pbxt_start_consistent_snapshot(handlerton *hton, THD *thd)
1421
{
1422
	int err          = 0;
1423
	XTThreadPtr self = ha_set_current_thread(thd, &err);
1424
1425
	if (!self->st_database && pbxt_database) {
1426
		xt_ha_open_database_of_table(self, (XTPathStrPtr) NULL);
1427
	}
1428
1429
	thd_init_xact(thd, self, true);
1430
1431
	if (xt_xn_begin(self)) {
1432
		trans_register_ha(thd, TRUE, hton);	
1433
	} else {
1434
		err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1435
	}
1436
1437
	/*
1438
	 * As of MySQL 5.1.41 the return value is not checked, so the server might assume 
1439
	 * everything is fine even it isn't. InnoDB returns 0 on success.
1440
	 */
1441
	return err;
1442
}
1443
#endif
1444
1445
/*
1446
 * Commit the PBXT transaction of the given thread.
1447
 * thd is the MySQL thread structure.
1448
 * pbxt_thr is a pointer to the PBXT thread structure.
1449
 *
1450
 */
1451
#ifdef DRIZZLED
1452
int PBXTStorageEngine::commit(Session *thd, bool all)
1453
{
1454
	PBXTStorageEngine * const hton = this;
1455
#else
1456
static int pbxt_commit(handlerton *hton, THD *thd, bool all)
1457
{
1458
#endif
1459
	int			err = 0;
1460
	XTThreadPtr	self;
1461
1462
#ifdef DRIZZLED
1463
	if ((self = (XTThreadPtr) *thd->getEngineData(hton))) {
1464
#else
1465
	if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
1466
#endif
1467
		XT_PRINT2(self, "%s pbxt_commit all=%d\n", all ? "END CONN XACT" : "END STAT", all);
1468
1469
		if (self->st_xact_data) {
1470
			/* There are no table locks, commit immediately in all cases
1471
			 * except when this is a statement commit with an explicit
1472
			 * transaction (!all && !self->st_auto_commit).
1473
			 */
1474
			if (all || self->st_auto_commit) {
1475
				XT_PRINT0(self, "xt_xn_commit in pbxt_commit\n");
1476
1477
				if (!xt_xn_commit(self))
1478
					err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1479
			}
1480
		}
1481
		if (!all)
1482
			self->st_stat_trans = FALSE;
1483
	}
1484
	return err;
1485
}
1486
1487
#ifdef DRIZZLED
1488
int PBXTStorageEngine::rollback(Session *thd, bool all)
1489
{
1490
	PBXTStorageEngine * const hton = this;
1491
#else
1492
static int pbxt_rollback(handlerton *hton, THD *thd, bool all)
1493
{
1494
#endif
1495
	int			err = 0;
1496
	XTThreadPtr	self;
1497
1498
#ifdef DRIZZLED
1499
        if ((self = (XTThreadPtr) *thd->getEngineData(hton))) {
1500
#else
1501
	if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
1502
#endif
1503
		XT_PRINT2(self, "%s pbxt_rollback all=%d\n", all ? "CONN END XACT" : "STAT END", all);
1504
1505
		if (self->st_xact_data) {
1506
			/* There are no table locks, rollback immediately in all cases
1507
			 * except when this is a statement commit with an explicit
1508
			 * transaction (!all && !self->st_auto_commit).
1509
			 *
1510
			 * Note, the only reason for a rollback of a operation is
1511
			 * due to an error. In this case PBXT has already
1512
			 * undone the effects of the operation.
1513
			 *
1514
			 * However, this is not the same as statement rollback
1515
			 * which can involve a number of operations.
1516
			 *
1517
			 * TODO: Implement statement rollback.
1518
			 */
1519
			if (all || self->st_auto_commit) {
1520
				XT_PRINT0(self, "xt_xn_rollback\n");
1521
				if (!xt_xn_rollback(self))
1522
					err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1523
			}
1524
		}
1525
		if (!all)
1526
			self->st_stat_trans = FALSE;
1527
	}
1528
	return 0;
1529
}
1530
1531
#ifdef DRIZZLED
1532
Cursor *PBXTStorageEngine::create(TableShare& table, memory::Root *mem_root)
1533
{
1534
	PBXTStorageEngine * const hton = this;
1535
	if (XTSystemTableShare::isSystemTable(table.path.str))
1536
#else
1537
static handler *pbxt_create_handler(handlerton *hton, TABLE_SHARE *table, MEM_ROOT *mem_root)
1538
{
1539
	if (table && XTSystemTableShare::isSystemTable(table->path.str))
1540
#endif
1541
		return new (mem_root) ha_xtsys(hton, table);
1542
	else
1543
		return new (mem_root) ha_pbxt(hton, table);
1544
}
1545
1546
/*
1547
 * -----------------------------------------------------------------------
1548
 * 2-PHASE COMMIT
1549
 *
1550
 */
1551
1552
#ifndef DRIZZLED
1553
1554
/*
 * First phase of a 2-phase commit: prepare the PBXT transaction of the
 * given thread under the XID of 'thd'.
 *
 * Returns 0 on success (or when there is nothing to prepare), otherwise
 * a MySQL error code.
 */
static int pbxt_prepare(handlerton *hton, THD *thd, bool all)
{
	int			err = 0;
	XTThreadPtr	self;

	XT_TRACE_CALL();
	if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
		/* The trace used to say "pbxt_commit" here (copy/paste). */
		XT_PRINT1(self, "pbxt_prepare all=%d\n", all);

		if (self->st_xact_data) {
			/* There are no table locks, prepare immediately in all cases
			 * except when this is the end of a statement within an
			 * explicit transaction (!all && !self->st_auto_commit).
			 */
			if (all || self->st_auto_commit) {
				XID xid;

				XT_PRINT0(self, "xt_xn_prepare in pbxt_prepare\n");
				thd_get_xid(thd, (MYSQL_XID*) &xid);

				/* The XID is stored with the transaction as xid.length() raw bytes. */
				if (!xt_xn_prepare(xid.length(), (xtWord1 *) &xid, self))
					err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
			}
		}
	}
	return err;
}
1581
1582
/*
 * Get a PBXT thread that has the global database (mysql_real_data_home)
 * open.
 *
 * The PBXT thread attached to the current MySQL thread is used if there
 * is one; otherwise a temporary PBXT thread named 'thread_name' is
 * created and bit 1 is set in *temp_thread. Bit 2 is tested on cleanup
 * but is never set in this function.
 *
 * On success the thread is returned and *ret_thd is set to the current
 * MySQL thread (may be NULL). On failure NULL is returned and *err holds
 * a MySQL error code. Release with ha_temp_close_database().
 */
static XTThreadPtr ha_temp_open_global_database(handlerton *hton, THD **ret_thd, int *temp_thread, char *thread_name, int *err)
{
	THD			*thd = current_thd;
	XTThreadPtr	self = NULL;

	*temp_thread = 0;
	if (thd)
		self = (XTThreadPtr) *thd_ha_data(thd, hton);

	if (!self) {
		XTExceptionRec e;

		self = xt_create_thread(thread_name, FALSE, TRUE, &e);
		if (!self) {
			*err = xt_ha_pbxt_to_mysql_error(e.e_xt_err);
			xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
			return NULL;
		}
		*temp_thread |= 1;
	}

	/* Recovery has to be complete before the database is opened. */
	xt_xres_wait_for_recovery(self, XT_RECOVER_DONE);

	try_(a) {
		xt_open_database(self, mysql_real_data_home, TRUE);
	}
	catch_(a) {
		*err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
		/* Undo whatever was created temporarily above: */
		if (*temp_thread & 1)
			xt_free_thread(self);
		if (*temp_thread & 2)
			myxt_destroy_thread(thd, FALSE);
		self = NULL;
	}
	cont_(a);

	*ret_thd = thd;
	return self;
}
1624
1625
/*
 * Counterpart of ha_temp_open_global_database(): release the database
 * used by 'self', then free the PBXT thread if bit 1 of 'temp_thread'
 * is set and destroy the MySQL thread if bit 2 is set.
 */
static void ha_temp_close_database(XTThreadPtr self, THD *thd, int temp_thread)
{
	const int	free_xt_thread = temp_thread & 1;
	const int	destroy_mysql_thread = temp_thread & 2;

	xt_unuse_database(self, self);
	if (free_xt_thread)
		xt_free_thread(self);
	if (destroy_mysql_thread)
		myxt_destroy_thread(thd, TRUE);
}
1633
1634
/* Return all prepared transactions, found during recovery.
1635
 * This function returns a count. If len is returned, the
1636
 * function will be called again.
1637
 */
1638
/*
 * Copy up to 'len' prepared transaction XIDs into 'xid_list', continuing
 * the enumeration kept in pbxt_xa_enum. Returns the number of entries
 * written; 0 is also returned if the global database cannot be opened.
 */
static int pbxt_recover(handlerton *hton, XID *xid_list, uint len)
{
	/* Bit flags filled in by ha_temp_open_global_database(), which
	 * takes an 'int *' (not an xtBool).
	 */
	int					temp_thread;
	XTThreadPtr			self;
	XTDatabaseHPtr		db;
	uint				count = 0;
	XTXactPreparePtr	xap;
	int					err = 0;
	THD					*thd;

	if (!(self = ha_temp_open_global_database(hton, &thd, &temp_thread, "TempForRecover", &err)))
		return 0;

	db = self->st_database;

	for (count=0; count<len; count++) {
		xap = xt_xn_enum_xa_data(db, &pbxt_xa_enum);
		if (!xap)
			break;
		/* The stored XA data is copied as-is into the XID slot. */
		memcpy(&xid_list[count], xap->xp_xa_data, xap->xp_data_len);
	}

	ha_temp_close_database(self, thd, temp_thread);
	return (int) count;
}
1663
1664
/*
 * Commit the prepared transaction identified by 'xid'.
 *
 * Returns 0 on success or when no such prepared transaction is found,
 * otherwise a MySQL error code.
 */
static int pbxt_commit_by_xid(handlerton *hton, XID *xid)
{
	/* Bit flags filled in by ha_temp_open_global_database(), which
	 * takes an 'int *' (not an xtBool).
	 */
	int					temp_thread;
	XTThreadPtr			self;
	XTDatabaseHPtr		db;
	int					err = 0;
	XTXactPreparePtr	xap;
	THD					*thd;

	XT_TRACE_CALL();

	if (!(self = ha_temp_open_global_database(hton, &thd, &temp_thread, "TempForCommitXA", &err)))
		return err;
	db = self->st_database;

	if ((xap = xt_xn_find_xa_data(db, xid->length(), (xtWord1 *) xid, TRUE, self))) {
		if ((self->st_xact_data = xt_xn_get_xact(db, xap->xp_xact_id, self))) {
			self->st_xact_data->xd_flags &= ~XT_XN_XAC_PREPARED;  // Prepared transactions cannot be swept!
			if (!xt_xn_commit(self))
				err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
		}
		/* The XA record is removed whether or not the commit worked. */
		xt_xn_delete_xa_data(db, xap, TRUE, self);
	}

	ha_temp_close_database(self, thd, temp_thread);
	/* Return the commit error instead of discarding it. */
	return err;
}
1691
1692
/*
 * Rollback the prepared transaction identified by 'xid'.
 *
 * Returns 0 on success or when no such prepared transaction is found,
 * otherwise a MySQL error code.
 */
static int pbxt_rollback_by_xid(handlerton *hton, XID *xid)
{
	int					temp_thread;
	XTThreadPtr			self;
	XTDatabaseHPtr		db;
	int					err = 0;
	XTXactPreparePtr	xap;
	THD					*thd;

	XT_TRACE_CALL();

	if (!(self = ha_temp_open_global_database(hton, &thd, &temp_thread, "TempForRollbackXA", &err)))
		return err;
	db = self->st_database;

	if ((xap = xt_xn_find_xa_data(db, xid->length(), (xtWord1 *) xid, TRUE, self))) {
		if ((self->st_xact_data = xt_xn_get_xact(db, xap->xp_xact_id, self))) {
			self->st_xact_data->xd_flags &= ~XT_XN_XAC_PREPARED;  // Prepared transactions cannot be swept!
			if (!xt_xn_rollback(self))
				err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
		}
		/* The XA record is removed whether or not the rollback worked. */
		xt_xn_delete_xa_data(db, xap, TRUE, self);
	}

	ha_temp_close_database(self, thd, temp_thread);
	/* Return the rollback error instead of discarding it. */
	return err;
}
1719
1720
#endif
1721
1722
/*
1723
 * -----------------------------------------------------------------------
1724
 * HANDLER LOCKING FUNCTIONS
1725
 *
1726
 * These functions are used get a lock on all handles of a particular table.
1727
 *
1728
 */
1729
1730
/*
 * Link 'handler' in at the front of the share's doubly linked list of
 * handlers, while holding share->sh_ex_mutex.
 */
static void ha_add_to_handler_list(XTThreadPtr self, XTSharePtr share, ha_pbxt *handler)
{
	ha_pbxt	*old_head;

	xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
	pushr_(xt_unlock_mutex, share->sh_ex_mutex);

	old_head = share->sh_handlers;
	handler->pb_ex_prev = NULL;
	handler->pb_ex_next = old_head;
	if (old_head)
		old_head->pb_ex_prev = handler;
	share->sh_handlers = handler;

	freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
}
1743
1744
/*
 * Unlink 'handler' from the share's doubly linked list of handlers,
 * while holding share->sh_ex_mutex. The handler's own link fields are
 * left unchanged.
 */
static void ha_remove_from_handler_list(XTThreadPtr self, XTSharePtr share, ha_pbxt *handler)
{
	ha_pbxt	*before;
	ha_pbxt	*after;

	xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
	pushr_(xt_unlock_mutex, share->sh_ex_mutex);

	before = handler->pb_ex_prev;
	after = handler->pb_ex_next;

	/* The list head moves on if this handler was first: */
	if (share->sh_handlers == handler)
		share->sh_handlers = after;

	/* Join the neighbours to each other: */
	if (before)
		before->pb_ex_next = after;
	if (after)
		after->pb_ex_prev = before;

	freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
}
1761
1762
/*
1763
 * Aquire exclusive use of a table, by waiting for all
1764
 * threads to complete use of all handlers of the table.
1765
 * At the same time we hold up all threads
1766
 * that want to use handlers belonging to the table.
1767
 *
1768
 * But we do not hold up threads that close the handlers.
1769
 */
1770
static void ha_aquire_exclusive_use(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine)
{
	time_t	end_time = time(NULL) + XT_SHARE_LOCK_TIMEOUT / 1000;
	ha_pbxt	*handler;

	XT_PRINT1(self, "ha_aquire_exclusive_use (%s) PBXT X lock\n", share->sh_table_path->ps_path);

	/* GOTCHA: It is possible to hang here, if you hold onto the
	 * sh_ex_mutex lock before we really have the exclusive lock (i.e.
	 * before all handlers are no longer in use). The reason is that
	 * reopen() is not possible when some other thread holds sh_ex_mutex.
	 * That can prevent a thread from completing its use of a handler,
	 * which in turn prevents exclusive use here.
	 */
	xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
	pushr_(xt_unlock_mutex, share->sh_ex_mutex);

	/* Phase 1: wait for any other exclusive locker to finish. */
	while (share->sh_table_lock) {
		xt_timed_wait_cond(self, (xt_cond_type *) share->sh_ex_cond, (xt_mutex_type *) share->sh_ex_mutex, XT_SHARE_LOCK_WAIT);
		if (time(NULL) > end_time) {
			freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
			xt_throw_taberr(XT_CONTEXT, XT_ERR_LOCK_TIMEOUT, share->sh_table_path);
		}
	}

	/* This tells readers (and other exclusive lockers) that someone has an exclusive lock. */
	share->sh_table_lock = TRUE;

	/* Phase 2: wait until no handler other than 'mine' is in use. */
	handler = share->sh_handlers;
	while (handler) {
		if (handler == mine || !handler->pb_ex_in_use) {
			handler = handler->pb_ex_next;
			continue;
		}

		/* Wait a bit, and try again: */
		xt_timed_wait_cond(self, (xt_cond_type *) share->sh_ex_cond, (xt_mutex_type *) share->sh_ex_mutex, XT_SHARE_LOCK_WAIT);
		if (time(NULL) > end_time) {
			freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
			xt_throw_taberr(XT_CONTEXT, XT_ERR_LOCK_TIMEOUT, share->sh_table_path);
		}

		/* Handler may have been freed, check from the beginning again: */
		handler = share->sh_handlers;
	}

	freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
}
1821
1822
/*
1823
 * If you have exclusively locked the table, you can close all handler
1824
 * open tables.
1825
 *
1826
 * Call ha_aquire_exclusive_use() first to get the exclusive lock.
1827
 */
1828
static void ha_close_open_tables(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine)
{
	ha_pbxt *handler;

	xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
	pushr_(xt_unlock_mutex, share->sh_ex_mutex);

	/* No handler is in use at this point, so the open table of every
	 * handler except 'mine' can be handed back to the pool.
	 */
	for (handler = share->sh_handlers; handler; handler = handler->pb_ex_next) {
		if (handler == mine || !handler->pb_open_tab)
			continue;
		xt_db_return_table_to_pool_ns(handler->pb_open_tab);
		handler->pb_open_tab = NULL;
	}

	freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
}
1849
1850
#ifdef PBXT_ALLOW_PRINTING
1851
static void ha_release_exclusive_use(XTThreadPtr self, XTSharePtr share)
1852
#else
1853
static void ha_release_exclusive_use(XTThreadPtr XT_UNUSED(self), XTSharePtr share)
1854
#endif
1855
{
1856
	XT_PRINT1(self, "ha_release_exclusive_use (%s) PBXT X UNLOCK\n", share->sh_table_path->ps_path);
1857
	xt_lock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1858
	share->sh_table_lock = FALSE;
1859
	xt_broadcast_cond_ns((xt_cond_type *) share->sh_ex_cond);
1860
	xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1861
}
1862
1863
/*
 * Wait until the share is no longer exclusively locked.
 *
 * While waiting, the handler is marked as not in use (pb_ex_in_use = 0)
 * and the condition is broadcast on every round so that a waiting
 * exclusive locker can re-check its own state.
 *
 * Returns OK, with pb_ex_in_use set back to 1, once sh_table_lock is
 * clear. Returns FAILED if the timed wait itself fails, or if the
 * deadline derived from XT_SHARE_LOCK_TIMEOUT has passed; in the latter
 * case XT_ERR_LOCK_TIMEOUT is registered for the table. On failure
 * pb_ex_in_use is left at 0.
 */
static xtBool ha_wait_for_shared_use(ha_pbxt *mine, XTSharePtr share)
{
	/* NOTE(review): the division by 1000 suggests the timeout constant
	 * is given in milliseconds - confirm against its definition.
	 */
	time_t	end_time = time(NULL) + XT_SHARE_LOCK_TIMEOUT / 1000;
	xtBool	timed_out = FALSE;

	XT_PRINT1(xt_get_self(), "ha_wait_for_shared_use (%s) share lock wait...\n", share->sh_table_path->ps_path);

	mine->pb_ex_in_use = 0;
	xt_lock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
	while (share->sh_table_lock) {
		/* Wake up the exclusive locker (may be waiting). He can try to continue: */
		xt_broadcast_cond_ns((xt_cond_type *) share->sh_ex_cond);

		if (!xt_timed_wait_cond(NULL, (xt_cond_type *) share->sh_ex_cond, (xt_mutex_type *) share->sh_ex_mutex, XT_SHARE_LOCK_WAIT))
			goto failed;

		if (time(NULL) > end_time) {
			timed_out = TRUE;
			goto failed;
		}
	}

	/* The table is free for shared use again: */
	mine->pb_ex_in_use = 1;
	xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
	return OK;

	failed:
	/* The mutex is always released before the error is registered: */
	xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
	if (timed_out)
		xt_register_taberr(XT_REG_CONTEXT, XT_ERR_LOCK_TIMEOUT, share->sh_table_path);
	return FAILED;
}
1889
1890
/*
 * Re-acquire an open table for this handler using the share it is
 * already attached to (pb_share).
 *
 * Opens the database of the table, opens the share, fetches an open
 * table and, if index statistics have never been calculated for the
 * table, loads the row pointers (or the whole table) and sets the index
 * selectivity. Finally the auto-increment counter is initialised.
 *
 * Returns 0 on success, otherwise the error produced by
 * xt_ha_pbxt_thread_error_for_mysql() / xt_ha_pbxt_to_mysql_error().
 */
xtPublic int ha_pbxt::reopen()
{
	THD				*thd = current_thd;
	int				err = 0;
	XTThreadPtr		self = ha_set_current_thread(thd, &err);

	if (!self)
		return xt_ha_pbxt_to_mysql_error(err);

	try_(a) {
		xt_ha_open_database_of_table(self, pb_share->sh_table_path);

		ha_open_share(self, pb_share);

		pb_open_tab = xt_db_open_table_using_tab(pb_share->sh_table, self);
		if (!pb_open_tab)
			xt_throw(self);
		pb_open_tab->ot_thread = self;

		/* {TABLE-STATS}
		 * The fact that a table was opened is no longer used to decide
		 * when statistics are calculated; only a table that has never
		 * had its index statistics calculated is handled here.
		 */
		if (!pb_open_tab->ot_table->tab_ind_stat_calc_time) {
#ifdef LOAD_TABLE_ON_OPEN
			xt_tab_load_table(self, pb_open_tab);
#else
			xt_tab_load_row_pointers(self, pb_open_tab);
#endif
			xt_ind_set_index_selectivity(pb_open_tab, self);
			/* With fewer than 150 rows, the selectivity of the indices
			 * is recalculated as soon as the number of rows exceeds
			 * 200 (see [**]).
			 */
			/* {FREE-ROWS-BAD} */
			pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150;
		}

		/* The minimum auto-increment value of the share is no longer
		 * passed in here. It was only needed for DELETE FROM table;
		 * which is now implemented by deleting each row.
		 * TRUNCATE TABLE does not preserve the counter value.
		 */
		//init_auto_increment(pb_share->sh_min_auto_inc);
		init_auto_increment(0);
	}
	catch_(a) {
		err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
	}
	cont_(a);

	return err;
}
1943
1944
/*
1945
 * -----------------------------------------------------------------------
1946
 * INFORMATION SCHEMA FUNCTIONS
1947
 *
1948
 */
1949
#ifdef DRI_IS
/*
 * Fill callback of the PBXT_STATISTICS information schema table.
 *
 * If the engine is not loaded (pbxt_hton is NULL) the condition is
 * logged and an empty set is returned (return value 0). Otherwise the
 * work is delegated to myxt_statistics_fill_table(); its result, or the
 * converted PBXT error, is returned.
 */
static int pbxt_statistics_fill_table(THD *thd, TABLE_LIST *tables, COND *cond)
{
	XTThreadPtr		self = NULL;
	int				err = 0;

	if (!pbxt_hton) {
		/* Can't do if PBXT is not loaded! Just return an empty set. */
		XTExceptionRec	e;

		xt_exception_xterr(&e, XT_CONTEXT, XT_ERR_PBXT_NOT_INSTALLED);
		xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
		return 0;
	}

	self = ha_set_current_thread(thd, &err);
	if (!self)
		return xt_ha_pbxt_to_mysql_error(err);

	try_(a) {
		/* A thread without an open database gets the global database
		 * opened for it (if that is already open). Without this the
		 * statement runs with no open database, and the related
		 * statistics would be missing.
		 *
		 * This includes all background threads.
		 */
		if (pbxt_database && !self->st_database)
			xt_ha_open_database_of_table(self, (XTPathStrPtr) NULL);

		err = myxt_statistics_fill_table(self, thd, tables, cond, system_charset_info);
	}
	catch_(a) {
		err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
	}
	cont_(a);

	return err;
}
#endif // DRI_IS
1991
1992
#ifdef DRIZZLED
1993
#ifdef DRI_IS
1994
ColumnInfo pbxt_statistics_fields_info[]=
1995
{
1996
	ColumnInfo("ID", 4, MYSQL_TYPE_LONG,  0, 0, "The ID of the statistic", SKIP_OPEN_TABLE),
1997
        ColumnInfo("Name", 40, MYSQL_TYPE_STRING, 0, 0, "The name of the statistic", SKIP_OPEN_TABLE),
1998
        ColumnInfo("Value", 8, MYSQL_TYPE_LONGLONG, 0, 0, "The accumulated value", SKIP_OPEN_TABLE),
1999
	ColumnInfo()
2000
};
2001
2002
class PBXTStatisticsMethods : public InfoSchemaMethods
2003
{
2004
public:
2005
  int fillTable(Session *session, TableList *tables, COND *cond)
2006
  {
2007
        return pbxt_statistics_fill_table(session, tables, cond);
2008
  }
2009
};
2010
#endif // DRI_IS
2011
#else
2012
ST_FIELD_INFO pbxt_statistics_fields_info[]=
2013
{
2014
	{ "ID",		4,	MYSQL_TYPE_LONG,		0, 0, "The ID of the statistic", SKIP_OPEN_TABLE},
2015
	{ "Name",	40, MYSQL_TYPE_STRING,		0, 0, "The name of the statistic", SKIP_OPEN_TABLE},
2016
	{ "Value",	8,	MYSQL_TYPE_LONGLONG,	0, 0, "The accumulated value", SKIP_OPEN_TABLE},
2017
	{ 0,		0,	MYSQL_TYPE_STRING,		0, 0, 0, SKIP_OPEN_TABLE}
2018
};
2019
#endif
2020
2021
#ifdef DRIZZLED
2022
#ifdef DRI_IS
2023
static InfoSchemaTable	*pbxt_statistics_table;
2024
static PBXTStatisticsMethods pbxt_statistics_methods;
2025
static int pbxt_init_statistics(Registry &registry)
2026
{
2027
        //pbxt_statistics_table = (InfoSchemaTable *)xt_calloc_ns(sizeof(InfoSchemaTable));
2028
        //pbxt_statistics_table->table_name= "PBXT_STATISTICS";
2029
        pbxt_statistics_table = new InfoSchemaTable("PBXT_STATISTICS");
2030
        pbxt_statistics_table->setColumnInfo(pbxt_statistics_fields_info);
2031
        pbxt_statistics_table->setInfoSchemaMethods(&pbxt_statistics_methods);
2032
        registry.add(pbxt_statistics_table);
2033
        return 0;
2034
}
2035
#endif // DRI_IS
2036
#else  // DRIZZLED
2037
static int pbxt_init_statistics(void *p)
2038
{
2039
	ST_SCHEMA_TABLE *pbxt_statistics_table = (ST_SCHEMA_TABLE *) p;
2040
	pbxt_statistics_table->fields_info = pbxt_statistics_fields_info;
2041
	pbxt_statistics_table->fill_table = pbxt_statistics_fill_table;
2042
2043
#if defined(XT_WIN) && defined(XT_COREDUMP)
2044
	void register_crash_filter();
2045
2046
	if (pbxt_crash_debug)
2047
		register_crash_filter();
2048
#endif
2049
	return 0;
2050
}
2051
#endif
2052
2053
#ifdef DRIZZLED
2054
#ifdef DRI_IS
2055
static int pbxt_exit_statistics(Registry &registry)
2056
        registry.remove(pbxt_statistics_table);
2057
        delete pbxt_statistics_table;
2058
        return(0);
2059
}
2060
#endif // DRI_IS
2061
#else  // DRIZZLED
2062
static int pbxt_exit_statistics(void *XT_UNUSED(p))
2063
{
2064
	return(0);
2065
}
2066
#endif	// DRIZZLED
2067
2068
/*
2069
 * -----------------------------------------------------------------------
2070
 * DYNAMIC HOOKS
2071
 *
2072
 */
2073
2074
#ifdef DRIZZLED
2075
ha_pbxt::ha_pbxt(handlerton *hton, TableShare& table_arg) : handler(*hton, table_arg)
2076
#else
2077
ha_pbxt::ha_pbxt(handlerton *hton, TABLE_SHARE *table_arg) : handler(hton, table_arg)
2078
#endif
2079
{
2080
	pb_share = NULL;
2081
	pb_open_tab = NULL;
2082
	pb_key_read = FALSE;
2083
	pb_ignore_dup_key = 0;
2084
	pb_lock_table = FALSE;
2085
	pb_table_locked = 0;
2086
	pb_ex_next = NULL;
2087
	pb_ex_prev = NULL;
2088
	pb_ex_in_use = 0;
2089
	pb_in_stat = FALSE;
2090
}
2091
2092
/*
2093
 * If frm_error() is called then we will use this to to find out what file extentions
2094
 * exist for the storage engine. This is also used by the default rename_table and
2095
 * delete_table method in handler.cc.
2096
 */
2097
#ifdef DRIZZLED
2098
const char **PBXTStorageEngine::bas_ext() const
2099
#else
2100
const char **ha_pbxt::bas_ext() const
2101
#endif
2102
{
2103
	return pbxt_extensions;
2104
}
2105
2106
/*
2107
 * Specify the caching type: HA_CACHE_TBL_NONTRANSACT, HA_CACHE_TBL_NOCACHE
2108
 * HA_CACHE_TBL_ASKTRANSACT, HA_CACHE_TBL_TRANSACT
2109
 */
2110
MX_UINT8_T ha_pbxt::table_cache_type()
2111
{
2112
	return HA_CACHE_TBL_TRANSACT; /* Use transactional query cache */
2113
}
2114
2115
#ifndef DRIZZLED
2116
MX_TABLE_TYPES_T ha_pbxt::table_flags() const
2117
{
2118
	return (
2119
		/* We need this flag because records are not packed
2120
		 * into a table which means #ROWID != offset
2121
		 */
2122
		HA_REC_NOT_IN_SEQ |
2123
		/* Since PBXT caches read records itself, I believe
2124
		 * this to be the case.
2125
		 */
2126
		HA_FAST_KEY_READ |
2127
		/*
2128
		 * I am assuming a "key" means a unique index.
2129
		 * Of course a primary key does not allow nulls.
2130
		 */
2131
		HA_NULL_IN_KEY |
2132
		/*
2133
		 * This is necessary because a MySQL blob can be
2134
		 * fairly small.
2135
		 */
2136
		HA_CAN_INDEX_BLOBS |
2137
		/*
2138
		 * Due to transactional influences, this will be
2139
		 * the case.
2140
		 * Although the count is good enough for practical
2141
		 * purposes!
2142
		HA_NOT_EXACT_COUNT |
2143
		 */
2144
#ifndef DRIZZLED
2145
		/*
2146
		 * This basically means we have a file with the name of
2147
		 * database table (which we do).
2148
		 */
2149
		HA_FILE_BASED |
2150
#endif
2151
		/*
2152
		 * Not sure what this does (but MyISAM and InnoDB have it)?!
2153
		 * Could it mean that we support the handler functions.
2154
		 */
2155
		HA_CAN_SQL_HANDLER |
2156
		/*
2157
		 * This is not true, we cannot insert delayed, but a
2158
		 * really cannot see what's wrong with inserting normally
2159
		 * when asked to insert delayed!
2160
		 * And the functionallity is required to pass the alter_table
2161
		 * test.
2162
		 *
2163
		 * Disabled because of MySQL bug #40505
2164
		 */
2165
		/*HA_CAN_INSERT_DELAYED |*/
2166
#if MYSQL_VERSION_ID > 50119
2167
		/* We can do row logging, but not statement, because
2168
		 * MVCC is not serializable!
2169
		 */
2170
		HA_BINLOG_ROW_CAPABLE |
2171
#endif
2172
		/*
2173
		 * Auto-increment is allowed on a partial key.
2174
		 */
2175
		HA_AUTO_PART_KEY);
2176
}
2177
#endif
2178
2179
/*
2180
 * The following query from the DBT1 test is VERY slow
2181
 * if we do not set HA_READ_ORDER.
2182
 * The reason is that it must scan all duplicates, then
2183
 * sort.
2184
 *
2185
 * SELECT o_id, o_carrier_id, o_entry_d, o_ol_cnt
2186
 * FROM orders FORCE INDEX (o_w_id)
2187
 * WHERE o_w_id = 2
2188
   * AND o_d_id = 1
2189
   * AND o_c_id = 500
2190
 * ORDER BY o_id DESC limit 1;
2191
 *
2192
 */
2193
#define FLAGS_ARE_READ_DYNAMICALLY
2194
2195
MX_ULONG_T ha_pbxt::index_flags(uint XT_UNUSED(inx), uint XT_UNUSED(part), bool XT_UNUSED(all_parts)) const
2196
{
2197
	/* It would be nice if the dynamic version of this function works,
2198
	 * but it does not. MySQL loads this information when the table is openned,
2199
	 * and then it is fixed.
2200
	 *
2201
	 * The problem is, I have had to remove the HA_READ_ORDER option although
2202
	 * it applies to PBXT. PBXT returns entries in index order during an index
2203
	 * scan in _almost_ all cases.
2204
	 *
2205
	 * A number of cases are demostrated here: [(11)]
2206
	 *
2207
	 * If involves the following conditions:
2208
	 * - a SELECT FOR UPDATE, UPDATE or DELETE statement
2209
	 * - an ORDER BY, or join that requires the sort order
2210
	 * - another transaction which updates the index while it is being
2211
	 *   scanned.
2212
	 *
2213
	 * In this "obscure" case, the index scan may return index
2214
	 * entries in the wrong order.
2215
	 */
2216
#ifdef FLAGS_ARE_READ_DYNAMICALLY
2217
	/* If were are in an update (SELECT FOR UPDATE, UPDATE or DELETE), then
2218
	 * it may be that we return the rows from an index in the wrong
2219
	 * order! This is due to the fact that update reads wait for transactions
2220
	 * to commit and this means that index entries may change position during
2221
	 * the scan!
2222
	 */
2223
	if (pb_open_tab && pb_open_tab->ot_for_update)
2224
		return (HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_KEYREAD_ONLY);
2225
	/* If I understand HA_KEYREAD_ONLY then this means I do not
2226
	 * need to fetch the record associated with an index
2227
	 * key.
2228
	 */
2229
	return (HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | HA_READ_RANGE | HA_KEYREAD_ONLY);
2230
#else
2231
	return (HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_KEYREAD_ONLY);
2232
#endif
2233
}
2234
2235
/*
 * Detach this handler from its share (does nothing if there is none):
 * give up the open table, remove the handler from the share's handler
 * list, wake up anybody waiting on the share and release the share
 * reference. Exceptions are logged and cleared, never propagated.
 *
 * 'thd' may be NULL; in that case the table is always flushed when the
 * last reference to the share was removed.
 */
void ha_pbxt::internal_close(THD *thd, struct XTThread *self)
{
	xtBool			removed;
	XTOpenTablePtr	ot;

	if (!pb_share)
		return;

	try_(a) {
		/* This lock must be held when we remove the handler's
		 * open table because ha_close_open_tables() can run
		 * concurrently.
		 *
		 * NOTE(review): if xt_ha_open_database_of_table() throws
		 * below, sh_ex_mutex looks like it stays locked - confirm.
		 */
		xt_lock_mutex_ns(pb_share->sh_ex_mutex);
		ot = pb_open_tab;
		if (ot) {
			ot->ot_thread = self;
			if (self->st_database != ot->ot_table->tab_db)
				xt_ha_open_database_of_table(self, pb_share->sh_table_path);
			pb_open_tab = NULL;
			pushr_(xt_db_return_table_to_pool, ot);
		}
		xt_unlock_mutex_ns(pb_share->sh_ex_mutex);

		ha_remove_from_handler_list(self, pb_share, this);

		/* Someone may be waiting for me to complete: */
		xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);

		removed = ha_unget_share_removed(self, pb_share);

		if (ot) {
			/* Flush the table if this was the last handler. This is
			 * not necessary, but has the effect that FLUSH TABLES;
			 * does a checkpoint!
			 *
			 * GOTCHA:
			 * Flushing unconditionally was killing performance as
			 * the number of threads increased!
			 *
			 * When MySQL runs out of table handlers because the
			 * table handler cache is too small, it starts to close
			 * handlers (open_cache.records > table_cache_size),
			 * which can lead to closing all handlers for a
			 * particular table.
			 *
			 * It does this while holding lock_OPEN! A sync operation
			 * here would then run while lock_OPEN is held, and the
			 * whole server comes to a stop. So only flush without a
			 * THD, or for FLUSH TABLES.
			 */
			if (removed && (!thd || thd_sql_command(thd) == SQLCOM_FLUSH))
				xt_sync_flush_table(self, ot, thd ? 0 : 4);
			freer_(); // xt_db_return_table_to_pool(ot);
		}
	}
	catch_(a) {
		xt_log_and_clear_exception(self);
	}
	cont_(a);

	pb_share = NULL;
}
2296
2297
/*
2298
 * Used for opening tables. The name will be the name of the file.
2299
 * A table is opened when it needs to be opened. For instance
2300
 * when a request comes in for a select on the table (tables are not
2301
 * open and closed for each request, they are cached).
2302
2303
 * Called from handler.cc by handler::ha_open(). The server opens all tables by
2304
 * calling ha_open() which then calls the handler specific open().
2305
 */
2306
int ha_pbxt::open(const char *table_path, int XT_UNUSED(mode), uint XT_UNUSED(test_if_locked))
2307
{
2308
	THD			*thd = current_thd;
2309
	int			err = 0;
2310
	XTThreadPtr	self;
2311
2312
	ref_length = XT_RECORD_OFFS_SIZE;
2313
2314
	if (!(self = ha_set_current_thread(thd, &err)))
2315
		return xt_ha_pbxt_to_mysql_error(err);
2316
2317
	XT_PRINT1(self, "open (%s)\n", table_path);
2318
2319
	pb_ex_in_use = 1;
2320
	try_(a) {
2321
		xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
2322
2323
		pb_share = ha_get_share(self, table_path, false);
2324
		ha_add_to_handler_list(self, pb_share, this);
2325
		if (pb_share->sh_table_lock) {
2326
			if (!ha_wait_for_shared_use(this, pb_share))
2327
				xt_throw(self);
2328
		}
2329
2330
		ha_open_share(self, pb_share);
2331
2332
		thr_lock_data_init(&pb_share->sh_lock, &pb_lock, NULL);
2333
		if (!(pb_open_tab = xt_db_open_table_using_tab(pb_share->sh_table, self)))
2334
			xt_throw(self);
2335
		pb_open_tab->ot_thread = self;
2336
2337
		/* {TABLE-STATS} */
2338
		if (!pb_open_tab->ot_table->tab_ind_stat_calc_time) {
2339
#ifdef LOAD_TABLE_ON_OPEN
2340
			xt_tab_load_table(self, pb_open_tab);
2341
#else
2342
			xt_tab_load_row_pointers(self, pb_open_tab);
2343
#endif
2344
			xt_ind_set_index_selectivity(pb_open_tab, self);
2345
			/* {FREE-ROWS-BAD} */
2346
			pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150;
2347
		}
2348
2349
		init_auto_increment(0);
2350
	}
2351
	catch_(a) {
2352
		err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
2353
		internal_close(thd, self);
2354
	}
2355
	cont_(a);
2356
2357
	if (!err)
2358
		info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
2359
2360
	pb_ex_in_use = 0;
2361
	if (pb_share) {
2362
		/* Someone may be waiting for me to complete: */
2363
		if (pb_share->sh_table_lock)
2364
			xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
2365
	}
2366
	return err;
2367
}
2368
2369
2370
/*
2371
	Closes a table. We call the free_share() function to free any resources
2372
	that we have allocated in the "shared" structure.
2373
2374
	Called from sql_base.cc, sql_select.cc, and table.cc.
2375
	In sql_select.cc it is only used to close up temporary tables or during
2376
	the process where a temporary table is converted over to being a
2377
	myisam table.
2378
	For sql_base.cc look at close_data_tables().
2379
*/
2380
int ha_pbxt::close(void)
2381
{
2382
	THD						*thd = current_thd;
2383
	volatile int			err = 0;
2384
	volatile XTThreadPtr	self;
2385
2386
	if (thd)
2387
		self = ha_set_current_thread(thd, (int *) &err);
2388
	else {
2389
		XTExceptionRec e;
2390
2391
		if (!(self = xt_create_thread("TempForClose", FALSE, TRUE, &e))) {
2392
			xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
2393
			return 0;
2394
		}
2395
	}
2396
2397
	XT_PRINT1(self, "close (%s)\n", pb_share && pb_share->sh_table_path->ps_path ? pb_share->sh_table_path->ps_path : "unknown");
2398
2399
	if (self) {
2400
		try_(a) {
2401
			internal_close(thd, self);
2402
		}
2403
		catch_(a) {
2404
			err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
2405
		}
2406
		cont_(a);
2407
2408
		if (!thd)
2409
			xt_free_thread(self);
2410
	}
2411
	else
2412
		xt_log(XT_NS_CONTEXT, XT_LOG_WARNING, "Unable to release table reference\n");
2413
		
2414
	return err;
2415
}
2416
2417
/*
 * Initialise the table's auto-increment counter (tab_auto_inc) from the
 * highest value currently found in the auto-increment index.
 *
 * Nothing is done if the indexes are disabled, if the table has no
 * auto-increment field, or if the counter is already non-zero.
 * The result is raised to (minimum - 1) for both the dictionary
 * minimum and 'min_auto_inc' if it lies below them.
 *
 * Throws (after releasing tab_ainc_lock) if a transaction is needed
 * but cannot be started.
 */
void ha_pbxt::init_auto_increment(xtWord8 min_auto_inc)
{
	XTTableHPtr	tab = pb_open_tab->ot_table;
	xtWord8		max_val = 0;
	int			err;

	/* Cannot do this if the index version is bad! */
	if (tab->tab_dic.dic_disable_index)
		return;

	xt_spinlock_lock(&tab->tab_ainc_lock);
	if (!table->found_next_number_field || tab->tab_auto_inc) {
		/* No auto-increment field, or the counter is already set. */
		xt_spinlock_unlock(&tab->tab_ainc_lock);
		return;
	}

	Field		*saved_field = table->next_number_field;
	THD			*saved_thd = table->in_use;
	xtBool		xn_started = FALSE;
	XTThreadPtr	self = pb_open_tab->ot_thread;

	/*
	 * A table may be opened by a thread with a running
	 * transaction!
	 * Since get_auto_increment() does not do an update,
	 * it should be OK to use the transaction we already
	 * have to get the next auto-increment value.
	 * Only if there is none is a transaction started here.
	 */
	if (!self->st_xact_data) {
		self->st_xact_mode = XT_XACT_REPEATABLE_READ;
		self->st_ignore_fkeys = FALSE;
		self->st_auto_commit = TRUE;
		self->st_table_trans = FALSE;
		self->st_abort_trans = FALSE;
		self->st_stat_ended = FALSE;
		self->st_stat_trans = FALSE;
		self->st_is_update = NULL;
		if (!xt_xn_begin(self)) {
			xt_spinlock_unlock(&tab->tab_ainc_lock);
			xt_throw(self);
		}
		xn_started = TRUE;
	}

	/* Setup the conditions for the next call! */
	table->in_use = current_thd;
	table->next_number_field = table->found_next_number_field;

	extra(HA_EXTRA_KEYREAD);
	table->mark_columns_used_by_index_no_reset(TS(table)->next_number_index, table->read_set);
	column_bitmaps_signal();
	index_init(TS(table)->next_number_index, 0);

	if (TS(table)->next_number_key_offset) {
		/* Do an index scan to find the largest value!
		 * The standard method will not work because it forces
		 * us to lock that table!
		 */
		for (err = index_first(table->record[1]); !err; err = index_next(table->record[1])) {
			/* {PRE-INC} */
			xtWord8 val = (xtWord8) table->next_number_field->val_int_offset(TS(table)->rec_buff_length);

			if (val > max_val)
				max_val = val;
		}
	}
	else {
		/* Autoincrement at key-start: the last index entry has it. */
		err = index_last(table->record[1]);
		if (!err && !table->next_number_field->is_null(TS(table)->rec_buff_length)) {
			/* {PRE-INC} */
			max_val = (xtWord8) table->next_number_field->val_int_offset(TS(table)->rec_buff_length);
		}
	}

	index_end();
	extra(HA_EXTRA_NO_KEYREAD);

	/* {PRE-INC}
	 * This was changed from post increment to pre-increment!
	 * With post increment the last valid value in the range can
	 * not be returned.
	 *
	 * Here the test example:
	 *
	 * drop table if exists t1;
	 * create table t1 (i tinyint unsigned not null auto_increment primary key) engine=pbxt;
	 * insert into t1 set i = 254;
	 * insert into t1 set i = null;
	 *
	 * With post-increment, this last insert fails because on post
	 * increment the value overflows!
	 *
	 * Pre-increment means the current maximum is stored, and it is
	 * incremented before the next value is returned. That works in
	 * this situation.
	 */
	tab->tab_auto_inc = max_val;
	if (tab->tab_auto_inc < tab->tab_dic.dic_min_auto_inc)
		tab->tab_auto_inc = tab->tab_dic.dic_min_auto_inc-1;
	if (tab->tab_auto_inc < min_auto_inc)
		tab->tab_auto_inc = min_auto_inc-1;

	/* Restore the changed values: */
	table->next_number_field = saved_field;
	table->in_use = saved_thd;

	if (xn_started) {
		XT_PRINT0(self, "xt_xn_commit in init_auto_increment\n");
		xt_xn_commit(self);
	}

	xt_spinlock_unlock(&tab->tab_ainc_lock);
}
2537
2538
void ha_pbxt::get_auto_increment(MX_ULONGLONG_T offset, MX_ULONGLONG_T increment,
2539
                                 MX_ULONGLONG_T XT_UNUSED(nb_desired_values),
2540
                                 MX_ULONGLONG_T *first_value,
2541
                                 MX_ULONGLONG_T *nb_reserved_values)
2542
{
2543
	register XTTableHPtr	tab;
2544
	MX_ULONGLONG_T			nr, nr_less_inc;
2545
2546
	ASSERT_NS(pb_ex_in_use);
2547
2548
	tab = pb_open_tab->ot_table;
2549
2550
	/* {PRE-INC}
2551
	 * Assume that nr contains the last value returned!
2552
	 * We will increment and then return the value.
2553
	 */
2554
	xt_spinlock_lock(&tab->tab_ainc_lock);
2555
	nr = (MX_ULONGLONG_T) tab->tab_auto_inc;
2556
	nr_less_inc = nr;
2557
	if (nr < offset)
2558
		nr = offset;
2559
	else if (increment > 1 && ((nr - offset) % increment) != 0)
2560
		nr += increment - ((nr - offset) % increment);
2561
	else
2562
		nr += increment;
2563
	if (table->next_number_field->cmp((const unsigned char *)&nr_less_inc, (const unsigned char *)&nr) < 0)
2564
		tab->tab_auto_inc = (xtWord8) (nr);
2565
	else
2566
		nr = ~0;	/* indicate error to the caller */
2567
	xt_spinlock_unlock(&tab->tab_ainc_lock);
2568
2569
	*first_value = nr;
2570
	*nb_reserved_values = 1;
2571
}
2572
2573
/* GOTCHA: We need to use signed value here because of the test
2574
 * (from auto_increment.test):
2575
 * create table t1 (a int not null auto_increment primary key);
2576
 * insert into t1 values (NULL);
2577
 * insert into t1 values (-1);
2578
 * insert into t1 values (NULL);
2579
 */
2580
xtPublic void ha_set_auto_increment(XTOpenTablePtr ot, Field *nr)
2581
{
2582
	register XTTableHPtr	tab;
2583
	MX_ULONGLONG_T			nr_int_val;
2584
	
2585
	nr_int_val = nr->val_int();
2586
	tab = ot->ot_table;
2587
2588
	if (nr->cmp((const unsigned char *)&tab->tab_auto_inc) > 0) {
2589
		xt_spinlock_lock(&tab->tab_ainc_lock);
2590
2591
		if (nr->cmp((const unsigned char *)&tab->tab_auto_inc) > 0) {
2592
			/* {PRE-INC}
2593
			 * We increment later, so just set the value!
2594
			MX_ULONGLONG_T nr_int_val_plus_one = nr_int_val + 1;
2595
			if (nr->cmp((const unsigned char *)&nr_int_val_plus_one) < 0)
2596
				tab->tab_auto_inc = nr_int_val_plus_one;
2597
			else
2598
			 */
2599
			tab->tab_auto_inc = nr_int_val;
2600
		}
2601
		xt_spinlock_unlock(&tab->tab_ainc_lock);
2602
	}
2603
2604
	if (xt_db_auto_increment_mode == 1) {
2605
		if (nr_int_val > (MX_ULONGLONG_T) tab->tab_dic.dic_min_auto_inc) {
2606
			/* Do this every 100 calls: */
2607
#ifdef DEBUG
2608
			tab->tab_dic.dic_min_auto_inc = nr_int_val + 5;
2609
#else
2610
			tab->tab_dic.dic_min_auto_inc = nr_int_val + 100;
2611
#endif
2612
			ot->ot_thread = xt_get_self();
2613
			if (!xt_tab_write_min_auto_inc(ot))
2614
				xt_log_and_clear_exception(ot->ot_thread);
2615
		}
2616
	}
2617
}
2618
2619
/*
2620
static void dump_buf(unsigned char *buf, int len)
2621
{
2622
	int i;
2623
	
2624
	for (i=0; i<len; i++) printf("%2c", buf[i] <= 127 ? buf[i] : '.');
2625
	printf("\n");
2626
	for (i=0; i<len; i++) printf("%02x", buf[i]);
2627
	printf("\n");
2628
}
2629
*/
2630
2631
/*
1491.1.2 by Jay Pipes
Cursor::write_row() -> Cursor::doInsertRecord(). Cursor::ha_write_row() -> Cursor::insertRecord()
2632
 * doInsertRecord() inserts a row. No extra() hint is given currently if a bulk load
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
2633
 * is happeneding. buf() is a byte array of data. You can use the field
2634
 * information to extract the data from the native byte array type.
2635
 * Example of this would be:
2636
 * for (Field **field=table->field ; *field ; field++)
2637
 * {
2638
 *		...
2639
 * }
2640
2641
 * See ha_tina.cc for an example of extracting all of the data as strings.
2642
 * ha_berekly.cc has an example of how to store it intact by "packing" it
2643
 * for ha_berkeley's own native storage type.
2644
1491.1.3 by Jay Pipes
Cursor::update_row() changed to doUpdateRecord() and updateRecord()
2645
 * See the note for doUpdateRecord() on auto_increments and timestamps. This
1491.1.2 by Jay Pipes
Cursor::write_row() -> Cursor::doInsertRecord(). Cursor::ha_write_row() -> Cursor::insertRecord()
2646
 * case also applied to doInsertRecord().
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
2647
2648
 * Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc,
2649
 * sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc.
2650
 */
1491.1.2 by Jay Pipes
Cursor::write_row() -> Cursor::doInsertRecord(). Cursor::ha_write_row() -> Cursor::insertRecord()
2651
int ha_pbxt::doInsertRecord(byte *buf)
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
2652
{
2653
	int err = 0;
2654
2655
	ASSERT_NS(pb_ex_in_use);
2656
1491.1.2 by Jay Pipes
Cursor::write_row() -> Cursor::doInsertRecord(). Cursor::ha_write_row() -> Cursor::insertRecord()
2657
	XT_PRINT1(pb_open_tab->ot_thread, "doInsertRecord (%s)\n", pb_share->sh_table_path->ps_path);
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
2658
	XT_DISABLED_TRACE(("INSERT tx=%d val=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(&buf[1])));
2659
	//statistic_increment(ha_write_count,&LOCK_status);
2660
#ifdef PBMS_ENABLED
2661
	PBMSResultRec result;
1491.1.2 by Jay Pipes
Cursor::write_row() -> Cursor::doInsertRecord(). Cursor::ha_write_row() -> Cursor::insertRecord()
2662
	err = pbms_doInsertRecord_blobs(table, buf, &result);
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
2663
	if (err) {
1491.1.2 by Jay Pipes
Cursor::write_row() -> Cursor::doInsertRecord(). Cursor::ha_write_row() -> Cursor::insertRecord()
2664
		xt_logf(XT_NT_ERROR, "pbms_doInsertRecord_blobs() Error: %s", result.mr_message);
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
2665
		return err;
2666
	}
2667
#endif
2668
2669
	/* {START-STAT-HACK} previously position of start statement hack. */
2670
	xt_xlog_check_long_writer(pb_open_tab->ot_thread);
2671
2672
	if (pb_open_tab->ot_thread->st_import_stat) {
2673
		if (pb_import_row_count >= XT_IMPORT_ROW_COUNT) {
2674
			/* Commit and restart the transaction. */
2675
			XTThreadPtr thread = pb_open_tab->ot_thread;
2676
1491.1.2 by Jay Pipes
Cursor::write_row() -> Cursor::doInsertRecord(). Cursor::ha_write_row() -> Cursor::insertRecord()
2677
			XT_PRINT0(thread, "xt_xn_commit in doInsertRecord\n");
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
2678
			if (!xt_xn_commit(thread)) {
2679
				err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, thread, pb_ignore_dup_key);
2680
				return err;
2681
			}
1491.1.2 by Jay Pipes
Cursor::write_row() -> Cursor::doInsertRecord(). Cursor::ha_write_row() -> Cursor::insertRecord()
2682
			XT_PRINT0(thread, "xt_xn_begin in doInsertRecord\n");
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
2683
			if (!xt_xn_begin(thread)) {
2684
				err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, thread, pb_ignore_dup_key);
2685
				return err;
2686
			}
2687
			pb_import_row_count = 0;
2688
		}
2689
		else
2690
			pb_import_row_count++;
2691
	}
2692
2693
	if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
2694
		table->timestamp_field->set_time();
2695
2696
	if (table->next_number_field && buf == table->record[0]) {
2697
		int update_err = update_auto_increment();
2698
		if (update_err) {
2699
			ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2700
			err = update_err;
2701
			goto done;
2702
		}
2703
		ha_set_auto_increment(pb_open_tab, table->next_number_field);
2704
	}
2705
2706
	if (!xt_tab_new_record(pb_open_tab, (xtWord1 *) buf)) {
2707
		err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2708
2709
		/*
2710
		 * This is needed to allow the same row to be updated multiple times in case of bulk REPLACE.
2711
		 * This happens during execution of LOAD DATA...REPLACE MySQL first tries to INSERT the row 
2712
		 * and if it gets dup-key error it tries UPDATE, so the same row can be overwriten multiple 
2713
		 * times within the same statement
2714
		 */
2715
		if (err == HA_ERR_FOUND_DUPP_KEY && pb_open_tab->ot_thread->st_is_update) {
2716
			/* Pop the update stack: */
2717
			//pb_open_tab->ot_thread->st_update_id++;
2718
			XTOpenTablePtr curr = pb_open_tab->ot_thread->st_is_update;
2719
2720
			pb_open_tab->ot_thread->st_is_update = curr->ot_prev_update;
2721
			curr->ot_prev_update = NULL;
2722
		}
2723
	}
2724
2725
	done:
2726
#ifdef PBMS_ENABLED
2727
	pbms_completed(table, (err == 0));
2728
#endif
2729
	return err;
2730
}
2731
2732
#ifdef UNUSED_CODE
/*
 * Compare 'a' and 'b' byte by byte up to the first zero byte in either
 * of them. Returns 0 at the first difference, otherwise 1 (so a string
 * that is a prefix of the other compares as equal).
 */
static int equ_bin(const byte *a, const char *b)
{
	for (; *a && *b; a++, b++) {
		if (*a != *b)
			return 0;
	}
	return 1;
}

/*
 * Trace 'len_in' bytes of 'a_in', starting at 'offset': first as
 * two-digit hex values, then "==", then as characters (bytes outside
 * 9..126 are shown as '.'), followed by a newline.
 */
static void dump_bin(const byte *a_in, int offset, int len_in)
{
	const byte	*start = a_in + offset;
	int			i;

	for (i = 0; i < len_in; i++)
		xt_trace("%02X", (int) start[i]);

	xt_trace("==");

	for (i = 0; i < len_in; i++)
		xt_trace("%c", (start[i] > 8 && start[i] < 127) ? start[i] : '.');

	xt_trace("\n");
}
#endif
2766
2767
/*
1491.1.3 by Jay Pipes
Cursor::update_row() changed to doUpdateRecord() and updateRecord()
2768
 * Yes, doUpdateRecord() does what you expect, it updates a row. old_data will have
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
2769
 * the previous row record in it, while new_data will have the newest data in
2770
 * it. Keep in mind that the server can do updates based on ordering if an ORDER BY
2771
 * clause was used. Consecutive ordering is not guarenteed.
2772
 *
2773
 * Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
2774
 */
1491.1.3 by Jay Pipes
Cursor::update_row() changed to doUpdateRecord() and updateRecord()
2775
int ha_pbxt::doUpdateRecord(const byte * old_data, byte * new_data)
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
2776
{
2777
	int						err = 0;
2778
	register XTThreadPtr	self = pb_open_tab->ot_thread;
2779
2780
	ASSERT_NS(pb_ex_in_use);
2781
2782
	XT_PRINT1(self, "update_row (%s)\n", pb_share->sh_table_path->ps_path);
2783
	XT_DISABLED_TRACE(("UPDATE tx=%d val=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(&new_data[1])));
2784
	//statistic_increment(ha_update_count,&LOCK_status);
2785
	/* {START-STAT-HACK} previously position of start statement hack. */
2786
	xt_xlog_check_long_writer(self);
2787
2788
	/* {UPDATE-STACK} */
2789
	if (self->st_is_update != pb_open_tab) {
2790
		/* Push the update stack: */
2791
		pb_open_tab->ot_prev_update = self->st_is_update;
2792
		self->st_is_update = pb_open_tab;
2793
		pb_open_tab->ot_update_id++;
2794
	}
2795
2796
	if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
2797
		table->timestamp_field->set_time();
2798
2799
#ifdef PBMS_ENABLED
2800
	PBMSResultRec result;
2801
2802
	err = pbms_delete_row_blobs(table, old_data, &result);
2803
	if (err) {
2804
		xt_logf(XT_NT_ERROR, "update_row:pbms_delete_row_blobs() Error: %s", result.mr_message);
2805
		return err;
2806
	}
1491.1.2 by Jay Pipes
Cursor::write_row() -> Cursor::doInsertRecord(). Cursor::ha_write_row() -> Cursor::insertRecord()
2807
	err = pbms_doInsertRecord_blobs(table, new_data, &result);
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
2808
	if (err) { 
1491.1.2 by Jay Pipes
Cursor::write_row() -> Cursor::doInsertRecord(). Cursor::ha_write_row() -> Cursor::insertRecord()
2809
		xt_logf(XT_NT_ERROR, "update_row:pbms_doInsertRecord_blobs() Error: %s", result.mr_message);
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
2810
		goto pbms_done;
2811
	}
2812
#endif
2813
2814
	/* GOTCHA: We need to check the auto-increment value on update
2815
	 * because of the following test (which fails for InnoDB) -
2816
	 * auto_increment.test:
2817
	 * create table t1 (a int not null auto_increment primary key, val int);
2818
	 * insert into t1 (val) values (1);
2819
	 * update t1 set a=2 where a=1;
2820
	 * insert into t1 (val) values (1);
2821
	 */
2822
	if (table->found_next_number_field && new_data == table->record[0]) {
2823
		MX_LONGLONG_T	nr;
2824
		my_bitmap_map	*old_map;
2825
2826
		old_map = mx_tmp_use_all_columns(table, table->read_set);
2827
		nr = table->found_next_number_field->val_int();
2828
		ha_set_auto_increment(pb_open_tab, table->found_next_number_field);
2829
		mx_tmp_restore_column_map(table, old_map);
2830
	}
2831
2832
	if (!xt_tab_update_record(pb_open_tab, (xtWord1 *) old_data, (xtWord1 *) new_data))
2833
		err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2834
2835
	pb_open_tab->ot_table->tab_locks.xt_remove_temp_lock(pb_open_tab, TRUE);
2836
	
2837
#ifdef PBMS_ENABLED
2838
	pbms_done:
2839
	pbms_completed(table, (err == 0));
2840
#endif
2841
2842
	return err;
2843
}
2844
2845
/*
2846
 * This will delete a row. buf will contain a copy of the row to be deleted.
2847
 * The server will call this right after the current row has been called (from
2848
 * either a previous rnd_next() or index call).
2849
 *
2850
 * Called in sql_acl.cc and sql_udf.cc to manage internal table information.
2851
 * Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select it is
2852
 * used for removing duplicates while in insert it is used for REPLACE calls.
2853
*/
1491.1.4 by Jay Pipes
delete_row() is now deleteRecord() and doDeleteRecord() in Cursor
2854
int ha_pbxt::doDeleteRecord(const byte * buf)
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
2855
{
2856
	int err = 0;
2857
2858
	ASSERT_NS(pb_ex_in_use);
2859
2860
	XT_PRINT1(pb_open_tab->ot_thread, "delete_row (%s)\n", pb_share->sh_table_path->ps_path);
2861
	XT_DISABLED_TRACE(("DELETE tx=%d val=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(&buf[1])));
2862
	//statistic_increment(ha_delete_count,&LOCK_status);
2863
2864
#ifdef PBMS_ENABLED
2865
	PBMSResultRec result;
2866
2867
	err = pbms_delete_row_blobs(table, buf, &result);
2868
	if (err) {
2869
		xt_logf(XT_NT_ERROR, "pbms_delete_row_blobs() Error: %s", result.mr_message);
2870
		return err;
2871
	}
2872
#endif
2873
	/* {START-STAT-HACK} previously position of start statement hack. */
2874
	xt_xlog_check_long_writer(pb_open_tab->ot_thread);
2875
2876
	if (!xt_tab_delete_record(pb_open_tab, (xtWord1 *) buf))
2877
		err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2878
2879
	pb_open_tab->ot_table->tab_locks.xt_remove_temp_lock(pb_open_tab, TRUE);
2880
2881
#ifdef PBMS_ENABLED
2882
	pbms_completed(table, (err == 0));
2883
#endif
2884
	return err;
2885
}
2886
2887
/*
2888
 * -----------------------------------------------------------------------
2889
 * INDEX METHODS
2890
 */
2891
2892
/*
2893
 * This looks like a hack, but actually, it is OK.
2894
 * It depends on the setup done by the super-class. It involves an extra
2895
 * range check that we need to do if a "new" record is returned during
2896
 * an index scan.
2897
 *
2898
 * A new record is returned if a row is updated (by another transaction)
2899
 * during the index scan. If an update is detected, then the scan stops
2900
 * and waits for the transaction to end.
2901
 *
2902
 * If the transaction commits, then the updated row is returned instead
2903
 * of the row it would have returned when doing a consistent read
2904
 * (repeatable read).
2905
 *
2906
 * These new records can appear out of index order, and may not even
2907
 * belong to the index range that we are concerned with.
2908
 *
2909
 * Notice that there is no check for the start of the range. It appears
2910
 * that this is not necessary, MySQL seems to have no problem ignoring
2911
 * such values.
2912
 *
2913
 * A number of tests have been given below which demonstrate the use
2914
 * of the function.
2915
 *
2916
 * They also demonstrate the ORDER BY problem described here: [(11)].
2917
 *
2918
 * DROP TABLE IF EXISTS test_tab, test_tab_1, test_tab_2;
2919
 * CREATE TABLE test_tab (ID int primary key, Value int, Name varchar(20), index(Value, Name)) ENGINE=pbxt;
2920
 * INSERT test_tab values(1, 1, 'A');
2921
 * INSERT test_tab values(2, 1, 'B');
2922
 * INSERT test_tab values(3, 1, 'C');
2923
 * INSERT test_tab values(4, 2, 'D');
2924
 * INSERT test_tab values(5, 2, 'E');
2925
 * INSERT test_tab values(6, 2, 'F');
2926
 * INSERT test_tab values(7, 2, 'G');
2927
 * 
2928
 * select * from test_tab where value = 1 order by value, name for update;
2929
 * 
2930
 * -- Test: 1
2931
 * -- C1
2932
 * begin;
2933
 * select * from test_tab where id = 5 for update;
2934
 * 
2935
 * -- C2
2936
 * begin;
2937
 * select * from test_tab where value = 2 order by value, name for update;
2938
 * 
2939
 * -- C1
2940
 * update test_tab set value = 3 where id = 6;
2941
 * commit;
2942
 * 
2943
 * -- Test: 2
2944
 * -- C1
2945
 * begin;
2946
 * select * from test_tab where id = 5 for update;
2947
 * 
2948
 * -- C2
2949
 * begin;
2950
 * select * from test_tab where value >= 2 order by value, name for update;
2951
 * 
2952
 * -- C1
2953
 * update test_tab set value = 3 where id = 6;
2954
 * commit;
2955
 * 
2956
 * -- Test: 3
2957
 * -- C1
2958
 * begin;
2959
 * select * from test_tab where id = 5 for update;
2960
 * 
2961
 * -- C2
2962
 * begin;
2963
 * select * from test_tab where value = 2 order by value, name for update;
2964
 * 
2965
 * -- C1
2966
 * update test_tab set value = 1 where id = 6;
2967
 * commit;
2968
 */
2969
2970
int ha_pbxt::xt_index_in_range(register XTOpenTablePtr XT_UNUSED(ot), register XTIndexPtr ind,
2971
	register XTIdxSearchKeyPtr search_key, xtWord1 *buf)
2972
{
2973
	/* If search key is given, this means we want an exact match. */
2974
	if (search_key) {
2975
		xtWord1 key_buf[XT_INDEX_MAX_KEY_SIZE];
2976
2977
		myxt_create_key_from_row(ind, key_buf, buf, NULL);
2978
		search_key->sk_on_key = myxt_compare_key(ind, search_key->sk_key_value.sv_flags, search_key->sk_key_value.sv_length,
2979
			search_key->sk_key_value.sv_key, key_buf) == 0;
2980
		return search_key->sk_on_key;
2981
	}
2982
2983
	/* Otherwise, check the end of the range. */
2984
	if (end_range)
2985
		return compare_key(end_range) <= 0;
2986
	return 1;
2987
}
2988
2989
int ha_pbxt::xt_index_next_read(register XTOpenTablePtr ot, register XTIndexPtr ind, xtBool key_only,
2990
	register XTIdxSearchKeyPtr search_key, byte *buf)
2991
{
2992
	xt_xlog_check_long_writer(ot->ot_thread);
2993
2994
	if (key_only) {
2995
		/* We only need to read the data from the key: */
2996
		while (ot->ot_curr_rec_id) {
2997
			if (search_key && !search_key->sk_on_key)
2998
				break;
2999
3000
			switch (xt_tab_visible(ot)) {
3001
				case FALSE:
3002
					if (xt_idx_next(ot, ind, search_key))
3003
						break;
3004
				case XT_ERR:
3005
					goto failed;
3006
				case XT_NEW:
3007
					if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3008
						goto failed;
3009
					if (xt_index_in_range(ot, ind, search_key, buf)) {
3010
						return 0;
3011
					}
3012
					if (!xt_idx_next(ot, ind, search_key))
3013
						goto failed;
3014
					break;
3015
				case XT_RETRY:
3016
					/* We cannot start from the beginning again, if we have
3017
					 * already output rows!
3018
					 * And we need the orginal search key.
3019
					 *
3020
					 * The case in which this occurs is:
3021
					 *
3022
					 * T1: UPDATE tbl_file SET GlobalID = 'DBCD5C4514210200825501089884844_6M' WHERE ID = 39
3023
					 * Locks a particular row.
3024
					 *
3025
					 * T2: SELECT ID,Flags FROM tbl_file WHERE SpaceID = 1 AND Path = '/zi/America/' AND 
3026
					 * Name = 'Cuiaba' AND Flags IN ( 0,1,4,5 ) FOR UPDATE
3027
					 * scans the index and stops on the lock (of the before image) above.
3028
					 *
3029
					 * T1 quits, the sweeper deletes the record updated by T1?!
3030
					 * BUG: Cleanup should wait until T2 is complete!
3031
					 *
3032
					 * T2 continues, and returns XT_RETRY.
3033
					 *
3034
					 * At this stage T2 has already returned some rows, so it may not retry from the
3035
					 * start. Instead it tries to locate the last record it tried to lock.
3036
					 * This record is gone (or not visible), so it finds the next one.
3037
					 *
3038
					 * POTENTIAL BUG: If cleanup does not wait until T2 is complete, then
3039
					 * I may miss the update record, if it is moved before the index scan
3040
					 * position.
3041
					 */
3042
					if (!pb_ind_row_count && search_key) {
3043
						if (!xt_idx_search(pb_open_tab, ind, search_key))
3044
							return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3045
					}
3046
					else {
3047
						if (!xt_idx_research(pb_open_tab, ind))
3048
							goto failed;
3049
					}
3050
					break;
3051
				default:
3052
					if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3053
						goto failed;
3054
					return 0;
3055
			}
3056
		}
3057
	}
3058
	else {
3059
		while (ot->ot_curr_rec_id) {
3060
			if (search_key && !search_key->sk_on_key)
3061
				break;
3062
3063
			switch (xt_tab_read_record(ot, (xtWord1 *) buf)) {
3064
				case FALSE:
3065
					XT_DISABLED_TRACE(("not visi tx=%d rec=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) ot->ot_curr_rec_id));
3066
					if (xt_idx_next(ot, ind, search_key))
3067
						break;
3068
				case XT_ERR:
3069
					goto failed;
3070
				case XT_NEW:
3071
					if (xt_index_in_range(ot, ind, search_key, buf))
3072
						return 0;
3073
					if (!xt_idx_next(ot, ind, search_key))
3074
						goto failed;
3075
					break;
3076
				case XT_RETRY:
3077
					if (!pb_ind_row_count && search_key) {
3078
						if (!xt_idx_search(pb_open_tab, ind, search_key))
3079
							return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3080
					}
3081
					else {
3082
						if (!xt_idx_research(pb_open_tab, ind))
3083
							goto failed;
3084
					}
3085
					break;
3086
				default:
3087
					XT_DISABLED_TRACE(("visible tx=%d rec=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) ot->ot_curr_rec_id));
3088
					return 0;
3089
			}
3090
		}
3091
	}
3092
	return HA_ERR_END_OF_FILE;
3093
3094
	failed:
3095
	return ha_log_pbxt_thread_error_for_mysql(FALSE);
3096
}
3097
3098
int ha_pbxt::xt_index_prev_read(XTOpenTablePtr ot, XTIndexPtr ind, xtBool key_only,
3099
	register XTIdxSearchKeyPtr search_key, byte *buf)
3100
{
3101
	if (key_only) {
3102
		/* We only need to read the data from the key: */
3103
		while (ot->ot_curr_rec_id) {
3104
			if (search_key && !search_key->sk_on_key)
3105
				break;
3106
3107
			switch (xt_tab_visible(ot)) {
3108
				case FALSE:
3109
					if (xt_idx_prev(ot, ind, search_key))
3110
						break;
3111
				case XT_ERR:
3112
					goto failed;
3113
				case XT_NEW:
3114
					if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3115
						goto failed;
3116
					if (xt_index_in_range(ot, ind, search_key, buf))
3117
						return 0;
3118
					if (!xt_idx_next(ot, ind, search_key))
3119
						goto failed;
3120
					break;
3121
				case XT_RETRY:
3122
					if (!pb_ind_row_count && search_key) {
3123
						if (!xt_idx_search_prev(pb_open_tab, ind, search_key))
3124
							return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3125
					}
3126
					else {
3127
						if (!xt_idx_research(pb_open_tab, ind))
3128
							goto failed;
3129
					}
3130
					break;
3131
				default:
3132
					if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3133
						goto failed;
3134
					return 0;
3135
			}
3136
		}
3137
	}
3138
	else {
3139
		/* We need to read the entire record: */
3140
		while (ot->ot_curr_rec_id) {
3141
			if (search_key && !search_key->sk_on_key)
3142
				break;
3143
3144
			switch (xt_tab_read_record(ot, (xtWord1 *) buf)) {
3145
				case FALSE:
3146
					if (xt_idx_prev(ot, ind, search_key))
3147
						break;
3148
				case XT_ERR:
3149
					goto failed;
3150
				case XT_NEW:
3151
					if (xt_index_in_range(ot, ind, search_key, buf))
3152
						return 0;
3153
					if (!xt_idx_next(ot, ind, search_key))
3154
						goto failed;
3155
					break;
3156
				case XT_RETRY:
3157
					if (!pb_ind_row_count && search_key) {
3158
						if (!xt_idx_search_prev(pb_open_tab, ind, search_key))
3159
							return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3160
					}
3161
					else {
3162
						if (!xt_idx_research(pb_open_tab, ind))
3163
							goto failed;
3164
					}
3165
					break;
3166
				default:
3167
					return 0;
3168
			}
3169
		}
3170
	}
3171
	return HA_ERR_END_OF_FILE;
3172
3173
	failed:
3174
	return ha_log_pbxt_thread_error_for_mysql(FALSE);
3175
}
3176
3177
/*
 * Prepare the handler for a scan on index 'idx'.
 *
 * Sets active_index and the number of required columns and, for
 * statements that do not modify the table, switches on key-only reads
 * (pb_key_read) when every column in the read set is part of the index.
 *
 * Returns 0 on success. If indexes are disabled for the table,
 * active_index is set to MAX_KEY and the result of
 * ha_log_pbxt_thread_error_for_mysql() is returned.
 */
int ha_pbxt::index_init(uint idx, bool XT_UNUSED(sorted))
{
	XTIndexPtr	ind;
	XTThreadPtr	thread = pb_open_tab->ot_thread;

	/* select count(*) from smalltab_PBXT;
	 * ignores the error below, and continues to
	 * call index_first!
	 */
	active_index = idx;

	if (pb_open_tab->ot_table->tab_dic.dic_disable_index) {
		active_index = MAX_KEY;
		xt_tab_set_index_error(pb_open_tab->ot_table);
		return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
	}

	/* The number of columns required. This is the size of the read set
	 * for all statements (all fields are selected for now; the value
	 * was previously ha_get_max_bit(table->read_set) when not modifying).
	 */
	pb_open_tab->ot_cols_req = table->read_set->MX_BIT_SIZE();

	if (pb_open_tab->ot_is_modify) {
#ifdef XT_PRINT_INDEX_OPT
		ind = (XTIndexPtr) pb_share->sh_dic_keys[idx];

		printf("index_init %s index %d cols req=%d/%d read_bits=%X write_bits=%X index_bits=%X\n", pb_open_tab->ot_table->tab_name->ps_path, (int) idx, pb_open_tab->ot_cols_req, pb_open_tab->ot_cols_req, (int) *table->read_set->bitmap, (int) *table->write_set->bitmap, (int) *ind->mi_col_map.bitmap);
#endif
		/* {START-STAT-HACK} previously position of start statement hack,
		 * previous comment to code below: */
		/* Start a statement based transaction as soon
		 * as a read is done for a modify type statement!
		 * Previously, this was done too late!
		 */
	}
	else {
		/* Check for index coverage!
		 *
		 * Given the following table:
		 *
		 * CREATE TABLE `customer` (
		 * `c_id` int(11) NOT NULL DEFAULT '0',
		 * `c_d_id` int(11) NOT NULL DEFAULT '0',
		 * `c_w_id` int(11) NOT NULL DEFAULT '0',
		 * `c_first` varchar(16) DEFAULT NULL,
		 * `c_middle` char(2) DEFAULT NULL,
		 * `c_last` varchar(16) DEFAULT NULL,
		 * `c_street_1` varchar(20) DEFAULT NULL,
		 * `c_street_2` varchar(20) DEFAULT NULL,
		 * `c_city` varchar(20) DEFAULT NULL,
		 * `c_state` char(2) DEFAULT NULL,
		 * `c_zip` varchar(9) DEFAULT NULL,
		 * `c_phone` varchar(16) DEFAULT NULL,
		 * `c_since` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
		 * `c_credit` char(2) DEFAULT NULL,
		 * `c_credit_lim` decimal(24,12) DEFAULT NULL,
		 * `c_discount` double DEFAULT NULL,
		 * `c_balance` decimal(24,12) DEFAULT NULL,
		 * `c_ytd_payment` decimal(24,12) DEFAULT NULL,
		 * `c_payment_cnt` double DEFAULT NULL,
		 * `c_delivery_cnt` double DEFAULT NULL,
		 * `c_data` text,
		 * PRIMARY KEY (`c_w_id`,`c_d_id`,`c_id`),
		 * KEY `c_w_id` (`c_w_id`,`c_d_id`,`c_last`,`c_first`,`c_id`)
		 * ) ENGINE=PBXT;
		 *
		 * MySQL does not recognize index coverage on the following select:
		 *
		 * SELECT c_id FROM customer WHERE c_w_id = 3 AND c_d_id = 8 AND
		 * c_last = 'EINGATIONANTI' ORDER BY c_first ASC LIMIT 1;
		 *
		 * TODO: Find out why this is necessary, MyISAM does not
		 * seem to have this problem!
		 */
		ind = (XTIndexPtr) pb_share->sh_dic_keys[idx];
		if (MX_BIT_IS_SUBSET(table->read_set, &ind->mi_col_map))
			pb_key_read = TRUE;
#ifdef XT_PRINT_INDEX_OPT
		printf("index_init %s index %d cols req=%d/%d read_bits=%X write_bits=%X index_bits=%X converage=%d\n", pb_open_tab->ot_table->tab_name->ps_path, (int) idx, pb_open_tab->ot_cols_req, table->read_set->MX_BIT_SIZE(), (int) *table->read_set->bitmap, (int) *table->write_set->bitmap, (int) *ind->mi_col_map.bitmap, (int) (MX_BIT_IS_SUBSET(table->read_set, &ind->mi_col_map) != 0));
#endif
	}

	xt_xlog_check_long_writer(thread);

	/* Count this index scan in the thread statistics. */
	pb_open_tab->ot_thread->st_statistics.st_scan_index++;
	return 0;
}
3265
3266
int ha_pbxt::index_end()
3267
{
3268
	int err = 0;
3269
3270
	XT_TRACE_METHOD();
3271
3272
	XTThreadPtr thread = pb_open_tab->ot_thread;
3273
3274
	/*
3275
	 * the assertion below is not always held, because the sometimes handler is unlocked
3276
	 * before this function is called
3277
	 */
3278
	/*ASSERT_NS(pb_ex_in_use);*/
3279
3280
	if (pb_open_tab->ot_ind_rhandle) {
3281
		xt_ind_release_handle(pb_open_tab->ot_ind_rhandle, FALSE, thread);
3282
		pb_open_tab->ot_ind_rhandle = NULL;
3283
	}
3284
3285
	/*
3286
	 * make permanent the lock for the last scanned row
3287
	 */
3288
	if (pb_open_tab)
3289
		pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &thread->st_lock_list);
3290
3291
	xt_xlog_check_long_writer(thread);
3292
3293
	active_index = MAX_KEY;
3294
	XT_RETURN(err);
3295
}
3296
3297
#ifdef XT_TRACK_RETURNED_ROWS
3298
/*
 * Row tracking (XT_TRACK_RETURNED_ROWS builds only): trace the start of
 * a scan on 'index' and clear the record of rows returned so far.
 */
void ha_start_scan(XTOpenTablePtr ot, u_int index)
{
	u_int slot = 0;

	xt_ttracef(ot->ot_thread, "SCAN %d:%d\n", (int) ot->ot_table->tab_id, (int) index);

	ot->ot_rows_ret_curr = 0;
	while (slot < ot->ot_rows_ret_max) {
		ot->ot_rows_returned[slot] = 0;
		slot++;
	}
}
3305
3306
/*
 * Row tracking (XT_TRACK_RETURNED_ROWS builds only): note that the
 * current row of 'ot' is being returned by a scan on 'index'.
 *
 * ot_rows_returned is indexed by row ID and holds the record ID that
 * was returned for that row. An error is reported (trace dump plus
 * printf) if the row ID or record ID is zero, or if the row already has
 * an entry.
 */
void ha_return_row(XTOpenTablePtr ot, u_int index)
{
	xt_ttracef(ot->ot_thread, "%d:%d ROW=%d:%d\n",
		(int) ot->ot_table->tab_id, (int) index, (int) ot->ot_curr_row_id, (int) ot->ot_curr_rec_id);
	ot->ot_rows_ret_curr++;

	/* Grow the array so that it has a (zeroed) slot for this row ID.
	 * NOTE(review): if ASSERT_NS() compiles to nothing, a failed
	 * reallocation is not handled here - confirm.
	 */
	if (ot->ot_curr_row_id >= ot->ot_rows_ret_max) {
		if (!xt_realloc_ns((void **) &ot->ot_rows_returned, (ot->ot_curr_row_id+1) * sizeof(xtRecordID)))
			ASSERT_NS(FALSE);
		memset(&ot->ot_rows_returned[ot->ot_rows_ret_max], 0, (ot->ot_curr_row_id+1 - ot->ot_rows_ret_max) * sizeof(xtRecordID));
		ot->ot_rows_ret_max = ot->ot_curr_row_id+1;
	}

	if (ot->ot_curr_row_id && ot->ot_curr_rec_id && !ot->ot_rows_returned[ot->ot_curr_row_id]) {
		/* First time this row is returned in the scan. */
		ot->ot_rows_returned[ot->ot_curr_row_id] = ot->ot_curr_rec_id;
		return;
	}

	char *sql = *thd_query(current_thd);

	xt_ttracef(ot->ot_thread, "DUP %d:%d %s\n",
		(int) ot->ot_table->tab_id, (int) index, *thd_query(current_thd));
	xt_dump_trace();
	printf("ERROR: row=%d rec=%d newr=%d, already returned!\n", (int) ot->ot_curr_row_id, (int) ot->ot_rows_returned[ot->ot_curr_row_id], (int) ot->ot_curr_rec_id);
	printf("ERROR: %s\n", sql);
#ifdef XT_WIN
	FatalAppExit(0, "Debug Me!");
#endif
}
3332
#endif
3333
3334
/*
 * Start a search on index 'idx' for 'key' (key_len bytes) and load the
 * first matching row into 'buf'.
 *
 * find_flag selects the direction and kind of the search:
 *  - HA_READ_PREFIX_LAST, HA_READ_PREFIX_LAST_OR_PREV, HA_READ_BEFORE_KEY
 *    and HA_READ_KEY_OR_PREV search and read backward;
 *  - all other values search and read forward.
 * The prefix flavours add SEARCH_PREFIX to the search flags.
 *
 * Sets table->status and returns 0 on success, HA_ERR_WRONG_INDEX if
 * idx is MAX_KEY, or the error of the search/read otherwise.
 */
int ha_pbxt::index_read_xt(byte * buf, uint idx, const byte *key, uint key_len, enum ha_rkey_function find_flag)
{
	if (idx == MAX_KEY) {
		table->status = STATUS_NOT_FOUND;
		return HA_ERR_WRONG_INDEX;
	}

	int					err = 0;
	int					prefix = 0;
	bool				reverse = false;
	XTIndexPtr			ind;
	XTIdxSearchKeyRec	search_key;

#ifdef XT_TRACK_RETURNED_ROWS
	ha_start_scan(pb_open_tab, idx);
#endif

	/* This call starts a search on this handler! */
	pb_ind_row_count = 0;

	ASSERT_NS(pb_ex_in_use);

	XT_PRINT1(pb_open_tab->ot_thread, "index_read_xt (%s)\n", pb_share->sh_table_path->ps_path);
	XT_DISABLED_TRACE(("search tx=%d val=%d update=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(key), pb_modified));
	ind = (XTIndexPtr) pb_share->sh_dic_keys[idx];

	/* Work out the direction and whether this is a prefix search. */
	switch (find_flag) {
		case HA_READ_PREFIX_LAST:
		case HA_READ_PREFIX_LAST_OR_PREV:
			prefix = SEARCH_PREFIX;
			reverse = true;
			break;
		case HA_READ_BEFORE_KEY:
		case HA_READ_KEY_OR_PREV: // I assume you want to be positioned on the last entry in the key duplicate list!!
			reverse = true;
			break;
		case HA_READ_PREFIX:
			prefix = SEARCH_PREFIX;
			break;
		default:
			/* HA_READ_KEY_EXACT, HA_READ_KEY_OR_NEXT, HA_READ_AFTER_KEY
			 * and anything else: forward, no prefix. */
			break;
	}

	if (reverse) {
		xt_idx_prep_key(ind, &search_key, ((find_flag == HA_READ_BEFORE_KEY) ? 0 : XT_SEARCH_AFTER_KEY) | prefix, (xtWord1 *) key, (size_t) key_len);
		if (xt_idx_search_prev(pb_open_tab, ind, &search_key))
			err = xt_index_prev_read(pb_open_tab, ind, pb_key_read,
				(find_flag == HA_READ_PREFIX_LAST) ? &search_key : NULL, buf);
		else
			err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
	}
	else {
		xt_idx_prep_key(ind, &search_key, ((find_flag == HA_READ_AFTER_KEY) ? XT_SEARCH_AFTER_KEY : 0) | prefix, (xtWord1 *) key, key_len);
		if (xt_idx_search(pb_open_tab, ind, &search_key)) {
			err = xt_index_next_read(pb_open_tab, ind, pb_key_read,
				(find_flag == HA_READ_KEY_EXACT || find_flag == HA_READ_PREFIX) ? &search_key : NULL, buf);
			/* Running off the end is reported as "key not found"
			 * for HA_READ_AFTER_KEY. */
			if (err == HA_ERR_END_OF_FILE && find_flag == HA_READ_AFTER_KEY)
				err = HA_ERR_KEY_NOT_FOUND;
		}
		else
			err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
	}

	pb_ind_row_count++;
#ifdef XT_TRACK_RETURNED_ROWS
	if (!err)
		ha_return_row(pb_open_tab, idx);
#endif
	XT_DISABLED_TRACE(("search tx=%d val=%d err=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(key), err));

	if (err)
		table->status = STATUS_NOT_FOUND;
	else {
		pb_open_tab->ot_thread->st_statistics.st_row_select++;
		table->status = 0;
	}
	return err;
}
3404
3405
/*
3406
 * Positions an index cursor to the index specified in the handle. Fetches the
3407
 * row if available. If the key value is null, begin at the first key of the
3408
 * index.
3409
 */
3410
int ha_pbxt::index_read(byte * buf, const byte * key, uint key_len, enum ha_rkey_function find_flag)
3411
{
3412
	//statistic_increment(ha_read_key_count,&LOCK_status);
3413
	return index_read_xt(buf, active_index, key, key_len, find_flag);
3414
}
3415
3416
/*
 * Like index_read(), but the index to search is given explicitly by
 * 'idx' instead of being taken from active_index.
 */
int ha_pbxt::index_read_idx(byte * buf, uint idx, const byte *key, uint key_len, enum ha_rkey_function find_flag)
{
	int err = index_read_xt(buf, idx, key, key_len, find_flag);

	return err;
}
3421
3422
/*
 * Search the active index for 'key' using HA_READ_PREFIX_LAST.
 */
int ha_pbxt::index_read_last(byte * buf, const byte * key, uint key_len)
{
	int err = index_read_xt(buf, active_index, key, key_len, HA_READ_PREFIX_LAST);

	return err;
}
3427
3428
/*
3429
 * Used to read forward through the index.
3430
 */
3431
int ha_pbxt::index_next(byte * buf)
3432
{
3433
	int			err = 0;
3434
	XTIndexPtr	ind;
3435
3436
	XT_TRACE_METHOD();
3437
	//statistic_increment(ha_read_next_count,&LOCK_status);
3438
	ASSERT_NS(pb_ex_in_use);
3439
3440
	if (active_index == MAX_KEY) {
3441
		err = HA_ERR_WRONG_INDEX;
3442
		goto done;
3443
	}
3444
	ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3445
3446
	if (!xt_idx_next(pb_open_tab, ind, NULL))
3447
		err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3448
	else
3449
		err = xt_index_next_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3450
3451
	pb_ind_row_count++;
3452
#ifdef XT_TRACK_RETURNED_ROWS
3453
	if (!err)
3454
		ha_return_row(pb_open_tab, active_index);
3455
#endif
3456
	done:
3457
	if (err)
3458
		table->status = STATUS_NOT_FOUND;
3459
	else {
3460
		pb_open_tab->ot_thread->st_statistics.st_row_select++;
3461
		table->status = 0;
3462
	}
3463
	XT_RETURN(err);
3464
}
3465
3466
/*
3467
 * I have implemented this because there is currently a
3468
 * bug in handler::index_next_same().
3469
 *
3470
 * drop table if exists t1;
3471
 * CREATE TABLE t1 (a int, b int, primary key(a,b))
3472
 * PARTITION BY KEY(b,a) PARTITIONS 2;
3473
 * insert into t1 values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6);
3474
 * select * from t1 where a = 4;
3475
 * 
3476
 */
3477
int ha_pbxt::index_next_same(byte * buf, const byte *key, uint length)
3478
{
3479
	int					err = 0;
3480
	XTIndexPtr			ind;
3481
	XTIdxSearchKeyRec	search_key;
3482
3483
	XT_TRACE_METHOD();
3484
	//statistic_increment(ha_read_next_count,&LOCK_status);
3485
	ASSERT_NS(pb_ex_in_use);
3486
3487
	if (active_index == MAX_KEY) {
3488
		err = HA_ERR_WRONG_INDEX;
3489
		goto done;
3490
	}
3491
	ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3492
3493
	search_key.sk_key_value.sv_flags = HA_READ_KEY_EXACT;
3494
	search_key.sk_key_value.sv_rec_id = 0;
3495
	search_key.sk_key_value.sv_row_id = 0;
3496
	search_key.sk_key_value.sv_key = search_key.sk_key_buf;
3497
	search_key.sk_key_value.sv_length = myxt_create_key_from_key(ind, search_key.sk_key_buf, (xtWord1 *) key, (u_int) length);
3498
	search_key.sk_on_key = TRUE;
3499
3500
	if (!xt_idx_next(pb_open_tab, ind, &search_key))
3501
		err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3502
	else
3503
		err = xt_index_next_read(pb_open_tab, ind, pb_key_read, &search_key, buf);
3504
3505
	pb_ind_row_count++;
3506
#ifdef XT_TRACK_RETURNED_ROWS
3507
	if (!err)
3508
		ha_return_row(pb_open_tab, active_index);
3509
#endif
3510
	done:
3511
	if (err)
3512
		table->status = STATUS_NOT_FOUND;
3513
	else {
3514
		pb_open_tab->ot_thread->st_statistics.st_row_select++;
3515
		table->status = 0;
3516
	}
3517
	XT_RETURN(err);
3518
}
3519
3520
/*
3521
 * Used to read backwards through the index.
3522
 */
3523
int ha_pbxt::index_prev(byte * buf)
3524
{
3525
	int			err = 0;
3526
	XTIndexPtr	ind;
3527
3528
	XT_TRACE_METHOD();
3529
	//statistic_increment(ha_read_prev_count,&LOCK_status);
3530
	ASSERT_NS(pb_ex_in_use);
3531
3532
	if (active_index == MAX_KEY) {
3533
		err = HA_ERR_WRONG_INDEX;
3534
		goto done;
3535
	}
3536
	ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3537
3538
	if (!xt_idx_prev(pb_open_tab, ind, NULL))
3539
		err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3540
	else
3541
		err = xt_index_prev_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3542
3543
	pb_ind_row_count++;
3544
#ifdef XT_TRACK_RETURNED_ROWS
3545
	if (!err)
3546
		ha_return_row(pb_open_tab, active_index);
3547
#endif
3548
	done:
3549
	if (err)
3550
		table->status = STATUS_NOT_FOUND;
3551
	else {
3552
		pb_open_tab->ot_thread->st_statistics.st_row_select++;
3553
		table->status = 0;
3554
	}
3555
	XT_RETURN(err);
3556
}
3557
3558
/*
3559
 * index_first() asks for the first key in the index.
3560
 */
3561
int ha_pbxt::index_first(byte * buf)
3562
{
3563
	int					err = 0;
3564
	XTIndexPtr			ind;
3565
	XTIdxSearchKeyRec	search_key;
3566
3567
	XT_TRACE_METHOD();
3568
	//statistic_increment(ha_read_first_count,&LOCK_status);
3569
	ASSERT_NS(pb_ex_in_use);
3570
3571
	/* This is required because MySQL ignores the error returned
3572
	 * init init_index sometimes, for example:
3573
	 *
3574
     * if (!table->file->inited)
3575
     *    table->file->ha_index_init(tab->index, tab->sorted);
3576
     *  if ((error=tab->table->file->index_first(tab->table->record[0])))
3577
	 */
3578
	if (active_index == MAX_KEY) {
3579
		err = HA_ERR_WRONG_INDEX;
3580
		goto done;
3581
	}
3582
3583
#ifdef XT_TRACK_RETURNED_ROWS
3584
	ha_start_scan(pb_open_tab, active_index);
3585
#endif
3586
	pb_ind_row_count = 0;
3587
3588
	ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3589
3590
	xt_idx_prep_key(ind, &search_key, XT_SEARCH_FIRST_FLAG, NULL, 0);
3591
	if (!xt_idx_search(pb_open_tab, ind, &search_key))
3592
		err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3593
	else
3594
		err = xt_index_next_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3595
3596
	pb_ind_row_count++;
3597
#ifdef XT_TRACK_RETURNED_ROWS
3598
	if (!err)
3599
		ha_return_row(pb_open_tab, active_index);
3600
#endif
3601
	done:
3602
	if (err)
3603
		table->status = STATUS_NOT_FOUND;
3604
	else {
3605
		pb_open_tab->ot_thread->st_statistics.st_row_select++;
3606
		table->status = 0;
3607
	}
3608
	XT_RETURN(err);
3609
}
3610
3611
/*
3612
 * index_last() asks for the last key in the index.
3613
 */
3614
int ha_pbxt::index_last(byte * buf)
3615
{
3616
	int					err = 0;
3617
	XTIndexPtr			ind;
3618
	XTIdxSearchKeyRec	search_key;
3619
3620
	XT_TRACE_METHOD();
3621
	//statistic_increment(ha_read_last_count,&LOCK_status);
3622
	ASSERT_NS(pb_ex_in_use);
3623
3624
	if (active_index == MAX_KEY) {
3625
		err = HA_ERR_WRONG_INDEX;
3626
		goto done;
3627
	}
3628
3629
#ifdef XT_TRACK_RETURNED_ROWS
3630
	ha_start_scan(pb_open_tab, active_index);
3631
#endif
3632
	pb_ind_row_count = 0;
3633
3634
	ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3635
3636
	xt_idx_prep_key(ind, &search_key, XT_SEARCH_AFTER_LAST_FLAG, NULL, 0);
3637
	if (!xt_idx_search_prev(pb_open_tab, ind, &search_key))
3638
		err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3639
	else
3640
		err = xt_index_prev_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3641
3642
	pb_ind_row_count++;
3643
#ifdef XT_TRACK_RETURNED_ROWS
3644
	if (!err)
3645
		ha_return_row(pb_open_tab, active_index);
3646
#endif
3647
	done:
3648
	if (err)
3649
		table->status = STATUS_NOT_FOUND;
3650
	else {
3651
		pb_open_tab->ot_thread->st_statistics.st_row_select++;
3652
		table->status = 0;
3653
	}
3654
	XT_RETURN(err);
3655
}
3656
3657
/*
3658
 * -----------------------------------------------------------------------
3659
 * RANDOM/SEQUENTIAL READ METHODS
3660
 */
3661
 
3662
/*
3663
 * rnd_init() is called when the system wants the storage engine to do a table
3664
 * scan.
3665
 * See the example in the introduction at the top of this file to see when
3666
 * rnd_init() is called.
3667
 *
3668
 * Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc,
3669
 * and sql_update.cc.
3670
 */
3671
int ha_pbxt::rnd_init(bool scan)
3672
{
3673
	int			err = 0;
3674
	XTThreadPtr	thread = pb_open_tab->ot_thread;
3675
3676
	XT_PRINT1(thread, "rnd_init (%s)\n", pb_share->sh_table_path->ps_path);
3677
	XT_DISABLED_TRACE(("seq scan tx=%d\n", (int) thread->st_xact_data->xd_start_xn_id));
3678
3679
	/* Call xt_tab_seq_exit() to make sure the resources used by the previous
3680
	 * scan are freed. In particular make sure cache page ref count is decremented.
3681
	 * This is needed as rnd_init() can be called mulitple times w/o matching calls 
3682
	 * to rnd_end(). Our experience is that currently this is done in queries like:
3683
	 *
3684
	 * SELECT t1.c1,t2.c1 FROM t1 LEFT JOIN t2 USING (c1);
3685
	 * UPDATE t1 LEFT JOIN t2 USING (c1) SET t1.c1 = t2.c1 WHERE t1.c1 = t2.c1;
3686
	 *
3687
	 * when scanning inner tables. It is important to understand that in such case
3688
	 * multiple calls to rnd_init() are not semantically equal to a new query. For
3689
	 * example we cannot make row locks permanent as we do in rnd_end(), as 
3690
	 * ha_pbxt::unlock_row still can be called.
3691
	 */
3692
	xt_tab_seq_exit(pb_open_tab);
3693
3694
	/* The number of columns required: */
3695
	if (pb_open_tab->ot_is_modify) {
3696
		pb_open_tab->ot_cols_req = table->read_set->MX_BIT_SIZE();
3697
		/* {START-STAT-HACK} previously position of start statement hack,
3698
		 * previous comment to code below: */
3699
		/* Start a statement based transaction as soon
3700
		 * as a read is done for a modify type statement!
3701
		 * Previously, this was done too late!
3702
		 */
3703
	}
3704
	else {
1455.3.5 by Vladimir Kolesnikov
fixed fetch field count in select (all fields are selected for now)
3705
		//pb_open_tab->ot_cols_req = ha_get_max_bit(table->read_set);
3706
		pb_open_tab->ot_cols_req = table->read_set->MX_BIT_SIZE();
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
3707
3708
		/*
3709
		 * in case of queries like SELECT COUNT(*) FROM t
3710
		 * table->read_set is empty. Otoh, ot_cols_req == 0 can be treated
3711
		 * as "all columns" by some internal code (see e.g. myxt_load_row), 
3712
		 * which makes such queries very ineffective for the records with 
3713
		 * extended part. Setting column count to 1 makes sure that the 
3714
		 * extended part will not be acessed in most cases.
3715
		 */
3716
3717
		if (pb_open_tab->ot_cols_req == 0)
3718
			pb_open_tab->ot_cols_req = 1;
3719
	}
3720
3721
	ASSERT_NS(pb_ex_in_use);
3722
	if (scan) {
3723
		if (!xt_tab_seq_init(pb_open_tab))
3724
			err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3725
	}
3726
	else
3727
		xt_tab_seq_reset(pb_open_tab);
3728
3729
	xt_xlog_check_long_writer(thread);
3730
3731
	return err;
3732
}
3733
3734
int ha_pbxt::rnd_end()
3735
{
3736
	XT_TRACE_METHOD();
3737
3738
	/*
3739
	 * make permanent the lock for the last scanned row
3740
	 */
3741
	XTThreadPtr thread = pb_open_tab->ot_thread;
3742
	if (pb_open_tab)
3743
		pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &thread->st_lock_list);
3744
3745
	xt_xlog_check_long_writer(thread);
3746
3747
	xt_tab_seq_exit(pb_open_tab);
3748
	XT_RETURN(0);
3749
}
3750
3751
/*
3752
 * This is called for each row of the table scan. When you run out of records
3753
 * you should return HA_ERR_END_OF_FILE. Fill buff up with the row information.
3754
 * The Field structure for the table is the key to getting data into buf
3755
 * in a manner that will allow the server to understand it.
3756
 *
3757
 * Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc,
3758
 * and sql_update.cc.
3759
 */
3760
int ha_pbxt::rnd_next(byte *buf)
3761
{
3762
	int		err = 0;
3763
	xtBool	eof;
3764
3765
	XT_TRACE_METHOD();
3766
	ASSERT_NS(pb_ex_in_use);
3767
	//statistic_increment(ha_read_rnd_next_count, &LOCK_status);
3768
	xt_xlog_check_long_writer(pb_open_tab->ot_thread);
3769
3770
	if (!xt_tab_seq_next(pb_open_tab, (xtWord1 *) buf, &eof))
3771
		err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3772
	else if (eof)
3773
		err = HA_ERR_END_OF_FILE;
3774
3775
	if (err)
3776
		table->status = STATUS_NOT_FOUND;
3777
	else {
3778
		pb_open_tab->ot_thread->st_statistics.st_row_select++;
3779
		table->status = 0;
3780
	}
3781
	XT_RETURN(err);
3782
}
3783
3784
/*
3785
 * position() is called after each call to rnd_next() if the data needs
3786
 * to be ordered. You can do something like the following to store
3787
 * the position:
3788
 * ha_store_ptr(ref, ref_length, current_position);
3789
 *
3790
 * The server uses ref to store data. ref_length in the above case is
3791
 * the size needed to store current_position. ref is just a byte array
3792
 * that the server will maintain. If you are using offsets to mark rows, then
3793
 * current_position should be the offset. If it is a primary key like in
3794
 * BDB, then it needs to be a primary key.
3795
 *
3796
 * Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
3797
 */
3798
void ha_pbxt::position(const byte *XT_UNUSED(record))
3799
{
3800
	XT_TRACE_METHOD();
3801
	ASSERT_NS(pb_ex_in_use);
3802
	/*
3803
	 * I changed this from using little endian to big endian.
3804
	 *
3805
	 * The reason is because sometime the pointer are sorted.
3806
	 * When they are are sorted a binary compare is used.
3807
	 * A binary compare sorts big endian values correctly!
3808
	 *
3809
	 * Take the followin example:
3810
	 *
3811
	 * create table t1 (a int, b text);
3812
	 * insert into t1 values (1, 'aa'), (1, 'bb'), (1, 'cc');
3813
	 * select group_concat(b) from t1 group by a;
3814
	 *
3815
	 * With little endian pointers the result is:
3816
	 * aa,bb,cc
3817
	 *
3818
	 * With big-endian pointer the result is:
3819
	 * aa,cc,bb
3820
	 *
3821
	 */
3822
	(void) ASSERT_NS(XT_RECORD_OFFS_SIZE == 4);
3823
	mi_int4store((xtWord1 *) ref, pb_open_tab->ot_curr_rec_id);
3824
	XT_RETURN_VOID;
3825
}
3826
3827
/*
3828
 * Given the #ROWID retrieve the record.
3829
 *
3830
 * Called from filesort.cc records.cc sql_insert.cc sql_select.cc sql_update.cc.
3831
 */
3832
int ha_pbxt::rnd_pos(byte * buf, byte *pos)
3833
{
3834
	int err = 0;
3835
3836
	XT_TRACE_METHOD();
3837
	ASSERT_NS(pb_ex_in_use);
3838
	//statistic_increment(ha_read_rnd_count, &LOCK_status);
3839
	XT_PRINT1(pb_open_tab->ot_thread, "rnd_pos (%s)\n", pb_share->sh_table_path->ps_path);
3840
3841
	pb_open_tab->ot_curr_rec_id = mi_uint4korr((xtWord1 *) pos);
3842
	switch (xt_tab_dirty_read_record(pb_open_tab, (xtWord1 *) buf)) {
3843
		case FALSE:
3844
			err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3845
			break;
3846
		default:
3847
			break;
3848
	}		
3849
3850
	if (err)
3851
		table->status = STATUS_NOT_FOUND;
3852
	else {
3853
		pb_open_tab->ot_thread->st_statistics.st_row_select++;
3854
		table->status = 0;
3855
	}
3856
	XT_RETURN(err);
3857
}
3858
3859
/*
3860
 * -----------------------------------------------------------------------
3861
 * INFO METHODS
3862
 */
3863
 
3864
/*
3865
	::info() is used to return information to the optimizer.
3866
	Currently this table handler doesn't implement most of the fields
3867
	really needed. SHOW also makes use of this data
3868
	Another note, you will probably want to have the following in your
3869
	code:
3870
	if (records < 2)
3871
		records = 2;
3872
	The reason is that the server will optimize for cases of only a single
3873
	record. If in a table scan you don't know the number of records
3874
	it will probably be better to set records to two so you can return
3875
	as many records as you need.
3876
	Along with records a few more variables you may wish to set are:
3877
		records
3878
		deleted
3879
		data_file_length
3880
		index_file_length
3881
		delete_length
3882
		check_time
3883
	Take a look at the public variables in handler.h for more information.
3884
3885
	Called in:
3886
		filesort.cc
3887
		ha_heap.cc
3888
		item_sum.cc
3889
		opt_sum.cc
3890
		sql_delete.cc
3891
		sql_delete.cc
3892
		sql_derived.cc
3893
		sql_select.cc
3894
		sql_select.cc
3895
		sql_select.cc
3896
		sql_select.cc
3897
		sql_select.cc
3898
		sql_show.cc
3899
		sql_show.cc
3900
		sql_show.cc
3901
		sql_show.cc
3902
		sql_table.cc
3903
		sql_union.cc
3904
		sql_update.cc
3905
3906
*/
3907
#if MYSQL_VERSION_ID < 50114
3908
void ha_pbxt::info(uint flag)
3909
#else
3910
int ha_pbxt::info(uint flag)
3911
#endif
3912
{
3913
	XTOpenTablePtr	ot;
3914
	int				in_use;
3915
3916
	XT_TRACE_METHOD();
3917
	
3918
	if (!(in_use = pb_ex_in_use)) {
3919
		pb_ex_in_use = 1;
3920
		if (pb_share && pb_share->sh_table_lock) {
3921
			/* If some thread has an exclusive lock, then
3922
			 * we wait for the lock to be removed:
3923
			 */
3924
#if MYSQL_VERSION_ID < 50114
3925
			ha_wait_for_shared_use(this, pb_share);
3926
			pb_ex_in_use = 1;
3927
#else
3928
			if (!ha_wait_for_shared_use(this, pb_share))
3929
				return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3930
#endif
3931
		}
3932
	}
3933
3934
	if ((ot = pb_open_tab)) {
3935
		if (flag & HA_STATUS_VARIABLE) {
3936
			/* {FREE-ROWS-BAD}
3937
			 * Free row count is not reliable, so ignore it.
3938
			 * The problem is if tab_row_fnum > tab_row_eof_id - 1 then
3939
			 * we have a very bad result.
3940
			 *
3941
			 * If stats.records+EXTRA_RECORDS == 0 as returned by 
3942
			 * estimate_rows_upper_bound(), then filesort will crash here:
3943
			 *
3944
			 * make_sortkey(param,sort_keys[idx++],ref_pos);
3945
			 * 
3946
			 * #0	0x000bf69c in Field_long::sort_string at field.cc:3766
3947
			 * #1	0x0022e1f1 in make_sortkey at filesort.cc:769
3948
			 * #2	0x0022f1cf in find_all_keys at filesort.cc:619
3949
			 * #3	0x00230eec in filesort at filesort.cc:243
3950
			 * #4	0x001b9d89 in mysql_update at sql_update.cc:415
3951
			 * #5	0x0010db12 in mysql_execute_command at sql_parse.cc:2959
3952
			 * #6	0x0011480d in mysql_parse at sql_parse.cc:5787
3953
			 * #7	0x00115afb in dispatch_command at sql_parse.cc:1200
3954
			 * #8	0x00116de2 in do_command at sql_parse.cc:857
3955
			 * #9	0x00101ee4 in handle_one_connection at sql_connect.cc:1115
3956
			 *
3957
			 * The problem is that sort_keys is allocated to handle just 1 vector.
3958
			 * Sorting one vector crashes. Although I could not find a check for
3959
			 * the actual number of vectors. But it must assume that it has at
3960
			 * least EXTRA_RECORDS vectors.
3961
			 */
3962
			stats.deleted = /* ot->ot_table->tab_row_fnum */ 0;
3963
			stats.records = (ha_rows) (ot->ot_table->tab_row_eof_id - 1 /* - stats.deleted */);
3964
			stats.data_file_length = xt_rec_id_to_rec_offset(ot->ot_table, ot->ot_table->tab_rec_eof_id);
3965
			stats.index_file_length = xt_ind_node_to_offset(ot->ot_table, ot->ot_table->tab_ind_eof);
3966
			stats.delete_length = ot->ot_table->tab_rec_fnum * ot->ot_rec_size;
3967
			//check_time = info.check_time;
3968
			stats.mean_rec_length = (ulong) ot->ot_rec_size;
3969
		}
3970
3971
		if (flag & HA_STATUS_CONST) {
3972
			ha_rows		rec_per_key;
3973
			XTIndexPtr	ind;
3974
			TABLE_SHARE	*share= TS(table);
3975
3976
			stats.max_data_file_length = 0x00FFFFFF;
3977
			stats.max_index_file_length = 0x00FFFFFF;
3978
			//stats.create_time = info.create_time;
3979
			ref_length = XT_RECORD_OFFS_SIZE;
3980
			//share->db_options_in_use = info.options;
3981
			stats.block_size = XT_INDEX_PAGE_SIZE;
3982
3983
#ifdef DRIZZLED
3984
			if (share->tmp_table == message::Table::STANDARD)
3985
#else
3986
			if (share->tmp_table == NO_TMP_TABLE)
3987
#endif
3988
#ifdef DRIZZLED
3989
#define WHICH_MUTEX			mutex
3990
#elif MYSQL_VERSION_ID >= 50404
3991
#define WHICH_MUTEX			LOCK_ha_data
3992
#else
3993
			if (share->tmp_table == NO_TMP_TABLE)
3994
#define WHICH_MUTEX			mutex
3995
#endif
3996
3997
#ifdef SAFE_MUTEX
3998
3999
#if MYSQL_VERSION_ID < 50404
4000
#if MYSQL_VERSION_ID < 50123
4001
				safe_mutex_lock(&share->mutex,__FILE__,__LINE__);
4002
#else
4003
				safe_mutex_lock(&share->mutex,0,__FILE__,__LINE__);
4004
#endif
4005
#else
4006
				safe_mutex_lock(&share->WHICH_MUTEX,0,__FILE__,__LINE__);
4007
#endif
4008
4009
#else // SAFE_MUTEX
4010
4011
#ifdef MY_PTHREAD_FASTMUTEX
4012
				my_pthread_fastmutex_lock(&share->WHICH_MUTEX);
4013
#else
4014
				pthread_mutex_lock(&share->WHICH_MUTEX);
4015
#endif
4016
4017
#endif // SAFE_MUTEX
4018
#ifdef DRIZZLED
4019
			set_prefix(share->keys_in_use, share->keys);
4020
			share->keys_for_keyread&= share->keys_in_use;
4021
#else
4022
			share->keys_in_use.set_prefix(share->keys);
4023
			//share->keys_in_use.intersect_extended(info.key_map);
4024
			share->keys_for_keyread.intersect(share->keys_in_use);
4025
			//share->db_record_offset = info.record_offset;
4026
#endif
4027
			for (u_int i = 0; i < share->keys; i++) {
4028
				ind = pb_share->sh_dic_keys[i];
4029
4030
				rec_per_key = 0;
4031
				if (ind->mi_seg_count == 1 && (ind->mi_flags & HA_NOSAME))
4032
					rec_per_key = 1;
4033
				else {
4034
					rec_per_key = 1;	
4035
				}
4036
				for (u_int j = 0; j < table->key_info[i].key_parts; j++)
4037
	 				table->key_info[i].rec_per_key[j] = (ulong) rec_per_key;
4038
			}
4039
#ifdef DRIZZLED
4040
			if (share->tmp_table == message::Table::STANDARD)
4041
#else
4042
			if (share->tmp_table == NO_TMP_TABLE)
4043
#endif
4044
#ifdef SAFE_MUTEX
4045
				safe_mutex_unlock(&share->WHICH_MUTEX,__FILE__,__LINE__);
4046
#else
4047
#ifdef MY_PTHREAD_FASTMUTEX
4048
				pthread_mutex_unlock(&share->WHICH_MUTEX.mutex);
4049
#else
4050
				pthread_mutex_unlock(&share->WHICH_MUTEX);
4051
#endif
4052
#endif
4053
	  		/*
4054
			 Set data_file_name and index_file_name to point at the symlink value
4055
			 if table is symlinked (Ie;  Real name is not same as generated name)
4056
	   		*/
4057
	   		/*
4058
			data_file_name = index_file_name = 0;
4059
			fn_format(name_buff, file->filename, "", MI_NAME_DEXT, 2);
4060
			if (strcmp(name_buff, info.data_file_name))
4061
				data_file_name = info.data_file_name;
4062
			strmov(fn_ext(name_buff), MI_NAME_IEXT);
4063
			if (strcmp(name_buff, info.index_file_name))
4064
				index_file_name = info.index_file_name;
4065
			*/
4066
		}
4067
4068
 		if (flag & HA_STATUS_ERRKEY)
4069
	 		errkey = ot->ot_err_index_no;
4070
4071
		/* {PRE-INC}
4072
		 * We assume they want the next value to be returned!
4073
		 *
4074
		 * At least, this is what works for the following code:
4075
		 *
4076
		 * create table t1 (a int auto_increment primary key)
4077
		 * auto_increment=100
4078
		 * engine=pbxt
4079
		 * partition by list (a)
4080
		 * (partition p0 values in (1, 98,99, 100, 101));
4081
		 * create index inx on t1 (a);
4082
		 * insert into t1 values (null);
4083
		 * select * from t1;
4084
		 */
4085
		if (flag & HA_STATUS_AUTO)
4086
			stats.auto_increment_value = (ulonglong) ot->ot_table->tab_auto_inc+1;
4087
	}
4088
	else
4089
		errkey = (uint) -1;
4090
4091
	if (!in_use) {
4092
		pb_ex_in_use = 0;
4093
		if (pb_share) {
4094
			/* Someone may be waiting for me to complete: */
4095
			if (pb_share->sh_table_lock)
4096
				xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
4097
		}
4098
	}
4099
#if MYSQL_VERSION_ID < 50114
4100
	XT_RETURN_VOID;
4101
#else
4102
	XT_RETURN(0);
4103
#endif
4104
}
4105
4106
/*
4107
 * extra() is called whenever the server wishes to send a hint to
4108
 * the storage engine. The myisam engine implements the most hints.
4109
 * ha_innodb.cc has the most exhaustive list of these hints.
4110
 */
4111
int ha_pbxt::extra(enum ha_extra_function operation)
4112
{
4113
	int err = 0;
4114
4115
	XT_PRINT2(xt_get_self(), "ha_pbxt::extra (%s) operation=%d\n", pb_share->sh_table_path->ps_path, operation);
4116
4117
	switch (operation) {
4118
		case HA_EXTRA_RESET_STATE:
4119
			pb_key_read = FALSE;
4120
			pb_ignore_dup_key = 0;
4121
			/* As far as I can tell, this function is called for
4122
			 * every table at the end of a statement.
4123
			 *
4124
			 * So, during a LOCK TABLES ... UNLOCK TABLES, I use
4125
			 * this to find the end of a statement.
4126
			 * start_stmt() indicates the start of a statement,
4127
			 * and is also called once for each table in the
4128
			 * statement.
4129
			 *
4130
			 * So the statement boundary is indicated by 
4131
			 * self->st_stat_count == 0
4132
			 *
4133
			 * GOTCHA: I cannot end the transaction here!
4134
			 * I must end it in start_stmt().
4135
			 * The reason is because there are situations
4136
			 * where this would end a transaction that
4137
			 * was begin by external_lock().
4138
			 *
4139
			 * An example of this is when a function
4140
			 * is called when doing CREATE TABLE SELECT.
4141
			 */
4142
			if (pb_in_stat) {
4143
				/* NOTE: pb_in_stat is just used to avoid getting
4144
				 * self, if it is not necessary!!
4145
				 */
4146
				XTThreadPtr self;
4147
4148
				pb_in_stat = FALSE;
4149
4150
				if (!(self = ha_set_current_thread(pb_mysql_thd, &err)))
4151
					return xt_ha_pbxt_to_mysql_error(err);
4152
4153
				if (self->st_stat_count > 0) {
4154
					self->st_stat_count--;
4155
					if (self->st_stat_count == 0)
4156
						self->st_stat_ended = TRUE;
4157
				}
4158
4159
				/* This is the end of a statement, I can turn any locks into perminant locks now: */
4160
				if (pb_open_tab)
4161
					pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &self->st_lock_list);
4162
			}
4163
			if (pb_open_tab)
4164
				pb_open_tab->ot_for_update = 0;
4165
			break;
4166
		case HA_EXTRA_KEYREAD:
4167
			/* This means we so not need to read the entire record. */
4168
			pb_key_read = TRUE;
4169
			break;
4170
		case HA_EXTRA_NO_KEYREAD:
4171
			pb_key_read = FALSE;
4172
			break;
4173
		case HA_EXTRA_IGNORE_DUP_KEY:
4174
			/* NOTE!!! Calls to extra(HA_EXTRA_IGNORE_DUP_KEY) can be nested!
4175
			 * In fact, the calls are from different threads, so
4176
			 * strictly speaking I should protect this variable!!
4177
			 * Here is the sequence that produces the duplicate call:
4178
			 *
4179
			 * drop table if exists t1;
4180
			 * CREATE TABLE t1 (x int not null, y int, primary key (x)) engine=pbxt;
4181
			 * insert into t1 values (1, 3), (4, 1);
4182
			 * replace DELAYED into t1 (x, y) VALUES (4, 2);
4183
			 * select * from t1 order by x;
4184
			 *
4185
			 */
4186
			pb_ignore_dup_key++;
4187
			break;
4188
		case HA_EXTRA_NO_IGNORE_DUP_KEY:
4189
			pb_ignore_dup_key--;
4190
			break;
4191
		case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
4192
			/* MySQL needs all fields */
4193
			pb_key_read = FALSE;
4194
			break;
4195
		default:
4196
			break;
4197
	}
4198
4199
	return err;
4200
}
4201
4202
4203
/*
4204
 * Deprecated and likely to be removed in the future. Storage engines normally
4205
 * just make a call like:
4206
 * ha_pbxt::extra(HA_EXTRA_RESET);
4207
 * to handle it.
4208
 */
4209
int ha_pbxt::reset(void)
4210
{
4211
	XT_TRACE_METHOD();
4212
	extra(HA_EXTRA_RESET_STATE);
4213
	XT_RETURN(0);
4214
}
4215
4216
void ha_pbxt::unlock_row()
4217
{
4218
	XT_TRACE_METHOD();
4219
	if (pb_open_tab)
4220
		pb_open_tab->ot_table->tab_locks.xt_remove_temp_lock(pb_open_tab, FALSE);
4221
}
4222
4223
/*
4224
 * Used to delete all rows in a table. Both for cases of truncate and
4225
 * for cases where the optimizer realizes that all rows will be
4226
 * removed as a result of a SQL statement.
4227
 *
4228
 * Called from item_sum.cc by Item_func_group_concat::clear(),
4229
 * Item_sum_count_distinct::clear(), and Item_func_group_concat::clear().
4230
 * Called from sql_delete.cc by mysql_delete().
4231
 * Called from sql_select.cc by JOIN::reinit().
4232
 * Called from sql_union.cc by st_select_lex_unit::exec().
4233
 */
4234
int ha_pbxt::delete_all_rows()
4235
{
4236
	THD				*thd = current_thd;
4237
	int				err = 0;
4238
	XTThreadPtr		self;
4239
	XTDDTable		*tab_def = NULL;
4240
	char			path[PATH_MAX];
4241
4242
	XT_TRACE_METHOD();
4243
4244
	if (thd_sql_command(thd) != SQLCOM_TRUNCATE) {
4245
		/* Just like InnoDB we only handle TRUNCATE TABLE
4246
		 * by recreating the table.
4247
		 * DELETE FROM t must be handled by deleting
4248
		 * each row because it may be part of a transaction,
4249
		 * and there may be foreign key actions.
4250
		 */
4251
#ifdef DRIZZLED
4252
		XT_RETURN (errno = HA_ERR_WRONG_COMMAND);
4253
#else
4254
		XT_RETURN (my_errno = HA_ERR_WRONG_COMMAND);
4255
#endif
4256
	}
4257
4258
	if (!(self = ha_set_current_thread(thd, &err)))
4259
		return xt_ha_pbxt_to_mysql_error(err);
4260
4261
	try_(a) {
4262
		XTDictionaryRec dic;
4263
4264
		memset(&dic, 0, sizeof(dic));
4265
4266
		dic = pb_share->sh_table->tab_dic;
4267
		xt_strcpy(PATH_MAX, path, pb_share->sh_table->tab_name->ps_path);
4268
4269
		if ((tab_def = dic.dic_table))
4270
			tab_def->reference();
4271
4272
		if (!(thd_test_options(thd,OPTION_NO_FOREIGN_KEY_CHECKS)))
4273
			tab_def->deleteAllRows(self);
4274
4275
		/* We should have a table lock! */
4276
		//ASSERT(pb_lock_table);
4277
		if (!pb_table_locked) {
4278
			ha_aquire_exclusive_use(self, pb_share, this);
4279
			pushr_(ha_release_exclusive_use, pb_share);
4280
		}
4281
		ha_close_open_tables(self, pb_share, NULL);
4282
4283
		/* This is required in the case of delete_all_rows, because we must
4284
		 * ensure that the handlers no longer reference the old
4285
		 * table, so that it will not be used again. The table
4286
		 * must be re-openned, because the ID has changed!
4287
		 *
4288
		 * 0.9.86+ Must check if this is still necessary.
4289
		 *
4290
		 * the ha_close_share(self, pb_share) call was moved from above
4291
		 * (before tab_def = dic.dic_table), because of a crash.
4292
		 * Test case:
4293
		 *
4294
		 * set storage_engine = pbxt;
4295
		 * create table t1 (s1 int primary key);
4296
		 * insert into t1 values (1);
4297
		 * create table t2 (s1 int, foreign key (s1) references t1 (s1));
4298
		 * insert into t2 values (1); 
4299
		 * truncate table t1; -- this should fail because of FK constraint
4300
		 * alter table t1 engine = myisam; -- this caused crash
4301
		 *
4302
		 */
4303
		ha_close_share(self, pb_share);
4304
4305
		/* MySQL documentation requires us to reset auto increment value to 1
4306
		 * on truncate even if the table was created with a different value. 
4307
		 * This is also consistent with other engines.
4308
		 */
4309
		dic.dic_min_auto_inc = 1;
4310
4311
		xt_create_table(self, (XTPathStrPtr) path, &dic);
4312
		if (!pb_table_locked)
4313
			freer_(); // ha_release_exclusive_use(pb_share)
4314
	}
4315
	catch_(a) {
4316
		err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4317
	}
4318
	cont_(a);
4319
4320
	if (tab_def)
4321
		tab_def->release(self);
4322
4323
	XT_RETURN(err);
4324
}
4325
4326
/*
4327
 * TODO: Implement!
4328
 * Assuming a key (a,b,c)
4329
 * 
4330
 * rec_per_key[0] = SELECT COUNT(*)/COUNT(DISTINCT a) FROM t;
4331
 * rec_per_key[1] = SELECT COUNT(*)/COUNT(DISTINCT a,b) FROM t;
4332
 * rec_per_key[2] = SELECT COUNT(*)/COUNT(DISTINCT a,b,c) FROM t;
4333
 *
4334
 * After this is implemented, the selectivity can serve as
4335
 * a quick estimate of records_in_range().
4336
 *
4337
 * After you have done this, you need to redo the index_merge*
4338
 * tests. Restore the standard result to check if we
4339
 * now agree with the MyISAM strategy.
4340
 * 
4341
 */
1468.1.2 by Monty Taylor
Fixed the compile for Sun Studio.
4342
#ifdef DRIZZLED
4343
int ha_pbxt::analyze(THD *thd)
4344
#else
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
4345
int ha_pbxt::analyze(THD *thd, HA_CHECK_OPT *XT_UNUSED(check_opt))
1468.1.2 by Monty Taylor
Fixed the compile for Sun Studio.
4346
#endif
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
4347
{
4348
	int				err = 0;
4349
	XTDatabaseHPtr	db;
4350
	xtXactID		my_xn_id;
4351
	xtXactID		clean_xn_id = 0;
4352
	uint			cnt = 10;
4353
4354
	XT_TRACE_METHOD();
4355
4356
	if (!pb_open_tab) {
4357
		if ((err = reopen()))
4358
			XT_RETURN(err);
4359
	}
4360
4361
	/* Wait until the sweeper is no longer busy!
4362
	 * If you want an accurate count(*) value, then call
4363
	 * ANALYZE TABLE first. This function waits until the
4364
	 * sweeper has completed.
4365
	 */
4366
	db = pb_open_tab->ot_table->tab_db;
4367
	
4368
	/*
4369
	 * Wait until everything is cleaned up before this transaction.
4370
	 * But this will only work if the we quit out transaction!
4371
	 *
4372
	 * GOTCHA: When a PBXT table is partitioned, then analyze() is
4373
	 * called for each component. The first calls xt_xn_commit().
4374
	 * All following calls have no transaction!:
4375
	 *
4376
	 * CREATE TABLE t1 (a int)
4377
	 * PARTITION BY LIST (a)
4378
	 * (PARTITION x1 VALUES IN (10), PARTITION x2 VALUES IN (20));
4379
	 * 
4380
	 * analyze table t1;
4381
	 * 
4382
	 */
4383
	if (pb_open_tab->ot_thread && pb_open_tab->ot_thread->st_xact_data) {
4384
		my_xn_id = pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id;
4385
		XT_PRINT0(xt_get_self(), "xt_xn_commit\n");
4386
		xt_xn_commit(pb_open_tab->ot_thread);
4387
	}
4388
	else
4389
		my_xn_id = db->db_xn_to_clean_id;
4390
4391
	while ((!db->db_sw_idle || xt_xn_is_before(db->db_xn_to_clean_id, my_xn_id)) && !thd_killed(thd)) {
4392
		xt_busy_wait();
4393
4394
		/*
4395
		 * It is possible that the sweeper gets stuck because
4396
		 * it has no dictionary information!
4397
		 * As in the example below.
4398
		 *
4399
		 * create table t4 (
4400
		 *   pk_col int auto_increment primary key, a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(64) default ' '
4401
		 * ) engine=pbxt;
4402
		 *
4403
		 * insert into t4 (a1, a2, b, c, d, dummy) select * from t1;
4404
		 * 
4405
		 * create index idx12672_0 on t4 (a1);
4406
		 * create index idx12672_1 on t4 (a1,a2,b,c);
4407
		 * create index idx12672_2 on t4 (a1,a2,b);
4408
		 * analyze table t1;
4409
		 */
4410
		if (db->db_sw_idle) {
4411
			/* This will make sure we don't wait forever: */
4412
			if (clean_xn_id != db->db_xn_to_clean_id) {
4413
				clean_xn_id = db->db_xn_to_clean_id;
4414
				cnt = 10;
4415
			}
4416
			else {
4417
				cnt--;
4418
				if (!cnt)
4419
					break;
4420
			}
4421
			xt_wakeup_sweeper(db);
4422
		}
4423
	}
4424
4425
	XT_RETURN(err);
4426
}
4427
4428
#ifndef DRIZZLED
4429
/*
 * REPAIR TABLE is not implemented by the engine itself: the function
 * always returns HA_ADMIN_TRY_ALTER. Both arguments are unused.
 */
int ha_pbxt::repair(THD *XT_UNUSED(thd), HA_CHECK_OPT *XT_UNUSED(check_opt))
{
	return HA_ADMIN_TRY_ALTER;
}
4433
4434
/*
4435
 * This is mapped to "ALTER TABLE tablename TYPE=PBXT", which rebuilds
4436
 * the table in MySQL.
4437
 */
4438
int ha_pbxt::optimize(THD *XT_UNUSED(thd), HA_CHECK_OPT *XT_UNUSED(check_opt))
4439
{
4440
	return(HA_ADMIN_TRY_ALTER);
4441
}
4442
#endif
4443
4444
#ifdef DEBUG
4445
extern int pbxt_mysql_trace_on;
4446
#endif
4447
1468.1.2 by Monty Taylor
Fixed the compile for Sun Studio.
4448
#ifdef DRIZZLED
4449
int ha_pbxt::check(THD* thd)
4450
#else
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
4451
int ha_pbxt::check(THD* thd, HA_CHECK_OPT* XT_UNUSED(check_opt))
1468.1.2 by Monty Taylor
Fixed the compile for Sun Studio.
4452
#endif
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
4453
{
4454
	int				err = 0;
4455
	XTThreadPtr		self;
4456
4457
	if (!(self = ha_set_current_thread(thd, &err)))
4458
		return xt_ha_pbxt_to_mysql_error(err);
4459
	if (self->st_lock_count)
4460
		ASSERT(self->st_xact_data);
4461
4462
	if (!pb_table_locked) {
4463
		ha_aquire_exclusive_use(self, pb_share, this);
4464
		pushr_(ha_release_exclusive_use, pb_share);
4465
	}
4466
4467
#ifdef CHECK_TABLE_LOADS
4468
	xt_tab_load_table(self, pb_open_tab);
4469
#endif
4470
	xt_check_table(self, pb_open_tab);
4471
4472
	if (!pb_table_locked)
4473
		freer_(); // ha_release_exclusive_use(pb_share)
4474
4475
	//pbxt_mysql_trace_on = TRUE;
4476
	return 0;
4477
}
4478
4479
/*
4480
 * This function is called:
4481
 * For each table in LOCK TABLES,
4482
 * OR
4483
 * For each table in a statement.
4484
 *
4485
 * It is called with F_UNLCK:
4486
 * in UNLOCK TABLES
4487
 * OR
4488
 * at the end of a statement.
4489
 *
4490
 */
4491
xtPublic int ha_pbxt::external_lock(THD *thd, int lock_type)
4492
{
1455.3.12 by Paul McCullagh
Some compilers complained that: variable 'err' might be clobbered by 'longjmp' or 'vfork'
4493
	/* Some compiler complain that: variable 'err' might be clobbered by 'longjmp' or 'vfork' */
4494
	volatile int				err = 0;
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
4495
	XTThreadPtr		self;
4496
	
1455.3.12 by Paul McCullagh
Some compilers complained that: variable 'err' might be clobbered by 'longjmp' or 'vfork'
4497
	if (!(self = ha_set_current_thread(thd, (int *) &err)))
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
4498
		return xt_ha_pbxt_to_mysql_error(err);
4499
4500
	/* F_UNLCK is set when this function is called at end
4501
	 * of statement or UNLOCK TABLES
4502
	 */
4503
	if (lock_type == F_UNLCK) {
4504
		/* This is not TRUE if external_lock() FAILED!
4505
		 * Can we rely on external_unlock being called when
4506
		 * external_lock() fails? Currently yes, but it does
4507
		 * not make sense!
4508
		ASSERT_NS(pb_ex_in_use);
4509
		*/
4510
4511
		XT_PRINT1(self, "EXTERNAL_LOCK (%s) lock_type=UNLOCK\n", pb_share->sh_table_path->ps_path);
4512
4513
		/* Make any temporary locks on this table permanent.
4514
		 *
4515
		 * This is required here because of the following example:
4516
		 * create table t1 (a int NOT NULL, b int, primary key (a));
4517
		 * create table t2 (a int NOT NULL, b int, primary key (a));
4518
		 * insert into t1 values (0, 10),(1, 11),(2, 12);
4519
		 * insert into t2 values (1, 21),(2, 22),(3, 23);
4520
		 * update t1 set b= (select b from t2 where t1.a = t2.a);
4521
		 * update t1 set b= (select b from t2 where t1.a = t2.a);
4522
		 * select * from t1;
4523
		 * drop table t1, t2;
4524
		 *
4525
		 */
4526
4527
		/* GOTCHA! It's weird, but, if this function returns an error
4528
		 * on lock, then UNLOCK is called?!
4529
		 * This should not be done, because if lock fails, it should be
4530
		 * assumed that no UNLOCK is required.
4531
		 * Basically, I have to assume that some code will presume this,
4532
		 * although the function lock_external() calls unlock, even
4533
		 * when lock fails.
4534
		 * The result is, that my lock count can go wrong. So I could
4535
		 * change the lock method, and increment the lock count, even
4536
		 * if it fails. However, the consequences are more serious,
4537
		 * if some code decides not to call UNLOCK after lock fails.
4538
		 * The result is that I would have a permanent too high lock,
4539
		 * count and nothing will work.
4540
		 * So instead, I handle the fact that I might too many unlocks
4541
		 * here.
4542
		 */
4543
		if (self->st_lock_count > 0)
4544
			self->st_lock_count--;
4545
		if (!self->st_lock_count) {
4546
			/* This section handles "auto-commit"... */
4547
4548
#ifdef XT_IMPLEMENT_NO_ACTION
4549
			/* {NO-ACTION-BUG}
4550
			 * This is required here because it marks the end of a statement.
4551
			 * If we are in a non-auto-commit mode, then we cannot
4552
			 * wait for st_is_update to be set by the begining of a new transaction.
4553
			 */
4554
			if (self->st_restrict_list.bl_count) {
4555
				if (!xt_tab_restrict_rows(&self->st_restrict_list, self))
4556
					err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4557
			}
4558
#endif
4559
4560
			if (self->st_xact_data) {
4561
				if (self->st_auto_commit) {
4562
					/*
4563
					 * Normally I could assume that if the transaction
4564
					 * has not been aborted by now, then it should be committed.
4565
					 *
4566
					 * Unfortunately, this is not the case!
4567
					 *
4568
					 * create table t1 (id int primary key) engine = pbxt;
4569
					 * create table t2 (id int) engine = pbxt;
4570
					 * 
4571
					 * insert into t1 values ( 1 ) ;
4572
					 * insert into t1 values ( 2 ) ;
4573
					 * insert into t2 values ( 1 ) ;
4574
					 * insert into t2 values ( 2 ) ;
4575
					 * 
4576
					 * --This statement is returns an error calls ha_autocommit_or_rollback():
4577
					 * update t1 set t1.id=1 where t1.id=2;
4578
					 * 
4579
					 * --This statement is returns no error and calls ha_autocommit_or_rollback():
4580
					 * update t1,t2 set t1.id=3, t2.id=3 where t1.id=2 and t2.id = t1.id;
4581
					 * 
4582
					 * --But this statement returns an error and does not call ha_autocommit_or_rollback():
4583
					 * update t1,t2 set t1.id=1, t2.id=1 where t1.id=3 and t2.id = t1.id;
4584
					 * 
4585
					 * The result is, I cannot rely on ha_autocommit_or_rollback() being called :(
4586
					 * So I have to abort myself here...
4587
					 */
4588
					if (pb_open_tab)
4589
						pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &self->st_lock_list);
4590
4591
					if (self->st_abort_trans) {
4592
						XT_PRINT0(self, "xt_xn_rollback in unlock\n");
4593
						if (!xt_xn_rollback(self))
4594
							err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4595
					}
4596
					else {
4597
						XT_PRINT0(self, "xt_xn_commit in unlock\n");
4598
						if (!xt_xn_commit(self))
4599
							err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4600
					}
4601
				}
4602
			}
4603
4604
			/* If the previous statement was "for update", then set the visibilty
4605
			 * so that non- for update SELECTs will see what the for update select
4606
			 * (or update statement) just saw.
4607
			 */
4608
			if (pb_open_tab) {
4609
				if (pb_open_tab->ot_for_update) {
4610
					self->st_visible_time = self->st_database->db_xn_end_time;
4611
					pb_open_tab->ot_for_update = 0;
4612
				}
4613
4614
				if (pb_share->sh_recalc_selectivity) {
4615
					/* {FREE-ROWS-BAD} */
4616
					if ((pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) >= 200) {
4617
						/* [**] */
4618
						pb_share->sh_recalc_selectivity = FALSE;
4619
						xt_ind_set_index_selectivity(pb_open_tab, self);
4620
						/* {FREE-ROWS-BAD} */
4621
						pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150;
4622
					}
4623
				}
4624
			}
4625
4626
			if (self->st_stat_modify)
4627
				self->st_statistics.st_stat_write++;
4628
			else
4629
				self->st_statistics.st_stat_read++;
4630
			self->st_stat_modify = FALSE;
4631
			self->st_import_stat = XT_IMP_NO_IMPORT;
4632
		}
4633
4634
		if (pb_table_locked) {
4635
			pb_table_locked--;
4636
			if (!pb_table_locked)
4637
				ha_release_exclusive_use(self, pb_share);
4638
		}
4639
4640
		/* No longer in use: */
4641
		pb_ex_in_use = 0;
4642
		/* Someone may be waiting for me to complete: */
4643
		if (pb_share->sh_table_lock)
4644
			xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
4645
	}
4646
	else {
4647
		XT_PRINT2(self, "ha_pbxt::EXTERNAL_LOCK (%s) lock_type=%d\n", pb_share->sh_table_path->ps_path, lock_type);
4648
		
4649
		if (pb_lock_table) {
4650
			pb_ex_in_use = 1;
4651
			try_(a) {
4652
				if (!pb_table_locked)
4653
					ha_aquire_exclusive_use(self, pb_share, this);
4654
				pb_table_locked++;
4655
4656
				ha_close_open_tables(self, pb_share, this);
4657
4658
				if (!pb_share->sh_table) {
4659
					xt_ha_open_database_of_table(self, pb_share->sh_table_path);
4660
4661
					ha_open_share(self, pb_share);
4662
				}
4663
			}
4664
			catch_(a) {
4665
				err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4666
				pb_ex_in_use = 0;
4667
				goto complete;
4668
			}
4669
			cont_(a);
4670
		}
4671
		else {
4672
			pb_ex_in_use = 1;
4673
			if (pb_share->sh_table_lock && !pb_table_locked) {
4674
				/* If some thread has an exclusive lock, then
4675
				 * we wait for the lock to be removed:
4676
				 */
4677
				if (!ha_wait_for_shared_use(this, pb_share)) {
4678
					err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
4679
					goto complete;
4680
				}
4681
			}
4682
4683
			if (!pb_open_tab) {
4684
				if ((err = reopen())) {
4685
					pb_ex_in_use = 0;
4686
					goto complete;
4687
				}
4688
			}
4689
4690
			/* Set the current thread for this open table: */
4691
			pb_open_tab->ot_thread = self;
4692
4693
			/* If this is a set, then it is in UPDATE/DELETE TABLE ...
4694
			 * or SELECT ... FOR UPDATE
4695
			 */	
4696
			pb_open_tab->ot_is_modify = FALSE;
4697
			if ((pb_open_tab->ot_for_update = (lock_type == F_WRLCK))) {
4698
				switch ((int) thd_sql_command(thd)) {
4699
					case SQLCOM_DELETE:
4700
#ifndef DRIZZLED
4701
					case SQLCOM_DELETE_MULTI:
4702
#endif
4703
						/* turn DELETE IGNORE into normal DELETE. The IGNORE option causes problems because 
4704
						 * when a record is deleted we add an xlog record which we cannot "rollback" later
4705
						 * when we find that an FK-constraint has failed. 
4706
						 */
4707
						thd->lex->ignore = false;
4708
					case SQLCOM_UPDATE:
4709
#ifndef DRIZZLED
4710
					case SQLCOM_UPDATE_MULTI:
4711
#endif
4712
					case SQLCOM_REPLACE:
4713
					case SQLCOM_REPLACE_SELECT:
4714
					case SQLCOM_INSERT:
4715
					case SQLCOM_INSERT_SELECT:
4716
						pb_open_tab->ot_is_modify = TRUE;
4717
						self->st_stat_modify = TRUE;
4718
						break;
4719
					case SQLCOM_ALTER_TABLE:
4720
					case SQLCOM_CREATE_INDEX:
4721
#ifndef DRIZZLED
4722
					case SQLCOM_REPAIR:
4723
					case SQLCOM_OPTIMIZE:
4724
#endif
4725
					case SQLCOM_DROP_INDEX:
4726
						self->st_stat_modify = TRUE;
4727
						self->st_import_stat = XT_IMP_COPY_TABLE;
4728
						pb_import_row_count = 0;
4729
						/* Do not read FOR UPDATE!
4730
						 * this avoids taking locks on the rows that are read
4731
						 * Which leads to the assertion failure:
4732
						 * int XTRowLocks::xt_make_lock_permanent(XTOpenTable*, XTRowLockList*)(lock_xt.cc:646) item
1491.1.2 by Jay Pipes
Cursor::write_row() -> Cursor::doInsertRecord(). Cursor::ha_write_row() -> Cursor::insertRecord()
4733
						 * after the transaction is committed in doInsertRecord.
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
4734
						 */
4735
						pb_open_tab->ot_for_update = FALSE;
4736
						break;
4737
					case SQLCOM_LOAD:
4738
						self->st_stat_modify = TRUE;
4739
						self->st_import_stat = XT_IMP_LOAD_TABLE;
4740
						pb_import_row_count = 0;
4741
						pb_open_tab->ot_for_update = FALSE;
4742
						break;
4743
					case SQLCOM_CREATE_TABLE:
4744
					case SQLCOM_TRUNCATE:
4745
					case SQLCOM_DROP_TABLE:
4746
						self->st_stat_modify = TRUE;
4747
						break;
4748
				}
4749
			}
4750
4751
			if (pb_open_tab->ot_is_modify && pb_open_tab->ot_table->tab_dic.dic_disable_index) {
4752
				xt_tab_set_index_error(pb_open_tab->ot_table);
4753
				err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
4754
				goto complete;
4755
			}
4756
		}
4757
4758
		/* Record the associated MySQL thread: */
4759
		pb_mysql_thd = thd;
4760
4761
		if (self->st_database != pb_share->sh_table->tab_db) {				
4762
			try_(b) {
4763
				/* PBXT does not permit multiple databases in one statement,
4764
				 * or in a single transaction!
4765
				 *
4766
				 * Example query:
4767
				 *
4768
				 * update mysqltest_1.t1, mysqltest_2.t2 set a=10,d=10;
4769
				 */
4770
				if (self->st_lock_count > 0)
4771
					xt_throw_xterr(XT_CONTEXT, XT_ERR_MULTIPLE_DATABASES);
4772
4773
				xt_ha_open_database_of_table(self, pb_share->sh_table_path);
4774
			}
4775
			catch_(b) {
4776
				err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4777
				pb_ex_in_use = 0;
4778
				goto complete;
4779
			}
4780
			cont_(b);
4781
		}
4782
4783
		/* See {IS-UPDATE-STAT} and {UPDATE-STACK} */
4784
		self->st_is_update = NULL;
4785
4786
		/* Auto begin a transaction (if one is not already running): */
4787
		if (!self->st_xact_data) {
4788
			/* Transaction mode numbers must be identical! */
4789
			(void) ASSERT_NS(ISO_READ_UNCOMMITTED == XT_XACT_UNCOMMITTED_READ);
4790
			(void) ASSERT_NS(ISO_SERIALIZABLE == XT_XACT_SERIALIZABLE);
4791
4792
			thd_init_xact(thd, self, true);
4793
			
4794
			if (!xt_xn_begin(self)) {
4795
				err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4796
				pb_ex_in_use = 0;
4797
				goto complete;
4798
			}
4799
			/*
4800
			 * {START-TRANS} GOTCHA: trans_register_ha() is not mentioned in the documentation.
4801
			 * It must be called to inform MySQL that we have a transaction (see start_stmt).
4802
			 *
4803
			 * Here are some tests that confirm whether things are done correctly:
4804
			 *
4805
			 * drop table if exists t1, t2;
4806
			 * create table t1 (c1 int);
4807
			 * insert t1 values (1);
4808
			 * select * from t1;
4809
			 * rename table t1 to t2;
4810
			 *
4811
			 * rename will generate an error if MySQL thinks a transaction is
4812
			 * still running.
4813
			 *
4814
			 * create table t1 (a text character set utf8, b text character set latin1);
4815
			 * insert t1 values (0x4F736E616272C3BC636B, 0x4BF66C6E);
4816
			 * select * from t1;
4817
			 * --exec $MYSQL_DUMP --tab=$MYSQLTEST_VARDIR/tmp/ test
4818
			 * --exec $MYSQL test < $MYSQLTEST_VARDIR/tmp/t1.sql
4819
			 * --exec $MYSQL_IMPORT test $MYSQLTEST_VARDIR/tmp/t1.txt
4820
			 * select * from t1;
4821
			 *
4822
			 * This test forces a begin transaction in start_stmt()
4823
			 *
4824
			 * drop tables if exists t1;
4825
			 * create table t1 (c1 int);
4826
			 * lock tables t1 write;
4827
			 * insert t1 values (1);
4828
			 * insert t1 values (2);
4829
			 * unlock tables;
4830
			 *
4831
			 * The second select will return an empty result if
4832
			 * MySQL is not informed that a transaction is running (auto-commit 
4833
			 * in external_lock comes too late)!
4834
			 *
4835
			 */
4836
#ifndef DRIZZLED
4837
			if (!self->st_auto_commit) {
4838
				trans_register_ha(thd, TRUE, pbxt_hton);
4839
				XT_PRINT0(self, "CONN START XACT - ha_pbxt::external_lock --> trans_register_ha\n");
4840
			}
4841
#endif
4842
		}
4843
4844
		/* Start a statement transaction: */
4845
		/* {START-STAT-HACK} The problem that ha_commit_trans() is not
4846
		 * called by MySQL seems to be fixed (tests confirm this).
4847
		 * Here is the previous comment when this code was execute 
4848
		 * here {START-STAT-HACK}
4849
		 *
4850
		 * GOTCHA: I have a huge problem with the transaction statement.
4851
		 * It is not ALWAYS committed (I mean ha_commit_trans() is
4852
		 * not always called - for example in SELECT).
4853
		 *
4854
		 * If I call trans_register_ha() but ha_commit_trans() is not called
4855
		 * then MySQL thinks a transaction is still running (while
4856
		 * I have committed the auto-transaction in ha_pbxt::external_lock()).
4857
		 *
4858
		 * This causes all kinds of problems, like transactions
4859
		 * are killed when they should not be.
4860
		 *
4861
		 * To prevent this, I only inform MySQL that a transaction
4862
		 * has beens started when an update is performed. I have determined that
4863
		 * ha_commit_trans() is only guarenteed to be called if an update is done.
4864
		 * --------
4865
		 *
4866
		 * So, this is the correct place to start a statement transaction.
4867
		 *
1491.1.2 by Jay Pipes
Cursor::write_row() -> Cursor::doInsertRecord(). Cursor::ha_write_row() -> Cursor::insertRecord()
4868
		 * Note: if trans_register_ha() is not called before insertRecord(), then 
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
4869
		 * PBXT is not registered correctly as a modification transaction.
1491.1.2 by Jay Pipes
Cursor::write_row() -> Cursor::doInsertRecord(). Cursor::ha_write_row() -> Cursor::insertRecord()
4870
		 * (mark_trx_read_write call in insertRecord).
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
4871
		 * This leads to 2-phase commit not being called as it should when
4872
		 * binary logging is enabled.
4873
		 */
4874
#ifndef DRIZZLED
4875
		if (!pb_open_tab->ot_thread->st_stat_trans) {
4876
			trans_register_ha(pb_mysql_thd, FALSE, pbxt_hton);
4877
			XT_PRINT0(pb_open_tab->ot_thread, "STAT START - ha_pbxt::external_lock --> trans_register_ha\n");
4878
			pb_open_tab->ot_thread->st_stat_trans = TRUE;
4879
		}
4880
#endif
4881
		if (lock_type == F_WRLCK || self->st_xact_mode < XT_XACT_REPEATABLE_READ)
4882
			self->st_visible_time = self->st_database->db_xn_end_time;
4883
4884
#ifdef TRACE_STATEMENTS
4885
		if (self->st_lock_count == 0)
4886
			STAT_TRACE(self, *thd_query(thd));
4887
#endif
4888
		self->st_lock_count++;
4889
	}
4890
4891
	complete:
4892
	return err;
4893
}
4894
4895
/*
4896
 * This function is called for each table in a statement
4897
 * after LOCK TABLES has been used.
4898
 *
4899
 * Currently I only use this function to set the
4900
 * current thread of the table handle. 
4901
 *
4902
 * GOTCHA: The prototype of start_stmt() has changed
4903
 * from version 4.1 to 5.1!
4904
 */
4905
/*
 * Start a new statement on this table handle.
 *
 * thd        The MySQL thread running the statement.
 * lock_type  The table lock type held for this statement. Every type other
 *            than the plain read lock types marks the open table as
 *            "for update".
 *
 * If the previous statement has ended (st_stat_ended), it is wound up first:
 * pending restrict checks are run, an auto-commit transaction is committed
 * or rolled back, and the read/write statement statistics are updated.
 * After that a transaction is started if none is running.
 *
 * Returns 0 on success, otherwise the error code produced by
 * xt_ha_pbxt_to_mysql_error(), xt_ha_pbxt_thread_error_for_mysql() or
 * reopen().
 */
int ha_pbxt::start_stmt(THD *thd, thr_lock_type lock_type)
{
	int				err = 0;
	XTThreadPtr		self;

	ASSERT_NS(pb_ex_in_use);

	if (!(self = ha_set_current_thread(thd, &err)))
		return xt_ha_pbxt_to_mysql_error(err);

	XT_PRINT2(self, "ha_pbxt::start_stmt (%s) lock_type=%d\n", pb_share->sh_table_path->ps_path, (int) lock_type);

	if (!pb_open_tab) {
		if ((err = reopen()))
			goto complete;
	}

	ASSERT_NS(pb_open_tab->ot_thread == self);
	ASSERT_NS(thd == pb_mysql_thd);
	ASSERT_NS(self->st_database == pb_open_tab->ot_table->tab_db);

	if (self->st_stat_ended) {
		/* The previous statement is over: finish it before starting the next. */
		self->st_stat_ended = FALSE;
		self->st_stat_trans = FALSE;

#ifdef XT_IMPLEMENT_NO_ACTION
		if (self->st_restrict_list.bl_count) {
			if (!xt_tab_restrict_rows(&self->st_restrict_list, self)) {
				err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, self, pb_ignore_dup_key);
			}
		}
#endif

		/* This section handles "auto-commit"... */
		if (self->st_xact_data && self->st_auto_commit && self->st_table_trans) {
			if (self->st_abort_trans) {
				XT_PRINT0(self, "xt_xn_rollback in start_stmt\n");
				if (!xt_xn_rollback(self))
					err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, self, pb_ignore_dup_key);
			}
			else {
				XT_PRINT0(self, "xt_xn_commit in start_stmt\n");
				if (!xt_xn_commit(self))
					err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, self, pb_ignore_dup_key);
			}
		}

		/* Account for the statement that just ended, then reset its flags. */
		if (self->st_stat_modify)
			self->st_statistics.st_stat_write++;
		else
			self->st_statistics.st_stat_read++;
		self->st_stat_modify = FALSE;
		self->st_import_stat = XT_IMP_NO_IMPORT;

		/* If the previous statement was "for update", then set the visibility
		 * so that non-"for update" SELECTs will see what the "for update"
		 * select (or update statement) just saw.
		 */
		if (pb_open_tab->ot_for_update)
			self->st_visible_time = self->st_database->db_xn_end_time;
	}

	/* Anything that is not one of the read lock types counts as "for update". */
	pb_open_tab->ot_for_update =
		(lock_type != TL_READ &&
		 lock_type != TL_READ_WITH_SHARED_LOCKS &&
#ifndef DRIZZLED
		 lock_type != TL_READ_HIGH_PRIORITY &&
#endif
		 lock_type != TL_READ_NO_INSERT);
	pb_open_tab->ot_is_modify = FALSE;
	if (pb_open_tab->ot_for_update) {
		switch ((int) thd_sql_command(thd)) {
			case SQLCOM_UPDATE:
			case SQLCOM_DELETE:
#ifndef DRIZZLED
			case SQLCOM_UPDATE_MULTI:
			case SQLCOM_DELETE_MULTI:
#endif
			case SQLCOM_REPLACE:
			case SQLCOM_REPLACE_SELECT:
			case SQLCOM_INSERT:
			case SQLCOM_INSERT_SELECT:
				pb_open_tab->ot_is_modify = TRUE;
				self->st_stat_modify = TRUE;
				break;
			case SQLCOM_CREATE_TABLE:
			case SQLCOM_CREATE_INDEX:
			case SQLCOM_ALTER_TABLE:
			case SQLCOM_TRUNCATE:
			case SQLCOM_DROP_TABLE:
			case SQLCOM_DROP_INDEX:
			case SQLCOM_LOAD:
#ifndef DRIZZLED
			case SQLCOM_REPAIR:
			case SQLCOM_OPTIMIZE:
#endif
				/* The conditional section must end before this assignment:
				 * otherwise a DRIZZLED build compiles the assignment out and
				 * none of the statements above is counted as a modification.
				 */
				self->st_stat_modify = TRUE;
				break;
		}
	}

	/* {IS-UPDATE-STAT} This is required at this level!
	 * No matter how often it is called, it is still the start of a
	 * statement. We need to make sure that statements are NOT mistaken
	 * for a different type of statement.
	 *
	 * Here is an example:
	 * select * from t1 where data = getcount("bar")
	 *
	 * If the procedure getcount() addresses another table,
	 * then the open and close of the statements in getcount()
	 * are nested within the open and close of the select t1
	 * statement.
	 */
	/* {UPDATE-STACK}
	 * In addition to this:
	 * A trigger in the middle of an update also causes nested
	 * statements. If I reset st_is_update, then
	 * when the trigger returns the system thinks we
	 * are in a different update statement, and may
	 * update the same row again.
	 */
	if (self->st_is_update == pb_open_tab) {
		/* Pop the update stack: */
		XTOpenTablePtr curr = pb_open_tab->ot_thread->st_is_update;

		pb_open_tab->ot_thread->st_is_update = curr->ot_prev_update;
		curr->ot_prev_update = NULL;
	}

	/* See comment {START-TRANS} */
	if (!self->st_xact_data) {

		thd_init_xact(thd, self, false);

		if (!xt_xn_begin(self)) {
			err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
			goto complete;
		}
#ifndef DRIZZLED
		if (!self->st_auto_commit) {
			trans_register_ha(thd, TRUE, pbxt_hton);
			XT_PRINT0(self, "START CONN XACT - ha_pbxt::start_stmt --> trans_register_ha\n");
		}
#endif
	}

	/* Start a statement (see {START-STAT-HACK}): */
#ifndef DRIZZLED
	if (!pb_open_tab->ot_thread->st_stat_trans) {
		trans_register_ha(pb_mysql_thd, FALSE, pbxt_hton);
		XT_PRINT0(pb_open_tab->ot_thread, "START STAT - ha_pbxt::start_stmt --> trans_register_ha\n");
		pb_open_tab->ot_thread->st_stat_trans = TRUE;
	}
#endif
	/* Refresh the visibility time for "for update" access, and for
	 * transaction modes below XT_XACT_REPEATABLE_READ.
	 */
	if (pb_open_tab->ot_for_update || self->st_xact_mode < XT_XACT_REPEATABLE_READ)
		self->st_visible_time = self->st_database->db_xn_end_time;

	pb_in_stat = TRUE;

	self->st_stat_count++;

	complete:
	return err;
}
5070
5071
/*
5072
 * The idea with handler::store_lock() is the following:
5073
 *
5074
 * The statement decided which locks we should need for the table
5075
 * for updates/deletes/inserts we get WRITE locks, for SELECT... we get
5076
 * read locks.
5077
 *
5078
 * Before adding the lock into the table lock handler (see thr_lock.c)
5079
 * mysqld calls store lock with the requested locks. Store lock can now
5080
 * modify a write lock to a read lock (or some other lock), ignore the
5081
 * lock (if we don't want to use MySQL table locks at all) or add locks
5082
 * for many tables (like we do when we are using a MERGE handler).
5083
 *
5084
 * When releasing locks, store_lock() is also called. In this case one
5085
 * usually doesn't have to do anything.
5086
 *
5087
 * In some exceptional cases MySQL may send a request for a TL_IGNORE;
5088
 * This means that we are requesting the same lock as last time and this
5089
 * should also be ignored. (This may happen when someone does a flush
5090
 * table when we have opened a part of the tables, in which case mysqld
5091
 * closes and reopens the tables and tries to get the same locks as last
5092
 * time). In the future we will probably try to remove this.
5093
 *
5094
 * Called from lock.cc by get_lock_data().
5095
 */
5096
/*
 * Decide which MySQL table lock this handler wants and hand back its lock
 * record.
 *
 * thd        The MySQL thread requesting the lock.
 * to         Next free slot in the caller's lock array; &pb_lock is
 *            stored there.
 * lock_type  The lock type requested by the server.
 *
 * Returns the slot following the one that was filled in.
 */
THR_LOCK_DATA **ha_pbxt::store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type)
{
	/*
	 * TL_READ permits concurrent INSERTs. In that mode PBXT differs from
	 * MyISAM, which allows the INSERTs but hides them from the current
	 * "transaction" (started by LOCK TABLES, ended by UNLOCK TABLES). PBXT
	 * used to allow the INSERTs and make them visible to the locker (on
	 * commit). The MySQL manual states nothing about row visibility
	 * limitations, but for better compatibility with MyISAM local locks
	 * are converted into normal read locks.
	 */
	if (lock_type == TL_READ)
		lock_type = TL_READ_NO_INSERT;

	if (lock_type == TL_IGNORE || pb_lock.type != TL_UNLOCK) {
		/* Ignore request, or a lock type is already stored: nothing to decide. */
#ifdef PBXT_HANDLER_TRACE
		XT_PRINT3(xt_get_self(), "store_lock (%s) %d->%d (ignore/unlock)\n", pb_share->sh_table_path->ps_path, lock_type, lock_type);
#endif
	}
	else {
		/* pb_lock_table is TRUE for operations that require a table lock.
		 *
		 * GOTCHA (TRUNCATE):
		 * Without this, TRUNCATE TABLE deadlocks with a normal update of
		 * the table! The reason is:
		 *
		 * external_lock() is called before MySQL actually locks the
		 * table. In external_lock(), the table is shared locked,
		 * by indicating that the handler is in use.
		 *
		 * Later, in delete_all_rows(), an exclusive lock must be
		 * obtained. If an UPDATE or INSERT has also gained a shared
		 * lock in the meantime, then TRUNCATE TABLE hangs.
		 *
		 * Setting pb_lock_table indicates that an exclusive lock
		 * should be gained in external_lock().
		 *
		 * This is the locking behaviour:
		 *
		 * TRUNCATE TABLE:
		 * XT SHARE LOCK (mysql_lock_tables calls external_lock)
		 * MySQL WRITE LOCK (mysql_lock_tables)
		 * ...
		 * XT EXCLUSIVE LOCK (delete_all_rows)
		 *
		 * INSERT:
		 * XT SHARED LOCK (mysql_lock_tables calls external_lock)
		 * MySQL WRITE_ALLOW_WRITE LOCK (mysql_lock_tables)
		 *
		 * If the locking for INSERT is done in the ... phase
		 * above, then we have a deadlock because
		 * WRITE_ALLOW_WRITE conflicts with WRITE.
		 *
		 * Making TRUNCATE TABLE take a WRITE_ALLOW_WRITE LOCK will
		 * not solve the problem, because then 2 TRUNCATE TABLEs
		 * can deadlock due to lock escalation.
		 *
		 * What may work is if MySQL were to lock BEFORE calling
		 * external_lock()!
		 *
		 * However, using this method, TRUNCATE TABLE does deadlock
		 * with other operations such as ALTER TABLE!
		 *
		 * This is handled with a lock timeout. Assuming
		 * TRUNCATE TABLE will be mixed with DML, this is the
		 * best solution!
		 */
		pb_lock_table = (thd_sql_command(thd) == SQLCOM_TRUNCATE) ? TRUE : FALSE;

#ifdef PBXT_HANDLER_TRACE
		pb_lock.type = lock_type;
#endif

		/* GOTCHA: It used to be OK to weaken the lock after just checking
		 * that !thd->in_lock_tables. However, when starting a procedure,
		 * MySQL simulates a LOCK TABLES statement.
		 *
		 * So the check has to be more specific and look at the actual
		 * statement type. Before doing this I got an (undetected) deadlock
		 * on the following test. However, now we get a failed assertion
		 * in ha_rollback_trans():
		 * TODO: Check this with InnoDB!
		 *
		 * DBUG_ASSERT(0);
		 * my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
		 *
		 * drop table if exists t3;
		 * create table t3 (a smallint primary key) engine=pbxt;
		 * insert into t3 (a) values (40);
		 * insert into t3 (a) values (50);
		 *
		 * delimiter |
		 *
		 * drop function if exists t3_update|
		 *
		 * create function t3_update() returns int
		 * begin
		 *   insert into t3 values (10);
		 *   return 100;
		 * end|
		 *
		 * delimiter ;
		 *
		 * CONN 1:
		 *
		 * begin;
		 * update t3 set a = 5 where a = 50;
		 *
		 * CONN 2:
		 *
		 * begin;
		 * update t3 set a = 4 where a = 40;
		 *
		 * CONN 1:
		 *
		 * update t3 set a = 4 where a = 40; // Hangs waiting CONN 2.
		 *
		 * CONN 2:
		 *
		 * select t3_update(); // Hangs waiting for table lock.
		 */
		bool weaken_write = (lock_type >= TL_WRITE_CONCURRENT_INSERT && lock_type <= TL_WRITE);

#ifndef DRIZZLED
		/* A real LOCK TABLES statement keeps the lock it asked for. */
		if (weaken_write && thd_in_lock_tables(thd) && thd_sql_command(thd) == SQLCOM_LOCK_TABLES)
			weaken_write = false;
#endif
		if (weaken_write && thd_tablespace_op(thd))
			weaken_write = false;
		if (weaken_write && thd_sql_command(thd) == SQLCOM_TRUNCATE)
			weaken_write = false;
#ifndef DRIZZLED
		if (weaken_write && thd_sql_command(thd) == SQLCOM_OPTIMIZE)
			weaken_write = false;
#endif
		if (weaken_write && thd_sql_command(thd) == SQLCOM_CREATE_TABLE)
			weaken_write = false;

		if (weaken_write)
			lock_type = TL_WRITE_ALLOW_WRITE;

		/* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
		 * MySQL would use the lock TL_READ_NO_INSERT on t2, and that
		 * would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
		 * to t2. Convert the lock to a normal read lock to allow
		 * concurrent inserts to t2.
		 *
		 * (This one from InnoDB)
		 *
		 * Stewart: removed SQLCOM_CALL, not sure of implications.
		 */
		if (lock_type == TL_READ_NO_INSERT) {
#ifndef DRIZZLED
			if (!thd_in_lock_tables(thd) || thd_sql_command(thd) == SQLCOM_CALL)
#endif
				lock_type = TL_READ;
		}

		XT_PRINT3(xt_get_self(), "store_lock (%s) %d->%d\n", pb_share->sh_table_path->ps_path, pb_lock.type, lock_type);
		pb_lock.type = lock_type;
	}

	/* Hand our lock record to the caller and advance past the slot we used. */
	*to = &pb_lock;
	return to + 1;
}
5263
5264
/*
5265
 * Used to delete a table. By the time delete_table() has been called all
5266
 * opened references to this table will have been closed (and your globally
5267
 * shared references released). The variable name will just be the name of
5268
 * the table. You will need to remove any files you have created at this point.
5269
 *
5270
 * Called from handler.cc by delete_table and ha_create_table(). Only used
5271
 * during create if the table_flag HA_DROP_BEFORE_CREATE was specified for
5272
 * the storage engine.
5273
*/
5274
/*
 * Drop the table stored at table_path.
 *
 * Built as PBXTStorageEngine::doDropTable() under DRIZZLED (the path is taken
 * from the table identifier) and as ha_pbxt::delete_table() otherwise.
 *
 * System tables are handed to delete_system_table(). For any other table the
 * database of the table is opened, the share is fetched with ha_get_share(),
 * ha_aquire_exclusive_use() and ha_close_open_tables() are called on it, and
 * the table is then removed with xt_drop_table().
 *
 * Returns 0 on success, otherwise the error code from
 * xt_ha_pbxt_to_mysql_error() or xt_ha_pbxt_thread_error_for_mysql(). Under
 * DRIZZLED, HA_ERR_NO_SUCH_TABLE is reported as ENOENT.
 */
#ifdef DRIZZLED
5275
int PBXTStorageEngine::doDropTable(Session &, TableIdentifier& ident)
5276
{
5277
	const std::string& path = ident.getPath();
5278
	const char *table_path = path.c_str();
5279
#else
5280
int ha_pbxt::delete_table(const char *table_path)
5281
{
5282
#endif
5283
	THD				*thd = current_thd;
5284
	int				err = 0;
5285
	XTThreadPtr		self = NULL;
5286
	XTSharePtr		share;
5287
5288
	/* NOTE(review): self is still NULL at this point (it is only assigned by
	 * ha_set_current_thread() below) - confirm that the trace macros accept
	 * a NULL thread.
	 */
	STAT_TRACE(self, *thd_query(thd));
5289
	XT_PRINT1(self, "delete_table (%s)\n", table_path);
5290
5291
	if (XTSystemTableShare::isSystemTable(table_path))
5292
		return delete_system_table(table_path);
5293
5294
	if (!(self = ha_set_current_thread(thd, &err)))
5295
		return xt_ha_pbxt_to_mysql_error(err);
5296
5297
	/* Record whether the OPTION_NO_FOREIGN_KEY_CHECKS option is set for this thread: */
	self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5298
5299
	try_(a) {
5300
		xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
5301
5302
		ASSERT(xt_get_self() == self);
5303
		try_(b) {
5304
			/* NOTE: MySQL does not drop a table by first locking it!
5305
			 * We also cannot use pb_share because the handler used
5306
			 * to delete a table is not opened correctly.
5307
			 */
5308
			share = ha_get_share(self, table_path, false);
5309
			pushr_(ha_unget_share, share);
5310
			ha_aquire_exclusive_use(self, share, NULL);
5311
			pushr_(ha_release_exclusive_use, share);
5312
			ha_close_open_tables(self, share, NULL);
5313
5314
			/* The last argument tells xt_drop_table() whether the statement is DROP DATABASE: */
			xt_drop_table(self, (XTPathStrPtr) table_path, thd_sql_command(thd) == SQLCOM_DROP_DB);
5315
5316
			/* Release in the reverse order of the pushr_() calls above: */
			freer_(); // ha_release_exclusive_use(share)
5317
			freer_(); // ha_unget_share(share)
5318
		}
5319
		catch_(b) {
5320
			/* In MySQL, if the table does not exist, just log the error and continue. This is
5321
 			 * needed to delete a table in the case when CREATE TABLE fails and no PBXT disk
5322
 			 * structures were created.
5323
 			 * Drizzle, unlike MySQL, iterates over all handlers and tries to delete the table. It
5324
 			 * stops when a handler returns TRUE, so in Drizzle we need to report the error.
5325
			 */
5326
#ifndef DRIZZLED
5327
			if (self->t_exception.e_xt_err == XT_ERR_TABLE_NOT_FOUND)
5328
				xt_log_and_clear_exception(self);
5329
			else
5330
#endif
5331
				throw_();
5332
		}
5333
		cont_(b);
5334
5335
		/*
5336
		 * If there are no more PBXT tables in the database, we
5337
		 * "drop the database", which deletes all PBXT resources
5338
		 * in the database.
5339
		 */
5340
		/* We now only drop the pbxt system data,
5341
		 * when the PBXT database is dropped.
5342
		 */
5343
#ifndef XT_USE_GLOBAL_DB
5344
		if (!xt_table_exists(self->st_database)) {
5345
			xt_ha_all_threads_close_database(self, self->st_database);
5346
			xt_drop_database(self, self->st_database);
5347
			xt_unuse_database(self, self);
5348
			xt_ha_close_global_database(self);
5349
		}
5350
#endif
5351
	}
5352
	catch_(a) {
5353
		err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5354
#ifdef DRIZZLED
5355
		if (err == HA_ERR_NO_SUCH_TABLE)
5356
			err = ENOENT;
5357
#endif
5358
	}
5359
	cont_(a);
5360
	
5361
#ifdef PBMS_ENABLED
5362
	/* Call pbms_delete_table_with_blobs() last because it cannot be undone. */
5363
	if (!err) {
5364
		PBMSResultRec result;
5365
5366
		/* A failure here is only logged as a warning; err is left unchanged. */
		if (pbms_delete_table_with_blobs(table_path, &result)) {
5367
			xt_logf(XT_NT_WARNING, "pbms_delete_table_with_blobs() Error: %s", result.mr_message);
5368
		}
5369
		
5370
		pbms_completed(NULL, true);
5371
	}
5372
#endif
5373
1455.3.2 by Vladimir Kolesnikov
create/insert/select/drop works
5374
#ifdef DRIZZLED
5375
          /* Delete the file <table path> + DEFAULT_FILE_EXTENSION. This runs whatever
           * the value of err, and the result of my_delete() is discarded.
           */
          std::string path2(ident.getPath());
5376
          path2.append(DEFAULT_FILE_EXTENSION);
5377
          (void)internal::my_delete(path2.c_str(), MYF(0));
5378
#endif
5379
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
5380
	return err;
5381
}
5382
5383
/*
 * Mark the system table at table_path as deleted.
 *
 * Built as a member of PBXTStorageEngine under DRIZZLED and of ha_pbxt
 * otherwise.
 *
 * Throws XT_ERR_PBXT_TABLE_EXISTS when xt_table_exists() reports true for
 * the table's database. Once doesSystemTableExist() reports false, the
 * database is closed for all threads and dropped.
 *
 * Returns 0 on success, otherwise the error code from
 * xt_ha_pbxt_to_mysql_error() or xt_ha_pbxt_thread_error_for_mysql().
 */
#ifdef DRIZZLED
5384
int PBXTStorageEngine::delete_system_table(const char *table_path)
5385
#else
5386
int ha_pbxt::delete_system_table(const char *table_path)
5387
#endif
5388
{
5389
	THD				*thd = current_thd;
5390
	XTExceptionRec	e;
5391
	int				err = 0;
5392
	XTThreadPtr		self;
5393
5394
	/* On failure the error is returned through the exception record e: */
	if (!(self = xt_ha_set_current_thread(thd, &e)))
5395
		return xt_ha_pbxt_to_mysql_error(e.e_xt_err);
5396
5397
	try_(a) {
5398
		xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
5399
5400
		/* Refuse the delete while xt_table_exists() is true for this database: */
		if (xt_table_exists(self->st_database))
5401
			xt_throw_xterr(XT_CONTEXT, XT_ERR_PBXT_TABLE_EXISTS);
5402
5403
		XTSystemTableShare::setSystemTableDeleted(table_path);
5404
5405
		/* Once no system table exists any more, close and drop the database: */
		if (!XTSystemTableShare::doesSystemTableExist()) {
5406
			xt_ha_all_threads_close_database(self, self->st_database);
5407
			xt_drop_database(self, self->st_database);
5408
			xt_unuse_database(self, self);
5409
			xt_ha_close_global_database(self);
5410
		}
5411
	}
5412
	catch_(a) {
5413
		err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5414
	}
5415
	cont_(a);
5416
5417
	return err;
5418
}
5419
5420
/*
5421
 * Renames a table from one name to another from alter table call.
5422
 * This function can be used to move a table from one database to
5423
 * another.
5424
 */
5425
#ifdef DRIZZLED
5426
int PBXTStorageEngine::doRenameTable(Session&,
5427
                                     TableIdentifier& from_ident,
5428
                                     TableIdentifier& to_ident)
5429
{
5430
	const char *from = from_ident.getPath().c_str();
5431
	const char *to = to_ident.getPath().c_str();
1455.3.6 by Vladimir Kolesnikov
fixed alter_table test
5432
5433
        if (strcmp(from, to) == 0)
5434
                return 0;
5435
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
5436
#else
5437
int ha_pbxt::rename_table(const char *from, const char *to)
5438
{
5439
#endif
5440
	THD				*thd = current_thd;
5441
	int				err = 0;
5442
	XTThreadPtr		self;
5443
	XTSharePtr		share;
5444
	XTDatabaseHPtr	to_db;
5445
5446
	if (XTSystemTableShare::isSystemTable(from))
5447
		return rename_system_table(from, to);
5448
5449
	if (!(self = ha_set_current_thread(thd, &err)))
5450
		return xt_ha_pbxt_to_mysql_error(err);
5451
5452
	XT_PRINT2(self, "rename_table (%s -> %s)\n", from, to);
5453
5454
#ifdef PBMS_ENABLED
5455
	PBMSResultRec result;
5456
5457
	err = pbms_rename_table_with_blobs(from, to, &result);
5458
	if (err) {
5459
		xt_logf(XT_NT_ERROR, "pbms_rename_table_with_blobs() Error: %s", result.mr_message);
5460
		return err;
5461
	}
5462
#endif
5463
5464
	try_(a) {
5465
		xt_ha_open_database_of_table(self, (XTPathStrPtr) to);
5466
		to_db = self->st_database;
5467
5468
		xt_ha_open_database_of_table(self, (XTPathStrPtr) from);
5469
5470
		if (self->st_database != to_db)
5471
			xt_throw_xterr(XT_CONTEXT, XT_ERR_CANNOT_CHANGE_DB);
5472
5473
		/*
5474
		 * NOTE: MySQL does not lock before calling rename table!
5475
		 *
5476
		 * We cannot use pb_share because rename_table() is
5477
		 * called without correctly initializing
5478
		 * the handler!
5479
		 */
5480
		share = ha_get_share(self, from, true);
5481
		pushr_(ha_unget_share, share);
5482
		ha_aquire_exclusive_use(self, share, NULL);
5483
		pushr_(ha_release_exclusive_use, share);
5484
		ha_close_open_tables(self, share, NULL);
5485
5486
		self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5487
		xt_rename_table(self, (XTPathStrPtr) from, (XTPathStrPtr) to);
5488
5489
		freer_(); // ha_release_exclusive_use(share)
5490
		freer_(); // ha_unget_share(share)
5491
5492
		/*
5493
		 * If there are no more PBXT tables in the database, we
5494
		 * "drop the database", which deletes all PBXT resources
5495
		 * in the database.
5496
		 */
5497
#ifdef XT_USE_GLOBAL_DB
5498
		/* We now only drop the pbxt system data,
5499
		 * when the PBXT database is dropped.
5500
		 */
5501
		if (!xt_table_exists(self->st_database)) {
5502
			xt_ha_all_threads_close_database(self, self->st_database);
5503
			xt_drop_database(self, self->st_database);
5504
		}
5505
#endif
5506
	}
5507
	catch_(a) {
5508
		err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5509
	}
5510
	cont_(a);
5511
	
5512
#ifdef PBMS_ENABLED
5513
	pbms_completed(NULL, (err == 0));
5514
#endif
5515
1455.3.4 by Vladimir Kolesnikov
fixed easy test cases
5516
#ifdef DRIZZLED
5517
	if (err == 0)
5518
		plugin::StorageEngine::renameDefinitionFromPath(to_ident, from_ident);
5519
#endif
5520
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
5521
	XT_RETURN(err);
5522
}
5523
5524
#ifdef DRIZZLED
5525
int PBXTStorageEngine::rename_system_table(const char *XT_UNUSED(from), const char *XT_UNUSED(to))
5526
#else
5527
int ha_pbxt::rename_system_table(const char *XT_UNUSED(from), const char *XT_UNUSED(to))
5528
#endif
5529
{
5530
	return ER_NOT_SUPPORTED_YET;
5531
}
5532
5533
/* Largest total key length the engine accepts: XT_INDEX_MAX_KEY_SIZE. */
uint ha_pbxt::max_supported_key_length() const
{
	const uint max_key_len = XT_INDEX_MAX_KEY_SIZE;

	return max_key_len;
}
5537
5538
/*
 * Largest length accepted for a single key part. This is 4 less than
 * the total key limit, because there is a little overhead in order to
 * fit.
 */
uint ha_pbxt::max_supported_key_part_length() const
{
	const uint max_part_len = XT_INDEX_MAX_KEY_SIZE-4;

	return max_part_len;
}
5543
5544
/*
5545
 * Called in test_quick_select to determine if indexes should be used.
5546
 *
5547
 * As far as I can tell, time is measured in "disk reads". So the
5548
 * calculation below means the system reads about 20 rows per read.
5549
 *
5550
 * For example a sequence scan uses a read buffer which reads a
5551
 * number of rows at once, or a sequential scan can make use
5552
 * of the cache (so it need to read less).
5553
 */
5554
double ha_pbxt::scan_time()
5555
{
5556
	double result = (double) (stats.records + stats.deleted) / 38.0 + 2;
5557
	return result;
5558
}
5559
5560
/*
5561
 * The next method will never be called if you do not implement indexes.
5562
 */
5563
double ha_pbxt::read_time(uint XT_UNUSED(index), uint ranges, ha_rows rows)
5564
{
5565
	double result = rows2double(ranges+rows);
5566
	return result;
5567
}
5568
5569
/*
5570
 * Given a starting key, and an ending key estimate the number of rows that
5571
 * will exist between the two. end_key may be empty which in case determine
5572
 * if start_key matches any rows.
5573
 * 
5574
 * Called from opt_range.cc by check_quick_keys().
5575
 *
5576
 */
5577
ha_rows ha_pbxt::records_in_range(uint inx, key_range *min_key, key_range *max_key)
5578
{
5579
	XTIndexPtr		ind;
5580
	key_part_map	keypart_map;
5581
	u_int			segement = 0;
5582
	ha_rows			result;
5583
5584
	if (min_key)
5585
		keypart_map = min_key->keypart_map;
5586
	else if (max_key)
5587
		keypart_map = max_key->keypart_map;
5588
	else
5589
		return 1;
5590
	ind = (XTIndexPtr) pb_share->sh_dic_keys[inx];
5591
	
5592
	while (keypart_map & 1) {
5593
		segement++;
5594
		keypart_map = keypart_map >> 1;
5595
	}
5596
5597
	if (segement < 1 || segement > ind->mi_seg_count)
5598
		result = 1;
5599
	else
5600
		result = ind->mi_seg[segement-1].is_recs_in_range;
5601
#ifdef XT_PRINT_INDEX_OPT
5602
	printf("records_in_range %s index %d cols req=%d/%d read_bits=%X write_bits=%X index_bits=%X --> %d\n", pb_open_tab->ot_table->tab_name->ps_path, (int) inx, segement, ind->mi_seg_count, (int) *table->read_set->bitmap, (int) *table->write_set->bitmap, (int) *ind->mi_col_map.bitmap, (int) result);
5603
#endif
5604
	return result;
5605
}
5606
5607
/*
5608
 * create() is called to create a table/database. The variable name will have the name
5609
 * of the table. When create() is called you do not need to worry about opening
5610
 * the table. Also, the FRM file will have already been created so adjusting
5611
 * create_info will not do you any good. You can overwrite the frm file at this
5612
 * point if you wish to change the table definition, but there are no methods
5613
 * currently provided for doing that.
5614
5615
 * Called from handle.cc by ha_create_table().
5616
*/
5617
#ifdef DRIZZLED
5618
int PBXTStorageEngine::doCreateTable(Session&, 
5619
                                     Table& table_arg, 
5620
                                     TableIdentifier& ident,
5621
				     drizzled::message::Table& proto)
5622
{
5623
	const std::string& path = ident.getPath();
5624
	const char *table_path = path.c_str();
5625
#else
5626
int ha_pbxt::create(const char *table_path, TABLE *table_arg, HA_CREATE_INFO *create_info)
5627
{
5628
#endif
5629
	THD				*thd = current_thd;
5630
	int				err = 0;
5631
	XTThreadPtr		self;
5632
	XTDDTable		*tab_def = NULL;
5633
	XTDictionaryRec	dic, source_dic;
5634
5635
	if ((strcmp(table_path, "./pbxt/location") == 0) || 
5636
		(strcmp(table_path, "./pbxt/tables") == 0) ||
5637
		(strcmp(table_path, "./pbxt/statistics") == 0))
5638
		return 0;
5639
5640
	if ((strcmp(table_path, "./pbxt/location") == 0) || (strcmp(table_path, "./pbxt/statistics") == 0))
5641
		return 0;
5642
5643
	memset(&dic, 0, sizeof(dic));
5644
	memset(&source_dic, 0, sizeof(source_dic));
5645
5646
	if (!(self = ha_set_current_thread(thd, &err)))
5647
		return xt_ha_pbxt_to_mysql_error(err);
5648
#ifdef DRIZZLED
5649
	XT_PRINT2(self, "create (%s) %s\n", table_path, (proto.type() == message::Table::TEMPORARY) ? "temporary" : "");
5650
        switch(ident.getType()) {
5651
        	case message::Table::STANDARD:
5652
                	dic.dic_table_type = XT_TABLE_TYPE_STANDARD;
5653
                        break;
5654
5655
                case message::Table::TEMPORARY:
5656
                	dic.dic_table_type = XT_TABLE_TYPE_TEMPORARY;
5657
                        break;
5658
5659
                case message::Table::INTERNAL:
5660
                	dic.dic_table_type = XT_TABLE_TYPE_INTERNAL;
5661
                        break;
5662
5663
                case message::Table::FUNCTION:
5664
                	dic.dic_table_type = XT_TABLE_TYPE_FUNCTION;
5665
			break;
5666
	}
5667
#else
5668
	XT_PRINT2(self, "create (%s) %s\n", table_path, (create_info->options & HA_LEX_CREATE_TMP_TABLE) ? "temporary" : "");
5669
#endif
5670
5671
	STAT_TRACE(self, *thd_query(thd));
5672
5673
	try_(a) {
5674
		xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
5675
5676
#ifdef DRIZZLED
5677
		for (uint i=0; i<TS(&table_arg)->keys; i++) {
5678
			if (table_arg.key_info[i].key_length > XT_INDEX_MAX_KEY_SIZE)
5679
				xt_throw_sulxterr(XT_CONTEXT, XT_ERR_KEY_TOO_LARGE, table_arg.key_info[i].name, (u_long) XT_INDEX_MAX_KEY_SIZE);
5680
		}
5681
#else
5682
		for (uint i=0; i<TS(table_arg)->keys; i++) {
5683
			if (table_arg->key_info[i].key_length > XT_INDEX_MAX_KEY_SIZE)
5684
				xt_throw_sulxterr(XT_CONTEXT, XT_ERR_KEY_TOO_LARGE, table_arg->key_info[i].name, (u_long) XT_INDEX_MAX_KEY_SIZE);
5685
		}
5686
#endif
5687
5688
		/* ($) auto_increment_value will be zero if 
5689
		 * AUTO_INCREMENT is not used. Otherwise
5690
		 * Query was ALTER TABLE ... AUTO_INCREMENT = x; or 
5691
		 * CREATE TABLE ... AUTO_INCREMENT = x;
5692
		 */
5693
#ifdef XT_USE_DEFAULT_MEMORY_TABS
5694
		if (create_info->storage_media == HA_SM_DEFAULT)
5695
			source_dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
5696
#endif
5697
5698
#ifdef DRIZZLED
5699
		StorageEngine::writeDefinitionFromPath(ident, proto);
5700
5701
		tab_def = xt_ri_create_table(self, true, (XTPathStrPtr) table_path, const_cast<char *>(thd->getQueryString().c_str()), myxt_create_table_from_table(self, &table_arg), &source_dic);
5702
		tab_def->checkForeignKeys(self, proto.type() == message::Table::TEMPORARY);
5703
#else
5704
		// tab_def = xt_ri_create_table(self, true, (XTPathStrPtr) table_path, *thd_query(thd), myxt_create_table_from_table(self, table_arg));
5705
		tab_def = xt_ri_create_table(self, true, (XTPathStrPtr) table_path, *thd_query(thd), myxt_create_table_from_table(self, table_arg), &source_dic);
5706
		tab_def->checkForeignKeys(self, create_info->options & HA_LEX_CREATE_TMP_TABLE);
5707
		dic.dic_table_type = XT_TABLE_TYPE_STANDARD;
5708
#endif
5709
5710
		dic.dic_table = tab_def;
5711
#ifdef DRIZZLED
5712
		dic.dic_my_table = &table_arg;
5713
		dic.dic_tab_flags = source_dic.dic_tab_flags;
5714
		//if (create_info.storage_media == HA_SM_MEMORY)
5715
		//	dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
5716
		if (proto.type() == message::Table::TEMPORARY)
5717
			dic.dic_tab_flags |= XT_TF_REAL_TEMP_TABLE;
5718
		if (myxt_temp_table_name(table_path))
5719
			dic.dic_tab_flags |= XT_TF_DDL_TEMP_TABLE;
5720
5721
		dic.dic_min_auto_inc = (xtWord8) proto.options().auto_increment_value(); /* ($) */
5722
		dic.dic_def_ave_row_size =  proto.options().avg_row_length();
5723
#else
5724
		dic.dic_my_table = table_arg;
5725
		dic.dic_tab_flags = source_dic.dic_tab_flags;
5726
5727
		if (create_info->storage_media == HA_SM_MEMORY)
5728
			dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
5729
		if (create_info->options & HA_LEX_CREATE_TMP_TABLE)
5730
			dic.dic_tab_flags |= XT_TF_REAL_TEMP_TABLE;
5731
		if (myxt_temp_table_name(table_path))
5732
			dic.dic_tab_flags |= XT_TF_DDL_TEMP_TABLE;
5733
5734
		dic.dic_min_auto_inc = (xtWord8) create_info->auto_increment_value; /* ($) */
5735
		dic.dic_def_ave_row_size = (xtWord8) table_arg->s->avg_row_length;
5736
#endif
5737
		myxt_setup_dictionary(self, &dic);
5738
5739
		/*
5740
		 * We used to ignore the value of foreign_key_checks flag and allowed creation
5741
		 * of tables with "hanging" references. Now we validate FKs if foreign_key_checks != 0
5742
		 */
5743
		self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5744
5745
		/*
5746
		 * Previously I set delete_if_exists=TRUE because
5747
		 * CREATE TABLE was being used to TRUNCATE.
5748
		 * This was due to the flag HTON_CAN_RECREATE.
5749
		 * Now I could set delete_if_exists=FALSE, but
5750
		 * leaving it TRUE should not cause any problems.
5751
		 */
5752
		xt_create_table(self, (XTPathStrPtr) table_path, &dic);
5753
	}
5754
	catch_(a) {
5755
		if (tab_def)
5756
			tab_def->finalize(self);
5757
		dic.dic_table = NULL;
5758
		err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5759
	}
5760
	cont_(a);
5761
5762
	/* Free the dictionary, but not 'table_arg'! */
5763
	dic.dic_my_table = NULL;
5764
	myxt_free_dictionary(self, &dic);
5765
5766
	XT_RETURN(err);
5767
}
5768
5769
/*
 * Fill in the minimum auto-increment value of the open table, unless
 * the caller already set one (HA_CREATE_USED_AUTO) or no table is open.
 */
void ha_pbxt::update_create_info(HA_CREATE_INFO *create_info)
{
	XTOpenTablePtr	open_tab = pb_open_tab;

	if (!open_tab)
		return;

	if (create_info->used_fields & HA_CREATE_USED_AUTO)
		return;

	create_info->auto_increment_value = open_tab->ot_table->tab_dic.dic_min_auto_inc;
}
5780
5781
#ifdef DRIZZLED
5782
/*
 * Begin a PBXT transaction for the session.
 *
 * Sets the thread's isolation mode, foreign-key and auto-commit flags
 * from the session, resets the per-transaction state flags, waits for
 * recovery to reach XT_RECOVER_SWEPT, opens the database if the thread
 * has none, and then starts the transaction.
 *
 * Returns 0 on success, otherwise an error code.
 */
int PBXTStorageEngine::doStartTransaction(Session *thd, start_transaction_option_t XT_UNUSED(options))
{
	int err = 0;
	XTThreadPtr self = ha_set_current_thread(thd, &err);	

	/*
	 * ha_set_current_thread() returns NULL on failure with the reason
	 * in err. Bail out as the other callers in this file do, instead
	 * of dereferencing a NULL thread below.
	 */
	if (!self)
		return xt_ha_pbxt_to_mysql_error(err);

	XT_PRINT0(self, "PBXTStorageEngine::doStartTransaction\n");

	/* Transaction mode numbers must be identical! */
	(void) ASSERT_NS(ISO_READ_UNCOMMITTED == XT_XACT_UNCOMMITTED_READ);
	(void) ASSERT_NS(ISO_SERIALIZABLE == XT_XACT_SERIALIZABLE);

	/* Only two modes are used: committed read or repeatable read. */
	self->st_xact_mode = thd_tx_isolation(thd) <= ISO_READ_COMMITTED ? XT_XACT_COMMITTED_READ : XT_XACT_REPEATABLE_READ;
	self->st_ignore_fkeys = (thd_test_options(thd,OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
	self->st_auto_commit = (thd_test_options(thd, (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) == 0;
	self->st_table_trans = FALSE;
	self->st_abort_trans = FALSE;
	self->st_stat_ended = FALSE;
	self->st_stat_trans = FALSE;
	xt_xres_wait_for_recovery(self, XT_RECOVER_SWEPT);

	if (!self->st_database)
		xt_ha_open_database_of_table(self, NULL);

	if (!xt_xn_begin(self)) {
			err = xt_ha_pbxt_thread_error_for_mysql(thd, self, /*pb_ignore_dup_key*/false);
			//pb_ex_in_use = 0;
	}

	return err;
}
5812
5813
/*
 * Savepoint hook: performs no savepoint work of its own, it only
 * returns the session thread's error state translated for the server.
 * The savepoint argument is ignored.
 */
int PBXTStorageEngine::doSetSavepoint(drizzled::Session* thd, drizzled::NamedSavepoint&)
{
	XTThreadPtr self = xt_ha_thd_to_self(thd);

	return xt_ha_pbxt_thread_error_for_mysql(thd, self, false);
}
5817
        
5818
/*
 * Rollback-to-savepoint hook: performs no rollback of its own, it only
 * returns the session thread's error state translated for the server.
 * The savepoint argument is ignored.
 */
int PBXTStorageEngine::doRollbackToSavepoint(drizzled::Session* thd, drizzled::NamedSavepoint&)
{
	XTThreadPtr self = xt_ha_thd_to_self(thd);

	return xt_ha_pbxt_thread_error_for_mysql(thd, self, false);
}
5822
5823
/*
 * Release-savepoint hook: performs no release of its own, it only
 * returns the session thread's error state translated for the server.
 * The savepoint argument is ignored.
 */
int PBXTStorageEngine::doReleaseSavepoint(drizzled::Session* thd, drizzled::NamedSavepoint&)
{
	XTThreadPtr self = xt_ha_thd_to_self(thd);

	return xt_ha_pbxt_thread_error_for_mysql(thd, self, false);
}
5827
5828
/*
 * Commit the session's PBXT transaction.
 *
 * The commit is only carried out when neither OPTION_NOT_AUTOCOMMIT
 * nor OPTION_BEGIN is set for the session and the session has a PBXT
 * thread attached. Otherwise nothing happens and 0 is returned.
 *
 * Returns 0 on success, otherwise an error code.
 */
int PBXTStorageEngine::doCommit(drizzled::Session* thd, bool)
{
	XTThreadPtr self = (XTThreadPtr) *thd->getEngineData(pbxt_hton);
	bool real_commit = !session_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN);

	XT_PRINT1(self, "PBXTStorageEngine::doCommit(real_commit = %s)\n", real_commit ? "true" : "false");

	if (!real_commit || !self)
		return 0;

	if (xt_xn_commit(self))
		return 0;

	return xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
}
5844
5845
/*
 * Roll back the session's PBXT transaction.
 *
 * The rollback is only carried out when neither OPTION_NOT_AUTOCOMMIT
 * nor OPTION_BEGIN is set for the session and the session has a PBXT
 * thread attached. Otherwise nothing happens and 0 is returned.
 *
 * Returns 0 on success, otherwise an error code.
 */
int PBXTStorageEngine::doRollback(drizzled::Session* thd, bool)
{
	XTThreadPtr self = (XTThreadPtr) *thd->getEngineData(pbxt_hton);
	bool real_rollback = !session_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN);

	XT_PRINT1(self, "PBXTStorageEngine::doRollback(real_commit = %s)\n", real_rollback ? "true" : "false");

	if (!real_rollback || !self)
		return 0;

	if (xt_xn_rollback(self))
		return 0;

	return xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
}
5861
1455.3.2 by Vladimir Kolesnikov
create/insert/select/drop works
5862
#if 0
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
5863
void PBXTStorageEngine::doGetTableIdentifiers(drizzled::CachedDirectory &directory,
5864
                                           drizzled::SchemaIdentifier &schema_identifier,
5865
                                           drizzled::TableIdentifiers &set_of_identifiers)
5866
{
5867
  CachedDirectory::Entries entries= directory.getEntries();
5868
5869
  for (CachedDirectory::Entries::iterator entry_iter= entries.begin();
5870
       entry_iter != entries.end(); ++entry_iter)
5871
  {
5872
    CachedDirectory::Entry *entry= *entry_iter;
5873
    const std::string *filename= &entry->filename;
5874
5875
    assert(filename->size());
5876
5877
    const char *ext= strchr(filename->c_str(), '.');
5878
5879
    if (ext == NULL || my_strcasecmp(system_charset_info, ext, DEFAULT_FILE_EXTENSION) ||
5880
        (filename->compare(0, strlen(TMP_FILE_PREFIX), TMP_FILE_PREFIX) == 0))
5881
    { }
5882
    else
5883
    {
5884
      char uname[NAME_LEN + 1];
5885
      uint32_t file_name_len;
5886
5887
      file_name_len= filename_to_tablename(filename->c_str(), uname, sizeof(uname));
5888
      // TODO: Remove need for memory copy here
5889
      uname[file_name_len - sizeof(DEFAULT_FILE_EXTENSION) + 1]= '\0'; // Subtract ending, place NULL 
5890
5891
      set_of_identifiers.push_back(TableIdentifier(schema_identifier, uname));
5892
    }
5893
  }
5894
}
5895
5896
void PBXTStorageEngine::doGetTableNames(
5897
	CachedDirectory &directory, 
5898
	SchemaIdentifier&, 
5899
	std::set<std::string>& set_of_names)
5900
{
5901
  CachedDirectory::Entries entries= directory.getEntries();
5902
5903
  for (CachedDirectory::Entries::iterator entry_iter= entries.begin();
5904
       entry_iter != entries.end(); ++entry_iter)
5905
  {
5906
    CachedDirectory::Entry *entry= *entry_iter;
5907
    const std::string *filename= &entry->filename;
5908
5909
    assert(filename->size());
5910
5911
    const char *ext= strchr(filename->c_str(), '.');
5912
5913
    if (ext == NULL || my_strcasecmp(system_charset_info, ext, DEFAULT_FILE_EXTENSION) ||
5914
        (filename->compare(0, strlen(TMP_FILE_PREFIX), TMP_FILE_PREFIX) == 0))
5915
    { }
5916
    else
5917
    {
5918
      char uname[NAME_LEN + 1];
5919
      uint32_t file_name_len;
5920
5921
      file_name_len= filename_to_tablename(filename->c_str(), uname, sizeof(uname));
5922
      // TODO: Remove need for memory copy here
5923
      uname[file_name_len - sizeof(DEFAULT_FILE_EXTENSION) + 1]= '\0'; // Subtract ending, place NULL 
5924
      set_of_names.insert(uname);
5925
    }
5926
  }
5927
}
1455.3.2 by Vladimir Kolesnikov
create/insert/select/drop works
5928
#endif
1455.3.1 by Vladimir Kolesnikov
lp:drizzle + pbxt 1.1 + test results
5929
5930
/*
 * A table exists when its definition file, i.e. the table path with
 * DEFAULT_FILE_EXTENSION appended, is present on disk.
 */
bool PBXTStorageEngine::doDoesTableExist(Session&, TableIdentifier &identifier)
{
  std::string proto_path(identifier.getPath());

  proto_path += DEFAULT_FILE_EXTENSION;

  /* access() yields 0 when the F_OK check succeeds. */
  return access(proto_path.c_str(), F_OK) == 0;
}
5942
5943
#endif // DRIZZLED
5944
5945
char *ha_pbxt::get_foreign_key_create_info()
5946
{
5947
	THD					*thd = current_thd;
5948
	int					err = 0;
5949
	XTThreadPtr			self;
5950
	XTStringBufferRec	tab_def = { 0, 0, 0 };
5951
5952
	if (!(self = ha_set_current_thread(thd, &err))) {
5953
		xt_ha_pbxt_to_mysql_error(err);
5954
		return NULL;
5955
	}
5956
5957
	if (!pb_open_tab) {
5958
		if ((err = reopen()))
5959
			return NULL;
5960
	}
5961
5962
	if (!pb_open_tab->ot_table->tab_dic.dic_table)
5963
		return NULL;
5964
5965
	try_(a) {
5966
		pb_open_tab->ot_table->tab_dic.dic_table->loadForeignKeyString(self, &tab_def);
5967
	}
5968
	catch_(a) {
5969
		xt_sb_set_size(self, &tab_def, 0);
5970
		err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
5971
	}
5972
	cont_(a);
5973
5974
	return tab_def.sb_cstring;
5975
}
5976
5977
void ha_pbxt::free_foreign_key_create_info(char* str)
5978
{
5979
	xt_free(NULL, str);
5980
}
5981
5982
bool ha_pbxt::get_error_message(int XT_UNUSED(error), String *buf)
5983
{
5984
	THD				*thd = current_thd;
5985
	int				err = 0;
5986
	XTThreadPtr		self;
5987
5988
	if (!(self = ha_set_current_thread(thd, &err)))
5989
		return FALSE;
5990
5991
	if (!self->t_exception.e_xt_err)
5992
		return FALSE;
5993
5994
	buf->copy(self->t_exception.e_err_msg, (uint32_t) strlen(self->t_exception.e_err_msg), system_charset_info);
5995
	return TRUE;
5996
}
5997
5998
/* 
5999
 * get info about FKs of the currently open table
6000
 * used in 
6001
 * 1. REPLACE; is > 0 if table is referred by a FOREIGN KEY 
6002
 * 2. INFORMATION_SCHEMA tables: TABLE_CONSTRAINTS, REFERENTIAL_CONSTRAINTS
6003
 * Return value: as of 5.1.24 it's ignored
6004
 */
6005
#ifdef DRI_IS
6006
/*
 * Append one FOREIGN_KEY_INFO entry to 'f_key_list' for each foreign
 * key in the dictionary of the currently open table. All strings and
 * the entries themselves are allocated from the THD.
 *
 * A foreign key whose reference index cannot be resolved is skipped.
 *
 * Returns 0 on success, otherwise an error code.
 *
 * NOTE(review): pb_open_tab is used without a NULL check, and the path
 * parsing assumes the referenced table path contains at least two
 * directory separators -- confirm that callers guarantee both.
 */
int ha_pbxt::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
{
	int err = 0;
	XTThreadPtr	self;
	const char *action_name;

	self = ha_set_current_thread(thd, &err);
	if (self == NULL)
		return xt_ha_pbxt_to_mysql_error(err);

	try_(a) {
		XTDDTable *table_dic = pb_open_tab->ot_table->tab_dic.dic_table;

		if (table_dic == NULL)
			xt_throw_errno(XT_CONTEXT, XT_ERR_NO_DICTIONARY);

		for (int i = 0, fk_count = table_dic->dt_fkeys.size(); i < fk_count; i++) {
			FOREIGN_KEY_INFO *fk_info= new	// assumed that C++ exceptions are disabled
				(thd_alloc(thd, sizeof(FOREIGN_KEY_INFO))) FOREIGN_KEY_INFO;

			if (fk_info == NULL)
				xt_throw_errno(XT_CONTEXT, XT_ENOMEM);

			XTDDForeignKey *fk = table_dic->dt_fkeys.itemAt(i);

			/*
			 * Split the referenced table path into its last two
			 * components: scan backwards to the separator in front of
			 * the table name, then to the one in front of the database
			 * name, and finally step both pointers past the separator.
			 */
			const char *path = fk->fk_ref_tab_name->ps_path;
			const char *ref_tbl_name = path + strlen(path);

			while (ref_tbl_name != path && !XT_IS_DIR_CHAR(*ref_tbl_name)) 
				ref_tbl_name--;

			const char * ref_db_name = ref_tbl_name - 1;

			while (ref_db_name != path && !XT_IS_DIR_CHAR(*ref_db_name)) 
				ref_db_name--;

			ref_tbl_name++;
			ref_db_name++;

			fk_info->forein_id = thd_make_lex_string(thd, 0,
				fk->co_name, (uint) strlen(fk->co_name), 1);

			fk_info->referenced_db = thd_make_lex_string(thd, 0,
				ref_db_name, (uint) (ref_tbl_name - ref_db_name - 1), 1);

			fk_info->referenced_table = thd_make_lex_string(thd, 0,
				ref_tbl_name, (uint) strlen(ref_tbl_name), 1);

			fk_info->referenced_key_name = NULL;

			XTIndex *ref_index = fk->getReferenceIndexPtr();
			if (ref_index == NULL) /* can be NULL if another thread changes referenced table at the moment */
				continue;

			XTDDTable *ref_table = fk->fk_ref_table;

			// might be a self-reference
			if ((ref_table == NULL) 
				&& (xt_tab_compare_names(path, table_dic->dt_table->tab_name->ps_path) == 0)) {
				ref_table = table_dic;
			}

			/* Look up the name of the referenced index. */
			if (ref_table != NULL) {
				const XTList<XTDDIndex>& ix_list = ref_table->dt_indexes;
				for (int j = 0, ix_count = ix_list.size(); j < ix_count; j++) {
					XTDDIndex *ddix = ix_list.itemAt(j);
					if (ddix->in_index ==  ref_index->mi_index_no) {
						const char *ix_name = 
							ddix->co_name ? ddix->co_name : ddix->co_ind_name;
						fk_info->referenced_key_name = thd_make_lex_string(thd, 0,
							ix_name, (uint) strlen(ix_name), 1);
						break;
					}
				}
			}

			action_name = XTDDForeignKey::actionTypeToString(fk->fk_on_delete);
			fk_info->delete_method = thd_make_lex_string(thd, 0,
				action_name, (uint) strlen(action_name), 1);
			action_name = XTDDForeignKey::actionTypeToString(fk->fk_on_update);
			fk_info->update_method = thd_make_lex_string(thd, 0,
				action_name, (uint) strlen(action_name), 1);

			/* Referencing columns. */
			const XTList<XTDDColumnRef>& cols = fk->co_cols;
			for (int j = 0, col_count = cols.size(); j < col_count; j++) {
				XTDDColumnRef *col_ref= cols.itemAt(j);
				fk_info->foreign_fields.push_back(thd_make_lex_string(thd, 0,
					col_ref->cr_col_name, (uint) strlen(col_ref->cr_col_name), 1));
			}

			/* Referenced columns. */
			const XTList<XTDDColumnRef>& ref_cols = fk->fk_ref_cols;
			for (int j = 0, col_count = ref_cols.size(); j < col_count; j++) {
				XTDDColumnRef *col_ref= ref_cols.itemAt(j);
				fk_info->referenced_fields.push_back(thd_make_lex_string(thd, 0,
					col_ref->cr_col_name, (uint) strlen(col_ref->cr_col_name), 1));
			}

			f_key_list->push_back(fk_info);
		}
	}
	catch_(a) {
		err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
	}
	cont_(a);

	return err; 
}
6113
6114
/*
 * Returns 1 when the open table's dictionary has a list of referencing
 * tables (dt_trefs), and 0 when it has none or there is no dictionary.
 */
uint ha_pbxt::referenced_by_foreign_key()
{
	XTDDTable *table_dic = pb_open_tab->ot_table->tab_dic.dic_table;

	/* Check the list of referencing tables: */
	if (table_dic && table_dic->dt_trefs)
		return 1;

	return 0;
}
6123
#endif // DRI_IS
6124
6125
struct st_mysql_sys_var
6126
{
6127
	MYSQL_PLUGIN_VAR_HEADER;
6128
};
6129
6130
#if MYSQL_VERSION_ID < 60000
6131
#if MYSQL_VERSION_ID >= 50124
6132
#define USE_CONST_SAVE
6133
#endif
6134
#else
6135
#if MYSQL_VERSION_ID >= 60005
6136
#define USE_CONST_SAVE
6137
#endif
6138
#endif
6139
6140
#ifdef DRIZZLED
6141
#define st_mysql_sys_var drizzled::drizzle_sys_var
6142
#endif
6143
6144
#ifdef USE_CONST_SAVE
6145
static void pbxt_record_cache_size_func(THD *XT_UNUSED(thd), struct st_mysql_sys_var *var, void *tgt, const void *save)
6146
#else
6147
static void pbxt_record_cache_size_func(THD *XT_UNUSED(thd), struct st_mysql_sys_var *var, void *tgt, void *save)
6148
#endif
6149
{
6150
	xtInt8	record_cache_size;
6151
6152
	char *old= *(char **) tgt;
6153
	*(char **)tgt= *(char **) save;
6154
	if (var->flags & PLUGIN_VAR_MEMALLOC)
6155
	{
6156
		*(char **)tgt= my_strdup(*(char **) save, MYF(0));
6157
		my_free(old, MYF(0));
6158
	}
6159
	record_cache_size = ha_set_variable(&pbxt_record_cache_size, &vp_record_cache_size);
6160
	xt_tc_set_cache_size((size_t) record_cache_size);
6161
#ifdef DEBUG
6162
	char buffer[200];
6163
6164
	sprintf(buffer, "pbxt_record_cache_size=%llu\n", (u_llong) record_cache_size);
6165
	xt_logf(XT_NT_INFO, buffer);
6166
#endif
6167
}
6168
6169
#ifndef DRIZZLED
6170
struct st_mysql_storage_engine pbxt_storage_engine = {
6171
	MYSQL_HANDLERTON_INTERFACE_VERSION
6172
};
6173
static st_mysql_information_schema pbxt_statitics = {
6174
	MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION
6175
};
6176
#endif
6177
6178
#if MYSQL_VERSION_ID >= 50118
6179
static MYSQL_SYSVAR_STR(index_cache_size, pbxt_index_cache_size,
6180
  PLUGIN_VAR_READONLY,
6181
  "The amount of memory allocated to the index cache, used only to cache index data.",
6182
  NULL, NULL, NULL);
6183
6184
static MYSQL_SYSVAR_STR(record_cache_size, pbxt_record_cache_size,
6185
  PLUGIN_VAR_READONLY, // PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC,
6186
  "The amount of memory allocated to the record cache used to cache table data.",
6187
  NULL, pbxt_record_cache_size_func, NULL);
6188
6189
static MYSQL_SYSVAR_STR(log_cache_size, pbxt_log_cache_size,
6190
  PLUGIN_VAR_READONLY,
6191
  "The amount of memory allocated to the transaction log cache used to cache transaction log data.",
6192
  NULL, NULL, NULL);
6193
6194
static MYSQL_SYSVAR_STR(log_file_threshold, pbxt_log_file_threshold,
6195
  PLUGIN_VAR_READONLY,
6196
  "The size of a transaction log before rollover, and a new log is created.",
6197
  NULL, NULL, NULL);
6198
6199
static MYSQL_SYSVAR_STR(transaction_buffer_size, pbxt_transaction_buffer_size,
6200
  PLUGIN_VAR_READONLY,
6201
  "The size of the global transaction log buffer (the engine allocates 2 buffers of this size).",
6202
  NULL, NULL, NULL);
6203
6204
static MYSQL_SYSVAR_STR(log_buffer_size, pbxt_log_buffer_size,
6205
  PLUGIN_VAR_READONLY,
6206
  "The size of the buffer used to cache data from transaction and data logs during sequential scans, or when writing a data log.",
6207
  NULL, NULL, NULL);
6208
6209
static MYSQL_SYSVAR_STR(checkpoint_frequency, pbxt_checkpoint_frequency,
6210
  PLUGIN_VAR_READONLY,
6211
  "The size of the transaction data buffer which is allocate by each thread.",
6212
  NULL, NULL, NULL);
6213
6214
static MYSQL_SYSVAR_STR(data_log_threshold, pbxt_data_log_threshold,
6215
  PLUGIN_VAR_READONLY,
6216
  "The maximum size of a data log file.",
6217
  NULL, NULL, NULL);
6218
6219
static MYSQL_SYSVAR_STR(data_file_grow_size, pbxt_data_file_grow_size,
6220
  PLUGIN_VAR_READONLY,
6221
  "The amount by which the handle data files (.xtd) grow.",
6222
  NULL, NULL, NULL);
6223
6224
static MYSQL_SYSVAR_STR(row_file_grow_size, pbxt_row_file_grow_size,
6225
  PLUGIN_VAR_READONLY,
6226
  "The amount by which the row pointer files (.xtr) grow.",
6227
  NULL, NULL, NULL);
6228
6229
static MYSQL_SYSVAR_STR(record_write_threshold, pbxt_record_write_threshold,
6230
  PLUGIN_VAR_READONLY,
6231
  "The amount data written to the record files (.xtd and .xtr) before the changes are applied to the database.",
6232
  NULL, NULL, NULL);
6233
6234
static MYSQL_SYSVAR_INT(garbage_threshold, xt_db_garbage_threshold,
6235
	PLUGIN_VAR_OPCMDARG,
6236
	"The percentage of garbage in a repository file before it is compacted.",
6237
	NULL, NULL, XT_DL_DEFAULT_GARBAGE_LEVEL, 0, 100, 1);
6238
6239
static MYSQL_SYSVAR_INT(log_file_count, xt_db_log_file_count,
6240
	PLUGIN_VAR_OPCMDARG,
6241
	"The minimum number of transaction logs used.",
6242
	NULL, NULL, XT_DL_DEFAULT_XLOG_COUNT, 1, 20000, 1);
6243
6244
static MYSQL_SYSVAR_INT(auto_increment_mode, xt_db_auto_increment_mode,
6245
	PLUGIN_VAR_OPCMDARG,
6246
	"The auto-increment mode, 0 = MySQL standard (default), 1 = previous ID's never reused.",
6247
	NULL, NULL, XT_AUTO_INCREMENT_DEF, 0, 1, 1);
6248
6249
/* {RN145} */
6250
static MYSQL_SYSVAR_INT(offline_log_function, xt_db_offline_log_function,
6251
	PLUGIN_VAR_OPCMDARG,
6252
	"Determines what happens to transaction logs when the are moved offline, 0 = recycle logs (default), 1 = delete logs (default on Mac OS X), 2 = keep logs.",
6253
	NULL, NULL, XT_OFFLINE_LOG_FUNCTION_DEF, 0, 2, 1);
6254
6255
/* {RN150} */
6256
static MYSQL_SYSVAR_INT(sweeper_priority, xt_db_sweeper_priority,
6257
	PLUGIN_VAR_OPCMDARG,
6258
	"Determines the priority of the background sweeper process, 0 = low (default), 1 = normal (same as user threads), 2 = high.",
6259
	NULL, NULL, XT_PRIORITY_LOW, XT_PRIORITY_LOW, XT_PRIORITY_HIGH, 1);
6260
6261
#ifndef DEBUG
6262
static MYSQL_SYSVAR_BOOL(support_xa, pbxt_support_xa,
6263
	PLUGIN_VAR_OPCMDARG,
6264
	"Enable PBXT support for the XA two-phase commit, default is enabled",
6265
	NULL, NULL, TRUE);
6266
#else
6267
static MYSQL_SYSVAR_BOOL(support_xa, pbxt_support_xa,
6268
	PLUGIN_VAR_OPCMDARG,
6269
	"Enable PBXT support for the XA two-phase commit, default is disabled (due to assertion failure in MySQL)",
6270
	/* The problem is, in MySQL an assertion fails in debug mode: 
6271
	 * Assertion failed: (total_ha_2pc == (ulong) opt_bin_log+1), function ha_recover, file handler.cc, line 1557.
6272
     */
6273
	NULL, NULL, FALSE);
6274
#endif
6275
6276
static MYSQL_SYSVAR_INT(index_dirty_threshold, xt_db_index_dirty_threshold,
6277
	PLUGIN_VAR_OPCMDARG,
6278
	"The percentage of the index cache that must be dirty before the index cache is flushed.",
6279
	NULL, NULL, XT_DL_DEFAULT_INDEX_DIRTY_LEVEL, 0, 100, 1);
6280
	
6281
static MYSQL_SYSVAR_INT(flush_log_at_trx_commit, xt_db_flush_log_at_trx_commit,
6282
	PLUGIN_VAR_OPCMDARG,
6283
	"Determines whether the transaction log is written and/or flushed when a transaction is committed (no matter what the setting the log is written and flushed once per second), 0 = no write & no flush, 1 = write & flush (default), 2 = write & no flush.",
6284
	NULL, NULL, 1, 0, 2, 1);
6285
6286
static struct st_mysql_sys_var* pbxt_system_variables[] = {
6287
  MYSQL_SYSVAR(index_cache_size),
6288
  MYSQL_SYSVAR(record_cache_size),
6289
  MYSQL_SYSVAR(log_cache_size),
6290
  MYSQL_SYSVAR(log_file_threshold),
6291
  MYSQL_SYSVAR(transaction_buffer_size),
6292
  MYSQL_SYSVAR(log_buffer_size),
6293
  MYSQL_SYSVAR(checkpoint_frequency),
6294
  MYSQL_SYSVAR(data_log_threshold),
6295
  MYSQL_SYSVAR(data_file_grow_size),
6296
  MYSQL_SYSVAR(row_file_grow_size),
6297
  MYSQL_SYSVAR(record_write_threshold),
6298
  MYSQL_SYSVAR(garbage_threshold),
6299
  MYSQL_SYSVAR(log_file_count),
6300
  MYSQL_SYSVAR(auto_increment_mode),
6301
  MYSQL_SYSVAR(offline_log_function),
6302
  MYSQL_SYSVAR(sweeper_priority),
6303
  MYSQL_SYSVAR(support_xa),
6304
  MYSQL_SYSVAR(index_dirty_threshold),
6305
  MYSQL_SYSVAR(flush_log_at_trx_commit),
6306
  NULL
6307
};
6308
#endif
6309
6310
#ifdef DRIZZLED
6311
DRIZZLE_DECLARE_PLUGIN
6312
{
6313
	DRIZZLE_VERSION_ID,
6314
	"PBXT",
6315
	"1.0",
6316
        "Paul McCullagh, PrimeBase Technologies GmbH",
6317
        "High performance, multi-versioning transactional engine",
6318
        PLUGIN_LICENSE_GPL,
6319
        pbxt_init, /* Plugin Init */
6320
        pbxt_system_variables,          /* system variables                */
6321
        NULL                                            /* config options                  */
6322
}
6323
DRIZZLE_DECLARE_PLUGIN_END;
6324
#else // MySQL case
6325
mysql_declare_plugin(pbxt)
6326
{
6327
        MYSQL_STORAGE_ENGINE_PLUGIN,
6328
        &pbxt_storage_engine,
6329
	"PBXT",
6330
        "Paul McCullagh, PrimeBase Technologies GmbH",
6331
        "High performance, multi-versioning transactional engine",
6332
        PLUGIN_LICENSE_GPL,
6333
        pbxt_init, /* Plugin Init */
6334
        pbxt_end, /* Plugin Deinit */
6335
        0x0001 /* 0.1 */,
6336
        NULL,                       /* status variables                */
6337
#if MYSQL_VERSION_ID >= 50118
6338
        pbxt_system_variables,          /* system variables                */
6339
#else
6340
	NULL,
6341
#endif
6342
	NULL						/* config options                  */
6343
}, {
6344
	MYSQL_INFORMATION_SCHEMA_PLUGIN,
6345
	&pbxt_statitics,
6346
	"PBXT_STATISTICS",
6347
	"Paul McCullagh, PrimeBase Technologies GmbH",
6348
	"PBXT internal system statitics",
6349
	PLUGIN_LICENSE_GPL,
6350
	pbxt_init_statistics,						/* plugin init */
6351
	pbxt_exit_statistics,						/* plugin deinit */
6352
	0x0005,
6353
	NULL,										/* status variables */
6354
	NULL,										/* system variables */
6355
	NULL										/* config options */
6356
}
6357
mysql_declare_plugin_end;
6358
#endif
6359
6360
#if defined(XT_WIN) && defined(XT_COREDUMP)
6361
6362
/*
6363
 * WINDOWS CORE DUMP SUPPORT
6364
 *
6365
 * MySQL supports core dumping on Windows with --core-file command line option. 
6366
 * However it creates dumps with the MiniDumpNormal option which saves only stack traces.
6367
 *
6368
 * We instead (or in addition) create dumps with the MiniDumpWithPrivateReadWriteMemory option
6369
 * which saves all available information. To enable core dumping enable XT_COREDUMP
6370
 * at compile time.
6371
 * In addition, pbxt_crash_debug must be set to TRUE which is the case if XT_CRASH_DEBUG
6372
 * is defined.
6373
 * This switch is also controlled by creating a file called "no-debug" or "crash-debug"
6374
 * in the pbxt database directory.
6375
 */
6376
6377
/*
 * Local copy of the dump-type flags accepted by MiniDumpWriteDump(),
 * declared here so that the dbghelp header is not required at compile time.
 * The values are bit flags.
 */
typedef enum _MINIDUMP_TYPE {
	MiniDumpNormal							= 0x0000,
	MiniDumpWithDataSegs					= 0x0001,
	MiniDumpWithFullMemory					= 0x0002,
	MiniDumpWithHandleData					= 0x0004,
	MiniDumpFilterMemory					= 0x0008,
	MiniDumpScanMemory						= 0x0010,
	MiniDumpWithUnloadedModules				= 0x0020,
	MiniDumpWithIndirectlyReferencedMemory	= 0x0040,
	MiniDumpFilterModulePaths				= 0x0080,
	MiniDumpWithProcessThreadData			= 0x0100,
	MiniDumpWithPrivateReadWriteMemory		= 0x0200	/* no trailing comma: not valid before C++11/C99 */
} MINIDUMP_TYPE;
6390
6391
typedef struct _MINIDUMP_EXCEPTION_INFORMATION {
6392
    DWORD ThreadId;
6393
    PEXCEPTION_POINTERS ExceptionPointers;
6394
    BOOL ClientPointers;
6395
} MINIDUMP_EXCEPTION_INFORMATION, *PMINIDUMP_EXCEPTION_INFORMATION;
6396
6397
/*
 * Function pointer type for the "MiniDumpWriteDump" entry point, which is
 * looked up in DBGHELP.DLL at run time with GetProcAddress().
 */
typedef BOOL (WINAPI *MINIDUMPWRITEDUMP)(HANDLE hProcess,
										 DWORD dwPid,
										 HANDLE hFile,
										 MINIDUMP_TYPE DumpType,
										 void *ExceptionParam,
										 void *UserStreamParam,
										 void *CallbackParam);
6406
6407
/*
 * Prefix prepended to "DBGHELP.DLL" and to the dump file name by core_dump().
 * Empty by default; presumably filled in elsewhere with the directory to
 * use (verify against the code that sets it).
 */
char base_path[_MAX_PATH] = "";

/* Receives the full path of the dump file that core_dump() tries to create. */
char dump_path[_MAX_PATH] = "";
6409
6410
void core_dump(struct _EXCEPTION_POINTERS *pExceptionInfo)
6411
{
6412
	SECURITY_ATTRIBUTES	sa = { sizeof(SECURITY_ATTRIBUTES), 0, 0 };
6413
	int i;
6414
	HMODULE hDll = NULL;
6415
	HANDLE hFile;
6416
	MINIDUMPWRITEDUMP pDump;
6417
	char *end_ptr = base_path;
6418
6419
	MINIDUMP_EXCEPTION_INFORMATION ExInfo, *ExInfoPtr = NULL;
6420
6421
	if (pExceptionInfo) {
6422
		ExInfo.ThreadId = GetCurrentThreadId();
6423
		ExInfo.ExceptionPointers = pExceptionInfo;
6424
		ExInfo.ClientPointers = NULL;
6425
		ExInfoPtr = &ExInfo;
6426
	}
6427
6428
	end_ptr = base_path + strlen(base_path);
6429
6430
	strcat(base_path, "DBGHELP.DLL" );
6431
	hDll = LoadLibrary(base_path);
6432
	*end_ptr = 0;
6433
	if (hDll==NULL) {
6434
		int err;
6435
		err = HRESULT_CODE(GetLastError());
6436
		hDll = LoadLibrary( "DBGHELP.DLL" );
6437
		if (hDll==NULL) {
6438
			err = HRESULT_CODE(GetLastError());
6439
			return;
6440
		}
6441
	}
6442
6443
	pDump = (MINIDUMPWRITEDUMP)GetProcAddress( hDll, "MiniDumpWriteDump" );
6444
	if (!pDump) {
6445
		int err;
6446
		err = HRESULT_CODE(GetLastError());
6447
		return;
6448
	}
6449
6450
	for (i = 1; i < INT_MAX; i++) {
6451
		sprintf(dump_path, "%sPBXTCore%08d.dmp", base_path, i);
6452
		hFile = CreateFile( dump_path, GENERIC_WRITE, FILE_SHARE_WRITE, NULL, CREATE_NEW,
6453
							FILE_ATTRIBUTE_NORMAL, NULL );
6454
6455
		if ( hFile != INVALID_HANDLE_VALUE )
6456
			break;
6457
6458
		if (HRESULT_CODE(GetLastError()) == ERROR_FILE_EXISTS )
6459
			continue;
6460
6461
		return;
6462
	}
6463
6464
	// write the dump
6465
	BOOL bOK = pDump( GetCurrentProcess(), GetCurrentProcessId(), hFile, 
6466
		MiniDumpWithPrivateReadWriteMemory, ExInfoPtr, NULL, NULL );
6467
6468
	CloseHandle(hFile);
6469
}
6470
6471
/*
 * Exception filter: writes a core dump for the given exception and then
 * returns EXCEPTION_EXECUTE_HANDLER.
 */
LONG crash_filter(struct _EXCEPTION_POINTERS *pExceptionInfo)
{
	core_dump(pExceptionInfo);

	return EXCEPTION_EXECUTE_HANDLER;
}
6476
6477
void register_crash_filter()
6478
{
6479
	SetUnhandledExceptionFilter( (LPTOP_LEVEL_EXCEPTION_FILTER) crash_filter );
6480
}
6481
6482
#endif // XT_WIN && XT_COREDUMP