1
/* Copyright (C) 2005 PrimeBase Technologies GmbH
3
* Derived from ha_example.h
4
* Copyright (C) 2003 MySQL AB
8
* This program is free software; you can redistribute it and/or modify
9
* it under the terms of the GNU General Public License as published by
10
* the Free Software Foundation; either version 2 of the License, or
11
* (at your option) any later version.
13
* This program is distributed in the hope that it will be useful,
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
* GNU General Public License for more details.
18
* You should have received a copy of the GNU General Public License
19
* along with this program; if not, write to the Free Software
20
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22
* 2005-11-10 Paul McCullagh
26
#ifdef USE_PRAGMA_IMPLEMENTATION
27
#pragma implementation // gcc: Class implementation
30
#include "xt_config.h"
42
#include <drizzled/internal/my_sys.h>
43
#include <drizzled/common.h>
44
#include <drizzled/plugin.h>
45
#include <drizzled/field.h>
46
#include <drizzled/session.h>
47
#include <drizzled/data_home.h>
48
#include <drizzled/error.h>
49
#include <drizzled/table.h>
50
#include <drizzled/session.h>
54
#define my_strdup(a,b) strdup(a)
56
using namespace drizzled;
57
using namespace drizzled::plugin;
59
#define DEFAULT_FILE_EXTENSION ".dfe"
62
#include "mysql_priv.h"
63
#include <mysql/plugin.h>
69
#include "strutil_xt.h"
70
#include "database_xt.h"
75
#include "datadic_xt.h"
77
#include "pbms_enabled.h"
79
#include "tabcache_xt.h"
80
#include "systab_xt.h"
81
#include "xaction_xt.h"
82
#include "backup_xt.h"
86
//#define XT_USE_SYS_PAR_DEBUG_SIZES
87
//#define PBXT_HANDLER_TRACE
88
//#define PBXT_TRACE_RETURN
89
//#define XT_PRINT_INDEX_OPT
90
//#define XT_SHOW_DUMPS_TRACE
91
//#define XT_UNIT_TEST
92
//#define LOAD_TABLE_ON_OPEN
93
//#define CHECK_TABLE_LOADS
95
/* Enable to trace the statements executed by the engine: */
96
//#define TRACE_STATEMENTS
98
/* Enable to print the trace to the stdout, instead of
101
//#define PRINT_STATEMENTS
105
static handler *pbxt_create_handler(handlerton *hton, TABLE_SHARE *table, MEM_ROOT *mem_root);
106
static int pbxt_init(void *p);
107
static int pbxt_end(void *p);
108
static int pbxt_panic(handlerton *hton, enum ha_panic_function flag);
109
static void pbxt_drop_database(handlerton *hton, char *path);
110
static int pbxt_close_connection(handlerton *hton, THD* thd);
111
static int pbxt_commit(handlerton *hton, THD *thd, bool all);
112
static int pbxt_rollback(handlerton *hton, THD *thd, bool all);
113
static int pbxt_prepare(handlerton *hton, THD *thd, bool all);
114
static int pbxt_recover(handlerton *hton, XID *xid_list, uint len);
115
static int pbxt_commit_by_xid(handlerton *hton, XID *xid);
116
static int pbxt_rollback_by_xid(handlerton *hton, XID *xid);
117
static int pbxt_start_consistent_snapshot(handlerton *hton, THD *thd);
119
static void ha_aquire_exclusive_use(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine);
120
static void ha_release_exclusive_use(XTThreadPtr self, XTSharePtr share);
121
static void ha_close_open_tables(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine);
123
#ifdef TRACE_STATEMENTS
125
#ifdef PRINT_STATEMENTS
126
#define STAT_TRACE(y, x) printf("%s: %s\n", y ? y->t_name : "-unknown-", x)
128
#define STAT_TRACE(y, x) xt_ttraceq(y, x)
133
#define STAT_TRACE(y, x)
137
#ifdef PBXT_HANDLER_TRACE
138
#define PBXT_ALLOW_PRINTING
140
#define XT_TRACE_CALL() ha_trace_function(__FUNC__, NULL)
141
#define XT_TRACE_METHOD() ha_trace_function(__FUNC__, pb_share->sh_table_path->ps_path)
143
#ifdef PBXT_TRACE_RETURN
144
#define XT_RETURN(x) do { printf("%d\n", (int) (x)); return (x); } while (0)
145
#define XT_RETURN_VOID do { printf("out\n"); return; } while (0)
147
#define XT_RETURN(x) return (x)
148
#define XT_RETURN_VOID return
153
#define XT_TRACE_CALL()
154
#define XT_TRACE_METHOD()
155
#define XT_RETURN(x) return (x)
156
#define XT_RETURN_VOID return
160
#ifdef PBXT_ALLOW_PRINTING
161
#define XT_PRINT0(y, x) do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-"); } while (0)
162
#define XT_PRINT1(y, x, a) do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-", a); } while (0)
163
#define XT_PRINT2(y, x, a, b) do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-", a, b); } while (0)
164
#define XT_PRINT3(y, x, a, b, c) do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-", a, b, c); } while (0)
166
#define XT_PRINT0(y, x)
167
#define XT_PRINT1(y, x, a)
168
#define XT_PRINT2(y, x, a, b)
169
#define XT_PRINT3(y, x, a, b, c)
175
handlerton *pbxt_hton;
176
bool pbxt_inited = false; // Variable for checking the init state of hash
177
xtBool pbxt_ignore_case = true;
178
const char *pbxt_extensions[]= { ".xtr", ".xtd", ".xtl", ".xti", ".xt", "", NULL };
179
#ifdef XT_CRASH_DEBUG
180
xtBool pbxt_crash_debug = TRUE;
182
xtBool pbxt_crash_debug = FALSE;
186
/* Variables for pbxt share methods */
187
static xt_mutex_type pbxt_database_mutex; // Prevent a database from being opened while it is being dropped
188
static XTHashTabPtr pbxt_share_tables; // Hash used to track open tables
189
static char *pbxt_index_cache_size;
190
static char *pbxt_record_cache_size;
191
static char *pbxt_log_cache_size;
192
static char *pbxt_log_file_threshold;
193
static char *pbxt_transaction_buffer_size;
194
static char *pbxt_log_buffer_size;
195
static char *pbxt_checkpoint_frequency;
196
static char *pbxt_data_log_threshold;
197
static char *pbxt_data_file_grow_size;
198
static char *pbxt_row_file_grow_size;
199
static char *pbxt_record_write_threshold;
202
// drizzle complains it's not used
203
static my_bool pbxt_support_xa;
204
static XTXactEnumXARec pbxt_xa_enum;
208
#define XT_SHARE_LOCK_WAIT 5000
210
#define XT_SHARE_LOCK_WAIT 500
214
* Lock timeout in 1/1000ths of a second
216
#define XT_SHARE_LOCK_TIMEOUT 30000
219
* -----------------------------------------------------------------------
224
//#define XT_FOR_TEAMDRIVE
226
/*
 * Describes one size-valued engine variable: its name, its default and
 * the range it may take. All values are byte-size strings (for example
 * "32MB"); separate upper bounds are kept for 32-bit and 64-bit builds.
 */
typedef struct HAVarParams {
	const char	*vp_var;	/* Variable name. */
	const char	*vp_def;	/* Default value. */
	const char	*vp_min;	/* Minimum allowed value. */
	const char	*vp_max4;	/* Maximum allowed value on 32-bit processors. */
	const char	*vp_max8;	/* Maximum allowed value on 64-bit processors. */
} HAVarParamsRec, *HAVarParamsPtr;
234
#ifdef XT_USE_SYS_PAR_DEBUG_SIZES
235
static HAVarParamsRec vp_index_cache_size = { "pbxt_index_cache_size", "32MB", "8MB", "2GB", "2000GB" };
236
static HAVarParamsRec vp_record_cache_size = { "pbxt_record_cache_size", "32MB", "8MB", "2GB", "2000GB" };
237
static HAVarParamsRec vp_log_cache_size = { "pbxt_log_cache_size", "16MB", "4MB", "2GB", "2000GB" };
238
static HAVarParamsRec vp_checkpoint_frequency = { "pbxt_checkpoint_frequency", "1GB", "2MB", "2000GB", "2000GB" };
239
static HAVarParamsRec vp_log_file_threshold = { "pbxt_log_file_threshold", "32MB", "1MB", "2GB", "256TB" };
240
static HAVarParamsRec vp_transaction_buffer_size = { "pbxt_transaction_buffer_size", "1MB", "128K", "1GB", "24GB" };
241
static HAVarParamsRec vp_log_buffer_size = { "pbxt_log_buffer_size", "256K", "128K", "1GB", "24GB" };
242
static HAVarParamsRec vp_data_log_threshold = { "pbxt_data_log_threshold", "400K", "400K", "2GB", "256TB" };
243
static HAVarParamsRec vp_data_file_grow_size = { "pbxt_data_file_grow_size", "2MB", "128K", "1GB", "2GB" };
244
static HAVarParamsRec vp_row_file_grow_size = { "pbxt_row_file_grow_size", "256K", "32K", "1GB", "2GB" };
245
static HAVarParamsRec vp_record_write_threshold = { "pbxt_record_write_threshold", "4MB", "0", "2GB", "8GB" };
246
#define XT_DL_DEFAULT_XLOG_COUNT 3
247
#define XT_DL_DEFAULT_GARBAGE_LEVEL 10
249
static HAVarParamsRec vp_index_cache_size = { "pbxt_index_cache_size", "32MB", "8MB", "2GB", "2000GB" };
250
static HAVarParamsRec vp_record_cache_size = { "pbxt_record_cache_size", "32MB", "8MB", "2GB", "2000GB" };
251
static HAVarParamsRec vp_log_cache_size = { "pbxt_log_cache_size", "16MB", "4MB", "2GB", "2000GB" };
252
static HAVarParamsRec vp_checkpoint_frequency = { "pbxt_checkpoint_frequency", "1GB", "2MB", "2000GB", "2000GB" };
253
static HAVarParamsRec vp_log_file_threshold = { "pbxt_log_file_threshold", "32MB", "1MB", "2GB", "256TB" };
254
static HAVarParamsRec vp_transaction_buffer_size = { "pbxt_transaction_buffer_size", "1MB", "128K", "1GB", "24GB" };
255
static HAVarParamsRec vp_log_buffer_size = { "pbxt_log_buffer_size", "256K", "128K", "1GB", "24GB" };
256
static HAVarParamsRec vp_data_log_threshold = { "pbxt_data_log_threshold", "64MB", "1MB", "2GB", "256TB" };
257
static HAVarParamsRec vp_data_file_grow_size = { "pbxt_data_file_grow_size", "2MB", "128K", "1GB", "2GB" };
258
static HAVarParamsRec vp_row_file_grow_size = { "pbxt_row_file_grow_size", "256K", "32K", "1GB", "2GB" };
259
static HAVarParamsRec vp_record_write_threshold = { "pbxt_record_write_threshold", "4MB", "0", "2GB", "8GB" };
260
#define XT_DL_DEFAULT_XLOG_COUNT 3
261
#define XT_DL_DEFAULT_GARBAGE_LEVEL 50
264
#define XT_AUTO_INCREMENT_DEF 0
265
#define XT_DL_DEFAULT_INDEX_DIRTY_LEVEL 80
269
/* For debugging on the Mac, we check the re-use logs: */
270
#define XT_OFFLINE_LOG_FUNCTION_DEF XT_RECYCLE_LOGS
272
#define XT_OFFLINE_LOG_FUNCTION_DEF XT_DELETE_LOGS
275
#define XT_OFFLINE_LOG_FUNCTION_DEF XT_RECYCLE_LOGS
278
/* TeamDrive, uses special auto-increment, and
279
* we keep the logs for the moment:
281
#ifdef XT_FOR_TEAMDRIVE
282
#undef XT_OFFLINE_LOG_FUNCTION_DEF
283
#define XT_OFFLINE_LOG_FUNCTION_DEF XT_KEEP_LOGS
284
//#undef XT_AUTO_INCREMENT_DEF
285
//#define XT_AUTO_INCREMENT_DEF 1
288
#ifdef PBXT_HANDLER_TRACE
289
static void ha_trace_function(const char *function, char *table)
291
char func_buf[50], *ptr;
292
XTThreadPtr thread = xt_get_self();
294
if ((ptr = const_cast<char *>(strchr(function, '(')))) {
296
while (ptr > function) {
297
if (!(isalnum(*ptr) || *ptr == '_'))
302
xt_strcpy(50, func_buf, ptr);
303
if ((ptr = strchr(func_buf, '(')))
307
xt_strcpy(50, func_buf, function);
309
printf("%s %s (%s)\n", thread ? thread->t_name : "-unknown-", func_buf, table);
311
printf("%s %s\n", thread ? thread->t_name : "-unknown-", func_buf);
316
* -----------------------------------------------------------------------
321
/*
 * Case-sensitive comparison callback for the share hash table.
 *
 * key  - NUL-terminated table path being looked up.
 * data - the stored item (an XTSharePtr).
 *
 * Returns non-zero when the item's table path is identical to the key.
 */
static xtBool ha_hash_comp(void *key, void *data)
{
	XTSharePtr	share = (XTSharePtr) data;

	return strcmp((char *) key, share->sh_table_path->ps_path) == 0;
}
328
/*
 * Case-sensitive hash callback for the share hash table.
 *
 * is_key   - TRUE when key_data is a lookup key (a table path string),
 *            FALSE when it is a stored item (an XTSharePtr).
 * key_data - the key or the item, as indicated by is_key.
 *
 * Both forms hash the table path, so a key and the item it identifies
 * produce the same value.
 */
static xtHashValue ha_hash(xtBool is_key, void *key_data)
{
	XTSharePtr	share = (XTSharePtr) key_data;

	if (is_key)
		return xt_ht_hash((char *) key_data);
	return xt_ht_hash(share->sh_table_path->ps_path);
}
337
/*
 * Case-insensitive comparison callback for the share hash table.
 *
 * key  - NUL-terminated table path being looked up.
 * data - the stored item (an XTSharePtr).
 *
 * Returns non-zero when the item's table path matches the key,
 * ignoring differences in letter case.
 */
static xtBool ha_hash_comp_ci(void *key, void *data)
{
	XTSharePtr	share = (XTSharePtr) data;

	return strcasecmp((char *) key, share->sh_table_path->ps_path) == 0;
}
344
/*
 * Case-insensitive hash callback for the share hash table.
 *
 * is_key   - TRUE when key_data is a lookup key (a table path string),
 *            FALSE when it is a stored item (an XTSharePtr).
 * key_data - the key or the item, as indicated by is_key.
 *
 * Both forms hash the table path with xt_ht_casehash(), so a key and
 * the item it identifies produce the same value.
 */
static xtHashValue ha_hash_ci(xtBool is_key, void *key_data)
{
	XTSharePtr	share = (XTSharePtr) key_data;

	if (is_key)
		return xt_ht_casehash((char *) key_data);
	return xt_ht_casehash(share->sh_table_path->ps_path);
}
353
/*
 * Make sure the table behind a share is open.
 *
 * If the share has no table handle yet, one is obtained with
 * xt_use_table() and the share's dictionary references (key count and
 * key array) are taken from it. The share's exclusive-use mutex is held
 * for the duration of the call.
 */
static void ha_open_share(XTThreadPtr self, XTShareRec *share)
{
	xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
	pushr_(xt_unlock_mutex, share->sh_ex_mutex);

	if (!share->sh_table) {
		share->sh_table = xt_use_table(self, share->sh_table_path, FALSE, FALSE);
		/* These point into the table's dictionary; they are not copies. */
		share->sh_dic_key_count = share->sh_table->tab_dic.dic_key_count;
		share->sh_dic_keys = share->sh_table->tab_dic.dic_keys;
		share->sh_recalc_selectivity = FALSE;
	}

	freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
}
368
static void ha_close_share(XTThreadPtr self, XTShareRec *share)
372
if ((tab = share->sh_table)) {
373
/* Save this, in case the share is re-opened. */
374
share->sh_min_auto_inc = tab->tab_auto_inc;
376
xt_heap_release(self, tab);
377
share->sh_table = NULL;
380
/* These are only references: */
381
share->sh_dic_key_count = 0;
382
share->sh_dic_keys = NULL;
385
/*
 * Release everything owned by a share and then the share itself.
 *
 * Every member is tested before it is released, so this is safe on a
 * share that has only been partly set up (ha_get_share() registers it as
 * the cleanup handler directly after allocating the share).
 */
static void ha_cleanup_share(XTThreadPtr self, XTSharePtr share)
{
	ha_close_share(self, share);

	if (share->sh_table_path) {
		xt_free(self, share->sh_table_path);
		share->sh_table_path = NULL;
	}

	if (share->sh_ex_cond) {
		/* NOTE(review): sh_lock is set up with thr_lock_init() in
		 * ha_get_share(); confirm unlock() is the intended teardown call.
		 */
		share->sh_lock.unlock();
		xt_delete_cond(self, (xt_cond_type *) share->sh_ex_cond);
		share->sh_ex_cond = NULL;
	}

	if (share->sh_ex_mutex) {
		xt_delete_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
		share->sh_ex_mutex = NULL;
	}

	xt_free(self, share);
}
408
/*
 * Free callback for the share hash table: disposes of a stored item
 * (an XTSharePtr) by handing it to ha_cleanup_share().
 */
static void ha_hash_free(XTThreadPtr self, void *data)
{
	XTSharePtr	share = (XTSharePtr) data;

	ha_cleanup_share(self, share);
}
416
* This structure contains information that is common to all handles.
417
* (i.e. it is table specific).
419
static XTSharePtr ha_get_share(XTThreadPtr self, const char *table_path, bool open_table)
424
xt_ht_lock(self, pbxt_share_tables);
425
pushr_(xt_ht_unlock, pbxt_share_tables);
427
// Check if the table exists...
428
if (!(share = (XTSharePtr) xt_ht_get(self, pbxt_share_tables, (void *) table_path))) {
429
share = (XTSharePtr) xt_calloc(self, sizeof(XTShareRec));
430
pushr_(ha_cleanup_share, share);
432
share->sh_ex_mutex = (xt_mutex_type *) xt_new_mutex(self);
433
share->sh_ex_cond = (xt_cond_type *) xt_new_cond(self);
435
thr_lock_init(&share->sh_lock);
437
share->sh_use_count = 0;
438
share->sh_table_path = (XTPathStrPtr) xt_dup_string(self, table_path);
441
ha_open_share(self, share);
443
popr_(); // Discard ha_cleanup_share(share);
445
xt_ht_put(self, pbxt_share_tables, share);
448
share->sh_use_count++;
449
freer_(); // xt_ht_unlock(pbxt_share_tables)
455
* Free shared information.
457
/*
 * Give up one reference to a share.
 *
 * The use count is decremented while the share hash table is locked.
 * When it reaches zero the share is deleted from the table; the table
 * is created with ha_hash_free() as its free callback.
 */
static void ha_unget_share(XTThreadPtr self, XTSharePtr share)
{
	xt_ht_lock(self, pbxt_share_tables);
	pushr_(xt_ht_unlock, pbxt_share_tables);

	if (!--share->sh_use_count)
		xt_ht_del(self, pbxt_share_tables, share->sh_table_path);

	freer_(); // xt_ht_unlock(pbxt_share_tables)
}
468
static xtBool ha_unget_share_removed(XTThreadPtr self, XTSharePtr share)
470
xtBool removed = FALSE;
472
xt_ht_lock(self, pbxt_share_tables);
473
pushr_(xt_ht_unlock, pbxt_share_tables);
475
if (!--share->sh_use_count) {
477
xt_ht_del(self, pbxt_share_tables, share->sh_table_path);
480
freer_(); // xt_ht_unlock(pbxt_share_tables)
484
static inline void thd_init_xact(THD *thd, XTThreadPtr self, bool set_table_trans)
486
self->st_xact_mode = thd_tx_isolation(thd) <= ISO_READ_COMMITTED ? XT_XACT_COMMITTED_READ : XT_XACT_REPEATABLE_READ;
487
self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
488
self->st_auto_commit = (thd_test_options(thd,(OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) == 0;
489
if (set_table_trans) {
491
self->st_table_trans = FALSE;
493
self->st_table_trans = thd_sql_command(thd) == SQLCOM_LOCK_TABLES;
496
self->st_abort_trans = FALSE;
497
self->st_stat_ended = FALSE;
498
self->st_stat_trans = FALSE;
499
self->st_non_temp_updated = FALSE;
500
XT_PRINT0(self, "xt_xn_begin\n");
501
xt_xres_wait_for_recovery(self, XT_RECOVER_SWEPT);
505
* -----------------------------------------------------------------------
510
/*
 * Release exclusive use of a table share and then drop the caller's
 * reference to it.
 *
 * share - an XTSharePtr passed as an untyped pointer.
 */
xtPublic void xt_ha_unlock_table(XTThreadPtr self, void *share)
{
	XTSharePtr	sh = (XTSharePtr) share;

	ha_release_exclusive_use(self, sh);
	ha_unget_share(self, sh);
}
516
xtPublic void xt_ha_close_global_database(XTThreadPtr self)
519
xt_heap_release(self, pbxt_database);
520
pbxt_database = NULL;
525
* Open a PBXT database given the path of a table.
526
* This function also returns the name of the table.
528
* We use the pbxt_database_mutex to lock this
529
* operation to make sure it does not occur while
530
* some other thread is doing a "closeall".
532
xtPublic void xt_ha_open_database_of_table(XTThreadPtr self, XTPathStrPtr XT_UNUSED(table_path))
534
#ifdef XT_USE_GLOBAL_DB
535
if (!self->st_database) {
536
if (!pbxt_database) {
537
xt_open_database(self, mysql_real_data_home, TRUE);
539
* This can be done at the same time as the recovery thread,
540
* strictly speaking I need a lock.
542
if (!pbxt_database) {
543
pbxt_database = self->st_database;
544
xt_heap_reference(self, pbxt_database);
548
xt_use_database(self, pbxt_database, XT_FOR_USER);
551
char db_path[PATH_MAX];
553
xt_strcpy(PATH_MAX, db_path, (char *) table_path);
554
xt_remove_last_name_of_path(db_path);
555
xt_remove_dir_char(db_path);
557
if (self->st_database && xt_tab_compare_paths(self->st_database->db_name, xt_last_name_of_path(db_path)) == 0)
558
/* This thread already has this database open! */
561
/* Auto commit before changing the database: */
562
if (self->st_xact_data) {
563
/* PMC - This probably indicates something strange is happening:
565
* This sequence generates this error:
569
* create temporary table t3 (id int)|
571
* create function f10() returns int
573
* drop temporary table if exists t3;
574
* create temporary table t3 (id int) engine=myisam;
575
* insert into t3 select id from t4;
576
* return (select count(*) from t3);
581
* An error is generated because the same thread is used
582
* to open table t4 (at the start of the functions), and
583
* then to drop table t3. To drop t3 we need to
584
* switch the database, so we land up here!
586
xt_throw_xterr(XT_CONTEXT, XT_ERR_CANNOT_CHANGE_DB);
588
if (!xt_xn_commit(self))
593
xt_lock_mutex(self, &pbxt_database_mutex);
594
pushr_(xt_unlock_mutex, &pbxt_database_mutex);
595
xt_open_database(self, db_path, FALSE);
596
freer_(); // xt_unlock_mutex(&pbxt_database_mutex);
600
xtPublic XTThreadPtr xt_ha_set_current_thread(THD *thd, XTExceptionPtr e)
603
static int ha_thread_count = 0, ha_id;
605
if (!(self = (XTThreadPtr) *thd->getEngineData(pbxt_hton))) {
606
// const Security_context *sctx;
610
ha_id = ++ha_thread_count;
611
sprintf(ha_id_str, "_%d", ha_id);
612
xt_strcpy(120,name,"user"); // TODO: Fix this hack
614
sctx = &thd->main_security_ctx;
617
xt_strcpy(120, name, sctx->user);
618
xt_strcat(120, name, "@");
623
xt_strcat(120, name, sctx->host);
625
xt_strcat(120, name, sctx->ip);
626
else if (thd->proc_info)
627
xt_strcat(120, name, (char *) thd->proc_info);
629
xt_strcat(120, name, "system");
631
xt_strcat(120, name, ha_id_str);
632
if (!(self = xt_create_thread(name, FALSE, TRUE, e)))
635
self->st_xact_mode = XT_XACT_REPEATABLE_READ;
636
*thd->getEngineData(pbxt_hton) = (void *) self;
641
/*
 * Detach and free the PBXT thread attached to a connection, if any.
 *
 * The engine-data slot is cleared before the thread is freed so that it
 * never holds a pointer to a freed thread. Nothing is done when no
 * PBXT thread was attached.
 */
xtPublic void xt_ha_close_connection(THD* thd)
{
	XTThreadPtr	self;

	/* Only act when a thread is attached. The previous test was inverted:
	 * it entered the body with self == NULL and never freed a real thread.
	 */
	if ((self = (XTThreadPtr) *thd->getEngineData(pbxt_hton))) {
		*thd->getEngineData(pbxt_hton) = NULL;
		xt_free_thread(self);
	}
}
651
/*
 * Return the PBXT thread stored in the connection's engine-data slot.
 * The slot value is returned as is; no thread is created here.
 */
xtPublic XTThreadPtr xt_ha_thd_to_self(THD *thd)
{
	return (XTThreadPtr) *thd->getEngineData(pbxt_hton);
}
657
* -----------------------------------------------------------------------
663
* In PBXT, as in MySQL: thread == connection.
665
* So we simply attach a PBXT thread to a MySQL thread.
667
static XTThreadPtr ha_set_current_thread(THD *thd, int *err)
672
if (!(self = xt_ha_set_current_thread(thd, &e))) {
673
xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
680
xtPublic int xt_ha_pbxt_to_mysql_error(int xt_err)
685
case XT_ERR_DUPLICATE_KEY:
686
return HA_ERR_FOUND_DUPP_KEY;
687
case XT_ERR_DEADLOCK:
688
return HA_ERR_LOCK_DEADLOCK;
689
case XT_ERR_RECORD_CHANGED:
690
/* If we generate HA_ERR_RECORD_CHANGED instead of HA_ERR_LOCK_WAIT_TIMEOUT
691
* then sysbench does not work because it does not handle this error.
693
//return HA_ERR_LOCK_WAIT_TIMEOUT; // but HA_ERR_RECORD_CHANGED is the correct error for a optimistic lock failure.
694
return HA_ERR_RECORD_CHANGED;
695
case XT_ERR_LOCK_TIMEOUT:
696
return HA_ERR_LOCK_WAIT_TIMEOUT;
697
case XT_ERR_TABLE_IN_USE:
698
return HA_ERR_WRONG_COMMAND;
699
case XT_ERR_TABLE_NOT_FOUND:
700
return HA_ERR_NO_SUCH_TABLE;
701
case XT_ERR_TABLE_EXISTS:
702
return HA_ERR_TABLE_EXIST;
703
case XT_ERR_CANNOT_CHANGE_DB:
704
return ER_TRG_IN_WRONG_SCHEMA;
705
case XT_ERR_COLUMN_NOT_FOUND:
706
return HA_ERR_CANNOT_ADD_FOREIGN;
707
case XT_ERR_NO_REFERENCED_ROW:
708
case XT_ERR_REF_TABLE_NOT_FOUND:
709
case XT_ERR_REF_TYPE_WRONG:
710
return HA_ERR_NO_REFERENCED_ROW;
711
case XT_ERR_ROW_IS_REFERENCED:
712
return HA_ERR_ROW_IS_REFERENCED;
713
case XT_ERR_COLUMN_IS_NOT_NULL:
714
case XT_ERR_INCORRECT_NO_OF_COLS:
715
case XT_ERR_FK_ON_TEMP_TABLE:
716
case XT_ERR_FK_REF_TEMP_TABLE:
717
return HA_ERR_CANNOT_ADD_FOREIGN;
718
case XT_ERR_DUPLICATE_FKEY:
719
return HA_ERR_FOREIGN_DUPLICATE_KEY;
720
case XT_ERR_RECORD_DELETED:
721
return HA_ERR_RECORD_DELETED;
723
return(-1); // Unknown error
726
xtPublic int xt_ha_pbxt_thread_error_for_mysql(THD *thd, const XTThreadPtr self, int ignore_dup_key)
728
int xt_err = self->t_exception.e_xt_err;
729
xtBool dup_key = FALSE;
731
XT_PRINT2(self, "xt_ha_pbxt_thread_error_for_mysql xt_err=%d auto commit=%d\n", (int) xt_err, (int) self->st_auto_commit);
735
case XT_ERR_DUPLICATE_KEY:
736
case XT_ERR_DUPLICATE_FKEY:
737
/* Let MySQL call rollback as and when it wants to for duplicate
740
* In addition, we are not allowed to do an auto-rollback
741
* inside a sub-statement (function() or procedure())
746
* create table t3 (c1 char(1) primary key not null)|
748
* create function bug12379()
751
* insert into t3 values('X');
752
* insert into t3 values('X');
760
* Not doing an auto-rollback should solve this problem in the
761
* case of duplicate key (but not in others - like deadlock)!
762
* I don't think this situation is handled correctly by MySQL.
765
/* If we are in auto-commit mode (and we are not ignoring
766
* duplicate keys) then rollback the transaction automatically.
769
if (!ignore_dup_key && self->st_auto_commit)
770
goto abort_transaction;
772
case XT_ERR_DEADLOCK:
773
case XT_ERR_NO_REFERENCED_ROW:
774
case XT_ERR_ROW_IS_REFERENCED:
775
goto abort_transaction;
776
case XT_ERR_RECORD_CHANGED:
777
/* MySQL also handles the locked error. NOTE: There is no automatic
782
xt_log_exception(self, &self->t_exception, XT_LOG_DEFAULT);
784
/* PMC 2006-08-30: It should be that this is not necessary!
786
* It is only necessary to call ha_rollback() if the engine
787
* aborts the transaction.
789
* On the other hand, I shouldn't need to rollback the
790
* transaction because, if I return an error, MySQL
791
* should do it for me.
793
* Unfortunately, when auto-commit is off, MySQL does not
794
* rollback automatically (for example when a deadlock
797
* And when we have a multi update we cannot rely on this
798
* either (see comment above).
800
if (self->st_xact_data) {
803
* A result of the "st_abort_trans = TRUE" below is that
804
* the following code results in an empty set.
805
* The reason is "ignore_dup_key" is not set so
806
* the duplicate key leads to an error which causes
807
* the transaction to be aborted.
808
* The delayed inserts are all executed in one transaction.
811
* c1 INT(11) NOT NULL AUTO_INCREMENT,
812
* c2 INT(11) DEFAULT NULL,
816
* INSERT DELAYED INTO t1 VALUES(NULL, 11), (NULL, 12);
817
* INSERT DELAYED INTO t1 VALUES(14, 91);
818
* INSERT DELAYED INTO t1 VALUES (NULL, 92), (NULL, 93);
822
if (self->st_lock_count == 0) {
823
/* No table locks, must rollback immediately
824
* (there will be no possibility later!
826
XT_PRINT1(self, "xt_xn_rollback xt_err=%d\n", xt_err);
827
if (!xt_xn_rollback(self))
828
xt_log_exception(self, &self->t_exception, XT_LOG_DEFAULT);
831
/* Locks are held on tables.
832
* Only rollback after locks are released.
834
/* I do not think this is required, because
835
* I tell mysql to rollback below,
836
* besides it is a hack!
837
self->st_auto_commit = TRUE;
839
self->st_abort_trans = TRUE;
841
/* Only tell MySQL to rollback if we automatically rollback.
842
* Note: calling this with (thd, FALSE), cause sp.test to fail.
846
thd_mark_transaction_to_rollback(thd, TRUE);
851
return xt_ha_pbxt_to_mysql_error(xt_err);
854
/*
 * Per-thread callback for xt_do_to_all_threads(): if other_thr currently
 * has the database db in use, make it stop using it.
 *
 * self      - the calling thread.
 * other_thr - the thread being visited.
 * db        - the database to close (an XTDatabaseHPtr).
 */
static void ha_conditional_close_database(XTThreadPtr self, XTThreadPtr other_thr, void *db)
{
	if (other_thr->st_database == (XTDatabaseHPtr) db)
		xt_unuse_database(self, other_thr);
}
861
* This is only called from drop database, so we know that
862
* no thread is actually using the database. This means that it
863
* must be safe to close the database.
865
/*
 * Make every thread that has db in use release it.
 *
 * pbxt_database_mutex is held while the threads are visited; the same
 * mutex is taken around xt_open_database(), so the database cannot be
 * opened while this runs.
 */
xtPublic void xt_ha_all_threads_close_database(XTThreadPtr self, XTDatabaseHPtr db)
{
	xt_lock_mutex(self, &pbxt_database_mutex);
	pushr_(xt_unlock_mutex, &pbxt_database_mutex);

	xt_do_to_all_threads(self, ha_conditional_close_database, db);

	freer_(); // xt_unlock_mutex(&pbxt_database_mutex);
}
873
static int ha_log_pbxt_thread_error_for_mysql(int ignore_dup_key)
875
return xt_ha_pbxt_thread_error_for_mysql(current_thd, myxt_get_self(), ignore_dup_key);
879
* -----------------------------------------------------------------------
883
static xtWord8 ha_set_variable(char **value, HAVarParamsPtr vp)
890
*value = getenv(vp->vp_var);
892
*value = (char *) vp->vp_def;
893
result = xt_byte_size_to_int8(*value);
894
mi = (xtWord8) xt_byte_size_to_int8(vp->vp_min);
897
*value = (char *) vp->vp_min;
899
if (sizeof(size_t) == 8)
900
mm = (char *) vp->vp_max8;
902
mm = (char *) vp->vp_max4;
903
ma = (xtWord8) xt_byte_size_to_int8(mm);
911
static void pbxt_call_init(XTThreadPtr self)
913
xtInt8 index_cache_size;
914
xtInt8 record_cache_size;
915
xtInt8 log_cache_size;
916
xtInt8 log_file_threshold;
917
xtInt8 transaction_buffer_size;
918
xtInt8 log_buffer_size;
919
xtInt8 checkpoint_frequency;
920
xtInt8 data_log_threshold;
921
xtInt8 data_file_grow_size;
922
xtInt8 row_file_grow_size;
923
xtInt8 record_write_threshold;
925
xt_logf(XT_NT_INFO, "PrimeBase XT (PBXT) Engine %s loaded...\n", xt_get_version());
926
xt_logf(XT_NT_INFO, "Paul McCullagh, PrimeBase Technologies GmbH, http://www.primebase.org\n");
928
index_cache_size = ha_set_variable(&pbxt_index_cache_size, &vp_index_cache_size);
929
record_cache_size = ha_set_variable(&pbxt_record_cache_size, &vp_record_cache_size);
930
log_cache_size = ha_set_variable(&pbxt_log_cache_size, &vp_log_cache_size);
931
log_file_threshold = ha_set_variable(&pbxt_log_file_threshold, &vp_log_file_threshold);
932
transaction_buffer_size = ha_set_variable(&pbxt_transaction_buffer_size, &vp_transaction_buffer_size);
933
log_buffer_size = ha_set_variable(&pbxt_log_buffer_size, &vp_log_buffer_size);
934
checkpoint_frequency = ha_set_variable(&pbxt_checkpoint_frequency, &vp_checkpoint_frequency);
935
data_log_threshold = ha_set_variable(&pbxt_data_log_threshold, &vp_data_log_threshold);
936
data_file_grow_size = ha_set_variable(&pbxt_data_file_grow_size, &vp_data_file_grow_size);
937
row_file_grow_size = ha_set_variable(&pbxt_row_file_grow_size, &vp_row_file_grow_size);
938
record_write_threshold = ha_set_variable(&pbxt_record_write_threshold, &vp_record_write_threshold);
940
xt_db_log_file_threshold = (xtLogOffset) log_file_threshold;
941
xt_db_log_buffer_size = (size_t) xt_align_offset(log_buffer_size, 512);
942
xt_db_transaction_buffer_size = (size_t) xt_align_offset(transaction_buffer_size, 512);
943
xt_db_checkpoint_frequency = (size_t) checkpoint_frequency;
944
xt_db_data_log_threshold = (off_t) data_log_threshold;
945
xt_db_data_file_grow_size = (size_t) data_file_grow_size;
946
xt_db_row_file_grow_size = (size_t) row_file_grow_size;
947
xt_db_record_write_threshold = (size_t) record_write_threshold;
950
pbxt_ignore_case = TRUE;
952
pbxt_ignore_case = lower_case_table_names != 0;
954
if (pbxt_ignore_case)
955
pbxt_share_tables = xt_new_hashtable(self, ha_hash_comp_ci, ha_hash_ci, ha_hash_free, TRUE, FALSE);
957
pbxt_share_tables = xt_new_hashtable(self, ha_hash_comp, ha_hash, ha_hash_free, TRUE, FALSE);
960
xt_lock_installation(self, mysql_real_data_home);
961
XTSystemTableShare::startUp(self);
962
xt_init_databases(self);
963
xt_ind_init(self, (size_t) index_cache_size);
964
xt_tc_init(self, (size_t) record_cache_size);
965
xt_xlog_init(self, (size_t) log_cache_size);
968
static void pbxt_call_exit(XTThreadPtr self)
970
xt_logf(XT_NT_INFO, "PrimeBase XT Engine shutdown...\n");
972
#ifdef TRACE_STATEMENTS
975
#ifdef XT_USE_GLOBAL_DB
976
xt_ha_close_global_database(self);
979
//xt_stop_database_threads(self, FALSE);
980
xt_stop_database_threads(self, TRUE);
982
xt_stop_database_threads(self, TRUE);
984
/* This will tell the freeer to quit ASAP: */
985
xt_quit_freeer(self);
986
/* We conditionally stop the freeer here, because if we are
987
* in startup, then the freeer will be hanging.
990
* This problem has been solved by MySQL!
992
xt_stop_freeer(self);
993
xt_exit_databases(self);
994
XTSystemTableShare::shutDown(self);
998
xt_unlock_installation(self, mysql_real_data_home);
1000
if (pbxt_share_tables) {
1001
xt_free_hashtable(self, pbxt_share_tables);
1002
pbxt_share_tables = NULL;
1007
* Shutdown the PBXT sub-system.
1009
static void ha_exit(XTThreadPtr self)
1011
xt_xres_terminate_recovery(self);
1013
/* Wrap things up... */
1014
xt_unuse_database(self, self); /* Just in case the main thread has a database in use (for testing)? */
1015
/* This may cause the streaming engine to cleanup connections and
1016
* tables belonging to this engine. This in turn may require some of
1017
* the stuff below (like xt_create_thread() called from pbxt_close_table()! */
1021
pbxt_call_exit(self);
1022
xt_exit_threading(self);
1025
xt_p_mutex_destroy(&pbxt_database_mutex);
1026
pbxt_inited = false;
1030
* Output the PBXT status. Return FALSE on error.
1033
bool PBXTStorageEngine::show_status(Session *thd, stat_print_fn *stat_print, enum ha_stat_type)
1035
static bool pbxt_show_status(handlerton *XT_UNUSED(hton), THD* thd,
1036
stat_print_fn* stat_print,
1037
enum ha_stat_type XT_UNUSED(stat_type))
1042
XTStringBufferRec strbuf = { 0, 0, 0 };
1043
bool not_ok = FALSE;
1045
if (!(self = ha_set_current_thread(thd, &err)))
1048
#ifdef XT_SHOW_DUMPS_TRACE
1049
//if (pbxt_database)
1050
// xt_dump_xlogs(pbxt_database, 0);
1051
xt_trace("// %s - dump\n", xt_trace_clock_diff(NULL));
1054
#ifdef XT_TRACK_CONNECTIONS
1055
xt_dump_conn_tracking();
1059
xt_unit_test_async_task(self);
1063
myxt_get_status(self, &strbuf);
1071
if (stat_print(thd, "PBXT", 4, "", 0, strbuf.sb_cstring, (uint) strbuf.sb_len))
1074
xt_sb_set_size(self, &strbuf, 0);
1080
* Initialize the PBXT sub-system.
1082
* return 1 on error, else 0.
1085
static int pbxt_init(module::Context ®istry)
1087
static int pbxt_init(void *p)
1092
XT_PRINT0(NULL, "pbxt_init\n");
1094
if (sizeof(xtWordPS) != sizeof(void *)) {
1095
printf("PBXT: This won't work, I require that sizeof(xtWordPS) == sizeof(void *)!\n");
1099
/* GOTCHA: This will "detect" if you are loading the plug-in
1100
* with different --with-debug option to MySQL.
1102
* In this case, you will get an error when loading the
1103
* library that some symbol was not found.
1105
void *dummy = my_malloc(100, MYF(0));
1106
my_free((byte *) dummy, MYF(0));
1109
XTThreadPtr self = NULL;
1111
xt_p_mutex_init_with_autoname(&pbxt_database_mutex, NULL);
1114
pbxt_hton= new PBXTStorageEngine(std::string("PBXT"));
1115
registry.add(pbxt_hton);
1117
pbxt_hton = (handlerton *) p;
1118
pbxt_hton->state = SHOW_OPTION_YES;
1119
pbxt_hton->db_type = DB_TYPE_PBXT; // Wow! I have my own!
1120
pbxt_hton->close_connection = pbxt_close_connection; /* close_connection, cleanup thread related data. */
1121
pbxt_hton->commit = pbxt_commit; /* commit */
1122
pbxt_hton->rollback = pbxt_rollback; /* rollback */
1123
if (pbxt_support_xa) {
1124
pbxt_hton->prepare = pbxt_prepare;
1125
pbxt_hton->recover = pbxt_recover;
1126
pbxt_hton->commit_by_xid = pbxt_commit_by_xid;
1127
pbxt_hton->rollback_by_xid = pbxt_rollback_by_xid;
1130
pbxt_hton->prepare = NULL;
1131
pbxt_hton->recover = NULL;
1132
pbxt_hton->commit_by_xid = NULL;
1133
pbxt_hton->rollback_by_xid = NULL;
1135
pbxt_hton->create = pbxt_create_handler; /* Create a new handler */
1136
pbxt_hton->drop_database = pbxt_drop_database; /* Drop a database */
1137
pbxt_hton->panic = pbxt_panic; /* Panic call */
1138
pbxt_hton->show_status = pbxt_show_status;
1139
pbxt_hton->flags = HTON_NO_FLAGS; /* HTON_CAN_RECREATE - Without this flags TRUNCATE uses delete_all_rows() */
1140
pbxt_hton->slot = (uint)-1; /* assign invald value, so we know when it's inited later */
1141
pbxt_hton->start_consistent_snapshot = pbxt_start_consistent_snapshot;
1142
#if defined(MYSQL_SUPPORTS_BACKUP) && defined(XT_ENABLE_ONLINE_BACKUP)
1143
pbxt_hton->get_backup_engine = pbxt_backup_engine;
1146
if (!xt_init_logging()) /* Initialize logging */
1150
PBMSResultRec result;
1151
if (!pbms_initialize("PBXT", false, &result)) {
1152
xt_logf(XT_NT_ERROR, "pbms_initialize() Error: %s", result.mr_message);
1157
if (!xt_init_memory()) /* Initialize memory */
1160
self = xt_init_threading(); /* Create the main self: */
1167
/* Initialize all systems */
1168
pbxt_call_init(self);
1170
/* Conditional unit test: */
1172
//xt_unit_test_create_threads(self);
1173
//xt_unit_test_read_write_locks(self);
1174
//xt_unit_test_mutex_locks(self);
1177
/* {OPEN-DB-SWEEPER-WAIT}
1178
* I have to start the freeer before I open and recover the database
1179
* because it we run out of cache while waiting for the sweeper
1182
xt_start_freeer(self);
1184
/* This function is called with LOCK_plugin locked.
1185
* This prevents the opening of .frm files, which
1186
* is required for recovery.
1187
* Our solution is to start reovery in a thread
1188
* so that it can run after LOCK_plugin is released.
1190
xt_xres_start_database_recovery(self);
1193
xt_log_exception(self, &self->t_exception, XT_LOG_DEFAULT);
1199
/* {FREEER-HANG} The free-er will be hung in:
1200
#0 0x91fc6a2e in semaphore_wait_signal_trap
1201
#1 0x91fce505 in pthread_mutex_lock
1202
#2 0x00489633 in safe_mutex_lock at thr_mutex.c:149
1203
#3 0x002dfca9 in plugin_thdvar_init at sql_plugin.cc:2398
1204
#4 0x000d6a12 in THD::init at sql_class.cc:715
1205
#5 0x000de9d3 in THD::THD at sql_class.cc:597
1206
#6 0x000debe1 in THD::THD at sql_class.cc:631
1207
#7 0x00e207a4 in myxt_create_thread at myxt_xt.cc:2666
1208
#8 0x00e3134b in tabc_fr_run_thread at tabcache_xt.cc:982
1209
#9 0x00e422ca in xt_thread_main at thread_xt.cc:1006
1210
#10 0x91ff7c55 in _pthread_start
1211
#11 0x91ff7b12 in thread_start
1213
* so it is not good trying to stop it here!
1215
* With regard to this problem, see {OPEN-DB-SWEEPER-WAIT}
1216
* Due to this problem, I will probably have to hack
1217
* the mutex so that the freeer can get started...
1219
* NOPE! problem has gone in 6.0.9. Also not a problem in
1223
/* {OPEN-DB-SWEEPER-WAIT}
1224
* I have to stop the freeer here because it was
1225
* started before opening the database.
1228
/* {FREEER-HANG-ON-INIT-ERROR}
1229
* pbxt_init is called with LOCK_plugin and if it fails and tries to exit
1230
* the freeer here it hangs because the freeer calls THD::~THD which tries
1231
* to aquire the same lock and hangs. OTOH MySQL calls pbxt_end() after
1232
* an unsuccessful call to pbxt_init, so we defer cleaup, except
1235
xt_free_thread(self);
1238
xt_free_thread(self);
1240
XT_RETURN(init_err);
1253
// Engine shutdown hook. Clears pbxt_inited, which per the comment below also
// signals that the engine is shutting down, then creates a temporary PBXT
// thread named TempForEnd and flags it as the main thread (t_main).
void PBXTStorageEngine::shutdownPlugin()
1262
/* This flag also means "shutting down". */
1263
pbxt_inited = false;
1264
self = xt_create_thread("TempForEnd", FALSE, TRUE, &e);
1266
self->t_main = TRUE;
1272
// Destructor. Intentionally does no work: cleanup is performed by
// shutdownPlugin() (see the comment below).
PBXTStorageEngine::~PBXTStorageEngine()
1274
/* We do nothing here, because it is now all done in shutdownPlugin(). */
1278
* The following query from the DBT1 test is VERY slow
1279
* if we do not set HA_READ_ORDER.
1280
* The reason is that it must scan all duplicates, then
1283
* SELECT o_id, o_carrier_id, o_entry_d, o_ol_cnt
1284
* FROM orders FORCE INDEX (o_w_id)
1288
* ORDER BY o_id DESC limit 1;
1291
//#define FLAGS_ARE_READ_DYNAMICALLY
1293
// Reports the index capabilities of the engine. The ha_key_alg argument is
// unnamed and not used. Every visible return includes HA_READ_NEXT,
// HA_READ_PREV, HA_READ_RANGE and HA_KEYREAD_ONLY; HA_READ_ORDER appears only
// in one return inside the FLAGS_ARE_READ_DYNAMICALLY section, where it is
// left out when pb_open_tab->ot_for_update is set.
// NOTE(review): FLAGS_ARE_READ_DYNAMICALLY is only present as a commented-out
// define in this file, so HA_READ_ORDER is presumably never reported - confirm.
uint32_t PBXTStorageEngine::index_flags(enum ha_key_alg) const
1295
/* It would be nice if the dynamic version of this function works,
1296
* but it does not. MySQL loads this information when the table is openned,
1297
* and then it is fixed.
1299
* The problem is, I have had to remove the HA_READ_ORDER option although
1300
* it applies to PBXT. PBXT returns entries in index order during an index
1301
* scan in _almost_ all cases.
1303
* A number of cases are demostrated here: [(11)]
1305
* If involves the following conditions:
1306
* - a SELECT FOR UPDATE, UPDATE or DELETE statement
1307
* - an ORDER BY, or join that requires the sort order
1308
* - another transaction which updates the index while it is being
1311
* In this "obscure" case, the index scan may return index
1312
* entries in the wrong order.
1314
#ifdef FLAGS_ARE_READ_DYNAMICALLY
1315
/* If were are in an update (SELECT FOR UPDATE, UPDATE or DELETE), then
1316
* it may be that we return the rows from an index in the wrong
1317
* order! This is due to the fact that update reads wait for transactions
1318
* to commit and this means that index entries may change position during
1321
if (pb_open_tab && pb_open_tab->ot_for_update)
1322
return (HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_KEYREAD_ONLY);
1323
/* If I understand HA_KEYREAD_ONLY then this means I do not
1324
* need to fetch the record associated with an index
1327
return (HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | HA_READ_RANGE | HA_KEYREAD_ONLY);
1329
return (HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_KEYREAD_ONLY);
1334
* Kill the PBXT thread associated with the MySQL thread.
1336
// Releases the PBXT thread that belongs to the given session. If the session
// engine-data slot holds a PBXT thread, the slot is set to NULL and the thread
// is freed with xt_free_thread().
int PBXTStorageEngine::close_connection(Session *thd)
1338
PBXTStorageEngine * const hton = this;
1342
if ((self = (XTThreadPtr) *thd->getEngineData(hton))) {
1343
*thd->getEngineData(pbxt_hton) = NULL;
1344
/* Required because freeing the thread could cause
1345
* free of database which could call xt_close_file_ns()!
1348
xt_free_thread(self);
1354
* Currently does nothing because it was all done
1355
* when the last PBXT table was removed from the
1358
// Drop-database hook. The char pointer argument is unnamed and therefore not
// used by this function.
void PBXTStorageEngine::drop_database(char *)
1364
* NOTES ON TRANSACTIONS:
1366
* 1. If self->st_lock_count == 0 then the transaction can be ended immediately.
1367
* If not, we must wait until the last lock is released on the last handler
1368
* to ensure that the tables are flushed before the transaction is
1369
* committed or aborted.
1371
* 2. all (below) indicates, within a BEGIN/END (i.e. auto_commit off) whether
1372
* the statement or the entire transaction is being terminated.
1373
* We currently ignore statement termination.
1375
* 3. If in BEGIN/END we must call ha_rollback() if we abort the transaction
1378
* NOTE ON CONSISTENT SNAPSHOTS:
1380
* PBXT itself doesn't need this function as its transaction mechanism provides
1381
* consistent snapshots for all transactions by default. This function is needed
1382
* only for multi-engine cases like this:
1384
* CREATE TABLE t1 ... ENGINE=INNODB
1385
* CREATE TABLE t2 ... ENGINE=PBXT
1386
* START TRANSACTION WITH CONSISTENT SNAPSHOT
1387
* SELECT * FROM t1 <-- at this point we need to know about the snapshot
1391
// Handlerton callback for START TRANSACTION WITH CONSISTENT SNAPSHOT.
// Obtains the PBXT thread for thd with ha_set_current_thread(), opens the
// database if the thread has none while pbxt_database is set, initialises the
// transaction state with thd_init_xact() and begins a transaction with
// xt_xn_begin(). When the begin succeeds the engine is registered for the
// transaction via trans_register_ha(); an error code is derived with
// xt_ha_pbxt_thread_error_for_mysql().
// NOTE(review): presumably that error conversion sits on the failure path of
// xt_xn_begin() - confirm.
static int pbxt_start_consistent_snapshot(handlerton *hton, THD *thd)
1394
XTThreadPtr self = ha_set_current_thread(thd, &err);
1396
if (!self->st_database && pbxt_database) {
1397
xt_ha_open_database_of_table(self, (XTPathStrPtr) NULL);
1400
thd_init_xact(thd, self, true);
1402
if (xt_xn_begin(self)) {
1403
trans_register_ha(thd, TRUE, hton);
1405
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1409
* As of MySQL 5.1.41 the return value is not checked, so the server might assume
1410
* everything is fine even it isn't. InnoDB returns 0 on success.
1417
* Commit the PBXT transaction of the given thread.
1418
* thd is the MySQL thread structure.
1419
* pbxt_thr is a pointer to the PBXT thread structure.
1422
// Commits the PBXT transaction attached to the session. Nothing is committed
// unless the session engine-data slot holds a PBXT thread and that thread has
// st_xact_data set. xt_xn_commit() is called when all is true or the thread is
// in auto-commit mode; a failed commit is converted to an error code with
// xt_ha_pbxt_thread_error_for_mysql(). st_stat_trans is reset to FALSE.
int PBXTStorageEngine::commit(Session *thd, bool all)
1424
PBXTStorageEngine * const hton = this;
1428
if ((self = (XTThreadPtr) *thd->getEngineData(hton))) {
1429
XT_PRINT2(self, "%s pbxt_commit all=%d\n", all ? "END CONN XACT" : "END STAT", all);
1431
if (self->st_xact_data) {
1432
/* There are no table locks, commit immediately in all cases
1433
* except when this is a statement commit with an explicit
1434
* transaction (!all && !self->st_auto_commit).
1436
if (all || self->st_auto_commit) {
1437
XT_PRINT0(self, "xt_xn_commit in pbxt_commit\n");
1439
if (!xt_xn_commit(self))
1440
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1444
self->st_stat_trans = FALSE;
1449
// Rolls back the PBXT transaction attached to the session. Nothing is rolled
// back unless the session engine-data slot holds a PBXT thread and that thread
// has st_xact_data set. xt_xn_rollback() is called when all is true or the
// thread is in auto-commit mode; a failed rollback is converted to an error
// code with xt_ha_pbxt_thread_error_for_mysql(). st_stat_trans is reset to
// FALSE. Statement-level rollback is not implemented (see the TODO below).
int PBXTStorageEngine::rollback(Session *thd, bool all)
1451
PBXTStorageEngine * const hton = this;
1455
if ((self = (XTThreadPtr) *thd->getEngineData(hton))) {
1456
XT_PRINT2(self, "%s pbxt_rollback all=%d\n", all ? "CONN END XACT" : "STAT END", all);
1458
if (self->st_xact_data) {
1459
/* There are no table locks, rollback immediately in all cases
1460
* except when this is a statement commit with an explicit
1461
* transaction (!all && !self->st_auto_commit).
1463
* Note, the only reason for a rollback of a operation is
1464
* due to an error. In this case PBXT has already
1465
* undone the effects of the operation.
1467
* However, this is not the same as statement rollback
1468
* which can involve a number of operations.
1470
* TODO: Implement statement rollback.
1472
if (all || self->st_auto_commit) {
1473
XT_PRINT0(self, "xt_xn_rollback\n");
1474
if (!xt_xn_rollback(self))
1475
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1479
self->st_stat_trans = FALSE;
1484
// Cursor factory: returns a newly allocated ha_pbxt bound to this engine and
// to the given table.
Cursor *PBXTStorageEngine::create(Table& table)
1486
return new ha_pbxt(*this, table);
1490
* -----------------------------------------------------------------------
1497
// XA prepare callback. Acts only if thd has a PBXT thread with st_xact_data
// set and either all is true or the thread is in auto-commit mode. In that
// case the XID is fetched with thd_get_xid() and the transaction is prepared
// with xt_xn_prepare(); a failure is converted to an error code with
// xt_ha_pbxt_thread_error_for_mysql().
// NOTE(review): the first trace message below prints pbxt_commit rather than
// pbxt_prepare, which looks like a copy from the commit path - confirm.
static int pbxt_prepare(handlerton *hton, THD *thd, bool all)
1503
if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
1504
XT_PRINT1(self, "pbxt_commit all=%d\n", all);
1506
if (self->st_xact_data) {
1507
/* There are no table locks, commit immediately in all cases
1508
* except when this is a statement commit with an explicit
1509
* transaction (!all && !self->st_auto_commit).
1511
if (all || self->st_auto_commit) {
1514
XT_PRINT0(self, "xt_xn_prepare in pbxt_prepare\n");
1515
thd_get_xid(thd, (MYSQL_XID*) &xid);
1517
if (!xt_xn_prepare(xid.length(), (xtWord1 *) &xid, self))
1518
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1525
// Provides a PBXT thread with the global database open, for use by the XA
// callbacks. If current_thd exists, its PBXT thread is looked up through
// thd_ha_data(); a new PBXT thread named thread_name is created with
// xt_create_thread() when one is needed, and a creation failure is reported
// through *err and logged. The function waits for recovery to reach
// XT_RECOVER_DONE and opens the database at mysql_real_data_home.
// *temp_thread is a bit mask describing what must be released afterwards:
// value 1 means the PBXT thread is freed with xt_free_thread(), value 2 means
// the MySQL thread is destroyed with myxt_destroy_thread(). On an error
// during opening these are released here and *err is set.
// NOTE(review): the code that sets the bits of *temp_thread is not shown in
// full (one assignment is commented out) - confirm which bits are ever set.
static XTThreadPtr ha_temp_open_global_database(handlerton *hton, THD **ret_thd, int *temp_thread, const char *thread_name, int *err)
1528
XTThreadPtr self = NULL;
1531
if ((thd = current_thd))
1532
self = (XTThreadPtr) *thd_ha_data(thd, hton);
1534
//thd = (THD *) myxt_create_thread();
1535
//*temp_thread |= 2;
1541
if (!(self = xt_create_thread(thread_name, FALSE, TRUE, &e))) {
1542
*err = xt_ha_pbxt_to_mysql_error(e.e_xt_err);
1543
xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
1549
xt_xres_wait_for_recovery(self, XT_RECOVER_DONE);
1552
xt_open_database(self, mysql_real_data_home, TRUE);
1555
*err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1556
if ((*temp_thread & 1))
1557
xt_free_thread(self);
1558
if (*temp_thread & 2)
1559
myxt_destroy_thread(thd, FALSE);
1568
// Counterpart of ha_temp_open_global_database(). Releases the database with
// xt_unuse_database(), then frees the PBXT thread if value 1 is set in
// temp_thread and destroys the MySQL thread if value 2 is set.
static void ha_temp_close_database(XTThreadPtr self, THD *thd, int temp_thread)
1570
xt_unuse_database(self, self);
1571
if (temp_thread & 1)
1572
xt_free_thread(self);
1573
if (temp_thread & 2)
1574
myxt_destroy_thread(thd, TRUE);
1577
/* Return all prepared transactions, found during recovery.
1578
* This function returns a count. If len is returned, the
1579
* function will be called again.
1581
// XA recover callback. Opens the global database through
// ha_temp_open_global_database(), then fills xid_list with at most len
// entries obtained from xt_xn_enum_xa_data(), copying xp_data_len bytes of
// xp_xa_data for each entry, and finally releases the database with
// ha_temp_close_database().
// NOTE(review): the memcpy length is taken from the stored record rather than
// from the size of an XID - confirm xp_data_len can never exceed sizeof(XID).
static int pbxt_recover(handlerton *hton, XID *xid_list, uint len)
1587
XTXactPreparePtr xap;
1591
if (!(self = ha_temp_open_global_database(hton, &thd, &temp_thread, "TempForRecover", &err)))
1594
db = self->st_database;
1596
for (count=0; count<len; count++) {
1597
xap = xt_xn_enum_xa_data(db, &pbxt_xa_enum);
1600
memcpy(&xid_list[count], xap->xp_xa_data, xap->xp_data_len);
1603
ha_temp_close_database(self, thd, temp_thread);
1607
// XA commit-by-XID callback. Opens the global database through
// ha_temp_open_global_database(), looks up the prepared-transaction record
// for xid with xt_xn_find_xa_data() and, if the transaction is found with
// xt_xn_get_xact(), clears its XT_XN_XAC_PREPARED flag and commits it with
// xt_xn_commit(). The XA record is deleted with xt_xn_delete_xa_data() and
// the database is released with ha_temp_close_database().
static int pbxt_commit_by_xid(handlerton *hton, XID *xid)
1613
XTXactPreparePtr xap;
1618
if (!(self = ha_temp_open_global_database(hton, &thd, &temp_thread, "TempForCommitXA", &err)))
1620
db = self->st_database;
1622
if ((xap = xt_xn_find_xa_data(db, xid->length(), (xtWord1 *) xid, TRUE, self))) {
1623
if ((self->st_xact_data = xt_xn_get_xact(db, xap->xp_xact_id, self))) {
1624
self->st_xact_data->xd_flags &= ~XT_XN_XAC_PREPARED; // Prepared transactions cannot be swept!
1625
if (!xt_xn_commit(self))
1626
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1628
xt_xn_delete_xa_data(db, xap, TRUE, self);
1631
ha_temp_close_database(self, thd, temp_thread);
1635
// XA rollback-by-XID callback. Opens the global database through
// ha_temp_open_global_database(), looks up the prepared-transaction record
// for xid with xt_xn_find_xa_data() and, if the transaction is found with
// xt_xn_get_xact(), clears its XT_XN_XAC_PREPARED flag and rolls it back with
// xt_xn_rollback(). The XA record is deleted with xt_xn_delete_xa_data() and
// the database is released with ha_temp_close_database().
static int pbxt_rollback_by_xid(handlerton *hton, XID *xid)
1641
XTXactPreparePtr xap;
1646
if (!(self = ha_temp_open_global_database(hton, &thd, &temp_thread, "TempForRollbackXA", &err)))
1648
db = self->st_database;
1650
if ((xap = xt_xn_find_xa_data(db, xid->length(), (xtWord1 *) xid, TRUE, self))) {
1651
if ((self->st_xact_data = xt_xn_get_xact(db, xap->xp_xact_id, self))) {
1652
self->st_xact_data->xd_flags &= ~XT_XN_XAC_PREPARED; // Prepared transactions cannot be swept!
1653
if (!xt_xn_rollback(self))
1654
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1656
xt_xn_delete_xa_data(db, xap, TRUE, self);
1659
ha_temp_close_database(self, thd, temp_thread);
1666
* -----------------------------------------------------------------------
1667
* HANDLER LOCKING FUNCTIONS
1669
* These functions are used to get a lock on all handles of a particular table.
1673
// Inserts handler at the head of the doubly linked handler list of share
// (sh_handlers, linked through pb_ex_next and pb_ex_prev). The list is
// modified while holding sh_ex_mutex; the unlock is registered with pushr_()
// and performed by freer_().
static void ha_add_to_handler_list(XTThreadPtr self, XTSharePtr share, ha_pbxt *handler)
1675
xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1676
pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1678
handler->pb_ex_next = share->sh_handlers;
1679
handler->pb_ex_prev = NULL;
1680
if (share->sh_handlers)
1681
share->sh_handlers->pb_ex_prev = handler;
1682
share->sh_handlers = handler;
1684
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1687
// Unlinks handler from the doubly linked handler list of share. If handler is
// the list head, sh_handlers is advanced to its successor; the neighbours are
// then re-linked around it. The list is modified while holding sh_ex_mutex;
// the unlock is registered with pushr_() and performed by freer_().
static void ha_remove_from_handler_list(XTThreadPtr self, XTSharePtr share, ha_pbxt *handler)
1689
xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1690
pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1692
/* Move front pointer: */
1693
if (share->sh_handlers == handler)
1694
share->sh_handlers = handler->pb_ex_next;
1696
/* Remove from list: */
1697
if (handler->pb_ex_prev)
1698
handler->pb_ex_prev->pb_ex_next = handler->pb_ex_next;
1699
if (handler->pb_ex_next)
1700
handler->pb_ex_next->pb_ex_prev = handler->pb_ex_prev;
1702
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1706
* Acquire exclusive use of a table, by waiting for all
1707
* threads to complete use of all handlers of the table.
1708
* At the same time we hold up all threads
1709
* that want to use handlers belonging to the table.
1711
* But we do not hold up threads that close the handlers.
1713
// Acquires exclusive use of the table behind share. While holding sh_ex_mutex
// it first waits, in slices of XT_SHARE_LOCK_WAIT, until sh_table_lock is
// clear, then sets sh_table_lock to TRUE and walks sh_handlers, waiting until
// no handler other than mine has pb_ex_in_use set. After a wait the walk
// restarts from the list head because a handler may have been freed.
// The deadline end_time is time(NULL) + XT_SHARE_LOCK_TIMEOUT / 1000; once it
// has passed the mutex is released and XT_ERR_LOCK_TIMEOUT is thrown with
// xt_throw_taberr().
static void ha_aquire_exclusive_use(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine)
1716
time_t end_time = time(NULL) + XT_SHARE_LOCK_TIMEOUT / 1000;
1718
XT_PRINT1(self, "ha_aquire_exclusive_use (%s) PBXT X lock\n", share->sh_table_path->ps_path);
1719
/* GOTCHA: It is possible to hang here, if you hold
1720
* onto the sh_ex_mutex lock, before we really
1721
* have the exclusive lock (i.e. before all
1722
* handlers are no longer in use.
1723
* The reason is, because reopen() is not possible
1724
* when some other thread holds sh_ex_mutex.
1725
* So this can prevent a thread from completing its
1726
* use of a handler, when prevents exclusive use
1729
xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1730
pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1732
/* Wait until we can get an exclusive lock: */
1733
while (share->sh_table_lock) {
1734
xt_timed_wait_cond(self, (xt_cond_type *) share->sh_ex_cond, (xt_mutex_type *) share->sh_ex_mutex, XT_SHARE_LOCK_WAIT);
1735
if (time(NULL) > end_time) {
1736
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1737
xt_throw_taberr(XT_CONTEXT, XT_ERR_LOCK_TIMEOUT, share->sh_table_path);
1741
/* This tells readers (and other exclusive lockers) that someone has an exclusive lock. */
1742
share->sh_table_lock = TRUE;
1744
/* Wait for all open handlers use count to go to 0 */
1746
handler = share->sh_handlers;
1748
if (handler == mine || !handler->pb_ex_in_use)
1749
handler = handler->pb_ex_next;
1751
/* Wait a bit, and try again: */
1752
xt_timed_wait_cond(self, (xt_cond_type *) share->sh_ex_cond, (xt_mutex_type *) share->sh_ex_mutex, XT_SHARE_LOCK_WAIT);
1753
if (time(NULL) > end_time) {
1754
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1755
xt_throw_taberr(XT_CONTEXT, XT_ERR_LOCK_TIMEOUT, share->sh_table_path);
1757
/* Handler may have been freed, check from the beginning again: */
1762
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1766
* If you have exclusively locked the table, you can close all handler
1769
* Call ha_close_open_tables() to get an exclusive lock.
1771
// Closes the open table of every handler on the share except mine. While
// holding sh_ex_mutex it walks sh_handlers and, for each other handler that
// has pb_open_tab set, returns the table to the pool with
// xt_db_return_table_to_pool_ns() and clears pb_open_tab.
// The comments here indicate the caller must already have exclusive use of
// the table, so that no handler is in use.
static void ha_close_open_tables(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine)
1775
xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1776
pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1778
/* Now that we know no handler is in use, we can close all the
1781
handler = share->sh_handlers;
1783
if (handler != mine && handler->pb_open_tab) {
1784
xt_db_return_table_to_pool_ns(handler->pb_open_tab);
1785
handler->pb_open_tab = NULL;
1787
handler = handler->pb_ex_next;
1790
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1793
// Releases the exclusive lock taken by ha_aquire_exclusive_use(): under
// sh_ex_mutex it clears sh_table_lock and broadcasts sh_ex_cond so that
// waiting threads can continue. Two signatures are given; the one selected
// when PBXT_ALLOW_PRINTING is defined names the self parameter, which is used
// by the trace call, while the other marks it XT_UNUSED.
#ifdef PBXT_ALLOW_PRINTING
1794
static void ha_release_exclusive_use(XTThreadPtr self, XTSharePtr share)
1796
static void ha_release_exclusive_use(XTThreadPtr XT_UNUSED(self), XTSharePtr share)
1799
XT_PRINT1(self, "ha_release_exclusive_use (%s) PBXT X UNLOCK\n", share->sh_table_path->ps_path);
1800
xt_lock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1801
share->sh_table_lock = FALSE;
1802
xt_broadcast_cond_ns((xt_cond_type *) share->sh_ex_cond);
1803
xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1806
// Blocks handler mine until no exclusive lock is held on the share.
// pb_ex_in_use is cleared before waiting and set back to 1 once the wait is
// over. While sh_table_lock is set the function broadcasts sh_ex_cond (to wake
// an exclusive locker that may be waiting) and then waits on the same
// condition for XT_SHARE_LOCK_WAIT. The mutex is released on every exit path.
// After end_time (time(NULL) + XT_SHARE_LOCK_TIMEOUT / 1000) has passed,
// XT_ERR_LOCK_TIMEOUT is registered with xt_register_taberr().
// The caller in open() tests the result with a logical not, i.e. it treats a
// zero result as failure.
static xtBool ha_wait_for_shared_use(ha_pbxt *mine, XTSharePtr share)
1808
time_t end_time = time(NULL) + XT_SHARE_LOCK_TIMEOUT / 1000;
1810
XT_PRINT1(xt_get_self(), "ha_wait_for_shared_use (%s) share lock wait...\n", share->sh_table_path->ps_path);
1811
mine->pb_ex_in_use = 0;
1812
xt_lock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1813
while (share->sh_table_lock) {
1814
/* Wake up the exclusive locker (may be waiting). He can try to continue: */
1815
xt_broadcast_cond_ns((xt_cond_type *) share->sh_ex_cond);
1817
if (!xt_timed_wait_cond(NULL, (xt_cond_type *) share->sh_ex_cond, (xt_mutex_type *) share->sh_ex_mutex, XT_SHARE_LOCK_WAIT)) {
1818
xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1822
if (time(NULL) > end_time) {
1823
xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1824
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_LOCK_TIMEOUT, share->sh_table_path);
1828
mine->pb_ex_in_use = 1;
1829
xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1833
// Re-opens the table of this handler on behalf of the current thread.
// Obtains the PBXT thread with ha_set_current_thread() (returning a converted
// error code if that fails), opens the database of the table, re-opens the
// share with ha_open_share() and obtains a new open table via
// xt_db_open_table_using_tab(), whose ot_thread is set to the current thread.
// If index statistics have not been calculated yet (tab_ind_stat_calc_time is
// zero) the row pointers are loaded (the whole table when LOAD_TABLE_ON_OPEN
// is defined), the index selectivity is set and sh_recalc_selectivity is
// derived from a row count below 150. The auto-increment state is initialised
// with init_auto_increment(0). Errors are converted with
// xt_ha_pbxt_thread_error_for_mysql().
xtPublic int ha_pbxt::reopen()
1835
THD *thd = current_thd;
1839
if (!(self = ha_set_current_thread(thd, &err)))
1840
return xt_ha_pbxt_to_mysql_error(err);
1843
xt_ha_open_database_of_table(self, pb_share->sh_table_path);
1845
ha_open_share(self, pb_share);
1847
if (!(pb_open_tab = xt_db_open_table_using_tab(pb_share->sh_table, self)))
1849
pb_open_tab->ot_thread = self;
1852
* We no longer use the information that a table
1853
* was opened in order to know when to calculate
1856
if (!pb_open_tab->ot_table->tab_ind_stat_calc_time) {
1857
#ifdef LOAD_TABLE_ON_OPEN
1858
xt_tab_load_table(self, pb_open_tab);
1860
xt_tab_load_row_pointers(self, pb_open_tab);
1862
xt_ind_set_index_selectivity(pb_open_tab, self);
1863
/* If the number of rows is less than 150 we will recalculate the
1864
* selectity of the indices, as soon as the number of rows
1865
* exceeds 200 (see [**])
1867
#ifdef XT_ROW_COUNT_CORRECTED
1868
/* {CORRECTED-ROW-COUNT} */
1869
pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 - pb_share->sh_table->tab_row_fnum) < 150;
1871
/* {FREE-ROWS-BAD} */
1872
pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150;
1876
/* I am not doing this anymore because it was only required
1877
* for DELETE FROM table;, which is now implemented
1878
* by deleting each row.
1879
* TRUNCATE TABLE does not preserve the counter value.
1881
//init_auto_increment(pb_share->sh_min_auto_inc);
1882
init_auto_increment(0);
1885
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
1893
* -----------------------------------------------------------------------
1894
* INFORMATION SCHEMA FUNCTIONS
1898
// Fill function of the PBXT_STATISTICS information schema table.
// When PBXT is not available an XT_ERR_PBXT_NOT_INSTALLED exception is logged
// and, per the comment below, an empty set is returned. Otherwise the PBXT
// thread is obtained with ha_set_current_thread(), the database is opened if
// the thread has none while pbxt_database is set, and the rows are produced
// by myxt_statistics_fill_table(). Errors are converted with
// xt_ha_pbxt_thread_error_for_mysql().
static int pbxt_statistics_fill_table(THD *thd, TABLE_LIST *tables, COND *cond)
1900
XTThreadPtr self = NULL;
1904
/* Can't do if PBXT is not loaded! */
1907
xt_exception_xterr(&e, XT_CONTEXT, XT_ERR_PBXT_NOT_INSTALLED);
1908
xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
1909
/* Just return an empty set: */
1913
if (!(self = ha_set_current_thread(thd, &err)))
1914
return xt_ha_pbxt_to_mysql_error(err);
1918
/* If the thread has no open database, and the global
1919
* database is already open, then open
1920
* the database. Otherwise the statement will be
1921
* executed without an open database, which means
1922
* that the related statistics will be missing.
1924
* This includes all background threads.
1926
if (!self->st_database && pbxt_database) {
1927
xt_ha_open_database_of_table(self, (XTPathStrPtr) NULL);
1930
err = myxt_statistics_fill_table(self, thd, tables, cond, system_charset_info);
1933
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1942
// Column definitions of the PBXT_STATISTICS table, expressed as ColumnInfo
// objects: ID (MYSQL_TYPE_LONG), Name (MYSQL_TYPE_STRING, length 40) and
// Value (MYSQL_TYPE_LONGLONG).
ColumnInfo pbxt_statistics_fields_info[]=
1944
ColumnInfo("ID", 4, MYSQL_TYPE_LONG, 0, 0, "The ID of the statistic", SKIP_OPEN_TABLE),
1945
ColumnInfo("Name", 40, MYSQL_TYPE_STRING, 0, 0, "The name of the statistic", SKIP_OPEN_TABLE),
1946
ColumnInfo("Value", 8, MYSQL_TYPE_LONGLONG, 0, 0, "The accumulated value", SKIP_OPEN_TABLE),
1950
// InfoSchemaMethods implementation for the PBXT_STATISTICS table. fillTable()
// simply forwards its arguments to pbxt_statistics_fill_table() and returns
// its result.
class PBXTStatisticsMethods : public InfoSchemaMethods
1953
int fillTable(Session *session, TableList *tables, COND *cond)
1955
return pbxt_statistics_fill_table(session, tables, cond);
1960
// Column definitions of the PBXT_STATISTICS table, expressed as an
// ST_FIELD_INFO array: ID, Name and Value, followed by an entry whose name is
// 0, which terminates the list.
ST_FIELD_INFO pbxt_statistics_fields_info[]=
1962
{ "ID", 4, MYSQL_TYPE_LONG, 0, 0, "The ID of the statistic", SKIP_OPEN_TABLE},
1963
{ "Name", 40, MYSQL_TYPE_STRING, 0, 0, "The name of the statistic", SKIP_OPEN_TABLE},
1964
{ "Value", 8, MYSQL_TYPE_LONGLONG, 0, 0, "The accumulated value", SKIP_OPEN_TABLE},
1965
{ 0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
1971
// File-scope state for the PBXT_STATISTICS information schema table and the
// Registry-based initialiser that publishes it. pbxt_init_statistics()
// allocates the InfoSchemaTable with new, attaches the column definitions and
// the methods object, and adds the table to the registry.
// NOTE(review): the parameter text reads like a mis-encoded &registry -
// confirm against the original file.
static InfoSchemaTable *pbxt_statistics_table;
1972
static PBXTStatisticsMethods pbxt_statistics_methods;
1973
static int pbxt_init_statistics(Registry ®istry)
1975
//pbxt_statistics_table = (InfoSchemaTable *)xt_calloc_ns(sizeof(InfoSchemaTable));
1976
//pbxt_statistics_table->table_name= "PBXT_STATISTICS";
1977
pbxt_statistics_table = new InfoSchemaTable("PBXT_STATISTICS");
1978
pbxt_statistics_table->setColumnInfo(pbxt_statistics_fields_info);
1979
pbxt_statistics_table->setInfoSchemaMethods(&pbxt_statistics_methods);
1980
registry.add(pbxt_statistics_table);
1985
// Initialiser for the statistics schema table in the variant that receives an
// opaque pointer: p is cast to ST_SCHEMA_TABLE and its fields_info and
// fill_table members are set. On builds with both XT_WIN and XT_COREDUMP
// defined, register_crash_filter() is called when pbxt_crash_debug is set.
static int pbxt_init_statistics(void *p)
1987
ST_SCHEMA_TABLE *pbxt_statistics_table = (ST_SCHEMA_TABLE *) p;
1988
pbxt_statistics_table->fields_info = pbxt_statistics_fields_info;
1989
pbxt_statistics_table->fill_table = pbxt_statistics_fill_table;
1991
#if defined(XT_WIN) && defined(XT_COREDUMP)
1992
void register_crash_filter();
1994
if (pbxt_crash_debug)
1995
register_crash_filter();
2003
// Removes the PBXT_STATISTICS table from the registry and deletes the object
// that pbxt_init_statistics() allocated.
// NOTE(review): the parameter text reads like a mis-encoded &registry -
// confirm against the original file.
static int pbxt_exit_statistics(Registry ®istry)
2004
registry.remove(pbxt_statistics_table);
2005
delete pbxt_statistics_table;
2010
// Exit hook for the statistics schema table in the variant that receives an
// opaque pointer; the argument is marked XT_UNUSED.
static int pbxt_exit_statistics(void *XT_UNUSED(p))
2017
* -----------------------------------------------------------------------
2022
// Constructs a handler for the given engine and table by forwarding both to
// the Cursor base class, and resets the handler flags pb_key_read,
// pb_ignore_dup_key, pb_lock_table and pb_table_locked.
ha_pbxt::ha_pbxt(plugin::StorageEngine &engine_arg, Table &table_arg) : Cursor(engine_arg, table_arg)
2026
pb_key_read = FALSE;
2027
pb_ignore_dup_key = 0;
2028
pb_lock_table = FALSE;
2029
pb_table_locked = 0;
2037
* If frm_error() is called then we will use this to find out what file extensions
2038
* exist for the storage engine. This is also used by the default rename_table and
2039
* delete_table method in handler.cc.
2042
// Returns pbxt_extensions, the list of file extensions used by the engine.
// Two signatures are given, one as a member of PBXTStorageEngine and one as a
// member of ha_pbxt.
// NOTE(review): presumably the Drizzle and MySQL variants - confirm.
const char **PBXTStorageEngine::bas_ext() const
2044
const char **ha_pbxt::bas_ext() const
2047
return pbxt_extensions;
2051
* Specify the caching type: HA_CACHE_TBL_NONTRANSACT, HA_CACHE_TBL_NOCACHE
2052
* HA_CACHE_TBL_ASKTRANSACT, HA_CACHE_TBL_TRANSACT
2054
// Reports the query cache type of the table; always HA_CACHE_TBL_TRANSACT.
MX_UINT8_T ha_pbxt::table_cache_type()
2056
return HA_CACHE_TBL_TRANSACT; /* Use transactional query cache */
2060
// Returns the capability flags of the handler as a bitwise OR of HA_ flags.
// Flags that appear in the expression below: HA_CAN_INDEX_BLOBS,
// HA_NOT_EXACT_COUNT, HA_CAN_SQL_HANDLER and, when MYSQL_VERSION_ID is above
// 50119, HA_BINLOG_ROW_CAPABLE. HA_CAN_INSERT_DELAYED is commented out, with
// MySQL bug #40505 given as the reason.
// NOTE(review): several comments below describe flags whose names are not
// shown next to them - confirm the full flag list against the original file.
MX_TABLE_TYPES_T ha_pbxt::table_flags() const
2063
/* We need this flag because records are not packed
2064
* into a table which means #ROWID != offset
2067
/* Since PBXT caches read records itself, I believe
2068
* this to be the case.
2072
* I am assuming a "key" means a unique index.
2073
* Of course a primary key does not allow nulls.
2077
* This is necessary because a MySQL blob can be
2080
HA_CAN_INDEX_BLOBS |
2082
* Due to transactional influences, this will be
2084
* Although the count is good enough for practical
2086
HA_NOT_EXACT_COUNT |
2089
* This basically means we have a file with the name of
2090
* database table (which we do).
2094
* Not sure what this does (but MyISAM and InnoDB have it)?!
2095
* Could it mean that we support the handler functions.
2097
HA_CAN_SQL_HANDLER |
2099
* This is not true, we cannot insert delayed, but a
2100
* really cannot see what's wrong with inserting normally
2101
* when asked to insert delayed!
2102
* And the functionallity is required to pass the alter_table
2105
* Disabled because of MySQL bug #40505
2107
/*HA_CAN_INSERT_DELAYED |*/
2108
#if MYSQL_VERSION_ID > 50119
2109
/* We can do row logging, but not statement, because
2110
* MVCC is not serializable!
2112
HA_BINLOG_ROW_CAPABLE |
2115
* Auto-increment is allowed on a partial key.
2121
// Releases everything this handler holds on its table. While holding
// sh_ex_mutex the open table, if any, is assigned to the calling thread (the
// database of the table is opened first when it differs from the database of
// the thread) and its return to the pool is registered with pushr_(). The
// handler is then removed from the handler list of the share, waiters on
// sh_ex_cond are woken and the share reference is dropped with
// ha_unget_share_removed(). Exceptions are logged and cleared with
// xt_log_and_clear_exception().
// Two calls to xt_sync_flush_table() appear below: one when thd is NULL or
// the statement is FLUSH, and one for RENAME TABLE, ANALYZE, ALTER TABLE and
// CREATE INDEX statements.
// NOTE(review): the comments suggest the first flush was found too expensive -
// confirm which of the two paths is actually compiled in.
void ha_pbxt::internal_close(THD *thd, struct XTThread *self)
2128
/* This lock must be held when we remove the handler's
2129
* open table because ha_close_open_tables() can run
2132
xt_lock_mutex_ns(pb_share->sh_ex_mutex);
2133
if ((ot = pb_open_tab)) {
2134
pb_open_tab->ot_thread = self;
2135
if (self->st_database != pb_open_tab->ot_table->tab_db)
2136
xt_ha_open_database_of_table(self, pb_share->sh_table_path);
2138
pushr_(xt_db_return_table_to_pool, ot);
2140
xt_unlock_mutex_ns(pb_share->sh_ex_mutex);
2142
ha_remove_from_handler_list(self, pb_share, this);
2144
/* Someone may be waiting for me to complete: */
2145
xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
2147
removed = ha_unget_share_removed(self, pb_share);
2150
/* Flush the table if this was the last handler: */
2151
/* This is not necessary but has the affect that
2152
* FLUSH TABLES; does a checkpoint!
2156
* This was killing performance as the number of threads increased!
2158
* When MySQL runs out of table handlers because the table
2159
* handler cache is too small, it starts to close handlers.
2160
* (open_cache.records > table_cache_size)
2162
* Which can lead to closing all handlers for a particular table.
2164
* It does this while holding lock_OPEN!
2165
* So this code below leads to a sync operation while lock_OPEN
2166
* is held. The result is that the whole server comes to a stop.
2168
if (!thd || thd_sql_command(thd) == SQLCOM_FLUSH) // FLUSH TABLES
2169
xt_sync_flush_table(self, ot, thd ? 0 : 4);
2171
/* This change is a result of a problem mentioned by Arjen.
2172
* REPAIR and ALTER lead to the following sequence:
2173
* 1. tab -- copy --> tmp1
2174
* 2. tab -- rename --> tmp2
2175
* 3. tmp1 -- rename --> tab
2178
* PBXT flushes a table before rename.
2179
* In the sequence above results in a table flush in step 3 which can
2180
* take a very long time.
2182
* The problem is, during this time frame we have only temp tables.
2183
* A crash in this state leaves the database in a bad state.
2185
* To reduce the time in this state, the flush needs to be done
2186
* elsewhere. The code below causes the flish to occur after
2189
switch (thd_sql_command(thd)) {
2190
case SQLCOM_RENAME_TABLE:
2191
case SQLCOM_ANALYZE:
2192
case SQLCOM_ALTER_TABLE:
2193
case SQLCOM_CREATE_INDEX:
2194
xt_sync_flush_table(self, ot, thd ? 0 : 4);
2199
freer_(); // xt_db_return_table_to_pool(ot);
2203
xt_log_and_clear_exception(self);
2212
* Used for opening tables. The name will be the name of the file.
2213
* A table is opened when it needs to be opened. For instance
2214
* when a request comes in for a select on the table (tables are not
2215
* open and closed for each request, they are cached).
2217
* Called from handler.cc by handler::ha_open(). The server opens all tables by
2218
* calling ha_open() which then calls the handler specific open().
2220
// Opens the table at table_path for this handler; the mode and
// test_if_locked arguments are marked XT_UNUSED.
// Sets ref_length to XT_RECORD_OFFS_SIZE, obtains the PBXT thread with
// ha_set_current_thread() (returning a converted error code if that fails),
// opens the database of the table, gets the share with ha_get_share() and adds
// this handler to its handler list. If the share is exclusively locked the
// handler waits in ha_wait_for_shared_use(). The share is then opened, the
// lock object initialised and an open table obtained through
// xt_db_open_table_using_tab(), whose ot_thread is set to the current thread.
// If index statistics have not been calculated yet, row pointers are loaded
// (the whole table when LOAD_TABLE_ON_OPEN is defined), index selectivity is
// set and sh_recalc_selectivity is derived from a row count below 150.
// init_auto_increment(0) initialises the auto-increment state. On an error
// the code is converted with xt_ha_pbxt_thread_error_for_mysql() and
// internal_close() is called. info() is called to load the table statistics,
// and a possible exclusive locker waiting on sh_ex_cond is woken.
int ha_pbxt::open(const char *table_path, int XT_UNUSED(mode), uint XT_UNUSED(test_if_locked))
2222
THD *thd = current_thd;
2226
ref_length = XT_RECORD_OFFS_SIZE;
2228
if (!(self = ha_set_current_thread(thd, &err)))
2229
return xt_ha_pbxt_to_mysql_error(err);
2231
XT_PRINT1(self, "open (%s)\n", table_path);
2235
xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
2237
pb_share = ha_get_share(self, table_path, false);
2238
ha_add_to_handler_list(self, pb_share, this);
2239
if (pb_share->sh_table_lock) {
2240
if (!ha_wait_for_shared_use(this, pb_share))
2244
ha_open_share(self, pb_share);
2246
pb_lock.init(&pb_share->sh_lock);
2247
if (!(pb_open_tab = xt_db_open_table_using_tab(pb_share->sh_table, self)))
2249
pb_open_tab->ot_thread = self;
2252
if (!pb_open_tab->ot_table->tab_ind_stat_calc_time) {
2253
#ifdef LOAD_TABLE_ON_OPEN
2254
xt_tab_load_table(self, pb_open_tab);
2256
xt_tab_load_row_pointers(self, pb_open_tab);
2259
xt_ind_set_index_selectivity(pb_open_tab, self);
2260
#ifdef XT_ROW_COUNT_CORRECTED
2261
/* {CORRECTED-ROW-COUNT} */
2262
pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 - pb_share->sh_table->tab_row_fnum) < 150;
2264
/* {FREE-ROWS-BAD} */
2265
pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150;
2269
init_auto_increment(0);
2272
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
2273
internal_close(thd, self);
2278
info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
2282
/* Someone may be waiting for me to complete: */
2283
if (pb_share->sh_table_lock)
2284
xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
2291
Closes a table. We call the free_share() function to free any resources
2292
that we have allocated in the "shared" structure.
2294
Called from sql_base.cc, sql_select.cc, and table.cc.
2295
In sql_select.cc it is only used to close up temporary tables or during
2296
the process where a temporary table is converted over to being a
2298
For sql_base.cc look at close_data_tables().
2300
// Closes the table of this handler. The PBXT thread is obtained from the
// current MySQL thread with ha_set_current_thread(); a temporary thread named
// TempForClose is created with xt_create_thread() as an alternative, and a
// failure to create it is logged. The actual work is done by internal_close().
// Errors are converted with xt_ha_pbxt_thread_error_for_mysql(), the
// temporary thread is freed with xt_free_thread(), and a warning is logged
// when the table reference could not be released.
// NOTE(review): presumably the temporary thread is only used when there is no
// current MySQL thread - confirm.
int ha_pbxt::close(void)
2302
THD *thd = current_thd;
2303
volatile int err = 0;
2304
volatile XTThreadPtr self;
2307
self = ha_set_current_thread(thd, (int *) &err);
2311
if (!(self = xt_create_thread("TempForClose", FALSE, TRUE, &e))) {
2312
xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
2317
XT_PRINT1(self, "close (%s)\n", pb_share && pb_share->sh_table_path->ps_path ? pb_share->sh_table_path->ps_path : "unknown");
2321
internal_close(thd, self);
2324
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
2329
xt_free_thread(self);
2332
xt_log(XT_NS_CONTEXT, XT_LOG_WARNING, "Unable to release table reference\n");
2337
// Initialises tab_auto_inc of the open table from the highest value found in
// the auto-increment index. Nothing is done when the index is disabled
// (dic_disable_index). While holding the spinlock tab_ainc_lock, and only if
// the table has an auto-increment field and tab_auto_inc is still zero, the
// function:
//   - starts a repeatable-read, auto-commit transaction if the thread has no
//     transaction (the spinlock is released if xt_xn_begin() fails);
//   - temporarily redirects in_use and next_number_field of the table,
//     restoring the saved values afterwards;
//   - performs a key-only index read: index_last() when the auto-increment
//     column is at the start of the key, otherwise a scan using index_first()
//     and index_next();
//   - stores the value found in tab_auto_inc.
// The stored value is set to dic_min_auto_inc - 1 or min_auto_inc - 1 when it
// lies below the respective minimum, because the counter is pre-incremented
// before use (see the explanation in the comment below).
void ha_pbxt::init_auto_increment(xtWord8 min_auto_inc)
2343
/* Get the value of the auto-increment value by
2344
* loading the highest value from the index...
2346
tab = pb_open_tab->ot_table;
2348
/* Cannot do this if the index version is bad! */
2349
if (tab->tab_dic.dic_disable_index)
2352
xt_spinlock_lock(&tab->tab_ainc_lock);
2353
if (getTable()->found_next_number_field && !tab->tab_auto_inc) {
2354
Field *tmp_fie = getTable()->next_number_field;
2355
THD *tmp_thd = getTable()->in_use;
2356
xtBool xn_started = FALSE;
2357
XTThreadPtr self = pb_open_tab->ot_thread;
2360
* A table may be opened by a thread with a running
2362
* Since get_auto_increment() does not do an update,
2363
* it should be OK to use the transaction we already
2364
* have to get the next auto-increment value.
2366
if (!self->st_xact_data) {
2367
self->st_xact_mode = XT_XACT_REPEATABLE_READ;
2368
self->st_ignore_fkeys = FALSE;
2369
self->st_auto_commit = TRUE;
2370
self->st_table_trans = FALSE;
2371
self->st_abort_trans = FALSE;
2372
self->st_stat_ended = FALSE;
2373
self->st_stat_trans = FALSE;
2374
self->st_is_update = NULL;
2375
if (!xt_xn_begin(self)) {
2376
xt_spinlock_unlock(&tab->tab_ainc_lock);
2382
/* Setup the conditions for the next call! */
2383
getTable()->in_use = current_thd;
2384
getTable()->next_number_field = getTable()->found_next_number_field;
2386
extra(HA_EXTRA_KEYREAD);
2387
getTable()->mark_columns_used_by_index_no_reset(getTable()->getShare()->next_number_index, *getTable()->read_set);
2388
column_bitmaps_signal();
2389
doStartIndexScan(getTable()->getShare()->next_number_index, 0);
2390
if (!getTable()->getShare()->next_number_key_offset) {
2391
// Autoincrement at key-start
2392
err = index_last(getTable()->getUpdateRecord());
2393
if (!err && !getTable()->next_number_field->is_null(getTable()->getShare()->rec_buff_length)) {
2395
nr = (xtWord8) getTable()->next_number_field->val_int_offset(getTable()->getShare()->rec_buff_length);
2399
/* Do an index scan to find the largest value! */
2400
/* The standard method will not work because it forces
2401
* us to lock that table!
2405
err = index_first(getTable()->getUpdateRecord());
2408
val = (xtWord8) getTable()->next_number_field->val_int_offset(getTable()->getShare()->rec_buff_length);
2411
err = index_next(getTable()->getUpdateRecord());
2416
extra(HA_EXTRA_NO_KEYREAD);
2419
* I have changed this from post increment to pre-increment!
2421
* When using post increment we are not able to return
2422
* the last valid value in the range.
2424
* Here the test example:
2426
* drop table if exists t1;
2427
* create table t1 (i tinyint unsigned not null auto_increment primary key) engine=pbxt;
2428
* insert into t1 set i = 254;
2429
* insert into t1 set i = null;
2431
* With post-increment, this last insert fails because on post increment
2432
* the value overflows!
2434
* Pre-increment means we store the current max, and increment
2435
* before returning the next value.
2437
* This will work in this situation.
2439
tab->tab_auto_inc = nr;
2440
if (tab->tab_auto_inc < tab->tab_dic.dic_min_auto_inc)
2441
tab->tab_auto_inc = tab->tab_dic.dic_min_auto_inc-1;
2442
if (tab->tab_auto_inc < min_auto_inc)
2443
tab->tab_auto_inc = min_auto_inc-1;
2445
/* Restore the changed values: */
2446
getTable()->next_number_field = tmp_fie;
2447
getTable()->in_use = tmp_thd;
2450
XT_PRINT0(self, "xt_xn_commit in init_auto_increment\n");
2454
xt_spinlock_unlock(&tab->tab_ainc_lock);
2457
void ha_pbxt::get_auto_increment(MX_ULONGLONG_T offset, MX_ULONGLONG_T increment,
2458
MX_ULONGLONG_T XT_UNUSED(nb_desired_values),
2459
MX_ULONGLONG_T *first_value,
2460
MX_ULONGLONG_T *nb_reserved_values)
2462
register XTTableHPtr tab;
2463
MX_ULONGLONG_T nr, nr_less_inc;
2465
ASSERT_NS(pb_ex_in_use);
2467
tab = pb_open_tab->ot_table;
2470
* Assume that nr contains the last value returned!
2471
* We will increment and then return the value.
2473
xt_spinlock_lock(&tab->tab_ainc_lock);
2474
nr = (MX_ULONGLONG_T) tab->tab_auto_inc;
2478
else if (increment > 1 && ((nr - offset) % increment) != 0)
2479
nr += increment - ((nr - offset) % increment);
2482
if (getTable()->next_number_field->cmp((const unsigned char *)&nr_less_inc, (const unsigned char *)&nr) < 0)
2483
tab->tab_auto_inc = (xtWord8) (nr);
2485
nr = ~0; /* indicate error to the caller */
2486
xt_spinlock_unlock(&tab->tab_ainc_lock);
2489
*nb_reserved_values = 1;
2492
/* GOTCHA: We need to use signed value here because of the test
2493
* (from auto_increment.test):
2494
* create table t1 (a int not null auto_increment primary key);
2495
* insert into t1 values (NULL);
2496
* insert into t1 values (-1);
2497
* insert into t1 values (NULL);
2499
/*
 * Raise the cached auto-increment counter of the table (tab_auto_inc) when
 * the value held in field nr is larger, and, when xt_db_auto_increment_mode
 * is 1, also move the dictionary minimum (dic_min_auto_inc) ahead of that
 * value and write it out with xt_tab_write_min_auto_inc().
 *
 * ot  open table; its ot_thread is replaced by xt_get_self() before the
 *     minimum is written
 * nr  field holding the auto-increment value of the row being written
 *
 * Returns nothing: a failed write of the minimum is logged and cleared.
 */
xtPublic void ha_set_auto_increment(XTOpenTablePtr ot, Field *nr)
2501
register XTTableHPtr tab;
2502
MX_ULONGLONG_T nr_int_val;
2504
nr_int_val = nr->val_int();
2507
// First comparison is made without the lock; the same test is repeated
// below once tab_ainc_lock is held.
if (nr->cmp_internal((const unsigned char *)&tab->tab_auto_inc) > 0) {
2508
xt_spinlock_lock(&tab->tab_ainc_lock);
2510
if (nr->cmp_internal((const unsigned char *)&tab->tab_auto_inc) > 0) {
2512
// NOTE(review): the comment delimiters around the next lines are not
// visible in this view, so the plus-one lines may be commented-out text
// rather than live code - confirm against the full file.
* We increment later, so just set the value!
2513
MX_ULONGLONG_T nr_int_val_plus_one = nr_int_val + 1;
2514
if (nr->cmp((const unsigned char *)&nr_int_val_plus_one) < 0)
2515
tab->tab_auto_inc = nr_int_val_plus_one;
2518
tab->tab_auto_inc = nr_int_val;
2520
xt_spinlock_unlock(&tab->tab_ainc_lock);
2523
if (xt_db_auto_increment_mode == 1) {
2524
// Only act when the stored value has passed the recorded minimum.
if (nr_int_val > (MX_ULONGLONG_T) tab->tab_dic.dic_min_auto_inc) {
2525
/* Do this every 100 calls: */
2527
// Two step sizes (+5 and +100) appear here; the lines that select between
// them are not visible in this view - presumably a build-time switch.
tab->tab_dic.dic_min_auto_inc = nr_int_val + 5;
2529
tab->tab_dic.dic_min_auto_inc = nr_int_val + 100;
2531
ot->ot_thread = xt_get_self();
2532
// A write failure is logged and cleared here instead of being reported
// to the caller.
if (!xt_tab_write_min_auto_inc(ot))
2533
xt_log_and_clear_exception(ot->ot_thread);
2539
/*
 * Debug helper: print the first len bytes of buf twice with printf, once as
 * characters and once as two-digit hexadecimal values.
 */
static void dump_buf(unsigned char *buf, int len)
2543
// Character pass: bytes above 127 are shown as a dot. Each character is
// printed with width 2, presumably to line up with the hex pairs below.
// NOTE(review): bytes below 32 (control characters) are printed raw.
for (i=0; i<len; i++) printf("%2c", buf[i] <= 127 ? buf[i] : '.');
2545
// Hex pass: two lowercase hex digits per byte.
for (i=0; i<len; i++) printf("%02x", buf[i]);
2551
* doInsertRecord() inserts a row. No extra() hint is given currently if a bulk load
2552
* is happening. buf() is a byte array of data. You can use the field
2553
* information to extract the data from the native byte array type.
2554
* Example of this would be:
2555
* for (Field **field=table->field ; *field ; field++)
2560
* See ha_tina.cc for an example of extracting all of the data as strings.
2561
* ha_berkeley.cc has an example of how to store it intact by "packing" it
2562
* for ha_berkeley's own native storage type.
2564
* See the note for doUpdateRecord() on auto_increments and timestamps. This
2565
* case also applies to doInsertRecord().
2567
* Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc,
2568
* sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc.
2570
/*
 * Insert the row in buf into the open table (pb_open_tab).
 *
 * Steps visible in this view, in order:
 *   - hand the row to the PBMS blob hook and log a failure,
 *   - in import mode (st_import_stat), commit and restart the transaction
 *     once pb_import_row_count reaches XT_IMPORT_ROW_COUNT,
 *   - update the auto-increment value when buf is the table insert record,
 *   - store the row with xt_tab_new_record(),
 *   - on a duplicate-key error during an update, pop the update stack,
 *   - report the outcome to PBMS with pbms_completed().
 *
 * The return statement is not visible here; err carries the handler error
 * code set along the way.
 */
int ha_pbxt::doInsertRecord(byte *buf)
2574
ASSERT_NS(pb_ex_in_use);
2576
XT_PRINT1(pb_open_tab->ot_thread, "doInsertRecord (%s)\n", pb_share->sh_table_path->ps_path);
2577
XT_DISABLED_TRACE(("INSERT tx=%d val=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(&buf[1])));
2578
//statistic_increment(ha_write_count,&LOCK_status);
2580
// PBMS blob hook. The conditional compilation around these lines is not
// visible in this view - presumably only built when PBMS is enabled.
// NOTE(review): these calls pass table while the rest of the function uses
// getTable() - confirm both are valid in this build.
PBMSResultRec result;
2581
err = pbms_doInsertRecord_blobs(table, buf, &result);
2583
xt_logf(XT_NT_ERROR, "pbms_doInsertRecord_blobs() Error: %s", result.mr_message);
2588
/* {START-STAT-HACK} previously position of start statement hack. */
2589
xt_xlog_check_long_writer(pb_open_tab->ot_thread);
2591
// Import mode: the transaction is committed and restarted after every
// XT_IMPORT_ROW_COUNT rows, and the row counter starts again at zero.
if (pb_open_tab->ot_thread->st_import_stat) {
2592
if (pb_import_row_count >= XT_IMPORT_ROW_COUNT) {
2593
/* Commit and restart the transaction. */
2594
XTThreadPtr thread = pb_open_tab->ot_thread;
2596
XT_PRINT0(thread, "xt_xn_commit in doInsertRecord\n");
2597
if (!xt_xn_commit(thread)) {
2598
err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, thread, pb_ignore_dup_key);
2601
XT_PRINT0(thread, "xt_xn_begin in doInsertRecord\n");
2602
if (!xt_xn_begin(thread)) {
2603
err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, thread, pb_ignore_dup_key);
2606
pb_import_row_count = 0;
2609
pb_import_row_count++;
2612
// Auto-increment handling applies only when the row being written is the
// insert record of the table.
if (getTable()->next_number_field && buf == getTable()->getInsertRecord()) {
2613
int update_err = update_auto_increment();
2615
ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2619
// Keep the cached table counter in step with the value just assigned.
ha_set_auto_increment(pb_open_tab, getTable()->next_number_field);
2622
if (!xt_tab_new_record(pb_open_tab, (xtWord1 *) buf)) {
2623
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2626
* This is needed to allow the same row to be updated multiple times in case of bulk REPLACE.
2627
* This happens during execution of LOAD DATA...REPLACE MySQL first tries to INSERT the row
2628
* and if it gets dup-key error it tries UPDATE, so the same row can be overwriten multiple
2629
* times within the same statement
2631
if (err == HA_ERR_FOUND_DUPP_KEY && pb_open_tab->ot_thread->st_is_update) {
2632
/* Pop the update stack: */
2633
//pb_open_tab->ot_thread->st_update_id++;
2634
XTOpenTablePtr curr = pb_open_tab->ot_thread->st_is_update;
2636
// Unlink the top entry: the thread now points at the previous update and
// the removed entry no longer references it.
pb_open_tab->ot_thread->st_is_update = curr->ot_prev_update;
2637
curr->ot_prev_update = NULL;
2643
// Tell PBMS whether the insert succeeded (err == 0).
pbms_completed(table, (err == 0));
2649
static int equ_bin(const byte *a, const char *b)
2659
static void dump_bin(const byte *a_in, int offset, int len_in)
2661
const byte *a = a_in;
2666
xt_trace("%02X", (int) *a);
2675
xt_trace("%c", (*a > 8 && *a < 127) ? *a : '.');
2684
* Yes, doUpdateRecord() does what you expect, it updates a row. old_data will have
2685
* the previous row record in it, while new_data will have the newest data in
2686
* it. Keep in mind that the server can do updates based on ordering if an ORDER BY
2687
* clause was used. Consecutive ordering is not guaranteed.
2689
* Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
2691
int ha_pbxt::doUpdateRecord(const byte * old_data, byte * new_data)
2694
register XTThreadPtr self = pb_open_tab->ot_thread;
2696
ASSERT_NS(pb_ex_in_use);
2698
XT_PRINT1(self, "update_row (%s)\n", pb_share->sh_table_path->ps_path);
2699
XT_DISABLED_TRACE(("UPDATE tx=%d val=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(&new_data[1])));
2700
//statistic_increment(ha_update_count,&LOCK_status);
2702
/* {START-STAT-HACK} previously position of start statement hack. */
2704
xt_xlog_check_long_writer(self);
2706
/* {UPDATE-STACK} */
2707
if (self->st_is_update != pb_open_tab) {
2708
/* Push the update stack: */
2709
pb_open_tab->ot_prev_update = self->st_is_update;
2710
self->st_is_update = pb_open_tab;
2711
pb_open_tab->ot_update_id++;
2715
PBMSResultRec result;
2717
err = pbms_delete_row_blobs(table, old_data, &result);
2719
xt_logf(XT_NT_ERROR, "update_row:pbms_delete_row_blobs() Error: %s", result.mr_message);
2722
err = pbms_doInsertRecord_blobs(table, new_data, &result);
2724
xt_logf(XT_NT_ERROR, "update_row:pbms_doInsertRecord_blobs() Error: %s", result.mr_message);
2729
/* GOTCHA: We need to check the auto-increment value on update
2730
* because of the following test (which fails for InnoDB) -
2731
* auto_increment.test:
2732
* create table t1 (a int not null auto_increment primary key, val int);
2733
* insert into t1 (val) values (1);
2734
* update t1 set a=2 where a=1;
2735
* insert into t1 (val) values (1);
2737
if (getTable()->found_next_number_field && new_data == getTable()->getInsertRecord()) {
2739
const boost::dynamic_bitset<>& old_bitmap= getTable()->use_all_columns(*getTable()->read_set);
2740
nr = getTable()->found_next_number_field->val_int();
2741
ha_set_auto_increment(pb_open_tab, getTable()->found_next_number_field);
2742
getTable()->restore_column_map(old_bitmap);
2745
if (!xt_tab_update_record(pb_open_tab, (xtWord1 *) old_data, (xtWord1 *) new_data))
2746
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2748
pb_open_tab->ot_table->tab_locks.xt_remove_temp_lock(pb_open_tab, TRUE);
2752
pbms_completed(table, (err == 0));
2759
* This will delete a row. buf will contain a copy of the row to be deleted.
2760
* The server will call this right after the current row has been called (from
2761
* either a previous rnd_next() or index call).
2763
* Called in sql_acl.cc and sql_udf.cc to manage internal table information.
2764
* Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select it is
2765
* used for removing duplicates while in insert it is used for REPLACE calls.
2767
/*
 * Delete the row whose image is in buf from the open table (pb_open_tab).
 *
 * Visible steps: PBMS blob hook, xt_tab_delete_record(), removal of the
 * temporary row lock, and pbms_completed() with the outcome. The return
 * statement is not visible in this view; err carries the handler error code.
 */
int ha_pbxt::doDeleteRecord(const byte * buf)
2771
ASSERT_NS(pb_ex_in_use);
2773
XT_PRINT1(pb_open_tab->ot_thread, "delete_row (%s)\n", pb_share->sh_table_path->ps_path);
2774
XT_DISABLED_TRACE(("DELETE tx=%d val=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(&buf[1])));
2775
//statistic_increment(ha_delete_count,&LOCK_status);
2778
// PBMS blob hook. The conditional compilation around these lines is not
// visible in this view - presumably only built when PBMS is enabled.
PBMSResultRec result;
2780
err = pbms_delete_row_blobs(table, buf, &result);
2782
xt_logf(XT_NT_ERROR, "pbms_delete_row_blobs() Error: %s", result.mr_message);
2787
/* {START-STAT-HACK} previously position of start statement hack. */
2789
xt_xlog_check_long_writer(pb_open_tab->ot_thread);
2791
// On failure the thread error is logged and turned into a handler error.
if (!xt_tab_delete_record(pb_open_tab, (xtWord1 *) buf))
2792
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2794
pb_open_tab->ot_table->tab_locks.xt_remove_temp_lock(pb_open_tab, TRUE);
2797
// Tell PBMS whether the delete succeeded (err == 0).
pbms_completed(table, (err == 0));
2803
* -----------------------------------------------------------------------
2808
* This looks like a hack, but actually, it is OK.
2809
* It depends on the setup done by the super-class. It involves an extra
2810
* range check that we need to do if a "new" record is returned during
2813
* A new record is returned if a row is updated (by another transaction)
2814
* during the index scan. If an update is detected, then the scan stops
2815
* and waits for the transaction to end.
2817
* If the transaction commits, then the updated row is returned instead
2818
* of the row it would have returned when doing a consistent read
2819
* (repeatable read).
2821
* These new records can appear out of index order, and may not even
2822
* belong to the index range that we are concerned with.
2824
* Notice that there is no check for the start of the range. It appears
2825
* that this is not necessary, MySQL seems to have no problem ignoring
2828
* A number of tests have been given below which demonstrate the use
2831
* They also demonstrate the ORDER BY problem described here: [(11)].
2833
* DROP TABLE IF EXISTS test_tab, test_tab_1, test_tab_2;
2834
* CREATE TABLE test_tab (ID int primary key, Value int, Name varchar(20), index(Value, Name)) ENGINE=pbxt;
2835
* INSERT test_tab values(1, 1, 'A');
2836
* INSERT test_tab values(2, 1, 'B');
2837
* INSERT test_tab values(3, 1, 'C');
2838
* INSERT test_tab values(4, 2, 'D');
2839
* INSERT test_tab values(5, 2, 'E');
2840
* INSERT test_tab values(6, 2, 'F');
2841
* INSERT test_tab values(7, 2, 'G');
2843
* select * from test_tab where value = 1 order by value, name for update;
2848
* select * from test_tab where id = 5 for update;
2852
* select * from test_tab where value = 2 order by value, name for update;
2855
* update test_tab set value = 3 where id = 6;
2861
* select * from test_tab where id = 5 for update;
2865
* select * from test_tab where value >= 2 order by value, name for update;
2868
* update test_tab set value = 3 where id = 6;
2874
* select * from test_tab where id = 5 for update;
2878
* select * from test_tab where value = 2 order by value, name for update;
2881
* update test_tab set value = 1 where id = 6;
2885
/*
 * Decide whether the row in buf belongs to the index scan in progress.
 *
 * With a search key: the index key is built from the row and compared with
 * the search key; the result is stored in search_key->sk_on_key and
 * returned. Otherwise the result is whether compare_key(end_range) <= 0.
 * The conditions selecting between the two paths are not visible in this
 * view; the existing comments describe them.
 *
 * ind         index used to build and compare the key
 * search_key  exact-match key, may be absent
 * buf         row image to test
 */
int ha_pbxt::xt_index_in_range(register XTOpenTablePtr XT_UNUSED(ot), register XTIndexPtr ind,
2886
register XTIdxSearchKeyPtr search_key, xtWord1 *buf)
2888
/* If search key is given, this means we want an exact match. */
2890
xtWord1 key_buf[XT_INDEX_MAX_KEY_SIZE];
2892
myxt_create_key_from_row(ind, key_buf, buf, NULL);
2893
// Side effect: sk_on_key is overwritten. The read loops in this file test
// that flag at the top of each iteration.
search_key->sk_on_key = myxt_compare_key(ind, search_key->sk_key_value.sv_flags, search_key->sk_key_value.sv_length,
2894
search_key->sk_key_value.sv_key, key_buf) == 0;
2895
return search_key->sk_on_key;
2898
/* Otherwise, check the end of the range. */
2900
return compare_key(end_range) <= 0;
2904
int ha_pbxt::xt_index_next_read(register XTOpenTablePtr ot, register XTIndexPtr ind, xtBool key_only,
2905
register XTIdxSearchKeyPtr search_key, byte *buf)
2907
xt_xlog_check_long_writer(ot->ot_thread);
2910
/* We only need to read the data from the key: */
2911
while (ot->ot_curr_rec_id) {
2912
if (search_key && !search_key->sk_on_key)
2915
switch (xt_tab_visible(ot)) {
2917
if (xt_idx_next(ot, ind, search_key))
2922
if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
2924
if (xt_index_in_range(ot, ind, search_key, buf)) {
2927
if (!xt_idx_next(ot, ind, search_key))
2931
/* We cannot start from the beginning again, if we have
2932
* already output rows!
2933
* And we need the original search key.
2935
* The case in which this occurs is:
2937
* T1: UPDATE tbl_file SET GlobalID = 'DBCD5C4514210200825501089884844_6M' WHERE ID = 39
2938
* Locks a particular row.
2940
* T2: SELECT ID,Flags FROM tbl_file WHERE SpaceID = 1 AND Path = '/zi/America/' AND
2941
* Name = 'Cuiaba' AND Flags IN ( 0,1,4,5 ) FOR UPDATE
2942
* scans the index and stops on the lock (of the before image) above.
2944
* T1 quits, the sweeper deletes the record updated by T1?!
2945
* BUG: Cleanup should wait until T2 is complete!
2947
* T2 continues, and returns XT_RETRY.
2949
* At this stage T2 has already returned some rows, so it may not retry from the
2950
* start. Instead it tries to locate the last record it tried to lock.
2951
* This record is gone (or not visible), so it finds the next one.
2953
* POTENTIAL BUG: If cleanup does not wait until T2 is complete, then
2954
* I may miss the update record, if it is moved before the index scan
2957
if (!pb_ind_row_count && search_key) {
2958
if (!xt_idx_search(pb_open_tab, ind, search_key))
2959
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2962
if (!xt_idx_research(pb_open_tab, ind))
2967
if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
2974
while (ot->ot_curr_rec_id) {
2975
if (search_key && !search_key->sk_on_key)
2978
switch (xt_tab_read_record(ot, (xtWord1 *) buf)) {
2980
XT_DISABLED_TRACE(("not visi tx=%d rec=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) ot->ot_curr_rec_id));
2981
if (xt_idx_next(ot, ind, search_key))
2986
if (xt_index_in_range(ot, ind, search_key, buf))
2988
if (!xt_idx_next(ot, ind, search_key))
2992
if (!pb_ind_row_count && search_key) {
2993
if (!xt_idx_search(pb_open_tab, ind, search_key))
2994
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2997
if (!xt_idx_research(pb_open_tab, ind))
3002
XT_DISABLED_TRACE(("visible tx=%d rec=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) ot->ot_curr_rec_id));
3007
return HA_ERR_END_OF_FILE;
3010
return ha_log_pbxt_thread_error_for_mysql(FALSE);
3013
/*
 * Backward counterpart of xt_index_next_read(): starting at the current
 * index position (ot->ot_curr_rec_id), step toward earlier entries until a
 * row can be returned in buf.
 *
 * Two loops are visible. The first reads the row data from the index key
 * only (xt_tab_visible + xt_idx_read); the second reads the whole record
 * (xt_tab_read_record). key_only presumably selects between them; the
 * selecting line is not visible in this view.
 *
 * search_key  when given, the scan stops once sk_on_key is false
 *
 * Returns HA_ERR_END_OF_FILE when the scan runs out of entries, or the
 * handler code from ha_log_pbxt_thread_error_for_mysql() on failure. The
 * success return is not visible in this view.
 */
int ha_pbxt::xt_index_prev_read(XTOpenTablePtr ot, XTIndexPtr ind, xtBool key_only,
3014
register XTIdxSearchKeyPtr search_key, byte *buf)
3017
/* We only need to read the data from the key: */
3018
while (ot->ot_curr_rec_id) {
3019
if (search_key && !search_key->sk_on_key)
3022
switch (xt_tab_visible(ot)) {
3024
if (xt_idx_prev(ot, ind, search_key))
3029
if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3031
if (xt_index_in_range(ot, ind, search_key, buf))
3033
// NOTE(review): this backward scan steps with xt_idx_next here, while the
// other step in this loop uses xt_idx_prev - confirm the direction is
// intended.
if (!xt_idx_next(ot, ind, search_key))
3037
// Re-position the scan: search again from the key if no row has been
// returned yet (pb_ind_row_count is zero), otherwise xt_idx_research().
if (!pb_ind_row_count && search_key) {
3038
if (!xt_idx_search_prev(pb_open_tab, ind, search_key))
3039
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3042
if (!xt_idx_research(pb_open_tab, ind))
3047
if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3054
/* We need to read the entire record: */
3055
while (ot->ot_curr_rec_id) {
3056
if (search_key && !search_key->sk_on_key)
3059
switch (xt_tab_read_record(ot, (xtWord1 *) buf)) {
3061
if (xt_idx_prev(ot, ind, search_key))
3066
if (xt_index_in_range(ot, ind, search_key, buf))
3068
// NOTE(review): same question as in the first loop - xt_idx_next is used
// while scanning backward.
if (!xt_idx_next(ot, ind, search_key))
3072
if (!pb_ind_row_count && search_key) {
3073
if (!xt_idx_search_prev(pb_open_tab, ind, search_key))
3074
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3077
if (!xt_idx_research(pb_open_tab, ind))
3086
return HA_ERR_END_OF_FILE;
3089
return ha_log_pbxt_thread_error_for_mysql(FALSE);
3094
static std::string convert_long_to_bit_string(uint64_t bitset, uint64_t bitset_size)
3099
res.push_back((bitset & 1) + '0');
3104
std::reverse(res.begin(), res.end());
3110
std::string final(bitset_size - res.length(), '0');
3116
int ha_pbxt::doStartIndexScan(uint idx, bool XT_UNUSED(sorted))
3119
XTThreadPtr thread = pb_open_tab->ot_thread;
3121
/* select count(*) from smalltab_PBXT;
3122
* ignores the error below, and continues to
3127
if (pb_open_tab->ot_table->tab_dic.dic_disable_index) {
3128
active_index = MAX_KEY;
3129
xt_tab_set_index_error(pb_open_tab->ot_table);
3130
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3133
/* The number of columns required: */
3134
if (pb_open_tab->ot_is_modify) {
3136
pb_open_tab->ot_cols_req = getTable()->read_set->MX_BIT_SIZE();
3137
#ifdef XT_PRINT_INDEX_OPT
3138
ind = (XTIndexPtr) pb_share->sh_dic_keys[idx];
3140
printf("index_init %s index %d cols req=%d/%d read_bits=%X write_bits=%X index_bits=%X\n", pb_open_tab->ot_table->tab_name->ps_path, (int) idx, pb_open_tab->ot_cols_req, pb_open_tab->ot_cols_req, (int) *table->read_set->bitmap, (int) *table->write_set->bitmap, (int) *ind->mi_col_map.bitmap);
3142
/* {START-STAT-HACK} previously position of start statement hack,
3143
* previous comment to code below: */
3144
/* Start a statement based transaction as soon
3145
* as a read is done for a modify type statement!
3146
* Previously, this was done too late!
3150
//pb_open_tab->ot_cols_req = ha_get_max_bit(table->read_set);
3151
pb_open_tab->ot_cols_req = getTable()->read_set->MX_BIT_SIZE();
3153
/* Check for index coverage!
3155
* Given the following table:
3157
* CREATE TABLE `customer` (
3158
* `c_id` int(11) NOT NULL DEFAULT '0',
3159
* `c_d_id` int(11) NOT NULL DEFAULT '0',
3160
* `c_w_id` int(11) NOT NULL DEFAULT '0',
3161
* `c_first` varchar(16) DEFAULT NULL,
3162
* `c_middle` char(2) DEFAULT NULL,
3163
* `c_last` varchar(16) DEFAULT NULL,
3164
* `c_street_1` varchar(20) DEFAULT NULL,
3165
* `c_street_2` varchar(20) DEFAULT NULL,
3166
* `c_city` varchar(20) DEFAULT NULL,
3167
* `c_state` char(2) DEFAULT NULL,
3168
* `c_zip` varchar(9) DEFAULT NULL,
3169
* `c_phone` varchar(16) DEFAULT NULL,
3170
* `c_since` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
3171
* `c_credit` char(2) DEFAULT NULL,
3172
* `c_credit_lim` decimal(24,12) DEFAULT NULL,
3173
* `c_discount` double DEFAULT NULL,
3174
* `c_balance` decimal(24,12) DEFAULT NULL,
3175
* `c_ytd_payment` decimal(24,12) DEFAULT NULL,
3176
* `c_payment_cnt` double DEFAULT NULL,
3177
* `c_delivery_cnt` double DEFAULT NULL,
3179
* PRIMARY KEY (`c_w_id`,`c_d_id`,`c_id`),
3180
* KEY `c_w_id` (`c_w_id`,`c_d_id`,`c_last`,`c_first`,`c_id`)
3183
* MySQL does not recognize index coverage on the following select:
3185
* SELECT c_id FROM customer WHERE c_w_id = 3 AND c_d_id = 8 AND
3186
* c_last = 'EINGATIONANTI' ORDER BY c_first ASC LIMIT 1;
3188
* TODO: Find out why this is necessary, MyISAM does not
3189
* seem to have this problem!
3191
ind = (XTIndexPtr) pb_share->sh_dic_keys[idx];
3194
* Need to do this for drizzle because we use boost's dynamic_bitset
3195
* to represent the bitsets and allocating memory for an object of that
3196
* type does not play well with the memory allocation routines in PBXT.
3197
* For that reason, we just store a uint which represents the bitset
3198
* in the XTIndexPtr structure for PBXT.
3200
std::string bitmap_str= convert_long_to_bit_string(ind->mi_col_map, ind->mi_col_map_size);
3201
MX_BITMAP tmp(bitmap_str);
3202
if (MX_BIT_IS_SUBSET(getTable()->read_set, tmp))
3204
if (MX_BIT_IS_SUBSET(getTable()->read_set, ind->mi_col_map))
3207
#ifdef XT_PRINT_INDEX_OPT
3208
printf("index_init %s index %d cols req=%d/%d read_bits=%X write_bits=%X index_bits=%X converage=%d\n", pb_open_tab->ot_table->tab_name->ps_path, (int) idx, pb_open_tab->ot_cols_req, table->read_set->MX_BIT_SIZE(), (int) *table->read_set->bitmap, (int) *table->write_set->bitmap, (int) *ind->mi_col_map.bitmap, (int) (MX_BIT_IS_SUBSET(table->read_set, &ind->mi_col_map) != 0));
3212
xt_xlog_check_long_writer(thread);
3214
pb_open_tab->ot_thread->st_statistics.st_scan_index++;
3218
/*
 * End the current index scan: release the index read handle if one is held,
 * make the lock on the last scanned row permanent, and reset active_index
 * to MAX_KEY. The return statement is not visible in this view.
 */
int ha_pbxt::doEndIndexScan()
3224
XTThreadPtr thread = pb_open_tab->ot_thread;
3227
* the assertion below is not always held, because the sometimes handler is unlocked
3228
* before this function is called
3230
/*ASSERT_NS(pb_ex_in_use);*/
3232
// Drop the handle and clear the pointer so it is not released twice.
if (pb_open_tab->ot_ind_rhandle) {
3233
xt_ind_release_handle(pb_open_tab->ot_ind_rhandle, FALSE, thread);
3234
pb_open_tab->ot_ind_rhandle = NULL;
3238
* make permanent the lock for the last scanned row
3241
pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &thread->st_lock_list);
3243
xt_xlog_check_long_writer(thread);
3245
// MAX_KEY marks that no index is active; the index_* readers in this file
// answer HA_ERR_WRONG_INDEX when they see it.
active_index = MAX_KEY;
3249
#ifdef XT_TRACK_RETURNED_ROWS
3250
/*
 * Debug tracking (preceded by #ifdef XT_TRACK_RETURNED_ROWS): trace the
 * start of a scan on the given index and clear the record of rows returned
 * so far - the counter ot_rows_ret_curr and every slot of ot_rows_returned.
 */
void ha_start_scan(XTOpenTablePtr ot, u_int index)
3252
xt_ttracef(ot->ot_thread, "SCAN %d:%d\n", (int) ot->ot_table->tab_id, (int) index);
3253
ot->ot_rows_ret_curr = 0;
3254
// Zero all ot_rows_ret_max slots; ha_return_row() treats a non-zero slot
// as a row that was already returned.
for (u_int i=0; i<ot->ot_rows_ret_max; i++)
3255
ot->ot_rows_returned[i] = 0;
3258
/*
 * Debug tracking: note that the current row (ot_curr_row_id, ot_curr_rec_id)
 * is being returned by a scan on the given index, and report the case where
 * the row id is zero, the record id is zero, or the row was already
 * returned during this scan.
 *
 * ot_rows_returned is indexed by row id and holds the record id that was
 * returned for that row; zero means the row has not been returned.
 */
void ha_return_row(XTOpenTablePtr ot, u_int index)
3260
xt_ttracef(ot->ot_thread, "%d:%d ROW=%d:%d\n",
3261
(int) ot->ot_table->tab_id, (int) index, (int) ot->ot_curr_row_id, (int) ot->ot_curr_rec_id);
3262
ot->ot_rows_ret_curr++;
3263
// Grow the tracking array so that the current row id is a valid index,
// and zero-fill the newly added slots.
if (ot->ot_curr_row_id >= ot->ot_rows_ret_max) {
3264
if (!xt_realloc_ns((void **) &ot->ot_rows_returned, (ot->ot_curr_row_id+1) * sizeof(xtRecordID)))
3266
memset(&ot->ot_rows_returned[ot->ot_rows_ret_max], 0, (ot->ot_curr_row_id+1 - ot->ot_rows_ret_max) * sizeof(xtRecordID));
3267
ot->ot_rows_ret_max = ot->ot_curr_row_id+1;
3269
// Problem case: trace it, print the row ids and the current statement,
// then stop in FatalAppExit().
if (!ot->ot_curr_row_id || !ot->ot_curr_rec_id || ot->ot_rows_returned[ot->ot_curr_row_id]) {
3270
char *sql = *thd_query(current_thd);
3272
xt_ttracef(ot->ot_thread, "DUP %d:%d %s\n",
3273
(int) ot->ot_table->tab_id, (int) index, *thd_query(current_thd));
3275
printf("ERROR: row=%d rec=%d newr=%d, already returned!\n", (int) ot->ot_curr_row_id, (int) ot->ot_rows_returned[ot->ot_curr_row_id], (int) ot->ot_curr_rec_id);
3276
printf("ERROR: %s\n", sql);
3278
FatalAppExit(0, "Debug Me!");
3282
// Remember which record was returned for this row.
ot->ot_rows_returned[ot->ot_curr_row_id] = ot->ot_curr_rec_id;
3286
/*
 * Common implementation behind index_read(), index_read_idx() and
 * index_read_last(): position index idx by key according to find_flag and
 * fetch the first matching row into buf.
 *
 * buf        receives the row
 * idx        index number; MAX_KEY yields HA_ERR_WRONG_INDEX
 * key        search key of key_len bytes
 * find_flag  search mode; the backward modes use xt_idx_search_prev() and
 *            xt_index_prev_read(), the forward modes use xt_idx_search()
 *            and xt_index_next_read()
 *
 * The row count of the scan (pb_ind_row_count) is reset, because this call
 * starts a new search. The return statement is not visible in this view;
 * err carries the handler error code.
 */
int ha_pbxt::index_read_xt(byte * buf, uint idx, const byte *key, uint key_len, enum ha_rkey_function find_flag)
3291
XTIdxSearchKeyRec search_key;
3293
if (idx == MAX_KEY) {
3294
err = HA_ERR_WRONG_INDEX;
3297
#ifdef XT_TRACK_RETURNED_ROWS
3298
ha_start_scan(pb_open_tab, idx);
3301
/* This call starts a search on this handler! */
3302
pb_ind_row_count = 0;
3304
ASSERT_NS(pb_ex_in_use);
3306
XT_PRINT1(pb_open_tab->ot_thread, "index_read_xt (%s)\n", pb_share->sh_table_path->ps_path);
3307
XT_DISABLED_TRACE(("search tx=%d val=%d update=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(key), pb_modified));
3308
ind = (XTIndexPtr) pb_share->sh_dic_keys[idx];
3310
switch (find_flag) {
3311
// Backward searches. The two PREFIX_LAST modes only add the prefix flag
// and then share the code of the cases that follow.
case HA_READ_PREFIX_LAST:
3312
case HA_READ_PREFIX_LAST_OR_PREV:
3313
prefix = SEARCH_PREFIX;
3314
case HA_READ_BEFORE_KEY:
3315
case HA_READ_KEY_OR_PREV: // I assume you want to be positioned on the last entry in the key duplicate list!!
3316
// Every backward mode except HA_READ_BEFORE_KEY searches after the key.
xt_idx_prep_key(ind, &search_key, ((find_flag == HA_READ_BEFORE_KEY) ? 0 : XT_SEARCH_AFTER_KEY) | prefix, (xtWord1 *) key, (size_t) key_len);
3317
if (!xt_idx_search_prev(pb_open_tab, ind, &search_key))
3318
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3320
// Only HA_READ_PREFIX_LAST passes the search key on, which makes the read
// routine insist on a match with that key.
err = xt_index_prev_read(pb_open_tab, ind, pb_key_read,
3321
(find_flag == HA_READ_PREFIX_LAST) ? &search_key : NULL, buf);
3323
// Forward searches. HA_READ_PREFIX only adds the prefix flag and then
// shares the code of the cases that follow.
case HA_READ_PREFIX:
3324
prefix = SEARCH_PREFIX;
3325
case HA_READ_KEY_EXACT:
3326
case HA_READ_KEY_OR_NEXT:
3327
case HA_READ_AFTER_KEY:
3329
xt_idx_prep_key(ind, &search_key, ((find_flag == HA_READ_AFTER_KEY) ? XT_SEARCH_AFTER_KEY : 0) | prefix, (xtWord1 *) key, key_len);
3330
if (!xt_idx_search(pb_open_tab, ind, &search_key))
3331
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3333
// HA_READ_KEY_EXACT and HA_READ_PREFIX pass the search key on so that
// only matching rows are returned.
err = xt_index_next_read(pb_open_tab, ind, pb_key_read,
3334
(find_flag == HA_READ_KEY_EXACT || find_flag == HA_READ_PREFIX) ? &search_key : NULL, buf);
3335
// Running off the end of the index in HA_READ_AFTER_KEY mode is reported
// as key-not-found rather than end-of-file.
if (err == HA_ERR_END_OF_FILE && find_flag == HA_READ_AFTER_KEY)
3336
err = HA_ERR_KEY_NOT_FOUND;
3342
#ifdef XT_TRACK_RETURNED_ROWS
3344
ha_return_row(pb_open_tab, idx);
3346
XT_DISABLED_TRACE(("search tx=%d val=%d err=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(key), err));
3349
// The table status is set to STATUS_NOT_FOUND or to 0; the condition that
// chooses between them is not visible in this view (presumably err).
getTable()->status = STATUS_NOT_FOUND;
3351
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3352
getTable()->status = 0;
3358
* Positions an index cursor to the index specified in the handle. Fetches the
3359
* row if available. If the key value is null, begin at the first key of the
3362
/*
 * Keyed read on the currently active index: forwards all arguments to
 * index_read_xt() with active_index as the index number and returns its
 * result.
 */
int ha_pbxt::index_read(byte * buf, const byte * key, uint key_len, enum ha_rkey_function find_flag)
3364
//statistic_increment(ha_read_key_count,&LOCK_status);
3365
return index_read_xt(buf, active_index, key, key_len, find_flag);
3368
/*
 * Keyed read on an explicitly named index: forwards all arguments to
 * index_read_xt() with idx as the index number (active_index is not used)
 * and returns its result.
 */
int ha_pbxt::index_read_idx(byte * buf, uint idx, const byte *key, uint key_len, enum ha_rkey_function find_flag)
3370
//statistic_increment(ha_read_key_count,&LOCK_status);
3371
return index_read_xt(buf, idx, key, key_len, find_flag);
3374
/*
 * Keyed read on the currently active index with the search mode fixed to
 * HA_READ_PREFIX_LAST: forwards to index_read_xt() and returns its result.
 */
int ha_pbxt::index_read_last(byte * buf, const byte * key, uint key_len)
3376
//statistic_increment(ha_read_key_count,&LOCK_status);
3377
return index_read_xt(buf, active_index, key, key_len, HA_READ_PREFIX_LAST);
3381
* Used to read forward through the index.
3383
/*
 * Advance the scan on the active index by one entry and read the next
 * returnable row into buf.
 *
 * Sets err to HA_ERR_WRONG_INDEX when no index is active (active_index ==
 * MAX_KEY). The return statement is not visible in this view; err carries
 * the handler error code.
 */
int ha_pbxt::index_next(byte * buf)
3389
//statistic_increment(ha_read_next_count,&LOCK_status);
3390
ASSERT_NS(pb_ex_in_use);
3392
if (active_index == MAX_KEY) {
3393
err = HA_ERR_WRONG_INDEX;
3396
ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3398
// Step the index position first, without a search key (NULL), then let
// xt_index_next_read() find a row that can be returned.
if (!xt_idx_next(pb_open_tab, ind, NULL))
3399
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3401
err = xt_index_next_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3404
#ifdef XT_TRACK_RETURNED_ROWS
3406
ha_return_row(pb_open_tab, active_index);
3410
// The table status is set to STATUS_NOT_FOUND or to 0; the condition that
// chooses between them is not visible in this view (presumably err).
getTable()->status = STATUS_NOT_FOUND;
3412
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3413
getTable()->status = 0;
3419
* I have implemented this because there is currently a
3420
* bug in handler::index_next_same().
3422
* drop table if exists t1;
3423
* CREATE TABLE t1 (a int, b int, primary key(a,b))
3424
* PARTITION BY KEY(b,a) PARTITIONS 2;
3425
* insert into t1 values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6);
3426
* select * from t1 where a = 4;
3429
/*
 * Advance the scan on the active index by one entry, returning the row in
 * buf only while it still matches the given key.
 *
 * key     key value the following rows must match
 * length  length of key in bytes
 *
 * A search key record is built by hand from key (exact-match flags, no
 * record or row id, sk_on_key set) and passed to xt_idx_next() and
 * xt_index_next_read(). Sets err to HA_ERR_WRONG_INDEX when no index is
 * active. The return statement is not visible in this view.
 */
int ha_pbxt::index_next_same(byte * buf, const byte *key, uint length)
3433
XTIdxSearchKeyRec search_key;
3436
//statistic_increment(ha_read_next_count,&LOCK_status);
3437
ASSERT_NS(pb_ex_in_use);
3439
if (active_index == MAX_KEY) {
3440
err = HA_ERR_WRONG_INDEX;
3443
ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3445
// Fill in the search key record field by field; the key bytes are
// converted into the record buffer sk_key_buf.
search_key.sk_key_value.sv_flags = HA_READ_KEY_EXACT;
3446
search_key.sk_key_value.sv_rec_id = 0;
3447
search_key.sk_key_value.sv_row_id = 0;
3448
search_key.sk_key_value.sv_key = search_key.sk_key_buf;
3449
search_key.sk_key_value.sv_length = myxt_create_key_from_key(ind, search_key.sk_key_buf, (xtWord1 *) key, (u_int) length);
3450
// Start out on the key; the read loops stop once this flag is cleared.
search_key.sk_on_key = TRUE;
3452
if (!xt_idx_next(pb_open_tab, ind, &search_key))
3453
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3455
err = xt_index_next_read(pb_open_tab, ind, pb_key_read, &search_key, buf);
3458
#ifdef XT_TRACK_RETURNED_ROWS
3460
ha_return_row(pb_open_tab, active_index);
3464
// The table status is set to STATUS_NOT_FOUND or to 0; the condition that
// chooses between them is not visible in this view (presumably err).
getTable()->status = STATUS_NOT_FOUND;
3466
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3467
getTable()->status = 0;
3473
* Used to read backwards through the index.
3475
/*
 * Move the scan on the active index back by one entry and read the previous
 * returnable row into buf.
 *
 * Sets err to HA_ERR_WRONG_INDEX when no index is active (active_index ==
 * MAX_KEY). The return statement is not visible in this view; err carries
 * the handler error code.
 */
int ha_pbxt::index_prev(byte * buf)
3481
//statistic_increment(ha_read_prev_count,&LOCK_status);
3482
ASSERT_NS(pb_ex_in_use);
3484
if (active_index == MAX_KEY) {
3485
err = HA_ERR_WRONG_INDEX;
3488
ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3490
// Step the index position first, without a search key (NULL), then let
// xt_index_prev_read() find a row that can be returned.
if (!xt_idx_prev(pb_open_tab, ind, NULL))
3491
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3493
err = xt_index_prev_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3496
#ifdef XT_TRACK_RETURNED_ROWS
3498
ha_return_row(pb_open_tab, active_index);
3502
// The table status is set to STATUS_NOT_FOUND or to 0; the condition that
// chooses between them is not visible in this view (presumably err).
getTable()->status = STATUS_NOT_FOUND;
3504
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3505
getTable()->status = 0;
3511
* index_first() asks for the first key in the index.
3513
/*
 * Start a scan at the first entry of the active index and read the first
 * returnable row into buf.
 *
 * Sets err to HA_ERR_WRONG_INDEX when no index is active (active_index ==
 * MAX_KEY). Otherwise the row count of the scan (pb_ind_row_count) is
 * reset, a key is prepared with XT_SEARCH_FIRST_FLAG, the index is searched
 * with xt_idx_search() and the row is fetched with xt_index_next_read().
 * The table status is then set to STATUS_NOT_FOUND or to 0; the condition
 * choosing between them and the return statement are not visible in this
 * view.
 */
int ha_pbxt::index_first(byte * buf)
3517
XTIdxSearchKeyRec search_key;
3520
//statistic_increment(ha_read_first_count,&LOCK_status);
3521
ASSERT_NS(pb_ex_in_use);
3523
/* This is required because MySQL ignores the error returned
3524
* init init_index sometimes, for example:
3526
* if (!table->file->inited)
3527
* table->file->startIndexScan(tab->index, tab->sorted);
3528
* if ((error=tab->table->file->index_first(tab->table->getInsertRecord())))
3530
if (active_index == MAX_KEY) {
3531
err = HA_ERR_WRONG_INDEX;
3535
#ifdef XT_TRACK_RETURNED_ROWS
3536
ha_start_scan(pb_open_tab, active_index);
3538
pb_ind_row_count = 0;
3540
ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3542
xt_idx_prep_key(ind, &search_key, XT_SEARCH_FIRST_FLAG, NULL, 0);
3543
if (!xt_idx_search(pb_open_tab, ind, &search_key))
3544
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3546
err = xt_index_next_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3549
#ifdef XT_TRACK_RETURNED_ROWS
3551
ha_return_row(pb_open_tab, active_index);
3555
getTable()->status = STATUS_NOT_FOUND;
3557
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3558
getTable()->status = 0;
3564
* index_last() asks for the last key in the index.
3566
// Handler call that asks for the last key in the active index and reads the
// matching row into buf.
// NOTE(review): only part of the control flow is visible here; confirm how the
// statements below nest against the complete method.
int ha_pbxt::index_last(byte * buf)
3570
XTIdxSearchKeyRec search_key;
3573
//statistic_increment(ha_read_last_count,&LOCK_status);
3574
ASSERT_NS(pb_ex_in_use);
3576
if (active_index == MAX_KEY) {
3577
err = HA_ERR_WRONG_INDEX;
3581
#ifdef XT_TRACK_RETURNED_ROWS
3582
ha_start_scan(pb_open_tab, active_index);
3584
// Reset the per-scan index row counter.
pb_ind_row_count = 0;
3586
ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3588
// Build a search key with XT_SEARCH_AFTER_LAST_FLAG and no key value, then
// search backwards from it; a failed search becomes a handler error.
xt_idx_prep_key(ind, &search_key, XT_SEARCH_AFTER_LAST_FLAG, NULL, 0);
3589
if (!xt_idx_search_prev(pb_open_tab, ind, &search_key))
3590
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3592
// Read backward from the position found above.
err = xt_index_prev_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3595
#ifdef XT_TRACK_RETURNED_ROWS
3597
ha_return_row(pb_open_tab, active_index);
3601
getTable()->status = STATUS_NOT_FOUND;
3603
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3604
getTable()->status = 0;
3610
* -----------------------------------------------------------------------
3611
* RANDOM/SEQUENTIAL READ METHODS
3615
* doStartTableScan() is called when the system wants the storage engine to do a table
3617
* See the example in the introduction at the top of this file to see when
3618
* doStartTableScan() is called.
3620
* Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc,
3621
* and sql_update.cc.
3623
// Prepares the open table for a sequential (table) scan: releases what a
// previous scan may still hold, sets the number of columns required, then
// initialises or resets the sequential-scan state.
// NOTE(review): the use of the `scan` argument and the branch structure are
// not fully visible here; confirm against the complete method.
int ha_pbxt::doStartTableScan(bool scan)
3626
XTThreadPtr thread = pb_open_tab->ot_thread;
3628
XT_PRINT1(thread, "rnd_init (%s)\n", pb_share->sh_table_path->ps_path);
3629
XT_DISABLED_TRACE(("seq scan tx=%d\n", (int) thread->st_xact_data->xd_start_xn_id));
3631
/* Call xt_tab_seq_exit() to make sure the resources used by the previous
3632
* scan are freed. In particular make sure cache page ref count is decremented.
3633
* This is needed as doStartTableScan() can be called multiple times w/o matching calls
3634
* to doEndTableScan(). Our experience is that currently this is done in queries like:
3636
* SELECT t1.c1,t2.c1 FROM t1 LEFT JOIN t2 USING (c1);
3637
* UPDATE t1 LEFT JOIN t2 USING (c1) SET t1.c1 = t2.c1 WHERE t1.c1 = t2.c1;
3639
* when scanning inner tables. It is important to understand that in such case
3640
* multiple calls to doStartTableScan() are not semantically equal to a new query. For
3641
* example we cannot make row locks permanent as we do in doEndTableScan(), as
3642
* ha_pbxt::unlock_row still can be called.
3644
xt_tab_seq_exit(pb_open_tab);
3646
/* The number of columns required: */
3647
if (pb_open_tab->ot_is_modify) {
3648
pb_open_tab->ot_cols_req = getTable()->read_set->MX_BIT_SIZE();
3649
/* {START-STAT-HACK} previously position of start statement hack,
3650
* previous comment to code below: */
3651
/* Start a statement based transaction as soon
3652
* as a read is done for a modify type statement!
3653
* Previously, this was done too late!
3657
//pb_open_tab->ot_cols_req = ha_get_max_bit(table->read_set);
3658
pb_open_tab->ot_cols_req = getTable()->read_set->MX_BIT_SIZE();
3661
* in case of queries like SELECT COUNT(*) FROM t
3662
* table->read_set is empty. Otoh, ot_cols_req == 0 can be treated
3663
* as "all columns" by some internal code (see e.g. myxt_load_row),
3664
* which makes such queries very ineffective for the records with
3665
* extended part. Setting column count to 1 makes sure that the
3666
* extended part will not be accessed in most cases.
3669
if (pb_open_tab->ot_cols_req == 0)
3670
pb_open_tab->ot_cols_req = 1;
3673
ASSERT_NS(pb_ex_in_use);
3675
// A failing xt_tab_seq_init() is converted into a handler error code;
// xt_tab_seq_reset() is the alternative call visible below.
if (!xt_tab_seq_init(pb_open_tab))
3676
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3679
xt_tab_seq_reset(pb_open_tab);
3681
xt_xlog_check_long_writer(thread);
3686
// Ends a table scan: makes the lock for the last scanned row permanent, runs
// the long-writer check for the thread, and releases the sequential-scan
// state with xt_tab_seq_exit().
int ha_pbxt::doEndTableScan()
3691
* make permanent the lock for the last scanned row
3693
XTThreadPtr thread = pb_open_tab->ot_thread;
3695
pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &thread->st_lock_list);
3697
xt_xlog_check_long_writer(thread);
3699
xt_tab_seq_exit(pb_open_tab);
3704
* This is called for each row of the table scan. When you run out of records
3705
* you should return HA_ERR_END_OF_FILE. Fill buff up with the row information.
3706
* The Field structure for the table is the key to getting data into buf
3707
* in a manner that will allow the server to understand it.
3709
* Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc,
3710
* and sql_update.cc.
3712
// Called for each row of a table scan: fetches the next row into buf with
// xt_tab_seq_next(); HA_ERR_END_OF_FILE is the code used when the scan runs
// out of records.
// NOTE(review): the test on `eof` and the branch structure are not visible
// here; confirm against the complete method.
int ha_pbxt::rnd_next(byte *buf)
3718
ASSERT_NS(pb_ex_in_use);
3719
//statistic_increment(ha_read_rnd_next_count, &LOCK_status);
3720
xt_xlog_check_long_writer(pb_open_tab->ot_thread);
3722
// A failing read is converted into a handler error code.
if (!xt_tab_seq_next(pb_open_tab, (xtWord1 *) buf, &eof))
3723
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3725
err = HA_ERR_END_OF_FILE;
3728
getTable()->status = STATUS_NOT_FOUND;
3730
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3731
getTable()->status = 0;
3737
* position() is called after each call to rnd_next() if the data needs
3738
* to be ordered. You can do something like the following to store
3740
* ha_store_ptr(ref, ref_length, current_position);
3742
* The server uses ref to store data. ref_length in the above case is
3743
* the size needed to store current_position. ref is just a byte array
3744
* that the server will maintain. If you are using offsets to mark rows, then
3745
* current_position should be the offset. If it is a primary key like in
3746
* BDB, then it needs to be a primary key.
3748
* Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
3750
// Saves the position of the current row in `ref`: the open table's current
// record id (ot_curr_rec_id) is written as a 4-byte value with mi_int4store().
// The `record` argument is unused.
void ha_pbxt::position(const byte *XT_UNUSED(record))
3753
ASSERT_NS(pb_ex_in_use);
3755
* I changed this from using little endian to big endian.
3757
* The reason is because sometime the pointer are sorted.
3758
* When they are are sorted a binary compare is used.
3759
* A binary compare sorts big endian values correctly!
3761
* Take the followin example:
3763
* create table t1 (a int, b text);
3764
* insert into t1 values (1, 'aa'), (1, 'bb'), (1, 'cc');
3765
* select group_concat(b) from t1 group by a;
3767
* With little endian pointers the result is:
3770
* With big-endian pointer the result is:
3774
// The stored reference is assumed to be exactly 4 bytes wide.
(void) ASSERT_NS(XT_RECORD_OFFS_SIZE == 4);
3775
mi_int4store((xtWord1 *) ref, pb_open_tab->ot_curr_rec_id);
3780
* Given the #ROWID retrieve the record.
3782
* Called from filesort.cc records.cc sql_insert.cc sql_select.cc sql_update.cc.
3784
// Retrieves the record identified by `pos` into buf. `pos` is decoded as a
// 4-byte record id (mi_uint4korr), the counterpart of the mi_int4store() done
// in position().
// NOTE(review): the case labels of the switch are not visible here; confirm
// which results map to which status against the complete method.
int ha_pbxt::rnd_pos(byte * buf, byte *pos)
3789
ASSERT_NS(pb_ex_in_use);
3790
//statistic_increment(ha_read_rnd_count, &LOCK_status);
3791
XT_PRINT1(pb_open_tab->ot_thread, "rnd_pos (%s)\n", pb_share->sh_table_path->ps_path);
3793
// Make the requested record the current one, then read it.
pb_open_tab->ot_curr_rec_id = mi_uint4korr((xtWord1 *) pos);
3794
switch (xt_tab_dirty_read_record(pb_open_tab, (xtWord1 *) buf)) {
3796
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3803
getTable()->status = STATUS_NOT_FOUND;
3805
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3806
getTable()->status = 0;
3812
* -----------------------------------------------------------------------
3817
::info() is used to return information to the optimizer.
3818
Currently this table handler doesn't implement most of the fields
3819
really needed. SHOW also makes use of this data
3820
Another note, you will probably want to have the following in your
3824
The reason is that the server will optimize for cases of only a single
3825
record. If in a table scan you don't know the number of records
3826
it will probably be better to set records to two so you can return
3827
as many records as you need.
3828
Along with records a few more variables you may wish to set are:
3835
Take a look at the public variables in handler.h for more information.
3859
// Returns table information to the optimizer by filling the handler's `stats`
// fields according to the HA_STATUS_* bits set in `flag`. The signature is
// chosen by MYSQL_VERSION_ID: a void variant before 50114, an int variant
// otherwise.
// NOTE(review): many lines of this method are not visible here (conditional
// compilation arms, braces, returns); confirm details against the full method.
#if MYSQL_VERSION_ID < 50114
3860
void ha_pbxt::info(uint flag)
3862
int ha_pbxt::info(uint flag)
3870
// If the handler is not already marked in use, wait for any exclusive table
// lock held on the share before reading the table state.
if (!(in_use = pb_ex_in_use)) {
3872
if (pb_share && pb_share->sh_table_lock) {
3873
/* If some thread has an exclusive lock, then
3874
* we wait for the lock to be removed:
3876
#if MYSQL_VERSION_ID < 50114
3877
ha_wait_for_shared_use(this, pb_share);
3880
if (!ha_wait_for_shared_use(this, pb_share))
3881
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3886
if ((ot = pb_open_tab)) {
3887
// HA_STATUS_VARIABLE: row counts and file lengths.
if (flag & HA_STATUS_VARIABLE) {
3888
register XTTableHPtr tab = ot->ot_table;
3891
* Free row count is not reliable, so ignore it.
3892
* The problem is if tab_row_fnum > tab_row_eof_id - 1 then
3893
* we have a very bad result.
3895
* If stats.records+EXTRA_RECORDS == 0 as returned by
3896
* estimate_rows_upper_bound(), then filesort will crash here:
3898
* make_sortkey(param,sort_keys[idx++],ref_pos);
3900
* #0 0x000bf69c in Field_long::sort_string at field.cc:3766
3901
* #1 0x0022e1f1 in make_sortkey at filesort.cc:769
3902
* #2 0x0022f1cf in find_all_keys at filesort.cc:619
3903
* #3 0x00230eec in filesort at filesort.cc:243
3904
* #4 0x001b9d89 in update_query at sql_update.cc:415
3905
* #5 0x0010db12 in mysql_execute_command at sql_parse.cc:2959
3906
* #6 0x0011480d in mysql_parse at sql_parse.cc:5787
3907
* #7 0x00115afb in dispatch_command at sql_parse.cc:1200
3908
* #8 0x00116de2 in do_command at sql_parse.cc:857
3909
* #9 0x00101ee4 in handle_one_connection at sql_connect.cc:1115
3911
* The problem is that sort_keys is allocated to handle just 1 vector.
3912
* Sorting one vector crashes. Although I could not find a check for
3913
* the actual number of vectors. But it must assume that it has at
3914
* least EXTRA_RECORDS vectors.
3916
// With XT_ROW_COUNT_CORRECTED the free-row count is subtracted from the row
// count; the other visible variant reports deleted = 0 and records = eof id - 1.
#ifdef XT_ROW_COUNT_CORRECTED
3917
if (tab->tab_row_eof_id <= tab->tab_row_fnum ||
3918
(!tab->tab_row_free_id && tab->tab_row_fnum))
3919
xt_tab_check_free_lists(NULL, ot, false, true);
3920
stats.records = (ha_rows) tab->tab_row_eof_id - 1;
3921
if (stats.records >= tab->tab_row_fnum) {
3922
stats.deleted = tab->tab_row_fnum;
3923
stats.records -= stats.deleted;
3930
stats.deleted = /* tab->tab_row_fnum */ 0;
3931
stats.records = (ha_rows) (tab->tab_row_eof_id - 1 /* - stats.deleted */);
3933
stats.data_file_length = xt_rec_id_to_rec_offset(tab, tab->tab_rec_eof_id);
3934
stats.index_file_length = xt_ind_node_to_offset(tab, tab->tab_ind_eof);
3935
stats.delete_length = tab->tab_rec_fnum * ot->ot_rec_size;
3936
//check_time = info.check_time;
3937
stats.mean_rec_length = (ulong) ot->ot_rec_size;
3940
// The HA_STATUS_CONST handling that follows sits behind `#if 0`.
#if 0 // Commented out, I am pretty sure this will blow up on someone since the global share should be treated as being non-mutable
3941
if (flag & HA_STATUS_CONST) {
3942
ha_rows rec_per_key;
3944
TABLE_SHARE *share= TS(table);
3946
stats.max_data_file_length = 0x00FFFFFF;
3947
stats.max_index_file_length = 0x00FFFFFF;
3948
//stats.create_time = info.create_time;
3949
ref_length = XT_RECORD_OFFS_SIZE;
3950
//share->db_options_in_use = info.options;
3951
stats.block_size = XT_INDEX_PAGE_SIZE;
3953
if (share->getType() == message::Table::STANDARD)
3955
#define WHICH_MUTEX mutex
3956
#elif MYSQL_VERSION_ID >= 50404
3957
#define WHICH_MUTEX LOCK_ha_data
3959
if (share->tmp_table == NO_TMP_TABLE)
3960
#define WHICH_MUTEX mutex
3965
#if MYSQL_VERSION_ID < 50404
3966
#if MYSQL_VERSION_ID < 50123
3967
safe_mutex_lock(&share->mutex,__FILE__,__LINE__);
3969
safe_mutex_lock(&share->mutex,0,__FILE__,__LINE__);
3972
safe_mutex_lock(&share->WHICH_MUTEX,0,__FILE__,__LINE__);
3977
#ifdef MY_PTHREAD_FASTMUTEX
3978
my_pthread_fastmutex_lock(&share->WHICH_MUTEX);
3983
#endif // SAFE_MUTEX
3985
set_prefix(share->keys_in_use, share->keys);
3986
share->keys_for_keyread&= share->keys_in_use;
3988
share->keys_in_use.set_prefix(share->keys);
3989
//share->keys_in_use.intersect_extended(info.key_map);
3990
share->keys_for_keyread.intersect(share->keys_in_use);
3991
//share->db_record_offset = info.record_offset;
3993
for (u_int i = 0; i < share->keys; i++) {
3994
ind = pb_share->sh_dic_keys[i];
3997
if (ind->mi_seg_count == 1 && (ind->mi_flags & HA_NOSAME))
4002
for (u_int j = 0; j < table->key_info[i].key_parts; j++)
4003
table->key_info[i].rec_per_key[j] = (ulong) rec_per_key;
4006
if (share->getType() == message::Table::STANDARD)
4008
if (share->tmp_table == NO_TMP_TABLE)
4011
safe_mutex_unlock(&share->WHICH_MUTEX,__FILE__,__LINE__);
4013
#ifdef MY_PTHREAD_FASTMUTEX
4014
pthread_mutex_unlock(&share->WHICH_MUTEX.mutex);
4020
Set data_file_name and index_file_name to point at the symlink value
4021
if table is symlinked (Ie; Real name is not same as generated name)
4024
data_file_name = index_file_name = 0;
4025
fn_format(name_buff, file->filename, "", MI_NAME_DEXT, 2);
4026
if (strcmp(name_buff, info.data_file_name))
4027
data_file_name = info.data_file_name;
4028
strmov(fn_ext(name_buff), MI_NAME_IEXT);
4029
if (strcmp(name_buff, info.index_file_name))
4030
index_file_name = info.index_file_name;
4035
// HA_STATUS_ERRKEY: report the index number recorded for the last error.
if (flag & HA_STATUS_ERRKEY)
4036
errkey = ot->ot_err_index_no;
4039
* We assume they want the next value to be returned!
4041
* At least, this is what works for the following code:
4043
* create table t1 (a int auto_increment primary key)
4044
* auto_increment=100
4046
* partition by list (a)
4047
* (partition p0 values in (1, 98,99, 100, 101));
4048
* create index inx on t1 (a);
4049
* insert into t1 values (null);
4052
// HA_STATUS_AUTO: report the table's stored auto-increment value plus one.
if (flag & HA_STATUS_AUTO)
4053
stats.auto_increment_value = (ulonglong) ot->ot_table->tab_auto_inc+1;
4061
/* Someone may be waiting for me to complete: */
4062
if (pb_share->sh_table_lock)
4063
xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
4066
#if MYSQL_VERSION_ID < 50114
4074
* extra() is called whenever the server wishes to send a hint to
4075
* the storage engine. The myisam engine implements the most hints.
4076
* ha_innodb.cc has the most exhaustive list of these hints.
4078
// Receives a hint from the server and adjusts handler state accordingly.
// The visible cases maintain pb_key_read and the pb_ignore_dup_key nesting
// counter, and use HA_EXTRA_RESET_STATE to detect the end of a statement.
// NOTE(review): break/return statements and some assignments are not visible
// here; confirm the exact case bodies against the complete method.
int ha_pbxt::extra(enum ha_extra_function operation)
4082
XT_PRINT2(xt_get_self(), "ha_pbxt::extra (%s) operation=%d\n", pb_share->sh_table_path->ps_path, operation);
4084
switch (operation) {
4085
case HA_EXTRA_RESET_STATE:
4086
pb_key_read = FALSE;
4087
pb_ignore_dup_key = 0;
4088
/* As far as I can tell, this function is called for
4089
* every table at the end of a statement.
4091
* So, during a LOCK TABLES ... UNLOCK TABLES, I use
4092
* this to find the end of a statement.
4093
* start_stmt() indicates the start of a statement,
4094
* and is also called once for each table in the
4097
* So the statement boundary is indicated by
4098
* self->st_stat_count == 0
4100
* GOTCHA: I cannot end the transaction here!
4101
* I must end it in start_stmt().
4102
* The reason is because there are situations
4103
* where this would end a transaction that
4104
* was begun by external_lock().
4106
* An example of this is when a function
4107
* is called when doing CREATE TABLE SELECT.
4110
/* NOTE: pb_in_stat is just used to avoid getting
4111
* self, if it is not necessary!!
4117
if (!(self = ha_set_current_thread(pb_mysql_thd, &err)))
4118
return xt_ha_pbxt_to_mysql_error(err);
4120
// Count down the statement counter; reaching zero marks the statement as ended.
if (self->st_stat_count > 0) {
4121
self->st_stat_count--;
4122
if (self->st_stat_count == 0)
4123
self->st_stat_ended = TRUE;
4126
/* This is the end of a statement, I can turn any locks into permanent locks now: */
4128
pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &self->st_lock_list);
4131
pb_open_tab->ot_for_update = 0;
4133
case HA_EXTRA_KEYREAD:
4134
/* This means we do not need to read the entire record. */
4137
case HA_EXTRA_NO_KEYREAD:
4138
pb_key_read = FALSE;
4140
case HA_EXTRA_IGNORE_DUP_KEY:
4141
/* NOTE!!! Calls to extra(HA_EXTRA_IGNORE_DUP_KEY) can be nested!
4142
* In fact, the calls are from different threads, so
4143
* strictly speaking I should protect this variable!!
4144
* Here is the sequence that produces the duplicate call:
4146
* drop table if exists t1;
4147
* CREATE TABLE t1 (x int not null, y int, primary key (x)) engine=pbxt;
4148
* insert into t1 values (1, 3), (4, 1);
4149
* replace DELAYED into t1 (x, y) VALUES (4, 2);
4150
* select * from t1 order by x;
4153
pb_ignore_dup_key++;
4155
case HA_EXTRA_NO_IGNORE_DUP_KEY:
4156
pb_ignore_dup_key--;
4158
case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
4159
/* MySQL needs all fields */
4160
pb_key_read = FALSE;
4171
* Deprecated and likely to be removed in the future. Storage engines normally
4172
* just make a call like:
4173
* ha_pbxt::extra(HA_EXTRA_RESET);
4176
// Resets the handler state by delegating to extra(HA_EXTRA_RESET_STATE).
int ha_pbxt::reset(void)
4179
extra(HA_EXTRA_RESET_STATE);
4183
// Drops the temporary row lock held through this open table
// (xt_remove_temp_lock on the table's lock manager, second argument FALSE).
void ha_pbxt::unlock_row()
4187
pb_open_tab->ot_table->tab_locks.xt_remove_temp_lock(pb_open_tab, FALSE);
4191
* Used to delete all rows in a table. Both for cases of truncate and
4192
* for cases where the optimizer realizes that all rows will be
4193
* removed as a result of a SQL statement.
4195
* Called from item_sum.cc by Item_func_group_concat::clear(),
4196
* Item_sum_count_distinct::clear(), and Item_func_group_concat::clear().
4197
* Called from sql_delete.cc by delete_query().
4198
* Called from sql_select.cc by JOIN::reinit().
4199
* Called from sql_union.cc by st_select_lex_unit::exec().
4201
// Implements TRUNCATE TABLE by re-creating the table from a copy of its
// dictionary. Any SQL command other than SQLCOM_TRUNCATE is rejected with
// HA_ERR_WRONG_COMMAND so that the server deletes row by row.
// NOTE(review): the try/catch scaffolding and several statements are not
// visible here; confirm the error and cleanup paths against the full method.
int ha_pbxt::delete_all_rows()
4203
THD *thd = current_thd;
4206
XTDDTable *tab_def = NULL;
4207
char path[PATH_MAX];
4211
if (thd_sql_command(thd) != SQLCOM_TRUNCATE) {
4212
/* Just like InnoDB we only handle TRUNCATE TABLE
4213
* by recreating the table.
4214
* DELETE FROM t must be handled by deleting
4215
* each row because it may be part of a transaction,
4216
* and there may be foreign key actions.
4218
XT_RETURN (errno = HA_ERR_WRONG_COMMAND);
4221
if (!(self = ha_set_current_thread(thd, &err)))
4222
return xt_ha_pbxt_to_mysql_error(err);
4225
XTDictionaryRec dic;
4227
memset(&dic, 0, sizeof(dic));
4229
// Take a copy of the table dictionary and path before the share is closed.
dic = pb_share->sh_table->tab_dic;
4230
xt_strcpy(PATH_MAX, path, pb_share->sh_table->tab_name->ps_path);
4232
// Hold a reference on the table definition; it is released at the end.
if ((tab_def = dic.dic_table))
4233
tab_def->reference();
4235
if (!(thd_test_options(thd,OPTION_NO_FOREIGN_KEY_CHECKS)))
4236
tab_def->deleteAllRows(self);
4238
/* We should have a table lock! */
4239
//ASSERT(pb_lock_table);
4240
// Acquire exclusive use here only when the table is not already locked.
if (!pb_table_locked) {
4241
ha_aquire_exclusive_use(self, pb_share, this);
4242
pushr_(ha_release_exclusive_use, pb_share);
4244
ha_close_open_tables(self, pb_share, NULL);
4246
/* This is required in the case of delete_all_rows, because we must
4247
* ensure that the handlers no longer reference the old
4248
* table, so that it will not be used again. The table
4249
* must be re-opened, because the ID has changed!
4251
* 0.9.86+ Must check if this is still necessary.
4253
* the ha_close_share(self, pb_share) call was moved from above
4254
* (before tab_def = dic.dic_table), because of a crash.
4257
* set storage_engine = pbxt;
4258
* create table t1 (s1 int primary key);
4259
* insert into t1 values (1);
4260
* create table t2 (s1 int, foreign key (s1) references t1 (s1));
4261
* insert into t2 values (1);
4262
* truncate table t1; -- this should fail because of FK constraint
4263
* alter table t1 engine = myisam; -- this caused crash
4266
ha_close_share(self, pb_share);
4268
/* MySQL documentation requires us to reset auto increment value to 1
4269
* on truncate even if the table was created with a different value.
4270
* This is also consistent with other engines.
4272
dic.dic_min_auto_inc = 1;
4274
xt_create_table(self, (XTPathStrPtr) path, &dic);
4275
if (!pb_table_locked)
4276
freer_(); // ha_release_exclusive_use(pb_share)
4279
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4284
tab_def->release(self);
4291
* Assuming a key (a,b,c)
4293
* rec_per_key[0] = SELECT COUNT(*)/COUNT(DISTINCT a) FROM t;
4294
* rec_per_key[1] = SELECT COUNT(*)/COUNT(DISTINCT a,b) FROM t;
4295
* rec_per_key[2] = SELECT COUNT(*)/COUNT(DISTINCT a,b,c) FROM t;
4297
* After this is implemented, the selectivity can serve as
4298
* a quick estimate of records_in_range().
4300
* After you have done this, you need to redo the index_merge*
4301
* tests. Restore the standard result to check if we
4302
* now agree with the MyISAM strategy.
4305
// ANALYZE TABLE support: commits the calling thread's transaction (if any)
// and then waits until the sweeper is idle and has cleaned up to that
// transaction, or until the session is killed.
// NOTE(review): the loop body (sleep/wake-up logic) is only partly visible
// here; confirm against the complete method.
int ha_pbxt::analyze(THD *thd)
4310
xtXactID clean_xn_id = 0;
4316
if ((err = reopen()))
4320
/* Wait until the sweeper is no longer busy!
4321
* If you want an accurate count(*) value, then call
4322
* ANALYZE TABLE first. This function waits until the
4323
* sweeper has completed.
4325
db = pb_open_tab->ot_table->tab_db;
4328
* Wait until everything is cleaned up before this transaction.
4329
* But this will only work if we quit our transaction!
4331
* GOTCHA: When a PBXT table is partitioned, then analyze() is
4332
* called for each component. The first calls xt_xn_commit().
4333
* All following calls have no transaction!:
4335
* CREATE TABLE t1 (a int)
4336
* PARTITION BY LIST (a)
4337
* (PARTITION x1 VALUES IN (10), PARTITION x2 VALUES IN (20));
4342
// With a running transaction: remember its start id and commit it.
// The other visible assignment takes the database's current to-clean id.
if (pb_open_tab->ot_thread && pb_open_tab->ot_thread->st_xact_data) {
4343
my_xn_id = pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id;
4344
XT_PRINT0(xt_get_self(), "xt_xn_commit\n");
4345
xt_xn_commit(pb_open_tab->ot_thread);
4348
my_xn_id = db->db_xn_to_clean_id;
4350
// Loop while the sweeper is busy or has not yet reached my_xn_id, unless the
// session has been killed.
while ((!db->db_sw_idle || xt_xn_is_before(db->db_xn_to_clean_id, my_xn_id)) && not (thd->getKilled())) {
4354
* It is possible that the sweeper gets stuck because
4355
* it has no dictionary information!
4356
* As in the example below.
4359
* pk_col int auto_increment primary key, a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(64) default ' '
4362
* insert into t4 (a1, a2, b, c, d, dummy) select * from t1;
4364
* create index idx12672_0 on t4 (a1);
4365
* create index idx12672_1 on t4 (a1,a2,b,c);
4366
* create index idx12672_2 on t4 (a1,a2,b);
4369
if (db->db_sw_idle) {
4370
/* This will make sure we don't wait forever: */
4371
if (clean_xn_id != db->db_xn_to_clean_id) {
4372
clean_xn_id = db->db_xn_to_clean_id;
4380
xt_wakeup_sweeper(db);
4388
extern int pbxt_mysql_trace_on;
4391
// CHECK TABLE support: runs xt_check_table() on the open table, taking
// exclusive use of the share first when the table is not already locked.
// NOTE(review): the try/catch scaffolding and return statements are not
// visible here; confirm against the complete method.
int ha_pbxt::check(THD* thd)
4396
if (!(self = ha_set_current_thread(thd, &err)))
4397
return xt_ha_pbxt_to_mysql_error(err);
4398
// A thread that holds locks is expected to have a transaction.
if (self->st_lock_count)
4399
ASSERT(self->st_xact_data);
4401
if (!pb_table_locked) {
4402
ha_aquire_exclusive_use(self, pb_share, this);
4403
pushr_(ha_release_exclusive_use, pb_share);
4406
#ifdef CHECK_TABLE_LOADS
4407
xt_tab_load_table(self, pb_open_tab);
4409
xt_check_table(self, pb_open_tab);
4411
// Release exclusive use again if it was acquired above.
if (!pb_table_locked)
4412
freer_(); // ha_release_exclusive_use(pb_share)
4414
//pbxt_mysql_trace_on = TRUE;
4419
* This function is called:
4420
* For each table in LOCK TABLES,
4422
* For each table in a statement.
4424
* It is called with F_UNLCK:
4427
* at the end of a statement.
4430
// Called by the server to lock a table for a statement (or LOCK TABLES) and,
// with lock_type == F_UNLCK, to unlock it at the end of a statement.
// Unlock path: decrements the thread's lock count and, when it reaches zero,
// handles auto-commit (commit or rollback), selectivity recalculation and
// statement statistics.
// Lock path: (re)opens the table as needed, classifies the statement from the
// SQL command, begins a transaction if none is running, registers with the
// server, and increments the lock count.
// NOTE(review): try/catch scaffolding, else-branches, braces and several
// statements are not visible here; confirm details against the full method.
xtPublic int ha_pbxt::external_lock(THD *thd, int lock_type)
4432
/* Some compilers complain that: variable 'err' might be clobbered by 'longjmp' or 'vfork' */
4433
volatile int err = 0;
4436
if (!(self = ha_set_current_thread(thd, (int *) &err)))
4437
return xt_ha_pbxt_to_mysql_error(err);
4439
/* F_UNLCK is set when this function is called at end
4440
* of statement or UNLOCK TABLES
4442
if (lock_type == F_UNLCK) {
4443
/* This is not TRUE if external_lock() FAILED!
4444
* Can we rely on external_unlock being called when
4445
* external_lock() fails? Currently yes, but it does
4447
ASSERT_NS(pb_ex_in_use);
4450
XT_PRINT1(self, "EXTERNAL_LOCK (%s) lock_type=UNLOCK\n", pb_share->sh_table_path->ps_path);
4452
/* Make any temporary locks on this table permanent.
4454
* This is required here because of the following example:
4455
* create table t1 (a int NOT NULL, b int, primary key (a));
4456
* create table t2 (a int NOT NULL, b int, primary key (a));
4457
* insert into t1 values (0, 10),(1, 11),(2, 12);
4458
* insert into t2 values (1, 21),(2, 22),(3, 23);
4459
* update t1 set b= (select b from t2 where t1.a = t2.a);
4460
* update t1 set b= (select b from t2 where t1.a = t2.a);
4462
* drop table t1, t2;
4466
/* GOTCHA! It's weird, but, if this function returns an error
4467
* on lock, then UNLOCK is called?!
4468
* This should not be done, because if lock fails, it should be
4469
* assumed that no UNLOCK is required.
4470
* Basically, I have to assume that some code will presume this,
4471
* although the function lock_external() calls unlock, even
4473
* The result is, that my lock count can go wrong. So I could
4474
* change the lock method, and increment the lock count, even
4475
* if it fails. However, the consequences are more serious,
4476
* if some code decides not to call UNLOCK after lock fails.
4477
* The result is that I would have a permanent too high lock,
4478
* count and nothing will work.
4479
* So instead, I handle the fact that I might get too many unlocks
4482
// Guarded decrement: the count never goes below zero.
if (self->st_lock_count > 0)
4483
self->st_lock_count--;
4484
// Last table of the statement unlocked: end-of-statement processing.
if (!self->st_lock_count) {
4485
/* This section handles "auto-commit"... */
4487
#ifdef XT_IMPLEMENT_NO_ACTION
4489
* This is required here because it marks the end of a statement.
4490
* If we are in a non-auto-commit mode, then we cannot
4491
* wait for st_is_update to be set by the begining of a new transaction.
4493
if (self->st_restrict_list.bl_count) {
4494
if (!xt_tab_restrict_rows(&self->st_restrict_list, self))
4495
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4499
if (self->st_xact_data) {
4500
if (self->st_auto_commit) {
4502
* Normally I could assume that if the transaction
4503
* has not been aborted by now, then it should be committed.
4505
* Unfortunately, this is not the case!
4507
* create table t1 (id int primary key) engine = pbxt;
4508
* create table t2 (id int) engine = pbxt;
4510
* insert into t1 values ( 1 ) ;
4511
* insert into t1 values ( 2 ) ;
4512
* insert into t2 values ( 1 ) ;
4513
* insert into t2 values ( 2 ) ;
4515
* --This statement is returns an error calls ha_autocommit_or_rollback():
4516
* update t1 set t1.id=1 where t1.id=2;
4518
* --This statement is returns no error and calls ha_autocommit_or_rollback():
4519
* update t1,t2 set t1.id=3, t2.id=3 where t1.id=2 and t2.id = t1.id;
4521
* --But this statement returns an error and does not call ha_autocommit_or_rollback():
4522
* update t1,t2 set t1.id=1, t2.id=1 where t1.id=3 and t2.id = t1.id;
4524
* The result is, I cannot rely on ha_autocommit_or_rollback() being called :(
4525
* So I have to abort myself here...
4528
pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &self->st_lock_list);
4530
// Auto-commit: roll back when the transaction was flagged for abort; the
// commit call below is the alternative.
if (self->st_abort_trans) {
4531
XT_PRINT0(self, "xt_xn_rollback in unlock\n");
4532
if (!xt_xn_rollback(self))
4533
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4536
XT_PRINT0(self, "xt_xn_commit in unlock\n");
4537
if (!xt_xn_commit(self))
4538
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4543
/* If the previous statement was "for update", then set the visibility
4544
* so that non- for update SELECTs will see what the for update select
4545
* (or update statement) just saw.
4548
if (pb_open_tab->ot_for_update) {
4549
self->st_visible_time = self->st_database->db_xn_end_time;
4550
pb_open_tab->ot_for_update = 0;
4553
// Index selectivity is recalculated once the table reaches 200 rows; the
// flag stays set only while the row count is below 150.
if (pb_share->sh_recalc_selectivity) {
4554
#ifdef XT_ROW_COUNT_CORRECTED
4555
/* {CORRECTED-ROW-COUNT} */
4556
if ((pb_share->sh_table->tab_row_eof_id - 1 - pb_share->sh_table->tab_row_fnum) >= 200)
4558
/* {FREE-ROWS-BAD} */
4559
if ((pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) >= 200)
4563
pb_share->sh_recalc_selectivity = FALSE;
4564
xt_ind_set_index_selectivity(pb_open_tab, self);
4565
#ifdef XT_ROW_COUNT_CORRECTED
4566
/* {CORRECTED-ROW-COUNT} */
4567
pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 - pb_share->sh_table->tab_row_fnum) < 150;
4569
/* {FREE-ROWS-BAD} */
4570
pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150;
4576
// Statement statistics: count the finished statement as a write or a read.
if (self->st_stat_modify)
4577
self->st_statistics.st_stat_write++;
4579
self->st_statistics.st_stat_read++;
4580
self->st_stat_modify = FALSE;
4581
self->st_import_stat = XT_IMP_NO_IMPORT;
4584
if (pb_table_locked) {
4586
if (!pb_table_locked)
4587
ha_release_exclusive_use(self, pb_share);
4590
/* No longer in use: */
4592
/* Someone may be waiting for me to complete: */
4593
if (pb_share->sh_table_lock)
4594
xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
4597
// Lock path (lock_type is not F_UNLCK).
XT_PRINT2(self, "ha_pbxt::EXTERNAL_LOCK (%s) lock_type=%d\n", pb_share->sh_table_path->ps_path, lock_type);
4599
if (pb_lock_table) {
4602
if (!pb_table_locked)
4603
ha_aquire_exclusive_use(self, pb_share, this);
4606
ha_close_open_tables(self, pb_share, this);
4608
if (!pb_share->sh_table) {
4609
xt_ha_open_database_of_table(self, pb_share->sh_table_path);
4611
ha_open_share(self, pb_share);
4615
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4621
/* Occurs if you do:
4622
* truncate table t1;
4623
* truncate table t1;
4626
if ((err = reopen())) {
4634
if (pb_share->sh_table_lock && !pb_table_locked) {
4635
/* If some thread has an exclusive lock, then
4636
* we wait for the lock to be removed:
4638
if (!ha_wait_for_shared_use(this, pb_share)) {
4639
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
4645
if ((err = reopen())) {
4651
/* Set the current thread for this open table: */
4652
pb_open_tab->ot_thread = self;
4654
/* If this is a set, then it is in UPDATE/DELETE TABLE ...
4655
* or SELECT ... FOR UPDATE
4657
pb_open_tab->ot_is_modify = FALSE;
4658
// A write lock marks the table "for update"; the SQL command then decides
// whether the statement counts as a modification, a table copy or a load.
if ((pb_open_tab->ot_for_update = (lock_type == F_WRLCK))) {
4659
switch ((int) thd_sql_command(thd)) {
4662
case SQLCOM_DELETE_MULTI:
4664
/* turn DELETE IGNORE into normal DELETE. The IGNORE option causes problems because
4665
* when a record is deleted we add an xlog record which we cannot "rollback" later
4666
* when we find that an FK-constraint has failed.
4668
thd->lex->ignore = false;
4671
case SQLCOM_UPDATE_MULTI:
4673
case SQLCOM_REPLACE:
4674
case SQLCOM_REPLACE_SELECT:
4676
case SQLCOM_INSERT_SELECT:
4677
pb_open_tab->ot_is_modify = TRUE;
4678
self->st_stat_modify = TRUE;
4680
case SQLCOM_ALTER_TABLE:
4681
case SQLCOM_CREATE_INDEX:
4684
case SQLCOM_OPTIMIZE:
4686
case SQLCOM_DROP_INDEX:
4687
self->st_stat_modify = TRUE;
4688
self->st_import_stat = XT_IMP_COPY_TABLE;
4689
pb_import_row_count = 0;
4690
/* Do not read FOR UPDATE!
4691
* this avoids taking locks on the rows that are read
4692
* Which leads to the assertion failure:
4693
* int XTRowLocks::xt_make_lock_permanent(XTOpenTable*, XTRowLockList*)(lock_xt.cc:646) item
4694
* after the transaction is committed in doInsertRecord.
4696
pb_open_tab->ot_for_update = FALSE;
4699
self->st_stat_modify = TRUE;
4700
self->st_import_stat = XT_IMP_LOAD_TABLE;
4701
pb_import_row_count = 0;
4702
pb_open_tab->ot_for_update = FALSE;
4704
case SQLCOM_CREATE_TABLE:
4705
case SQLCOM_TRUNCATE:
4706
case SQLCOM_DROP_TABLE:
4707
self->st_stat_modify = TRUE;
4712
// Modifying a table whose indexes are disabled is reported as an error.
if (pb_open_tab->ot_is_modify && pb_open_tab->ot_table->tab_dic.dic_disable_index) {
4713
xt_tab_set_index_error(pb_open_tab->ot_table);
4714
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
4719
/* Record the associated MySQL thread: */
4722
if (self->st_database != pb_share->sh_table->tab_db) {
4724
/* PBXT does not permit multiple databases in one statement,
4725
* or in a single transaction!
4729
* update mysqltest_1.t1, mysqltest_2.t2 set a=10,d=10;
4731
if (self->st_lock_count > 0)
4732
xt_throw_xterr(XT_CONTEXT, XT_ERR_MULTIPLE_DATABASES);
4734
xt_ha_open_database_of_table(self, pb_share->sh_table_path);
4737
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4744
/* See {IS-UPDATE-STAT} and {UPDATE-STACK} */
4745
self->st_is_update = NULL;
4747
/* Auto begin a transaction (if one is not already running): */
4748
if (!self->st_xact_data) {
4749
/* Transaction mode numbers must be identical! */
4750
(void) ASSERT_NS(ISO_READ_UNCOMMITTED == XT_XACT_UNCOMMITTED_READ);
4751
(void) ASSERT_NS(ISO_SERIALIZABLE == XT_XACT_SERIALIZABLE);
4753
thd_init_xact(thd, self, true);
4755
if (!xt_xn_begin(self)) {
4756
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4761
* {START-TRANS} GOTCHA: trans_register_ha() is not mentioned in the documentation.
4762
* It must be called to inform MySQL that we have a transaction (see start_stmt).
4764
* Here are some tests that confirm whether things are done correctly:
4766
* drop table if exists t1, t2;
4767
* create table t1 (c1 int);
4768
* insert t1 values (1);
4770
* rename table t1 to t2;
4772
* rename will generate an error if MySQL thinks a transaction is
4775
* create table t1 (a text character set utf8, b text character set latin1);
4776
* insert t1 values (0x4F736E616272C3BC636B, 0x4BF66C6E);
4778
* --exec $MYSQL_DUMP --tab=$MYSQLTEST_VARDIR/tmp/ test
4779
* --exec $MYSQL test < $MYSQLTEST_VARDIR/tmp/t1.sql
4780
* --exec $MYSQL_IMPORT test $MYSQLTEST_VARDIR/tmp/t1.txt
4783
* This test forces a begin transaction in start_stmt()
4785
* drop tables if exists t1;
4786
* create table t1 (c1 int);
4787
* lock tables t1 write;
4788
* insert t1 values (1);
4789
* insert t1 values (2);
4792
* The second select will return an empty result of the
4793
* MySQL is not informed that a transaction is running (auto-commit
4794
* in external_lock comes too late)!
4798
// Outside auto-commit mode the transaction is registered with the server
// (second argument TRUE).
if (!self->st_auto_commit) {
4799
trans_register_ha(thd, TRUE, pbxt_hton);
4800
XT_PRINT0(self, "CONN START XACT - ha_pbxt::external_lock --> trans_register_ha\n");
4805
/* Start a statement transaction: */
4806
/* {START-STAT-HACK} The problem that ha_commit_trans() is not
4807
* called by MySQL seems to be fixed (tests confirm this).
4808
* Here is the previous comment when this code was executed
4809
* here {START-STAT-HACK}
4811
* GOTCHA: I have a huge problem with the transaction statement.
4812
* It is not ALWAYS committed (I mean ha_commit_trans() is
4813
* not always called - for example in SELECT).
4815
* If I call trans_register_ha() but ha_commit_trans() is not called
4816
* then MySQL thinks a transaction is still running (while
4817
* I have committed the auto-transaction in ha_pbxt::external_lock()).
4819
* This causes all kinds of problems, like transactions
4820
* are killed when they should not be.
4822
* To prevent this, I only inform MySQL that a transaction
4823
* has been started when an update is performed. I have determined that
4824
* ha_commit_trans() is only guaranteed to be called if an update is done.
4827
* So, this is the correct place to start a statement transaction.
4829
* Note: if trans_register_ha() is not called before insertRecord(), then
4830
* PBXT is not registered correctly as a modification transaction.
4831
* (mark_trx_read_write call in insertRecord).
4832
* This leads to 2-phase commit not being called as it should when
4833
* binary logging is enabled.
4836
// Register the statement-level transaction once per statement.
if (!pb_open_tab->ot_thread->st_stat_trans) {
4837
trans_register_ha(pb_mysql_thd, FALSE, pbxt_hton);
4838
XT_PRINT0(pb_open_tab->ot_thread, "STAT START - ha_pbxt::external_lock --> trans_register_ha\n");
4839
pb_open_tab->ot_thread->st_stat_trans = TRUE;
4842
// Write locks, and transaction modes below repeatable read, refresh the
// thread's visibility time to the database's current transaction end time.
if (lock_type == F_WRLCK || self->st_xact_mode < XT_XACT_REPEATABLE_READ)
4843
self->st_visible_time = self->st_database->db_xn_end_time;
4845
#ifdef TRACE_STATEMENTS
4846
if (self->st_lock_count == 0)
4847
STAT_TRACE(self, *thd_query(thd));
4849
self->st_lock_count++;
4857
* This function is called for each table in a statement
4858
* after LOCK TABLES has been used.
4860
* Currently I only use this function to set the
4861
* current thread of the table handle.
4863
* GOTCHA: The prototype of start_stmt() has changed
4864
* from version 4.1 to 5.1!
4866
// Start a new statement on this table handle (per the comment preceding this
// function: called for each table in a statement after LOCK TABLES).
//
// Visible steps:
//  - bind the handler to the calling thread via ha_set_current_thread();
//  - reopen() the table handle;
//  - if the previous statement has ended (st_stat_ended): reset the statement
//    flags, finish a pending auto-commit transaction with xt_xn_rollback()
//    or xt_xn_commit(), and update the read/write statement statistics;
//  - derive ot_for_update from lock_type, and ot_is_modify / st_stat_modify
//    from the SQL command;
//  - begin a transaction with xt_xn_begin() when the thread has none, and
//    register with the server through trans_register_ha();
//  - increment st_stat_count.
//
// Failures are converted to server error codes with
// xt_ha_pbxt_to_mysql_error() / xt_ha_pbxt_thread_error_for_mysql().
// NOTE(review): several structural lines (braces, else branches, returns)
// are not visible in this listing, so the exact nesting of the steps above
// should be confirmed against the full source.
int ha_pbxt::start_stmt(THD *thd, thr_lock_type lock_type)
4871
ASSERT_NS(pb_ex_in_use);
4873
if (!(self = ha_set_current_thread(thd, &err)))
4874
return xt_ha_pbxt_to_mysql_error(err);
4876
XT_PRINT2(self, "ha_pbxt::start_stmt (%s) lock_type=%d\n", pb_share->sh_table_path->ps_path, (int) lock_type);
4879
if ((err = reopen()))
4883
ASSERT_NS(pb_open_tab->ot_thread == self);
4884
ASSERT_NS(thd == pb_mysql_thd);
4885
ASSERT_NS(self->st_database == pb_open_tab->ot_table->tab_db);
4887
// The previous statement has ended: clear the per-statement flags first.
if (self->st_stat_ended) {
4888
self->st_stat_ended = FALSE;
4889
self->st_stat_trans = FALSE;
4891
#ifdef XT_IMPLEMENT_NO_ACTION
4892
if (self->st_restrict_list.bl_count) {
4893
if (!xt_tab_restrict_rows(&self->st_restrict_list, self)) {
4894
err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, self, pb_ignore_dup_key);
4899
/* This section handles "auto-commit"... */
4900
if (self->st_xact_data && self->st_auto_commit && self->st_table_trans) {
4901
if (self->st_abort_trans) {
4902
XT_PRINT0(self, "xt_xn_rollback in start_stmt\n");
4903
if (!xt_xn_rollback(self))
4904
err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, self, pb_ignore_dup_key);
4907
XT_PRINT0(self, "xt_xn_commit in start_stmt\n");
4908
if (!xt_xn_commit(self))
4909
err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, self, pb_ignore_dup_key);
4913
if (self->st_stat_modify)
4914
self->st_statistics.st_stat_write++;
4916
self->st_statistics.st_stat_read++;
4917
self->st_stat_modify = FALSE;
4918
self->st_import_stat = XT_IMP_NO_IMPORT;
4920
/* If the previous statement was "for update", then set the visibility
4921
* so that non- for update SELECTs will see what the for update select
4922
* (or update statement) just saw.
4924
if (pb_open_tab->ot_for_update)
4925
self->st_visible_time = self->st_database->db_xn_end_time;
4928
// Any lock type other than the read lock types tested below marks this
// handle as "for update".
pb_open_tab->ot_for_update =
4929
(lock_type != TL_READ &&
4930
lock_type != TL_READ_WITH_SHARED_LOCKS &&
4932
lock_type != TL_READ_HIGH_PRIORITY &&
4934
lock_type != TL_READ_NO_INSERT);
4935
pb_open_tab->ot_is_modify = FALSE;
4936
if (pb_open_tab->ot_for_update) {
4937
switch ((int) thd_sql_command(thd)) {
4941
case SQLCOM_UPDATE_MULTI:
4942
case SQLCOM_DELETE_MULTI:
4944
case SQLCOM_REPLACE:
4945
case SQLCOM_REPLACE_SELECT:
4947
case SQLCOM_INSERT_SELECT:
4948
pb_open_tab->ot_is_modify = TRUE;
4949
self->st_stat_modify = TRUE;
4951
case SQLCOM_CREATE_TABLE:
4952
case SQLCOM_CREATE_INDEX:
4953
case SQLCOM_ALTER_TABLE:
4954
case SQLCOM_TRUNCATE:
4955
case SQLCOM_DROP_TABLE:
4956
case SQLCOM_DROP_INDEX:
4960
case SQLCOM_OPTIMIZE:
4962
self->st_stat_modify = TRUE;
4967
/* {IS-UPDATE-STAT} This is required at this level!
4968
* No matter how often it is called, it is still the start of a
4969
* statement. We need to make sure statements are NOT mistaken
4970
* for a different type of statement.
4972
* Here is an example:
4973
* select * from t1 where data = getcount("bar")
4975
* If the procedure getcount() addresses another table.
4976
* then open and close of the statements in getcount()
4977
* are nested within an open close of the select t1
4981
* Add to this I add the following:
4982
* A trigger in the middle of an update also causes nested
4983
* statements. If I reset st_is_update, then
4984
* when the trigger returns the system thinks we
4985
* are in a different update statement, and may
4986
* update the same row again.
4988
if (self->st_is_update == pb_open_tab) {
4989
/* Pop the update stack: */
4990
XTOpenTablePtr curr = pb_open_tab->ot_thread->st_is_update;
4992
pb_open_tab->ot_thread->st_is_update = curr->ot_prev_update;
4993
curr->ot_prev_update = NULL;
4996
/* See comment {START-TRANS} */
4997
if (!self->st_xact_data) {
4999
thd_init_xact(thd, self, false);
5001
if (!xt_xn_begin(self)) {
5002
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
5006
if (!self->st_auto_commit) {
5007
trans_register_ha(thd, TRUE, pbxt_hton);
5008
XT_PRINT0(self, "START CONN XACT - ha_pbxt::start_stmt --> trans_register_ha\n");
5013
/* Start a statement (see {START-STAT-HACK}): */
5015
if (!pb_open_tab->ot_thread->st_stat_trans) {
5016
trans_register_ha(pb_mysql_thd, FALSE, pbxt_hton);
5017
XT_PRINT0(pb_open_tab->ot_thread, "START STAT - ha_pbxt::start_stmt --> trans_register_ha\n");
5018
pb_open_tab->ot_thread->st_stat_trans = TRUE;
5021
if (pb_open_tab->ot_for_update || self->st_xact_mode < XT_XACT_REPEATABLE_READ)
5022
self->st_visible_time = self->st_database->db_xn_end_time;
5026
// Count this statement start on the thread.
self->st_stat_count++;
5033
* The idea with handler::store_lock() is the following:
5035
* The statement decided which locks we should need for the table
5036
* for updates/deletes/inserts we get WRITE locks, for SELECT... we get
5039
* Before adding the lock into the table lock handler (see thr_lock.c)
5040
* mysqld calls store lock with the requested locks. Store lock can now
5041
* modify a write lock to a read lock (or some other lock), ignore the
5042
* lock (if we don't want to use MySQL table locks at all) or add locks
5043
* for many tables (like we do when we are using a MERGE handler).
5045
* When releasing locks, store_lock() is also called. In this case one
5046
* usually doesn't have to do anything.
5048
* In some exceptional cases MySQL may send a request for a TL_IGNORE;
5049
* This means that we are requesting the same lock as last time and this
5050
* should also be ignored. (This may happen when someone does a flush
5051
* table when we have opened a part of the tables, in which case mysqld
5052
* closes and reopens the tables and tries to get the same locks as last
5053
* time). In the future we will probably try to remove this.
5055
* Called from lock.cc by get_lock_data().
5057
// Let the engine adjust the lock type requested for this table and record it
// in pb_lock.type (handler::store_lock() is described in the comment that
// precedes this function).
//
// Visible adjustments, following the test
// `lock_type != TL_IGNORE && pb_lock.type == TL_UNLOCK`:
//  - pb_lock_table is set from the SQL command (TRUE is assigned under the
//    SQLCOM_TRUNCATE case; FALSE is assigned elsewhere in the switch);
//  - write locks in the range TL_WRITE_CONCURRENT_INSERT..TL_WRITE are
//    weakened to TL_WRITE_ALLOW_WRITE, unless the statement is LOCK TABLES,
//    a tablespace operation, TRUNCATE, OPTIMIZE or CREATE TABLE;
//  - TL_READ_NO_INSERT may be turned into TL_READ.
//
// NOTE(review): TL_READ is converted to TL_READ_NO_INSERT at the top of the
// function, which looks at odds with the later TL_READ_NO_INSERT -> TL_READ
// conversion. Preprocessor conditionals that may select between the two are
// not visible in this listing -- confirm against the full source.
THR_LOCK_DATA **ha_pbxt::store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type)
5060
* TL_READ means concurrent INSERTs are allowed. This is a problem as in this mode
5061
* PBXT is not compatible with MyISAM which allows INSERTs but isolates them from
5062
* current "transaction" (started by LOCK TABLES, ended by UNLOCK TABLES). PBXT
5063
* used to allow INSERTs and made them visible to the locker (on commit).
5064
* While MySQL manual doesn't state anything regarding row visibility limitations
5065
* we choose to convert local locks into normal read locks for better compatibility
5068
if (lock_type == TL_READ)
5069
lock_type = TL_READ_NO_INSERT;
5071
if (lock_type != TL_IGNORE && pb_lock.type == TL_UNLOCK) {
5072
/* Set to TRUE for operations that require a table lock: */
5073
switch (thd_sql_command(thd)) {
5074
case SQLCOM_TRUNCATE:
5076
* The problem is, if I do not do this, then
5077
* TRUNCATE TABLE deadlocks with a normal update of the table!
5080
* external_lock() is called before MySQL actually locks the
5081
* table. In external_lock(), the table is shared locked,
5082
* by indicating that the handler is in use.
5084
* Then later, in delete_all_rows(), a exclusive lock must be
5085
* obtained. If an UPDATE or INSERT has also gained a shared
5086
* lock in the meantime, then TRUNCATE TABLE hangs.
5088
* By setting pb_lock_table we indicate that an exclusive lock
5089
* should be gained in external_lock().
5091
* This is the locking behaviour:
5094
* XT SHARE LOCK (mysql_lock_tables calls external_lock)
5095
* MySQL WRITE LOCK (mysql_lock_tables)
5097
* XT EXCLUSIVE LOCK (delete_all_rows)
5100
* XT SHARED LOCK (mysql_lock_tables calls external_lock)
5101
* MySQL WRITE_ALLOW_WRITE LOCK (mysql_lock_tables)
5103
* If the locking for INSERT is done in the ... phase
5104
* above, then we have a deadlock because
5105
* WRITE_ALLOW_WRITE conflicts with WRITE.
5107
* Making TRUNCATE TABLE take a WRITE_ALLOW_WRITE LOCK, will
5108
* not solve the problem because then 2 TRUNCATE TABLES
5109
* can deadlock due to lock escalation.
5111
* What may work is if MySQL were to lock BEFORE calling
5114
* However, using this method, TRUNCATE TABLE does deadlock
5115
* with other operations such as ALTER TABLE!
5117
* This is handled with a lock timeout. Assuming
5118
* TRUNCATE TABLE will be mixed with DML this is the
5121
pb_lock_table = TRUE;
5124
pb_lock_table = FALSE;
5128
#ifdef PBXT_HANDLER_TRACE
5129
pb_lock.type = lock_type;
5131
/* GOTCHA: Before it was OK to weaken the lock after just checking
5132
* that !thd->in_lock_tables. However, when starting a procedure, MySQL
5133
* simulates a LOCK TABLES statement.
5135
* So we need to be more specific here, and check what the actual statement
5136
* type. Before doing this I got a deadlock (undetected) on the following test.
5137
* However, now we get a failed assertion in ha_rollback_trans():
5138
* TODO: Check this with InnoDB!
5141
* my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
5143
* drop table if exists t3;
5144
* create table t3 (a smallint primary key) engine=pbxt;
5145
* insert into t3 (a) values (40);
5146
* insert into t3 (a) values (50);
5150
* drop function if exists t3_update|
5152
* create function t3_update() returns int
5154
* insert into t3 values (10);
5163
* update t3 set a = 5 where a = 50;
5168
* update t3 set a = 4 where a = 40;
5172
* update t3 set a = 4 where a = 40; // Hangs waiting CONN 2.
5176
* select t3_update(); // Hangs waiting for table lock.
5179
if ((lock_type >= TL_WRITE_CONCURRENT_INSERT && lock_type <= TL_WRITE) &&
5181
!(thd_in_lock_tables(thd) && thd_sql_command(thd) == SQLCOM_LOCK_TABLES) &&
5183
!thd_tablespace_op(thd) &&
5184
thd_sql_command(thd) != SQLCOM_TRUNCATE &&
5186
thd_sql_command(thd) != SQLCOM_OPTIMIZE &&
5188
thd_sql_command(thd) != SQLCOM_CREATE_TABLE) {
5189
lock_type = TL_WRITE_ALLOW_WRITE;
5192
/* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
5193
* MySQL would use the lock TL_READ_NO_INSERT on t2, and that
5194
* would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
5195
* to t2. Convert the lock to a normal read lock to allow
5196
* concurrent inserts to t2.
5198
* (This one from InnoDB)
5200
* Stewart: removed SQLCOM_CALL, not sure of implications.
5202
if (lock_type == TL_READ_NO_INSERT
5204
&& (!thd_in_lock_tables(thd)
5205
|| thd_sql_command(thd) == SQLCOM_CALL
5210
lock_type = TL_READ;
5213
XT_PRINT3(xt_get_self(), "store_lock (%s) %d->%d\n", pb_share->sh_table_path->ps_path, pb_lock.type, lock_type);
5214
pb_lock.type = lock_type;
5216
#ifdef PBXT_HANDLER_TRACE
5218
XT_PRINT3(xt_get_self(), "store_lock (%s) %d->%d (ignore/unlock)\n", pb_share->sh_table_path->ps_path, lock_type, lock_type);
5226
* Used to delete a table. By the time delete_table() has been called all
5227
* opened references to this table will have been closed (and your globally
5228
* shared references released). The variable name will just be the name of
5229
* the table. You will need to remove any files you have created at this point.
5231
* Called from handler.cc by delete_table and ha_create_table(). Only used
5232
* during create if the table_flag HA_DROP_BEFORE_CREATE was specified for
5233
* the storage engine.
5235
// Drop the table identified by `ident` (Drizzle storage-engine entry point).
//
// Visible steps:
//  - system tables are routed to delete_system_table();
//  - the calling thread is bound with ha_set_current_thread();
//  - st_ignore_fkeys is taken from OPTION_NO_FOREIGN_KEY_CHECKS;
//  - the table's database is opened, the share is fetched and held for
//    exclusive use while open handles are closed, then xt_drop_table()
//    removes the table (its last argument tells whether the statement is
//    DROP DATABASE);
//  - an XT_ERR_TABLE_NOT_FOUND exception is logged and cleared;
//  - the PBMS blob data is deleted last because that cannot be undone;
//  - the definition file `<path>` + DEFAULT_FILE_EXTENSION is removed with
//    internal::my_delete(), ignoring its result.
//
// NOTE(review): the try/catch macros, conditional-compilation lines and the
// final return are not visible in this listing.
int PBXTStorageEngine::doDropTable(Session &, const TableIdentifier& ident)
5237
const std::string& path = ident.getPath();
5238
const char *table_path = path.c_str();
5239
THD *thd = current_thd;
5241
XTThreadPtr self = NULL;
5244
STAT_TRACE(self, *thd_query(thd));
5245
XT_PRINT1(self, "delete_table (%s)\n", table_path);
5247
if (XTSystemTableShare::isSystemTable(table_path))
5248
return delete_system_table(table_path);
5250
if (!(self = ha_set_current_thread(thd, &err)))
5251
return xt_ha_pbxt_to_mysql_error(err);
5253
self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5256
xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
5258
ASSERT(xt_get_self() == self);
5260
/* NOTE: MySQL does not drop a table by first locking it!
5261
* We also cannot use pb_share because the handler used
5262
* to delete a table is not opened correctly.
5264
share = ha_get_share(self, table_path, false);
5265
pushr_(ha_unget_share, share);
5266
ha_aquire_exclusive_use(self, share, NULL);
5267
pushr_(ha_release_exclusive_use, share);
5268
ha_close_open_tables(self, share, NULL);
5270
xt_drop_table(self, (XTPathStrPtr) table_path, thd_sql_command(thd) == SQLCOM_DROP_DB);
5272
freer_(); // ha_release_exclusive_use(share)
5273
freer_(); // ha_unget_share(share)
5276
/* In MySQL if the table does not exist, just log the error and continue. This is
5277
* needed to delete table in the case when CREATE TABLE fails and no PBXT disk
5278
* structures were created.
5279
* Drizzle unlike MySQL iterates over all handlers and tries to delete table. It
5280
* stops after when a handler returns TRUE, so in Drizzle we need to report error.
5283
if (self->t_exception.e_xt_err == XT_ERR_TABLE_NOT_FOUND)
5284
xt_log_and_clear_exception(self);
5292
* If there are no more PBXT tables in the database, we
5293
* "drop the database", which deletes all PBXT resources
5296
/* We now only drop the pbxt system data,
5297
* when the PBXT database is dropped.
5299
#ifndef XT_USE_GLOBAL_DB
5300
if (!xt_table_exists(self->st_database)) {
5301
xt_ha_all_threads_close_database(self, self->st_database);
5302
xt_drop_database(self, self->st_database);
5303
xt_unuse_database(self, self);
5304
xt_ha_close_global_database(self);
5309
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5311
if (err == HA_ERR_NO_SUCH_TABLE)
5318
/* Call pbms_delete_table_with_blobs() last because it cannot be undone. */
5320
PBMSResultRec result;
5322
if (pbms_delete_table_with_blobs(table_path, &result)) {
5323
xt_logf(XT_NT_WARNING, "pbms_delete_table_with_blobs() Error: %s", result.mr_message);
5326
pbms_completed(NULL, true);
5330
std::string path2(ident.getPath());
5331
path2.append(DEFAULT_FILE_EXTENSION);
5332
(void)internal::my_delete(path2.c_str(), MYF(0));
5338
// Delete a PBXT system table.
//
// Two signatures appear back to back (PBXTStorageEngine:: and ha_pbxt::);
// presumably they are alternatives selected by a preprocessor conditional
// that is not visible in this listing -- confirm.
//
// Visible steps: bind the calling thread, open the database of the table,
// throw XT_ERR_PBXT_TABLE_EXISTS while the database still contains tables,
// mark the system table as deleted, and once no system table exists close
// and drop the database. The thread's error state is converted with
// xt_ha_pbxt_thread_error_for_mysql().
int PBXTStorageEngine::delete_system_table(const char *table_path)
5340
int ha_pbxt::delete_system_table(const char *table_path)
5343
THD *thd = current_thd;
5348
if (!(self = xt_ha_set_current_thread(thd, &e)))
5349
return xt_ha_pbxt_to_mysql_error(e.e_xt_err);
5352
xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
5354
if (xt_table_exists(self->st_database))
5355
xt_throw_xterr(XT_CONTEXT, XT_ERR_PBXT_TABLE_EXISTS);
5357
XTSystemTableShare::setSystemTableDeleted(table_path);
5359
if (!XTSystemTableShare::doesSystemTableExist()) {
5360
xt_ha_all_threads_close_database(self, self->st_database);
5361
xt_drop_database(self, self->st_database);
5362
xt_unuse_database(self, self);
5363
xt_ha_close_global_database(self);
5367
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5375
* Renames a table from one name to another from alter table call.
5376
* This function can be used to move a table from one database to
5379
// Rename a table from `from_ident` to `to_ident` (Drizzle storage-engine
// entry point).
//
// Visible steps:
//  - identical source and target paths are detected with strcmp();
//  - system tables are routed to rename_system_table();
//  - the calling thread is bound with ha_set_current_thread();
//  - PBMS blob data is renamed first (pbms_rename_table_with_blobs());
//  - both paths must resolve to the same database, otherwise
//    XT_ERR_CANNOT_CHANGE_DB is thrown;
//  - the share of `from` is held for exclusive use while open handles are
//    closed and xt_rename_table() performs the rename;
//  - pbms_completed() is told whether err == 0;
//  - renameDefinitionFromPath() renames the table definition.
//
// NOTE(review): `from` and `to` point into the strings returned by
// getPath(). That is only safe if getPath() returns a reference to storage
// that outlives this function; doDropTable() and doCreateTable() in this
// file bind a `const std::string&` before calling c_str(). Confirm the
// return type of TableIdentifier::getPath().
// NOTE(review): the try/catch macros, conditional-compilation lines and
// returns are not visible in this listing.
int PBXTStorageEngine::doRenameTable(Session&,
5380
const TableIdentifier& from_ident,
5381
const TableIdentifier& to_ident)
5383
const char *from = from_ident.getPath().c_str();
5384
const char *to = to_ident.getPath().c_str();
5386
if (strcmp(from, to) == 0)
5389
THD *thd = current_thd;
5393
XTDatabaseHPtr to_db;
5395
if (XTSystemTableShare::isSystemTable(from))
5396
return rename_system_table(from, to);
5398
if (!(self = ha_set_current_thread(thd, &err)))
5399
return xt_ha_pbxt_to_mysql_error(err);
5401
XT_PRINT2(self, "rename_table (%s -> %s)\n", from, to);
5404
PBMSResultRec result;
5406
err = pbms_rename_table_with_blobs(from, to, &result);
5408
xt_logf(XT_NT_ERROR, "pbms_rename_table_with_blobs() Error: %s", result.mr_message);
5414
xt_ha_open_database_of_table(self, (XTPathStrPtr) to);
5415
to_db = self->st_database;
5417
xt_ha_open_database_of_table(self, (XTPathStrPtr) from);
5419
if (self->st_database != to_db)
5420
xt_throw_xterr(XT_CONTEXT, XT_ERR_CANNOT_CHANGE_DB);
5423
* NOTE: MySQL does not lock before calling rename table!
5425
* We cannot use pb_share because rename_table() is
5426
* called without correctly initializing
5429
share = ha_get_share(self, from, true);
5430
pushr_(ha_unget_share, share);
5431
ha_aquire_exclusive_use(self, share, NULL);
5432
pushr_(ha_release_exclusive_use, share);
5433
ha_close_open_tables(self, share, NULL);
5435
self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5436
xt_rename_table(self, (XTPathStrPtr) from, (XTPathStrPtr) to);
5438
freer_(); // ha_release_exclusive_use(share)
5439
freer_(); // ha_unget_share(share)
5442
* If there are no more PBXT tables in the database, we
5443
* "drop the database", which deletes all PBXT resources
5446
#ifdef XT_USE_GLOBAL_DB
5447
/* We now only drop the pbxt system data,
5448
* when the PBXT database is dropped.
5450
if (!xt_table_exists(self->st_database)) {
5451
xt_ha_all_threads_close_database(self, self->st_database);
5452
xt_drop_database(self, self->st_database);
5457
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5462
pbms_completed(NULL, (err == 0));
5466
plugin::StorageEngine::renameDefinitionFromPath(to_ident, from_ident);
5472
// Renaming a PBXT system table is not supported: both arguments are unused
// and the function returns ER_NOT_SUPPORTED_YET.
// Two signatures appear back to back (PBXTStorageEngine:: and ha_pbxt::);
// presumably alternatives selected by a preprocessor conditional that is
// not visible in this listing -- confirm.
int PBXTStorageEngine::rename_system_table(const char *XT_UNUSED(from), const char *XT_UNUSED(to))
5474
int ha_pbxt::rename_system_table(const char *XT_UNUSED(from), const char *XT_UNUSED(to))
5477
return ER_NOT_SUPPORTED_YET;
5480
// Report the largest total key length the engine accepts:
// XT_INDEX_MAX_KEY_SIZE.
uint ha_pbxt::max_supported_key_length() const
5482
return XT_INDEX_MAX_KEY_SIZE;
5485
// Report the largest length of a single key part: XT_INDEX_MAX_KEY_SIZE
// reduced by 4 to leave room for per-key overhead.
uint ha_pbxt::max_supported_key_part_length() const
5487
/* There is a little overhead in order to fit! */
5488
return XT_INDEX_MAX_KEY_SIZE-4;
5492
* Called in test_quick_select to determine if indexes should be used.
5494
* As far as I can tell, time is measured in "disk reads". So the
5495
* calculation below means the system reads about 38 rows per read.
5497
* For example a sequence scan uses a read buffer which reads a
5498
* number of rows at once, or a sequential scan can make use
5499
* of the cache (so it need to read less).
5501
// Estimate the cost of a full table scan: the number of live plus deleted
// records (from `stats`) divided by 38.0, plus a constant 2.
// NOTE(review): the return statement is not visible in this listing;
// presumably `result` is returned.
double ha_pbxt::scan_time()
5503
double result = (double) (stats.records + stats.deleted) / 38.0 + 2;
5508
* The next method will never be called if you do not implement indexes.
5510
// Estimate the cost of an index read as ranges + rows, converted to double
// with rows2double(). The index number is unused.
// NOTE(review): the return statement is not visible in this listing;
// presumably `result` is returned.
double ha_pbxt::read_time(uint XT_UNUSED(index), uint ranges, ha_rows rows)
5512
double result = rows2double(ranges+rows);
5517
* Given a starting key, and an ending key estimate the number of rows that
5518
* will exist between the two. end_key may be empty, in which case determine
5519
* if start_key matches any rows.
5521
* Called from opt_range.cc by check_quick_keys().
5524
// Estimate the number of rows in a key range on index `inx`.
//
// The keypart map is taken from min_key or max_key (the condition choosing
// between them is not visible in this listing). Its low set bits are
// consumed one at a time by the while loop; `segement` is then range-checked
// against 1..ind->mi_seg_count and the estimate is read from the
// is_recs_in_range value stored for that index segment.
// NOTE(review): the statement that advances `segement` inside the loop and
// the fallback taken when the range check fails are not visible here.
ha_rows ha_pbxt::records_in_range(uint inx, key_range *min_key, key_range *max_key)
5527
key_part_map keypart_map;
5532
keypart_map = min_key->keypart_map;
5534
keypart_map = max_key->keypart_map;
5537
ind = (XTIndexPtr) pb_share->sh_dic_keys[inx];
5539
while (keypart_map & 1) {
5541
keypart_map = keypart_map >> 1;
5544
if (segement < 1 || segement > ind->mi_seg_count)
5547
result = ind->mi_seg[segement-1].is_recs_in_range;
5548
#ifdef XT_PRINT_INDEX_OPT
5549
printf("records_in_range %s index %d cols req=%d/%d read_bits=%X write_bits=%X index_bits=%X --> %d\n", pb_open_tab->ot_table->tab_name->ps_path, (int) inx, segement, ind->mi_seg_count, (int) *table->read_set->bitmap, (int) *table->write_set->bitmap, (int) *ind->mi_col_map.bitmap, (int) result);
5555
* create() is called to create a table/database. The variable name will have the name
5556
* of the table. When create() is called you do not need to worry about opening
5557
* the table. Also, the FRM file will have already been created so adjusting
5558
* create_info will not do you any good. You can overwrite the frm file at this
5559
* point if you wish to change the table definition, but there are no methods
5560
* currently provided for doing that.
5562
* Called from handler.cc by ha_create_table().
5564
// Create the table identified by `ident` from the definition in `proto`
// (Drizzle storage-engine entry point).
//
// Visible steps:
//  - the paths "./pbxt/location", "./pbxt/tables" and "./pbxt/statistics"
//    are special-cased (the action taken is not visible in this listing);
//  - `dic` and `source_dic` are zeroed and the calling thread is bound;
//  - dic.dic_table_type is chosen from ident.getType();
//  - the table's database is opened and every key is checked against
//    XT_INDEX_MAX_KEY_SIZE (XT_ERR_KEY_TOO_LARGE is thrown otherwise);
//  - the Drizzle definition is written with writeDefinitionFromPath();
//  - the data dictionary is built from the query string and the table
//    share, foreign keys are checked, table flags and auto-increment /
//    average-row-size options are copied from `proto`;
//  - xt_create_table() creates the table, after which the dictionary is
//    finalized and freed (dic_my_table is cleared first so that
//    'table_arg' is not freed).
//
// NOTE(review): `table_arg` is used throughout but does not appear in the
// visible parameter list; a parameter line is presumably missing from this
// listing, as are the try/catch macros, `break`s and returns.
int PBXTStorageEngine::doCreateTable(Session&,
5566
const TableIdentifier& ident,
5567
drizzled::message::Table& proto)
5569
const std::string& path = ident.getPath();
5570
const char *table_path = path.c_str();
5571
THD *thd = current_thd;
5574
XTDDTable *tab_def = NULL;
5575
XTDictionaryRec dic, source_dic;
5577
if ((strcmp(table_path, "./pbxt/location") == 0) ||
5578
(strcmp(table_path, "./pbxt/tables") == 0) ||
5579
(strcmp(table_path, "./pbxt/statistics") == 0))
5582
if ((strcmp(table_path, "./pbxt/location") == 0) || (strcmp(table_path, "./pbxt/statistics") == 0))
5585
memset(&dic, 0, sizeof(dic));
5586
memset(&source_dic, 0, sizeof(source_dic));
5588
if (!(self = ha_set_current_thread(thd, &err)))
5589
return xt_ha_pbxt_to_mysql_error(err);
5590
XT_PRINT2(self, "create (%s) %s\n", table_path, (proto.type() == message::Table::TEMPORARY) ? "temporary" : "");
5591
switch(ident.getType()) {
5592
case message::Table::STANDARD:
5593
dic.dic_table_type = XT_TABLE_TYPE_STANDARD;
5596
case message::Table::TEMPORARY:
5597
dic.dic_table_type = XT_TABLE_TYPE_TEMPORARY;
5600
case message::Table::INTERNAL:
5601
dic.dic_table_type = XT_TABLE_TYPE_INTERNAL;
5604
case message::Table::FUNCTION:
5605
dic.dic_table_type = XT_TABLE_TYPE_FUNCTION;
5609
STAT_TRACE(self, *thd_query(thd));
5612
xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
5614
for (uint i=0; i<table_arg.getShare()->keys; i++) {
5615
if (table_arg.key_info[i].key_length > XT_INDEX_MAX_KEY_SIZE)
5616
xt_throw_sulxterr(XT_CONTEXT, XT_ERR_KEY_TOO_LARGE, table_arg.key_info[i].name, (u_long) XT_INDEX_MAX_KEY_SIZE);
5619
/* ($) auto_increment_value will be zero if
5620
* AUTO_INCREMENT is not used. Otherwise
5621
* Query was ALTER TABLE ... AUTO_INCREMENT = x; or
5622
* CREATE TABLE ... AUTO_INCREMENT = x;
5624
#ifdef XT_USE_DEFAULT_MEMORY_TABS
5625
if (create_info->storage_media == HA_SM_DEFAULT)
5626
source_dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
5629
StorageEngine::writeDefinitionFromPath(ident, proto);
5631
Session::QueryString query_string(thd->getQueryString());
5632
tab_def = xt_ri_create_table(self, true, (XTPathStrPtr) table_path, const_cast<char *>(query_string->c_str()), myxt_create_table_from_table(self, table_arg.getMutableShare()), &source_dic);
5633
tab_def->checkForeignKeys(self, proto.type() == message::Table::TEMPORARY);
5635
dic.dic_table = tab_def;
5636
dic.dic_my_table = table_arg.getMutableShare();
5637
dic.dic_tab_flags = source_dic.dic_tab_flags;
5638
//if (create_info.storage_media == HA_SM_MEMORY)
5639
// dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
5640
if (proto.type() == message::Table::TEMPORARY)
5641
dic.dic_tab_flags |= XT_TF_REAL_TEMP_TABLE;
5642
if (myxt_temp_table_name(table_path))
5643
dic.dic_tab_flags |= XT_TF_DDL_TEMP_TABLE;
5645
dic.dic_min_auto_inc = (xtWord8) proto.options().auto_increment_value(); /* ($) */
5646
dic.dic_def_ave_row_size = proto.options().avg_row_length();
5647
myxt_setup_dictionary(self, &dic);
5650
* We used to ignore the value of foreign_key_checks flag and allowed creation
5651
* of tables with "hanging" references. Now we validate FKs if foreign_key_checks != 0
5653
self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5656
* Previously I set delete_if_exists=TRUE because
5657
* CREATE TABLE was being used to TRUNCATE.
5658
* This was due to the flag HTON_CAN_RECREATE.
5659
* Now I could set delete_if_exists=FALSE, but
5660
* leaving it TRUE should not cause any problems.
5662
xt_create_table(self, (XTPathStrPtr) table_path, &dic);
5666
tab_def->finalize(self);
5667
dic.dic_table = NULL;
5668
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5672
/* Free the dictionary, but not 'table_arg'! */
5673
dic.dic_my_table = NULL;
5674
myxt_free_dictionary(self, &dic);
5680
// Begin an engine transaction for session `thd` (the options argument is
// unused).
//
// Visible steps: bind the calling thread, derive st_xact_mode from the
// session isolation level (levels up to ISO_READ_COMMITTED map to
// XT_XACT_COMMITTED_READ, everything else to XT_XACT_REPEATABLE_READ),
// copy the foreign-key-check and auto-commit options, clear the
// per-transaction / per-statement flags, wait for recovery to reach
// XT_RECOVER_SWEPT, open a database if the thread has none, and call
// xt_xn_begin().
//
// NOTE(review): no NULL check on `self` is visible between
// ha_set_current_thread() and the first dereference, while other callers in
// this file test the result -- confirm against the full source.
int PBXTStorageEngine::doStartTransaction(Session *thd, start_transaction_option_t XT_UNUSED(options))
5683
XTThreadPtr self = ha_set_current_thread(thd, &err);
5685
XT_PRINT0(self, "PBXTStorageEngine::doStartTransaction\n");
5687
/* Transaction mode numbers must be identical! */
5688
(void) ASSERT_NS(ISO_READ_UNCOMMITTED == XT_XACT_UNCOMMITTED_READ);
5689
(void) ASSERT_NS(ISO_SERIALIZABLE == XT_XACT_SERIALIZABLE);
5691
self->st_xact_mode = thd_tx_isolation(thd) <= ISO_READ_COMMITTED ? XT_XACT_COMMITTED_READ : XT_XACT_REPEATABLE_READ;
5692
self->st_ignore_fkeys = (thd_test_options(thd,OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5693
self->st_auto_commit = (thd_test_options(thd, (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) == 0;
5694
self->st_table_trans = FALSE;
5695
self->st_abort_trans = FALSE;
5696
self->st_stat_ended = FALSE;
5697
self->st_stat_trans = FALSE;
5698
xt_xres_wait_for_recovery(self, XT_RECOVER_SWEPT);
5700
if (!self->st_database)
5701
xt_ha_open_database_of_table(self, NULL);
5703
assert(!self->st_xact_data); // Check we're not called twice
5704
if (!xt_xn_begin(self)) {
5705
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, /*pb_ignore_dup_key*/false);
5711
// Savepoint creation hook. The only visible statement converts the calling
// thread's current error state into a server error code; the savepoint
// argument is unnamed and unused.
// NOTE(review): any statement preceding the return is not visible in this
// listing.
int PBXTStorageEngine::doSetSavepoint(drizzled::Session* thd, drizzled::NamedSavepoint&)
5713
return xt_ha_pbxt_thread_error_for_mysql(thd, xt_ha_thd_to_self(thd), false);
5716
// Rollback-to-savepoint hook. The only visible statement converts the
// calling thread's current error state into a server error code; the
// savepoint argument is unnamed and unused.
// NOTE(review): any statement preceding the return is not visible in this
// listing.
int PBXTStorageEngine::doRollbackToSavepoint(drizzled::Session* thd, drizzled::NamedSavepoint&)
5718
return xt_ha_pbxt_thread_error_for_mysql(thd, xt_ha_thd_to_self(thd), false);
5721
// Release-savepoint hook. The only visible statement converts the calling
// thread's current error state into a server error code; the savepoint
// argument is unnamed and unused.
// NOTE(review): any statement preceding the return is not visible in this
// listing.
int PBXTStorageEngine::doReleaseSavepoint(drizzled::Session* thd, drizzled::NamedSavepoint&)
5723
return xt_ha_pbxt_thread_error_for_mysql(thd, xt_ha_thd_to_self(thd), false);
5726
// Commit hook. The PBXT thread is read from the session's engine data; the
// transaction is committed with xt_xn_commit() only when `real_commit` is
// true and a thread is attached. A failed commit is converted to a server
// error code.
// NOTE(review): `self` is passed to XT_PRINT1 before the `self` NULL test
// below -- confirm the trace macro tolerates a NULL thread.
int PBXTStorageEngine::doCommit(drizzled::Session* thd, bool real_commit)
5729
XTThreadPtr self = (XTThreadPtr) *thd->getEngineData(pbxt_hton);
5731
XT_PRINT1(self, "PBXTStorageEngine::doCommit(real_commit = %s)\n", real_commit ? "true" : "false");
5733
if (real_commit && self) {
5734
if (!xt_xn_commit(self))
5735
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5741
// Rollback hook. The PBXT thread is read from the session's engine data;
// the transaction is rolled back with xt_xn_rollback() only when
// `real_commit` is true and a thread is attached. A failed rollback is
// converted to a server error code.
// NOTE(review): `self` is passed to XT_PRINT1 before the `self` NULL test
// below -- confirm the trace macro tolerates a NULL thread.
int PBXTStorageEngine::doRollback(drizzled::Session* thd, bool real_commit)
5744
XTThreadPtr self = (XTThreadPtr) *thd->getEngineData(pbxt_hton);
5746
XT_PRINT1(self, "PBXTStorageEngine::doRollback(real_commit = %s)\n", real_commit ? "true" : "false");
5748
if (real_commit && self) {
5749
if (!xt_xn_rollback(self))
5750
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5757
// Collect a TableIdentifier for every table definition file found in
// `directory` and append it to `set_of_identifiers`.
//
// An entry is tested on three things: it must contain a '.', the text from
// that '.' onwards must equal DEFAULT_FILE_EXTENSION (case-insensitively),
// and the name must not start with TMP_FILE_PREFIX (the action taken when
// the test fails is not visible in this listing). The file name is converted
// with filename_to_tablename() and the extension is cut off before the
// identifier is built.
//
// NOTE(review): strchr() finds the FIRST '.', so a file name containing an
// additional dot would not match the extension test -- confirm whether such
// names can occur.
void PBXTStorageEngine::doGetTableIdentifiers(drizzled::CachedDirectory &directory,
5758
drizzled::SchemaIdentifier &schema_identifier,
5759
drizzled::TableIdentifiers &set_of_identifiers)
5761
CachedDirectory::Entries entries= directory.getEntries();
5763
for (CachedDirectory::Entries::iterator entry_iter= entries.begin();
5764
entry_iter != entries.end(); ++entry_iter)
5766
CachedDirectory::Entry *entry= *entry_iter;
5767
const std::string *filename= &entry->filename;
5769
assert(filename->size());
5771
const char *ext= strchr(filename->c_str(), '.');
5773
if (ext == NULL || my_strcasecmp(system_charset_info, ext, DEFAULT_FILE_EXTENSION) ||
5774
(filename->compare(0, strlen(TMP_FILE_PREFIX), TMP_FILE_PREFIX) == 0))
5778
char uname[NAME_LEN + 1];
5779
uint32_t file_name_len;
5781
file_name_len= filename_to_tablename(filename->c_str(), uname, sizeof(uname));
5782
// TODO: Remove need for memory copy here
// sizeof() counts the terminating NUL, so the index below is
// file_name_len minus the number of characters in the extension.
5783
uname[file_name_len - sizeof(DEFAULT_FILE_EXTENSION) + 1]= '\0'; // Subtract ending, place NULL
5785
set_of_identifiers.push_back(TableIdentifier(schema_identifier, uname));
5790
// Collect the name of every table definition file found in `directory`
// into `set_of_names`.
//
// An entry is tested on three things: it must contain a '.', the text from
// that '.' onwards must equal DEFAULT_FILE_EXTENSION (case-insensitively),
// and the name must not start with TMP_FILE_PREFIX (the action taken when
// the test fails is not visible in this listing). The file name is converted
// with filename_to_tablename() and the extension is cut off before the name
// is inserted.
//
// NOTE(review): strchr() finds the FIRST '.', so a file name containing an
// additional dot would not match the extension test -- confirm whether such
// names can occur.
// NOTE(review): at least one parameter line appears to be missing from this
// listing between `directory` and `set_of_names`.
void PBXTStorageEngine::doGetTableNames(
5791
CachedDirectory &directory,
5793
std::set<std::string>& set_of_names)
5795
CachedDirectory::Entries entries= directory.getEntries();
5797
for (CachedDirectory::Entries::iterator entry_iter= entries.begin();
5798
entry_iter != entries.end(); ++entry_iter)
5800
CachedDirectory::Entry *entry= *entry_iter;
5801
const std::string *filename= &entry->filename;
5803
assert(filename->size());
5805
const char *ext= strchr(filename->c_str(), '.');
5807
if (ext == NULL || my_strcasecmp(system_charset_info, ext, DEFAULT_FILE_EXTENSION) ||
5808
(filename->compare(0, strlen(TMP_FILE_PREFIX), TMP_FILE_PREFIX) == 0))
5812
char uname[NAME_LEN + 1];
5813
uint32_t file_name_len;
5815
file_name_len= filename_to_tablename(filename->c_str(), uname, sizeof(uname));
5816
// TODO: Remove need for memory copy here
// sizeof() counts the terminating NUL, so the index below is
// file_name_len minus the number of characters in the extension.
5817
uname[file_name_len - sizeof(DEFAULT_FILE_EXTENSION) + 1]= '\0'; // Subtract ending, place NULL
5818
set_of_names.insert(uname);
5824
// Test whether a table exists by probing for its definition file: the
// identifier's path with DEFAULT_FILE_EXTENSION appended is checked with
// access(..., F_OK), which returns non-zero when the file is not accessible.
// NOTE(review): the return statements are not visible in this listing.
bool PBXTStorageEngine::doDoesTableExist(Session&, const TableIdentifier &identifier)
5826
std::string proto_path(identifier.getPath());
5827
proto_path.append(DEFAULT_FILE_EXTENSION);
5829
if (access(proto_path.c_str(), F_OK))
5839
// Build the foreign-key definition text for the open table and return it as
// a C string (tab_def.sb_cstring).
//
// Visible steps: bind the calling thread, reopen() the handle, require a
// dictionary table on the open table, and let it fill the string buffer via
// loadForeignKeyString(). The buffer is reset to size 0 and the thread error
// converted in what is presumably the error path.
// NOTE(review): the early returns and try/catch macros are not visible in
// this listing. The returned buffer is presumably released through
// free_foreign_key_create_info() -- confirm.
char *ha_pbxt::get_foreign_key_create_info()
5841
THD *thd = current_thd;
5844
XTStringBufferRec tab_def = { 0, 0, 0 };
5846
if (!(self = ha_set_current_thread(thd, &err))) {
5847
xt_ha_pbxt_to_mysql_error(err);
5852
if ((err = reopen()))
5856
if (!pb_open_tab->ot_table->tab_dic.dic_table)
5860
pb_open_tab->ot_table->tab_dic.dic_table->loadForeignKeyString(self, &tab_def);
5863
xt_sb_set_size(self, &tab_def, 0);
5864
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
5868
return tab_def.sb_cstring;
5871
// Presumably releases a string obtained from get_foreign_key_create_info();
// the body is not visible in this listing -- confirm against the full source.
void ha_pbxt::free_foreign_key_create_info(char* str)
5876
// Copy the calling thread's current PBXT exception message into `buf`
// using system_charset_info. The `error` argument is unused.
// The thread binding and the presence of an exception code (e_xt_err) are
// tested first; the values returned in each case are not visible in this
// listing.
bool ha_pbxt::get_error_message(int XT_UNUSED(error), String *buf)
5878
THD *thd = current_thd;
5882
if (!(self = ha_set_current_thread(thd, &err)))
5885
if (!self->t_exception.e_xt_err)
5888
buf->copy(self->t_exception.e_err_msg, (uint32_t) strlen(self->t_exception.e_err_msg), system_charset_info);
5893
* get info about FKs of the currently open table
5895
* 1. REPLACE; is > 0 if table is referred by a FOREIGN KEY
5896
* 2. INFORMATION_SCHEMA tables: TABLE_CONSTRAINTS, REFERENTIAL_CONSTRAINTS
5897
* Return value: as of 5.1.24 it's ignored
5900
// Append one FOREIGN_KEY_INFO to `f_key_list` for every foreign key of the
// currently open table.
//
// For each foreign key the visible code fills in: the constraint name, the
// referenced database and table names (parsed from the tail of the
// referenced table path), the referenced key name (looked up by index number
// in the referenced table's index list, or in this table for a
// self-reference), the ON DELETE / ON UPDATE action strings, and the lists
// of foreign and referenced column names. All strings and the info record
// itself are allocated from the session (thd_make_lex_string / thd_alloc).
//
// NOTE(review): XT_ERR_NO_DICTIONARY is thrown with xt_throw_errno(), while
// other XT_ERR_* codes in this file are thrown with xt_throw_xterr() --
// confirm which is intended.
// NOTE(review): the bodies of the two path-scanning while loops, the action
// taken when `ix` is NULL, and the try/catch macros are not visible in this
// listing.
int ha_pbxt::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
5906
if (!(self = ha_set_current_thread(thd, &err))) {
5907
return xt_ha_pbxt_to_mysql_error(err);
5911
XTDDTable *table_dic = pb_open_tab->ot_table->tab_dic.dic_table;
5913
if (table_dic == NULL)
5914
xt_throw_errno(XT_CONTEXT, XT_ERR_NO_DICTIONARY);
5916
for (int i = 0, sz = table_dic->dt_fkeys.size(); i < sz; i++) {
5917
FOREIGN_KEY_INFO *fk_info= new // assumed that C++ exceptions are disabled
5918
(thd_alloc(thd, sizeof(FOREIGN_KEY_INFO))) FOREIGN_KEY_INFO;
5920
if (fk_info == NULL)
5921
xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
5923
XTDDForeignKey *fk = table_dic->dt_fkeys.itemAt(i);
5925
const char *path = fk->fk_ref_tab_name->ps_path;
5926
const char *ref_tbl_name = path + strlen(path);
5928
while (ref_tbl_name != path && !XT_IS_DIR_CHAR(*ref_tbl_name))
5931
const char * ref_db_name = ref_tbl_name - 1;
5933
while (ref_db_name != path && !XT_IS_DIR_CHAR(*ref_db_name))
5939
fk_info->forein_id = thd_make_lex_string(thd, 0,
5940
fk->co_name, (uint) strlen(fk->co_name), 1);
5942
fk_info->referenced_db = thd_make_lex_string(thd, 0,
5943
ref_db_name, (uint) (ref_tbl_name - ref_db_name - 1), 1);
5945
fk_info->referenced_table = thd_make_lex_string(thd, 0,
5946
ref_tbl_name, (uint) strlen(ref_tbl_name), 1);
5948
fk_info->referenced_key_name = NULL;
5950
XTIndex *ix = fk->getReferenceIndexPtr();
5951
if (ix == NULL) /* can be NULL if another thread changes referenced table at the moment */
5954
XTDDTable *ref_table = fk->fk_ref_table;
5956
// might be a self-reference
5957
if ((ref_table == NULL)
5958
&& (xt_tab_compare_names(path, table_dic->dt_table->tab_name->ps_path) == 0)) {
5959
ref_table = table_dic;
5962
if (ref_table != NULL) {
5963
const XTList<XTDDIndex>& ix_list = ref_table->dt_indexes;
5964
for (int j = 0, sz2 = ix_list.size(); j < sz2; j++) {
5965
XTDDIndex *ddix = ix_list.itemAt(j);
5966
if (ddix->in_index == ix->mi_index_no) {
5967
const char *ix_name =
5968
ddix->co_name ? ddix->co_name : ddix->co_ind_name;
5969
fk_info->referenced_key_name = thd_make_lex_string(thd, 0,
5970
ix_name, (uint) strlen(ix_name), 1);
5976
action = XTDDForeignKey::actionTypeToString(fk->fk_on_delete);
5977
fk_info->delete_method = thd_make_lex_string(thd, 0,
5978
action, (uint) strlen(action), 1);
5979
action = XTDDForeignKey::actionTypeToString(fk->fk_on_update);
5980
fk_info->update_method = thd_make_lex_string(thd, 0,
5981
action, (uint) strlen(action), 1);
5983
const XTList<XTDDColumnRef>& cols = fk->co_cols;
5984
for (int j = 0, sz2 = cols.size(); j < sz2; j++) {
5985
XTDDColumnRef *col_ref= cols.itemAt(j);
5986
fk_info->foreign_fields.push_back(thd_make_lex_string(thd, 0,
5987
col_ref->cr_col_name, (uint) strlen(col_ref->cr_col_name), 1));
5990
const XTList<XTDDColumnRef>& ref_cols = fk->fk_ref_cols;
5991
for (int j = 0, sz2 = ref_cols.size(); j < sz2; j++) {
5992
XTDDColumnRef *col_ref= ref_cols.itemAt(j);
5993
fk_info->referenced_fields.push_back(thd_make_lex_string(thd, 0,
5994
col_ref->cr_col_name, (uint) strlen(col_ref->cr_col_name), 1));
5997
f_key_list->push_back(fk_info);
6001
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
6008
/*
 * Reports whether this table appears in the list of referencing tables.
 * Returns 1 when the table's dictionary entry has a non-NULL dt_trefs
 * list, and 0 otherwise.
 */
uint ha_pbxt::referenced_by_foreign_key()
6010
/* Dictionary entry of the table currently opened by this handler. */
XTDDTable *table_dic = pb_open_tab->ot_table->tab_dic.dic_table;
6014
/* Check the list of referencing tables: */
6015
return table_dic->dt_trefs ? 1 : 0;
6020
/*
 * Minimal definition of the system variable structure: it contains only
 * the common plugin variable header. pbxt_record_cache_size_func() reads
 * var->flags through this definition.
 * NOTE(review): presumably this mirrors the leading fields of the server's
 * own structure -- confirm against the plugin API headers.
 */
struct st_mysql_sys_var
6022
MYSQL_PLUGIN_VAR_HEADER;
6025
/*
 * USE_CONST_SAVE selects the variant of pbxt_record_cache_size_func()
 * whose 'save' argument is const-qualified.
 * The visible conditions define it for MYSQL_VERSION_ID >= 50124 (within
 * the branch for versions below 60000) and for MYSQL_VERSION_ID >= 60005.
 * NOTE(review): the #else/#endif lines are not visible in this excerpt;
 * verify the exact nesting against the full file.
 */
#if MYSQL_VERSION_ID < 60000
6026
#if MYSQL_VERSION_ID >= 50124
6027
#define USE_CONST_SAVE
6030
#if MYSQL_VERSION_ID >= 60005
6031
#define USE_CONST_SAVE
6037
/* Map the MySQL structure name onto the Drizzle system variable type
 * (presumably only in the Drizzle build branch -- TODO confirm). */
#define st_mysql_sys_var drizzled::drizzle_sys_var
6041
/*
 * Update callback registered for the record_cache_size system variable.
 *
 * Stores the new string value (*save) into the variable storage (*tgt),
 * re-evaluates the size with ha_set_variable(), applies it with
 * xt_tc_set_cache_size() and writes the resulting size to the log.
 *
 * thd  - unused.
 * var  - variable descriptor; only var->flags is read.
 * tgt  - address of the char* that holds the variable's value.
 * save - address of the char* holding the new value; const-qualified
 *        when USE_CONST_SAVE is defined.
 */
#ifdef USE_CONST_SAVE
6042
static void pbxt_record_cache_size_func(THD *XT_UNUSED(thd), struct st_mysql_sys_var *var, void *tgt, const void *save)
6044
static void pbxt_record_cache_size_func(THD *XT_UNUSED(thd), struct st_mysql_sys_var *var, void *tgt, void *save)
6047
xtInt8 record_cache_size;
6049
/* Remember the previous string so it can be released after replacement. */
char *old= *(char **) tgt;
6050
*(char **)tgt= *(char **) save;
6051
/* A PLUGIN_VAR_MEMALLOC variable keeps a private copy of its string:
 * duplicate the new value and free the old one.
 * NOTE(review): the braces of this branch are not visible in this excerpt;
 * presumably both statements below belong to it -- confirm. */
if (var->flags & PLUGIN_VAR_MEMALLOC)
6053
*(char **)tgt= my_strdup(*(char **) save, MYF(0));
6054
my_free(old, MYF(0));
6056
/* Convert the textual setting into a byte count and resize the record cache. */
record_cache_size = ha_set_variable(&pbxt_record_cache_size, &vp_record_cache_size);
6057
xt_tc_set_cache_size((size_t) record_cache_size);
6061
/* NOTE(review): 'buffer' is declared on a line not visible here; it is
 * formatted with sprintf (no size bound) and then passed to xt_logf in the
 * format-string position. The text is a fixed prefix plus a number. */
sprintf(buffer, "pbxt_record_cache_size=%llu\n", (u_llong) record_cache_size);
6062
xt_logf(XT_NT_INFO, buffer);
6066
/*
 * Storage engine descriptor. Its address is passed as the plugin
 * descriptor in the MYSQL_STORAGE_ENGINE_PLUGIN entry of
 * mysql_declare_plugin(pbxt).
 */
struct st_mysql_storage_engine pbxt_storage_engine = {
6067
MYSQL_HANDLERTON_INTERFACE_VERSION
6069
/*
 * Information schema descriptor carrying the interface version.
 * NOTE(review): presumably referenced by the MYSQL_INFORMATION_SCHEMA_PLUGIN
 * entry of the plugin declaration -- the referencing line is not visible
 * in this excerpt.
 */
static st_mysql_information_schema pbxt_statitics = {
6070
MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION
6073
/*
 * String-valued system variables of the engine (only declared for
 * MYSQL_VERSION_ID >= 50118). Every one of them is flagged
 * PLUGIN_VAR_READONLY. The final argument line of most declarations is
 * not visible in this excerpt.
 */
#if MYSQL_VERSION_ID >= 50118
6074
static MYSQL_SYSVAR_STR(index_cache_size, pbxt_index_cache_size,
6075
PLUGIN_VAR_READONLY,
6076
"The amount of memory allocated to the index cache, used only to cache index data.",
6079
/* record_cache_size is the only string variable here that registers an
 * update callback (pbxt_record_cache_size_func). */
static MYSQL_SYSVAR_STR(record_cache_size, pbxt_record_cache_size,
6080
PLUGIN_VAR_READONLY, // PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC,
6081
"The amount of memory allocated to the record cache used to cache table data.",
6082
NULL, pbxt_record_cache_size_func, NULL);
6084
static MYSQL_SYSVAR_STR(log_cache_size, pbxt_log_cache_size,
6085
PLUGIN_VAR_READONLY,
6086
"The amount of memory allocated to the transaction log cache used to cache transaction log data.",
6089
static MYSQL_SYSVAR_STR(log_file_threshold, pbxt_log_file_threshold,
6090
PLUGIN_VAR_READONLY,
6091
"The size of a transaction log before rollover, and a new log is created.",
6094
static MYSQL_SYSVAR_STR(transaction_buffer_size, pbxt_transaction_buffer_size,
6095
PLUGIN_VAR_READONLY,
6096
"The size of the global transaction log buffer (the engine allocates 2 buffers of this size).",
6099
static MYSQL_SYSVAR_STR(log_buffer_size, pbxt_log_buffer_size,
6100
PLUGIN_VAR_READONLY,
6101
"The size of the buffer used to cache data from transaction and data logs during sequential scans, or when writing a data log.",
6104
/* NOTE(review): the help text below describes a per-thread transaction data
 * buffer, which does not match the variable name checkpoint_frequency --
 * looks like a copy/paste leftover; confirm the intended description. */
static MYSQL_SYSVAR_STR(checkpoint_frequency, pbxt_checkpoint_frequency,
6105
PLUGIN_VAR_READONLY,
6106
"The size of the transaction data buffer which is allocate by each thread.",
6109
static MYSQL_SYSVAR_STR(data_log_threshold, pbxt_data_log_threshold,
6110
PLUGIN_VAR_READONLY,
6111
"The maximum size of a data log file.",
6114
static MYSQL_SYSVAR_STR(data_file_grow_size, pbxt_data_file_grow_size,
6115
PLUGIN_VAR_READONLY,
6116
"The amount by which the handle data files (.xtd) grow.",
6119
static MYSQL_SYSVAR_STR(row_file_grow_size, pbxt_row_file_grow_size,
6120
PLUGIN_VAR_READONLY,
6121
"The amount by which the row pointer files (.xtr) grow.",
6124
static MYSQL_SYSVAR_STR(record_write_threshold, pbxt_record_write_threshold,
6125
PLUGIN_VAR_READONLY,
6126
"The amount data written to the record files (.xtd and .xtr) before the changes are applied to the database.",
6129
/*
 * Integer system variables, all flagged PLUGIN_VAR_OPCMDARG and declared
 * without check or update callbacks (both NULL).
 * The four trailing numeric arguments are, per the MySQL plugin API,
 * default value, minimum, maximum and block size.
 */
static MYSQL_SYSVAR_INT(garbage_threshold, xt_db_garbage_threshold,
6130
PLUGIN_VAR_OPCMDARG,
6131
"The percentage of garbage in a repository file before it is compacted.",
6132
NULL, NULL, XT_DL_DEFAULT_GARBAGE_LEVEL, 0, 100, 1);
6134
static MYSQL_SYSVAR_INT(log_file_count, xt_db_log_file_count,
6135
PLUGIN_VAR_OPCMDARG,
6136
"The minimum number of transaction logs used.",
6137
NULL, NULL, XT_DL_DEFAULT_XLOG_COUNT, 1, 20000, 1);
6139
static MYSQL_SYSVAR_INT(auto_increment_mode, xt_db_auto_increment_mode,
6140
PLUGIN_VAR_OPCMDARG,
6141
"The auto-increment mode, 0 = MySQL standard (default), 1 = previous ID's never reused.",
6142
NULL, NULL, XT_AUTO_INCREMENT_DEF, 0, 1, 1);
6145
static MYSQL_SYSVAR_INT(offline_log_function, xt_db_offline_log_function,
6146
PLUGIN_VAR_OPCMDARG,
6147
"Determines what happens to transaction logs when the are moved offline, 0 = recycle logs (default), 1 = delete logs (default on Mac OS X), 2 = keep logs.",
6148
NULL, NULL, XT_OFFLINE_LOG_FUNCTION_DEF, 0, 2, 1);
6151
/* The allowed range is expressed with the XT_PRIORITY_LOW and
 * XT_PRIORITY_HIGH constants; the default is XT_PRIORITY_LOW. */
static MYSQL_SYSVAR_INT(sweeper_priority, xt_db_sweeper_priority,
6152
PLUGIN_VAR_OPCMDARG,
6153
"Determines the priority of the background sweeper process, 0 = low (default), 1 = normal (same as user threads), 2 = high.",
6154
NULL, NULL, XT_PRIORITY_LOW, XT_PRIORITY_LOW, XT_PRIORITY_HIGH, 1);
6157
/*
 * Two alternative declarations of the boolean support_xa variable. They
 * differ in their help text (default enabled vs. default disabled).
 * NOTE(review): the preprocessor condition that selects between them and
 * the final argument lines are not visible in this excerpt.
 */
static MYSQL_SYSVAR_BOOL(support_xa, pbxt_support_xa,
6158
PLUGIN_VAR_OPCMDARG,
6159
"Enable PBXT support for the XA two-phase commit, default is enabled",
6162
static MYSQL_SYSVAR_BOOL(support_xa, pbxt_support_xa,
6163
PLUGIN_VAR_OPCMDARG,
6164
"Enable PBXT support for the XA two-phase commit, default is disabled (due to assertion failure in MySQL)",
6165
/* The problem is, in MySQL an assertion fails in debug mode:
6166
* Assertion failed: (total_ha_2pc == (ulong) opt_bin_log+1), function ha_recover, file handler.cc, line 1557.
6171
/* Percentage threshold (range 0..100) stored in xt_db_index_dirty_threshold;
 * the default is XT_DL_DEFAULT_INDEX_DIRTY_LEVEL. */
static MYSQL_SYSVAR_INT(index_dirty_threshold, xt_db_index_dirty_threshold,
6172
PLUGIN_VAR_OPCMDARG,
6173
"The percentage of the index cache that must be dirty before the index cache is flushed.",
6174
NULL, NULL, XT_DL_DEFAULT_INDEX_DIRTY_LEVEL, 0, 100, 1);
6176
/* Commit flush policy stored in xt_db_flush_log_at_trx_commit: the default
 * is 1 and the allowed range is 0..2, as described in the help text. */
static MYSQL_SYSVAR_INT(flush_log_at_trx_commit, xt_db_flush_log_at_trx_commit,
6177
PLUGIN_VAR_OPCMDARG,
6178
"Determines whether the transaction log is written and/or flushed when a transaction is committed (no matter what the setting the log is written and flushed once per second), 0 = no write & no flush, 1 = write & flush (default), 2 = write & no flush.",
6179
NULL, NULL, 1, 0, 2, 1);
6181
/*
 * Table of all system variables declared above. It is passed to the
 * storage engine entry of mysql_declare_plugin(pbxt) when
 * MYSQL_VERSION_ID >= 50118.
 * NOTE(review): the terminating entry and closing brace are not visible in
 * this excerpt.
 */
static struct st_mysql_sys_var* pbxt_system_variables[] = {
6182
MYSQL_SYSVAR(index_cache_size),
6183
MYSQL_SYSVAR(record_cache_size),
6184
MYSQL_SYSVAR(log_cache_size),
6185
MYSQL_SYSVAR(log_file_threshold),
6186
MYSQL_SYSVAR(transaction_buffer_size),
6187
MYSQL_SYSVAR(log_buffer_size),
6188
MYSQL_SYSVAR(checkpoint_frequency),
6189
MYSQL_SYSVAR(data_log_threshold),
6190
MYSQL_SYSVAR(data_file_grow_size),
6191
MYSQL_SYSVAR(row_file_grow_size),
6192
MYSQL_SYSVAR(record_write_threshold),
6193
MYSQL_SYSVAR(garbage_threshold),
6194
MYSQL_SYSVAR(log_file_count),
6195
MYSQL_SYSVAR(auto_increment_mode),
6196
MYSQL_SYSVAR(offline_log_function),
6197
MYSQL_SYSVAR(sweeper_priority),
6198
MYSQL_SYSVAR(support_xa),
6199
MYSQL_SYSVAR(index_dirty_threshold),
6200
MYSQL_SYSVAR(flush_log_at_trx_commit),
6207
/*
 * Plugin declaration for the Drizzle build: author, description and
 * pbxt_init as the initialisation entry point.
 * NOTE(review): several fields of the declaration are not visible in this
 * excerpt.
 */
DRIZZLE_DECLARE_PLUGIN
6212
"Paul McCullagh, PrimeBase Technologies GmbH",
6213
"High performance, multi-versioning transactional engine",
6215
pbxt_init, /* Plugin Init */
6217
NULL /* config options */
6219
DRIZZLE_DECLARE_PLUGIN_END;
6221
/*
 * Plugin declaration for the MySQL build. Two plugins are registered:
 * the storage engine itself (pbxt_init / pbxt_end) and an information
 * schema plugin (pbxt_init_statistics / pbxt_exit_statistics).
 * NOTE(review): several fields of both entries are not visible in this
 * excerpt.
 */
mysql_declare_plugin(pbxt)
6223
MYSQL_STORAGE_ENGINE_PLUGIN,
6224
&pbxt_storage_engine,
6226
"Paul McCullagh, PrimeBase Technologies GmbH",
6227
"High performance, multi-versioning transactional engine",
6229
pbxt_init, /* Plugin Init */
6230
pbxt_end, /* Plugin Deinit */
6232
NULL, /* status variables */
6233
#if MYSQL_VERSION_ID >= 50118
6234
pbxt_system_variables, /* system variables */
6238
NULL /* config options */
6240
MYSQL_INFORMATION_SCHEMA_PLUGIN,
6243
"Paul McCullagh, PrimeBase Technologies GmbH",
6244
"PBXT internal system statitics",
6246
pbxt_init_statistics, /* plugin init */
6247
pbxt_exit_statistics, /* plugin deinit */
6249
NULL, /* status variables */
6251
NULL /* config options */
6253
mysql_declare_plugin_end;
6256
#if defined(XT_WIN) && defined(XT_COREDUMP)
6259
* WINDOWS CORE DUMP SUPPORT
6261
* MySQL supports core dumping on Windows with --core-file command line option.
6262
* However it creates dumps with the MiniDumpNormal option which saves only stack traces.
6264
* We instead (or in addition) create dumps with MiniDumpWithoutOptionalData option
6265
* which saves all available information. To enable core dumping enable XT_COREDUMP
6267
* In addition, pbxt_crash_debug must be set to TRUE which is the case if XT_CRASH_DEBUG
6269
* This switch is also controlled by creating a file called "no-debug" or "crash-debug"
6270
* in the pbxt database directory.
6273
/*
 * Flags selecting what a minidump contains. core_dump() passes
 * MiniDumpWithPrivateReadWriteMemory.
 * NOTE(review): presumably declared locally so that the DbgHelp header is
 * not required at build time -- confirm.
 */
typedef enum _MINIDUMP_TYPE {
6274
MiniDumpNormal = 0x0000,
6275
MiniDumpWithDataSegs = 0x0001,
6276
MiniDumpWithFullMemory = 0x0002,
6277
MiniDumpWithHandleData = 0x0004,
6278
MiniDumpFilterMemory = 0x0008,
6279
MiniDumpScanMemory = 0x0010,
6280
MiniDumpWithUnloadedModules = 0x0020,
6281
MiniDumpWithIndirectlyReferencedMemory = 0x0040,
6282
MiniDumpFilterModulePaths = 0x0080,
6283
MiniDumpWithProcessThreadData = 0x0100,
6284
MiniDumpWithPrivateReadWriteMemory = 0x0200,
6287
/*
 * Exception details handed to the dump writer. core_dump() fills in
 * ThreadId, ExceptionPointers and ClientPointers.
 * NOTE(review): the ThreadId member is used by core_dump() but its
 * declaration line is not visible in this excerpt.
 */
typedef struct _MINIDUMP_EXCEPTION_INFORMATION {
6289
PEXCEPTION_POINTERS ExceptionPointers;
6290
BOOL ClientPointers;
6291
} MINIDUMP_EXCEPTION_INFORMATION, *PMINIDUMP_EXCEPTION_INFORMATION;
6293
/*
 * Function pointer type for the "MiniDumpWriteDump" entry point that
 * core_dump() resolves with GetProcAddress().
 * NOTE(review): only part of the parameter list is visible in this excerpt.
 */
typedef BOOL (WINAPI *MINIDUMPWRITEDUMP)(
6297
MINIDUMP_TYPE DumpType,
6298
void *ExceptionParam,
6299
void *UserStreamParam,
6303
/* Directory prefix used by core_dump() both to locate DBGHELP.DLL and as
 * the prefix of the dump file name. Zero-initialised. */
char base_path[_MAX_PATH] = {0};
6304
/* Full name of the dump file that core_dump() creates. Zero-initialised. */
char dump_path[_MAX_PATH] = {0};
6306
/*
 * Writes a minidump of the current process through MiniDumpWriteDump,
 * which is loaded dynamically from DBGHELP.DLL.
 *
 * pExceptionInfo - exception context to record in the dump; when NULL, no
 *                  exception information is passed to the dump writer.
 *
 * NOTE(review): several declarations (err, i, hFile), error branches and
 * braces are not visible in this excerpt; the comments below describe only
 * the visible statements.
 */
void core_dump(struct _EXCEPTION_POINTERS *pExceptionInfo)
6308
SECURITY_ATTRIBUTES sa = { sizeof(SECURITY_ATTRIBUTES), 0, 0 };
6310
HMODULE hDll = NULL;
6312
MINIDUMPWRITEDUMP pDump;
6313
char *end_ptr = base_path;
6315
MINIDUMP_EXCEPTION_INFORMATION ExInfo, *ExInfoPtr = NULL;
6317
/* Only describe the exception when one was supplied; otherwise ExInfoPtr
 * stays NULL. */
if (pExceptionInfo) {
6318
ExInfo.ThreadId = GetCurrentThreadId();
6319
ExInfo.ExceptionPointers = pExceptionInfo;
6320
ExInfo.ClientPointers = NULL;
6321
ExInfoPtr = &ExInfo;
6324
/* Remember where base_path ends before the DLL name is appended. */
end_ptr = base_path + strlen(base_path);
6326
/* First try DBGHELP.DLL under base_path, then by bare name.
 * NOTE(review): strcat has no bound check against _MAX_PATH. */
strcat(base_path, "DBGHELP.DLL" );
6327
hDll = LoadLibrary(base_path);
6331
err = HRESULT_CODE(GetLastError());
6332
hDll = LoadLibrary( "DBGHELP.DLL" );
6334
err = HRESULT_CODE(GetLastError());
6339
pDump = (MINIDUMPWRITEDUMP)GetProcAddress( hDll, "MiniDumpWriteDump" );
6342
err = HRESULT_CODE(GetLastError());
6346
/* Try numbered file names PBXTCore00000001.dmp, ...; CREATE_NEW makes
 * CreateFile fail when the name is already taken.
 * NOTE(review): sprintf has no bound check against _MAX_PATH. */
for (i = 1; i < INT_MAX; i++) {
6347
sprintf(dump_path, "%sPBXTCore%08d.dmp", base_path, i);
6348
hFile = CreateFile( dump_path, GENERIC_WRITE, FILE_SHARE_WRITE, NULL, CREATE_NEW,
6349
FILE_ATTRIBUTE_NORMAL, NULL );
6351
if ( hFile != INVALID_HANDLE_VALUE )
6354
if (HRESULT_CODE(GetLastError()) == ERROR_FILE_EXISTS )
6361
/* Write the dump, including private read/write memory, to the new file. */
BOOL bOK = pDump( GetCurrentProcess(), GetCurrentProcessId(), hFile,
6362
MiniDumpWithPrivateReadWriteMemory, ExInfoPtr, NULL, NULL );
6367
/*
 * Exception filter: writes a core dump for the given exception and then
 * returns EXCEPTION_EXECUTE_HANDLER.
 */
LONG crash_filter( struct _EXCEPTION_POINTERS *pExceptionInfo )
6369
core_dump(pExceptionInfo);
6370
return EXCEPTION_EXECUTE_HANDLER;
6373
/* Installs crash_filter() as the unhandled exception filter. */
void register_crash_filter()
6375
SetUnhandledExceptionFilter( (LPTOP_LEVEL_EXCEPTION_FILTER) crash_filter );
6378
#endif // XT_WIN && XT_COREDUMP