1
/* Copyright (c) 2005 PrimeBase Technologies GmbH
3
* Derived from ha_example.h
4
* Copyright (C) 2003 MySQL AB
8
* This program is free software; you can redistribute it and/or modify
9
* it under the terms of the GNU General Public License as published by
10
* the Free Software Foundation; either version 2 of the License, or
11
* (at your option) any later version.
13
* This program is distributed in the hope that it will be useful,
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
* GNU General Public License for more details.
18
* You should have received a copy of the GNU General Public License
19
* along with this program; if not, write to the Free Software
20
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
* 2005-11-10 Paul McCullagh
26
#ifdef USE_PRAGMA_IMPLEMENTATION
27
#pragma implementation // gcc: Class implementation
30
#include "xt_config.h"
42
#include <drizzled/common.h>
43
#include <drizzled/plugin.h>
44
#include <drizzled/field.h>
45
#include <drizzled/session.h>
46
#include <drizzled/data_home.h>
47
#include <drizzled/error.h>
48
#include <drizzled/table.h>
49
#include <drizzled/field/timestamp.h>
50
#include <drizzled/session.h>
52
#define my_strdup(a,b) strdup(a)
54
using namespace drizzled;
55
using namespace drizzled::plugin;
57
#define DEFAULT_FILE_EXTENSION ".dfe"
60
#include "mysql_priv.h"
61
#include <mysql/plugin.h>
67
#include "strutil_xt.h"
68
#include "database_xt.h"
73
#include "datadic_xt.h"
75
#include "pbms_enabled.h"
77
#include "tabcache_xt.h"
78
#include "systab_xt.h"
79
#include "xaction_xt.h"
80
#include "backup_xt.h"
83
//#define XT_USE_SYS_PAR_DEBUG_SIZES
84
#define PBXT_HANDLER_TRACE
85
//#define PBXT_TRACE_RETURN
86
//#define XT_PRINT_INDEX_OPT
87
//#define XT_SHOW_DUMPS_TRACE
88
//#define XT_UNIT_TEST
89
//#define LOAD_TABLE_ON_OPEN
90
//#define CHECK_TABLE_LOADS
92
/* Enable to trace the statements executed by the engine: */
93
//#define TRACE_STATEMENTS
95
/* Enable to print the trace to the stdout, instead of
98
//#define PRINT_STATEMENTS
102
static handler *pbxt_create_handler(handlerton *hton, TABLE_SHARE *table, MEM_ROOT *mem_root);
103
static int pbxt_init(void *p);
104
static int pbxt_end(void *p);
105
static int pbxt_panic(handlerton *hton, enum ha_panic_function flag);
106
static void pbxt_drop_database(handlerton *hton, char *path);
107
static int pbxt_close_connection(handlerton *hton, THD* thd);
108
static int pbxt_commit(handlerton *hton, THD *thd, bool all);
109
static int pbxt_rollback(handlerton *hton, THD *thd, bool all);
110
static int pbxt_prepare(handlerton *hton, THD *thd, bool all);
111
static int pbxt_recover(handlerton *hton, XID *xid_list, uint len);
112
static int pbxt_commit_by_xid(handlerton *hton, XID *xid);
113
static int pbxt_rollback_by_xid(handlerton *hton, XID *xid);
114
static int pbxt_start_consistent_snapshot(handlerton *hton, THD *thd);
116
static void ha_aquire_exclusive_use(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine);
117
static void ha_release_exclusive_use(XTThreadPtr self, XTSharePtr share);
118
static void ha_close_open_tables(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine);
120
#ifdef TRACE_STATEMENTS
122
#ifdef PRINT_STATEMENTS
123
#define STAT_TRACE(y, x) printf("%s: %s\n", y ? y->t_name : "-unknown-", x)
125
#define STAT_TRACE(y, x) xt_ttraceq(y, x)
130
#define STAT_TRACE(y, x)
134
#ifdef PBXT_HANDLER_TRACE
135
#define PBXT_ALLOW_PRINTING
137
#define XT_TRACE_CALL() ha_trace_function(__FUNC__, NULL)
138
#define XT_TRACE_METHOD() ha_trace_function(__FUNC__, pb_share->sh_table_path->ps_path)
140
#ifdef PBXT_TRACE_RETURN
141
#define XT_RETURN(x) do { printf("%d\n", (int) (x)); return (x); } while (0)
142
#define XT_RETURN_VOID do { printf("out\n"); return; } while (0)
144
#define XT_RETURN(x) return (x)
145
#define XT_RETURN_VOID return
150
#define XT_TRACE_CALL()
151
#define XT_TRACE_METHOD()
152
#define XT_RETURN(x) return (x)
153
#define XT_RETURN_VOID return
157
#ifdef PBXT_ALLOW_PRINTING
158
#define XT_PRINT0(y, x) do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-"); } while (0)
159
#define XT_PRINT1(y, x, a) do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-", a); } while (0)
160
#define XT_PRINT2(y, x, a, b) do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-", a, b); } while (0)
161
#define XT_PRINT3(y, x, a, b, c) do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-", a, b, c); } while (0)
163
#define XT_PRINT0(y, x)
164
#define XT_PRINT1(y, x, a)
165
#define XT_PRINT2(y, x, a, b)
166
#define XT_PRINT3(y, x, a, b, c)
172
handlerton *pbxt_hton;
173
bool pbxt_inited = false; // Variable for checking the init state of hash
174
xtBool pbxt_ignore_case = true;
175
const char *pbxt_extensions[]= { ".xtr", ".xtd", ".xtl", ".xti", ".xt", "", NULL };
176
#ifdef XT_CRASH_DEBUG
177
xtBool pbxt_crash_debug = TRUE;
179
xtBool pbxt_crash_debug = FALSE;
183
/* Variables for pbxt share methods */
184
static xt_mutex_type pbxt_database_mutex; // Prevent a database from being opened while it is being dropped
185
static XTHashTabPtr pbxt_share_tables; // Hash used to track open tables
186
static char *pbxt_index_cache_size;
187
static char *pbxt_record_cache_size;
188
static char *pbxt_log_cache_size;
189
static char *pbxt_log_file_threshold;
190
static char *pbxt_transaction_buffer_size;
191
static char *pbxt_log_buffer_size;
192
static char *pbxt_checkpoint_frequency;
193
static char *pbxt_data_log_threshold;
194
static char *pbxt_data_file_grow_size;
195
static char *pbxt_row_file_grow_size;
196
static char *pbxt_record_write_threshold;
197
static my_bool pbxt_support_xa;
200
// drizzle complains it's not used
201
static XTXactEnumXARec pbxt_xa_enum;
205
#define XT_SHARE_LOCK_WAIT 5000
207
#define XT_SHARE_LOCK_WAIT 500
211
* Lock timeout in 1/1000ths of a second
213
#define XT_SHARE_LOCK_TIMEOUT 30000
216
* -----------------------------------------------------------------------
221
//#define XT_FOR_TEAMDRIVE
223
/*
 * Describes one size-valued engine parameter.  Every field is a string;
 * ha_set_variable() converts the values with xt_byte_size_to_int8() and
 * picks vp_max8 or vp_max4 depending on sizeof(size_t).
 */
typedef struct HAVarParams {
224
const char *vp_var; /* Variable name. */
225
const char *vp_def; /* Default value. */
226
const char *vp_min; /* Minimum allowed value. */
227
const char *vp_max4; /* Maximum allowed value on 32-bit processors. */
228
const char *vp_max8; /* Maximum allowed value on 64-bit processors. */
229
} HAVarParamsRec, *HAVarParamsPtr;
231
#ifdef XT_USE_SYS_PAR_DEBUG_SIZES
232
static HAVarParamsRec vp_index_cache_size = { "pbxt_index_cache_size", "32MB", "8MB", "2GB", "2000GB" };
233
static HAVarParamsRec vp_record_cache_size = { "pbxt_record_cache_size", "32MB", "8MB", "2GB", "2000GB" };
234
static HAVarParamsRec vp_log_cache_size = { "pbxt_log_cache_size", "16MB", "4MB", "2GB", "2000GB" };
235
static HAVarParamsRec vp_checkpoint_frequency = { "pbxt_checkpoint_frequency", "1GB", "2MB", "2000GB", "2000GB" };
236
static HAVarParamsRec vp_log_file_threshold = { "pbxt_log_file_threshold", "32MB", "1MB", "2GB", "256TB" };
237
static HAVarParamsRec vp_transaction_buffer_size = { "pbxt_transaction_buffer_size", "1MB", "128K", "1GB", "24GB" };
238
static HAVarParamsRec vp_log_buffer_size = { "pbxt_log_buffer_size", "256K", "128K", "1GB", "24GB" };
239
static HAVarParamsRec vp_data_log_threshold = { "pbxt_data_log_threshold", "400K", "400K", "2GB", "256TB" };
240
static HAVarParamsRec vp_data_file_grow_size = { "pbxt_data_file_grow_size", "2MB", "128K", "1GB", "2GB" };
241
static HAVarParamsRec vp_row_file_grow_size = { "pbxt_row_file_grow_size", "256K", "32K", "1GB", "2GB" };
242
static HAVarParamsRec vp_record_write_threshold = { "pbxt_record_write_threshold", "4MB", "0", "2GB", "8GB" };
243
#define XT_DL_DEFAULT_XLOG_COUNT 3
244
#define XT_DL_DEFAULT_GARBAGE_LEVEL 10
246
static HAVarParamsRec vp_index_cache_size = { "pbxt_index_cache_size", "32MB", "8MB", "2GB", "2000GB" };
247
static HAVarParamsRec vp_record_cache_size = { "pbxt_record_cache_size", "32MB", "8MB", "2GB", "2000GB" };
248
static HAVarParamsRec vp_log_cache_size = { "pbxt_log_cache_size", "16MB", "4MB", "2GB", "2000GB" };
249
static HAVarParamsRec vp_checkpoint_frequency = { "pbxt_checkpoint_frequency", "1GB", "2MB", "2000GB", "2000GB" };
250
static HAVarParamsRec vp_log_file_threshold = { "pbxt_log_file_threshold", "32MB", "1MB", "2GB", "256TB" };
251
static HAVarParamsRec vp_transaction_buffer_size = { "pbxt_transaction_buffer_size", "1MB", "128K", "1GB", "24GB" };
252
static HAVarParamsRec vp_log_buffer_size = { "pbxt_log_buffer_size", "256K", "128K", "1GB", "24GB" };
253
static HAVarParamsRec vp_data_log_threshold = { "pbxt_data_log_threshold", "64MB", "1MB", "2GB", "256TB" };
254
static HAVarParamsRec vp_data_file_grow_size = { "pbxt_data_file_grow_size", "2MB", "128K", "1GB", "2GB" };
255
static HAVarParamsRec vp_row_file_grow_size = { "pbxt_row_file_grow_size", "256K", "32K", "1GB", "2GB" };
256
static HAVarParamsRec vp_record_write_threshold = { "pbxt_record_write_threshold", "4MB", "0", "2GB", "8GB" };
257
#define XT_DL_DEFAULT_XLOG_COUNT 3
258
#define XT_DL_DEFAULT_GARBAGE_LEVEL 50
261
#define XT_AUTO_INCREMENT_DEF 0
262
#define XT_DL_DEFAULT_INDEX_DIRTY_LEVEL 80
266
/* For debugging on the Mac, we check the re-use logs: */
267
#define XT_OFFLINE_LOG_FUNCTION_DEF XT_RECYCLE_LOGS
269
#define XT_OFFLINE_LOG_FUNCTION_DEF XT_DELETE_LOGS
272
#define XT_OFFLINE_LOG_FUNCTION_DEF XT_RECYCLE_LOGS
275
/* TeamDrive, uses special auto-increment, and
276
* we keep the logs for the moment:
278
#ifdef XT_FOR_TEAMDRIVE
279
#undef XT_OFFLINE_LOG_FUNCTION_DEF
280
#define XT_OFFLINE_LOG_FUNCTION_DEF XT_KEEP_LOGS
281
//#undef XT_AUTO_INCREMENT_DEF
282
//#define XT_AUTO_INCREMENT_DEF 1
285
#ifdef PBXT_HANDLER_TRACE
286
/*
 * Trace helper used by XT_TRACE_CALL() and XT_TRACE_METHOD().
 *
 * Prints the name of the current PBXT thread (or "-unknown-" when
 * xt_get_self() returns NULL), a function name derived from `function`
 * and, in one of the two printf() forms, the table path.
 *
 * When `function` contains a '(' the code scans from that position towards
 * the start of the string, testing for characters that are neither
 * alphanumeric nor '_', and copies the result into the 50-byte func_buf;
 * otherwise `function` is copied unchanged.  xt_strcpy() is given the
 * buffer size (50) on every copy.
 *
 * NOTE(review): the loop body and the test that chooses between the two
 * printf() calls are not visible in this text - presumably the choice
 * depends on `table` being non-NULL; confirm against the full file.
 * NOTE(review): isalnum() receives a plain char here; confirm that
 * function names can never contain bytes outside the unsigned char range.
 */
static void ha_trace_function(const char *function, char *table)
288
char func_buf[50], *ptr;
289
XTThreadPtr thread = xt_get_self();
291
if ((ptr = const_cast<char *>(strchr(function, '(')))) {
293
while (ptr > function) {
294
if (!(isalnum(*ptr) || *ptr == '_'))
299
xt_strcpy(50, func_buf, ptr);
300
if ((ptr = strchr(func_buf, '(')))
304
xt_strcpy(50, func_buf, function);
306
printf("%s %s (%s)\n", thread ? thread->t_name : "-unknown-", func_buf, table);
308
printf("%s %s\n", thread ? thread->t_name : "-unknown-", func_buf);
313
* -----------------------------------------------------------------------
318
/*
 * Case-sensitive comparison callback for the share hash table.
 *
 * key:  a C string holding a table path.
 * data: an XTSharePtr stored in the table.
 * Returns non-zero when the key equals the path held by the share.
 */
static xtBool ha_hash_comp(void *key, void *data)
320
XTSharePtr share = (XTSharePtr) data;
322
return strcmp((char *) key, share->sh_table_path->ps_path) == 0;
325
/*
 * Case-sensitive hash callback for the share hash table.
 *
 * key_data is hashed with xt_ht_hash() either directly as a C string, or
 * as a share whose table path is hashed.
 *
 * NOTE(review): the line that selects between the two returns is not
 * visible in this text - presumably `is_key` picks the string form;
 * confirm against the full file.
 */
static xtHashValue ha_hash(xtBool is_key, void *key_data)
327
XTSharePtr share = (XTSharePtr) key_data;
330
return xt_ht_hash((char *) key_data);
331
return xt_ht_hash(share->sh_table_path->ps_path);
334
/*
 * Case-insensitive comparison callback for the share hash table.
 *
 * key:  a C string holding a table path.
 * data: an XTSharePtr stored in the table.
 * Returns non-zero when strcasecmp() reports the key and the share's
 * path as equal.
 */
static xtBool ha_hash_comp_ci(void *key, void *data)
336
XTSharePtr share = (XTSharePtr) data;
338
return strcasecmp((char *) key, share->sh_table_path->ps_path) == 0;
341
/*
 * Case-insensitive hash callback for the share hash table.
 *
 * key_data is hashed with xt_ht_casehash() either directly as a C string,
 * or as a share whose table path is hashed.
 *
 * NOTE(review): the line that selects between the two returns is not
 * visible in this text - presumably `is_key` picks the string form;
 * confirm against the full file.
 */
static xtHashValue ha_hash_ci(xtBool is_key, void *key_data)
343
XTSharePtr share = (XTSharePtr) key_data;
346
return xt_ht_casehash((char *) key_data);
347
return xt_ht_casehash(share->sh_table_path->ps_path);
350
/*
 * Attach the table to a share, if it is not attached already.
 *
 * Runs under the share's sh_ex_mutex.  When sh_table is NULL the table is
 * obtained with xt_use_table() from the share's path, and the key count
 * and key array are copied from the table dictionary into the share.
 * sh_recalc_selectivity is reset to FALSE at the same time.
 */
static void ha_open_share(XTThreadPtr self, XTShareRec *share)
352
xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
353
pushr_(xt_unlock_mutex, share->sh_ex_mutex);
355
if (!share->sh_table) {
356
share->sh_table = xt_use_table(self, share->sh_table_path, FALSE, FALSE);
357
share->sh_dic_key_count = share->sh_table->tab_dic.dic_key_count;
358
share->sh_dic_keys = share->sh_table->tab_dic.dic_keys;
359
share->sh_recalc_selectivity = FALSE;
362
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
365
/*
 * Detach the table from a share.
 *
 * When a table is attached, its current auto-increment value is kept in
 * sh_min_auto_inc, the table reference is released and sh_table is
 * cleared.  The dictionary key count and key pointer are then reset.
 */
static void ha_close_share(XTThreadPtr self, XTShareRec *share)
369
if ((tab = share->sh_table)) {
370
/* Save this, in case the share is re-opened. */
371
share->sh_min_auto_inc = tab->tab_auto_inc;
373
xt_heap_release(self, tab);
374
share->sh_table = NULL;
377
/* These are only references: */
378
share->sh_dic_key_count = 0;
379
share->sh_dic_keys = NULL;
382
/*
 * Release everything owned by a share, then the share itself.
 *
 * Order: close the table, free the path, delete the lock and condition,
 * delete the mutex, free the structure.  Each member is tested before it
 * is released and set to NULL afterwards, so the function can be used on
 * a share that was only partly set up (ha_get_share() pushes it as the
 * cleanup handler while it is still filling the share in).
 */
static void ha_cleanup_share(XTThreadPtr self, XTSharePtr share)
384
ha_close_share(self, share);
386
if (share->sh_table_path) {
387
xt_free(self, share->sh_table_path);
388
share->sh_table_path = NULL;
391
if (share->sh_ex_cond) {
392
/* sh_lock is torn down together with the condition variable. */
thr_lock_delete(&share->sh_lock);
393
xt_delete_cond(self, (xt_cond_type *) share->sh_ex_cond);
394
share->sh_ex_cond = NULL;
397
if (share->sh_ex_mutex) {
398
xt_delete_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
399
share->sh_ex_mutex = NULL;
402
xt_free(self, share);
405
/*
 * Free callback handed to xt_new_hashtable() for the share table:
 * forwards the stored item, as a share, to ha_cleanup_share().
 */
static void ha_hash_free(XTThreadPtr self, void *data)
407
XTSharePtr share = (XTSharePtr) data;
409
ha_cleanup_share(self, share);
413
* This structure contains information that is common to all handles.
414
* (i.e. it is table specific).
416
/*
 * Look up the share for a table path, creating it on first use, and
 * take one use-count reference on it.
 *
 * The whole operation runs with pbxt_share_tables locked.  A new share is
 * zero-allocated, given a mutex, a condition and an initialised sh_lock,
 * gets a copy of table_path, and is put into the hash table.  While the
 * new share is being filled in, ha_cleanup_share() is registered as its
 * cleanup handler; the handler is discarded with popr_() just before the
 * share is stored in the table.
 *
 * NOTE(review): the call to ha_open_share() is presumably conditional on
 * `open_table`; the guarding line is not visible in this text.
 */
static XTSharePtr ha_get_share(XTThreadPtr self, const char *table_path, bool open_table)
421
xt_ht_lock(self, pbxt_share_tables);
422
pushr_(xt_ht_unlock, pbxt_share_tables);
424
// Check if the table exists...
425
if (!(share = (XTSharePtr) xt_ht_get(self, pbxt_share_tables, (void *) table_path))) {
426
share = (XTSharePtr) xt_calloc(self, sizeof(XTShareRec));
427
pushr_(ha_cleanup_share, share);
429
share->sh_ex_mutex = (xt_mutex_type *) xt_new_mutex(self);
430
share->sh_ex_cond = (xt_cond_type *) xt_new_cond(self);
432
thr_lock_init(&share->sh_lock);
434
share->sh_use_count = 0;
435
share->sh_table_path = (XTPathStrPtr) xt_dup_string(self, table_path);
438
ha_open_share(self, share);
440
popr_(); // Discard ha_cleanup_share(share);
442
xt_ht_put(self, pbxt_share_tables, share);
445
share->sh_use_count++;
446
freer_(); // xt_ht_unlock(pbxt_share_tables)
452
* Free shared information.
454
/*
 * Drop one use-count reference on a share.  With pbxt_share_tables
 * locked, the count is decremented and, when it reaches zero, the share
 * is deleted from the hash table by its path.
 */
static void ha_unget_share(XTThreadPtr self, XTSharePtr share)
456
xt_ht_lock(self, pbxt_share_tables);
457
pushr_(xt_ht_unlock, pbxt_share_tables);
459
if (!--share->sh_use_count)
460
xt_ht_del(self, pbxt_share_tables, share->sh_table_path);
462
freer_(); // xt_ht_unlock(pbxt_share_tables)
465
/*
 * Same as ha_unget_share(), but with an xtBool result.
 *
 * NOTE(review): `removed` starts as FALSE; the lines that set it and
 * return it are not visible in this text - presumably it reports whether
 * the share was deleted from the hash table.  Confirm against the full file.
 */
static xtBool ha_unget_share_removed(XTThreadPtr self, XTSharePtr share)
467
xtBool removed = FALSE;
469
xt_ht_lock(self, pbxt_share_tables);
470
pushr_(xt_ht_unlock, pbxt_share_tables);
472
if (!--share->sh_use_count) {
474
xt_ht_del(self, pbxt_share_tables, share->sh_table_path);
477
freer_(); // xt_ht_unlock(pbxt_share_tables)
481
/*
 * Copy the transaction settings of a server session into a PBXT thread
 * and reset the thread's per-transaction flags.
 *
 *  - st_xact_mode:    XT_XACT_COMMITTED_READ when the session isolation is
 *                     ISO_READ_COMMITTED or lower, else XT_XACT_REPEATABLE_READ.
 *  - st_ignore_fkeys: set when OPTION_NO_FOREIGN_KEY_CHECKS is on.
 *  - st_auto_commit:  set when neither OPTION_NOT_AUTOCOMMIT nor
 *                     OPTION_BEGIN is on.
 *  - st_table_trans:  only touched when set_table_trans is true.
 *
 * The call finishes by waiting for recovery to reach XT_RECOVER_SWEPT.
 *
 * NOTE(review): the two assignments to st_table_trans look like the two
 * alternatives of a build-time conditional whose directive lines are not
 * visible in this text - confirm against the full file.
 */
static inline void thd_init_xact(THD *thd, XTThreadPtr self, bool set_table_trans)
483
self->st_xact_mode = thd_tx_isolation(thd) <= ISO_READ_COMMITTED ? XT_XACT_COMMITTED_READ : XT_XACT_REPEATABLE_READ;
484
self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
485
self->st_auto_commit = (thd_test_options(thd,(OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) == 0;
486
if (set_table_trans) {
488
self->st_table_trans = FALSE;
490
self->st_table_trans = thd_sql_command(thd) == SQLCOM_LOCK_TABLES;
493
self->st_abort_trans = FALSE;
494
self->st_stat_ended = FALSE;
495
self->st_stat_trans = FALSE;
496
XT_PRINT0(self, "xt_xn_begin\n");
497
xt_xres_wait_for_recovery(self, XT_RECOVER_SWEPT);
501
* -----------------------------------------------------------------------
506
/*
 * Public entry point that takes an untyped share pointer: releases the
 * exclusive use of the share first, then drops the use-count reference.
 */
xtPublic void xt_ha_unlock_table(XTThreadPtr self, void *share)
508
ha_release_exclusive_use(self, (XTSharePtr) share);
509
ha_unget_share(self, (XTSharePtr) share);
512
/*
 * Release the reference held in the global pbxt_database pointer and
 * clear the pointer.
 *
 * NOTE(review): a guard on pbxt_database being non-NULL may exist on a
 * line that is not visible in this text - confirm against the full file.
 */
xtPublic void xt_ha_close_global_database(XTThreadPtr self)
515
xt_heap_release(self, pbxt_database);
516
pbxt_database = NULL;
521
* Open a PBXT database given the path of a table.
522
* This function also returns the name of the table.
524
* We use the pbxt_database_mutex to lock this
525
* operation to make sure it does not occur while
526
* some other thread is doing a "closeall".
528
xtPublic void xt_ha_open_database_of_table(XTThreadPtr self, XTPathStrPtr XT_UNUSED(table_path))
530
#ifdef XT_USE_GLOBAL_DB
531
if (!self->st_database) {
532
if (!pbxt_database) {
533
xt_open_database(self, mysql_real_data_home, TRUE);
535
* This can be done at the same time as the recovery thread,
536
* strictly speaking I need a lock.
538
if (!pbxt_database) {
539
pbxt_database = self->st_database;
540
xt_heap_reference(self, pbxt_database);
544
xt_use_database(self, pbxt_database, XT_FOR_USER);
547
char db_path[PATH_MAX];
549
xt_strcpy(PATH_MAX, db_path, (char *) table_path);
550
xt_remove_last_name_of_path(db_path);
551
xt_remove_dir_char(db_path);
553
if (self->st_database && xt_tab_compare_paths(self->st_database->db_name, xt_last_name_of_path(db_path)) == 0)
554
/* This thread already has this database open! */
557
/* Auto commit before changing the database: */
558
if (self->st_xact_data) {
559
/* PMC - This probably indicates something strange is happening:
561
* This sequence generates this error:
565
* create temporary table t3 (id int)|
567
* create function f10() returns int
569
* drop temporary table if exists t3;
570
* create temporary table t3 (id int) engine=myisam;
571
* insert into t3 select id from t4;
572
* return (select count(*) from t3);
577
* An error is generated because the same thread is used
578
* to open table t4 (at the start of the functions), and
579
* then to drop table t3. To drop t3 we need to
580
* switch the database, so we land up here!
582
xt_throw_xterr(XT_CONTEXT, XT_ERR_CANNOT_CHANGE_DB);
584
if (!xt_xn_commit(self))
589
xt_lock_mutex(self, &pbxt_database_mutex);
590
pushr_(xt_unlock_mutex, &pbxt_database_mutex);
591
xt_open_database(self, db_path, FALSE);
592
freer_(); // xt_unlock_mutex(&pbxt_database_mutex);
596
xtPublic XTThreadPtr xt_ha_set_current_thread(THD *thd, XTExceptionPtr e)
599
static int ha_thread_count = 0, ha_id;
602
if (!(self = (XTThreadPtr) *thd->getEngineData(pbxt_hton))) {
604
if (!(self = (XTThreadPtr) *thd_ha_data(thd, pbxt_hton))) {
606
// const Security_context *sctx;
610
ha_id = ++ha_thread_count;
611
sprintf(ha_id_str, "_%d", ha_id);
612
xt_strcpy(120,name,"user"); // TODO: Fix this hack
614
sctx = &thd->main_security_ctx;
617
xt_strcpy(120, name, sctx->user);
618
xt_strcat(120, name, "@");
623
xt_strcat(120, name, sctx->host);
625
xt_strcat(120, name, sctx->ip);
626
else if (thd->proc_info)
627
xt_strcat(120, name, (char *) thd->proc_info);
629
xt_strcat(120, name, "system");
631
xt_strcat(120, name, ha_id_str);
632
if (!(self = xt_create_thread(name, FALSE, TRUE, e)))
635
self->st_xact_mode = XT_XACT_REPEATABLE_READ;
637
*thd->getEngineData(pbxt_hton) = (void *) self;
639
*thd_ha_data(thd, pbxt_hton) = (void *) self;
645
/*
 * Release the PBXT thread attached to a server connection.
 *
 * Reads the per-connection engine slot for pbxt_hton.  When a PBXT thread
 * is stored there, the slot is cleared first and the thread is then freed
 * with xt_free_thread().  When the slot is empty there is nothing to do.
 *
 * Two spellings of the slot access appear below (thd->getEngineData() and
 * thd_ha_data()).  NOTE(review): they look like the two alternatives of a
 * build-time conditional whose directive lines are not visible in this
 * text - confirm against the full file.  Both must test for a non-NULL
 * thread: testing for NULL would hand NULL to xt_free_thread() and would
 * never release a thread that is actually attached.
 */
xtPublic void xt_ha_close_connection(THD* thd)
650
if ((self = (XTThreadPtr) *thd->getEngineData(pbxt_hton))) {
651
*thd->getEngineData(pbxt_hton) = NULL;
653
if ((self = (XTThreadPtr) *thd_ha_data(thd, pbxt_hton))) {
654
*thd_ha_data(thd, pbxt_hton) = NULL;
656
xt_free_thread(self);
660
/*
 * Return the PBXT thread stored in the connection's engine slot for
 * pbxt_hton, without creating one.
 *
 * NOTE(review): the two return statements look like the two alternatives
 * of a build-time conditional whose directive lines are not visible in
 * this text - confirm against the full file.
 */
xtPublic XTThreadPtr xt_ha_thd_to_self(THD *thd)
663
return (XTThreadPtr) *thd->getEngineData(pbxt_hton);
665
return (XTThreadPtr) *thd_ha_data(thd, pbxt_hton);
669
/* The first bit is 1. */
670
static u_int ha_get_max_bit(MX_BITMAP *map)
673
uint32_t cnt = map->numOfBitsInMap();
674
uint32_t max_bit = 0;
676
for (uint32_t i = 0; i < cnt; i++)
677
if (map->isBitSet(i))
682
my_bitmap_map *data_ptr = map->bitmap;
683
my_bitmap_map *end_ptr = map->last_word_ptr;
684
u_int cnt = map->n_bits;
687
for (; end_ptr >= data_ptr; end_ptr--) {
688
if ((b = *end_ptr)) {
691
if (end_ptr == map->getLastWordPtr() && map->getLastWordMask())
692
mask = map->getLastWordMask() >> 1;
695
while (!(b & mask)) {
697
/* Should not happen, but if it does, we hang! */
699
return map->numOfBitsInMap();
704
if (end_ptr == map->getLastWordPtr())
705
cnt = ((cnt-1) / 32) * 32;
714
* -----------------------------------------------------------------------
720
* In PBXT, as in MySQL: thread == connection.
722
* So we simply attach a PBXT thread to a MySQL thread.
724
/*
 * Wrapper around xt_ha_set_current_thread() for callers that want an
 * error code instead of an exception record.  When no PBXT thread can be
 * obtained, the exception is logged with xt_log_exception().
 *
 * NOTE(review): the lines that store into *err and return are not visible
 * in this text - confirm the exact error value against the full file.
 */
static XTThreadPtr ha_set_current_thread(THD *thd, int *err)
729
if (!(self = xt_ha_set_current_thread(thd, &e))) {
730
xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
737
/*
 * Translate a PBXT error code into the error code reported to the server.
 *
 * Each XT_ERR_* value listed below returns the HA_ERR_* (or ER_*) constant
 * that follows it; several foreign-key related codes share one result.
 * Any code without a case falls through to the final return of -1.
 *
 * NOTE(review): the switch header, and possibly further cases (such as a
 * "no error" case), are on lines not visible in this text.
 */
xtPublic int xt_ha_pbxt_to_mysql_error(int xt_err)
742
case XT_ERR_DUPLICATE_KEY:
743
return HA_ERR_FOUND_DUPP_KEY;
744
case XT_ERR_DEADLOCK:
745
return HA_ERR_LOCK_DEADLOCK;
746
case XT_ERR_RECORD_CHANGED:
747
/* If we generate HA_ERR_RECORD_CHANGED instead of HA_ERR_LOCK_WAIT_TIMEOUT
748
* then sysbench does not work because it does not handle this error.
750
//return HA_ERR_LOCK_WAIT_TIMEOUT; // but HA_ERR_RECORD_CHANGED is the correct error for a optimistic lock failure.
751
return HA_ERR_RECORD_CHANGED;
752
case XT_ERR_LOCK_TIMEOUT:
753
return HA_ERR_LOCK_WAIT_TIMEOUT;
754
case XT_ERR_TABLE_IN_USE:
755
return HA_ERR_WRONG_COMMAND;
756
case XT_ERR_TABLE_NOT_FOUND:
757
return HA_ERR_NO_SUCH_TABLE;
758
case XT_ERR_TABLE_EXISTS:
759
return HA_ERR_TABLE_EXIST;
760
case XT_ERR_CANNOT_CHANGE_DB:
761
return ER_TRG_IN_WRONG_SCHEMA;
762
case XT_ERR_COLUMN_NOT_FOUND:
763
return HA_ERR_CANNOT_ADD_FOREIGN;
764
case XT_ERR_NO_REFERENCED_ROW:
765
case XT_ERR_REF_TABLE_NOT_FOUND:
766
case XT_ERR_REF_TYPE_WRONG:
767
return HA_ERR_NO_REFERENCED_ROW;
768
case XT_ERR_ROW_IS_REFERENCED:
769
return HA_ERR_ROW_IS_REFERENCED;
770
case XT_ERR_COLUMN_IS_NOT_NULL:
771
case XT_ERR_INCORRECT_NO_OF_COLS:
772
case XT_ERR_FK_ON_TEMP_TABLE:
773
case XT_ERR_FK_REF_TEMP_TABLE:
774
return HA_ERR_CANNOT_ADD_FOREIGN;
775
case XT_ERR_DUPLICATE_FKEY:
776
return HA_ERR_FOREIGN_DUPLICATE_KEY;
777
case XT_ERR_RECORD_DELETED:
778
return HA_ERR_RECORD_DELETED;
780
return(-1); // Unknown error
783
xtPublic int xt_ha_pbxt_thread_error_for_mysql(THD *thd, const XTThreadPtr self, int ignore_dup_key)
785
int xt_err = self->t_exception.e_xt_err;
786
xtBool dup_key = FALSE;
788
XT_PRINT2(self, "xt_ha_pbxt_thread_error_for_mysql xt_err=%d auto commit=%d\n", (int) xt_err, (int) self->st_auto_commit);
792
case XT_ERR_DUPLICATE_KEY:
793
case XT_ERR_DUPLICATE_FKEY:
794
/* Let MySQL call rollback as and when it wants to for duplicate
797
* In addition, we are not allowed to do an auto-rollback
798
* inside a sub-statement (function() or procedure())
803
* create table t3 (c1 char(1) primary key not null)|
805
* create function bug12379()
808
* insert into t3 values('X');
809
* insert into t3 values('X');
817
* Not doing an auto-rollback should solve this problem in the
818
* case of duplicate key (but not in others - like deadlock)!
819
* I don't think this situation is handled correctly by MySQL.
822
/* If we are in auto-commit mode (and we are not ignoring
823
* duplicate keys) then rollback the transaction automatically.
826
if (!ignore_dup_key && self->st_auto_commit)
827
goto abort_transaction;
829
case XT_ERR_DEADLOCK:
830
case XT_ERR_NO_REFERENCED_ROW:
831
case XT_ERR_ROW_IS_REFERENCED:
832
goto abort_transaction;
833
case XT_ERR_RECORD_CHANGED:
834
/* MySQL also handles the locked error. NOTE: There is no automatic
839
xt_log_exception(self, &self->t_exception, XT_LOG_DEFAULT);
841
/* PMC 2006-08-30: It should be that this is not necessary!
843
* It is only necessary to call ha_rollback() if the engine
844
* aborts the transaction.
846
* On the other hand, I shouldn't need to rollback the
847
* transaction because, if I return an error, MySQL
848
* should do it for me.
850
* Unfortunately, when auto-commit is off, MySQL does not
851
* rollback automatically (for example when a deadlock
854
* And when we have a multi update we cannot rely on this
855
* either (see comment above).
857
if (self->st_xact_data) {
860
* A result of the "st_abort_trans = TRUE" below is that
861
* the following code results in an empty set.
862
* The reason is "ignore_dup_key" is not set so
863
* the duplicate key leads to an error which causes
864
* the transaction to be aborted.
865
* The delayed inserts are all execute in one transaction.
868
* c1 INT(11) NOT NULL AUTO_INCREMENT,
869
* c2 INT(11) DEFAULT NULL,
873
* INSERT DELAYED INTO t1 VALUES(NULL, 11), (NULL, 12);
874
* INSERT DELAYED INTO t1 VALUES(14, 91);
875
* INSERT DELAYED INTO t1 VALUES (NULL, 92), (NULL, 93);
879
if (self->st_lock_count == 0) {
880
/* No table locks, must rollback immediately
881
* (there will be no possibility later!
883
XT_PRINT1(self, "xt_xn_rollback xt_err=%d\n", xt_err);
884
if (!xt_xn_rollback(self))
885
xt_log_exception(self, &self->t_exception, XT_LOG_DEFAULT);
888
/* Locks are held on tables.
889
* Only rollback after locks are released.
891
/* I do not think this is required, because
892
* I tell mysql to rollback below,
893
* besides it is a hack!
894
self->st_auto_commit = TRUE;
896
self->st_abort_trans = TRUE;
898
/* Only tell MySQL to rollback if we automatically rollback.
899
* Note: calling this with (thd, FALSE), cause sp.test to fail.
903
thd_mark_transaction_to_rollback(thd, TRUE);
908
return xt_ha_pbxt_to_mysql_error(xt_err);
911
/*
 * Per-thread callback for xt_do_to_all_threads(): when `other_thr` has
 * the database `db` in use, make it stop using it.
 *
 * self:      the calling thread.
 * other_thr: the thread being visited.
 * db:        the database to match, passed as an untyped pointer.
 */
static void ha_conditional_close_database(XTThreadPtr self, XTThreadPtr other_thr, void *db)
913
if (other_thr->st_database == (XTDatabaseHPtr) db)
914
xt_unuse_database(self, other_thr);
918
* This is only called from drop database, so we know that
919
* no thread is actually using the database. This means that it
920
* must be safe to close the database.
922
/*
 * Make every thread that has `db` in use let go of it.  Runs with
 * pbxt_database_mutex held, the same mutex that
 * xt_ha_open_database_of_table() takes around xt_open_database().
 */
xtPublic void xt_ha_all_threads_close_database(XTThreadPtr self, XTDatabaseHPtr db)
924
xt_lock_mutex(self, &pbxt_database_mutex);
925
pushr_(xt_unlock_mutex, &pbxt_database_mutex);
926
xt_do_to_all_threads(self, ha_conditional_close_database, db);
927
freer_(); // xt_unlock_mutex(&pbxt_database_mutex);
930
/*
 * Convenience form of xt_ha_pbxt_thread_error_for_mysql() that supplies
 * the current server session (current_thd) and the current PBXT thread
 * (myxt_get_self()) itself.  Returns that function's result.
 */
static int ha_log_pbxt_thread_error_for_mysql(int ignore_dup_key)
932
return xt_ha_pbxt_thread_error_for_mysql(current_thd, myxt_get_self(), ignore_dup_key);
936
* -----------------------------------------------------------------------
940
/*
 * Work out the numeric value of a size-valued engine parameter.
 *
 * value: in/out pointer to the parameter's string.  It may be replaced by
 *        the environment variable named vp->vp_var, by the default
 *        vp->vp_def, or by the minimum vp->vp_min.
 * vp:    name, default and bounds of the parameter.
 *
 * Strings are converted with xt_byte_size_to_int8().  The upper bound is
 * vp_max8 when sizeof(size_t) is 8, otherwise vp_max4.
 *
 * NOTE(review): the tests that decide when the environment, default,
 * minimum and maximum are applied, and the return statement, are on lines
 * not visible in this text - presumably the result is clamped to
 * [vp_min, max]; confirm against the full file.
 */
static xtWord8 ha_set_variable(char **value, HAVarParamsPtr vp)
947
*value = getenv(vp->vp_var);
949
*value = (char *) vp->vp_def;
950
result = xt_byte_size_to_int8(*value);
951
mi = (xtWord8) xt_byte_size_to_int8(vp->vp_min);
954
*value = (char *) vp->vp_min;
956
if (sizeof(size_t) == 8)
957
mm = (char *) vp->vp_max8;
959
mm = (char *) vp->vp_max4;
960
ma = (xtWord8) xt_byte_size_to_int8(mm);
968
/*
 * Start the engine's sub-systems.
 *
 * Steps, in order:
 *  1. Log the version banner.
 *  2. Resolve every size parameter with ha_set_variable().
 *  3. Store the log/data related sizes in the xt_db_* globals; the log
 *     buffer and transaction buffer sizes go through
 *     xt_align_offset(..., 512) first.
 *  4. Decide pbxt_ignore_case and create the share hash table with the
 *     matching pair of hash/compare callbacks.
 *  5. Lock the installation, start the system tables and databases, and
 *     initialise the index cache, record cache and transaction log cache
 *     with their resolved sizes.
 *
 * NOTE(review): the two assignments to pbxt_ignore_case look like the two
 * alternatives of a build-time conditional whose directive lines are not
 * visible in this text - confirm against the full file.
 */
static void pbxt_call_init(XTThreadPtr self)
970
xtInt8 index_cache_size;
971
xtInt8 record_cache_size;
972
xtInt8 log_cache_size;
973
xtInt8 log_file_threshold;
974
xtInt8 transaction_buffer_size;
975
xtInt8 log_buffer_size;
976
xtInt8 checkpoint_frequency;
977
xtInt8 data_log_threshold;
978
xtInt8 data_file_grow_size;
979
xtInt8 row_file_grow_size;
980
xtInt8 record_write_threshold;
982
xt_logf(XT_NT_INFO, "PrimeBase XT (PBXT) Engine %s loaded...\n", xt_get_version());
983
xt_logf(XT_NT_INFO, "Paul McCullagh, PrimeBase Technologies GmbH, http://www.primebase.org\n");
985
index_cache_size = ha_set_variable(&pbxt_index_cache_size, &vp_index_cache_size);
986
record_cache_size = ha_set_variable(&pbxt_record_cache_size, &vp_record_cache_size);
987
log_cache_size = ha_set_variable(&pbxt_log_cache_size, &vp_log_cache_size);
988
log_file_threshold = ha_set_variable(&pbxt_log_file_threshold, &vp_log_file_threshold);
989
transaction_buffer_size = ha_set_variable(&pbxt_transaction_buffer_size, &vp_transaction_buffer_size);
990
log_buffer_size = ha_set_variable(&pbxt_log_buffer_size, &vp_log_buffer_size);
991
checkpoint_frequency = ha_set_variable(&pbxt_checkpoint_frequency, &vp_checkpoint_frequency);
992
data_log_threshold = ha_set_variable(&pbxt_data_log_threshold, &vp_data_log_threshold);
993
data_file_grow_size = ha_set_variable(&pbxt_data_file_grow_size, &vp_data_file_grow_size);
994
row_file_grow_size = ha_set_variable(&pbxt_row_file_grow_size, &vp_row_file_grow_size);
995
record_write_threshold = ha_set_variable(&pbxt_record_write_threshold, &vp_record_write_threshold);
997
xt_db_log_file_threshold = (xtLogOffset) log_file_threshold;
998
xt_db_log_buffer_size = (size_t) xt_align_offset(log_buffer_size, 512);
999
xt_db_transaction_buffer_size = (size_t) xt_align_offset(transaction_buffer_size, 512);
1000
xt_db_checkpoint_frequency = (size_t) checkpoint_frequency;
1001
xt_db_data_log_threshold = (off_t) data_log_threshold;
1002
xt_db_data_file_grow_size = (size_t) data_file_grow_size;
1003
xt_db_row_file_grow_size = (size_t) row_file_grow_size;
1004
xt_db_record_write_threshold = (size_t) record_write_threshold;
1007
pbxt_ignore_case = TRUE;
1009
pbxt_ignore_case = lower_case_table_names != 0;
1011
if (pbxt_ignore_case)
1012
pbxt_share_tables = xt_new_hashtable(self, ha_hash_comp_ci, ha_hash_ci, ha_hash_free, TRUE, FALSE);
1014
pbxt_share_tables = xt_new_hashtable(self, ha_hash_comp, ha_hash, ha_hash_free, TRUE, FALSE);
1017
xt_lock_installation(self, mysql_real_data_home);
1018
XTSystemTableShare::startUp(self);
1019
xt_init_databases(self);
1020
xt_ind_init(self, (size_t) index_cache_size);
1021
xt_tc_init(self, (size_t) record_cache_size);
1022
xt_xlog_init(self, (size_t) log_cache_size);
1025
/*
 * Stop the engine's sub-systems: log the shutdown, close the global
 * database when XT_USE_GLOBAL_DB is defined, stop the database threads,
 * tell the freeer thread to quit and stop it, shut down databases and
 * system tables, unlock the installation, and finally free the share
 * hash table and clear its pointer.
 *
 * NOTE(review): several preprocessor lines and further shutdown calls are
 * not visible in this text, which is why xt_stop_database_threads()
 * appears twice - confirm against the full file.
 */
static void pbxt_call_exit(XTThreadPtr self)
1027
xt_logf(XT_NT_INFO, "PrimeBase XT Engine shutdown...\n");
1029
#ifdef TRACE_STATEMENTS
1032
#ifdef XT_USE_GLOBAL_DB
1033
xt_ha_close_global_database(self);
1036
//xt_stop_database_threads(self, FALSE);
1037
xt_stop_database_threads(self, TRUE);
1039
xt_stop_database_threads(self, TRUE);
1041
/* This will tell the freeer to quit ASAP: */
1042
xt_quit_freeer(self);
1043
/* We conditionally stop the freeer here, because if we are
1044
* in startup, then the freeer will be hanging.
1047
* This problem has been solved by MySQL!
1049
xt_stop_freeer(self);
1050
xt_exit_databases(self);
1051
XTSystemTableShare::shutDown(self);
1055
xt_unlock_installation(self, mysql_real_data_home);
1057
if (pbxt_share_tables) {
1058
xt_free_hashtable(self, pbxt_share_tables);
1059
pbxt_share_tables = NULL;
1064
* Shutdown the PBXT sub-system.
1066
/*
 * Full engine shutdown for the main PBXT thread: terminate recovery,
 * drop any database the thread still has in use, run pbxt_call_exit(),
 * leave the threading layer, destroy pbxt_database_mutex and mark the
 * engine as no longer initialised (pbxt_inited = false).
 *
 * NOTE(review): further calls between these steps are on lines not
 * visible in this text (the comment about the streaming engine refers to
 * one of them) - confirm against the full file.
 */
static void ha_exit(XTThreadPtr self)
1068
xt_xres_terminate_recovery(self);
1070
/* Wrap things up... */
1071
xt_unuse_database(self, self); /* Just in case the main thread has a database in use (for testing)? */
1072
/* This may cause the streaming engine to cleanup connections and
1073
* tables belonging to this engine. This in turn may require some of
1074
* the stuff below (like xt_create_thread() called from pbxt_close_table()! */
1078
pbxt_call_exit(self);
1079
xt_exit_threading(self);
1082
xt_p_mutex_destroy(&pbxt_database_mutex);
1083
pbxt_inited = false;
1087
* Output the PBXT status. Return FALSE on error.
1090
bool PBXTStorageEngine::show_status(Session *thd, stat_print_fn *stat_print, enum ha_stat_type)
1092
static bool pbxt_show_status(handlerton *XT_UNUSED(hton), THD* thd,
1093
stat_print_fn* stat_print,
1094
enum ha_stat_type XT_UNUSED(stat_type))
1099
XTStringBufferRec strbuf = { 0, 0, 0 };
1100
bool not_ok = FALSE;
1102
if (!(self = ha_set_current_thread(thd, &err)))
1105
#ifdef XT_SHOW_DUMPS_TRACE
1106
//if (pbxt_database)
1107
// xt_dump_xlogs(pbxt_database, 0);
1108
xt_trace("// %s - dump\n", xt_trace_clock_diff(NULL));
1111
#ifdef XT_TRACK_CONNECTIONS
1112
xt_dump_conn_tracking();
1116
xt_unit_test_async_task(self);
1120
myxt_get_status(self, &strbuf);
1128
if (stat_print(thd, "PBXT", 4, "", 0, strbuf.sb_cstring, (uint) strbuf.sb_len))
1131
xt_sb_set_size(self, &strbuf, 0);
1137
* Initialize the PBXT sub-system.
1139
* return 1 on error, else 0.
1142
static int pbxt_init(Context ®istry)
1144
static int pbxt_init(void *p)
1149
XT_PRINT0(NULL, "pbxt_init\n");
1151
if (sizeof(xtWordPS) != sizeof(void *)) {
1152
printf("PBXT: This won't work, I require that sizeof(xtWordPS) == sizeof(void *)!\n");
1156
/* GOTCHA: This will "detect" if you are loading the plug-in
1157
* with different --with-debug option to MySQL.
1159
* In this case, you will get an error when loading the
1160
* library that some symbol was not found.
1162
void *dummy = my_malloc(100, MYF(0));
1163
my_free((byte *) dummy, MYF(0));
1166
XTThreadPtr self = NULL;
1168
xt_p_mutex_init_with_autoname(&pbxt_database_mutex, NULL);
1171
pbxt_hton= new PBXTStorageEngine(std::string("PBXT"));
1172
registry.add(pbxt_hton);
1174
pbxt_hton = (handlerton *) p;
1175
pbxt_hton->state = SHOW_OPTION_YES;
1176
pbxt_hton->db_type = DB_TYPE_PBXT; // Wow! I have my own!
1177
pbxt_hton->close_connection = pbxt_close_connection; /* close_connection, cleanup thread related data. */
1178
pbxt_hton->commit = pbxt_commit; /* commit */
1179
pbxt_hton->rollback = pbxt_rollback; /* rollback */
1180
if (pbxt_support_xa) {
1181
pbxt_hton->prepare = pbxt_prepare;
1182
pbxt_hton->recover = pbxt_recover;
1183
pbxt_hton->commit_by_xid = pbxt_commit_by_xid;
1184
pbxt_hton->rollback_by_xid = pbxt_rollback_by_xid;
1187
pbxt_hton->prepare = NULL;
1188
pbxt_hton->recover = NULL;
1189
pbxt_hton->commit_by_xid = NULL;
1190
pbxt_hton->rollback_by_xid = NULL;
1192
pbxt_hton->create = pbxt_create_handler; /* Create a new handler */
1193
pbxt_hton->drop_database = pbxt_drop_database; /* Drop a database */
1194
pbxt_hton->panic = pbxt_panic; /* Panic call */
1195
pbxt_hton->show_status = pbxt_show_status;
1196
pbxt_hton->flags = HTON_NO_FLAGS; /* HTON_CAN_RECREATE - Without this flags TRUNCATE uses delete_all_rows() */
1197
pbxt_hton->slot = (uint)-1; /* assign invald value, so we know when it's inited later */
1198
pbxt_hton->start_consistent_snapshot = pbxt_start_consistent_snapshot;
1199
#if defined(MYSQL_SUPPORTS_BACKUP) && defined(XT_ENABLE_ONLINE_BACKUP)
1200
pbxt_hton->get_backup_engine = pbxt_backup_engine;
1203
if (!xt_init_logging()) /* Initialize logging */
1207
PBMSResultRec result;
1208
if (!pbms_initialize("PBXT", false, &result)) {
1209
xt_logf(XT_NT_ERROR, "pbms_initialize() Error: %s", result.mr_message);
1214
if (!xt_init_memory()) /* Initialize memory */
1217
self = xt_init_threading(); /* Create the main self: */
1224
/* Initialize all systems */
1225
pbxt_call_init(self);
1227
/* Conditional unit test: */
1229
//xt_unit_test_create_threads(self);
1230
//xt_unit_test_read_write_locks(self);
1231
//xt_unit_test_mutex_locks(self);
1234
/* {OPEN-DB-SWEEPER-WAIT}
1235
* I have to start the freeer before I open and recover the database
1236
* because it we run out of cache while waiting for the sweeper
1239
xt_start_freeer(self);
1241
/* This function is called with LOCK_plugin locked.
1242
* This prevents the opening of .frm files, which
1243
* is required for recovery.
1244
* Our solution is to start reovery in a thread
1245
* so that it can run after LOCK_plugin is released.
1247
xt_xres_start_database_recovery(self);
1250
xt_log_exception(self, &self->t_exception, XT_LOG_DEFAULT);
1256
/* {FREEER-HANG} The free-er will be hung in:
1257
#0 0x91fc6a2e in semaphore_wait_signal_trap
1258
#1 0x91fce505 in pthread_mutex_lock
1259
#2 0x00489633 in safe_mutex_lock at thr_mutex.c:149
1260
#3 0x002dfca9 in plugin_thdvar_init at sql_plugin.cc:2398
1261
#4 0x000d6a12 in THD::init at sql_class.cc:715
1262
#5 0x000de9d3 in THD::THD at sql_class.cc:597
1263
#6 0x000debe1 in THD::THD at sql_class.cc:631
1264
#7 0x00e207a4 in myxt_create_thread at myxt_xt.cc:2666
1265
#8 0x00e3134b in tabc_fr_run_thread at tabcache_xt.cc:982
1266
#9 0x00e422ca in xt_thread_main at thread_xt.cc:1006
1267
#10 0x91ff7c55 in _pthread_start
1268
#11 0x91ff7b12 in thread_start
1270
* so it is not good trying to stop it here!
1272
* With regard to this problem, see {OPEN-DB-SWEEPER-WAIT}
1273
* Due to this problem, I will probably have to hack
1274
* the mutex so that the freeer can get started...
1276
* NOPE! problem has gone in 6.0.9. Also not a problem in
1280
/* {OPEN-DB-SWEEPER-WAIT}
1281
* I have to stop the freeer here because it was
1282
* started before opening the database.
1285
/* {FREEER-HANG-ON-INIT-ERROR}
1286
* pbxt_init is called with LOCK_plugin and if it fails and tries to exit
1287
* the freeer here it hangs because the freeer calls THD::~THD which tries
1288
* to aquire the same lock and hangs. OTOH MySQL calls pbxt_end() after
1289
* an unsuccessful call to pbxt_init, so we defer cleaup, except
1292
xt_free_thread(self);
1295
xt_free_thread(self);
1297
XT_RETURN(init_err);
1311
/* Plug-in shutdown entry point (two alternative signatures are visible).
 * Clears pbxt_inited, which doubles as the "shutting down" flag, then creates
 * a temporary thread named "TempForEnd" and marks it as the main thread
 * (t_main = TRUE). The variant that receives a Registry also removes
 * pbxt_hton from it. */
int pbxt_end(Registry ®istry)
1313
static int pbxt_end(void *)
1324
/* This flag also means "shutting down". */
1325
pbxt_inited = false;
1326
self = xt_create_thread("TempForEnd", FALSE, TRUE, &e);
1328
self->t_main = TRUE;
1334
registry.remove(pbxt_hton);
1340
/* Panic callback installed in the handlerton: simply delegates to pbxt_end().
 * The flag argument is not used by the visible code. */
static int pbxt_panic(handlerton *hton, enum ha_panic_function flag)
1342
return pbxt_end(hton);
1347
* Kill the PBXT thread associated with the MySQL thread.
1350
/* Connection-close callback (two alternative signatures and two alternative
 * ways of reaching the per-connection engine data are visible). If a PBXT
 * thread is stored in the connection's engine-data slot, the slot is cleared
 * and the thread is released with xt_free_thread(). */
int PBXTStorageEngine::close_connection(Session *thd)
1352
PBXTStorageEngine * const hton = this;
1354
static int pbxt_close_connection(handlerton *hton, THD* thd)
1361
if ((self = (XTThreadPtr) *thd->getEngineData(hton))) {
1362
*thd->getEngineData(pbxt_hton) = NULL;
1364
if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
1365
*thd_ha_data(thd, hton) = NULL;
1367
/* Required because freeing the thread could cause
1368
* free of database which could call xt_close_file_ns()!
1371
xt_free_thread(self);
1377
* Currently does nothing because it was all done
1378
* when the last PBXT table was removed from the
1382
/* Drop-database hook (two alternative signatures are visible). Neither
 * signature names a parameter it uses: the method variant leaves its char *
 * unnamed and the handlerton variant wraps both parameters in XT_UNUSED. */
void PBXTStorageEngine::drop_database(char *)
1384
static void pbxt_drop_database(handlerton *XT_UNUSED(hton), char *XT_UNUSED(path))
1391
* NOTES ON TRANSACTIONS:
1393
* 1. If self->st_lock_count == 0 and transaction can be ended immediately.
1394
* If not, we must wait until the last lock is released on the last handler
1395
* to ensure that the tables are flushed before the transaction is
1396
* committed or aborted.
1398
* 2. all (below) indicates, within a BEGIN/END (i.e. auto_commit off) whether
1399
* the statement or the entire transaction is being terminated.
1400
* We currently ignore statement termination.
1402
* 3. If in BEGIN/END we must call ha_rollback() if we abort the transaction
1405
* NOTE ON CONSISTENT SNAPSHOTS:
1407
* PBXT itself doesn't need this function as its transaction mechanism provides
1408
* consistent snapshots for all transactions by default. This function is needed
1409
* only for multi-engine cases like this:
1411
* CREATE TABLE t1 ... ENGINE=INNODB
1412
* CREATE TABLE t2 ... ENGINE=PBXT
1413
* START TRANSACTION WITH CONSISTENT SNAPSHOT
1414
* SELECT * FROM t1 <-- at this point we need to know about the snapshot
1418
/* handlerton callback for START TRANSACTION WITH CONSISTENT SNAPSHOT.
 * Makes sure the calling thread has a database open (opening the global one
 * if pbxt_database is set and the thread has none), initializes the
 * transaction state with thd_init_xact(), begins a PBXT transaction and, on
 * success, registers it with the server via trans_register_ha(thd, TRUE, hton).
 * NOTE(review): self is dereferenced right after ha_set_current_thread();
 * other callers in this file test its result for NULL -- confirm a check
 * exists here as well. */
static int pbxt_start_consistent_snapshot(handlerton *hton, THD *thd)
1421
XTThreadPtr self = ha_set_current_thread(thd, &err);
1423
if (!self->st_database && pbxt_database) {
1424
xt_ha_open_database_of_table(self, (XTPathStrPtr) NULL);
1427
thd_init_xact(thd, self, true);
1429
if (xt_xn_begin(self)) {
1430
trans_register_ha(thd, TRUE, hton);
1432
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1436
* As of MySQL 5.1.41 the return value is not checked, so the server might assume
1437
* everything is fine even it isn't. InnoDB returns 0 on success.
1444
* Commit the PBXT transaction of the given thread.
1445
* thd is the MySQL thread structure.
1446
* pbxt_thr is a pointer to the PBXT thread structure.
1450
/* Commit callback (two alternative signatures are visible).
 * thd: the server thread/session; all: non-zero when the whole transaction
 * (rather than a single statement) is being ended.
 * If the connection has a PBXT thread with an active transaction
 * (st_xact_data), the transaction is committed with xt_xn_commit() only when
 * "all" is set or the thread is in auto-commit mode; a failed commit is
 * converted to a server error code. st_stat_trans is reset to FALSE. */
int PBXTStorageEngine::commit(Session *thd, bool all)
1452
PBXTStorageEngine * const hton = this;
1454
static int pbxt_commit(handlerton *hton, THD *thd, bool all)
1461
if ((self = (XTThreadPtr) *thd->getEngineData(hton))) {
1463
if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
1465
XT_PRINT2(self, "%s pbxt_commit all=%d\n", all ? "END CONN XACT" : "END STAT", all);
1467
if (self->st_xact_data) {
1468
/* There are no table locks, commit immediately in all cases
1469
* except when this is a statement commit with an explicit
1470
* transaction (!all && !self->st_auto_commit).
1472
if (all || self->st_auto_commit) {
1473
XT_PRINT0(self, "xt_xn_commit in pbxt_commit\n");
1475
if (!xt_xn_commit(self))
1476
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1480
self->st_stat_trans = FALSE;
1486
/* Rollback callback (two alternative signatures are visible).
 * Mirrors the commit callback: if the connection's PBXT thread has an active
 * transaction, it is rolled back with xt_xn_rollback() only when "all" is set
 * or the thread is in auto-commit mode; a failed rollback is converted to a
 * server error code. st_stat_trans is reset to FALSE. Statement-level
 * rollback is not implemented (see the TODO below). */
int PBXTStorageEngine::rollback(Session *thd, bool all)
1488
PBXTStorageEngine * const hton = this;
1490
static int pbxt_rollback(handlerton *hton, THD *thd, bool all)
1497
if ((self = (XTThreadPtr) *thd->getEngineData(hton))) {
1499
if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
1501
XT_PRINT2(self, "%s pbxt_rollback all=%d\n", all ? "CONN END XACT" : "STAT END", all);
1503
if (self->st_xact_data) {
1504
/* There are no table locks, rollback immediately in all cases
1505
* except when this is a statement commit with an explicit
1506
* transaction (!all && !self->st_auto_commit).
1508
* Note, the only reason for a rollback of a operation is
1509
* due to an error. In this case PBXT has already
1510
* undone the effects of the operation.
1512
* However, this is not the same as statement rollback
1513
* which can involve a number of operations.
1515
* TODO: Implement statement rollback.
1517
if (all || self->st_auto_commit) {
1518
XT_PRINT0(self, "xt_xn_rollback\n");
1519
if (!xt_xn_rollback(self))
1520
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1524
self->st_stat_trans = FALSE;
1530
/* Handler factory (two alternative signatures are visible). Tables whose path
 * is recognised by XTSystemTableShare::isSystemTable() get an ha_xtsys
 * handler; all others get an ha_pbxt handler. Both are allocated with
 * placement new on mem_root. */
Cursor *PBXTStorageEngine::create(TableShare& table, memory::Root *mem_root)
1532
PBXTStorageEngine * const hton = this;
1533
if (XTSystemTableShare::isSystemTable(table.path.str))
1535
static handler *pbxt_create_handler(handlerton *hton, TABLE_SHARE *table, MEM_ROOT *mem_root)
1537
if (table && XTSystemTableShare::isSystemTable(table->path.str))
1539
return new (mem_root) ha_xtsys(hton, table);
1541
return new (mem_root) ha_pbxt(hton, table);
1545
* -----------------------------------------------------------------------
1552
/* XA prepare callback. If the connection's PBXT thread has an active
 * transaction and either "all" is set or the thread is in auto-commit mode,
 * the XID is fetched with thd_get_xid() and the transaction is prepared with
 * xt_xn_prepare(); a failure is converted to a server error code.
 * NOTE(review): the first trace message below prints "pbxt_commit" although
 * this is the prepare path -- looks like a copy/paste leftover; confirm. */
static int pbxt_prepare(handlerton *hton, THD *thd, bool all)
1558
if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
1559
XT_PRINT1(self, "pbxt_commit all=%d\n", all);
1561
if (self->st_xact_data) {
1562
/* There are no table locks, commit immediately in all cases
1563
* except when this is a statement commit with an explicit
1564
* transaction (!all && !self->st_auto_commit).
1566
if (all || self->st_auto_commit) {
1569
XT_PRINT0(self, "xt_xn_prepare in pbxt_prepare\n");
1570
thd_get_xid(thd, (MYSQL_XID*) &xid);
1572
if (!xt_xn_prepare(xid.length(), (xtWord1 *) &xid, self))
1573
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1580
/* Obtain a PBXT thread with the global database open, for use by the XA
 * recovery callbacks. The current server thread's PBXT thread is used when
 * one exists; otherwise a temporary thread named thread_name is created.
 * *temp_thread is a bit mask telling the caller what must be released later:
 * bit 1 -> the PBXT thread (xt_free_thread), bit 2 -> the server thread
 * (myxt_destroy_thread). Waits for recovery to finish before opening the
 * database at mysql_real_data_home. On failure *err is set and the temporary
 * objects indicated by *temp_thread are released here. */
static XTThreadPtr ha_temp_open_global_database(handlerton *hton, THD **ret_thd, int *temp_thread, char *thread_name, int *err)
1583
XTThreadPtr self = NULL;
1586
if ((thd = current_thd))
1587
self = (XTThreadPtr) *thd_ha_data(thd, hton);
1589
//thd = (THD *) myxt_create_thread();
1590
//*temp_thread |= 2;
1596
if (!(self = xt_create_thread(thread_name, FALSE, TRUE, &e))) {
1597
*err = xt_ha_pbxt_to_mysql_error(e.e_xt_err);
1598
xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
1604
xt_xres_wait_for_recovery(self, XT_RECOVER_DONE);
1607
xt_open_database(self, mysql_real_data_home, TRUE);
1610
*err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1611
if ((*temp_thread & 1))
1612
xt_free_thread(self);
1613
if (*temp_thread & 2)
1614
myxt_destroy_thread(thd, FALSE);
1623
/* Counterpart of ha_temp_open_global_database(): releases the database held
 * by self and then frees whatever temp_thread marks as temporary
 * (bit 1 -> the PBXT thread, bit 2 -> the server thread). */
static void ha_temp_close_database(XTThreadPtr self, THD *thd, int temp_thread)
1625
xt_unuse_database(self, self);
1626
if (temp_thread & 1)
1627
xt_free_thread(self);
1628
if (temp_thread & 2)
1629
myxt_destroy_thread(thd, TRUE);
1632
/* Return all prepared transactions, found during recovery.
1633
* This function returns a count. If len is returned, the
1634
* function will be called again.
1636
/* XA recover callback: fills xid_list with up to len prepared transactions
 * enumerated from the global database via xt_xn_enum_xa_data(), using the
 * pbxt_xa_enum cursor, and closes the temporary database/thread afterwards.
 * NOTE(review): each entry is copied with memcpy() using xp_data_len as the
 * length -- confirm xp_data_len can never exceed sizeof(XID). */
static int pbxt_recover(handlerton *hton, XID *xid_list, uint len)
1642
XTXactPreparePtr xap;
1646
if (!(self = ha_temp_open_global_database(hton, &thd, &temp_thread, "TempForRecover", &err)))
1649
db = self->st_database;
1651
for (count=0; count<len; count++) {
1652
xap = xt_xn_enum_xa_data(db, &pbxt_xa_enum);
1655
memcpy(&xid_list[count], xap->xp_xa_data, xap->xp_data_len);
1658
ha_temp_close_database(self, thd, temp_thread);
1662
/* XA commit-by-XID callback. Looks up the prepared-transaction record for
 * xid in the global database; if its transaction can be attached to the
 * temporary thread, the XT_XN_XAC_PREPARED flag is cleared and the
 * transaction is committed (a failure is converted to a server error code).
 * The XA record is deleted with xt_xn_delete_xa_data() and the temporary
 * database/thread is closed. */
static int pbxt_commit_by_xid(handlerton *hton, XID *xid)
1668
XTXactPreparePtr xap;
1673
if (!(self = ha_temp_open_global_database(hton, &thd, &temp_thread, "TempForCommitXA", &err)))
1675
db = self->st_database;
1677
if ((xap = xt_xn_find_xa_data(db, xid->length(), (xtWord1 *) xid, TRUE, self))) {
1678
if ((self->st_xact_data = xt_xn_get_xact(db, xap->xp_xact_id, self))) {
1679
self->st_xact_data->xd_flags &= ~XT_XN_XAC_PREPARED; // Prepared transactions cannot be swept!
1680
if (!xt_xn_commit(self))
1681
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1683
xt_xn_delete_xa_data(db, xap, TRUE, self);
1686
ha_temp_close_database(self, thd, temp_thread);
1690
/* XA rollback-by-XID callback. Same sequence as pbxt_commit_by_xid(), except
 * that the attached transaction is ended with xt_xn_rollback(): find the XA
 * record, attach its transaction, clear XT_XN_XAC_PREPARED, roll back, delete
 * the XA record and close the temporary database/thread. */
static int pbxt_rollback_by_xid(handlerton *hton, XID *xid)
1696
XTXactPreparePtr xap;
1701
if (!(self = ha_temp_open_global_database(hton, &thd, &temp_thread, "TempForRollbackXA", &err)))
1703
db = self->st_database;
1705
if ((xap = xt_xn_find_xa_data(db, xid->length(), (xtWord1 *) xid, TRUE, self))) {
1706
if ((self->st_xact_data = xt_xn_get_xact(db, xap->xp_xact_id, self))) {
1707
self->st_xact_data->xd_flags &= ~XT_XN_XAC_PREPARED; // Prepared transactions cannot be swept!
1708
if (!xt_xn_rollback(self))
1709
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1711
xt_xn_delete_xa_data(db, xap, TRUE, self);
1714
ha_temp_close_database(self, thd, temp_thread);
1721
* -----------------------------------------------------------------------
1722
* HANDLER LOCKING FUNCTIONS
1724
* These functions are used get a lock on all handles of a particular table.
1728
/* Insert handler at the head of the share's doubly linked handler list
 * (sh_handlers, linked through pb_ex_next / pb_ex_prev). The list is modified
 * while holding share->sh_ex_mutex; the unlock is pushed as a cleanup action
 * with pushr_() and executed by freer_(). */
static void ha_add_to_handler_list(XTThreadPtr self, XTSharePtr share, ha_pbxt *handler)
1730
xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1731
pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1733
handler->pb_ex_next = share->sh_handlers;
1734
handler->pb_ex_prev = NULL;
1735
if (share->sh_handlers)
1736
share->sh_handlers->pb_ex_prev = handler;
1737
share->sh_handlers = handler;
1739
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1742
/* Unlink handler from the share's doubly linked handler list while holding
 * share->sh_ex_mutex. The list head is advanced if handler is the first
 * entry, and the neighbours' pb_ex_next / pb_ex_prev pointers are patched
 * around it. The handler's own link fields are left untouched. */
static void ha_remove_from_handler_list(XTThreadPtr self, XTSharePtr share, ha_pbxt *handler)
1744
xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1745
pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1747
/* Move front pointer: */
1748
if (share->sh_handlers == handler)
1749
share->sh_handlers = handler->pb_ex_next;
1751
/* Remove from list: */
1752
if (handler->pb_ex_prev)
1753
handler->pb_ex_prev->pb_ex_next = handler->pb_ex_next;
1754
if (handler->pb_ex_next)
1755
handler->pb_ex_next->pb_ex_prev = handler->pb_ex_prev;
1757
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1761
* Acquire exclusive use of a table, by waiting for all
1762
* threads to complete use of all handlers of the table.
1763
* At the same time we hold up all threads
1764
* that want to use handlers belonging to the table.
1766
* But we do not hold up threads that close the handlers.
1768
/* Take the share-level exclusive lock on behalf of handler "mine".
 * Phase 1: under sh_ex_mutex, wait on sh_ex_cond until no other thread holds
 * sh_table_lock, then set it to TRUE.
 * Phase 2: walk the share's handler list and wait until every handler other
 * than "mine" has pb_ex_in_use cleared, restarting the walk after each wait.
 * Both phases share one deadline computed from XT_SHARE_LOCK_TIMEOUT; when it
 * passes, the mutex is released and XT_ERR_LOCK_TIMEOUT is thrown. */
static void ha_aquire_exclusive_use(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine)
1771
time_t end_time = time(NULL) + XT_SHARE_LOCK_TIMEOUT / 1000;
1773
XT_PRINT1(self, "ha_aquire_exclusive_use (%s) PBXT X lock\n", share->sh_table_path->ps_path);
1774
/* GOTCHA: It is possible to hang here, if you hold
1775
* onto the sh_ex_mutex lock, before we really
1776
* have the exclusive lock (i.e. before all
1777
* handlers are no longer in use.
1778
* The reason is, because reopen() is not possible
1779
* when some other thread holds sh_ex_mutex.
1780
* So this can prevent a thread from completing its
1781
* use of a handler, when prevents exclusive use
1784
xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1785
pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1787
/* Wait until we can get an exclusive lock: */
1788
while (share->sh_table_lock) {
1789
xt_timed_wait_cond(self, (xt_cond_type *) share->sh_ex_cond, (xt_mutex_type *) share->sh_ex_mutex, XT_SHARE_LOCK_WAIT);
1790
if (time(NULL) > end_time) {
1791
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1792
xt_throw_taberr(XT_CONTEXT, XT_ERR_LOCK_TIMEOUT, share->sh_table_path);
1796
/* This tells readers (and other exclusive lockers) that someone has an exclusive lock. */
1797
share->sh_table_lock = TRUE;
1799
/* Wait for all open handlers use count to go to 0 */
1801
handler = share->sh_handlers;
1803
if (handler == mine || !handler->pb_ex_in_use)
1804
handler = handler->pb_ex_next;
1806
/* Wait a bit, and try again: */
1807
xt_timed_wait_cond(self, (xt_cond_type *) share->sh_ex_cond, (xt_mutex_type *) share->sh_ex_mutex, XT_SHARE_LOCK_WAIT);
1808
if (time(NULL) > end_time) {
1809
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1810
xt_throw_taberr(XT_CONTEXT, XT_ERR_LOCK_TIMEOUT, share->sh_table_path);
1812
/* Handler may have been freed, check from the begining again: */
1817
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1821
* If you have exclusively locked the table, you can close all handler
1824
* Call ha_aquire_exclusive_use() beforehand to get the exclusive lock.
1826
/* Under sh_ex_mutex, walk the share's handler list and, for every handler
 * other than "mine" that still has an open table, return that table to the
 * pool with xt_db_return_table_to_pool_ns() and clear pb_open_tab. */
static void ha_close_open_tables(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine)
1830
xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1831
pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1833
/* Now that we know no handler is in use, we can close all the
1836
handler = share->sh_handlers;
1838
if (handler != mine && handler->pb_open_tab) {
1839
xt_db_return_table_to_pool_ns(handler->pb_open_tab);
1840
handler->pb_open_tab = NULL;
1842
handler = handler->pb_ex_next;
1845
freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1848
/* Release the share-level exclusive lock: under sh_ex_mutex, clear
 * sh_table_lock and wake every waiter on sh_ex_cond. Two signatures are
 * visible; the one used when PBXT_ALLOW_PRINTING is defined names "self"
 * (needed by the trace call), the other marks it XT_UNUSED. */
#ifdef PBXT_ALLOW_PRINTING
1849
static void ha_release_exclusive_use(XTThreadPtr self, XTSharePtr share)
1851
static void ha_release_exclusive_use(XTThreadPtr XT_UNUSED(self), XTSharePtr share)
1854
XT_PRINT1(self, "ha_release_exclusive_use (%s) PBXT X UNLOCK\n", share->sh_table_path->ps_path);
1855
xt_lock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1856
share->sh_table_lock = FALSE;
1857
xt_broadcast_cond_ns((xt_cond_type *) share->sh_ex_cond);
1858
xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1861
/* Block handler "mine" until the share's exclusive lock (sh_table_lock) is
 * released. The handler first marks itself as not in use (pb_ex_in_use = 0)
 * so that the exclusive locker can proceed, then waits on sh_ex_cond,
 * broadcasting before each wait to wake that locker. Once the lock is gone
 * the handler marks itself in use again (pb_ex_in_use = 1). A failed wait or
 * an expired XT_SHARE_LOCK_TIMEOUT deadline releases the mutex; the timeout
 * case also registers XT_ERR_LOCK_TIMEOUT. */
static xtBool ha_wait_for_shared_use(ha_pbxt *mine, XTSharePtr share)
1863
time_t end_time = time(NULL) + XT_SHARE_LOCK_TIMEOUT / 1000;
1865
XT_PRINT1(xt_get_self(), "ha_wait_for_shared_use (%s) share lock wait...\n", share->sh_table_path->ps_path);
1866
mine->pb_ex_in_use = 0;
1867
xt_lock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1868
while (share->sh_table_lock) {
1869
/* Wake up the exclusive locker (may be waiting). He can try to continue: */
1870
xt_broadcast_cond_ns((xt_cond_type *) share->sh_ex_cond);
1872
if (!xt_timed_wait_cond(NULL, (xt_cond_type *) share->sh_ex_cond, (xt_mutex_type *) share->sh_ex_mutex, XT_SHARE_LOCK_WAIT)) {
1873
xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1877
if (time(NULL) > end_time) {
1878
xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1879
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_LOCK_TIMEOUT, share->sh_table_path);
1883
mine->pb_ex_in_use = 1;
1884
xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1888
/* Re-open this handler's table after its open-table object was given up.
 * Binds the current server thread, opens the table's database and share,
 * obtains a new open table from pb_share->sh_table and assigns it to this
 * thread. If index statistics have never been calculated
 * (tab_ind_stat_calc_time == 0), row pointers are loaded (or the whole table
 * when LOAD_TABLE_ON_OPEN is defined) and index selectivity is computed;
 * sh_recalc_selectivity is set when the table holds fewer than 150 rows.
 * Finally the auto-increment state is re-initialized with
 * init_auto_increment(0). Errors are converted to a server error code. */
xtPublic int ha_pbxt::reopen()
1890
THD *thd = current_thd;
1894
if (!(self = ha_set_current_thread(thd, &err)))
1895
return xt_ha_pbxt_to_mysql_error(err);
1898
xt_ha_open_database_of_table(self, pb_share->sh_table_path);
1900
ha_open_share(self, pb_share);
1902
if (!(pb_open_tab = xt_db_open_table_using_tab(pb_share->sh_table, self)))
1904
pb_open_tab->ot_thread = self;
1907
* We no longer use the information that a table
1908
* was opened in order to know when to calculate
1911
if (!pb_open_tab->ot_table->tab_ind_stat_calc_time) {
1912
#ifdef LOAD_TABLE_ON_OPEN
1913
xt_tab_load_table(self, pb_open_tab);
1915
xt_tab_load_row_pointers(self, pb_open_tab);
1917
xt_ind_set_index_selectivity(pb_open_tab, self);
1918
/* If the number of rows is less than 150 we will recalculate the
1919
* selectity of the indices, as soon as the number of rows
1920
* exceeds 200 (see [**])
1922
/* {FREE-ROWS-BAD} */
1923
pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150;
1926
/* I am not doing this anymore because it was only required
1927
* for DELETE FROM table;, which is now implemented
1928
* by deleting each row.
1929
* TRUNCATE TABLE does not preserve the counter value.
1931
//init_auto_increment(pb_share->sh_min_auto_inc);
1932
init_auto_increment(0);
1935
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
1943
* -----------------------------------------------------------------------
1944
* INFORMATION SCHEMA FUNCTIONS
1948
/* Fill callback for the PBXT_STATISTICS information-schema table.
 * When PBXT is not loaded, an XT_ERR_PBXT_NOT_INSTALLED exception is logged
 * and an empty set is returned. Otherwise the current thread is bound, the
 * global database is opened for it if it has none (so that the statistics
 * are not missing), and the rows are produced by
 * myxt_statistics_fill_table(). Errors are converted to a server error code. */
static int pbxt_statistics_fill_table(THD *thd, TABLE_LIST *tables, COND *cond)
1950
XTThreadPtr self = NULL;
1954
/* Can't do if PBXT is not loaded! */
1957
xt_exception_xterr(&e, XT_CONTEXT, XT_ERR_PBXT_NOT_INSTALLED);
1958
xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
1959
/* Just return an empty set: */
1963
if (!(self = ha_set_current_thread(thd, &err)))
1964
return xt_ha_pbxt_to_mysql_error(err);
1968
/* If the thread has no open database, and the global
1969
* database is already open, then open
1970
* the database. Otherwise the statement will be
1971
* executed without an open database, which means
1972
* that the related statistics will be missing.
1974
* This includes all background threads.
1976
if (!self->st_database && pbxt_database) {
1977
xt_ha_open_database_of_table(self, (XTPathStrPtr) NULL);
1980
err = myxt_statistics_fill_table(self, thd, tables, cond, system_charset_info);
1983
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1992
/* Column layout of the PBXT_STATISTICS information-schema table: ID (long),
 * Name (string, 40) and Value (longlong). Two alternative definitions are
 * visible -- a ColumnInfo array accompanied by an InfoSchemaMethods subclass
 * that forwards fillTable() to pbxt_statistics_fill_table(), and an
 * ST_FIELD_INFO array ending in an all-zero sentinel entry -- presumably
 * selected at build time. */
ColumnInfo pbxt_statistics_fields_info[]=
1994
ColumnInfo("ID", 4, MYSQL_TYPE_LONG, 0, 0, "The ID of the statistic", SKIP_OPEN_TABLE),
1995
ColumnInfo("Name", 40, MYSQL_TYPE_STRING, 0, 0, "The name of the statistic", SKIP_OPEN_TABLE),
1996
ColumnInfo("Value", 8, MYSQL_TYPE_LONGLONG, 0, 0, "The accumulated value", SKIP_OPEN_TABLE),
2000
class PBXTStatisticsMethods : public InfoSchemaMethods
2003
int fillTable(Session *session, TableList *tables, COND *cond)
2005
return pbxt_statistics_fill_table(session, tables, cond);
2010
ST_FIELD_INFO pbxt_statistics_fields_info[]=
2012
{ "ID", 4, MYSQL_TYPE_LONG, 0, 0, "The ID of the statistic", SKIP_OPEN_TABLE},
2013
{ "Name", 40, MYSQL_TYPE_STRING, 0, 0, "The name of the statistic", SKIP_OPEN_TABLE},
2014
{ "Value", 8, MYSQL_TYPE_LONGLONG, 0, 0, "The accumulated value", SKIP_OPEN_TABLE},
2015
{ 0, 0, MYSQL_TYPE_STRING, 0, 0, 0, SKIP_OPEN_TABLE}
2021
/* Initialization of the PBXT_STATISTICS information-schema plug-in. Two
 * alternative implementations are visible. The Registry variant allocates an
 * InfoSchemaTable named "PBXT_STATISTICS", attaches the column info and the
 * methods object, and adds it to the registry. The void* variant treats p as
 * an ST_SCHEMA_TABLE and sets its fields_info and fill_table members; on
 * builds with both XT_WIN and XT_COREDUMP defined it also registers a crash
 * filter when pbxt_crash_debug is set. */
static InfoSchemaTable *pbxt_statistics_table;
2022
static PBXTStatisticsMethods pbxt_statistics_methods;
2023
static int pbxt_init_statistics(Registry ®istry)
2025
//pbxt_statistics_table = (InfoSchemaTable *)xt_calloc_ns(sizeof(InfoSchemaTable));
2026
//pbxt_statistics_table->table_name= "PBXT_STATISTICS";
2027
pbxt_statistics_table = new InfoSchemaTable("PBXT_STATISTICS");
2028
pbxt_statistics_table->setColumnInfo(pbxt_statistics_fields_info);
2029
pbxt_statistics_table->setInfoSchemaMethods(&pbxt_statistics_methods);
2030
registry.add(pbxt_statistics_table);
2035
static int pbxt_init_statistics(void *p)
2037
ST_SCHEMA_TABLE *pbxt_statistics_table = (ST_SCHEMA_TABLE *) p;
2038
pbxt_statistics_table->fields_info = pbxt_statistics_fields_info;
2039
pbxt_statistics_table->fill_table = pbxt_statistics_fill_table;
2041
#if defined(XT_WIN) && defined(XT_COREDUMP)
2042
void register_crash_filter();
2044
if (pbxt_crash_debug)
2045
register_crash_filter();
2053
/* Shutdown of the PBXT_STATISTICS information-schema plug-in. The Registry
 * variant removes pbxt_statistics_table from the registry and deletes it;
 * the void* variant ignores its argument (XT_UNUSED). */
static int pbxt_exit_statistics(Registry ®istry)
2054
registry.remove(pbxt_statistics_table);
2055
delete pbxt_statistics_table;
2060
static int pbxt_exit_statistics(void *XT_UNUSED(p))
2067
* -----------------------------------------------------------------------
2073
/* Handler constructor (two alternative signatures are visible, differing in
 * how the table share is passed to the base class). Resets the per-handler
 * state flags: pb_key_read, pb_ignore_dup_key, pb_lock_table and
 * pb_table_locked. */
ha_pbxt::ha_pbxt(handlerton *hton, TableShare& table_arg) : handler(*hton, table_arg)
2075
ha_pbxt::ha_pbxt(handlerton *hton, TABLE_SHARE *table_arg) : handler(hton, table_arg)
2080
pb_key_read = FALSE;
2081
pb_ignore_dup_key = 0;
2082
pb_lock_table = FALSE;
2083
pb_table_locked = 0;
2091
* If frm_error() is called then we will use this to find out what file extensions
2092
* exist for the storage engine. This is also used by the default rename_table and
2093
* delete_table method in handler.cc.
2096
/* Returns the engine's list of table file extensions, pbxt_extensions. Two
 * alternative owners of the method are visible (PBXTStorageEngine and
 * ha_pbxt). */
const char **PBXTStorageEngine::bas_ext() const
2098
const char **ha_pbxt::bas_ext() const
2101
return pbxt_extensions;
2105
* Specify the caching type: HA_CACHE_TBL_NONTRANSACT, HA_CACHE_TBL_NOCACHE
2106
* HA_CACHE_TBL_ASKTRANSACT, HA_CACHE_TBL_TRANSACT
2108
/* Reports the query-cache type for PBXT tables: always
 * HA_CACHE_TBL_TRANSACT. */
MX_UINT8_T ha_pbxt::table_cache_type()
2110
return HA_CACHE_TBL_TRANSACT; /* Use transactional query cache */
2114
/* Returns the table capability flags of the engine. The flags visible here
 * are HA_CAN_INDEX_BLOBS, HA_NOT_EXACT_COUNT, HA_CAN_SQL_HANDLER and, when
 * MYSQL_VERSION_ID > 50119, HA_BINLOG_ROW_CAPABLE. HA_CAN_INSERT_DELAYED is
 * deliberately commented out (see the MySQL bug #40505 note below). */
MX_TABLE_TYPES_T ha_pbxt::table_flags() const
2117
/* We need this flag because records are not packed
2118
* into a table which means #ROWID != offset
2121
/* Since PBXT caches read records itself, I believe
2122
* this to be the case.
2126
* I am assuming a "key" means a unique index.
2127
* Of course a primary key does not allow nulls.
2131
* This is necessary because a MySQL blob can be
2134
HA_CAN_INDEX_BLOBS |
2136
* Due to transactional influences, this will be
2138
* Although the count is good enough for practical
2140
HA_NOT_EXACT_COUNT |
2144
* This basically means we have a file with the name of
2145
* database table (which we do).
2150
* Not sure what this does (but MyISAM and InnoDB have it)?!
2151
* Could it mean that we support the handler functions.
2153
HA_CAN_SQL_HANDLER |
2155
* This is not true, we cannot insert delayed, but a
2156
* really cannot see what's wrong with inserting normally
2157
* when asked to insert delayed!
2158
* And the functionallity is required to pass the alter_table
2161
* Disabled because of MySQL bug #40505
2163
/*HA_CAN_INSERT_DELAYED |*/
2164
#if MYSQL_VERSION_ID > 50119
2165
/* We can do row logging, but not statement, because
2166
* MVCC is not serializable!
2168
HA_BINLOG_ROW_CAPABLE |
2171
* Auto-increment is allowed on a partial key.
2178
* The following query from the DBT1 test is VERY slow
2179
* if we do not set HA_READ_ORDER.
2180
* The reason is that it must scan all duplicates, then
2183
* SELECT o_id, o_carrier_id, o_entry_d, o_ol_cnt
2184
* FROM orders FORCE INDEX (o_w_id)
2188
* ORDER BY o_id DESC limit 1;
2191
/* Returns the index capability flags; the index number, key part and
 * all_parts arguments are unused. With FLAGS_ARE_READ_DYNAMICALLY defined,
 * HA_READ_ORDER is left out of the result when the handler's open table is
 * flagged ot_for_update, because an update scan may return index entries out
 * of order (see the explanation below). Two further return statements are
 * visible, one with and one without HA_READ_ORDER -- presumably the two
 * sides of the same preprocessor switch; confirm. */
#define FLAGS_ARE_READ_DYNAMICALLY
2193
MX_ULONG_T ha_pbxt::index_flags(uint XT_UNUSED(inx), uint XT_UNUSED(part), bool XT_UNUSED(all_parts)) const
2195
/* It would be nice if the dynamic version of this function works,
2196
* but it does not. MySQL loads this information when the table is openned,
2197
* and then it is fixed.
2199
* The problem is, I have had to remove the HA_READ_ORDER option although
2200
* it applies to PBXT. PBXT returns entries in index order during an index
2201
* scan in _almost_ all cases.
2203
* A number of cases are demostrated here: [(11)]
2205
* If involves the following conditions:
2206
* - a SELECT FOR UPDATE, UPDATE or DELETE statement
2207
* - an ORDER BY, or join that requires the sort order
2208
* - another transaction which updates the index while it is being
2211
* In this "obscure" case, the index scan may return index
2212
* entries in the wrong order.
2214
#ifdef FLAGS_ARE_READ_DYNAMICALLY
2215
/* If were are in an update (SELECT FOR UPDATE, UPDATE or DELETE), then
2216
* it may be that we return the rows from an index in the wrong
2217
* order! This is due to the fact that update reads wait for transactions
2218
* to commit and this means that index entries may change position during
2221
if (pb_open_tab && pb_open_tab->ot_for_update)
2222
return (HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_KEYREAD_ONLY);
2223
/* If I understand HA_KEYREAD_ONLY then this means I do not
2224
* need to fetch the record associated with an index
2227
return (HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | HA_READ_RANGE | HA_KEYREAD_ONLY);
2229
return (HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_KEYREAD_ONLY);
2233
/* Common close path used by open() on failure and by close().
 * Under pb_share->sh_ex_mutex the handler's open table is taken over by the
 * calling thread (switching that thread to the table's database if needed)
 * and its return to the pool is pushed as a cleanup action. The handler is
 * then removed from the share's handler list, waiters on sh_ex_cond are
 * woken, and the share reference is dropped via ha_unget_share_removed().
 * The table is synchronously flushed only when there is no server thread or
 * the current statement is FLUSH TABLES. Exceptions are logged and cleared. */
void ha_pbxt::internal_close(THD *thd, struct XTThread *self)
2240
/* This lock must be held when we remove the handler's
2241
* open table because ha_close_open_tables() can run
2244
xt_lock_mutex_ns(pb_share->sh_ex_mutex);
2245
if ((ot = pb_open_tab)) {
2246
pb_open_tab->ot_thread = self;
2247
if (self->st_database != pb_open_tab->ot_table->tab_db)
2248
xt_ha_open_database_of_table(self, pb_share->sh_table_path);
2250
pushr_(xt_db_return_table_to_pool, ot);
2252
xt_unlock_mutex_ns(pb_share->sh_ex_mutex);
2254
ha_remove_from_handler_list(self, pb_share, this);
2256
/* Someone may be waiting for me to complete: */
2257
xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
2259
removed = ha_unget_share_removed(self, pb_share);
2262
/* Flush the table if this was the last handler: */
2263
/* This is not necessary but has the affect that
2264
* FLUSH TABLES; does a checkpoint!
2268
* This was killing performance as the number of threads increased!
2270
* When MySQL runs out of table handlers because the table
2271
* handler cache is too small, it starts to close handlers.
2272
* (open_cache.records > table_cache_size)
2274
* Which can lead to closing all handlers for a particular table.
2276
* It does this while holding lock_OPEN!
2277
* So this code below leads to a sync operation while lock_OPEN
2278
* is held. The result is that the whole server comes to a stop.
2280
if (!thd || thd_sql_command(thd) == SQLCOM_FLUSH) // FLUSH TABLES
2281
xt_sync_flush_table(self, ot, thd ? 0 : 4);
2283
freer_(); // xt_db_return_table_to_pool(ot);
2287
xt_log_and_clear_exception(self);
2296
* Used for opening tables. The name will be the name of the file.
2297
* A table is opened when it needs to be opened. For instance
2298
* when a request comes in for a select on the table (tables are not
2299
* open and closed for each request, they are cached).
2301
* Called from handler.cc by handler::ha_open(). The server opens all tables by
2302
* calling ha_open() which then calls the handler specific open().
2304
/* Open the table at table_path for this handler; the mode and
 * test_if_locked arguments are unused.
 * Sets ref_length to XT_RECORD_OFFS_SIZE, binds the current server thread,
 * opens the table's database, acquires the share and adds this handler to
 * the share's handler list. If another thread holds the share's exclusive
 * lock, the handler waits via ha_wait_for_shared_use(). The share and an
 * open-table object are then opened; when index statistics have never been
 * calculated, row pointers are loaded and index selectivity is computed.
 * Auto-increment state is initialized with init_auto_increment(0). On error
 * the handler is closed again with internal_close(); on success info() is
 * called to populate the table statistics. Waiters on sh_ex_cond are woken
 * if an exclusive lock is pending. */
int ha_pbxt::open(const char *table_path, int XT_UNUSED(mode), uint XT_UNUSED(test_if_locked))
2306
THD *thd = current_thd;
2310
ref_length = XT_RECORD_OFFS_SIZE;
2312
if (!(self = ha_set_current_thread(thd, &err)))
2313
return xt_ha_pbxt_to_mysql_error(err);
2315
XT_PRINT1(self, "open (%s)\n", table_path);
2319
xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
2321
pb_share = ha_get_share(self, table_path, false);
2322
ha_add_to_handler_list(self, pb_share, this);
2323
if (pb_share->sh_table_lock) {
2324
if (!ha_wait_for_shared_use(this, pb_share))
2328
ha_open_share(self, pb_share);
2330
thr_lock_data_init(&pb_share->sh_lock, &pb_lock, NULL);
2331
if (!(pb_open_tab = xt_db_open_table_using_tab(pb_share->sh_table, self)))
2333
pb_open_tab->ot_thread = self;
2336
if (!pb_open_tab->ot_table->tab_ind_stat_calc_time) {
2337
#ifdef LOAD_TABLE_ON_OPEN
2338
xt_tab_load_table(self, pb_open_tab);
2340
xt_tab_load_row_pointers(self, pb_open_tab);
2342
xt_ind_set_index_selectivity(pb_open_tab, self);
2343
/* {FREE-ROWS-BAD} */
2344
pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150;
2347
init_auto_increment(0);
2350
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
2351
internal_close(thd, self);
2356
info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
2360
/* Someone may be waiting for me to complete: */
2361
if (pb_share->sh_table_lock)
2362
xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
2369
Closes a table. We call the free_share() function to free any resources
2370
that we have allocated in the "shared" structure.
2372
Called from sql_base.cc, sql_select.cc, and table.cc.
2373
In sql_select.cc it is only used to close up temporary tables or during
2374
the process where a temporary table is converted over to being a
2376
For sql_base.cc look at close_data_tables().
2378
/* Close this handler. Binds the current server thread via
 * ha_set_current_thread(); the visible code also creates a temporary thread
 * named "TempForClose" (presumably when no server thread is available --
 * confirm), logging the exception if that fails. The actual work is done by
 * internal_close(); errors are converted to a server error code. The visible
 * code also frees the thread with xt_free_thread() and logs a warning when
 * the table reference cannot be released; the conditions guarding those two
 * statements are not visible. */
int ha_pbxt::close(void)
2380
THD *thd = current_thd;
2381
volatile int err = 0;
2382
volatile XTThreadPtr self;
2385
self = ha_set_current_thread(thd, (int *) &err);
2389
if (!(self = xt_create_thread("TempForClose", FALSE, TRUE, &e))) {
2390
xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
2395
XT_PRINT1(self, "close (%s)\n", pb_share && pb_share->sh_table_path->ps_path ? pb_share->sh_table_path->ps_path : "unknown");
2399
internal_close(thd, self);
2402
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
2407
xt_free_thread(self);
2410
xt_log(XT_NS_CONTEXT, XT_LOG_WARNING, "Unable to release table reference\n");
2415
// Initialise the in-memory auto-increment counter of the open table
// (tab->tab_auto_inc) by reading the largest existing value of the
// auto-increment column through its index.
// The work is only done when the table has an auto-increment field and the
// counter is still zero; it runs under tab->tab_ainc_lock.
// min_auto_inc: lower bound; the counter ends up at least min_auto_inc-1.
void ha_pbxt::init_auto_increment(xtWord8 min_auto_inc)
2421
/* Get the value of the auto-increment value by
2422
* loading the highest value from the index...
2424
tab = pb_open_tab->ot_table;
2426
/* Cannot do this if the index version is bad! */
2427
if (tab->tab_dic.dic_disable_index)
2430
xt_spinlock_lock(&tab->tab_ainc_lock);
2431
if (table->found_next_number_field && !tab->tab_auto_inc) {
2432
// Saved so that the table state changed below can be restored afterwards.
Field *tmp_fie = table->next_number_field;
2433
THD *tmp_thd = table->in_use;
2434
xtBool xn_started = FALSE;
2435
XTThreadPtr self = pb_open_tab->ot_thread;
2439
* A table may be opened by a thread with a running
2441
* Since get_auto_increment() does not do an update,
2442
* it should be OK to use the transaction we already
2443
* have to get the next auto-increment value.
2445
// No transaction is running on this thread: set up the thread state and begin one.
if (!self->st_xact_data) {
2446
self->st_xact_mode = XT_XACT_REPEATABLE_READ;
2447
self->st_ignore_fkeys = FALSE;
2448
self->st_auto_commit = TRUE;
2449
self->st_table_trans = FALSE;
2450
self->st_abort_trans = FALSE;
2451
self->st_stat_ended = FALSE;
2452
self->st_stat_trans = FALSE;
2453
self->st_is_update = NULL;
2454
if (!xt_xn_begin(self)) {
2455
// Failed to begin the transaction: the lock is released on this path.
xt_spinlock_unlock(&tab->tab_ainc_lock);
2461
/* Setup the conditions for the next call! */
2462
table->in_use = current_thd;
2463
table->next_number_field = table->found_next_number_field;
2465
// Read through the auto-increment index, using key data only.
extra(HA_EXTRA_KEYREAD);
2466
table->mark_columns_used_by_index_no_reset(TS(table)->next_number_index, table->read_set);
2467
column_bitmaps_signal();
2468
index_init(TS(table)->next_number_index, 0);
2469
if (!TS(table)->next_number_key_offset) {
2470
// Autoincrement at key-start
2471
err = index_last(table->record[1]);
2472
if (!err && !table->next_number_field->is_null(TS(table)->rec_buff_length)) {
2474
nr = (xtWord8) table->next_number_field->val_int_offset(TS(table)->rec_buff_length);
2478
/* Do an index scan to find the largest value! */
2479
/* The standard method will not work because it forces
2480
* us to lock that table!
2484
err = index_first(table->record[1]);
2487
val = (xtWord8) table->next_number_field->val_int_offset(TS(table)->rec_buff_length);
2490
err = index_next(table->record[1]);
2495
extra(HA_EXTRA_NO_KEYREAD);
2498
* I have changed this from post increment to pre-increment!
2500
* When using post increment we are not able to return
2501
* the last valid value in the range.
2503
* Here the test example:
2505
* drop table if exists t1;
2506
* create table t1 (i tinyint unsigned not null auto_increment primary key) engine=pbxt;
2507
* insert into t1 set i = 254;
2508
* insert into t1 set i = null;
2510
* With post-increment, this last insert fails because on post increment
2511
* the value overflows!
2513
* Pre-increment means we store the current max, and increment
2514
* before returning the next value.
2516
* This will work in this situation.
2518
// Store the value found, then apply the two lower bounds (dictionary minimum and caller minimum).
tab->tab_auto_inc = nr;
2519
if (tab->tab_auto_inc < tab->tab_dic.dic_min_auto_inc)
2520
tab->tab_auto_inc = tab->tab_dic.dic_min_auto_inc-1;
2521
if (tab->tab_auto_inc < min_auto_inc)
2522
tab->tab_auto_inc = min_auto_inc-1;
2524
/* Restore the changed values: */
2525
table->next_number_field = tmp_fie;
2526
table->in_use = tmp_thd;
2529
XT_PRINT0(self, "xt_xn_commit in init_auto_increment\n");
2533
xt_spinlock_unlock(&tab->tab_ainc_lock);
2536
// Reserve the next auto-increment value for this table.
// The shared counter tab->tab_auto_inc is read and updated under
// tab->tab_ainc_lock. nb_desired_values is unused, and exactly one value is
// reported as reserved through *nb_reserved_values.
// offset, increment: used to align the new value when increment > 1.
void ha_pbxt::get_auto_increment(MX_ULONGLONG_T offset, MX_ULONGLONG_T increment,
2537
MX_ULONGLONG_T XT_UNUSED(nb_desired_values),
2538
MX_ULONGLONG_T *first_value,
2539
MX_ULONGLONG_T *nb_reserved_values)
2541
register XTTableHPtr tab;
2542
MX_ULONGLONG_T nr, nr_less_inc;
2544
ASSERT_NS(pb_ex_in_use);
2546
tab = pb_open_tab->ot_table;
2549
* Assume that nr contains the last value returned!
2550
* We will increment and then return the value.
2552
xt_spinlock_lock(&tab->tab_ainc_lock);
2553
nr = (MX_ULONGLONG_T) tab->tab_auto_inc;
2557
// Round nr up to the next value of the form offset + k * increment.
else if (increment > 1 && ((nr - offset) % increment) != 0)
2558
nr += increment - ((nr - offset) % increment);
2561
// The new value is accepted only if the field's cmp() orders the previous value before it.
if (table->next_number_field->cmp((const unsigned char *)&nr_less_inc, (const unsigned char *)&nr) < 0)
2562
tab->tab_auto_inc = (xtWord8) (nr);
2564
nr = ~0; /* indicate error to the caller */
2565
xt_spinlock_unlock(&tab->tab_ainc_lock);
2568
*nb_reserved_values = 1;
2571
/* GOTCHA: We need to use signed value here because of the test
2572
* (from auto_increment.test):
2573
* create table t1 (a int not null auto_increment primary key);
2574
* insert into t1 values (NULL);
2575
* insert into t1 values (-1);
2576
* insert into t1 values (NULL);
2578
// Raise the table's auto-increment counter after a row was written with an
// explicit value in the auto-increment field nr.
// The counter is compared once without the lock and again while holding
// tab->tab_ainc_lock before it is changed.
// When xt_db_auto_increment_mode is 1, the dictionary minimum
// (dic_min_auto_inc) is also advanced and written out with
// xt_tab_write_min_auto_inc().
xtPublic void ha_set_auto_increment(XTOpenTablePtr ot, Field *nr)
2580
register XTTableHPtr tab;
2581
MX_ULONGLONG_T nr_int_val;
2583
nr_int_val = nr->val_int();
2586
// Check without the lock first; the same test is repeated under the lock.
if (nr->cmp((const unsigned char *)&tab->tab_auto_inc) > 0) {
2587
xt_spinlock_lock(&tab->tab_ainc_lock);
2589
if (nr->cmp((const unsigned char *)&tab->tab_auto_inc) > 0) {
2591
* We increment later, so just set the value!
2592
MX_ULONGLONG_T nr_int_val_plus_one = nr_int_val + 1;
2593
if (nr->cmp((const unsigned char *)&nr_int_val_plus_one) < 0)
2594
tab->tab_auto_inc = nr_int_val_plus_one;
2597
tab->tab_auto_inc = nr_int_val;
2599
xt_spinlock_unlock(&tab->tab_ainc_lock);
2602
if (xt_db_auto_increment_mode == 1) {
2603
if (nr_int_val > (MX_ULONGLONG_T) tab->tab_dic.dic_min_auto_inc) {
2604
/* Do this every 100 calls: */
2606
// NOTE(review): the +5 and +100 steps are presumably alternative builds (e.g. debug vs. release) -- confirm.
tab->tab_dic.dic_min_auto_inc = nr_int_val + 5;
2608
tab->tab_dic.dic_min_auto_inc = nr_int_val + 100;
2610
ot->ot_thread = xt_get_self();
2611
// A failure to write the new minimum is logged and cleared, not returned.
if (!xt_tab_write_min_auto_inc(ot))
2612
xt_log_and_clear_exception(ot->ot_thread);
2618
// Debug helper: prints the len bytes of buf to stdout twice, first as
// characters (bytes above 127 are shown as '.'), then as two-digit hex.
static void dump_buf(unsigned char *buf, int len)
2622
for (i=0; i<len; i++) printf("%2c", buf[i] <= 127 ? buf[i] : '.');
2624
for (i=0; i<len; i++) printf("%02x", buf[i]);
2630
* write_row() inserts a row. No extra() hint is given currently if a bulk load
2631
* is happening. buf() is a byte array of data. You can use the field
2632
* information to extract the data from the native byte array type.
2633
* Example of this would be:
2634
* for (Field **field=table->field ; *field ; field++)
2639
* See ha_tina.cc for an example of extracting all of the data as strings.
2640
* ha_berkeley.cc has an example of how to store it intact by "packing" it
2641
* for ha_berkeley's own native storage type.
2643
* See the note for update_row() on auto_increments and timestamps. This
2644
* case also applies to write_row().
2646
* Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc,
2647
* sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc.
2649
// Insert the row in buf into the table.
// Steps visible here: PBMS blob references are written, an import statement
// is committed and restarted after XT_IMPORT_ROW_COUNT rows, the
// auto-set-on-insert timestamp and the auto-increment field are filled in,
// and the record is stored with xt_tab_new_record().
// pbms_completed() is told whether the insert succeeded (err == 0).
int ha_pbxt::write_row(byte *buf)
2653
ASSERT_NS(pb_ex_in_use);
2655
XT_PRINT1(pb_open_tab->ot_thread, "write_row (%s)\n", pb_share->sh_table_path->ps_path);
2656
XT_DISABLED_TRACE(("INSERT tx=%d val=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(&buf[1])));
2657
//statistic_increment(ha_write_count,&LOCK_status);
2659
PBMSResultRec result;
2660
err = pbms_write_row_blobs(table, buf, &result);
2662
xt_logf(XT_NT_ERROR, "pbms_write_row_blobs() Error: %s", result.mr_message);
2667
/* {START-STAT-HACK} previously position of start statement hack. */
2668
xt_xlog_check_long_writer(pb_open_tab->ot_thread);
2670
// Import statements: commit and begin a new transaction every XT_IMPORT_ROW_COUNT rows.
if (pb_open_tab->ot_thread->st_import_stat) {
2671
if (pb_import_row_count >= XT_IMPORT_ROW_COUNT) {
2672
/* Commit and restart the transaction. */
2673
XTThreadPtr thread = pb_open_tab->ot_thread;
2675
XT_PRINT0(thread, "xt_xn_commit in write_row\n");
2676
if (!xt_xn_commit(thread)) {
2677
err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, thread, pb_ignore_dup_key);
2680
XT_PRINT0(thread, "xt_xn_begin in write_row\n");
2681
if (!xt_xn_begin(thread)) {
2682
err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, thread, pb_ignore_dup_key);
2685
pb_import_row_count = 0;
2688
pb_import_row_count++;
2691
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
2692
table->timestamp_field->set_time();
2694
// Auto-increment handling applies only when the row being written is table->record[0].
if (table->next_number_field && buf == table->record[0]) {
2695
int update_err = update_auto_increment();
2697
ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2701
ha_set_auto_increment(pb_open_tab, table->next_number_field);
2704
if (!xt_tab_new_record(pb_open_tab, (xtWord1 *) buf)) {
2705
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2708
* This is needed to allow the same row to be updated multiple times in case of bulk REPLACE.
2709
* This happens during execution of LOAD DATA...REPLACE MySQL first tries to INSERT the row
2710
* and if it gets dup-key error it tries UPDATE, so the same row can be overwriten multiple
2711
* times within the same statement
2713
if (err == HA_ERR_FOUND_DUPP_KEY && pb_open_tab->ot_thread->st_is_update) {
2714
/* Pop the update stack: */
2715
//pb_open_tab->ot_thread->st_update_id++;
2716
XTOpenTablePtr curr = pb_open_tab->ot_thread->st_is_update;
2718
pb_open_tab->ot_thread->st_is_update = curr->ot_prev_update;
2719
curr->ot_prev_update = NULL;
2725
pbms_completed(table, (err == 0));
2731
static int equ_bin(const byte *a, const char *b)
2741
// Debug helper: writes bytes starting at a_in to the trace with xt_trace(),
// as two-digit hex values and as characters (byte values outside 9..126 are
// shown as '.').
// NOTE(review): how offset and len_in bound the output is not documented here -- confirm.
static void dump_bin(const byte *a_in, int offset, int len_in)
2743
const byte *a = a_in;
2748
xt_trace("%02X", (int) *a);
2757
xt_trace("%c", (*a > 8 && *a < 127) ? *a : '.');
2766
* Yes, update_row() does what you expect, it updates a row. old_data will have
2767
* the previous row record in it, while new_data will have the newest data in
2768
* it. Keep in mind that the server can do updates based on ordering if an ORDER BY
2769
* clause was used. Consecutive ordering is not guaranteed.
2771
* Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
2773
// Replace the row image old_data with new_data.
// Steps visible here: this open table is pushed on the thread's update stack,
// the auto-set-on-update timestamp is refreshed, PBMS blob references of the
// old row are deleted and those of the new row written, the auto-increment
// counter is raised if the new row carries a larger value, and the record is
// updated with xt_tab_update_record().
// pbms_completed() is told whether the update succeeded (err == 0).
int ha_pbxt::update_row(const byte * old_data, byte * new_data)
2776
register XTThreadPtr self = pb_open_tab->ot_thread;
2778
ASSERT_NS(pb_ex_in_use);
2780
XT_PRINT1(self, "update_row (%s)\n", pb_share->sh_table_path->ps_path);
2781
XT_DISABLED_TRACE(("UPDATE tx=%d val=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(&new_data[1])));
2782
//statistic_increment(ha_update_count,&LOCK_status);
2783
/* {START-STAT-HACK} previously position of start statement hack. */
2784
xt_xlog_check_long_writer(self);
2786
/* {UPDATE-STACK} */
2787
// Push only when this open table is not already the top of the update stack.
if (self->st_is_update != pb_open_tab) {
2788
/* Push the update stack: */
2789
pb_open_tab->ot_prev_update = self->st_is_update;
2790
self->st_is_update = pb_open_tab;
2791
pb_open_tab->ot_update_id++;
2794
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
2795
table->timestamp_field->set_time();
2798
PBMSResultRec result;
2800
err = pbms_delete_row_blobs(table, old_data, &result);
2802
xt_logf(XT_NT_ERROR, "update_row:pbms_delete_row_blobs() Error: %s", result.mr_message);
2805
err = pbms_write_row_blobs(table, new_data, &result);
2807
xt_logf(XT_NT_ERROR, "update_row:pbms_write_row_blobs() Error: %s", result.mr_message);
2812
/* GOTCHA: We need to check the auto-increment value on update
2813
* because of the following test (which fails for InnoDB) -
2814
* auto_increment.test:
2815
* create table t1 (a int not null auto_increment primary key, val int);
2816
* insert into t1 (val) values (1);
2817
* update t1 set a=2 where a=1;
2818
* insert into t1 (val) values (1);
2820
if (table->found_next_number_field && new_data == table->record[0]) {
2822
my_bitmap_map *old_map;
2824
// Temporarily mark all columns readable so the field value can be fetched.
old_map = mx_tmp_use_all_columns(table, table->read_set);
2825
nr = table->found_next_number_field->val_int();
2826
ha_set_auto_increment(pb_open_tab, table->found_next_number_field);
2827
mx_tmp_restore_column_map(table, old_map);
2830
if (!xt_tab_update_record(pb_open_tab, (xtWord1 *) old_data, (xtWord1 *) new_data))
2831
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2833
pb_open_tab->ot_table->tab_locks.xt_remove_temp_lock(pb_open_tab, TRUE);
2837
pbms_completed(table, (err == 0));
2844
* This will delete a row. buf will contain a copy of the row to be deleted.
2845
* The server will call this right after the current row has been called (from
2846
* either a previous rnd_next() or index call).
2848
* Called in sql_acl.cc and sql_udf.cc to manage internal table information.
2849
* Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select it is
2850
* used for removing duplicates while in insert it is used for REPLACE calls.
2852
// Delete the row whose image is in buf.
// PBMS blob references of the row are deleted first, then the record is
// removed with xt_tab_delete_record() and the temporary row lock is dropped.
// pbms_completed() is told whether the delete succeeded (err == 0).
int ha_pbxt::delete_row(const byte * buf)
2856
ASSERT_NS(pb_ex_in_use);
2858
XT_PRINT1(pb_open_tab->ot_thread, "delete_row (%s)\n", pb_share->sh_table_path->ps_path);
2859
XT_DISABLED_TRACE(("DELETE tx=%d val=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(&buf[1])));
2860
//statistic_increment(ha_delete_count,&LOCK_status);
2863
PBMSResultRec result;
2865
err = pbms_delete_row_blobs(table, buf, &result);
2867
xt_logf(XT_NT_ERROR, "pbms_delete_row_blobs() Error: %s", result.mr_message);
2871
/* {START-STAT-HACK} previously position of start statement hack. */
2872
xt_xlog_check_long_writer(pb_open_tab->ot_thread);
2874
if (!xt_tab_delete_record(pb_open_tab, (xtWord1 *) buf))
2875
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2877
pb_open_tab->ot_table->tab_locks.xt_remove_temp_lock(pb_open_tab, TRUE);
2880
pbms_completed(table, (err == 0));
2886
* -----------------------------------------------------------------------
2891
* This looks like a hack, but actually, it is OK.
2892
* It depends on the setup done by the super-class. It involves an extra
2893
* range check that we need to do if a "new" record is returned during
2896
* A new record is returned if a row is updated (by another transaction)
2897
* during the index scan. If an update is detected, then the scan stops
2898
* and waits for the transaction to end.
2900
* If the transaction commits, then the updated row is returned instead
2901
* of the row it would have returned when doing a consistent read
2902
* (repeatable read).
2904
* These new records can appear out of index order, and may not even
2905
* belong to the index range that we are concerned with.
2907
* Notice that there is no check for the start of the range. It appears
2908
* that this is not necessary, MySQL seems to have no problem ignoring
2911
* A number of tests have been given below which demonstrate the use
2914
* They also demonstrate the ORDER BY problem described here: [(11)].
2916
* DROP TABLE IF EXISTS test_tab, test_tab_1, test_tab_2;
2917
* CREATE TABLE test_tab (ID int primary key, Value int, Name varchar(20), index(Value, Name)) ENGINE=pbxt;
2918
* INSERT test_tab values(1, 1, 'A');
2919
* INSERT test_tab values(2, 1, 'B');
2920
* INSERT test_tab values(3, 1, 'C');
2921
* INSERT test_tab values(4, 2, 'D');
2922
* INSERT test_tab values(5, 2, 'E');
2923
* INSERT test_tab values(6, 2, 'F');
2924
* INSERT test_tab values(7, 2, 'G');
2926
* select * from test_tab where value = 1 order by value, name for update;
2931
* select * from test_tab where id = 5 for update;
2935
* select * from test_tab where value = 2 order by value, name for update;
2938
* update test_tab set value = 3 where id = 6;
2944
* select * from test_tab where id = 5 for update;
2948
* select * from test_tab where value >= 2 order by value, name for update;
2951
* update test_tab set value = 3 where id = 6;
2957
* select * from test_tab where id = 5 for update;
2961
* select * from test_tab where value = 2 order by value, name for update;
2964
* update test_tab set value = 1 where id = 6;
2968
// Decide whether the row in buf still belongs to the current index scan.
// With a search key: an index key is built from the row and compared with
// the search key; the result is stored in search_key->sk_on_key and returned.
// Otherwise: the row is checked against the end of the range with
// compare_key(end_range).
// Returns non-zero when the row is in range. The ot parameter is unused.
int ha_pbxt::xt_index_in_range(register XTOpenTablePtr XT_UNUSED(ot), register XTIndexPtr ind,
2969
register XTIdxSearchKeyPtr search_key, xtWord1 *buf)
2971
/* If search key is given, this means we want an exact match. */
2973
xtWord1 key_buf[XT_INDEX_MAX_KEY_SIZE];
2975
myxt_create_key_from_row(ind, key_buf, buf, NULL);
2976
search_key->sk_on_key = myxt_compare_key(ind, search_key->sk_key_value.sv_flags, search_key->sk_key_value.sv_length,
2977
search_key->sk_key_value.sv_key, key_buf) == 0;
2978
return search_key->sk_on_key;
2981
/* Otherwise, check the end of the range. */
2983
return compare_key(end_range) <= 0;
2987
// Return the next visible row of a forward index scan in buf.
// Two variants of the same loop follow: the first takes the row data from the
// index entry (xt_tab_visible() and xt_idx_read()), the second reads the full
// record (xt_tab_read_record()); key_only presumably selects between them.
// Each loop runs while ot->ot_curr_rec_id is set and stops once an exact-match
// search key is no longer on its key.
// Returns HA_ERR_END_OF_FILE when the scan is exhausted, or the code produced
// by ha_log_pbxt_thread_error_for_mysql() on failure.
int ha_pbxt::xt_index_next_read(register XTOpenTablePtr ot, register XTIndexPtr ind, xtBool key_only,
2988
register XTIdxSearchKeyPtr search_key, byte *buf)
2990
xt_xlog_check_long_writer(ot->ot_thread);
2993
/* We only need to read the data from the key: */
2994
while (ot->ot_curr_rec_id) {
2995
if (search_key && !search_key->sk_on_key)
2998
switch (xt_tab_visible(ot)) {
3000
if (xt_idx_next(ot, ind, search_key))
3005
if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3007
if (xt_index_in_range(ot, ind, search_key, buf)) {
3010
if (!xt_idx_next(ot, ind, search_key))
3014
/* We cannot start from the beginning again, if we have
3015
* already output rows!
3016
* And we need the orginal search key.
3018
* The case in which this occurs is:
3020
* T1: UPDATE tbl_file SET GlobalID = 'DBCD5C4514210200825501089884844_6M' WHERE ID = 39
3021
* Locks a particular row.
3023
* T2: SELECT ID,Flags FROM tbl_file WHERE SpaceID = 1 AND Path = '/zi/America/' AND
3024
* Name = 'Cuiaba' AND Flags IN ( 0,1,4,5 ) FOR UPDATE
3025
* scans the index and stops on the lock (of the before image) above.
3027
* T1 quits, the sweeper deletes the record updated by T1?!
3028
* BUG: Cleanup should wait until T2 is complete!
3030
* T2 continues, and returns XT_RETRY.
3032
* At this stage T2 has already returned some rows, so it may not retry from the
3033
* start. Instead it tries to locate the last record it tried to lock.
3034
* This record is gone (or not visible), so it finds the next one.
3036
* POTENTIAL BUG: If cleanup does not wait until T2 is complete, then
3037
* I may miss the update record, if it is moved before the index scan
3040
// Retry: search again from the original key only if no rows were returned yet; otherwise re-position.
if (!pb_ind_row_count && search_key) {
3041
if (!xt_idx_search(pb_open_tab, ind, search_key))
3042
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3045
if (!xt_idx_research(pb_open_tab, ind))
3050
if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3057
// Second variant: the same scan, reading the full record.
while (ot->ot_curr_rec_id) {
3058
if (search_key && !search_key->sk_on_key)
3061
switch (xt_tab_read_record(ot, (xtWord1 *) buf)) {
3063
XT_DISABLED_TRACE(("not visi tx=%d rec=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) ot->ot_curr_rec_id));
3064
if (xt_idx_next(ot, ind, search_key))
3069
if (xt_index_in_range(ot, ind, search_key, buf))
3071
if (!xt_idx_next(ot, ind, search_key))
3075
if (!pb_ind_row_count && search_key) {
3076
if (!xt_idx_search(pb_open_tab, ind, search_key))
3077
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3080
if (!xt_idx_research(pb_open_tab, ind))
3085
XT_DISABLED_TRACE(("visible tx=%d rec=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) ot->ot_curr_rec_id));
3090
return HA_ERR_END_OF_FILE;
3093
return ha_log_pbxt_thread_error_for_mysql(FALSE);
3096
// Return the next visible row of a backward index scan in buf.
// Mirrors xt_index_next_read(): one loop takes the row data from the index
// entry, the other reads the full record; key_only presumably selects
// between them. Invisible rows are skipped with xt_idx_prev(), and a retry
// re-positions with xt_idx_search_prev() or xt_idx_research().
// Returns HA_ERR_END_OF_FILE when the scan is exhausted, or the code produced
// by ha_log_pbxt_thread_error_for_mysql() on failure.
int ha_pbxt::xt_index_prev_read(XTOpenTablePtr ot, XTIndexPtr ind, xtBool key_only,
3097
register XTIdxSearchKeyPtr search_key, byte *buf)
3100
/* We only need to read the data from the key: */
3101
while (ot->ot_curr_rec_id) {
3102
if (search_key && !search_key->sk_on_key)
3105
switch (xt_tab_visible(ot)) {
3107
if (xt_idx_prev(ot, ind, search_key))
3112
if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3114
if (xt_index_in_range(ot, ind, search_key, buf))
3116
// NOTE(review): this backward scan steps with xt_idx_next() here, while the invisible-row case above uses xt_idx_prev() -- confirm that moving forward is intended.
if (!xt_idx_next(ot, ind, search_key))
3120
if (!pb_ind_row_count && search_key) {
3121
if (!xt_idx_search_prev(pb_open_tab, ind, search_key))
3122
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3125
if (!xt_idx_research(pb_open_tab, ind))
3130
if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3137
/* We need to read the entire record: */
3138
while (ot->ot_curr_rec_id) {
3139
if (search_key && !search_key->sk_on_key)
3142
switch (xt_tab_read_record(ot, (xtWord1 *) buf)) {
3144
if (xt_idx_prev(ot, ind, search_key))
3149
if (xt_index_in_range(ot, ind, search_key, buf))
3151
// NOTE(review): same question as in the key-only loop -- xt_idx_next() inside a backward scan.
if (!xt_idx_next(ot, ind, search_key))
3155
if (!pb_ind_row_count && search_key) {
3156
if (!xt_idx_search_prev(pb_open_tab, ind, search_key))
3157
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3160
if (!xt_idx_research(pb_open_tab, ind))
3169
return HA_ERR_END_OF_FILE;
3172
return ha_log_pbxt_thread_error_for_mysql(FALSE);
3175
// Prepare the handler for a scan over index idx.
// If the table's indexes are disabled, active_index is set to MAX_KEY and an
// index error is returned. Otherwise the number of columns the scan needs
// (ot_cols_req) is computed, index coverage of read_set is tested, and the
// thread's st_scan_index statistic is incremented.
// The sorted parameter is unused.
int ha_pbxt::index_init(uint idx, bool XT_UNUSED(sorted))
3178
XTThreadPtr thread = pb_open_tab->ot_thread;
3180
/* select count(*) from smalltab_PBXT;
3181
* ignores the error below, and continues to
3186
if (pb_open_tab->ot_table->tab_dic.dic_disable_index) {
3187
active_index = MAX_KEY;
3188
xt_tab_set_index_error(pb_open_tab->ot_table);
3189
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3192
/* The number of columns required: */
3193
// For a modifying statement the column count is the full size of read_set.
if (pb_open_tab->ot_is_modify) {
3195
pb_open_tab->ot_cols_req = table->read_set->MX_BIT_SIZE();
3196
#ifdef XT_PRINT_INDEX_OPT
3197
ind = (XTIndexPtr) pb_share->sh_dic_keys[idx];
3199
printf("index_init %s index %d cols req=%d/%d read_bits=%X write_bits=%X index_bits=%X\n", pb_open_tab->ot_table->tab_name->ps_path, (int) idx, pb_open_tab->ot_cols_req, pb_open_tab->ot_cols_req, (int) *table->read_set->bitmap, (int) *table->write_set->bitmap, (int) *ind->mi_col_map.bitmap);
3201
/* {START-STAT-HACK} previously position of start statement hack,
3202
* previous comment to code below: */
3203
/* Start a statement based transaction as soon
3204
* as a read is done for a modify type statement!
3205
* Previously, this was done too late!
3209
// Otherwise (presumably the read-only case) the count comes from the highest bit set in read_set.
pb_open_tab->ot_cols_req = ha_get_max_bit(table->read_set);
3211
/* Check for index coverage!
3213
* Given the following table:
3215
* CREATE TABLE `customer` (
3216
* `c_id` int(11) NOT NULL DEFAULT '0',
3217
* `c_d_id` int(11) NOT NULL DEFAULT '0',
3218
* `c_w_id` int(11) NOT NULL DEFAULT '0',
3219
* `c_first` varchar(16) DEFAULT NULL,
3220
* `c_middle` char(2) DEFAULT NULL,
3221
* `c_last` varchar(16) DEFAULT NULL,
3222
* `c_street_1` varchar(20) DEFAULT NULL,
3223
* `c_street_2` varchar(20) DEFAULT NULL,
3224
* `c_city` varchar(20) DEFAULT NULL,
3225
* `c_state` char(2) DEFAULT NULL,
3226
* `c_zip` varchar(9) DEFAULT NULL,
3227
* `c_phone` varchar(16) DEFAULT NULL,
3228
* `c_since` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
3229
* `c_credit` char(2) DEFAULT NULL,
3230
* `c_credit_lim` decimal(24,12) DEFAULT NULL,
3231
* `c_discount` double DEFAULT NULL,
3232
* `c_balance` decimal(24,12) DEFAULT NULL,
3233
* `c_ytd_payment` decimal(24,12) DEFAULT NULL,
3234
* `c_payment_cnt` double DEFAULT NULL,
3235
* `c_delivery_cnt` double DEFAULT NULL,
3237
* PRIMARY KEY (`c_w_id`,`c_d_id`,`c_id`),
3238
* KEY `c_w_id` (`c_w_id`,`c_d_id`,`c_last`,`c_first`,`c_id`)
3241
* MySQL does not recognize index coverage on the followin select:
3243
* SELECT c_id FROM customer WHERE c_w_id = 3 AND c_d_id = 8 AND
3244
* c_last = 'EINGATIONANTI' ORDER BY c_first ASC LIMIT 1;
3246
* TODO: Find out why this is necessary, MyISAM does not
3247
* seem to have this problem!
3249
ind = (XTIndexPtr) pb_share->sh_dic_keys[idx];
3250
// True when every column in read_set is part of the index's column map.
if (MX_BIT_IS_SUBSET(table->read_set, &ind->mi_col_map))
3252
#ifdef XT_PRINT_INDEX_OPT
3253
printf("index_init %s index %d cols req=%d/%d read_bits=%X write_bits=%X index_bits=%X converage=%d\n", pb_open_tab->ot_table->tab_name->ps_path, (int) idx, pb_open_tab->ot_cols_req, table->read_set->MX_BIT_SIZE(), (int) *table->read_set->bitmap, (int) *table->write_set->bitmap, (int) *ind->mi_col_map.bitmap, (int) (MX_BIT_IS_SUBSET(table->read_set, &ind->mi_col_map) != 0));
3257
xt_xlog_check_long_writer(thread);
3259
pb_open_tab->ot_thread->st_statistics.st_scan_index++;
3263
// Finish an index scan: release the index read handle if one is held, make
// the lock on the last scanned row permanent, and reset active_index to
// MAX_KEY.
int ha_pbxt::index_end()
3269
XTThreadPtr thread = pb_open_tab->ot_thread;
3272
* the assertion below is not always held, because the sometimes handler is unlocked
3273
* before this function is called
3275
/*ASSERT_NS(pb_ex_in_use);*/
3277
if (pb_open_tab->ot_ind_rhandle) {
3278
xt_ind_release_handle(pb_open_tab->ot_ind_rhandle, FALSE, thread);
3279
pb_open_tab->ot_ind_rhandle = NULL;
3283
* make permanent the lock for the last scanned row
3286
pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &thread->st_lock_list);
3288
xt_xlog_check_long_writer(thread);
3290
active_index = MAX_KEY;
3294
#ifdef XT_TRACK_RETURNED_ROWS
3295
// Row-tracking debug aid (XT_TRACK_RETURNED_ROWS builds): traces the start of
// a scan on the given index and clears the record of rows already returned.
void ha_start_scan(XTOpenTablePtr ot, u_int index)
3297
xt_ttracef(ot->ot_thread, "SCAN %d:%d\n", (int) ot->ot_table->tab_id, (int) index);
3298
ot->ot_rows_ret_curr = 0;
3299
for (u_int i=0; i<ot->ot_rows_ret_max; i++)
3300
ot->ot_rows_returned[i] = 0;
3303
// Row-tracking debug aid (XT_TRACK_RETURNED_ROWS builds): records that the
// current row is being returned by a scan and reports an error if its row ID
// or record ID is zero, or if the same row ID was already returned.
// ot_rows_returned is indexed by row ID and stores the record ID returned.
void ha_return_row(XTOpenTablePtr ot, u_int index)
3305
xt_ttracef(ot->ot_thread, "%d:%d ROW=%d:%d\n",
3306
(int) ot->ot_table->tab_id, (int) index, (int) ot->ot_curr_row_id, (int) ot->ot_curr_rec_id);
3307
ot->ot_rows_ret_curr++;
3308
// Grow the tracking array so the current row ID is a valid index; new slots are zeroed.
if (ot->ot_curr_row_id >= ot->ot_rows_ret_max) {
3309
if (!xt_realloc_ns((void **) &ot->ot_rows_returned, (ot->ot_curr_row_id+1) * sizeof(xtRecordID)))
3311
memset(&ot->ot_rows_returned[ot->ot_rows_ret_max], 0, (ot->ot_curr_row_id+1 - ot->ot_rows_ret_max) * sizeof(xtRecordID));
3312
ot->ot_rows_ret_max = ot->ot_curr_row_id+1;
3314
if (!ot->ot_curr_row_id || !ot->ot_curr_rec_id || ot->ot_rows_returned[ot->ot_curr_row_id]) {
3315
char *sql = *thd_query(current_thd);
3317
xt_ttracef(ot->ot_thread, "DUP %d:%d %s\n",
3318
(int) ot->ot_table->tab_id, (int) index, *thd_query(current_thd));
3320
printf("ERROR: row=%d rec=%d newr=%d, already returned!\n", (int) ot->ot_curr_row_id, (int) ot->ot_rows_returned[ot->ot_curr_row_id], (int) ot->ot_curr_rec_id);
3321
printf("ERROR: %s\n", sql);
3323
FatalAppExit(0, "Debug Me!");
3327
ot->ot_rows_returned[ot->ot_curr_row_id] = ot->ot_curr_rec_id;
3331
// Common implementation of the keyed index reads: position index idx
// according to key / key_len and find_flag, and read the first matching row
// into buf.
// idx == MAX_KEY yields HA_ERR_WRONG_INDEX. The call starts a new search, so
// pb_ind_row_count is reset. The "previous" style flags search backwards
// (xt_idx_search_prev and xt_index_prev_read); the others search forwards
// (xt_idx_search and xt_index_next_read).
int ha_pbxt::index_read_xt(byte * buf, uint idx, const byte *key, uint key_len, enum ha_rkey_function find_flag)
3336
XTIdxSearchKeyRec search_key;
3338
if (idx == MAX_KEY) {
3339
err = HA_ERR_WRONG_INDEX;
3342
#ifdef XT_TRACK_RETURNED_ROWS
3343
ha_start_scan(pb_open_tab, idx);
3346
/* This call starts a search on this handler! */
3347
pb_ind_row_count = 0;
3349
ASSERT_NS(pb_ex_in_use);
3351
XT_PRINT1(pb_open_tab->ot_thread, "index_read_xt (%s)\n", pb_share->sh_table_path->ps_path);
3352
XT_DISABLED_TRACE(("search tx=%d val=%d update=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(key), pb_modified));
3353
ind = (XTIndexPtr) pb_share->sh_dic_keys[idx];
3355
switch (find_flag) {
3356
// The prefix cases set the prefix flag and presumably fall through to the shared backward-search code.
case HA_READ_PREFIX_LAST:
3357
case HA_READ_PREFIX_LAST_OR_PREV:
3358
prefix = SEARCH_PREFIX;
3359
case HA_READ_BEFORE_KEY:
3360
case HA_READ_KEY_OR_PREV: // I assume you want to be positioned on the last entry in the key duplicate list!!
3361
xt_idx_prep_key(ind, &search_key, ((find_flag == HA_READ_BEFORE_KEY) ? 0 : XT_SEARCH_AFTER_KEY) | prefix, (xtWord1 *) key, (size_t) key_len);
3362
if (!xt_idx_search_prev(pb_open_tab, ind, &search_key))
3363
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3365
// Only HA_READ_PREFIX_LAST passes the search key on, i.e. requires the row to match the key.
err = xt_index_prev_read(pb_open_tab, ind, pb_key_read,
3366
(find_flag == HA_READ_PREFIX_LAST) ? &search_key : NULL, buf);
3368
case HA_READ_PREFIX:
3369
prefix = SEARCH_PREFIX;
3370
case HA_READ_KEY_EXACT:
3371
case HA_READ_KEY_OR_NEXT:
3372
case HA_READ_AFTER_KEY:
3374
xt_idx_prep_key(ind, &search_key, ((find_flag == HA_READ_AFTER_KEY) ? XT_SEARCH_AFTER_KEY : 0) | prefix, (xtWord1 *) key, key_len);
3375
if (!xt_idx_search(pb_open_tab, ind, &search_key))
3376
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3378
// Exact and prefix reads pass the search key on, i.e. require the row to match the key.
err = xt_index_next_read(pb_open_tab, ind, pb_key_read,
3379
(find_flag == HA_READ_KEY_EXACT || find_flag == HA_READ_PREFIX) ? &search_key : NULL, buf);
3380
// For HA_READ_AFTER_KEY, running off the end is reported as "key not found".
if (err == HA_ERR_END_OF_FILE && find_flag == HA_READ_AFTER_KEY)
3381
err = HA_ERR_KEY_NOT_FOUND;
3387
#ifdef XT_TRACK_RETURNED_ROWS
3389
ha_return_row(pb_open_tab, idx);
3391
XT_DISABLED_TRACE(("search tx=%d val=%d err=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(key), err));
3394
table->status = STATUS_NOT_FOUND;
3396
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3403
* Positions an index cursor to the index specified in the handle. Fetches the
3404
* row if available. If the key value is null, begin at the first key of the
3407
// Keyed read on the currently active index; forwards to index_read_xt()
// with active_index.
int ha_pbxt::index_read(byte * buf, const byte * key, uint key_len, enum ha_rkey_function find_flag)
3409
//statistic_increment(ha_read_key_count,&LOCK_status);
3410
return index_read_xt(buf, active_index, key, key_len, find_flag);
3413
// Keyed read on an explicitly given index idx; forwards to index_read_xt().
int ha_pbxt::index_read_idx(byte * buf, uint idx, const byte *key, uint key_len, enum ha_rkey_function find_flag)
3415
//statistic_increment(ha_read_key_count,&LOCK_status);
3416
return index_read_xt(buf, idx, key, key_len, find_flag);
3419
// Keyed read on the active index using HA_READ_PREFIX_LAST; forwards to
// index_read_xt().
int ha_pbxt::index_read_last(byte * buf, const byte * key, uint key_len)
3421
//statistic_increment(ha_read_key_count,&LOCK_status);
3422
return index_read_xt(buf, active_index, key, key_len, HA_READ_PREFIX_LAST);
3426
* Used to read forward through the index.
3428
// Advance the active index scan by one entry and read the next visible row
// into buf. Yields HA_ERR_WRONG_INDEX when no index is active
// (active_index == MAX_KEY).
int ha_pbxt::index_next(byte * buf)
3434
//statistic_increment(ha_read_next_count,&LOCK_status);
3435
ASSERT_NS(pb_ex_in_use);
3437
if (active_index == MAX_KEY) {
3438
err = HA_ERR_WRONG_INDEX;
3441
ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3443
if (!xt_idx_next(pb_open_tab, ind, NULL))
3444
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3446
err = xt_index_next_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3449
#ifdef XT_TRACK_RETURNED_ROWS
3451
ha_return_row(pb_open_tab, active_index);
3455
table->status = STATUS_NOT_FOUND;
3457
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3464
* I have implemented this because there is currently a
3465
* bug in handler::index_next_same().
3467
* drop table if exists t1;
3468
* CREATE TABLE t1 (a int, b int, primary key(a,b))
3469
* PARTITION BY KEY(b,a) PARTITIONS 2;
3470
* insert into t1 values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6);
3471
* select * from t1 where a = 4;
3474
// Read the next row of the active index whose key equals key (length bytes).
// An exact-match search key is built from key and passed to xt_idx_next()
// and xt_index_next_read(). Yields HA_ERR_WRONG_INDEX when no index is
// active.
int ha_pbxt::index_next_same(byte * buf, const byte *key, uint length)
3478
XTIdxSearchKeyRec search_key;
3481
//statistic_increment(ha_read_next_count,&LOCK_status);
3482
ASSERT_NS(pb_ex_in_use);
3484
if (active_index == MAX_KEY) {
3485
err = HA_ERR_WRONG_INDEX;
3488
ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3490
// Fill in the search key by hand; the key value is converted into sk_key_buf.
search_key.sk_key_value.sv_flags = HA_READ_KEY_EXACT;
3491
search_key.sk_key_value.sv_rec_id = 0;
3492
search_key.sk_key_value.sv_row_id = 0;
3493
search_key.sk_key_value.sv_key = search_key.sk_key_buf;
3494
search_key.sk_key_value.sv_length = myxt_create_key_from_key(ind, search_key.sk_key_buf, (xtWord1 *) key, (u_int) length);
3495
search_key.sk_on_key = TRUE;
3497
if (!xt_idx_next(pb_open_tab, ind, &search_key))
3498
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3500
err = xt_index_next_read(pb_open_tab, ind, pb_key_read, &search_key, buf);
3503
#ifdef XT_TRACK_RETURNED_ROWS
3505
ha_return_row(pb_open_tab, active_index);
3509
table->status = STATUS_NOT_FOUND;
3511
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3518
* Used to read backwards through the index.
3520
// Step the active index scan back by one entry and read the previous visible
// row into buf. Yields HA_ERR_WRONG_INDEX when no index is active
// (active_index == MAX_KEY).
int ha_pbxt::index_prev(byte * buf)
3526
//statistic_increment(ha_read_prev_count,&LOCK_status);
3527
ASSERT_NS(pb_ex_in_use);
3529
if (active_index == MAX_KEY) {
3530
err = HA_ERR_WRONG_INDEX;
3533
ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3535
if (!xt_idx_prev(pb_open_tab, ind, NULL))
3536
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3538
err = xt_index_prev_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3541
#ifdef XT_TRACK_RETURNED_ROWS
3543
ha_return_row(pb_open_tab, active_index);
3547
table->status = STATUS_NOT_FOUND;
3549
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3556
* index_first() asks for the first key in the index.
3558
// Position the active index on its first entry and read the first visible
// row into buf. This starts a new scan, so pb_ind_row_count is reset.
// Yields HA_ERR_WRONG_INDEX when no index is active.
int ha_pbxt::index_first(byte * buf)
3562
XTIdxSearchKeyRec search_key;
3565
//statistic_increment(ha_read_first_count,&LOCK_status);
3566
ASSERT_NS(pb_ex_in_use);
3568
/* This is required because MySQL ignores the error returned
3569
* init init_index sometimes, for example:
3571
* if (!table->file->inited)
3572
* table->file->ha_index_init(tab->index, tab->sorted);
3573
* if ((error=tab->table->file->index_first(tab->table->record[0])))
3575
if (active_index == MAX_KEY) {
3576
err = HA_ERR_WRONG_INDEX;
3580
#ifdef XT_TRACK_RETURNED_ROWS
3581
ha_start_scan(pb_open_tab, active_index);
3583
pb_ind_row_count = 0;
3585
ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3587
// No key value is given: the search is prepared with XT_SEARCH_FIRST_FLAG instead.
xt_idx_prep_key(ind, &search_key, XT_SEARCH_FIRST_FLAG, NULL, 0);
3588
if (!xt_idx_search(pb_open_tab, ind, &search_key))
3589
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3591
err = xt_index_next_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3594
#ifdef XT_TRACK_RETURNED_ROWS
3596
ha_return_row(pb_open_tab, active_index);
3600
table->status = STATUS_NOT_FOUND;
3602
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3609
* index_last() asks for the last key in the index.
3611
int ha_pbxt::index_last(byte * buf)
3615
XTIdxSearchKeyRec search_key;
3618
//statistic_increment(ha_read_last_count,&LOCK_status);
3619
ASSERT_NS(pb_ex_in_use);
3621
if (active_index == MAX_KEY) {
3622
err = HA_ERR_WRONG_INDEX;
3626
#ifdef XT_TRACK_RETURNED_ROWS
3627
ha_start_scan(pb_open_tab, active_index);
3629
pb_ind_row_count = 0;
3631
ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3633
xt_idx_prep_key(ind, &search_key, XT_SEARCH_AFTER_LAST_FLAG, NULL, 0);
3634
if (!xt_idx_search_prev(pb_open_tab, ind, &search_key))
3635
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3637
err = xt_index_prev_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3640
#ifdef XT_TRACK_RETURNED_ROWS
3642
ha_return_row(pb_open_tab, active_index);
3646
table->status = STATUS_NOT_FOUND;
3648
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3655
* -----------------------------------------------------------------------
3656
* RANDOM/SEQUENTIAL READ METHODS
3660
* rnd_init() is called when the system wants the storage engine to do a table
3662
* See the example in the introduction at the top of this file to see when
3663
* rnd_init() is called.
3665
* Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc,
3666
* and sql_update.cc.
3668
int ha_pbxt::rnd_init(bool scan)
3671
XTThreadPtr thread = pb_open_tab->ot_thread;
3673
XT_PRINT1(thread, "rnd_init (%s)\n", pb_share->sh_table_path->ps_path);
3674
XT_DISABLED_TRACE(("seq scan tx=%d\n", (int) thread->st_xact_data->xd_start_xn_id));
3676
/* Call xt_tab_seq_exit() to make sure the resources used by the previous
3677
* scan are freed. In particular make sure cache page ref count is decremented.
3678
* This is needed as rnd_init() can be called multiple times w/o matching calls
3679
* to rnd_end(). Our experience is that currently this is done in queries like:
3681
* SELECT t1.c1,t2.c1 FROM t1 LEFT JOIN t2 USING (c1);
3682
* UPDATE t1 LEFT JOIN t2 USING (c1) SET t1.c1 = t2.c1 WHERE t1.c1 = t2.c1;
3684
* when scanning inner tables. It is important to understand that in such case
3685
* multiple calls to rnd_init() are not semantically equal to a new query. For
3686
* example we cannot make row locks permanent as we do in rnd_end(), as
3687
* ha_pbxt::unlock_row still can be called.
3689
xt_tab_seq_exit(pb_open_tab);
3691
/* The number of columns required: */
3692
if (pb_open_tab->ot_is_modify) {
3693
pb_open_tab->ot_cols_req = table->read_set->MX_BIT_SIZE();
3694
/* {START-STAT-HACK} previously position of start statement hack,
3695
* previous comment to code below: */
3696
/* Start a statement based transaction as soon
3697
* as a read is done for a modify type statement!
3698
* Previously, this was done too late!
3702
pb_open_tab->ot_cols_req = ha_get_max_bit(table->read_set);
3705
* in case of queries like SELECT COUNT(*) FROM t
3706
* table->read_set is empty. Otoh, ot_cols_req == 0 can be treated
3707
* as "all columns" by some internal code (see e.g. myxt_load_row),
3708
* which makes such queries very inefficient for the records with
3709
* extended part. Setting column count to 1 makes sure that the
3710
* extended part will not be accessed in most cases.
3713
if (pb_open_tab->ot_cols_req == 0)
3714
pb_open_tab->ot_cols_req = 1;
3717
ASSERT_NS(pb_ex_in_use);
3719
if (!xt_tab_seq_init(pb_open_tab))
3720
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3723
xt_tab_seq_reset(pb_open_tab);
3725
xt_xlog_check_long_writer(thread);
3730
int ha_pbxt::rnd_end()
3735
* make permanent the lock for the last scanned row
3737
XTThreadPtr thread = pb_open_tab->ot_thread;
3739
pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &thread->st_lock_list);
3741
xt_xlog_check_long_writer(thread);
3743
xt_tab_seq_exit(pb_open_tab);
3748
* This is called for each row of the table scan. When you run out of records
3749
* you should return HA_ERR_END_OF_FILE. Fill buff up with the row information.
3750
* The Field structure for the table is the key to getting data into buf
3751
* in a manner that will allow the server to understand it.
3753
* Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc,
3754
* and sql_update.cc.
3756
int ha_pbxt::rnd_next(byte *buf)
3762
ASSERT_NS(pb_ex_in_use);
3763
//statistic_increment(ha_read_rnd_next_count, &LOCK_status);
3764
xt_xlog_check_long_writer(pb_open_tab->ot_thread);
3766
if (!xt_tab_seq_next(pb_open_tab, (xtWord1 *) buf, &eof))
3767
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3769
err = HA_ERR_END_OF_FILE;
3772
table->status = STATUS_NOT_FOUND;
3774
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3781
* position() is called after each call to rnd_next() if the data needs
3782
* to be ordered. You can do something like the following to store
3784
* ha_store_ptr(ref, ref_length, current_position);
3786
* The server uses ref to store data. ref_length in the above case is
3787
* the size needed to store current_position. ref is just a byte array
3788
* that the server will maintain. If you are using offsets to mark rows, then
3789
* current_position should be the offset. If it is a primary key like in
3790
* BDB, then it needs to be a primary key.
3792
* Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
3794
void ha_pbxt::position(const byte *XT_UNUSED(record))
3797
ASSERT_NS(pb_ex_in_use);
3799
* I changed this from using little endian to big endian.
3801
* The reason is that sometimes the pointers are sorted.
3802
* When they are sorted, a binary compare is used.
3803
* A binary compare sorts big endian values correctly!
3805
* Take the following example:
3807
* create table t1 (a int, b text);
3808
* insert into t1 values (1, 'aa'), (1, 'bb'), (1, 'cc');
3809
* select group_concat(b) from t1 group by a;
3811
* With little endian pointers the result is:
3814
* With big-endian pointers the result is:
3818
(void) ASSERT_NS(XT_RECORD_OFFS_SIZE == 4);
3819
mi_int4store((xtWord1 *) ref, pb_open_tab->ot_curr_rec_id);
3824
* Given the #ROWID retrieve the record.
3826
* Called from filesort.cc records.cc sql_insert.cc sql_select.cc sql_update.cc.
3828
int ha_pbxt::rnd_pos(byte * buf, byte *pos)
3833
ASSERT_NS(pb_ex_in_use);
3834
//statistic_increment(ha_read_rnd_count, &LOCK_status);
3835
XT_PRINT1(pb_open_tab->ot_thread, "rnd_pos (%s)\n", pb_share->sh_table_path->ps_path);
3837
pb_open_tab->ot_curr_rec_id = mi_uint4korr((xtWord1 *) pos);
3838
switch (xt_tab_dirty_read_record(pb_open_tab, (xtWord1 *) buf)) {
3840
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3847
table->status = STATUS_NOT_FOUND;
3849
pb_open_tab->ot_thread->st_statistics.st_row_select++;
3856
* -----------------------------------------------------------------------
3861
::info() is used to return information to the optimizer.
3862
Currently this table handler doesn't implement most of the fields
3863
really needed. SHOW also makes use of this data
3864
Another note, you will probably want to have the following in your
3868
The reason is that the server will optimize for cases of only a single
3869
record. If in a table scan you don't know the number of records
3870
it will probably be better to set records to two so you can return
3871
as many records as you need.
3872
Along with records a few more variables you may wish to set are:
3879
Take a look at the public variables in handler.h for more information.
3903
#if MYSQL_VERSION_ID < 50114
3904
void ha_pbxt::info(uint flag)
3906
int ha_pbxt::info(uint flag)
3914
if (!(in_use = pb_ex_in_use)) {
3916
if (pb_share && pb_share->sh_table_lock) {
3917
/* If some thread has an exclusive lock, then
3918
* we wait for the lock to be removed:
3920
#if MYSQL_VERSION_ID < 50114
3921
ha_wait_for_shared_use(this, pb_share);
3924
if (!ha_wait_for_shared_use(this, pb_share))
3925
return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3930
if ((ot = pb_open_tab)) {
3931
if (flag & HA_STATUS_VARIABLE) {
3933
* Free row count is not reliable, so ignore it.
3934
* The problem is if tab_row_fnum > tab_row_eof_id - 1 then
3935
* we have a very bad result.
3937
* If stats.records+EXTRA_RECORDS == 0 as returned by
3938
* estimate_rows_upper_bound(), then filesort will crash here:
3940
* make_sortkey(param,sort_keys[idx++],ref_pos);
3942
* #0 0x000bf69c in Field_long::sort_string at field.cc:3766
3943
* #1 0x0022e1f1 in make_sortkey at filesort.cc:769
3944
* #2 0x0022f1cf in find_all_keys at filesort.cc:619
3945
* #3 0x00230eec in filesort at filesort.cc:243
3946
* #4 0x001b9d89 in mysql_update at sql_update.cc:415
3947
* #5 0x0010db12 in mysql_execute_command at sql_parse.cc:2959
3948
* #6 0x0011480d in mysql_parse at sql_parse.cc:5787
3949
* #7 0x00115afb in dispatch_command at sql_parse.cc:1200
3950
* #8 0x00116de2 in do_command at sql_parse.cc:857
3951
* #9 0x00101ee4 in handle_one_connection at sql_connect.cc:1115
3953
* The problem is that sort_keys is allocated to handle just 1 vector.
3954
* Sorting one vector crashes. Although I could not find a check for
3955
* the actual number of vectors. But it must assume that it has at
3956
* least EXTRA_RECORDS vectors.
3958
stats.deleted = /* ot->ot_table->tab_row_fnum */ 0;
3959
stats.records = (ha_rows) (ot->ot_table->tab_row_eof_id - 1 /* - stats.deleted */);
3960
stats.data_file_length = xt_rec_id_to_rec_offset(ot->ot_table, ot->ot_table->tab_rec_eof_id);
3961
stats.index_file_length = xt_ind_node_to_offset(ot->ot_table, ot->ot_table->tab_ind_eof);
3962
stats.delete_length = ot->ot_table->tab_rec_fnum * ot->ot_rec_size;
3963
//check_time = info.check_time;
3964
stats.mean_rec_length = (ulong) ot->ot_rec_size;
3967
if (flag & HA_STATUS_CONST) {
3968
ha_rows rec_per_key;
3970
TABLE_SHARE *share= TS(table);
3972
stats.max_data_file_length = 0x00FFFFFF;
3973
stats.max_index_file_length = 0x00FFFFFF;
3974
//stats.create_time = info.create_time;
3975
ref_length = XT_RECORD_OFFS_SIZE;
3976
//share->db_options_in_use = info.options;
3977
stats.block_size = XT_INDEX_PAGE_SIZE;
3980
if (share->tmp_table == message::Table::STANDARD)
3982
if (share->tmp_table == NO_TMP_TABLE)
3985
#define WHICH_MUTEX mutex
3986
#elif MYSQL_VERSION_ID >= 50404
3987
#define WHICH_MUTEX LOCK_ha_data
3989
if (share->tmp_table == NO_TMP_TABLE)
3990
#define WHICH_MUTEX mutex
3995
#if MYSQL_VERSION_ID < 50404
3996
#if MYSQL_VERSION_ID < 50123
3997
safe_mutex_lock(&share->mutex,__FILE__,__LINE__);
3999
safe_mutex_lock(&share->mutex,0,__FILE__,__LINE__);
4002
safe_mutex_lock(&share->WHICH_MUTEX,0,__FILE__,__LINE__);
4007
#ifdef MY_PTHREAD_FASTMUTEX
4008
my_pthread_fastmutex_lock(&share->WHICH_MUTEX);
4010
pthread_mutex_lock(&share->WHICH_MUTEX);
4013
#endif // SAFE_MUTEX
4015
set_prefix(share->keys_in_use, share->keys);
4016
share->keys_for_keyread&= share->keys_in_use;
4018
share->keys_in_use.set_prefix(share->keys);
4019
//share->keys_in_use.intersect_extended(info.key_map);
4020
share->keys_for_keyread.intersect(share->keys_in_use);
4021
//share->db_record_offset = info.record_offset;
4023
for (u_int i = 0; i < share->keys; i++) {
4024
ind = pb_share->sh_dic_keys[i];
4027
if (ind->mi_seg_count == 1 && (ind->mi_flags & HA_NOSAME))
4032
for (u_int j = 0; j < table->key_info[i].key_parts; j++)
4033
table->key_info[i].rec_per_key[j] = (ulong) rec_per_key;
4036
if (share->tmp_table == message::Table::STANDARD)
4038
if (share->tmp_table == NO_TMP_TABLE)
4041
safe_mutex_unlock(&share->WHICH_MUTEX,__FILE__,__LINE__);
4043
#ifdef MY_PTHREAD_FASTMUTEX
4044
pthread_mutex_unlock(&share->WHICH_MUTEX.mutex);
4046
pthread_mutex_unlock(&share->WHICH_MUTEX);
4050
Set data_file_name and index_file_name to point at the symlink value
4051
if table is symlinked (i.e. the real name is not the same as the generated name)
4054
data_file_name = index_file_name = 0;
4055
fn_format(name_buff, file->filename, "", MI_NAME_DEXT, 2);
4056
if (strcmp(name_buff, info.data_file_name))
4057
data_file_name = info.data_file_name;
4058
strmov(fn_ext(name_buff), MI_NAME_IEXT);
4059
if (strcmp(name_buff, info.index_file_name))
4060
index_file_name = info.index_file_name;
4064
if (flag & HA_STATUS_ERRKEY)
4065
errkey = ot->ot_err_index_no;
4068
* We assume they want the next value to be returned!
4070
* At least, this is what works for the following code:
4072
* create table t1 (a int auto_increment primary key)
4073
* auto_increment=100
4075
* partition by list (a)
4076
* (partition p0 values in (1, 98,99, 100, 101));
4077
* create index inx on t1 (a);
4078
* insert into t1 values (null);
4081
if (flag & HA_STATUS_AUTO)
4082
stats.auto_increment_value = (ulonglong) ot->ot_table->tab_auto_inc+1;
4090
/* Someone may be waiting for me to complete: */
4091
if (pb_share->sh_table_lock)
4092
xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
4095
#if MYSQL_VERSION_ID < 50114
4103
* extra() is called whenever the server wishes to send a hint to
4104
* the storage engine. The myisam engine implements the most hints.
4105
* ha_innodb.cc has the most exhaustive list of these hints.
4107
int ha_pbxt::extra(enum ha_extra_function operation)
4111
XT_PRINT2(xt_get_self(), "ha_pbxt::extra (%s) operation=%d\n", pb_share->sh_table_path->ps_path, operation);
4113
switch (operation) {
4114
case HA_EXTRA_RESET_STATE:
4115
pb_key_read = FALSE;
4116
pb_ignore_dup_key = 0;
4117
/* As far as I can tell, this function is called for
4118
* every table at the end of a statement.
4120
* So, during a LOCK TABLES ... UNLOCK TABLES, I use
4121
* this to find the end of a statement.
4122
* start_stmt() indicates the start of a statement,
4123
* and is also called once for each table in the
4126
* So the statement boundary is indicated by
4127
* self->st_stat_count == 0
4129
* GOTCHA: I cannot end the transaction here!
4130
* I must end it in start_stmt().
4131
* The reason is because there are situations
4132
* where this would end a transaction that
4133
* was begun by external_lock().
4135
* An example of this is when a function
4136
* is called when doing CREATE TABLE SELECT.
4139
/* NOTE: pb_in_stat is just used to avoid getting
4140
* self, if it is not necessary!!
4146
if (!(self = ha_set_current_thread(pb_mysql_thd, &err)))
4147
return xt_ha_pbxt_to_mysql_error(err);
4149
if (self->st_stat_count > 0) {
4150
self->st_stat_count--;
4151
if (self->st_stat_count == 0)
4152
self->st_stat_ended = TRUE;
4155
/* This is the end of a statement, I can turn any locks into permanent locks now: */
4157
pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &self->st_lock_list);
4160
pb_open_tab->ot_for_update = 0;
4162
case HA_EXTRA_KEYREAD:
4163
/* This means we do not need to read the entire record. */
4166
case HA_EXTRA_NO_KEYREAD:
4167
pb_key_read = FALSE;
4169
case HA_EXTRA_IGNORE_DUP_KEY:
4170
/* NOTE!!! Calls to extra(HA_EXTRA_IGNORE_DUP_KEY) can be nested!
4171
* In fact, the calls are from different threads, so
4172
* strictly speaking I should protect this variable!!
4173
* Here is the sequence that produces the duplicate call:
4175
* drop table if exists t1;
4176
* CREATE TABLE t1 (x int not null, y int, primary key (x)) engine=pbxt;
4177
* insert into t1 values (1, 3), (4, 1);
4178
* replace DELAYED into t1 (x, y) VALUES (4, 2);
4179
* select * from t1 order by x;
4182
pb_ignore_dup_key++;
4184
case HA_EXTRA_NO_IGNORE_DUP_KEY:
4185
pb_ignore_dup_key--;
4187
case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
4188
/* MySQL needs all fields */
4189
pb_key_read = FALSE;
4200
* Deprecated and likely to be removed in the future. Storage engines normally
4201
* just make a call like:
4202
* ha_pbxt::extra(HA_EXTRA_RESET);
4205
int ha_pbxt::reset(void)
4208
extra(HA_EXTRA_RESET_STATE);
4212
void ha_pbxt::unlock_row()
4216
pb_open_tab->ot_table->tab_locks.xt_remove_temp_lock(pb_open_tab, FALSE);
4220
* Used to delete all rows in a table. Both for cases of truncate and
4221
* for cases where the optimizer realizes that all rows will be
4222
* removed as a result of a SQL statement.
4224
* Called from item_sum.cc by Item_func_group_concat::clear(),
4225
* Item_sum_count_distinct::clear(), and Item_func_group_concat::clear().
4226
* Called from sql_delete.cc by mysql_delete().
4227
* Called from sql_select.cc by JOIN::reinit().
4228
* Called from sql_union.cc by st_select_lex_unit::exec().
4230
int ha_pbxt::delete_all_rows()
4232
THD *thd = current_thd;
4235
XTDDTable *tab_def = NULL;
4236
char path[PATH_MAX];
4240
if (thd_sql_command(thd) != SQLCOM_TRUNCATE) {
4241
/* Just like InnoDB we only handle TRUNCATE TABLE
4242
* by recreating the table.
4243
* DELETE FROM t must be handled by deleting
4244
* each row because it may be part of a transaction,
4245
* and there may be foreign key actions.
4248
XT_RETURN (errno = HA_ERR_WRONG_COMMAND);
4250
XT_RETURN (my_errno = HA_ERR_WRONG_COMMAND);
4254
if (!(self = ha_set_current_thread(thd, &err)))
4255
return xt_ha_pbxt_to_mysql_error(err);
4258
XTDictionaryRec dic;
4260
memset(&dic, 0, sizeof(dic));
4262
dic = pb_share->sh_table->tab_dic;
4263
xt_strcpy(PATH_MAX, path, pb_share->sh_table->tab_name->ps_path);
4265
if ((tab_def = dic.dic_table))
4266
tab_def->reference();
4268
if (!(thd_test_options(thd,OPTION_NO_FOREIGN_KEY_CHECKS)))
4269
tab_def->deleteAllRows(self);
4271
/* We should have a table lock! */
4272
//ASSERT(pb_lock_table);
4273
if (!pb_table_locked) {
4274
ha_aquire_exclusive_use(self, pb_share, this);
4275
pushr_(ha_release_exclusive_use, pb_share);
4277
ha_close_open_tables(self, pb_share, NULL);
4279
/* This is required in the case of delete_all_rows, because we must
4280
* ensure that the handlers no longer reference the old
4281
* table, so that it will not be used again. The table
4282
* must be re-opened, because the ID has changed!
4284
* 0.9.86+ Must check if this is still necessary.
4286
* the ha_close_share(self, pb_share) call was moved from above
4287
* (before tab_def = dic.dic_table), because of a crash.
4290
* set storage_engine = pbxt;
4291
* create table t1 (s1 int primary key);
4292
* insert into t1 values (1);
4293
* create table t2 (s1 int, foreign key (s1) references t1 (s1));
4294
* insert into t2 values (1);
4295
* truncate table t1; -- this should fail because of FK constraint
4296
* alter table t1 engine = myisam; -- this caused crash
4299
ha_close_share(self, pb_share);
4301
/* MySQL documentation requires us to reset auto increment value to 1
4302
* on truncate even if the table was created with a different value.
4303
* This is also consistent with other engines.
4305
dic.dic_min_auto_inc = 1;
4307
xt_create_table(self, (XTPathStrPtr) path, &dic);
4308
if (!pb_table_locked)
4309
freer_(); // ha_release_exclusive_use(pb_share)
4312
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4317
tab_def->release(self);
4324
* Assuming a key (a,b,c)
4326
* rec_per_key[0] = SELECT COUNT(*)/COUNT(DISTINCT a) FROM t;
4327
* rec_per_key[1] = SELECT COUNT(*)/COUNT(DISTINCT a,b) FROM t;
4328
* rec_per_key[2] = SELECT COUNT(*)/COUNT(DISTINCT a,b,c) FROM t;
4330
* After this is implemented, the selectivity can serve as
4331
* a quick estimate of records_in_range().
4333
* After you have done this, you need to redo the index_merge*
4334
* tests. Restore the standard result to check if we
4335
* now agree with the MyISAM strategy.
4338
int ha_pbxt::analyze(THD *thd, HA_CHECK_OPT *XT_UNUSED(check_opt))
4343
xtXactID clean_xn_id = 0;
4349
if ((err = reopen()))
4353
/* Wait until the sweeper is no longer busy!
4354
* If you want an accurate count(*) value, then call
4355
* ANALYZE TABLE first. This function waits until the
4356
* sweeper has completed.
4358
db = pb_open_tab->ot_table->tab_db;
4361
* Wait until everything is cleaned up before this transaction.
4362
* But this will only work if we quit our transaction!
4364
* GOTCHA: When a PBXT table is partitioned, then analyze() is
4365
* called for each component. The first calls xt_xn_commit().
4366
* All following calls have no transaction!:
4368
* CREATE TABLE t1 (a int)
4369
* PARTITION BY LIST (a)
4370
* (PARTITION x1 VALUES IN (10), PARTITION x2 VALUES IN (20));
4375
if (pb_open_tab->ot_thread && pb_open_tab->ot_thread->st_xact_data) {
4376
my_xn_id = pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id;
4377
XT_PRINT0(xt_get_self(), "xt_xn_commit\n");
4378
xt_xn_commit(pb_open_tab->ot_thread);
4381
my_xn_id = db->db_xn_to_clean_id;
4383
while ((!db->db_sw_idle || xt_xn_is_before(db->db_xn_to_clean_id, my_xn_id)) && !thd_killed(thd)) {
4387
* It is possible that the sweeper gets stuck because
4388
* it has no dictionary information!
4389
* As in the example below.
4392
* pk_col int auto_increment primary key, a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(64) default ' '
4395
* insert into t4 (a1, a2, b, c, d, dummy) select * from t1;
4397
* create index idx12672_0 on t4 (a1);
4398
* create index idx12672_1 on t4 (a1,a2,b,c);
4399
* create index idx12672_2 on t4 (a1,a2,b);
4402
if (db->db_sw_idle) {
4403
/* This will make sure we don't wait forever: */
4404
if (clean_xn_id != db->db_xn_to_clean_id) {
4405
clean_xn_id = db->db_xn_to_clean_id;
4413
xt_wakeup_sweeper(db);
4421
int ha_pbxt::repair(THD *XT_UNUSED(thd), HA_CHECK_OPT *XT_UNUSED(check_opt))
4423
return(HA_ADMIN_TRY_ALTER);
4427
* This is mapped to "ALTER TABLE tablename TYPE=PBXT", which rebuilds
4428
* the table in MySQL.
4430
int ha_pbxt::optimize(THD *XT_UNUSED(thd), HA_CHECK_OPT *XT_UNUSED(check_opt))
4432
return(HA_ADMIN_TRY_ALTER);
4437
extern int pbxt_mysql_trace_on;
4440
int ha_pbxt::check(THD* thd, HA_CHECK_OPT* XT_UNUSED(check_opt))
4445
if (!(self = ha_set_current_thread(thd, &err)))
4446
return xt_ha_pbxt_to_mysql_error(err);
4447
if (self->st_lock_count)
4448
ASSERT(self->st_xact_data);
4450
if (!pb_table_locked) {
4451
ha_aquire_exclusive_use(self, pb_share, this);
4452
pushr_(ha_release_exclusive_use, pb_share);
4455
#ifdef CHECK_TABLE_LOADS
4456
xt_tab_load_table(self, pb_open_tab);
4458
xt_check_table(self, pb_open_tab);
4460
if (!pb_table_locked)
4461
freer_(); // ha_release_exclusive_use(pb_share)
4463
//pbxt_mysql_trace_on = TRUE;
4468
* This function is called:
4469
* For each table in LOCK TABLES,
4471
* For each table in a statement.
4473
* It is called with F_UNLCK:
4476
* at the end of a statement.
4479
xtPublic int ha_pbxt::external_lock(THD *thd, int lock_type)
4484
if (!(self = ha_set_current_thread(thd, &err)))
4485
return xt_ha_pbxt_to_mysql_error(err);
4487
/* F_UNLCK is set when this function is called at end
4488
* of statement or UNLOCK TABLES
4490
if (lock_type == F_UNLCK) {
4491
/* This is not TRUE if external_lock() FAILED!
4492
* Can we rely on external_unlock being called when
4493
* external_lock() fails? Currently yes, but it does
4495
ASSERT_NS(pb_ex_in_use);
4498
XT_PRINT1(self, "EXTERNAL_LOCK (%s) lock_type=UNLOCK\n", pb_share->sh_table_path->ps_path);
4500
/* Make any temporary locks on this table permanent.
4502
* This is required here because of the following example:
4503
* create table t1 (a int NOT NULL, b int, primary key (a));
4504
* create table t2 (a int NOT NULL, b int, primary key (a));
4505
* insert into t1 values (0, 10),(1, 11),(2, 12);
4506
* insert into t2 values (1, 21),(2, 22),(3, 23);
4507
* update t1 set b= (select b from t2 where t1.a = t2.a);
4508
* update t1 set b= (select b from t2 where t1.a = t2.a);
4510
* drop table t1, t2;
4514
/* GOTCHA! It's weird, but, if this function returns an error
4515
* on lock, then UNLOCK is called?!
4516
* This should not be done, because if lock fails, it should be
4517
* assumed that no UNLOCK is required.
4518
* Basically, I have to assume that some code will presume this,
4519
* although the function lock_external() calls unlock, even
4521
* The result is, that my lock count can go wrong. So I could
4522
* change the lock method, and increment the lock count, even
4523
* if it fails. However, the consequences are more serious,
4524
* if some code decides not to call UNLOCK after lock fails.
4525
* The result is that I would have a permanent too high lock,
4526
* count and nothing will work.
4527
* So instead, I handle the fact that I might get too many unlocks
4530
if (self->st_lock_count > 0)
4531
self->st_lock_count--;
4532
if (!self->st_lock_count) {
4533
/* This section handles "auto-commit"... */
4535
#ifdef XT_IMPLEMENT_NO_ACTION
4537
* This is required here because it marks the end of a statement.
4538
* If we are in a non-auto-commit mode, then we cannot
4539
* wait for st_is_update to be set by the beginning of a new transaction.
4541
if (self->st_restrict_list.bl_count) {
4542
if (!xt_tab_restrict_rows(&self->st_restrict_list, self))
4543
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4547
if (self->st_xact_data) {
4548
if (self->st_auto_commit) {
4550
* Normally I could assume that if the transaction
4551
* has not been aborted by now, then it should be committed.
4553
* Unfortunately, this is not the case!
4555
* create table t1 (id int primary key) engine = pbxt;
4556
* create table t2 (id int) engine = pbxt;
4558
* insert into t1 values ( 1 ) ;
4559
* insert into t1 values ( 2 ) ;
4560
* insert into t2 values ( 1 ) ;
4561
* insert into t2 values ( 2 ) ;
4563
* --This statement returns an error and calls ha_autocommit_or_rollback():
4564
* update t1 set t1.id=1 where t1.id=2;
4566
* --This statement returns no error and calls ha_autocommit_or_rollback():
4567
* update t1,t2 set t1.id=3, t2.id=3 where t1.id=2 and t2.id = t1.id;
4569
* --But this statement returns an error and does not call ha_autocommit_or_rollback():
4570
* update t1,t2 set t1.id=1, t2.id=1 where t1.id=3 and t2.id = t1.id;
4572
* The result is, I cannot rely on ha_autocommit_or_rollback() being called :(
4573
* So I have to abort myself here...
4576
pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &self->st_lock_list);
4578
if (self->st_abort_trans) {
4579
XT_PRINT0(self, "xt_xn_rollback in unlock\n");
4580
if (!xt_xn_rollback(self))
4581
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4584
XT_PRINT0(self, "xt_xn_commit in unlock\n");
4585
if (!xt_xn_commit(self))
4586
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4591
/* If the previous statement was "for update", then set the visibility
4592
* so that non- for update SELECTs will see what the for update select
4593
* (or update statement) just saw.
4596
if (pb_open_tab->ot_for_update) {
4597
self->st_visible_time = self->st_database->db_xn_end_time;
4598
pb_open_tab->ot_for_update = 0;
4601
if (pb_share->sh_recalc_selectivity) {
4602
/* {FREE-ROWS-BAD} */
4603
if ((pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) >= 200) {
4605
pb_share->sh_recalc_selectivity = FALSE;
4606
xt_ind_set_index_selectivity(pb_open_tab, self);
4607
/* {FREE-ROWS-BAD} */
4608
pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150;
4613
if (self->st_stat_modify)
4614
self->st_statistics.st_stat_write++;
4616
self->st_statistics.st_stat_read++;
4617
self->st_stat_modify = FALSE;
4618
self->st_import_stat = XT_IMP_NO_IMPORT;
4620
/* Only reset this if there are no transactions running, and
4621
* no tables are open!
4623
if (!self->st_xact_data)
4624
self->st_non_temp_opened = FALSE;
4627
if (pb_table_locked) {
4629
if (!pb_table_locked)
4630
ha_release_exclusive_use(self, pb_share);
4633
/* No longer in use: */
4635
/* Someone may be waiting for me to complete: */
4636
if (pb_share->sh_table_lock)
4637
xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
4640
XT_PRINT2(self, "ha_pbxt::EXTERNAL_LOCK (%s) lock_type=%d\n", pb_share->sh_table_path->ps_path, lock_type);
4642
if (pb_lock_table) {
4645
if (!pb_table_locked)
4646
ha_aquire_exclusive_use(self, pb_share, this);
4649
ha_close_open_tables(self, pb_share, this);
4651
if (!pb_share->sh_table) {
4652
xt_ha_open_database_of_table(self, pb_share->sh_table_path);
4654
ha_open_share(self, pb_share);
4658
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4666
if (pb_share->sh_table_lock && !pb_table_locked) {
4667
/* If some thread has an exclusive lock, then
4668
* we wait for the lock to be removed:
4670
if (!ha_wait_for_shared_use(this, pb_share)) {
4671
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
4677
if ((err = reopen())) {
4683
/* Set the current thread for this open table: */
4684
pb_open_tab->ot_thread = self;
4686
/* If this is a set, then it is in UPDATE/DELETE TABLE ...
4687
* or SELECT ... FOR UPDATE
4689
pb_open_tab->ot_is_modify = FALSE;
4690
if ((pb_open_tab->ot_for_update = (lock_type == F_WRLCK))) {
4691
switch ((int) thd_sql_command(thd)) {
4694
case SQLCOM_DELETE_MULTI:
4696
/* turn DELETE IGNORE into normal DELETE. The IGNORE option causes problems because
4697
* when a record is deleted we add an xlog record which we cannot "rollback" later
4698
* when we find that an FK-constraint has failed.
4700
thd->lex->ignore = false;
4703
case SQLCOM_UPDATE_MULTI:
4705
case SQLCOM_REPLACE:
4706
case SQLCOM_REPLACE_SELECT:
4708
case SQLCOM_INSERT_SELECT:
4709
pb_open_tab->ot_is_modify = TRUE;
4710
self->st_stat_modify = TRUE;
4712
case SQLCOM_ALTER_TABLE:
4713
case SQLCOM_CREATE_INDEX:
4716
case SQLCOM_OPTIMIZE:
4718
case SQLCOM_DROP_INDEX:
4719
self->st_stat_modify = TRUE;
4720
self->st_import_stat = XT_IMP_COPY_TABLE;
4721
pb_import_row_count = 0;
4722
/* Do not read FOR UPDATE!
4723
* this avoids taking locks on the rows that are read
4724
* Which leads to the assertion failure:
4725
* int XTRowLocks::xt_make_lock_permanent(XTOpenTable*, XTRowLockList*)(lock_xt.cc:646) item
4726
* after the transaction is committed in write_row.
4728
pb_open_tab->ot_for_update = FALSE;
4731
self->st_stat_modify = TRUE;
4732
self->st_import_stat = XT_IMP_LOAD_TABLE;
4733
pb_import_row_count = 0;
4734
pb_open_tab->ot_for_update = FALSE;
4736
case SQLCOM_CREATE_TABLE:
4737
case SQLCOM_TRUNCATE:
4738
case SQLCOM_DROP_TABLE:
4739
self->st_stat_modify = TRUE;
4744
if (pb_open_tab->ot_is_modify && pb_open_tab->ot_table->tab_dic.dic_disable_index) {
4745
xt_tab_set_index_error(pb_open_tab->ot_table);
4746
err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
4751
/* Record the associated MySQL thread: */
4754
if (self->st_database != pb_share->sh_table->tab_db) {
4756
/* PBXT does not permit multiple databases in one statement,
4757
* or in a single transaction!
4761
* update mysqltest_1.t1, mysqltest_2.t2 set a=10,d=10;
4763
if (self->st_lock_count > 0)
4764
xt_throw_xterr(XT_CONTEXT, XT_ERR_MULTIPLE_DATABASES);
4766
xt_ha_open_database_of_table(self, pb_share->sh_table_path);
4769
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4776
/* See {IS-UPDATE-STAT} and {UPDATE-STACK} */
4777
self->st_is_update = NULL;
4779
/* Auto begin a transaction (if one is not already running): */
4780
if (!self->st_xact_data) {
4781
/* Transaction mode numbers must be identical! */
4782
(void) ASSERT_NS(ISO_READ_UNCOMMITTED == XT_XACT_UNCOMMITTED_READ);
4783
(void) ASSERT_NS(ISO_SERIALIZABLE == XT_XACT_SERIALIZABLE);
4785
thd_init_xact(thd, self, true);
4787
if (!xt_xn_begin(self)) {
4788
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4793
* {START-TRANS} GOTCHA: trans_register_ha() is not mentioned in the documentation.
4794
* It must be called to inform MySQL that we have a transaction (see start_stmt).
4796
* Here are some tests that confirm whether things are done correctly:
4798
* drop table if exists t1, t2;
4799
* create table t1 (c1 int);
4800
* insert t1 values (1);
4802
* rename table t1 to t2;
4804
* rename will generate an error if MySQL thinks a transaction is
4807
* create table t1 (a text character set utf8, b text character set latin1);
4808
* insert t1 values (0x4F736E616272C3BC636B, 0x4BF66C6E);
4810
* --exec $MYSQL_DUMP --tab=$MYSQLTEST_VARDIR/tmp/ test
4811
* --exec $MYSQL test < $MYSQLTEST_VARDIR/tmp/t1.sql
4812
* --exec $MYSQL_IMPORT test $MYSQLTEST_VARDIR/tmp/t1.txt
4815
* This test forces a begin transaction in start_stmt()
4817
* drop tables if exists t1;
4818
* create table t1 (c1 int);
4819
* lock tables t1 write;
4820
* insert t1 values (1);
4821
* insert t1 values (2);
4824
* The second select will return an empty result if
4825
* MySQL is not informed that a transaction is running (auto-commit
4826
* in external_lock comes too late)!
4830
if (!self->st_auto_commit) {
4831
trans_register_ha(thd, TRUE, pbxt_hton);
4832
XT_PRINT0(self, "CONN START XACT - ha_pbxt::external_lock --> trans_register_ha\n");
4837
/* Any open table can cause this to be FALSE: */
4838
if (!XT_IS_TEMP_TABLE(pb_open_tab->ot_table->tab_dic.dic_tab_flags))
4839
self->st_non_temp_opened = TRUE;
4841
/* Start a statement transaction: */
4842
/* {START-STAT-HACK} The problem that ha_commit_trans() is not
4843
* called by MySQL seems to be fixed (tests confirm this).
4844
* Here is the previous comment when this code was executed
4845
* here {START-STAT-HACK}
4847
* GOTCHA: I have a huge problem with the transaction statement.
4848
* It is not ALWAYS committed (I mean ha_commit_trans() is
4849
* not always called - for example in SELECT).
4851
* If I call trans_register_ha() but ha_commit_trans() is not called
4852
* then MySQL thinks a transaction is still running (while
4853
* I have committed the auto-transaction in ha_pbxt::external_lock()).
4855
* This causes all kinds of problems, like transactions
4856
* are killed when they should not be.
4858
* To prevent this, I only inform MySQL that a transaction
4859
* has been started when an update is performed. I have determined that
4860
* ha_commit_trans() is only guaranteed to be called if an update is done.
4863
* So, this is the correct place to start a statement transaction.
4865
* Note: if trans_register_ha() is not called before ha_write_row(), then
4866
* PBXT is not registered correctly as a modification transaction.
4867
* (mark_trx_read_write call in ha_write_row).
4868
* This leads to 2-phase commit not being called as it should when
4869
* binary logging is enabled.
4872
if (!pb_open_tab->ot_thread->st_stat_trans) {
4873
trans_register_ha(pb_mysql_thd, FALSE, pbxt_hton);
4874
XT_PRINT0(pb_open_tab->ot_thread, "STAT START - ha_pbxt::external_lock --> trans_register_ha\n");
4875
pb_open_tab->ot_thread->st_stat_trans = TRUE;
4878
if (lock_type == F_WRLCK || self->st_xact_mode < XT_XACT_REPEATABLE_READ)
4879
self->st_visible_time = self->st_database->db_xn_end_time;
4881
#ifdef TRACE_STATEMENTS
4882
if (self->st_lock_count == 0)
4883
STAT_TRACE(self, *thd_query(thd));
4885
self->st_lock_count++;
4893
* This function is called for each table in a statement
4894
* after LOCK TABLES has been used.
4896
* Currently I only use this function to set the
4897
* current thread of the table handle.
4899
* GOTCHA: The prototype of start_stmt() has changed
4900
* from version 4.1 to 5.1!
4902
/*
 * Per-statement entry point: prepares this handler and its PBXT thread
 * for a new statement (see the notes preceding this function for when
 * it is invoked).
 *
 * Steps visible in this body:
 *  - bind the handler to the current PBXT thread and reopen() the table;
 *  - if the previous statement has ended (st_stat_ended), reset the
 *    statement flags, commit or roll back a pending auto-commit
 *    transaction, and update the read/write statement counters;
 *  - derive ot_for_update from lock_type (any type other than the four
 *    read lock types listed below counts as "for update");
 *  - flag the statement as modifying according to thd_sql_command();
 *  - begin a transaction if none is active and register it (and the
 *    statement) through trans_register_ha();
 *  - refresh st_visible_time and increment st_stat_count.
 *
 * A failure of ha_set_current_thread() is returned through
 * xt_ha_pbxt_to_mysql_error(); later failures are stored in err via
 * xt_ha_pbxt_thread_error_for_mysql().
 */
int ha_pbxt::start_stmt(THD *thd, thr_lock_type lock_type)
4907
ASSERT_NS(pb_ex_in_use);
4909
if (!(self = ha_set_current_thread(thd, &err)))
4910
return xt_ha_pbxt_to_mysql_error(err);
4912
XT_PRINT2(self, "ha_pbxt::start_stmt (%s) lock_type=%d\n", pb_share->sh_table_path->ps_path, (int) lock_type);
4915
if ((err = reopen()))
4919
ASSERT_NS(pb_open_tab->ot_thread == self);
4920
ASSERT_NS(thd == pb_mysql_thd);
4921
ASSERT_NS(self->st_database == pb_open_tab->ot_table->tab_db);
4923
if (self->st_stat_ended) {
4924
self->st_stat_ended = FALSE;
4925
self->st_stat_trans = FALSE;
4927
#ifdef XT_IMPLEMENT_NO_ACTION
4928
if (self->st_restrict_list.bl_count) {
4929
if (!xt_tab_restrict_rows(&self->st_restrict_list, self)) {
4930
err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, self, pb_ignore_dup_key);
4935
/* This section handles "auto-commit"... */
4936
if (self->st_xact_data && self->st_auto_commit && self->st_table_trans) {
4937
if (self->st_abort_trans) {
4938
XT_PRINT0(self, "xt_xn_rollback in start_stmt\n");
4939
if (!xt_xn_rollback(self))
4940
err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, self, pb_ignore_dup_key);
4943
XT_PRINT0(self, "xt_xn_commit in start_stmt\n");
4944
if (!xt_xn_commit(self))
4945
err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, self, pb_ignore_dup_key);
4949
// Account for the statement that just ended in the write/read statistics.
if (self->st_stat_modify)
4950
self->st_statistics.st_stat_write++;
4952
self->st_statistics.st_stat_read++;
4953
self->st_stat_modify = FALSE;
4954
self->st_import_stat = XT_IMP_NO_IMPORT;
4956
/* If the previous statement was "for update", then set the visibility
4957
* so that non- for update SELECTs will see what the for update select
4958
* (or update statement) just saw.
4960
if (pb_open_tab->ot_for_update)
4961
self->st_visible_time = self->st_database->db_xn_end_time;
4964
pb_open_tab->ot_for_update =
4965
(lock_type != TL_READ &&
4966
lock_type != TL_READ_WITH_SHARED_LOCKS &&
4968
lock_type != TL_READ_HIGH_PRIORITY &&
4970
lock_type != TL_READ_NO_INSERT);
4971
pb_open_tab->ot_is_modify = FALSE;
4972
if (pb_open_tab->ot_for_update) {
4973
switch ((int) thd_sql_command(thd)) {
4977
case SQLCOM_UPDATE_MULTI:
4978
case SQLCOM_DELETE_MULTI:
4980
case SQLCOM_REPLACE:
4981
case SQLCOM_REPLACE_SELECT:
4983
case SQLCOM_INSERT_SELECT:
4984
pb_open_tab->ot_is_modify = TRUE;
4985
self->st_stat_modify = TRUE;
4987
case SQLCOM_CREATE_TABLE:
4988
case SQLCOM_CREATE_INDEX:
4989
case SQLCOM_ALTER_TABLE:
4990
case SQLCOM_TRUNCATE:
4991
case SQLCOM_DROP_TABLE:
4992
case SQLCOM_DROP_INDEX:
4996
case SQLCOM_OPTIMIZE:
4997
self->st_stat_modify = TRUE;
5003
/* {IS-UPDATE-STAT} This is required at this level!
5004
* No matter how often it is called, it is still the start of a
5005
* statement. We need to make sure that statements are NOT mistaken
5006
* for a different type of statement.
5008
* Here is an example:
5009
* select * from t1 where data = getcount("bar")
5011
* If the procedure getcount() addresses another table.
5012
* then open and close of the statements in getcount()
5013
* are nested within an open close of the select t1
5017
* Add to this I add the following:
5018
* A trigger in the middle of an update also causes nested
5019
* statements. If I reset st_is_update, then
5020
* when the trigger returns the system thinks we
5021
* are in a different update statement, and may
5022
* update the same row again.
5024
if (self->st_is_update == pb_open_tab) {
5025
/* Pop the update stack: */
5026
XTOpenTablePtr curr = pb_open_tab->ot_thread->st_is_update;
5028
pb_open_tab->ot_thread->st_is_update = curr->ot_prev_update;
5029
curr->ot_prev_update = NULL;
5032
/* See comment {START-TRANS} */
5033
if (!self->st_xact_data) {
5035
thd_init_xact(thd, self, false);
5037
if (!xt_xn_begin(self)) {
5038
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
5042
if (!self->st_auto_commit) {
5043
trans_register_ha(thd, TRUE, pbxt_hton);
5044
XT_PRINT0(self, "START CONN XACT - ha_pbxt::start_stmt --> trans_register_ha\n");
5049
/* Start a statement (see {START-STAT-HACK}): */
5051
if (!pb_open_tab->ot_thread->st_stat_trans) {
5052
trans_register_ha(pb_mysql_thd, FALSE, pbxt_hton);
5053
XT_PRINT0(pb_open_tab->ot_thread, "START STAT - ha_pbxt::start_stmt --> trans_register_ha\n");
5054
pb_open_tab->ot_thread->st_stat_trans = TRUE;
5057
if (pb_open_tab->ot_for_update || self->st_xact_mode < XT_XACT_REPEATABLE_READ)
5058
self->st_visible_time = self->st_database->db_xn_end_time;
5062
self->st_stat_count++;
5069
* The idea with handler::store_lock() is the following:
5071
* The statement decided which locks we should need for the table
5072
* for updates/deletes/inserts we get WRITE locks, for SELECT... we get
5075
* Before adding the lock into the table lock handler (see thr_lock.c)
5076
* mysqld calls store lock with the requested locks. Store lock can now
5077
* modify a write lock to a read lock (or some other lock), ignore the
5078
* lock (if we don't want to use MySQL table locks at all) or add locks
5079
* for many tables (like we do when we are using a MERGE handler).
5081
* When releasing locks, store_lock() is also called. In this case one
5082
* usually doesn't have to do anything.
5084
* In some exceptional cases MySQL may send a request for a TL_IGNORE;
5085
* This means that we are requesting the same lock as last time and this
5086
* should also be ignored. (This may happen when someone does a flush
5087
* table when we have opened a part of the tables, in which case mysqld
5088
* closes and reopens the tables and tries to get the same locks as last
5089
* time). In the future we will probably try to remove this.
5091
* Called from lock.cc by get_lock_data().
5093
/*
 * PBXT implementation of store_lock(): adjusts the lock type requested
 * by the server before it is recorded in pb_lock.type.
 *
 * Adjustments visible in this body:
 *  - TL_READ is first replaced by TL_READ_NO_INSERT;
 *  - when a new lock is being taken (lock_type != TL_IGNORE and
 *    pb_lock.type == TL_UNLOCK), pb_lock_table is set to TRUE for
 *    SQLCOM_TRUNCATE and to FALSE on the other visible path;
 *  - write locks in the range TL_WRITE_CONCURRENT_INSERT..TL_WRITE are
 *    weakened to TL_WRITE_ALLOW_WRITE, except for LOCK TABLES,
 *    tablespace operations, TRUNCATE, OPTIMIZE and CREATE TABLE;
 *  - TL_READ_NO_INSERT is turned back into TL_READ when the thread is
 *    not inside LOCK TABLES (further conditions of that test are only
 *    partly visible here);
 *  - the resulting type is stored in pb_lock.type and traced.
 */
THR_LOCK_DATA **ha_pbxt::store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type)
5096
* TL_READ means concurrent INSERTs are allowed. This is a problem as in this mode
5097
* PBXT is not compatible with MyISAM which allows INSERTs but isolates them from
5098
* current "transaction" (started by LOCK TABLES, ended by UNLOCK TABLES). PBXT
5099
* used to allow INSERTs and made them visible to the locker (on commit).
5100
* While MySQL manual doesn't state anything regarding row visibility limitations
5101
* we choose to convert local locks into normal read locks for better compatibility
5104
if (lock_type == TL_READ)
5105
lock_type = TL_READ_NO_INSERT;
5107
if (lock_type != TL_IGNORE && pb_lock.type == TL_UNLOCK) {
5108
/* Set to TRUE for operations that require a table lock: */
5109
switch (thd_sql_command(thd)) {
5110
case SQLCOM_TRUNCATE:
5112
* The problem is, if I do not do this, then
5113
* TRUNCATE TABLE deadlocks with a normal update of the table!
5116
* external_lock() is called before MySQL actually locks the
5117
* table. In external_lock(), the table is shared locked,
5118
* by indicating that the handler is in use.
5120
* Then later, in delete_all_rows(), a exclusive lock must be
5121
* obtained. If an UPDATE or INSERT has also gained a shared
5122
* lock in the meantime, then TRUNCATE TABLE hangs.
5124
* By setting pb_lock_table we indicate that an exclusive lock
5125
* should be gained in external_lock().
5127
* This is the locking behaviour:
5130
* XT SHARE LOCK (mysql_lock_tables calls external_lock)
5131
* MySQL WRITE LOCK (mysql_lock_tables)
5133
* XT EXCLUSIVE LOCK (delete_all_rows)
5136
* XT SHARED LOCK (mysql_lock_tables calls external_lock)
5137
* MySQL WRITE_ALLOW_WRITE LOCK (mysql_lock_tables)
5139
* If the locking for INSERT is done in the ... phase
5140
* above, then we have a deadlock because
5141
* WRITE_ALLOW_WRITE conflicts with WRITE.
5143
* Making TRUNCATE TABLE take a WRITE_ALLOW_WRITE LOCK, will
5144
* not solve the problem because then 2 TRUNCATE TABLES
5145
* can deadlock due to lock escalation.
5147
* What may work is if MySQL were to lock BEFORE calling
5150
* However, using this method, TRUNCATE TABLE does deadlock
5151
* with other operations such as ALTER TABLE!
5153
* This is handled with a lock timeout. Assuming
5154
* TRUNCATE TABLE will be mixed with DML this is the
5157
pb_lock_table = TRUE;
5160
pb_lock_table = FALSE;
5164
#ifdef PBXT_HANDLER_TRACE
5165
pb_lock.type = lock_type;
5167
/* GOTCHA: Before it was OK to weaken the lock after just checking
5168
* that !thd->in_lock_tables. However, when starting a procedure, MySQL
5169
* simulates a LOCK TABLES statement.
5171
* So we need to be more specific here, and check what the actual statement
5172
* type. Before doing this I got a deadlock (undetected) on the following test.
5173
* However, now we get a failed assertion in ha_rollback_trans():
5174
* TODO: Check this with InnoDB!
5177
* my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
5179
* drop table if exists t3;
5180
* create table t3 (a smallint primary key) engine=pbxt;
5181
* insert into t3 (a) values (40);
5182
* insert into t3 (a) values (50);
5186
* drop function if exists t3_update|
5188
* create function t3_update() returns int
5190
* insert into t3 values (10);
5199
* update t3 set a = 5 where a = 50;
5204
* update t3 set a = 4 where a = 40;
5208
* update t3 set a = 4 where a = 40; // Hangs waiting CONN 2.
5212
* select t3_update(); // Hangs waiting for table lock.
5215
if ((lock_type >= TL_WRITE_CONCURRENT_INSERT && lock_type <= TL_WRITE) &&
5217
!(thd_in_lock_tables(thd) && thd_sql_command(thd) == SQLCOM_LOCK_TABLES) &&
5219
!thd_tablespace_op(thd) &&
5220
thd_sql_command(thd) != SQLCOM_TRUNCATE &&
5222
thd_sql_command(thd) != SQLCOM_OPTIMIZE &&
5224
thd_sql_command(thd) != SQLCOM_CREATE_TABLE) {
5225
lock_type = TL_WRITE_ALLOW_WRITE;
5228
/* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
5229
* MySQL would use the lock TL_READ_NO_INSERT on t2, and that
5230
* would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
5231
* to t2. Convert the lock to a normal read lock to allow
5232
* concurrent inserts to t2.
5234
* (This one from InnoDB)
5236
* Stewart: removed SQLCOM_CALL, not sure of implications.
5238
if (lock_type == TL_READ_NO_INSERT
5240
&& (!thd_in_lock_tables(thd)
5241
|| thd_sql_command(thd) == SQLCOM_CALL
5246
lock_type = TL_READ;
5249
XT_PRINT3(xt_get_self(), "store_lock (%s) %d->%d\n", pb_share->sh_table_path->ps_path, pb_lock.type, lock_type);
5250
pb_lock.type = lock_type;
5252
#ifdef PBXT_HANDLER_TRACE
5254
XT_PRINT3(xt_get_self(), "store_lock (%s) %d->%d (ignore/unlock)\n", pb_share->sh_table_path->ps_path, lock_type, lock_type);
5262
* Used to delete a table. By the time delete_table() has been called all
5263
* opened references to this table will have been closed (and your globally
5264
* shared references released). The variable name will just be the name of
5265
* the table. You will need to remove any files you have created at this point.
5267
* Called from handler.cc by delete_table and ha_create_table(). Only used
5268
* during create if the table_flag HA_DROP_BEFORE_CREATE was specified for
5269
* the storage engine.
5272
/*
 * Drop a PBXT table. Two entry points are visible for the same body:
 * PBXTStorageEngine::doDropTable() (which derives table_path from the
 * TableIdentifier) and ha_pbxt::delete_table(); presumably a
 * preprocessor conditional not visible here selects one of them.
 *
 * Steps visible in this body:
 *  - system tables are delegated to delete_system_table();
 *  - the calling thread is bound to a PBXT thread; a failure is
 *    returned through xt_ha_pbxt_to_mysql_error();
 *  - the share for table_path is fetched, exclusive use is acquired and
 *    all open tables on it are closed before xt_drop_table() runs;
 *  - XT_ERR_TABLE_NOT_FOUND is logged and cleared instead of reported;
 *  - when XT_USE_GLOBAL_DB is not defined and no PBXT table remains,
 *    the PBXT database itself is closed and dropped;
 *  - pbms_delete_table_with_blobs() is called last because it cannot be
 *    undone; its failure is only logged as a warning.
 */
int PBXTStorageEngine::doDropTable(Session &, TableIdentifier& ident)
5274
const std::string& path = ident.getPath();
5275
const char *table_path = path.c_str();
5277
int ha_pbxt::delete_table(const char *table_path)
5280
THD *thd = current_thd;
5282
XTThreadPtr self = NULL;
5285
STAT_TRACE(self, *thd_query(thd));
5286
XT_PRINT1(self, "delete_table (%s)\n", table_path);
5288
if (XTSystemTableShare::isSystemTable(table_path))
5289
return delete_system_table(table_path);
5291
if (!(self = ha_set_current_thread(thd, &err)))
5292
return xt_ha_pbxt_to_mysql_error(err);
5294
self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5297
xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
5299
ASSERT(xt_get_self() == self);
5301
/* NOTE: MySQL does not drop a table by first locking it!
5302
* We also cannot use pb_share because the handler used
5303
* to delete a table is not opened correctly.
5305
share = ha_get_share(self, table_path, false);
5306
pushr_(ha_unget_share, share);
5307
ha_aquire_exclusive_use(self, share, NULL);
5308
pushr_(ha_release_exclusive_use, share);
5309
ha_close_open_tables(self, share, NULL);
5311
xt_drop_table(self, (XTPathStrPtr) table_path, thd_sql_command(thd) == SQLCOM_DROP_DB);
5313
freer_(); // ha_release_exclusive_use(share)
5314
freer_(); // ha_unget_share(share)
5317
/* In MySQL if the table does not exist, just log the error and continue. This is
5318
* needed to delete table in the case when CREATE TABLE fails and no PBXT disk
5319
* structures were created.
5320
* Drizzle unlike MySQL iterates over all handlers and tries to delete table. It
5321
* stops when a handler returns TRUE, so in Drizzle we need to report error.
5324
if (self->t_exception.e_xt_err == XT_ERR_TABLE_NOT_FOUND)
5325
xt_log_and_clear_exception(self);
5333
* If there are no more PBXT tables in the database, we
5334
* "drop the database", which deletes all PBXT resources
5337
/* We now only drop the pbxt system data,
5338
* when the PBXT database is dropped.
5340
#ifndef XT_USE_GLOBAL_DB
5341
if (!xt_table_exists(self->st_database)) {
5342
xt_ha_all_threads_close_database(self, self->st_database);
5343
xt_drop_database(self, self->st_database);
5344
xt_unuse_database(self, self);
5345
xt_ha_close_global_database(self);
5350
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5352
if (err == HA_ERR_NO_SUCH_TABLE)
5359
/* Call pbms_delete_table_with_blobs() last because it cannot be undone. */
5361
PBMSResultRec result;
5363
if (pbms_delete_table_with_blobs(table_path, &result)) {
5364
xt_logf(XT_NT_WARNING, "pbms_delete_table_with_blobs() Error: %s", result.mr_message);
5367
pbms_completed(NULL, true);
5375
/*
 * Drop one of the PBXT system tables. Two signatures are visible
 * (PBXTStorageEngine:: and ha_pbxt::); presumably a preprocessor
 * conditional not visible here selects one of them.
 *
 * Steps visible in this body:
 *  - bind the calling thread to a PBXT thread; a failure is returned
 *    through xt_ha_pbxt_to_mysql_error();
 *  - open the database of table_path and throw
 *    XT_ERR_PBXT_TABLE_EXISTS if it still contains PBXT tables;
 *  - mark the system table as deleted and, once
 *    doesSystemTableExist() reports false, close and drop the PBXT
 *    database.
 */
int PBXTStorageEngine::delete_system_table(const char *table_path)
5377
int ha_pbxt::delete_system_table(const char *table_path)
5380
THD *thd = current_thd;
5385
if (!(self = xt_ha_set_current_thread(thd, &e)))
5386
return xt_ha_pbxt_to_mysql_error(e.e_xt_err);
5389
xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
5391
if (xt_table_exists(self->st_database))
5392
xt_throw_xterr(XT_CONTEXT, XT_ERR_PBXT_TABLE_EXISTS);
5394
XTSystemTableShare::setSystemTableDeleted(table_path);
5396
if (!XTSystemTableShare::doesSystemTableExist()) {
5397
xt_ha_all_threads_close_database(self, self->st_database);
5398
xt_drop_database(self, self->st_database);
5399
xt_unuse_database(self, self);
5400
xt_ha_close_global_database(self);
5404
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5412
* Renames a table from one name to another from alter table call.
5413
* This function can be used to move a table from one database to
5417
/*
 * Rename a PBXT table. Two entry points are visible for the same body:
 * PBXTStorageEngine::doRenameTable() (which derives from/to from the
 * TableIdentifiers) and ha_pbxt::rename_table(); presumably a
 * preprocessor conditional not visible here selects one of them.
 *
 * Steps visible in this body:
 *  - system tables are delegated to rename_system_table();
 *  - pbms_rename_table_with_blobs() is called first and its error
 *    message is logged with XT_NT_ERROR;
 *  - the databases of `to` and `from` are opened in turn and
 *    XT_ERR_CANNOT_CHANGE_DB is thrown when they differ;
 *  - the share of `from` is fetched, exclusive use is acquired and open
 *    tables are closed before xt_rename_table() runs;
 *  - pbms_completed() is told whether the rename succeeded (err == 0).
 */
int PBXTStorageEngine::doRenameTable(Session&,
5418
TableIdentifier& from_ident,
5419
TableIdentifier& to_ident)
5421
const char *from = from_ident.getPath().c_str();
5422
const char *to = to_ident.getPath().c_str();
5424
int ha_pbxt::rename_table(const char *from, const char *to)
5427
THD *thd = current_thd;
5431
XTDatabaseHPtr to_db;
5433
if (XTSystemTableShare::isSystemTable(from))
5434
return rename_system_table(from, to);
5436
if (!(self = ha_set_current_thread(thd, &err)))
5437
return xt_ha_pbxt_to_mysql_error(err);
5439
XT_PRINT2(self, "rename_table (%s -> %s)\n", from, to);
5442
PBMSResultRec result;
5444
err = pbms_rename_table_with_blobs(from, to, &result);
5446
xt_logf(XT_NT_ERROR, "pbms_rename_table_with_blobs() Error: %s", result.mr_message);
5452
xt_ha_open_database_of_table(self, (XTPathStrPtr) to);
5453
to_db = self->st_database;
5455
xt_ha_open_database_of_table(self, (XTPathStrPtr) from);
5457
if (self->st_database != to_db)
5458
xt_throw_xterr(XT_CONTEXT, XT_ERR_CANNOT_CHANGE_DB);
5461
* NOTE: MySQL does not lock before calling rename table!
5463
* We cannot use pb_share because rename_table() is
5464
* called without correctly initializing
5467
share = ha_get_share(self, from, true);
5468
pushr_(ha_unget_share, share);
5469
ha_aquire_exclusive_use(self, share, NULL);
5470
pushr_(ha_release_exclusive_use, share);
5471
ha_close_open_tables(self, share, NULL);
5473
self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5474
xt_rename_table(self, (XTPathStrPtr) from, (XTPathStrPtr) to);
5476
freer_(); // ha_release_exclusive_use(share)
5477
freer_(); // ha_unget_share(share)
5480
* If there are no more PBXT tables in the database, we
5481
* "drop the database", which deletes all PBXT resources
5484
#ifdef XT_USE_GLOBAL_DB
5485
/* We now only drop the pbxt system data,
5486
* when the PBXT database is dropped.
5488
if (!xt_table_exists(self->st_database)) {
5489
xt_ha_all_threads_close_database(self, self->st_database);
5490
xt_drop_database(self, self->st_database);
5495
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5500
pbms_completed(NULL, (err == 0));
5507
/*
 * Renaming a PBXT system table is not supported: both parameters are
 * unused and ER_NOT_SUPPORTED_YET is returned. Two signatures are
 * visible (PBXTStorageEngine:: and ha_pbxt::); presumably a
 * preprocessor conditional not visible here selects one of them.
 */
int PBXTStorageEngine::rename_system_table(const char *XT_UNUSED(from), const char *XT_UNUSED(to))
5509
int ha_pbxt::rename_system_table(const char *XT_UNUSED(from), const char *XT_UNUSED(to))
5512
return ER_NOT_SUPPORTED_YET;
5515
/* Report XT_INDEX_MAX_KEY_SIZE as the maximum total key length. */
uint ha_pbxt::max_supported_key_length() const
5517
return XT_INDEX_MAX_KEY_SIZE;
5520
/*
 * Report the maximum length of a single key part: 4 bytes less than
 * XT_INDEX_MAX_KEY_SIZE.
 */
uint ha_pbxt::max_supported_key_part_length() const
5522
/* There is a little overhead in order to fit! */
5523
return XT_INDEX_MAX_KEY_SIZE-4;
5527
* Called in test_quick_select to determine if indexes should be used.
5529
* As far as I can tell, time is measured in "disk reads". So the
5530
* calculation below means the system reads about 38 rows per read.
5532
* For example a sequence scan uses a read buffer which reads a
5533
* number of rows at once, or a sequential scan can make use
5534
* of the cache (so it needs to read less).
5536
/*
 * Cost estimate for a full table scan, computed as
 * (stats.records + stats.deleted) / 38.0 + 2.
 */
double ha_pbxt::scan_time()
5538
double result = (double) (stats.records + stats.deleted) / 38.0 + 2;
5543
* The next method will never be called if you do not implement indexes.
5545
/*
 * Cost estimate for reading `rows` rows in `ranges` ranges through an
 * index: the sum ranges + rows converted with rows2double(). The index
 * number is not used.
 */
double ha_pbxt::read_time(uint XT_UNUSED(index), uint ranges, ha_rows rows)
5547
double result = rows2double(ranges+rows);
5552
* Given a starting key, and an ending key estimate the number of rows that
5553
* will exist between the two. end_key may be empty, in which case determine
5554
* if start_key matches any rows.
5556
* Called from opt_range.cc by check_quick_keys().
5559
/*
 * Estimate the number of rows in a key range of index `inx`.
 *
 * The estimate is not computed from the key values: the keypart_map of
 * min_key or max_key (the selecting conditions are not visible here) is
 * shifted right while its low bit is set, and the resulting `segement`
 * value is used to look up the precomputed is_recs_in_range of segment
 * segement-1 of the index. `segement` is range-checked against
 * 1..ind->mi_seg_count before that lookup.
 * NOTE(review): presumably `segement` counts the key parts used by the
 * range; the statement that increments it is not visible - confirm.
 */
ha_rows ha_pbxt::records_in_range(uint inx, key_range *min_key, key_range *max_key)
5562
key_part_map keypart_map;
5567
keypart_map = min_key->keypart_map;
5569
keypart_map = max_key->keypart_map;
5572
ind = (XTIndexPtr) pb_share->sh_dic_keys[inx];
5574
while (keypart_map & 1) {
5576
keypart_map = keypart_map >> 1;
5579
if (segement < 1 || segement > ind->mi_seg_count)
5582
result = ind->mi_seg[segement-1].is_recs_in_range;
5583
#ifdef XT_PRINT_INDEX_OPT
5584
printf("records_in_range %s index %d cols req=%d/%d read_bits=%X write_bits=%X index_bits=%X --> %d\n", pb_open_tab->ot_table->tab_name->ps_path, (int) inx, segement, ind->mi_seg_count, (int) *table->read_set->bitmap, (int) *table->write_set->bitmap, (int) *ind->mi_col_map.bitmap, (int) result);
5590
* create() is called to create a table/database. The variable name will have the name
5591
* of the table. When create() is called you do not need to worry about opening
5592
* the table. Also, the FRM file will have already been created so adjusting
5593
* create_info will not do you any good. You can overwrite the frm file at this
5594
* point if you wish to change the table definition, but there are no methods
5595
* currently provided for doing that.
5597
* Called from handler.cc by ha_create_table().
5600
/*
 * Create a PBXT table. Two variants are interleaved in this body: the
 * Drizzle entry point PBXTStorageEngine::doCreateTable() (using ident
 * and proto) and the MySQL entry point ha_pbxt::create() (using
 * table_arg and create_info); presumably preprocessor conditionals not
 * visible here select one of them.
 *
 * Steps visible in this body:
 *  - the paths "./pbxt/location", "./pbxt/tables" and
 *    "./pbxt/statistics" are tested first (the action taken for them is
 *    not visible here);
 *  - dic and source_dic are zeroed and the calling thread is bound to a
 *    PBXT thread;
 *  - every key longer than XT_INDEX_MAX_KEY_SIZE raises
 *    XT_ERR_KEY_TOO_LARGE;
 *  - xt_ri_create_table() builds tab_def from the statement text and
 *    its foreign keys are checked;
 *  - dic is filled in (table definition, flags for memory, real
 *    temporary and DDL temporary tables, minimum auto-increment value,
 *    default average row size) and passed to myxt_setup_dictionary()
 *    and xt_create_table();
 *  - during clean-up dic.dic_my_table is reset to NULL before
 *    myxt_free_dictionary() so that table_arg is not freed.
 */
int PBXTStorageEngine::doCreateTable(Session&,
5602
TableIdentifier& ident,
5603
drizzled::message::Table& proto)
5605
const std::string& path = ident.getPath();
5606
const char *table_path = path.c_str();
5608
int ha_pbxt::create(const char *table_path, TABLE *table_arg, HA_CREATE_INFO *create_info)
5611
THD *thd = current_thd;
5614
XTDDTable *tab_def = NULL;
5615
XTDictionaryRec dic, source_dic;
5617
if ((strcmp(table_path, "./pbxt/location") == 0) ||
5618
(strcmp(table_path, "./pbxt/tables") == 0) ||
5619
(strcmp(table_path, "./pbxt/statistics") == 0))
5622
if ((strcmp(table_path, "./pbxt/location") == 0) || (strcmp(table_path, "./pbxt/statistics") == 0))
5625
memset(&dic, 0, sizeof(dic));
5626
memset(&source_dic, 0, sizeof(source_dic));
5628
if (!(self = ha_set_current_thread(thd, &err)))
5629
return xt_ha_pbxt_to_mysql_error(err);
5631
XT_PRINT2(self, "create (%s) %s\n", table_path, (proto.type() == message::Table::TEMPORARY) ? "temporary" : "");
5632
switch(ident.getType()) {
5633
case message::Table::STANDARD:
5634
dic.dic_table_type = XT_TABLE_TYPE_STANDARD;
5637
case message::Table::TEMPORARY:
5638
dic.dic_table_type = XT_TABLE_TYPE_TEMPORARY;
5641
case message::Table::INTERNAL:
5642
dic.dic_table_type = XT_TABLE_TYPE_INTERNAL;
5645
case message::Table::FUNCTION:
5646
dic.dic_table_type = XT_TABLE_TYPE_FUNCTION;
5650
XT_PRINT2(self, "create (%s) %s\n", table_path, (create_info->options & HA_LEX_CREATE_TMP_TABLE) ? "temporary" : "");
5653
STAT_TRACE(self, *thd_query(thd));
5656
xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
5659
for (uint i=0; i<TS(&table_arg)->keys; i++) {
5660
if (table_arg.key_info[i].key_length > XT_INDEX_MAX_KEY_SIZE)
5661
xt_throw_sulxterr(XT_CONTEXT, XT_ERR_KEY_TOO_LARGE, table_arg.key_info[i].name, (u_long) XT_INDEX_MAX_KEY_SIZE);
5664
for (uint i=0; i<TS(table_arg)->keys; i++) {
5665
if (table_arg->key_info[i].key_length > XT_INDEX_MAX_KEY_SIZE)
5666
xt_throw_sulxterr(XT_CONTEXT, XT_ERR_KEY_TOO_LARGE, table_arg->key_info[i].name, (u_long) XT_INDEX_MAX_KEY_SIZE);
5670
/* ($) auto_increment_value will be zero if
5671
* AUTO_INCREMENT is not used. Otherwise
5672
* Query was ALTER TABLE ... AUTO_INCREMENT = x; or
5673
* CREATE TABLE ... AUTO_INCREMENT = x;
5675
#ifdef XT_USE_DEFAULT_MEMORY_TABS
5676
if (create_info->storage_media == HA_SM_DEFAULT)
5677
source_dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
5681
StorageEngine::writeDefinitionFromPath(ident, proto);
5683
tab_def = xt_ri_create_table(self, true, (XTPathStrPtr) table_path, const_cast<char *>(thd->getQueryString().c_str()), myxt_create_table_from_table(self, &table_arg), &source_dic);
5684
tab_def->checkForeignKeys(self, proto.type() == message::Table::TEMPORARY);
5686
// tab_def = xt_ri_create_table(self, true, (XTPathStrPtr) table_path, *thd_query(thd), myxt_create_table_from_table(self, table_arg));
5687
tab_def = xt_ri_create_table(self, true, (XTPathStrPtr) table_path, *thd_query(thd), myxt_create_table_from_table(self, table_arg), &source_dic);
5688
tab_def->checkForeignKeys(self, create_info->options & HA_LEX_CREATE_TMP_TABLE);
5689
dic.dic_table_type = XT_TABLE_TYPE_STANDARD;
5692
dic.dic_table = tab_def;
5694
dic.dic_my_table = &table_arg;
5695
dic.dic_tab_flags = source_dic.dic_tab_flags;
5696
//if (create_info.storage_media == HA_SM_MEMORY)
5697
// dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
5698
if (proto.type() == message::Table::TEMPORARY)
5699
dic.dic_tab_flags |= XT_TF_REAL_TEMP_TABLE;
5700
if (myxt_temp_table_name(table_path))
5701
dic.dic_tab_flags |= XT_TF_DDL_TEMP_TABLE;
5703
dic.dic_min_auto_inc = (xtWord8) proto.options().auto_increment_value(); /* ($) */
5704
dic.dic_def_ave_row_size = proto.options().avg_row_length();
5706
dic.dic_my_table = table_arg;
5707
dic.dic_tab_flags = source_dic.dic_tab_flags;
5709
if (create_info->storage_media == HA_SM_MEMORY)
5710
dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
5711
if (create_info->options & HA_LEX_CREATE_TMP_TABLE)
5712
dic.dic_tab_flags |= XT_TF_REAL_TEMP_TABLE;
5713
if (myxt_temp_table_name(table_path))
5714
dic.dic_tab_flags |= XT_TF_DDL_TEMP_TABLE;
5716
dic.dic_min_auto_inc = (xtWord8) create_info->auto_increment_value; /* ($) */
5717
dic.dic_def_ave_row_size = (xtWord8) table_arg->s->avg_row_length;
5719
myxt_setup_dictionary(self, &dic);
5722
* We used to ignore the value of foreign_key_checks flag and allowed creation
5723
* of tables with "hanging" references. Now we validate FKs if foreign_key_checks != 0
5725
self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5728
* Previously I set delete_if_exists=TRUE because
5729
* CREATE TABLE was being used to TRUNCATE.
5730
* This was due to the flag HTON_CAN_RECREATE.
5731
* Now I could set delete_if_exists=FALSE, but
5732
* leaving it TRUE should not cause any problems.
5734
xt_create_table(self, (XTPathStrPtr) table_path, &dic);
5738
tab_def->finalize(self);
5739
dic.dic_table = NULL;
5740
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5744
/* Free the dictionary, but not 'table_arg'! */
5745
dic.dic_my_table = NULL;
5746
myxt_free_dictionary(self, &dic);
5751
/*
 * Supply table attributes that the caller did not specify. When the
 * table is open (pb_open_tab is set) and HA_CREATE_USED_AUTO is not set
 * in create_info->used_fields, auto_increment_value is filled in from
 * the table dictionary's dic_min_auto_inc.
 */
void ha_pbxt::update_create_info(HA_CREATE_INFO *create_info)
5755
if ((ot = pb_open_tab)) {
5756
if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) {
5757
/* Fill in the minimum auto-increment value! */
5758
create_info->auto_increment_value = ot->ot_table->tab_dic.dic_min_auto_inc;
5764
/*
 * Drizzle hook that starts a PBXT transaction for the session `thd`.
 *
 * Steps visible in this body:
 *  - bind the session to a PBXT thread;
 *  - derive st_xact_mode from the session isolation level (levels up to
 *    ISO_READ_COMMITTED map to XT_XACT_COMMITTED_READ, everything else
 *    to XT_XACT_REPEATABLE_READ);
 *  - copy the foreign-key-check and auto-commit options of the session
 *    and reset the per-transaction/per-statement flags;
 *  - wait for recovery (XT_RECOVER_SWEPT), open a database if the
 *    thread has none, and call xt_xn_begin().
 *
 * A failure of xt_xn_begin() is stored in err via
 * xt_ha_pbxt_thread_error_for_mysql(). The `options` argument is unused.
 */
int PBXTStorageEngine::doStartTransaction(Session *thd, start_transaction_option_t XT_UNUSED(options))
5767
// NOTE(review): other callers of ha_set_current_thread() in this file
// test the result for NULL and return xt_ha_pbxt_to_mysql_error(err);
// here `self` is dereferenced below without such a test - confirm.
XTThreadPtr self = ha_set_current_thread(thd, &err);
5769
XT_PRINT0(self, "PBXTStorageEngine::doStartTransaction\n");
5771
/* Transaction mode numbers must be identical! */
5772
(void) ASSERT_NS(ISO_READ_UNCOMMITTED == XT_XACT_UNCOMMITTED_READ);
5773
(void) ASSERT_NS(ISO_SERIALIZABLE == XT_XACT_SERIALIZABLE);
5775
self->st_xact_mode = thd_tx_isolation(thd) <= ISO_READ_COMMITTED ? XT_XACT_COMMITTED_READ : XT_XACT_REPEATABLE_READ;
5776
self->st_ignore_fkeys = (thd_test_options(thd,OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5777
self->st_auto_commit = (thd_test_options(thd, (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) == 0;
5778
self->st_table_trans = FALSE;
5779
self->st_abort_trans = FALSE;
5780
self->st_stat_ended = FALSE;
5781
self->st_stat_trans = FALSE;
5782
xt_xres_wait_for_recovery(self, XT_RECOVER_SWEPT);
5784
if (!self->st_database)
5785
xt_ha_open_database_of_table(self, NULL);
5787
if (!xt_xn_begin(self)) {
5788
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, /*pb_ignore_dup_key*/false);
5795
/*
 * Drizzle savepoint hook. The savepoint argument is unnamed and unused;
 * the visible body only returns the result of
 * xt_ha_pbxt_thread_error_for_mysql() for the session's PBXT thread.
 */
int PBXTStorageEngine::doSetSavepoint(drizzled::Session* thd, drizzled::NamedSavepoint&)
5797
return xt_ha_pbxt_thread_error_for_mysql(thd, xt_ha_thd_to_self(thd), false);
5800
/*
 * Drizzle rollback-to-savepoint hook. The savepoint argument is unnamed
 * and unused; the visible body only returns the result of
 * xt_ha_pbxt_thread_error_for_mysql() for the session's PBXT thread.
 */
int PBXTStorageEngine::doRollbackToSavepoint(drizzled::Session* thd, drizzled::NamedSavepoint&)
5802
return xt_ha_pbxt_thread_error_for_mysql(thd, xt_ha_thd_to_self(thd), false);
5805
/*
 * Drizzle release-savepoint hook. The savepoint argument is unnamed and
 * unused; the visible body only returns the result of
 * xt_ha_pbxt_thread_error_for_mysql() for the session's PBXT thread.
 */
int PBXTStorageEngine::doReleaseSavepoint(drizzled::Session* thd, drizzled::NamedSavepoint&)
5807
return xt_ha_pbxt_thread_error_for_mysql(thd, xt_ha_thd_to_self(thd), false);
5810
/*
 * Drizzle commit hook. The PBXT thread is read from the session's
 * engine data. xt_xn_commit() is called only when that thread exists
 * and neither OPTION_NOT_AUTOCOMMIT nor OPTION_BEGIN is set on the
 * session ("real commit"); a failure is stored in err via
 * xt_ha_pbxt_thread_error_for_mysql(). The bool argument is unused.
 */
int PBXTStorageEngine::doCommit(drizzled::Session* thd, bool)
5813
XTThreadPtr self = (XTThreadPtr) *thd->getEngineData(pbxt_hton);
5815
bool real_commit = !session_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN);
5817
XT_PRINT1(self, "PBXTStorageEngine::doCommit(real_commit = %s)\n", real_commit ? "true" : "false");
5819
if (real_commit && self) {
5820
if (!xt_xn_commit(self))
5821
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5827
/*
 * Drizzle rollback hook. The PBXT thread is read from the session's
 * engine data. xt_xn_rollback() is called only when that thread exists
 * and neither OPTION_NOT_AUTOCOMMIT nor OPTION_BEGIN is set on the
 * session; a failure is stored in err via
 * xt_ha_pbxt_thread_error_for_mysql(). The bool argument is unused.
 */
int PBXTStorageEngine::doRollback(drizzled::Session* thd, bool)
5830
XTThreadPtr self = (XTThreadPtr) *thd->getEngineData(pbxt_hton);
5832
bool real_commit = !session_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN);
5834
XT_PRINT1(self, "PBXTStorageEngine::doRollback(real_commit = %s)\n", real_commit ? "true" : "false");
5836
if (real_commit && self) {
5837
if (!xt_xn_rollback(self))
5838
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5844
/*
 * Collect a TableIdentifier for every table definition file found in
 * `directory`.
 *
 * Each directory entry is tested: entries without a '.', entries whose
 * text from the first '.' onwards does not match DEFAULT_FILE_EXTENSION
 * (case-insensitive), and entries starting with TMP_FILE_PREFIX are
 * singled out (presumably skipped; the statement is not visible here).
 * For the remaining entries the file name is converted to a table name,
 * the extension is cut off, and an identifier built from
 * schema_identifier and that name is appended to set_of_identifiers.
 */
void PBXTStorageEngine::doGetTableIdentifiers(drizzled::CachedDirectory &directory,
5845
drizzled::SchemaIdentifier &schema_identifier,
5846
drizzled::TableIdentifiers &set_of_identifiers)
5848
CachedDirectory::Entries entries= directory.getEntries();
5850
for (CachedDirectory::Entries::iterator entry_iter= entries.begin();
5851
entry_iter != entries.end(); ++entry_iter)
5853
CachedDirectory::Entry *entry= *entry_iter;
5854
const std::string *filename= &entry->filename;
5856
assert(filename->size());
5858
// NOTE(review): strchr() locates the FIRST '.', so a file name with an
// earlier dot would not compare equal to DEFAULT_FILE_EXTENSION - confirm
// that this is intended.
const char *ext= strchr(filename->c_str(), '.');
5860
if (ext == NULL || my_strcasecmp(system_charset_info, ext, DEFAULT_FILE_EXTENSION) ||
5861
(filename->compare(0, strlen(TMP_FILE_PREFIX), TMP_FILE_PREFIX) == 0))
5865
char uname[NAME_LEN + 1];
5866
uint32_t file_name_len;
5868
file_name_len= filename_to_tablename(filename->c_str(), uname, sizeof(uname));
5869
// TODO: Remove need for memory copy here
5870
// sizeof() counts the terminating NUL, so the index below is
// file_name_len minus the number of characters in the extension.
uname[file_name_len - sizeof(DEFAULT_FILE_EXTENSION) + 1]= '\0'; // Subtract ending, place NULL
5872
set_of_identifiers.push_back(TableIdentifier(schema_identifier, uname));
5877
/*
 * Collect the names of all tables whose definition file is found in
 * `directory`.
 *
 * Each directory entry is tested: entries without a '.', entries whose
 * text from the first '.' onwards does not match DEFAULT_FILE_EXTENSION
 * (case-insensitive), and entries starting with TMP_FILE_PREFIX are
 * singled out (presumably skipped; the statement is not visible here).
 * For the remaining entries the file name is converted to a table name,
 * the extension is cut off, and the name is inserted into set_of_names.
 */
void PBXTStorageEngine::doGetTableNames(
5878
CachedDirectory &directory,
5880
std::set<std::string>& set_of_names)
5882
CachedDirectory::Entries entries= directory.getEntries();
5884
for (CachedDirectory::Entries::iterator entry_iter= entries.begin();
5885
entry_iter != entries.end(); ++entry_iter)
5887
CachedDirectory::Entry *entry= *entry_iter;
5888
const std::string *filename= &entry->filename;
5890
assert(filename->size());
5892
// NOTE(review): strchr() locates the FIRST '.', so a file name with an
// earlier dot would not compare equal to DEFAULT_FILE_EXTENSION - confirm
// that this is intended.
const char *ext= strchr(filename->c_str(), '.');
5894
if (ext == NULL || my_strcasecmp(system_charset_info, ext, DEFAULT_FILE_EXTENSION) ||
5895
(filename->compare(0, strlen(TMP_FILE_PREFIX), TMP_FILE_PREFIX) == 0))
5899
char uname[NAME_LEN + 1];
5900
uint32_t file_name_len;
5902
file_name_len= filename_to_tablename(filename->c_str(), uname, sizeof(uname));
5903
// TODO: Remove need for memory copy here
5904
// sizeof() counts the terminating NUL, so the index below is
// file_name_len minus the number of characters in the extension.
uname[file_name_len - sizeof(DEFAULT_FILE_EXTENSION) + 1]= '\0'; // Subtract ending, place NULL
5905
set_of_names.insert(uname);
5910
/*
 * Test whether a table exists by probing for its definition file: the
 * identifier's path with DEFAULT_FILE_EXTENSION appended is checked
 * with access(F_OK). The Session argument is unused. The values
 * returned for the two outcomes are not visible here.
 */
bool PBXTStorageEngine::doDoesTableExist(Session&, TableIdentifier &identifier)
5912
std::string proto_path(identifier.getPath());
5913
proto_path.append(DEFAULT_FILE_EXTENSION);
5915
if (access(proto_path.c_str(), F_OK))
5925
/*
 * Build the foreign key definition text of the open table.
 *
 * The text is written into the string buffer tab_def by
 * loadForeignKeyString() of the table's dictionary object, and the
 * buffer's C string (tab_def.sb_cstring) is returned. The thread
 * binding, reopen() and the presence of a dictionary table are checked
 * first; what those checks return on failure is not visible here.
 * NOTE(review): the xt_sb_set_size(..., 0) call followed by the error
 * conversion presumably belongs to an error handler that empties the
 * buffer - confirm.
 */
char *ha_pbxt::get_foreign_key_create_info()
5927
THD *thd = current_thd;
5930
XTStringBufferRec tab_def = { 0, 0, 0 };
5932
if (!(self = ha_set_current_thread(thd, &err))) {
5933
xt_ha_pbxt_to_mysql_error(err);
5938
if ((err = reopen()))
5942
if (!pb_open_tab->ot_table->tab_dic.dic_table)
5946
pb_open_tab->ot_table->tab_dic.dic_table->loadForeignKeyString(self, &tab_def);
5949
xt_sb_set_size(self, &tab_def, 0);
5950
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
5954
return tab_def.sb_cstring;
5957
void ha_pbxt::free_foreign_key_create_info(char* str)
5962
/*
 * Copy the message of the current thread's last PBXT exception
 * (t_exception.e_err_msg) into buf using system_charset_info. The
 * error number passed by the caller is unused. The thread binding and
 * the presence of an exception (e_xt_err) are tested first; the values
 * returned in those cases are not visible here.
 */
bool ha_pbxt::get_error_message(int XT_UNUSED(error), String *buf)
5964
THD *thd = current_thd;
5968
if (!(self = ha_set_current_thread(thd, &err)))
5971
if (!self->t_exception.e_xt_err)
5974
buf->copy(self->t_exception.e_err_msg, (uint32_t) strlen(self->t_exception.e_err_msg), system_charset_info);
5979
* get info about FKs of the currently open table
5981
* 1. REPLACE; is > 0 if table is referred by a FOREIGN KEY
5982
* 2. INFORMATION_SCHEMA tables: TABLE_CONSTRAINTS, REFERENTIAL_CONSTRAINTS
5983
* Return value: as of 5.1.24 it's ignored
5986
/*
 * Append one FOREIGN_KEY_INFO entry to f_key_list for every foreign key
 * found in the open table's dictionary (table_dic->dt_fkeys).
 *
 * The entries and all strings stored in them are obtained through
 * thd_alloc() / thd_make_lex_string() on the supplied thd.
 */
int ha_pbxt::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
5992
if (!(self = ha_set_current_thread(thd, &err))) {
5993
return xt_ha_pbxt_to_mysql_error(err);
5997
XTDDTable *table_dic = pb_open_tab->ot_table->tab_dic.dic_table;
5999
/* A table without dictionary information cannot list its foreign keys. */
if (table_dic == NULL)
6000
xt_throw_errno(XT_CONTEXT, XT_ERR_NO_DICTIONARY);
6002
for (int i = 0, sz = table_dic->dt_fkeys.size(); i < sz; i++) {
6003
/* Placement-new into memory obtained from thd_alloc(). */
FOREIGN_KEY_INFO *fk_info= new // assumed that C++ exceptions are disabled
6004
(thd_alloc(thd, sizeof(FOREIGN_KEY_INFO))) FOREIGN_KEY_INFO;
6006
if (fk_info == NULL)
6007
xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
6009
XTDDForeignKey *fk = table_dic->dt_fkeys.itemAt(i);
6011
/*
 * Split the referenced table's path into database and table name.
 * ref_tbl_name starts at the terminating NUL of the path; the loop
 * runs until a directory separator or the start of the path is reached.
 */
const char *path = fk->fk_ref_tab_name->ps_path;
6012
const char *ref_tbl_name = path + strlen(path);
6014
while (ref_tbl_name != path && !XT_IS_DIR_CHAR(*ref_tbl_name))
6017
/* The database name is searched for in the same way, starting just before the table name. */
const char * ref_db_name = ref_tbl_name - 1;
6019
while (ref_db_name != path && !XT_IS_DIR_CHAR(*ref_db_name))
6025
/* "forein_id" is the member's actual spelling in FOREIGN_KEY_INFO as used here. */
fk_info->forein_id = thd_make_lex_string(thd, 0,
6026
fk->co_name, (uint) strlen(fk->co_name), 1);
6028
/* Length is the distance between the two name pointers minus one character. */
fk_info->referenced_db = thd_make_lex_string(thd, 0,
6029
ref_db_name, (uint) (ref_tbl_name - ref_db_name - 1), 1);
6031
fk_info->referenced_table = thd_make_lex_string(thd, 0,
6032
ref_tbl_name, (uint) strlen(ref_tbl_name), 1);
6034
/* Stays NULL unless a matching index is found in the referenced table below. */
fk_info->referenced_key_name = NULL;
6036
XTIndex *ix = fk->getReferenceIndexPtr();
6037
if (ix == NULL) /* can be NULL if another thread changes referenced table at the moment */
6040
XTDDTable *ref_table = fk->fk_ref_table;
6042
// might be a self-reference
6043
if ((ref_table == NULL)
6044
&& (xt_tab_compare_names(path, table_dic->dt_table->tab_name->ps_path) == 0)) {
6045
ref_table = table_dic;
6048
if (ref_table != NULL) {
6049
/* Look up the dictionary index whose number equals the reference index. */
const XTList<XTDDIndex>& ix_list = ref_table->dt_indexes;
6050
for (int j = 0, sz2 = ix_list.size(); j < sz2; j++) {
6051
XTDDIndex *ddix = ix_list.itemAt(j);
6052
if (ddix->in_index == ix->mi_index_no) {
6053
/* Use co_name when it is set, otherwise fall back to co_ind_name. */
const char *ix_name =
6054
ddix->co_name ? ddix->co_name : ddix->co_ind_name;
6055
fk_info->referenced_key_name = thd_make_lex_string(thd, 0,
6056
ix_name, (uint) strlen(ix_name), 1);
6062
/* ON DELETE / ON UPDATE actions are stored in their string form. */
action = XTDDForeignKey::actionTypeToString(fk->fk_on_delete);
6063
fk_info->delete_method = thd_make_lex_string(thd, 0,
6064
action, (uint) strlen(action), 1);
6065
action = XTDDForeignKey::actionTypeToString(fk->fk_on_update);
6066
fk_info->update_method = thd_make_lex_string(thd, 0,
6067
action, (uint) strlen(action), 1);
6069
/* Column names of the foreign key itself (fk->co_cols). */
const XTList<XTDDColumnRef>& cols = fk->co_cols;
6070
for (int j = 0, sz2 = cols.size(); j < sz2; j++) {
6071
XTDDColumnRef *col_ref= cols.itemAt(j);
6072
fk_info->foreign_fields.push_back(thd_make_lex_string(thd, 0,
6073
col_ref->cr_col_name, (uint) strlen(col_ref->cr_col_name), 1));
6076
/* Column names on the referenced side (fk->fk_ref_cols). */
const XTList<XTDDColumnRef>& ref_cols = fk->fk_ref_cols;
6077
for (int j = 0, sz2 = ref_cols.size(); j < sz2; j++) {
6078
XTDDColumnRef *col_ref= ref_cols.itemAt(j);
6079
fk_info->referenced_fields.push_back(thd_make_lex_string(thd, 0,
6080
col_ref->cr_col_name, (uint) strlen(col_ref->cr_col_name), 1));
6083
f_key_list->push_back(fk_info);
6087
/* NOTE(review): looks like the exception path of the function -- confirm. */
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
6094
/*
 * Tell whether other tables reference the open table through a foreign
 * key: the result is 1 when the dictionary's dt_trefs is non-NULL and
 * 0 otherwise.
 */
uint ha_pbxt::referenced_by_foreign_key()
6096
XTDDTable *table_dic = pb_open_tab->ot_table->tab_dic.dic_table;
6100
/* Check the list of referencing tables: */
6101
return table_dic->dt_trefs ? 1 : 0;
6105
/*
 * Local definition of the plugin system variable structure; the only
 * member list visible here is the MYSQL_PLUGIN_VAR_HEADER macro.
 * It is needed so that var->flags can be read in the update callback.
 */
struct st_mysql_sys_var
6107
MYSQL_PLUGIN_VAR_HEADER;
6110
/*
 * USE_CONST_SAVE selects the signature of the system variable update
 * callback (const void *save versus void *save). It is defined for the
 * server version ranges tested below.
 * NOTE(review): the exact nesting of these conditionals should be
 * confirmed against the matching #else / #endif lines.
 */
#if MYSQL_VERSION_ID < 60000
6111
#if MYSQL_VERSION_ID >= 50124
6112
#define USE_CONST_SAVE
6115
#if MYSQL_VERSION_ID >= 60005
6116
#define USE_CONST_SAVE
6121
/* Map the MySQL structure name onto drizzled::drizzle_sys_var (presumably the Drizzle build -- confirm). */
#define st_mysql_sys_var drizzled::drizzle_sys_var
6124
/*
 * Update callback of the record_cache_size system variable.
 *
 * Stores the new string value in *tgt, converts it with
 * ha_set_variable() and hands the result to xt_tc_set_cache_size().
 * Two signatures are provided; USE_CONST_SAVE picks the variant whose
 * save parameter is const-qualified.
 */
#ifdef USE_CONST_SAVE
6125
static void pbxt_record_cache_size_func(THD *XT_UNUSED(thd), struct st_mysql_sys_var *var, void *tgt, const void *save)
6127
static void pbxt_record_cache_size_func(THD *XT_UNUSED(thd), struct st_mysql_sys_var *var, void *tgt, void *save)
6130
xtInt8 record_cache_size;
6132
/* Remember the previous string so it can be freed after replacement. */
char *old= *(char **) tgt;
6133
*(char **)tgt= *(char **) save;
6134
/* With PLUGIN_VAR_MEMALLOC the variable keeps its own copy of the string. */
if (var->flags & PLUGIN_VAR_MEMALLOC)
6136
*(char **)tgt= my_strdup(*(char **) save, MYF(0));
6137
my_free(old, MYF(0));
6139
record_cache_size = ha_set_variable(&pbxt_record_cache_size, &vp_record_cache_size);
6140
xt_tc_set_cache_size((size_t) record_cache_size);
6144
sprintf(buffer, "pbxt_record_cache_size=%llu\n", (u_llong) record_cache_size);
6145
/*
 * NOTE(review): buffer appears to be passed in the format-string
 * position of xt_logf(); passing the format and value directly would
 * avoid the intermediate sprintf() -- confirm the xt_logf() signature.
 */
xt_logf(XT_NT_INFO, buffer);
6150
/*
 * Storage engine descriptor; its address is listed in the
 * mysql_declare_plugin(pbxt) entry of this file.
 */
struct st_mysql_storage_engine pbxt_storage_engine = {
6151
MYSQL_HANDLERTON_INTERFACE_VERSION
6153
/*
 * INFORMATION_SCHEMA plugin descriptor, initialised with the
 * information schema interface version.
 * (The identifier is spelled "statitics" in the code.)
 */
static st_mysql_information_schema pbxt_statitics = {
6154
MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION
6158
/*
 * String-valued system variables. Each is declared with
 * MYSQL_SYSVAR_STR and flagged PLUGIN_VAR_READONLY; the value is kept
 * in the pbxt_* variable named as the second macro argument.
 */
#if MYSQL_VERSION_ID >= 50118
6159
static MYSQL_SYSVAR_STR(index_cache_size, pbxt_index_cache_size,
6160
PLUGIN_VAR_READONLY,
6161
"The amount of memory allocated to the index cache, used only to cache index data.",
6164
/*
 * NOTE(review): this variable is PLUGIN_VAR_READONLY yet registers
 * pbxt_record_cache_size_func as its update callback; the commented-out
 * flags suggest it was once settable -- confirm the intent.
 */
static MYSQL_SYSVAR_STR(record_cache_size, pbxt_record_cache_size,
6165
PLUGIN_VAR_READONLY, // PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC,
6166
"The amount of memory allocated to the record cache used to cache table data.",
6167
NULL, pbxt_record_cache_size_func, NULL);
6169
static MYSQL_SYSVAR_STR(log_cache_size, pbxt_log_cache_size,
6170
PLUGIN_VAR_READONLY,
6171
"The amount of memory allocated to the transaction log cache used to cache transaction log data.",
6174
static MYSQL_SYSVAR_STR(log_file_threshold, pbxt_log_file_threshold,
6175
PLUGIN_VAR_READONLY,
6176
"The size of a transaction log before rollover, and a new log is created.",
6179
static MYSQL_SYSVAR_STR(transaction_buffer_size, pbxt_transaction_buffer_size,
6180
PLUGIN_VAR_READONLY,
6181
"The size of the global transaction log buffer (the engine allocates 2 buffers of this size).",
6184
static MYSQL_SYSVAR_STR(log_buffer_size, pbxt_log_buffer_size,
6185
PLUGIN_VAR_READONLY,
6186
"The size of the buffer used to cache data from transaction and data logs during sequential scans, or when writing a data log.",
6189
/*
 * NOTE(review): the help text below describes a per-thread transaction
 * data buffer, which does not match the variable name
 * checkpoint_frequency -- the description looks copied from another
 * variable and should be verified.
 */
static MYSQL_SYSVAR_STR(checkpoint_frequency, pbxt_checkpoint_frequency,
6190
PLUGIN_VAR_READONLY,
6191
"The size of the transaction data buffer which is allocate by each thread.",
6194
static MYSQL_SYSVAR_STR(data_log_threshold, pbxt_data_log_threshold,
6195
PLUGIN_VAR_READONLY,
6196
"The maximum size of a data log file.",
6199
static MYSQL_SYSVAR_STR(data_file_grow_size, pbxt_data_file_grow_size,
6200
PLUGIN_VAR_READONLY,
6201
"The amount by which the handle data files (.xtd) grow.",
6204
static MYSQL_SYSVAR_STR(row_file_grow_size, pbxt_row_file_grow_size,
6205
PLUGIN_VAR_READONLY,
6206
"The amount by which the row pointer files (.xtr) grow.",
6209
static MYSQL_SYSVAR_STR(record_write_threshold, pbxt_record_write_threshold,
6210
PLUGIN_VAR_READONLY,
6211
"The amount data written to the record files (.xtd and .xtr) before the changes are applied to the database.",
6214
/*
 * Integer system variables declared with MYSQL_SYSVAR_INT and flagged
 * PLUGIN_VAR_OPCMDARG. The last four macro arguments of each
 * declaration are, per the MySQL plugin API, default, minimum, maximum
 * and block size; neither a check nor an update callback is installed
 * (both are NULL).
 */
/* Percentage, 0..100. */
static MYSQL_SYSVAR_INT(garbage_threshold, xt_db_garbage_threshold,
6215
PLUGIN_VAR_OPCMDARG,
6216
"The percentage of garbage in a repository file before it is compacted.",
6217
NULL, NULL, XT_DL_DEFAULT_GARBAGE_LEVEL, 0, 100, 1);
6219
/* Range 1..20000. */
static MYSQL_SYSVAR_INT(log_file_count, xt_db_log_file_count,
6220
PLUGIN_VAR_OPCMDARG,
6221
"The minimum number of transaction logs used.",
6222
NULL, NULL, XT_DL_DEFAULT_XLOG_COUNT, 1, 20000, 1);
6224
/* Two modes, 0 or 1. */
static MYSQL_SYSVAR_INT(auto_increment_mode, xt_db_auto_increment_mode,
6225
PLUGIN_VAR_OPCMDARG,
6226
"The auto-increment mode, 0 = MySQL standard (default), 1 = previous ID's never reused.",
6227
NULL, NULL, XT_AUTO_INCREMENT_DEF, 0, 1, 1);
6230
/* Three modes, 0..2; the default comes from XT_OFFLINE_LOG_FUNCTION_DEF. */
static MYSQL_SYSVAR_INT(offline_log_function, xt_db_offline_log_function,
6231
PLUGIN_VAR_OPCMDARG,
6232
"Determines what happens to transaction logs when the are moved offline, 0 = recycle logs (default), 1 = delete logs (default on Mac OS X), 2 = keep logs.",
6233
NULL, NULL, XT_OFFLINE_LOG_FUNCTION_DEF, 0, 2, 1);
6236
/* Bounded by XT_PRIORITY_LOW and XT_PRIORITY_HIGH; defaults to XT_PRIORITY_LOW. */
static MYSQL_SYSVAR_INT(sweeper_priority, xt_db_sweeper_priority,
6237
PLUGIN_VAR_OPCMDARG,
6238
"Determines the priority of the background sweeper process, 0 = low (default), 1 = normal (same as user threads), 2 = high.",
6239
NULL, NULL, XT_PRIORITY_LOW, XT_PRIORITY_LOW, XT_PRIORITY_HIGH, 1);
6242
/*
 * Boolean system variable support_xa, backed by pbxt_support_xa.
 * Two alternative declarations follow that differ in their help text
 * (default enabled versus default disabled).
 * NOTE(review): presumably only one of them is compiled, selected by a
 * preprocessor conditional -- confirm which build gets which default.
 */
static MYSQL_SYSVAR_BOOL(support_xa, pbxt_support_xa,
6243
PLUGIN_VAR_OPCMDARG,
6244
"Enable PBXT support for the XA two-phase commit, default is enabled",
6247
static MYSQL_SYSVAR_BOOL(support_xa, pbxt_support_xa,
6248
PLUGIN_VAR_OPCMDARG,
6249
"Enable PBXT support for the XA two-phase commit, default is disabled (due to assertion failure in MySQL)",
6250
/* The problem is, in MySQL an assertion fails in debug mode:
6251
* Assertion failed: (total_ha_2pc == (ulong) opt_bin_log+1), function ha_recover, file handler.cc, line 1557.
6256
// Percentage (0..100) of dirty index cache that triggers a flush;
// stored in xt_db_index_dirty_threshold.
static MYSQL_SYSVAR_INT(index_dirty_threshold, xt_db_index_dirty_threshold,
6257
PLUGIN_VAR_OPCMDARG,
6258
"The percentage of the index cache that must be dirty before the index cache is flushed.",
6259
NULL, NULL, XT_DL_DEFAULT_INDEX_DIRTY_LEVEL, 0, 100, 1);
6261
// Commit-time log handling mode, 0..2 with a default of 1;
// stored in xt_db_flush_log_at_trx_commit.
static MYSQL_SYSVAR_INT(flush_log_at_trx_commit, xt_db_flush_log_at_trx_commit,
6262
PLUGIN_VAR_OPCMDARG,
6263
"Determines whether the transaction log is written and/or flushed when a transaction is committed (no matter what the setting the log is written and flushed once per second), 0 = no write & no flush, 1 = write & flush (default), 2 = write & no flush.",
6264
NULL, NULL, 1, 0, 2, 1);
6266
// Table of all system variables declared above; it is passed to the
// plugin declarations further down in this file.
static struct st_mysql_sys_var* pbxt_system_variables[] = {
6267
MYSQL_SYSVAR(index_cache_size),
6268
MYSQL_SYSVAR(record_cache_size),
6269
MYSQL_SYSVAR(log_cache_size),
6270
MYSQL_SYSVAR(log_file_threshold),
6271
MYSQL_SYSVAR(transaction_buffer_size),
6272
MYSQL_SYSVAR(log_buffer_size),
6273
MYSQL_SYSVAR(checkpoint_frequency),
6274
MYSQL_SYSVAR(data_log_threshold),
6275
MYSQL_SYSVAR(data_file_grow_size),
6276
MYSQL_SYSVAR(row_file_grow_size),
6277
MYSQL_SYSVAR(record_write_threshold),
6278
MYSQL_SYSVAR(garbage_threshold),
6279
MYSQL_SYSVAR(log_file_count),
6280
MYSQL_SYSVAR(auto_increment_mode),
6281
MYSQL_SYSVAR(offline_log_function),
6282
MYSQL_SYSVAR(sweeper_priority),
6283
MYSQL_SYSVAR(support_xa),
6284
MYSQL_SYSVAR(index_dirty_threshold),
6285
MYSQL_SYSVAR(flush_log_at_trx_commit),
6291
// Drizzle plugin descriptor for the engine: author, description,
// pbxt_init as the init hook and pbxt_system_variables as the
// variable table.
DRIZZLE_DECLARE_PLUGIN
6296
"Paul McCullagh, PrimeBase Technologies GmbH",
6297
"High performance, multi-versioning transactional engine",
6299
pbxt_init, /* Plugin Init */
6300
pbxt_system_variables, /* system variables */
6301
NULL /* config options */
6303
DRIZZLE_DECLARE_PLUGIN_END;
6305
/*
 * MySQL plugin declaration. Two plugins are registered:
 *  - the storage engine itself (MYSQL_STORAGE_ENGINE_PLUGIN), with
 *    pbxt_init / pbxt_end as init and deinit hooks;
 *  - an INFORMATION_SCHEMA plugin (MYSQL_INFORMATION_SCHEMA_PLUGIN),
 *    with pbxt_init_statistics / pbxt_exit_statistics as its hooks.
 */
mysql_declare_plugin(pbxt)
6307
MYSQL_STORAGE_ENGINE_PLUGIN,
6308
&pbxt_storage_engine,
6310
"Paul McCullagh, PrimeBase Technologies GmbH",
6311
"High performance, multi-versioning transactional engine",
6313
pbxt_init, /* Plugin Init */
6314
pbxt_end, /* Plugin Deinit */
6316
NULL, /* status variables */
6317
/* The system variable table is only passed for server versions >= 5.1.18. */
#if MYSQL_VERSION_ID >= 50118
6318
pbxt_system_variables, /* system variables */
6322
NULL /* config options */
6324
MYSQL_INFORMATION_SCHEMA_PLUGIN,
6327
"Paul McCullagh, PrimeBase Technologies GmbH",
6328
"PBXT internal system statitics",
6330
pbxt_init_statistics, /* plugin init */
6331
pbxt_exit_statistics, /* plugin deinit */
6333
NULL, /* status variables */
6334
NULL, /* system variables */
6335
NULL /* config options */
6337
mysql_declare_plugin_end;
6340
#if defined(XT_WIN) && defined(XT_COREDUMP)
6343
* WINDOWS CORE DUMP SUPPORT
6345
* MySQL supports core dumping on Windows with --core-file command line option.
6346
* However it creates dumps with the MiniDumpNormal option which saves only stack traces.
6348
* We instead (or in addition) create dumps with MiniDumpWithoutOptionalData option
6349
* which saves all available information. To enable core dumping enable XT_COREDUMP
6351
* In addition, pbxt_crash_debug must be set to TRUE which is the case if XT_CRASH_DEBUG
6353
* This switch is also controlled by creating a file called "no-debug" or "crash-debug"
6354
* in the pbxt database directory.
6357
/*
 * Local copy of the dump type flags accepted by MiniDumpWriteDump();
 * each enumerator is a distinct bit so values can be combined.
 * core_dump() below passes MiniDumpWithPrivateReadWriteMemory.
 */
typedef enum _MINIDUMP_TYPE {
6358
MiniDumpNormal = 0x0000,
6359
MiniDumpWithDataSegs = 0x0001,
6360
MiniDumpWithFullMemory = 0x0002,
6361
MiniDumpWithHandleData = 0x0004,
6362
MiniDumpFilterMemory = 0x0008,
6363
MiniDumpScanMemory = 0x0010,
6364
MiniDumpWithUnloadedModules = 0x0020,
6365
MiniDumpWithIndirectlyReferencedMemory = 0x0040,
6366
MiniDumpFilterModulePaths = 0x0080,
6367
MiniDumpWithProcessThreadData = 0x0100,
6368
MiniDumpWithPrivateReadWriteMemory = 0x0200,
6371
/*
 * Local copy of the exception description handed to MiniDumpWriteDump()
 * as its ExceptionParam argument; filled in by core_dump().
 */
typedef struct _MINIDUMP_EXCEPTION_INFORMATION {
6373
PEXCEPTION_POINTERS ExceptionPointers;
6374
BOOL ClientPointers;
6375
} MINIDUMP_EXCEPTION_INFORMATION, *PMINIDUMP_EXCEPTION_INFORMATION;
6377
/*
 * Function pointer type for MiniDumpWriteDump, which core_dump()
 * resolves at run time with GetProcAddress() from DBGHELP.DLL.
 */
typedef BOOL (WINAPI *MINIDUMPWRITEDUMP)(
6381
MINIDUMP_TYPE DumpType,
6382
void *ExceptionParam,
6383
void *UserStreamParam,
6387
/*
 * File-scope path buffers used by core_dump(): base_path is used both
 * for locating DBGHELP.DLL and as the prefix of the dump file name,
 * dump_path receives the generated dump file name.
 */
char base_path[_MAX_PATH] = {0};
6388
char dump_path[_MAX_PATH] = {0};
6390
/*
 * Write a Windows minidump of the current process.
 *
 * DBGHELP.DLL is loaded (first via base_path, then by bare name),
 * MiniDumpWriteDump is resolved from it, an unused file name of the
 * form "<base_path>PBXTCore<8-digit number>.dmp" is created, and the
 * dump is written with MiniDumpWithPrivateReadWriteMemory.
 *
 * pExceptionInfo may be NULL; in that case no exception information is
 * passed to MiniDumpWriteDump.
 *
 * NOTE(review): the file header comment names the
 * MiniDumpWithoutOptionalData option while the call below uses
 * MiniDumpWithPrivateReadWriteMemory -- confirm which is intended.
 */
void core_dump(struct _EXCEPTION_POINTERS *pExceptionInfo)
6392
SECURITY_ATTRIBUTES sa = { sizeof(SECURITY_ATTRIBUTES), 0, 0 };
6394
HMODULE hDll = NULL;
6396
MINIDUMPWRITEDUMP pDump;
6397
char *end_ptr = base_path;
6399
MINIDUMP_EXCEPTION_INFORMATION ExInfo, *ExInfoPtr = NULL;
6401
if (pExceptionInfo) {
6402
ExInfo.ThreadId = GetCurrentThreadId();
6403
ExInfo.ExceptionPointers = pExceptionInfo;
6404
/* NOTE(review): ClientPointers is declared BOOL; FALSE would be the matching constant. */
ExInfo.ClientPointers = NULL;
6405
ExInfoPtr = &ExInfo;
6408
/* Remember where the directory part of base_path ends. */
end_ptr = base_path + strlen(base_path);
6410
/* NOTE(review): unbounded append into a _MAX_PATH buffer -- confirm base_path always leaves room. */
strcat(base_path, "DBGHELP.DLL" );
6411
hDll = LoadLibrary(base_path);
6415
err = HRESULT_CODE(GetLastError());
6416
/* Second attempt: load the library by bare name. */
hDll = LoadLibrary( "DBGHELP.DLL" );
6418
err = HRESULT_CODE(GetLastError());
6423
pDump = (MINIDUMPWRITEDUMP)GetProcAddress( hDll, "MiniDumpWriteDump" );
6426
err = HRESULT_CODE(GetLastError());
6430
/* Probe numbered file names; CREATE_NEW fails for names that already exist. */
for (i = 1; i < INT_MAX; i++) {
6431
/* NOTE(review): sprintf() is not bounded by the size of dump_path. */
sprintf(dump_path, "%sPBXTCore%08d.dmp", base_path, i);
6432
hFile = CreateFile( dump_path, GENERIC_WRITE, FILE_SHARE_WRITE, NULL, CREATE_NEW,
6433
FILE_ATTRIBUTE_NORMAL, NULL );
6435
if ( hFile != INVALID_HANDLE_VALUE )
6438
if (HRESULT_CODE(GetLastError()) == ERROR_FILE_EXISTS )
6445
BOOL bOK = pDump( GetCurrentProcess(), GetCurrentProcessId(), hFile,
6446
MiniDumpWithPrivateReadWriteMemory, ExInfoPtr, NULL, NULL );
6451
/*
 * Unhandled-exception filter: writes a core dump for the exception and
 * returns EXCEPTION_EXECUTE_HANDLER.
 */
LONG crash_filter( struct _EXCEPTION_POINTERS *pExceptionInfo )
6453
core_dump(pExceptionInfo);
6454
return EXCEPTION_EXECUTE_HANDLER;
6457
/*
 * Install crash_filter() as the process-wide unhandled exception filter.
 */
void register_crash_filter()
6459
SetUnhandledExceptionFilter( (LPTOP_LEVEL_EXCEPTION_FILTER) crash_filter );
6462
#endif // XT_WIN && XT_COREDUMP