~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to plugin/pbxt/src/ha_pbxt.cc

merge lp:~linuxjedi/drizzle/trunk-remove-drizzleadmin

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/* Copyright (C) 2005 PrimeBase Technologies GmbH
2
 
 *
3
 
 * Derived from ha_example.h
4
 
 * Copyright (C) 2003 MySQL AB
5
 
 *
6
 
 * PrimeBase XT
7
 
 *
8
 
 * This program is free software; you can redistribute it and/or modify
9
 
 * it under the terms of the GNU General Public License as published by
10
 
 * the Free Software Foundation; either version 2 of the License, or
11
 
 * (at your option) any later version.
12
 
 *
13
 
 * This program is distributed in the hope that it will be useful,
14
 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
 
 * GNU General Public License for more details.
17
 
 *
18
 
 * You should have received a copy of the GNU General Public License
19
 
 * along with this program; if not, write to the Free Software
20
 
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA    02110-1301      USA
21
 
 *
22
 
 * 2005-11-10   Paul McCullagh
23
 
 *
24
 
 */
25
 
 
26
 
#ifdef USE_PRAGMA_IMPLEMENTATION
27
 
#pragma implementation                          // gcc: Class implementation
28
 
#endif
29
 
 
30
 
#include "xt_config.h"
31
 
 
32
 
#if defined(XT_WIN)
33
 
#include <windows.h>
34
 
#endif
35
 
 
36
 
#include <stdlib.h>
37
 
#include <time.h>
38
 
#include <ctype.h>
39
 
 
40
 
#ifdef DRIZZLED
41
 
#include <fcntl.h>
42
 
#include <drizzled/internal/my_sys.h>
43
 
#include <drizzled/common.h>
44
 
#include <drizzled/plugin.h>
45
 
#include <drizzled/field.h>
46
 
#include <drizzled/session.h>
47
 
#include <drizzled/data_home.h>
48
 
#include <drizzled/error.h>
49
 
#include <drizzled/table.h>
50
 
#include <drizzled/session.h>
51
 
 
52
 
#include <string>
53
 
 
54
 
#define my_strdup(a,b) strdup(a)
55
 
 
56
 
using namespace drizzled;
57
 
using namespace drizzled::plugin;
58
 
 
59
 
#define DEFAULT_FILE_EXTENSION ".dfe"
60
 
 
61
 
#else
62
 
#include "mysql_priv.h"
63
 
#include <mysql/plugin.h>
64
 
#endif
65
 
 
66
 
#include "ha_pbxt.h"
67
 
#include "ha_xtsys.h"
68
 
 
69
 
#include "strutil_xt.h"
70
 
#include "database_xt.h"
71
 
#include "cache_xt.h"
72
 
#include "trace_xt.h"
73
 
#include "heap_xt.h"
74
 
#include "myxt_xt.h"
75
 
#include "datadic_xt.h"
76
 
#ifdef PBMS_ENABLED
77
 
#include "pbms_enabled.h"
78
 
#endif
79
 
#include "tabcache_xt.h"
80
 
#include "systab_xt.h"
81
 
#include "xaction_xt.h"
82
 
#include "backup_xt.h"
83
 
#include "heap_xt.h"
84
 
 
85
 
#ifdef DEBUG
86
 
//#define XT_USE_SYS_PAR_DEBUG_SIZES
87
 
//#define PBXT_HANDLER_TRACE
88
 
//#define PBXT_TRACE_RETURN
89
 
//#define XT_PRINT_INDEX_OPT
90
 
//#define XT_SHOW_DUMPS_TRACE
91
 
//#define XT_UNIT_TEST
92
 
//#define LOAD_TABLE_ON_OPEN
93
 
//#define CHECK_TABLE_LOADS
94
 
 
95
 
/* Enable to trace the statements executed by the engine: */
96
 
//#define TRACE_STATEMENTS
97
 
 
98
 
/* Enable to print the trace to the stdout, instead of
99
 
 * to the trace log.
100
 
 */
101
 
//#define PRINT_STATEMENTS
102
 
#endif
103
 
 
104
 
#ifndef DRIZZLED
105
 
static handler  *pbxt_create_handler(handlerton *hton, TABLE_SHARE *table, MEM_ROOT *mem_root);
106
 
static int              pbxt_init(void *p);
107
 
static int              pbxt_end(void *p);
108
 
static int              pbxt_panic(handlerton *hton, enum ha_panic_function flag);
109
 
static void             pbxt_drop_database(handlerton *hton, char *path);
110
 
static int              pbxt_close_connection(handlerton *hton, THD* thd);
111
 
static int              pbxt_commit(handlerton *hton, THD *thd, bool all);
112
 
static int              pbxt_rollback(handlerton *hton, THD *thd, bool all);
113
 
static int              pbxt_prepare(handlerton *hton, THD *thd, bool all);
114
 
static int              pbxt_recover(handlerton *hton, XID *xid_list, uint len);
115
 
static int              pbxt_commit_by_xid(handlerton *hton, XID *xid);
116
 
static int              pbxt_rollback_by_xid(handlerton *hton, XID *xid);
117
 
static int              pbxt_start_consistent_snapshot(handlerton *hton, THD *thd);
118
 
#endif
119
 
static void             ha_aquire_exclusive_use(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine);
120
 
static void             ha_release_exclusive_use(XTThreadPtr self, XTSharePtr share);
121
 
static void             ha_close_open_tables(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine);
122
 
 
123
 
#ifdef TRACE_STATEMENTS
124
 
 
125
 
#ifdef PRINT_STATEMENTS
126
 
#define STAT_TRACE(y, x)                printf("%s: %s\n", y ? y->t_name : "-unknown-", x)
127
 
#else
128
 
#define STAT_TRACE(y, x)                xt_ttraceq(y, x)
129
 
#endif
130
 
 
131
 
#else
132
 
 
133
 
#define STAT_TRACE(y, x)
134
 
 
135
 
#endif
136
 
 
137
 
#ifdef PBXT_HANDLER_TRACE
138
 
#define PBXT_ALLOW_PRINTING
139
 
 
140
 
#define XT_TRACE_CALL()                         ha_trace_function(__FUNC__, NULL)
141
 
#define XT_TRACE_METHOD()                       ha_trace_function(__FUNC__, pb_share->sh_table_path->ps_path)
142
 
 
143
 
#ifdef PBXT_TRACE_RETURN
144
 
#define XT_RETURN(x)                            do { printf("%d\n", (int) (x)); return (x); } while (0)
145
 
#define XT_RETURN_VOID                          do { printf("out\n"); return; } while (0)
146
 
#else
147
 
#define XT_RETURN(x)                            return (x)
148
 
#define XT_RETURN_VOID                          return
149
 
#endif
150
 
 
151
 
#else
152
 
 
153
 
#define XT_TRACE_CALL()
154
 
#define XT_TRACE_METHOD()
155
 
#define XT_RETURN(x)                            return (x)
156
 
#define XT_RETURN_VOID                          return
157
 
 
158
 
#endif
159
 
 
160
 
#ifdef PBXT_ALLOW_PRINTING
161
 
#define XT_PRINT0(y, x)                         do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-"); } while (0)
162
 
#define XT_PRINT1(y, x, a)                      do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-", a); } while (0)
163
 
#define XT_PRINT2(y, x, a, b)           do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-", a, b); } while (0)
164
 
#define XT_PRINT3(y, x, a, b, c)        do { XTThreadPtr s = (y); printf("%s " x, s ? s->t_name : "-unknown-", a, b, c); } while (0)
165
 
#else
166
 
#define XT_PRINT0(y, x)
167
 
#define XT_PRINT1(y, x, a)
168
 
#define XT_PRINT2(y, x, a, b)
169
 
#define XT_PRINT3(y, x, a, b, c)
170
 
#endif
171
 
 
172
 
 
173
 
#define TS(x)                                   (x)->s
174
 
 
175
 
handlerton                              *pbxt_hton;
176
 
bool                                    pbxt_inited = false;            // Variable for checking the init state of hash
177
 
xtBool                                  pbxt_ignore_case = true;
178
 
const char                              *pbxt_extensions[]= { ".xtr", ".xtd", ".xtl", ".xti", ".xt", "", NULL };
179
 
#ifdef XT_CRASH_DEBUG
180
 
xtBool                                  pbxt_crash_debug = TRUE;
181
 
#else
182
 
xtBool                                  pbxt_crash_debug = FALSE;
183
 
#endif
184
 
 
185
 
 
186
 
/* Variables for pbxt share methods */
187
 
static xt_mutex_type    pbxt_database_mutex;            // Prevent a database from being opened while it is being dropped
188
 
static XTHashTabPtr             pbxt_share_tables;                      // Hash used to track open tables
189
 
static char                             *pbxt_index_cache_size;
190
 
static char                             *pbxt_record_cache_size;
191
 
static char                             *pbxt_log_cache_size;
192
 
static char                             *pbxt_log_file_threshold;
193
 
static char                             *pbxt_transaction_buffer_size;
194
 
static char                             *pbxt_log_buffer_size;
195
 
static char                             *pbxt_checkpoint_frequency;
196
 
static char                             *pbxt_data_log_threshold;
197
 
static char                             *pbxt_data_file_grow_size;
198
 
static char                             *pbxt_row_file_grow_size;
199
 
static char                             *pbxt_record_write_threshold;
200
 
 
201
 
#ifndef DRIZZLED
202
 
// drizzle complains it's not used
203
 
static my_bool                  pbxt_support_xa;
204
 
static XTXactEnumXARec  pbxt_xa_enum;
205
 
#endif
206
 
 
207
 
#ifdef DEBUG
208
 
#define XT_SHARE_LOCK_WAIT              5000
209
 
#else
210
 
#define XT_SHARE_LOCK_WAIT              500
211
 
#endif
212
 
 
213
 
/* 
214
 
 * Lock timeout in 1/1000ths of a second
215
 
 */
216
 
#define XT_SHARE_LOCK_TIMEOUT   30000
217
 
 
218
 
/*
219
 
 * -----------------------------------------------------------------------
220
 
 * SYSTEM VARIABLES
221
 
 *
222
 
 */
223
 
 
224
 
//#define XT_FOR_TEAMDRIVE
225
 
 
226
 
/*
 * Textual description of one engine system variable: its name, its
 * default, and its allowed range. All values are kept as strings
 * (e.g. "32MB"); separate upper bounds are stored for 32-bit and
 * 64-bit processors.
 */
struct HAVarParams {
	const char		*vp_var;	/* Name of the variable. */
	const char		*vp_def;	/* Value used by default. */
	const char		*vp_min;	/* Smallest value allowed. */
	const char		*vp_max4;	/* Largest value allowed on 32-bit processors. */
	const char		*vp_max8;	/* Largest value allowed on 64-bit processors. */
};

typedef struct HAVarParams	HAVarParamsRec;
typedef struct HAVarParams	*HAVarParamsPtr;
233
 
 
234
 
#ifdef XT_USE_SYS_PAR_DEBUG_SIZES
235
 
static HAVarParamsRec vp_index_cache_size = { "pbxt_index_cache_size", "32MB", "8MB", "2GB", "2000GB" };
236
 
static HAVarParamsRec vp_record_cache_size = { "pbxt_record_cache_size", "32MB", "8MB", "2GB", "2000GB" };
237
 
static HAVarParamsRec vp_log_cache_size = { "pbxt_log_cache_size", "16MB", "4MB", "2GB", "2000GB" };
238
 
static HAVarParamsRec vp_checkpoint_frequency = { "pbxt_checkpoint_frequency", "1GB", "2MB", "2000GB", "2000GB" };
239
 
static HAVarParamsRec vp_log_file_threshold = { "pbxt_log_file_threshold", "32MB", "1MB", "2GB", "256TB" };
240
 
static HAVarParamsRec vp_transaction_buffer_size = { "pbxt_transaction_buffer_size", "1MB", "128K", "1GB", "24GB" };
241
 
static HAVarParamsRec vp_log_buffer_size = { "pbxt_log_buffer_size", "256K", "128K", "1GB", "24GB" };
242
 
static HAVarParamsRec vp_data_log_threshold = { "pbxt_data_log_threshold", "400K", "400K", "2GB", "256TB" };
243
 
static HAVarParamsRec vp_data_file_grow_size = { "pbxt_data_file_grow_size", "2MB", "128K", "1GB", "2GB" };
244
 
static HAVarParamsRec vp_row_file_grow_size = { "pbxt_row_file_grow_size", "256K", "32K", "1GB", "2GB" };
245
 
static HAVarParamsRec vp_record_write_threshold = { "pbxt_record_write_threshold", "4MB", "0", "2GB", "8GB" };
246
 
#define XT_DL_DEFAULT_XLOG_COUNT                3
247
 
#define XT_DL_DEFAULT_GARBAGE_LEVEL             10
248
 
#else
249
 
static HAVarParamsRec vp_index_cache_size = { "pbxt_index_cache_size", "32MB", "8MB", "2GB", "2000GB" };
250
 
static HAVarParamsRec vp_record_cache_size = { "pbxt_record_cache_size", "32MB", "8MB", "2GB", "2000GB" };
251
 
static HAVarParamsRec vp_log_cache_size = { "pbxt_log_cache_size", "16MB", "4MB", "2GB", "2000GB" };
252
 
static HAVarParamsRec vp_checkpoint_frequency = { "pbxt_checkpoint_frequency", "1GB", "2MB", "2000GB", "2000GB" };
253
 
static HAVarParamsRec vp_log_file_threshold = { "pbxt_log_file_threshold", "32MB", "1MB", "2GB", "256TB" };
254
 
static HAVarParamsRec vp_transaction_buffer_size = { "pbxt_transaction_buffer_size", "1MB", "128K", "1GB", "24GB" };
255
 
static HAVarParamsRec vp_log_buffer_size = { "pbxt_log_buffer_size", "256K", "128K", "1GB", "24GB" };
256
 
static HAVarParamsRec vp_data_log_threshold = { "pbxt_data_log_threshold", "64MB", "1MB", "2GB", "256TB" };
257
 
static HAVarParamsRec vp_data_file_grow_size = { "pbxt_data_file_grow_size", "2MB", "128K", "1GB", "2GB" };
258
 
static HAVarParamsRec vp_row_file_grow_size = { "pbxt_row_file_grow_size", "256K", "32K", "1GB", "2GB" };
259
 
static HAVarParamsRec vp_record_write_threshold = { "pbxt_record_write_threshold", "4MB", "0", "2GB", "8GB" };
260
 
#define XT_DL_DEFAULT_XLOG_COUNT                3
261
 
#define XT_DL_DEFAULT_GARBAGE_LEVEL             50
262
 
#endif
263
 
 
264
 
#define XT_AUTO_INCREMENT_DEF                   0
265
 
#define XT_DL_DEFAULT_INDEX_DIRTY_LEVEL 80
266
 
 
267
 
#ifdef XT_MAC
268
 
#ifdef DEBUG
269
 
/* For debugging on the Mac, we check the re-use logs: */
270
 
#define XT_OFFLINE_LOG_FUNCTION_DEF             XT_RECYCLE_LOGS
271
 
#else
272
 
#define XT_OFFLINE_LOG_FUNCTION_DEF             XT_DELETE_LOGS
273
 
#endif
274
 
#else
275
 
#define XT_OFFLINE_LOG_FUNCTION_DEF             XT_RECYCLE_LOGS
276
 
#endif
277
 
 
278
 
/* TeamDrive, uses special auto-increment, and
279
 
 * we keep the logs for the moment:
280
 
 */
281
 
#ifdef XT_FOR_TEAMDRIVE
282
 
#undef XT_OFFLINE_LOG_FUNCTION_DEF
283
 
#define XT_OFFLINE_LOG_FUNCTION_DEF             XT_KEEP_LOGS
284
 
//#undef XT_AUTO_INCREMENT_DEF
285
 
//#define XT_AUTO_INCREMENT_DEF                 1
286
 
#endif
287
 
 
288
 
#ifdef PBXT_HANDLER_TRACE
289
 
static void ha_trace_function(const char *function, char *table)
290
 
{
291
 
        char            func_buf[50], *ptr;
292
 
        XTThreadPtr     thread = xt_get_self(); 
293
 
 
294
 
        if ((ptr = const_cast<char *>(strchr(function, '(')))) {
295
 
                ptr--;
296
 
                while (ptr > function) {
297
 
                        if (!(isalnum(*ptr) || *ptr == '_'))
298
 
                                break;
299
 
                        ptr--;
300
 
                }
301
 
                ptr++;
302
 
                xt_strcpy(50, func_buf, ptr);
303
 
                if ((ptr = strchr(func_buf, '(')))
304
 
                        *ptr = 0;
305
 
        }
306
 
        else
307
 
                xt_strcpy(50, func_buf, function);
308
 
        if (table)
309
 
                printf("%s %s (%s)\n", thread ? thread->t_name : "-unknown-", func_buf, table);
310
 
        else
311
 
                printf("%s %s\n", thread ? thread->t_name : "-unknown-", func_buf);
312
 
}
313
 
#endif
314
 
 
315
 
/*
316
 
 * -----------------------------------------------------------------------
317
 
 * SHARED TABLE DATA
318
 
 *
319
 
 */
320
 
 
321
 
static xtBool ha_hash_comp(void *key, void *data)
322
 
{
323
 
        XTSharePtr      share = (XTSharePtr) data;
324
 
 
325
 
        return strcmp((char *) key, share->sh_table_path->ps_path) == 0;
326
 
}
327
 
 
328
 
static xtHashValue ha_hash(xtBool is_key, void *key_data)
329
 
{
330
 
        XTSharePtr      share = (XTSharePtr) key_data;
331
 
 
332
 
        if (is_key)
333
 
                return xt_ht_hash((char *) key_data);
334
 
        return xt_ht_hash(share->sh_table_path->ps_path);
335
 
}
336
 
 
337
 
static xtBool ha_hash_comp_ci(void *key, void *data)
338
 
{
339
 
        XTSharePtr      share = (XTSharePtr) data;
340
 
 
341
 
        return strcasecmp((char *) key, share->sh_table_path->ps_path) == 0;
342
 
}
343
 
 
344
 
static xtHashValue ha_hash_ci(xtBool is_key, void *key_data)
345
 
{
346
 
        XTSharePtr      share = (XTSharePtr) key_data;
347
 
 
348
 
        if (is_key)
349
 
                return xt_ht_casehash((char *) key_data);
350
 
        return xt_ht_casehash(share->sh_table_path->ps_path);
351
 
}
352
 
 
353
 
static void ha_open_share(XTThreadPtr self, XTShareRec *share)
354
 
{
355
 
        xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
356
 
        pushr_(xt_unlock_mutex, share->sh_ex_mutex);
357
 
 
358
 
        if (!share->sh_table) {
359
 
                share->sh_table = xt_use_table(self, share->sh_table_path, FALSE, FALSE);
360
 
                share->sh_dic_key_count = share->sh_table->tab_dic.dic_key_count;
361
 
                share->sh_dic_keys = share->sh_table->tab_dic.dic_keys;
362
 
                share->sh_recalc_selectivity = FALSE;
363
 
        }
364
 
 
365
 
        freer_(); // xt_ht_unlock(pbxt_share_tables)
366
 
}
367
 
 
368
 
static void ha_close_share(XTThreadPtr self, XTShareRec *share)
369
 
{
370
 
        XTTableHPtr tab;
371
 
 
372
 
        if ((tab = share->sh_table)) {
373
 
                /* Save this, in case the share is re-opened. */
374
 
                share->sh_min_auto_inc = tab->tab_auto_inc;
375
 
 
376
 
                xt_heap_release(self, tab);
377
 
                share->sh_table = NULL;
378
 
        }
379
 
 
380
 
        /* This are only references: */
381
 
        share->sh_dic_key_count = 0;
382
 
        share->sh_dic_keys = NULL;
383
 
}
384
 
 
385
 
static void ha_cleanup_share(XTThreadPtr self, XTSharePtr share)
386
 
{
387
 
        ha_close_share(self, share);
388
 
 
389
 
        if (share->sh_table_path) {
390
 
                xt_free(self, share->sh_table_path);
391
 
                share->sh_table_path = NULL;
392
 
        }
393
 
 
394
 
        if (share->sh_ex_cond) {
395
 
                share->sh_lock.unlock();
396
 
                xt_delete_cond(self, (xt_cond_type *) share->sh_ex_cond);
397
 
                share->sh_ex_cond = NULL;
398
 
        }
399
 
 
400
 
        if (share->sh_ex_mutex) {
401
 
                xt_delete_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
402
 
                share->sh_ex_mutex = NULL;
403
 
        }
404
 
 
405
 
        xt_free(self, share);
406
 
}
407
 
 
408
 
static void ha_hash_free(XTThreadPtr self, void *data)
409
 
{
410
 
        XTSharePtr      share = (XTSharePtr) data;
411
 
 
412
 
        ha_cleanup_share(self, share);
413
 
}
414
 
 
415
 
/*
416
 
 * This structure contains information that is common to all handles.
417
 
 * (i.e. it is table specific).
418
 
 */
419
 
static XTSharePtr ha_get_share(XTThreadPtr self, const char *table_path, bool open_table)
420
 
{
421
 
        XTShareRec      *share;
422
 
 
423
 
        enter_();
424
 
        xt_ht_lock(self, pbxt_share_tables);
425
 
        pushr_(xt_ht_unlock, pbxt_share_tables);
426
 
 
427
 
        // Check if the table exists...
428
 
        if (!(share = (XTSharePtr) xt_ht_get(self, pbxt_share_tables, (void *) table_path))) {
429
 
                share = (XTSharePtr) xt_calloc(self, sizeof(XTShareRec));               
430
 
                pushr_(ha_cleanup_share, share);
431
 
 
432
 
                share->sh_ex_mutex = (xt_mutex_type *) xt_new_mutex(self);
433
 
                share->sh_ex_cond = (xt_cond_type *) xt_new_cond(self);
434
 
 
435
 
                thr_lock_init(&share->sh_lock);
436
 
 
437
 
                share->sh_use_count = 0;
438
 
                share->sh_table_path = (XTPathStrPtr) xt_dup_string(self, table_path);
439
 
 
440
 
                if (open_table)
441
 
                        ha_open_share(self, share);
442
 
 
443
 
                popr_(); // Discard ha_cleanup_share(share);
444
 
 
445
 
                xt_ht_put(self, pbxt_share_tables, share);
446
 
        }
447
 
 
448
 
        share->sh_use_count++;
449
 
        freer_(); // xt_ht_unlock(pbxt_share_tables)
450
 
 
451
 
        return_(share);
452
 
}
453
 
 
454
 
/*
455
 
 * Free shared information.
456
 
 */
457
 
static void ha_unget_share(XTThreadPtr self, XTSharePtr share)
458
 
{
459
 
        xt_ht_lock(self, pbxt_share_tables);
460
 
        pushr_(xt_ht_unlock, pbxt_share_tables);
461
 
 
462
 
        if (!--share->sh_use_count)
463
 
                xt_ht_del(self, pbxt_share_tables, share->sh_table_path);
464
 
 
465
 
        freer_(); // xt_ht_unlock(pbxt_share_tables)
466
 
}
467
 
 
468
 
static xtBool ha_unget_share_removed(XTThreadPtr self, XTSharePtr share)
469
 
{
470
 
        xtBool removed = FALSE;
471
 
 
472
 
        xt_ht_lock(self, pbxt_share_tables);
473
 
        pushr_(xt_ht_unlock, pbxt_share_tables);
474
 
 
475
 
        if (!--share->sh_use_count) {
476
 
                removed = TRUE;
477
 
                xt_ht_del(self, pbxt_share_tables, share->sh_table_path);
478
 
        }
479
 
 
480
 
        freer_(); // xt_ht_unlock(pbxt_share_tables)
481
 
        return removed;
482
 
}
483
 
 
484
 
static inline void thd_init_xact(THD *thd, XTThreadPtr self, bool set_table_trans)
485
 
{
486
 
        self->st_xact_mode = thd_tx_isolation(thd) <= ISO_READ_COMMITTED ? XT_XACT_COMMITTED_READ : XT_XACT_REPEATABLE_READ;
487
 
        self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
488
 
        self->st_auto_commit = (thd_test_options(thd,(OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) == 0;
489
 
        if (set_table_trans) {
490
 
#ifdef DRIZZLED
491
 
                self->st_table_trans = FALSE;
492
 
#else
493
 
                self->st_table_trans = thd_sql_command(thd) == SQLCOM_LOCK_TABLES;
494
 
#endif
495
 
        }
496
 
        self->st_abort_trans = FALSE;
497
 
        self->st_stat_ended = FALSE;
498
 
        self->st_stat_trans = FALSE;
499
 
        self->st_non_temp_updated = FALSE;
500
 
        XT_PRINT0(self, "xt_xn_begin\n");
501
 
        xt_xres_wait_for_recovery(self, XT_RECOVER_SWEPT);
502
 
}
503
 
 
504
 
/*
505
 
 * -----------------------------------------------------------------------
506
 
 * PUBLIC FUNCTIONS
507
 
 *
508
 
 */
509
 
 
510
 
xtPublic void xt_ha_unlock_table(XTThreadPtr self, void *share)
511
 
{
512
 
        ha_release_exclusive_use(self, (XTSharePtr) share);
513
 
        ha_unget_share(self, (XTSharePtr) share);
514
 
}
515
 
 
516
 
xtPublic void xt_ha_close_global_database(XTThreadPtr self)
517
 
{
518
 
        if (pbxt_database) {
519
 
                xt_heap_release(self, pbxt_database);
520
 
                pbxt_database = NULL;
521
 
        }
522
 
}
523
 
 
524
 
/*
525
 
 * Open a PBXT database given the path of a table.
526
 
 * This function also returns the name of the table.
527
 
 *
528
 
 * We use the pbxt_database_mutex to lock this
529
 
 * operation to make sure it does not occur while
530
 
 * some other thread is doing a "closeall".
531
 
 */
532
 
xtPublic void xt_ha_open_database_of_table(XTThreadPtr self, XTPathStrPtr XT_UNUSED(table_path))
533
 
{
534
 
#ifdef XT_USE_GLOBAL_DB
535
 
        if (!self->st_database) {
536
 
                if (!pbxt_database) {
537
 
                        xt_open_database(self, mysql_real_data_home, TRUE);
538
 
                        /* {GLOBAL-DB}
539
 
                         * This can be done at the same time as the recovery thread,
540
 
                         * strictly speaking I need a lock.
541
 
                         */
542
 
                        if (!pbxt_database) {
543
 
                                pbxt_database = self->st_database;
544
 
                                xt_heap_reference(self, pbxt_database);
545
 
                        }
546
 
                }
547
 
                else
548
 
                        xt_use_database(self, pbxt_database, XT_FOR_USER);
549
 
        }
550
 
#else
551
 
        char db_path[PATH_MAX];
552
 
 
553
 
        xt_strcpy(PATH_MAX, db_path, (char *) table_path);
554
 
        xt_remove_last_name_of_path(db_path);
555
 
        xt_remove_dir_char(db_path);
556
 
 
557
 
        if (self->st_database && xt_tab_compare_paths(self->st_database->db_name, xt_last_name_of_path(db_path)) == 0)
558
 
                /* This thread already has this database open! */
559
 
                return;
560
 
 
561
 
        /* Auto commit before changing the database: */
562
 
        if (self->st_xact_data) {
563
 
                /* PMC - This probably indicates something strange is happening:
564
 
                 *
565
 
                 * This sequence generates this error:
566
 
                 *
567
 
                 * delimiter |
568
 
                 * 
569
 
                 * create temporary table t3 (id int)|
570
 
                 * 
571
 
                 * create function f10() returns int
572
 
                 * begin
573
 
                 *   drop temporary table if exists t3;
574
 
                 *   create temporary table t3 (id int) engine=myisam;
575
 
                 *   insert into t3 select id from t4;
576
 
                 *   return (select count(*) from t3);
577
 
                 * end|
578
 
                 * 
579
 
                 * select f10()|
580
 
                 *
581
 
                 * An error is generated because the same thread is used
582
 
                 * to open table t4 (at the start of the functions), and
583
 
                 * then to drop table t3. To drop t3 we need to
584
 
                 * switch the database, so we land up here!
585
 
                 */
586
 
                xt_throw_xterr(XT_CONTEXT, XT_ERR_CANNOT_CHANGE_DB);
587
 
                /*
588
 
                 if (!xt_xn_commit(self))
589
 
                        throw_();
590
 
                 */
591
 
        }
592
 
 
593
 
        xt_lock_mutex(self, &pbxt_database_mutex);
594
 
        pushr_(xt_unlock_mutex, &pbxt_database_mutex);
595
 
        xt_open_database(self, db_path, FALSE);
596
 
        freer_(); // xt_unlock_mutex(&pbxt_database_mutex);
597
 
#endif
598
 
}
599
 
 
600
 
xtPublic XTThreadPtr xt_ha_set_current_thread(THD *thd, XTExceptionPtr e)
601
 
{
602
 
        XTThreadPtr     self;
603
 
        static int      ha_thread_count = 0, ha_id;
604
 
 
605
 
        if (!(self = (XTThreadPtr) *thd->getEngineData(pbxt_hton))) {
606
 
//              const                   Security_context *sctx;
607
 
                char                    name[120];
608
 
                char                    ha_id_str[50];
609
 
 
610
 
                ha_id = ++ha_thread_count;
611
 
                sprintf(ha_id_str, "_%d", ha_id);
612
 
                xt_strcpy(120,name,"user"); // TODO: Fix this hack
613
 
/*
614
 
                sctx = &thd->main_security_ctx;
615
 
 
616
 
                if (sctx->user) {
617
 
                        xt_strcpy(120, name, sctx->user);
618
 
                        xt_strcat(120, name, "@");
619
 
                }
620
 
                else
621
 
                        *name = 0;
622
 
                if (sctx->host)
623
 
                        xt_strcat(120, name, sctx->host);
624
 
                else if (sctx->ip)
625
 
                        xt_strcat(120, name, sctx->ip);
626
 
                else if (thd->proc_info)
627
 
                        xt_strcat(120, name, (char *) thd->proc_info);
628
 
                else
629
 
                        xt_strcat(120, name, "system");
630
 
*/
631
 
                xt_strcat(120, name, ha_id_str);
632
 
                if (!(self = xt_create_thread(name, FALSE, TRUE, e)))
633
 
                        return NULL;
634
 
 
635
 
                self->st_xact_mode = XT_XACT_REPEATABLE_READ;
636
 
                *thd->getEngineData(pbxt_hton) = (void *) self;
637
 
        }
638
 
        return self;
639
 
}
640
 
 
641
 
xtPublic void xt_ha_close_connection(THD* thd)
642
 
{
643
 
        XTThreadPtr             self;
644
 
 
645
 
        if (!(self = (XTThreadPtr) *thd->getEngineData(pbxt_hton))) {
646
 
        *thd->getEngineData(pbxt_hton) = NULL;
647
 
                xt_free_thread(self);
648
 
        }
649
 
}
650
 
 
651
 
xtPublic XTThreadPtr xt_ha_thd_to_self(THD *thd)
652
 
{
653
 
        return (XTThreadPtr) *thd->getEngineData(pbxt_hton);
654
 
}
655
 
 
656
 
/*
657
 
 * -----------------------------------------------------------------------
658
 
 * SUPPORT FUNCTIONS
659
 
 *
660
 
 */
661
 
 
662
 
/*
663
 
 * In PBXT, as in MySQL: thread == connection.
664
 
 *
665
 
 * So we simply attach a PBXT thread to a MySQL thread.
666
 
 */
667
 
static XTThreadPtr ha_set_current_thread(THD *thd, int *err)
668
 
{
669
 
        XTThreadPtr             self;
670
 
        XTExceptionRec  e;
671
 
 
672
 
        if (!(self = xt_ha_set_current_thread(thd, &e))) {
673
 
                xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
674
 
                *err = e.e_xt_err;
675
 
                return NULL;
676
 
        }
677
 
        return self;
678
 
}
679
 
 
680
 
xtPublic int xt_ha_pbxt_to_mysql_error(int xt_err)
681
 
{
682
 
        switch (xt_err) {
683
 
                case XT_NO_ERR:
684
 
                        return(0);
685
 
                case XT_ERR_DUPLICATE_KEY:
686
 
                                return HA_ERR_FOUND_DUPP_KEY;
687
 
                case XT_ERR_DEADLOCK:
688
 
                                return HA_ERR_LOCK_DEADLOCK;
689
 
                case XT_ERR_RECORD_CHANGED:
690
 
                        /* If we generate HA_ERR_RECORD_CHANGED instead of HA_ERR_LOCK_WAIT_TIMEOUT
691
 
                         * then sysbench does not work because it does not handle this error.
692
 
                         */
693
 
                        //return HA_ERR_LOCK_WAIT_TIMEOUT; // but HA_ERR_RECORD_CHANGED is the correct error for a optimistic lock failure.
694
 
                        return HA_ERR_RECORD_CHANGED;
695
 
                case XT_ERR_LOCK_TIMEOUT:
696
 
                        return HA_ERR_LOCK_WAIT_TIMEOUT;
697
 
                case XT_ERR_TABLE_IN_USE:
698
 
                                return HA_ERR_WRONG_COMMAND;
699
 
                case XT_ERR_TABLE_NOT_FOUND:
700
 
                        return HA_ERR_NO_SUCH_TABLE;
701
 
                case XT_ERR_TABLE_EXISTS:
702
 
                        return HA_ERR_TABLE_EXIST;
703
 
                case XT_ERR_CANNOT_CHANGE_DB:
704
 
                        return ER_TRG_IN_WRONG_SCHEMA;
705
 
                case XT_ERR_COLUMN_NOT_FOUND:
706
 
                        return HA_ERR_CANNOT_ADD_FOREIGN;
707
 
                case XT_ERR_NO_REFERENCED_ROW:
708
 
                case XT_ERR_REF_TABLE_NOT_FOUND:
709
 
                case XT_ERR_REF_TYPE_WRONG:
710
 
                        return HA_ERR_NO_REFERENCED_ROW;
711
 
                case XT_ERR_ROW_IS_REFERENCED:
712
 
                        return HA_ERR_ROW_IS_REFERENCED;
713
 
                case XT_ERR_COLUMN_IS_NOT_NULL:
714
 
                case XT_ERR_INCORRECT_NO_OF_COLS:
715
 
                case XT_ERR_FK_ON_TEMP_TABLE:
716
 
                case XT_ERR_FK_REF_TEMP_TABLE:
717
 
                        return HA_ERR_CANNOT_ADD_FOREIGN;
718
 
                case XT_ERR_DUPLICATE_FKEY:
719
 
                        return HA_ERR_FOREIGN_DUPLICATE_KEY;
720
 
                case XT_ERR_RECORD_DELETED:
721
 
                        return HA_ERR_RECORD_DELETED;
722
 
        }
723
 
        return(-1);                     // Unknown error
724
 
}
725
 
 
726
 
xtPublic int xt_ha_pbxt_thread_error_for_mysql(THD *thd, const XTThreadPtr self, int ignore_dup_key)
727
 
{
728
 
        int             xt_err = self->t_exception.e_xt_err;
729
 
        xtBool  dup_key = FALSE;
730
 
 
731
 
        XT_PRINT2(self, "xt_ha_pbxt_thread_error_for_mysql xt_err=%d auto commit=%d\n", (int) xt_err, (int) self->st_auto_commit);
732
 
        switch (xt_err) {
733
 
                case XT_NO_ERR:
734
 
                        break;
735
 
                case XT_ERR_DUPLICATE_KEY:
736
 
                case XT_ERR_DUPLICATE_FKEY:
737
 
                        /* Let MySQL call rollback as and when it wants to for duplicate
738
 
                         * key.
739
 
                         *
740
 
                         * In addition, we are not allowed to do an auto-rollback
741
 
                         * inside a sub-statement (function() or procedure())
742
 
                         * For example:
743
 
                         * 
744
 
                         * delimiter |
745
 
                         *
746
 
                         * create table t3 (c1 char(1) primary key not null)|
747
 
                         * 
748
 
                         * create function bug12379()
749
 
                         *   returns integer
750
 
                         * begin
751
 
                         *    insert into t3 values('X');
752
 
                         *    insert into t3 values('X');
753
 
                         *    return 0;
754
 
                         * end|
755
 
                         * 
756
 
                         * --error 1062
757
 
                         * select bug12379()|
758
 
                         *
759
 
                         *
760
 
                         * Not doing an auto-rollback should solve this problem in the
761
 
                         * case of duplicate key (but not in others - like deadlock)!
762
 
                         * I don't think this situation is handled correctly by MySQL.
763
 
                         */
764
 
 
765
 
                        /* If we are in auto-commit mode (and we are not ignoring
766
 
                         * duplicate keys) then rollback the transaction automatically.
767
 
                         */
768
 
                        dup_key = TRUE;
769
 
                        if (!ignore_dup_key && self->st_auto_commit)
770
 
                                goto abort_transaction;
771
 
                        break;
772
 
                case XT_ERR_DEADLOCK:
773
 
                case XT_ERR_NO_REFERENCED_ROW:
774
 
                case XT_ERR_ROW_IS_REFERENCED:
775
 
                        goto abort_transaction;
776
 
                case XT_ERR_RECORD_CHANGED:
777
 
                        /* MySQL also handles the locked error. NOTE: There is no automatic
778
 
                         * rollback!
779
 
                         */
780
 
                        break;
781
 
                default:
782
 
                        xt_log_exception(self, &self->t_exception, XT_LOG_DEFAULT);
783
 
                        abort_transaction:
784
 
                        /* PMC 2006-08-30: It should be that this is not necessary!
785
 
                         *
786
 
                         * It is only necessary to call ha_rollback() if the engine
787
 
                         * aborts the transaction.
788
 
                         *
789
 
                         * On the other hand, I shouldn't need to rollback the
790
 
                         * transaction because, if I return an error, MySQL
791
 
                         * should do it for me.
792
 
                         *
793
 
                         * Unfortunately, when auto-commit is off, MySQL does not
794
 
                         * rollback automatically (for example when a deadlock
795
 
                         * is provoked).
796
 
                         *
797
 
                         * And when we have a multi update we cannot rely on this
798
 
                         * either (see comment above).
799
 
                         */
800
 
                        if (self->st_xact_data) {
801
 
                                /*
802
 
                                 * GOTCHA:
803
 
                                 * A result of the "st_abort_trans = TRUE" below is that
804
 
                                 * the following code results in an empty set.
805
 
                                 * The reason is "ignore_dup_key" is not set so
806
 
                                 * the duplicate key leads to an error which causes
807
 
                                 * the transaction to be aborted.
808
 
                                 * The delayed inserts are all execute in one transaction.
809
 
                                 * 
810
 
                                 * CREATE TABLE t1 (
811
 
                                 * c1 INT(11) NOT NULL AUTO_INCREMENT,
812
 
                                 * c2 INT(11) DEFAULT NULL,
813
 
                                 * PRIMARY KEY (c1)
814
 
                                 * );
815
 
                                 * SET insert_id= 14;
816
 
                                 * INSERT DELAYED INTO t1 VALUES(NULL, 11), (NULL, 12);
817
 
                                 * INSERT DELAYED INTO t1 VALUES(14, 91);
818
 
                                 * INSERT DELAYED INTO t1 VALUES (NULL, 92), (NULL, 93);
819
 
                                 * FLUSH TABLE t1;
820
 
                                 * SELECT * FROM t1;
821
 
                                 */
822
 
                                if (self->st_lock_count == 0) {
823
 
                                        /* No table locks, must rollback immediately
824
 
                                         * (there will be no possibility later!
825
 
                                         */
826
 
                                        XT_PRINT1(self, "xt_xn_rollback xt_err=%d\n", xt_err);
827
 
                                        if (!xt_xn_rollback(self))
828
 
                                                xt_log_exception(self, &self->t_exception, XT_LOG_DEFAULT);
829
 
                                }
830
 
                                else {
831
 
                                        /* Locks are held on tables.
832
 
                                         * Only rollback after locks are released.
833
 
                                         */
834
 
                                        /* I do not think this is required, because
835
 
                                         * I tell mysql to rollback below, 
836
 
                                         * besides it is a hack!
837
 
                                         self->st_auto_commit = TRUE;
838
 
                                         */
839
 
                                        self->st_abort_trans = TRUE;
840
 
                                }
841
 
                                /* Only tell MySQL to rollback if we automatically rollback.
842
 
                                 * Note: calling this with (thd, FALSE), cause sp.test to fail.
843
 
                                 */
844
 
                                if (!dup_key) {
845
 
                                        if (thd)
846
 
                                                thd_mark_transaction_to_rollback(thd, TRUE);
847
 
                                }
848
 
                        }
849
 
                        break;
850
 
        }
851
 
        return xt_ha_pbxt_to_mysql_error(xt_err);
852
 
}
853
 
 
854
 
static void ha_conditional_close_database(XTThreadPtr self, XTThreadPtr other_thr, void *db)
855
 
{
856
 
        if (other_thr->st_database == (XTDatabaseHPtr) db)
857
 
                xt_unuse_database(self, other_thr);
858
 
}
859
 
 
860
 
/*
861
 
 * This is only called from drop database, so we know that
862
 
 * no thread is actually using the database. This means that it
863
 
 * must be safe to close the database.
864
 
 */
865
 
xtPublic void xt_ha_all_threads_close_database(XTThreadPtr self, XTDatabaseHPtr db)
866
 
{
867
 
        xt_lock_mutex(self, &pbxt_database_mutex);
868
 
        pushr_(xt_unlock_mutex, &pbxt_database_mutex);
869
 
        xt_do_to_all_threads(self, ha_conditional_close_database, db);
870
 
        freer_(); // xt_unlock_mutex(&pbxt_database_mutex);
871
 
}
872
 
 
873
 
static int ha_log_pbxt_thread_error_for_mysql(int ignore_dup_key)
874
 
{
875
 
        return xt_ha_pbxt_thread_error_for_mysql(current_thd, myxt_get_self(), ignore_dup_key);
876
 
}
877
 
 
878
 
/*
879
 
 * -----------------------------------------------------------------------
880
 
 * STATIC HOOKS
881
 
 *
882
 
 */
883
 
static xtWord8 ha_set_variable(char **value, HAVarParamsPtr vp)
884
 
{
885
 
        xtWord8 result;
886
 
        xtWord8 mi, ma;
887
 
        char    *mm;
888
 
 
889
 
        if (!*value)
890
 
                *value = getenv(vp->vp_var);
891
 
        if (!*value)
892
 
                *value = (char *) vp->vp_def;
893
 
        result = xt_byte_size_to_int8(*value);
894
 
        mi = (xtWord8) xt_byte_size_to_int8(vp->vp_min);
895
 
        if (result < mi) {
896
 
                result = mi;
897
 
                *value = (char *) vp->vp_min;
898
 
        }
899
 
        if (sizeof(size_t) == 8)
900
 
                mm = (char *) vp->vp_max8;
901
 
        else
902
 
                mm = (char *) vp->vp_max4;
903
 
        ma = (xtWord8) xt_byte_size_to_int8(mm);
904
 
        if (result > ma) {
905
 
                result = ma;
906
 
                *value = mm;
907
 
        }
908
 
        return result;
909
 
}
910
 
 
911
 
static void pbxt_call_init(XTThreadPtr self)
912
 
{
913
 
        xtInt8  index_cache_size;
914
 
        xtInt8  record_cache_size;
915
 
        xtInt8  log_cache_size;
916
 
        xtInt8  log_file_threshold;
917
 
        xtInt8  transaction_buffer_size;
918
 
        xtInt8  log_buffer_size;
919
 
        xtInt8  checkpoint_frequency;
920
 
        xtInt8  data_log_threshold;
921
 
        xtInt8  data_file_grow_size;
922
 
        xtInt8  row_file_grow_size;
923
 
        xtInt8  record_write_threshold;
924
 
 
925
 
        xt_logf(XT_NT_INFO, "PrimeBase XT (PBXT) Engine %s loaded...\n", xt_get_version());
926
 
        xt_logf(XT_NT_INFO, "Paul McCullagh, PrimeBase Technologies GmbH, http://www.primebase.org\n");
927
 
 
928
 
        index_cache_size = ha_set_variable(&pbxt_index_cache_size, &vp_index_cache_size);
929
 
        record_cache_size = ha_set_variable(&pbxt_record_cache_size, &vp_record_cache_size);
930
 
        log_cache_size = ha_set_variable(&pbxt_log_cache_size, &vp_log_cache_size);
931
 
        log_file_threshold = ha_set_variable(&pbxt_log_file_threshold, &vp_log_file_threshold);
932
 
        transaction_buffer_size = ha_set_variable(&pbxt_transaction_buffer_size, &vp_transaction_buffer_size);
933
 
        log_buffer_size = ha_set_variable(&pbxt_log_buffer_size, &vp_log_buffer_size);
934
 
        checkpoint_frequency = ha_set_variable(&pbxt_checkpoint_frequency, &vp_checkpoint_frequency);
935
 
        data_log_threshold = ha_set_variable(&pbxt_data_log_threshold, &vp_data_log_threshold);
936
 
        data_file_grow_size = ha_set_variable(&pbxt_data_file_grow_size, &vp_data_file_grow_size);
937
 
        row_file_grow_size = ha_set_variable(&pbxt_row_file_grow_size, &vp_row_file_grow_size);
938
 
        record_write_threshold = ha_set_variable(&pbxt_record_write_threshold, &vp_record_write_threshold);
939
 
 
940
 
        xt_db_log_file_threshold = (xtLogOffset) log_file_threshold;
941
 
        xt_db_log_buffer_size = (size_t) xt_align_offset(log_buffer_size, 512);
942
 
        xt_db_transaction_buffer_size = (size_t) xt_align_offset(transaction_buffer_size, 512);
943
 
        xt_db_checkpoint_frequency = (size_t) checkpoint_frequency;
944
 
        xt_db_data_log_threshold = (off_t) data_log_threshold;
945
 
        xt_db_data_file_grow_size = (size_t) data_file_grow_size;
946
 
        xt_db_row_file_grow_size = (size_t) row_file_grow_size;
947
 
        xt_db_record_write_threshold = (size_t) record_write_threshold;
948
 
 
949
 
#ifdef DRIZZLED
950
 
        pbxt_ignore_case = TRUE;
951
 
#else
952
 
        pbxt_ignore_case = lower_case_table_names != 0;
953
 
#endif
954
 
        if (pbxt_ignore_case)
955
 
                pbxt_share_tables = xt_new_hashtable(self, ha_hash_comp_ci, ha_hash_ci, ha_hash_free, TRUE, FALSE);
956
 
        else
957
 
                pbxt_share_tables = xt_new_hashtable(self, ha_hash_comp, ha_hash, ha_hash_free, TRUE, FALSE);
958
 
 
959
 
        xt_fs_init(self);
960
 
        xt_lock_installation(self, mysql_real_data_home);
961
 
        XTSystemTableShare::startUp(self);
962
 
        xt_init_databases(self);
963
 
        xt_ind_init(self, (size_t) index_cache_size);
964
 
        xt_tc_init(self, (size_t) record_cache_size);
965
 
        xt_xlog_init(self, (size_t) log_cache_size);
966
 
}
967
 
 
968
 
static void pbxt_call_exit(XTThreadPtr self)
969
 
{
970
 
        xt_logf(XT_NT_INFO, "PrimeBase XT Engine shutdown...\n");
971
 
 
972
 
#ifdef TRACE_STATEMENTS
973
 
        xt_dump_trace();
974
 
#endif
975
 
#ifdef XT_USE_GLOBAL_DB
976
 
        xt_ha_close_global_database(self);
977
 
#endif
978
 
#ifdef DEBUG
979
 
        //xt_stop_database_threads(self, FALSE);
980
 
        xt_stop_database_threads(self, TRUE);
981
 
#else
982
 
        xt_stop_database_threads(self, TRUE);
983
 
#endif
984
 
        /* This will tell the freeer to quit ASAP: */
985
 
        xt_quit_freeer(self);
986
 
        /* We conditional stop the freeer here, because if we are
987
 
         * in startup, then the free will be hanging.
988
 
         * {FREEER-HANG}
989
 
         *
990
 
         * This problem has been solved by MySQL!
991
 
         */
992
 
        xt_stop_freeer(self);
993
 
        xt_exit_databases(self);
994
 
        XTSystemTableShare::shutDown(self);
995
 
        xt_xlog_exit(self);
996
 
        xt_tc_exit(self);
997
 
        xt_ind_exit(self);
998
 
        xt_unlock_installation(self, mysql_real_data_home);
999
 
        xt_fs_exit(self);
1000
 
        if (pbxt_share_tables) {
1001
 
                xt_free_hashtable(self, pbxt_share_tables);
1002
 
                pbxt_share_tables = NULL;
1003
 
        }
1004
 
}
1005
 
 
1006
 
/*
1007
 
 * Shutdown the PBXT sub-system.
1008
 
 */
1009
 
static void ha_exit(XTThreadPtr self)
1010
 
{
1011
 
        xt_xres_terminate_recovery(self);
1012
 
 
1013
 
        /* Wrap things up... */
1014
 
        xt_unuse_database(self, self);  /* Just in case the main thread has a database in use (for testing)? */
1015
 
        /* This may cause the streaming engine to cleanup connections and 
1016
 
         * tables belonging to this engine. This in turn may require some of
1017
 
         * the stuff below (like xt_create_thread() called from pbxt_close_table()! */
1018
 
#ifdef PBMS_ENABLED
1019
 
        pbms_finalize();
1020
 
#endif
1021
 
        pbxt_call_exit(self);
1022
 
        xt_exit_threading(self);
1023
 
        xt_exit_memory();
1024
 
        xt_exit_logging();
1025
 
        xt_p_mutex_destroy(&pbxt_database_mutex);               
1026
 
        pbxt_inited = false;
1027
 
}
1028
 
 
1029
 
/*
1030
 
 * Outout the PBXT status. Return FALSE on error.
1031
 
 */
1032
 
#ifdef DRIZZLED
1033
 
bool PBXTStorageEngine::show_status(Session *thd, stat_print_fn *stat_print, enum ha_stat_type)
1034
 
#else
1035
 
static bool pbxt_show_status(handlerton *XT_UNUSED(hton), THD* thd, 
1036
 
                          stat_print_fn* stat_print,
1037
 
                          enum ha_stat_type XT_UNUSED(stat_type))
1038
 
#endif
1039
 
{
1040
 
        XTThreadPtr                     self;   
1041
 
        int                                     err = 0;
1042
 
        XTStringBufferRec       strbuf = { 0, 0, 0 };
1043
 
        bool                            not_ok = FALSE;
1044
 
 
1045
 
        if (!(self = ha_set_current_thread(thd, &err)))
1046
 
                return FALSE;
1047
 
 
1048
 
#ifdef XT_SHOW_DUMPS_TRACE
1049
 
        //if (pbxt_database)
1050
 
        //      xt_dump_xlogs(pbxt_database, 0);
1051
 
        xt_trace("// %s - dump\n", xt_trace_clock_diff(NULL));
1052
 
        xt_dump_trace();
1053
 
#endif
1054
 
#ifdef XT_TRACK_CONNECTIONS
1055
 
        xt_dump_conn_tracking();
1056
 
#endif
1057
 
 
1058
 
#ifdef XT_UNIT_TEST
1059
 
        xt_unit_test_async_task(self);
1060
 
#endif
1061
 
 
1062
 
        try_(a) {
1063
 
                myxt_get_status(self, &strbuf);
1064
 
        }
1065
 
        catch_(a) {
1066
 
                not_ok = TRUE;
1067
 
        }
1068
 
        cont_(a);
1069
 
 
1070
 
        if (!not_ok) {
1071
 
                if (stat_print(thd, "PBXT", 4, "", 0, strbuf.sb_cstring, (uint) strbuf.sb_len))
1072
 
                        not_ok = TRUE;
1073
 
        }
1074
 
        xt_sb_set_size(self, &strbuf, 0);
1075
 
 
1076
 
        return not_ok;
1077
 
}
1078
 
 
1079
 
/*
1080
 
 * Initialize the PBXT sub-system.
1081
 
 *
1082
 
 * return 1 on error, else 0.
1083
 
 */
1084
 
#ifdef DRIZZLED
1085
 
static int pbxt_init(module::Context &registry)
1086
 
#else
1087
 
static int pbxt_init(void *p)
1088
 
#endif
1089
 
{
1090
 
        int init_err = 0;
1091
 
 
1092
 
        XT_PRINT0(NULL, "pbxt_init\n");
1093
 
 
1094
 
        if (sizeof(xtWordPS) != sizeof(void *)) {
1095
 
                printf("PBXT: This won't work, I require that sizeof(xtWordPS) == sizeof(void *)!\n");
1096
 
                XT_RETURN(1);
1097
 
        }
1098
 
 
1099
 
        /* GOTCHA: This will "detect" if are loading the plug-in
1100
 
         * with different --with-debug option to MySQL.
1101
 
         *
1102
 
         * In this case, you will get an error when loading the
1103
 
         * library that some symbol was not found.
1104
 
         */
1105
 
        void *dummy = my_malloc(100, MYF(0));
1106
 
        my_free((byte *) dummy, MYF(0));
1107
 
 
1108
 
        if (!pbxt_inited) {
1109
 
                XTThreadPtr self = NULL;
1110
 
 
1111
 
                xt_p_mutex_init_with_autoname(&pbxt_database_mutex, NULL);
1112
 
 
1113
 
#ifdef DRIZZLED
1114
 
                pbxt_hton= new PBXTStorageEngine(std::string("PBXT"));
1115
 
                registry.add(pbxt_hton);
1116
 
#else
1117
 
                pbxt_hton = (handlerton *) p;
1118
 
                pbxt_hton->state = SHOW_OPTION_YES;
1119
 
                pbxt_hton->db_type = DB_TYPE_PBXT; // Wow! I have my own!
1120
 
                pbxt_hton->close_connection = pbxt_close_connection; /* close_connection, cleanup thread related data. */
1121
 
                pbxt_hton->commit = pbxt_commit; /* commit */
1122
 
                pbxt_hton->rollback = pbxt_rollback; /* rollback */
1123
 
                if (pbxt_support_xa) {
1124
 
                        pbxt_hton->prepare = pbxt_prepare;
1125
 
                        pbxt_hton->recover = pbxt_recover;
1126
 
                        pbxt_hton->commit_by_xid = pbxt_commit_by_xid;
1127
 
                        pbxt_hton->rollback_by_xid = pbxt_rollback_by_xid;
1128
 
                }
1129
 
                else {
1130
 
                        pbxt_hton->prepare = NULL;
1131
 
                        pbxt_hton->recover = NULL;
1132
 
                        pbxt_hton->commit_by_xid = NULL;
1133
 
                        pbxt_hton->rollback_by_xid = NULL;
1134
 
                }
1135
 
                pbxt_hton->create = pbxt_create_handler; /* Create a new handler */
1136
 
                pbxt_hton->drop_database = pbxt_drop_database; /* Drop a database */
1137
 
                pbxt_hton->panic = pbxt_panic; /* Panic call */
1138
 
                pbxt_hton->show_status = pbxt_show_status;
1139
 
                pbxt_hton->flags = HTON_NO_FLAGS; /* HTON_CAN_RECREATE - Without this flags TRUNCATE uses delete_all_rows() */
1140
 
                pbxt_hton->slot = (uint)-1; /* assign invald value, so we know when it's inited later */
1141
 
                pbxt_hton->start_consistent_snapshot = pbxt_start_consistent_snapshot;
1142
 
#if defined(MYSQL_SUPPORTS_BACKUP) && defined(XT_ENABLE_ONLINE_BACKUP)
1143
 
                pbxt_hton->get_backup_engine = pbxt_backup_engine;
1144
 
#endif
1145
 
#endif
1146
 
                if (!xt_init_logging())                                 /* Initialize logging */
1147
 
                        goto error_1;
1148
 
 
1149
 
#ifdef PBMS_ENABLED
1150
 
                PBMSResultRec result;
1151
 
                if (!pbms_initialize("PBXT", false, &result)) {
1152
 
                        xt_logf(XT_NT_ERROR, "pbms_initialize() Error: %s", result.mr_message);
1153
 
                        goto error_2;
1154
 
                }
1155
 
#endif
1156
 
 
1157
 
                if (!xt_init_memory())                                  /* Initialize memory */
1158
 
                        goto error_3;
1159
 
 
1160
 
                self = xt_init_threading();                             /* Create the main self: */
1161
 
                if (!self)
1162
 
                        goto error_3;
1163
 
 
1164
 
                pbxt_inited = true;
1165
 
 
1166
 
                try_(a) {
1167
 
                        /* Initialize all systems */
1168
 
                        pbxt_call_init(self);
1169
 
 
1170
 
                        /* Conditional unit test: */
1171
 
#ifdef XT_UNIT_TEST
1172
 
                        //xt_unit_test_create_threads(self);
1173
 
                        //xt_unit_test_read_write_locks(self);
1174
 
                        //xt_unit_test_mutex_locks(self);
1175
 
#endif
1176
 
 
1177
 
                        /* {OPEN-DB-SWEEPER-WAIT}
1178
 
                         * I have to start the freeer before I open and recover the database
1179
 
                         * because it we run out of cache while waiting for the sweeper
1180
 
                         * we will hang!
1181
 
                         */
1182
 
                        xt_start_freeer(self);
1183
 
 
1184
 
                        /* This function is called with LOCK_plugin locked.
1185
 
                         * This prevents the opening of .frm files, which
1186
 
                         * is required for recovery.
1187
 
                         * Our solution is to start reovery in a thread
1188
 
                         * so that it can run after LOCK_plugin is released.
1189
 
                         */
1190
 
                        xt_xres_start_database_recovery(self);
1191
 
                }
1192
 
                catch_(a) {
1193
 
                        xt_log_exception(self, &self->t_exception, XT_LOG_DEFAULT);
1194
 
                        init_err = 1;
1195
 
                }
1196
 
                cont_(a);
1197
 
 
1198
 
                if (init_err) {
1199
 
                        /* {FREEER-HANG} The free-er will be hung in:
1200
 
                                #0      0x91fc6a2e in semaphore_wait_signal_trap
1201
 
                                #1      0x91fce505 in pthread_mutex_lock
1202
 
                                #2      0x00489633 in safe_mutex_lock at thr_mutex.c:149
1203
 
                                #3      0x002dfca9 in plugin_thdvar_init at sql_plugin.cc:2398
1204
 
                                #4      0x000d6a12 in THD::init at sql_class.cc:715
1205
 
                                #5      0x000de9d3 in THD::THD at sql_class.cc:597
1206
 
                                #6      0x000debe1 in THD::THD at sql_class.cc:631
1207
 
                                #7      0x00e207a4 in myxt_create_thread at myxt_xt.cc:2666
1208
 
                                #8      0x00e3134b in tabc_fr_run_thread at tabcache_xt.cc:982
1209
 
                                #9      0x00e422ca in xt_thread_main at thread_xt.cc:1006
1210
 
                                #10     0x91ff7c55 in _pthread_start
1211
 
                                #11     0x91ff7b12 in thread_start
1212
 
                         *
1213
 
                         * so it is not good trying to stop it here!
1214
 
                         *
1215
 
                         * With regard to this problem, see {OPEN-DB-SWEEPER-WAIT}
1216
 
                         * Due to this problem, I will probably have to hack
1217
 
                         * the mutex so that the freeer can get started...
1218
 
                         *
1219
 
                         * NOPE! problem has gone in 6.0.9. Also not a problem in
1220
 
                         * 5.1.29.
1221
 
                         */
1222
 
                        
1223
 
                        /* {OPEN-DB-SWEEPER-WAIT} 
1224
 
                         * I have to stop the freeer here because it was
1225
 
                         * started before opening the database.
1226
 
                         */
1227
 
 
1228
 
                        /* {FREEER-HANG-ON-INIT-ERROR}
1229
 
                         * pbxt_init is called with LOCK_plugin and if it fails and tries to exit
1230
 
                         * the freeer here it hangs because the freeer calls THD::~THD which tries
1231
 
                         * to aquire the same lock and hangs. OTOH MySQL calls pbxt_end() after
1232
 
                         * an unsuccessful call to pbxt_init, so we defer cleaup, except 
1233
 
                         * releasing 'self'
1234
 
                         */
1235
 
                        xt_free_thread(self);
1236
 
                        goto error_3;
1237
 
                }
1238
 
                xt_free_thread(self);
1239
 
        }
1240
 
        XT_RETURN(init_err);
1241
 
 
1242
 
        error_3:
1243
 
#ifdef PBMS_ENABLED
1244
 
        pbms_finalize();
1245
 
 
1246
 
        error_2:
1247
 
#endif
1248
 
 
1249
 
        error_1:
1250
 
        XT_RETURN(1);
1251
 
}
1252
 
 
1253
 
void PBXTStorageEngine::shutdownPlugin()
1254
 
{
1255
 
        XTThreadPtr self;
1256
 
 
1257
 
        XT_TRACE_CALL();
1258
 
 
1259
 
        if (pbxt_inited) {
1260
 
                XTExceptionRec  e;
1261
 
 
1262
 
                /* This flag also means "shutting down". */
1263
 
                pbxt_inited = false; 
1264
 
                self = xt_create_thread("TempForEnd", FALSE, TRUE, &e);
1265
 
                if (self) {
1266
 
                        self->t_main = TRUE;
1267
 
                        ha_exit(self);
1268
 
                }
1269
 
        }
1270
 
}
1271
 
 
1272
 
PBXTStorageEngine::~PBXTStorageEngine()
{
	/* Intentionally empty: all of the shutdown work is performed
	 * by shutdownPlugin().
	 */
}
1276
 
 
1277
 
/*
1278
 
 * The following query from the DBT1 test is VERY slow
1279
 
 * if we do not set HA_READ_ORDER.
1280
 
 * The reason is that it must scan all duplicates, then
1281
 
 * sort.
1282
 
 *
1283
 
 * SELECT o_id, o_carrier_id, o_entry_d, o_ol_cnt
1284
 
 * FROM orders FORCE INDEX (o_w_id)
1285
 
 * WHERE o_w_id = 2
1286
 
   * AND o_d_id = 1
1287
 
   * AND o_c_id = 500
1288
 
 * ORDER BY o_id DESC limit 1;
1289
 
 *
1290
 
 */
1291
 
//#define FLAGS_ARE_READ_DYNAMICALLY
1292
 
 
1293
 
uint32_t PBXTStorageEngine::index_flags(enum  ha_key_alg) const
1294
 
{
1295
 
        /* It would be nice if the dynamic version of this function works,
1296
 
         * but it does not. MySQL loads this information when the table is openned,
1297
 
         * and then it is fixed.
1298
 
         *
1299
 
         * The problem is, I have had to remove the HA_READ_ORDER option although
1300
 
         * it applies to PBXT. PBXT returns entries in index order during an index
1301
 
         * scan in _almost_ all cases.
1302
 
         *
1303
 
         * A number of cases are demostrated here: [(11)]
1304
 
         *
1305
 
         * If involves the following conditions:
1306
 
         * - a SELECT FOR UPDATE, UPDATE or DELETE statement
1307
 
         * - an ORDER BY, or join that requires the sort order
1308
 
         * - another transaction which updates the index while it is being
1309
 
         *   scanned.
1310
 
         *
1311
 
         * In this "obscure" case, the index scan may return index
1312
 
         * entries in the wrong order.
1313
 
         */
1314
 
#ifdef FLAGS_ARE_READ_DYNAMICALLY
1315
 
        /* If were are in an update (SELECT FOR UPDATE, UPDATE or DELETE), then
1316
 
         * it may be that we return the rows from an index in the wrong
1317
 
         * order! This is due to the fact that update reads wait for transactions
1318
 
         * to commit and this means that index entries may change position during
1319
 
         * the scan!
1320
 
         */
1321
 
        if (pb_open_tab && pb_open_tab->ot_for_update)
1322
 
                return (HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_KEYREAD_ONLY);
1323
 
        /* If I understand HA_KEYREAD_ONLY then this means I do not
1324
 
         * need to fetch the record associated with an index
1325
 
         * key.
1326
 
         */
1327
 
        return (HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | HA_READ_RANGE | HA_KEYREAD_ONLY);
1328
 
#else
1329
 
        return (HA_READ_NEXT | HA_READ_PREV | HA_READ_RANGE | HA_KEYREAD_ONLY);
1330
 
#endif
1331
 
}
1332
 
 
1333
 
/*
1334
 
 * Kill the PBXT thread associated with the MySQL thread.
1335
 
 */
1336
 
int PBXTStorageEngine::close_connection(Session *thd)
1337
 
{
1338
 
        PBXTStorageEngine * const hton = this;
1339
 
        XTThreadPtr             self;
1340
 
 
1341
 
        XT_TRACE_CALL();
1342
 
        if ((self = (XTThreadPtr) *thd->getEngineData(hton))) {
1343
 
                *thd->getEngineData(pbxt_hton) = NULL;
1344
 
                /* Required because freeing the thread could cause
1345
 
                 * free of database which could call xt_close_file_ns()!
1346
 
                 */
1347
 
                xt_set_self(self);
1348
 
                xt_free_thread(self);
1349
 
        }
1350
 
        return 0;
1351
 
}
1352
 
 
1353
 
/*
1354
 
 * Currently does nothing because it was all done
1355
 
 * when the last PBXT table was removed from the 
1356
 
 * database.
1357
 
 */
1358
 
void PBXTStorageEngine::drop_database(char *)
1359
 
{
1360
 
        XT_TRACE_CALL();
1361
 
}
1362
 
 
1363
 
/*
1364
 
 * NOTES ON TRANSACTIONS:
1365
 
 *
1366
 
 * 1. If self->st_lock_count == 0 the transaction can be ended immediately.
1367
 
 *    If not, we must wait until the last lock is released on the last handler
1368
 
 *    to ensure that the tables are flushed before the transaction is
1369
 
 *    committed or aborted.
1370
 
 *
1371
 
 * 2. all (below) indicates, within a BEGIN/END (i.e. auto_commit off) whether
1372
 
 *    the statement or the entire transaction is being terminated.
1373
 
 *    We currently ignore statement termination.
1374
 
 * 
1375
 
 * 3. If in BEGIN/END we must call ha_rollback() if we abort the transaction
1376
 
 *    internally.
1377
 
 *
1378
 
 * NOTE ON CONSISTENT SNAPSHOTS:
1379
 
 * 
1380
 
 * PBXT itself doesn't need this function as its transaction mechanism provides
1381
 
 * consistent snapshots for all transactions by default. This function is needed
1382
 
 * only for multi-engine cases like this:
1383
 
 *
1384
 
 * CREATE TABLE t1 ... ENGINE=INNODB
1385
 
 * CREATE TABLE t2 ... ENGINE=PBXT
1386
 
 * START TRANSACTION WITH CONSISTENT SNAPSHOT
1387
 
 * SELECT * FROM t1 <-- at this point we need to know about the snapshot
1388
 
 */
1389
 
 
1390
 
#ifndef DRIZZLED
1391
 
static int pbxt_start_consistent_snapshot(handlerton *hton, THD *thd)
1392
 
{
1393
 
        int err          = 0;
1394
 
        XTThreadPtr self = ha_set_current_thread(thd, &err);
1395
 
 
1396
 
        if (!self->st_database && pbxt_database) {
1397
 
                xt_ha_open_database_of_table(self, (XTPathStrPtr) NULL);
1398
 
        }
1399
 
 
1400
 
        thd_init_xact(thd, self, true);
1401
 
 
1402
 
        if (xt_xn_begin(self)) {
1403
 
                trans_register_ha(thd, TRUE, hton);     
1404
 
        } else {
1405
 
                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1406
 
        }
1407
 
 
1408
 
        /*
1409
 
         * As of MySQL 5.1.41 the return value is not checked, so the server might assume 
1410
 
         * everything is fine even it isn't. InnoDB returns 0 on success.
1411
 
         */
1412
 
        return err;
1413
 
}
1414
 
#endif
1415
 
 
1416
 
/*
1417
 
 * Commit the PBXT transaction of the given thread.
1418
 
 * thd is the MySQL thread structure.
1419
 
 * pbxt_thr is a pointer the the PBXT thread structure.
1420
 
 *
1421
 
 */
1422
 
int PBXTStorageEngine::commit(Session *thd, bool all)
1423
 
{
1424
 
        PBXTStorageEngine * const hton = this;
1425
 
        int                     err = 0;
1426
 
        XTThreadPtr     self;
1427
 
 
1428
 
        if ((self = (XTThreadPtr) *thd->getEngineData(hton))) {
1429
 
                XT_PRINT2(self, "%s pbxt_commit all=%d\n", all ? "END CONN XACT" : "END STAT", all);
1430
 
 
1431
 
                if (self->st_xact_data) {
1432
 
                        /* There are no table locks, commit immediately in all cases
1433
 
                         * except when this is a statement commit with an explicit
1434
 
                         * transaction (!all && !self->st_auto_commit).
1435
 
                         */
1436
 
                        if (all || self->st_auto_commit) {
1437
 
                                XT_PRINT0(self, "xt_xn_commit in pbxt_commit\n");
1438
 
 
1439
 
                                if (!xt_xn_commit(self))
1440
 
                                        err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1441
 
                        }
1442
 
                }
1443
 
                if (!all)
1444
 
                        self->st_stat_trans = FALSE;
1445
 
        }
1446
 
        return err;
1447
 
}
1448
 
 
1449
 
int PBXTStorageEngine::rollback(Session *thd, bool all)
1450
 
{
1451
 
        PBXTStorageEngine * const hton = this;
1452
 
        int                     err = 0;
1453
 
        XTThreadPtr     self;
1454
 
 
1455
 
        if ((self = (XTThreadPtr) *thd->getEngineData(hton))) {
1456
 
                XT_PRINT2(self, "%s pbxt_rollback all=%d\n", all ? "CONN END XACT" : "STAT END", all);
1457
 
 
1458
 
                if (self->st_xact_data) {
1459
 
                        /* There are no table locks, rollback immediately in all cases
1460
 
                         * except when this is a statement commit with an explicit
1461
 
                         * transaction (!all && !self->st_auto_commit).
1462
 
                         *
1463
 
                         * Note, the only reason for a rollback of a operation is
1464
 
                         * due to an error. In this case PBXT has already
1465
 
                         * undone the effects of the operation.
1466
 
                         *
1467
 
                         * However, this is not the same as statement rollback
1468
 
                         * which can involve a number of operations.
1469
 
                         *
1470
 
                         * TODO: Implement statement rollback.
1471
 
                         */
1472
 
                        if (all || self->st_auto_commit) {
1473
 
                                XT_PRINT0(self, "xt_xn_rollback\n");
1474
 
                                if (!xt_xn_rollback(self))
1475
 
                                        err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1476
 
                        }
1477
 
                }
1478
 
                if (!all)
1479
 
                        self->st_stat_trans = FALSE;
1480
 
        }
1481
 
        return 0;
1482
 
}
1483
 
 
1484
 
Cursor *PBXTStorageEngine::create(Table& table)
1485
 
{
1486
 
        return new ha_pbxt(*this, table);
1487
 
}
1488
 
 
1489
 
/*
1490
 
 * -----------------------------------------------------------------------
1491
 
 * 2-PHASE COMMIT
1492
 
 *
1493
 
 */
1494
 
 
1495
 
#ifndef DRIZZLED
1496
 
 
1497
 
static int pbxt_prepare(handlerton *hton, THD *thd, bool all)
1498
 
{
1499
 
        int                     err = 0;
1500
 
        XTThreadPtr     self;
1501
 
 
1502
 
        XT_TRACE_CALL();
1503
 
        if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
1504
 
                XT_PRINT1(self, "pbxt_commit all=%d\n", all);
1505
 
 
1506
 
                if (self->st_xact_data) {
1507
 
                        /* There are no table locks, commit immediately in all cases
1508
 
                         * except when this is a statement commit with an explicit
1509
 
                         * transaction (!all && !self->st_auto_commit).
1510
 
                         */
1511
 
                        if (all || self->st_auto_commit) {
1512
 
                                XID xid;
1513
 
 
1514
 
                                XT_PRINT0(self, "xt_xn_prepare in pbxt_prepare\n");
1515
 
                                thd_get_xid(thd, (MYSQL_XID*) &xid);
1516
 
 
1517
 
                                if (!xt_xn_prepare(xid.length(), (xtWord1 *) &xid, self))
1518
 
                                        err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1519
 
                        }
1520
 
                }
1521
 
        }
1522
 
        return err;
1523
 
}
1524
 
 
1525
 
static XTThreadPtr ha_temp_open_global_database(handlerton *hton, THD **ret_thd, int *temp_thread, const char *thread_name, int *err)
1526
 
{
1527
 
        THD                     *thd;
1528
 
        XTThreadPtr     self = NULL;
1529
 
 
1530
 
        *temp_thread = 0;
1531
 
        if ((thd = current_thd))
1532
 
                self = (XTThreadPtr) *thd_ha_data(thd, hton);
1533
 
        else {
1534
 
                //thd = (THD *) myxt_create_thread();
1535
 
                //*temp_thread |= 2;
1536
 
        }
1537
 
 
1538
 
        if (!self) {
1539
 
                XTExceptionRec e;
1540
 
 
1541
 
                if (!(self = xt_create_thread(thread_name, FALSE, TRUE, &e))) {
1542
 
                        *err = xt_ha_pbxt_to_mysql_error(e.e_xt_err);
1543
 
                        xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
1544
 
                        return NULL;
1545
 
                }
1546
 
                *temp_thread |= 1;
1547
 
        }
1548
 
 
1549
 
        xt_xres_wait_for_recovery(self, XT_RECOVER_DONE);
1550
 
 
1551
 
        try_(a) {
1552
 
                xt_open_database(self, mysql_real_data_home, TRUE);
1553
 
        }
1554
 
        catch_(a) {
1555
 
                *err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1556
 
                if ((*temp_thread & 1))
1557
 
                        xt_free_thread(self);
1558
 
                if (*temp_thread & 2)
1559
 
                        myxt_destroy_thread(thd, FALSE);
1560
 
                self = NULL;
1561
 
        }
1562
 
        cont_(a);
1563
 
 
1564
 
        *ret_thd = thd;
1565
 
        return self;
1566
 
}
1567
 
 
1568
 
static void ha_temp_close_database(XTThreadPtr self, THD *thd, int temp_thread)
1569
 
{
1570
 
        xt_unuse_database(self, self);
1571
 
        if (temp_thread & 1)
1572
 
                xt_free_thread(self);
1573
 
        if (temp_thread & 2)
1574
 
                myxt_destroy_thread(thd, TRUE);
1575
 
}
1576
 
 
1577
 
/* Return all prepared transactions, found during recovery.
1578
 
 * This function returns a count. If len is returned, the
1579
 
 * function will be called again.
1580
 
 */
1581
 
static int pbxt_recover(handlerton *hton, XID *xid_list, uint len)
1582
 
{
1583
 
        xtBool                          temp_thread;
1584
 
        XTThreadPtr                     self;
1585
 
        XTDatabaseHPtr          db;
1586
 
        uint                            count = 0;
1587
 
        XTXactPreparePtr        xap;
1588
 
        int                                     err;
1589
 
        THD                                     *thd;
1590
 
 
1591
 
        if (!(self = ha_temp_open_global_database(hton, &thd, &temp_thread, "TempForRecover", &err)))
1592
 
                return 0;
1593
 
 
1594
 
        db = self->st_database;
1595
 
 
1596
 
        for (count=0; count<len; count++) {
1597
 
                xap = xt_xn_enum_xa_data(db, &pbxt_xa_enum);
1598
 
                if (!xap)
1599
 
                        break;
1600
 
                memcpy(&xid_list[count], xap->xp_xa_data, xap->xp_data_len);
1601
 
        }
1602
 
 
1603
 
        ha_temp_close_database(self, thd, temp_thread);
1604
 
        return (int) count;
1605
 
}
1606
 
 
1607
 
static int pbxt_commit_by_xid(handlerton *hton, XID *xid)
1608
 
{
1609
 
        xtBool                          temp_thread;
1610
 
        XTThreadPtr                     self;
1611
 
        XTDatabaseHPtr          db;
1612
 
        int                                     err = 0;
1613
 
        XTXactPreparePtr        xap;
1614
 
        THD                                     *thd;
1615
 
 
1616
 
        XT_TRACE_CALL();
1617
 
 
1618
 
        if (!(self = ha_temp_open_global_database(hton, &thd, &temp_thread, "TempForCommitXA", &err)))
1619
 
                return err;
1620
 
        db = self->st_database;
1621
 
 
1622
 
        if ((xap = xt_xn_find_xa_data(db, xid->length(), (xtWord1 *) xid, TRUE, self))) {
1623
 
                if ((self->st_xact_data = xt_xn_get_xact(db, xap->xp_xact_id, self))) {
1624
 
                        self->st_xact_data->xd_flags &= ~XT_XN_XAC_PREPARED;  // Prepared transactions cannot be swept!
1625
 
                        if (!xt_xn_commit(self))
1626
 
                                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1627
 
                }
1628
 
                xt_xn_delete_xa_data(db, xap, TRUE, self);
1629
 
        }
1630
 
 
1631
 
        ha_temp_close_database(self, thd, temp_thread);
1632
 
        return 0;
1633
 
}
1634
 
 
1635
 
static int pbxt_rollback_by_xid(handlerton *hton, XID *xid)
1636
 
{
1637
 
        int                                     temp_thread;
1638
 
        XTThreadPtr                     self;
1639
 
        XTDatabaseHPtr          db;
1640
 
        int                                     err = 0;
1641
 
        XTXactPreparePtr        xap;
1642
 
        THD                                     *thd;
1643
 
 
1644
 
        XT_TRACE_CALL();
1645
 
 
1646
 
        if (!(self = ha_temp_open_global_database(hton, &thd, &temp_thread, "TempForRollbackXA", &err)))
1647
 
                return err;
1648
 
        db = self->st_database;
1649
 
 
1650
 
        if ((xap = xt_xn_find_xa_data(db, xid->length(), (xtWord1 *) xid, TRUE, self))) {
1651
 
                if ((self->st_xact_data = xt_xn_get_xact(db, xap->xp_xact_id, self))) {
1652
 
                        self->st_xact_data->xd_flags &= ~XT_XN_XAC_PREPARED;  // Prepared transactions cannot be swept!
1653
 
                        if (!xt_xn_rollback(self))
1654
 
                                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1655
 
                }
1656
 
                xt_xn_delete_xa_data(db, xap, TRUE, self);
1657
 
        }
1658
 
 
1659
 
        ha_temp_close_database(self, thd, temp_thread);
1660
 
        return 0;
1661
 
}
1662
 
 
1663
 
#endif
1664
 
 
1665
 
/*
1666
 
 * -----------------------------------------------------------------------
1667
 
 * HANDLER LOCKING FUNCTIONS
1668
 
 *
1669
 
 * These functions are used to get a lock on all handlers of a particular table.
1670
 
 *
1671
 
 */
1672
 
 
1673
 
static void ha_add_to_handler_list(XTThreadPtr self, XTSharePtr share, ha_pbxt *handler)
1674
 
{
1675
 
        xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1676
 
        pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1677
 
 
1678
 
        handler->pb_ex_next = share->sh_handlers;
1679
 
        handler->pb_ex_prev = NULL;
1680
 
        if (share->sh_handlers)
1681
 
                share->sh_handlers->pb_ex_prev = handler;
1682
 
        share->sh_handlers = handler;
1683
 
 
1684
 
        freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1685
 
}
1686
 
 
1687
 
static void ha_remove_from_handler_list(XTThreadPtr self, XTSharePtr share, ha_pbxt *handler)
1688
 
{
1689
 
        xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1690
 
        pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1691
 
 
1692
 
        /* Move front pointer: */
1693
 
        if (share->sh_handlers == handler)
1694
 
                share->sh_handlers = handler->pb_ex_next;
1695
 
 
1696
 
        /* Remove from list: */
1697
 
        if (handler->pb_ex_prev)
1698
 
                handler->pb_ex_prev->pb_ex_next = handler->pb_ex_next;
1699
 
        if (handler->pb_ex_next)
1700
 
                handler->pb_ex_next->pb_ex_prev = handler->pb_ex_prev;
1701
 
 
1702
 
        freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1703
 
}
1704
 
 
1705
 
/*
1706
 
 * Acquire exclusive use of a table, by waiting for all
1707
 
 * threads to complete use of all handlers of the table.
1708
 
 * At the same time we hold up all threads
1709
 
 * that want to use handlers belonging to the table.
1710
 
 *
1711
 
 * But we do not hold up threads that close the handlers.
1712
 
 */
1713
 
static void ha_aquire_exclusive_use(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine)
1714
 
{
1715
 
        ha_pbxt *handler;
1716
 
        time_t  end_time = time(NULL) + XT_SHARE_LOCK_TIMEOUT / 1000;
1717
 
 
1718
 
        XT_PRINT1(self, "ha_aquire_exclusive_use (%s) PBXT X lock\n", share->sh_table_path->ps_path);
1719
 
        /* GOTCHA: It is possible to hang here, if you hold
1720
 
         * onto the sh_ex_mutex lock, before we really
1721
 
         * have the exclusive lock (i.e. before all
1722
 
         * handlers are no longer in use.
1723
 
         * The reason is, because reopen() is not possible
1724
 
         * when some other thread holds sh_ex_mutex.
1725
 
         * So this can prevent a thread from completing its
1726
 
         * use of a handler, when prevents exclusive use
1727
 
         * here.
1728
 
         */
1729
 
        xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1730
 
        pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1731
 
 
1732
 
        /* Wait until we can get an exclusive lock: */
1733
 
        while (share->sh_table_lock) {
1734
 
                xt_timed_wait_cond(self, (xt_cond_type *) share->sh_ex_cond, (xt_mutex_type *) share->sh_ex_mutex, XT_SHARE_LOCK_WAIT);
1735
 
                if (time(NULL) > end_time) {
1736
 
                        freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1737
 
                        xt_throw_taberr(XT_CONTEXT, XT_ERR_LOCK_TIMEOUT, share->sh_table_path);
1738
 
                }
1739
 
        }
1740
 
 
1741
 
        /* This tells readers (and other exclusive lockers) that someone has an exclusive lock. */
1742
 
        share->sh_table_lock = TRUE;
1743
 
        
1744
 
        /* Wait for all open handlers use count to go to 0 */   
1745
 
        retry:
1746
 
        handler = share->sh_handlers;
1747
 
        while (handler) {
1748
 
                if (handler == mine || !handler->pb_ex_in_use)
1749
 
                        handler = handler->pb_ex_next;
1750
 
                else {
1751
 
                        /* Wait a bit, and try again: */
1752
 
                        xt_timed_wait_cond(self, (xt_cond_type *) share->sh_ex_cond, (xt_mutex_type *) share->sh_ex_mutex, XT_SHARE_LOCK_WAIT);
1753
 
                        if (time(NULL) > end_time) {
1754
 
                                freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1755
 
                                xt_throw_taberr(XT_CONTEXT, XT_ERR_LOCK_TIMEOUT, share->sh_table_path);
1756
 
                        }
1757
 
                        /* Handler may have been freed, check from the begining again: */
1758
 
                        goto retry;
1759
 
                }
1760
 
        }
1761
 
 
1762
 
        freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1763
 
}
1764
 
 
1765
 
/*
1766
 
 * If you have exclusively locked the table, you can close all handler
1767
 
 * open tables.
1768
 
 *
1769
 
 * Call ha_close_open_tables() to get an exclusive lock.
1770
 
 */
1771
 
static void ha_close_open_tables(XTThreadPtr self, XTSharePtr share, ha_pbxt *mine)
1772
 
{
1773
 
        ha_pbxt *handler;
1774
 
 
1775
 
        xt_lock_mutex(self, (xt_mutex_type *) share->sh_ex_mutex);
1776
 
        pushr_(xt_unlock_mutex, share->sh_ex_mutex);
1777
 
 
1778
 
        /* Now that we know no handler is in use, we can close all the
1779
 
         * open tables...
1780
 
         */
1781
 
        handler = share->sh_handlers;
1782
 
        while (handler) {
1783
 
                if (handler != mine && handler->pb_open_tab) {
1784
 
                        xt_db_return_table_to_pool_ns(handler->pb_open_tab);
1785
 
                        handler->pb_open_tab = NULL;
1786
 
                }
1787
 
                handler = handler->pb_ex_next;
1788
 
        }
1789
 
 
1790
 
        freer_(); // xt_unlock_mutex(share->sh_ex_mutex)
1791
 
}
1792
 
 
1793
 
#ifdef PBXT_ALLOW_PRINTING
1794
 
static void ha_release_exclusive_use(XTThreadPtr self, XTSharePtr share)
1795
 
#else
1796
 
static void ha_release_exclusive_use(XTThreadPtr XT_UNUSED(self), XTSharePtr share)
1797
 
#endif
1798
 
{
1799
 
        XT_PRINT1(self, "ha_release_exclusive_use (%s) PBXT X UNLOCK\n", share->sh_table_path->ps_path);
1800
 
        xt_lock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1801
 
        share->sh_table_lock = FALSE;
1802
 
        xt_broadcast_cond_ns((xt_cond_type *) share->sh_ex_cond);
1803
 
        xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1804
 
}
1805
 
 
1806
 
static xtBool ha_wait_for_shared_use(ha_pbxt *mine, XTSharePtr share)
1807
 
{
1808
 
        time_t  end_time = time(NULL) + XT_SHARE_LOCK_TIMEOUT / 1000;
1809
 
 
1810
 
        XT_PRINT1(xt_get_self(), "ha_wait_for_shared_use (%s) share lock wait...\n", share->sh_table_path->ps_path);
1811
 
        mine->pb_ex_in_use = 0;
1812
 
        xt_lock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1813
 
        while (share->sh_table_lock) {
1814
 
                /* Wake up the exclusive locker (may be waiting). He can try to continue: */
1815
 
                xt_broadcast_cond_ns((xt_cond_type *) share->sh_ex_cond);
1816
 
 
1817
 
                if (!xt_timed_wait_cond(NULL, (xt_cond_type *) share->sh_ex_cond, (xt_mutex_type *) share->sh_ex_mutex, XT_SHARE_LOCK_WAIT)) {
1818
 
                        xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1819
 
                        return FAILED;
1820
 
                }
1821
 
 
1822
 
                if (time(NULL) > end_time) {
1823
 
                        xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1824
 
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_LOCK_TIMEOUT, share->sh_table_path);
1825
 
                        return FAILED;
1826
 
                }
1827
 
        }
1828
 
        mine->pb_ex_in_use = 1;
1829
 
        xt_unlock_mutex_ns((xt_mutex_type *) share->sh_ex_mutex);
1830
 
        return OK;
1831
 
}
1832
 
 
1833
 
xtPublic int ha_pbxt::reopen()
1834
 
{
1835
 
        THD                             *thd = current_thd;
1836
 
        int                             err = 0;
1837
 
        XTThreadPtr             self;   
1838
 
 
1839
 
        if (!(self = ha_set_current_thread(thd, &err)))
1840
 
                return xt_ha_pbxt_to_mysql_error(err);
1841
 
 
1842
 
        try_(a) {
1843
 
                xt_ha_open_database_of_table(self, pb_share->sh_table_path);
1844
 
 
1845
 
                ha_open_share(self, pb_share);
1846
 
 
1847
 
                if (!(pb_open_tab = xt_db_open_table_using_tab(pb_share->sh_table, self)))
1848
 
                        xt_throw(self);
1849
 
                pb_open_tab->ot_thread = self;
1850
 
 
1851
 
                /* {TABLE-STATS}
1852
 
                 * We no longer use the information that a table
1853
 
                 * was opened in order to know when to calculate
1854
 
                 * statistics.
1855
 
                 */
1856
 
                if (!pb_open_tab->ot_table->tab_ind_stat_calc_time) {
1857
 
#ifdef LOAD_TABLE_ON_OPEN
1858
 
                        xt_tab_load_table(self, pb_open_tab);
1859
 
#else
1860
 
                        xt_tab_load_row_pointers(self, pb_open_tab);
1861
 
#endif
1862
 
                        xt_ind_set_index_selectivity(pb_open_tab, self);
1863
 
                        /* If the number of rows is less than 150 we will recalculate the
1864
 
                         * selectity of the indices, as soon as the number of rows
1865
 
                         * exceeds 200 (see [**])
1866
 
                         */
1867
 
#ifdef XT_ROW_COUNT_CORRECTED
1868
 
                        /* {CORRECTED-ROW-COUNT} */
1869
 
                        pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 - pb_share->sh_table->tab_row_fnum) < 150;
1870
 
#else
1871
 
                        /* {FREE-ROWS-BAD} */
1872
 
                        pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150;
1873
 
#endif
1874
 
                }
1875
 
 
1876
 
                /* I am not doing this anymore because it was only required
1877
 
                 * for DELETE FROM table;, which is now implemented
1878
 
                 * by deleting each row.
1879
 
                 * TRUNCATE TABLE does not preserve the counter value.
1880
 
                 */
1881
 
                //init_auto_increment(pb_share->sh_min_auto_inc);
1882
 
                init_auto_increment(0);
1883
 
        }
1884
 
        catch_(a) {
1885
 
                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
1886
 
        }
1887
 
        cont_(a);
1888
 
        
1889
 
        return err;
1890
 
}
1891
 
 
1892
 
/*
1893
 
 * -----------------------------------------------------------------------
1894
 
 * INFORMATION SCHEMA FUNCTIONS
1895
 
 *
1896
 
 */
1897
 
#ifdef DRI_IS
1898
 
static int pbxt_statistics_fill_table(THD *thd, TABLE_LIST *tables, COND *cond)
1899
 
{
1900
 
        XTThreadPtr             self = NULL;    
1901
 
        int                             err = 0;
1902
 
 
1903
 
        if (!pbxt_hton) {
1904
 
                /* Can't do if PBXT is not loaded! */
1905
 
                XTExceptionRec  e;
1906
 
 
1907
 
                xt_exception_xterr(&e, XT_CONTEXT, XT_ERR_PBXT_NOT_INSTALLED);
1908
 
                xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
1909
 
                /* Just return an empty set: */
1910
 
                return 0;
1911
 
        }
1912
 
 
1913
 
        if (!(self = ha_set_current_thread(thd, &err)))
1914
 
                return xt_ha_pbxt_to_mysql_error(err);
1915
 
 
1916
 
 
1917
 
        try_(a) {
1918
 
                /* If the thread has no open database, and the global
1919
 
                 * database is already open, then open
1920
 
                 * the database. Otherwise the statement will be
1921
 
                 * executed without an open database, which means
1922
 
                 * that the related statistics will be missing.
1923
 
                 *
1924
 
                 * This includes all background threads.
1925
 
                 */
1926
 
                if (!self->st_database && pbxt_database) {
1927
 
                        xt_ha_open_database_of_table(self, (XTPathStrPtr) NULL);
1928
 
                }
1929
 
 
1930
 
                err = myxt_statistics_fill_table(self, thd, tables, cond, system_charset_info);
1931
 
        }
1932
 
        catch_(a) {
1933
 
                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
1934
 
        }
1935
 
        cont_(a);
1936
 
        return err;
1937
 
}
1938
 
#endif // DRI_IS
1939
 
 
1940
 
#ifdef DRIZZLED
1941
 
#ifdef DRI_IS
1942
 
ColumnInfo pbxt_statistics_fields_info[]=
1943
 
{
1944
 
        ColumnInfo("ID", 4, MYSQL_TYPE_LONG,  0, 0, "The ID of the statistic", SKIP_OPEN_TABLE),
1945
 
        ColumnInfo("Name", 40, MYSQL_TYPE_STRING, 0, 0, "The name of the statistic", SKIP_OPEN_TABLE),
1946
 
        ColumnInfo("Value", 8, MYSQL_TYPE_LONGLONG, 0, 0, "The accumulated value", SKIP_OPEN_TABLE),
1947
 
        ColumnInfo()
1948
 
};
1949
 
 
1950
 
class PBXTStatisticsMethods : public InfoSchemaMethods
1951
 
{
1952
 
public:
1953
 
  int fillTable(Session *session, TableList *tables, COND *cond)
1954
 
  {
1955
 
        return pbxt_statistics_fill_table(session, tables, cond);
1956
 
  }
1957
 
};
1958
 
#endif // DRI_IS
1959
 
#else
1960
 
ST_FIELD_INFO pbxt_statistics_fields_info[]=
1961
 
{
1962
 
        { "ID",         4,      MYSQL_TYPE_LONG,                0, 0, "The ID of the statistic", SKIP_OPEN_TABLE},
1963
 
        { "Name",       40, MYSQL_TYPE_STRING,          0, 0, "The name of the statistic", SKIP_OPEN_TABLE},
1964
 
        { "Value",      8,      MYSQL_TYPE_LONGLONG,    0, 0, "The accumulated value", SKIP_OPEN_TABLE},
1965
 
        { 0,            0,      MYSQL_TYPE_STRING,              0, 0, 0, SKIP_OPEN_TABLE}
1966
 
};
1967
 
#endif
1968
 
 
1969
 
#ifdef DRIZZLED
1970
 
#ifdef DRI_IS
1971
 
static InfoSchemaTable  *pbxt_statistics_table;
1972
 
static PBXTStatisticsMethods pbxt_statistics_methods;
1973
 
static int pbxt_init_statistics(Registry &registry)
1974
 
{
1975
 
        //pbxt_statistics_table = (InfoSchemaTable *)xt_calloc_ns(sizeof(InfoSchemaTable));
1976
 
        //pbxt_statistics_table->table_name= "PBXT_STATISTICS";
1977
 
        pbxt_statistics_table = new InfoSchemaTable("PBXT_STATISTICS");
1978
 
        pbxt_statistics_table->setColumnInfo(pbxt_statistics_fields_info);
1979
 
        pbxt_statistics_table->setInfoSchemaMethods(&pbxt_statistics_methods);
1980
 
        registry.add(pbxt_statistics_table);
1981
 
        return 0;
1982
 
}
1983
 
#endif // DRI_IS
1984
 
#else  // DRIZZLED
1985
 
static int pbxt_init_statistics(void *p)
1986
 
{
1987
 
        ST_SCHEMA_TABLE *pbxt_statistics_table = (ST_SCHEMA_TABLE *) p;
1988
 
        pbxt_statistics_table->fields_info = pbxt_statistics_fields_info;
1989
 
        pbxt_statistics_table->fill_table = pbxt_statistics_fill_table;
1990
 
 
1991
 
#if defined(XT_WIN) && defined(XT_COREDUMP)
1992
 
        void register_crash_filter();
1993
 
 
1994
 
        if (pbxt_crash_debug)
1995
 
                register_crash_filter();
1996
 
#endif
1997
 
        return 0;
1998
 
}
1999
 
#endif
2000
 
 
2001
 
#ifdef DRIZZLED
2002
 
#ifdef DRI_IS
2003
 
static int pbxt_exit_statistics(Registry &registry)
2004
 
        registry.remove(pbxt_statistics_table);
2005
 
        delete pbxt_statistics_table;
2006
 
        return(0);
2007
 
}
2008
 
#endif // DRI_IS
2009
 
#else  // DRIZZLED
2010
 
static int pbxt_exit_statistics(void *XT_UNUSED(p))
2011
 
{
2012
 
        return(0);
2013
 
}
2014
 
#endif  // DRIZZLED
2015
 
 
2016
 
/*
2017
 
 * -----------------------------------------------------------------------
2018
 
 * DYNAMIC HOOKS
2019
 
 *
2020
 
 */
2021
 
 
2022
 
ha_pbxt::ha_pbxt(plugin::StorageEngine &engine_arg, Table &table_arg) : Cursor(engine_arg, table_arg)
2023
 
{
2024
 
        pb_share = NULL;
2025
 
        pb_open_tab = NULL;
2026
 
        pb_key_read = FALSE;
2027
 
        pb_ignore_dup_key = 0;
2028
 
        pb_lock_table = FALSE;
2029
 
        pb_table_locked = 0;
2030
 
        pb_ex_next = NULL;
2031
 
        pb_ex_prev = NULL;
2032
 
        pb_ex_in_use = 0;
2033
 
        pb_in_stat = FALSE;
2034
 
}
2035
 
 
2036
 
/*
2037
 
 * If frm_error() is called then we will use this to to find out what file extentions
2038
 
 * exist for the storage engine. This is also used by the default rename_table and
2039
 
 * delete_table method in handler.cc.
2040
 
 */
2041
 
#ifdef DRIZZLED
2042
 
const char **PBXTStorageEngine::bas_ext() const
2043
 
#else
2044
 
const char **ha_pbxt::bas_ext() const
2045
 
#endif
2046
 
{
2047
 
        return pbxt_extensions;
2048
 
}
2049
 
 
2050
 
/*
2051
 
 * Specify the caching type: HA_CACHE_TBL_NONTRANSACT, HA_CACHE_TBL_NOCACHE
2052
 
 * HA_CACHE_TBL_ASKTRANSACT, HA_CACHE_TBL_TRANSACT
2053
 
 */
2054
 
MX_UINT8_T ha_pbxt::table_cache_type()
2055
 
{
2056
 
        return HA_CACHE_TBL_TRANSACT; /* Use transactional query cache */
2057
 
}
2058
 
 
2059
 
#ifndef DRIZZLED
2060
 
MX_TABLE_TYPES_T ha_pbxt::table_flags() const
2061
 
{
2062
 
        return (
2063
 
                /* We need this flag because records are not packed
2064
 
                 * into a table which means #ROWID != offset
2065
 
                 */
2066
 
                HA_REC_NOT_IN_SEQ |
2067
 
                /* Since PBXT caches read records itself, I believe
2068
 
                 * this to be the case.
2069
 
                 */
2070
 
                HA_FAST_KEY_READ |
2071
 
                /*
2072
 
                 * I am assuming a "key" means a unique index.
2073
 
                 * Of course a primary key does not allow nulls.
2074
 
                 */
2075
 
                HA_NULL_IN_KEY |
2076
 
                /*
2077
 
                 * This is necessary because a MySQL blob can be
2078
 
                 * fairly small.
2079
 
                 */
2080
 
                HA_CAN_INDEX_BLOBS |
2081
 
                /*
2082
 
                 * Due to transactional influences, this will be
2083
 
                 * the case.
2084
 
                 * Although the count is good enough for practical
2085
 
                 * purposes!
2086
 
                HA_NOT_EXACT_COUNT |
2087
 
                 */
2088
 
                /*
2089
 
                 * This basically means we have a file with the name of
2090
 
                 * database table (which we do).
2091
 
                 */
2092
 
                HA_FILE_BASED |
2093
 
                /*
2094
 
                 * Not sure what this does (but MyISAM and InnoDB have it)?!
2095
 
                 * Could it mean that we support the handler functions.
2096
 
                 */
2097
 
                HA_CAN_SQL_HANDLER |
2098
 
                /*
2099
 
                 * This is not true, we cannot insert delayed, but a
2100
 
                 * really cannot see what's wrong with inserting normally
2101
 
                 * when asked to insert delayed!
2102
 
                 * And the functionallity is required to pass the alter_table
2103
 
                 * test.
2104
 
                 *
2105
 
                 * Disabled because of MySQL bug #40505
2106
 
                 */
2107
 
                /*HA_CAN_INSERT_DELAYED |*/
2108
 
#if MYSQL_VERSION_ID > 50119
2109
 
                /* We can do row logging, but not statement, because
2110
 
                 * MVCC is not serializable!
2111
 
                 */
2112
 
                HA_BINLOG_ROW_CAPABLE |
2113
 
#endif
2114
 
                /*
2115
 
                 * Auto-increment is allowed on a partial key.
2116
 
                 */
2117
 
                HA_AUTO_PART_KEY);
2118
 
}
2119
 
#endif
2120
 
 
2121
 
void ha_pbxt::internal_close(THD *thd, struct XTThread *self)
2122
 
{
2123
 
        if (pb_share) {
2124
 
                xtBool                  removed;
2125
 
                XTOpenTablePtr  ot;
2126
 
 
2127
 
                try_(a) {
2128
 
                        /* This lock must be held when we remove the handler's
2129
 
                         * open table because ha_close_open_tables() can run
2130
 
                         * concurrently.
2131
 
                         */
2132
 
                        xt_lock_mutex_ns(pb_share->sh_ex_mutex);
2133
 
                        if ((ot = pb_open_tab)) {
2134
 
                                pb_open_tab->ot_thread = self;
2135
 
                                if (self->st_database != pb_open_tab->ot_table->tab_db)
2136
 
                                        xt_ha_open_database_of_table(self, pb_share->sh_table_path);
2137
 
                                pb_open_tab = NULL;
2138
 
                                pushr_(xt_db_return_table_to_pool, ot);
2139
 
                        }
2140
 
                        xt_unlock_mutex_ns(pb_share->sh_ex_mutex);
2141
 
 
2142
 
                        ha_remove_from_handler_list(self, pb_share, this);
2143
 
 
2144
 
                        /* Someone may be waiting for me to complete: */
2145
 
                        xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
2146
 
 
2147
 
                        removed = ha_unget_share_removed(self, pb_share);
2148
 
 
2149
 
                        if (ot) {
2150
 
                                /* Flush the table if this was the last handler: */
2151
 
                                /* This is not necessary but has the affect that
2152
 
                                 * FLUSH TABLES; does a checkpoint!
2153
 
                                 */
2154
 
                                if (removed) {
2155
 
                                        /* GOTCHA:
2156
 
                                         * This was killing performance as the number of threads increased!
2157
 
                                         *
2158
 
                                         * When MySQL runs out of table handlers because the table
2159
 
                                         * handler cache is too small, it starts to close handlers.
2160
 
                                         * (open_cache.records > table_cache_size)
2161
 
                                         *
2162
 
                                         * Which can lead to closing all handlers for a particular table.
2163
 
                                         *
2164
 
                                         * It does this while holding lock_OPEN!
2165
 
                                         * So this code below leads to a sync operation while lock_OPEN
2166
 
                                         * is held. The result is that the whole server comes to a stop.
2167
 
                                         */
2168
 
                                        if (!thd || thd_sql_command(thd) == SQLCOM_FLUSH) // FLUSH TABLES
2169
 
                                                xt_sync_flush_table(self, ot, thd ? 0 : 4);
2170
 
                                        else {
2171
 
                                                /* This change is a result of a problem mentioned by Arjen.
2172
 
                                                 * REPAIR and ALTER lead to the following sequence:
2173
 
                                                 * 1. tab  -- copy --> tmp1
2174
 
                                                 * 2. tab  -- rename --> tmp2
2175
 
                                                 * 3. tmp1 -- rename --> tab
2176
 
                                                 * 4. delete tmp2
2177
 
                                                 *
2178
 
                                                 * PBXT flushes a table before rename.
2179
 
                                                 * In the sequence above results in a table flush in step 3 which can
2180
 
                                                 * take a very long time.
2181
 
                                                 *
2182
 
                                                 * The problem is, during this time frame we have only temp tables.
2183
 
                                                 * A crash in this state leaves the database in a bad state.
2184
 
                                                 *
2185
 
                                                 * To reduce the time in this state, the flush needs to be done
2186
 
                                                 * elsewhere. The code below causes the flish to occur after
2187
 
                                                 * step 1:
2188
 
                                                 */ 
2189
 
                                                switch (thd_sql_command(thd)) {
2190
 
                                                        case SQLCOM_RENAME_TABLE:
2191
 
                                                        case SQLCOM_ANALYZE:
2192
 
                                                        case SQLCOM_ALTER_TABLE:
2193
 
                                                        case SQLCOM_CREATE_INDEX:
2194
 
                                                                xt_sync_flush_table(self, ot, thd ? 0 : 4);
2195
 
                                                                break;
2196
 
                                                }
2197
 
                                        }
2198
 
                                }
2199
 
                                freer_(); // xt_db_return_table_to_pool(ot);
2200
 
                        }
2201
 
                }
2202
 
                catch_(a) {
2203
 
                        xt_log_and_clear_exception(self);
2204
 
                }
2205
 
                cont_(a);
2206
 
 
2207
 
                pb_share = NULL;
2208
 
        }
2209
 
}
2210
 
 
2211
 
/*
2212
 
 * Used for opening tables. The name will be the name of the file.
2213
 
 * A table is opened when it needs to be opened. For instance
2214
 
 * when a request comes in for a select on the table (tables are not
2215
 
 * open and closed for each request, they are cached).
2216
 
 
2217
 
 * Called from handler.cc by handler::ha_open(). The server opens all tables by
2218
 
 * calling ha_open() which then calls the handler specific open().
2219
 
 */
2220
 
int ha_pbxt::open(const char *table_path, int XT_UNUSED(mode), uint XT_UNUSED(test_if_locked))
2221
 
{
2222
 
        THD                     *thd = current_thd;
2223
 
        int                     err = 0;
2224
 
        XTThreadPtr     self;
2225
 
 
2226
 
        ref_length = XT_RECORD_OFFS_SIZE;
2227
 
 
2228
 
        if (!(self = ha_set_current_thread(thd, &err)))
2229
 
                return xt_ha_pbxt_to_mysql_error(err);
2230
 
 
2231
 
        XT_PRINT1(self, "open (%s)\n", table_path);
2232
 
 
2233
 
        pb_ex_in_use = 1;
2234
 
        try_(a) {
2235
 
                xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
2236
 
 
2237
 
                pb_share = ha_get_share(self, table_path, false);
2238
 
                ha_add_to_handler_list(self, pb_share, this);
2239
 
                if (pb_share->sh_table_lock) {
2240
 
                        if (!ha_wait_for_shared_use(this, pb_share))
2241
 
                                xt_throw(self);
2242
 
                }
2243
 
 
2244
 
                ha_open_share(self, pb_share);
2245
 
 
2246
 
                pb_lock.init(&pb_share->sh_lock);
2247
 
                if (!(pb_open_tab = xt_db_open_table_using_tab(pb_share->sh_table, self)))
2248
 
                        xt_throw(self);
2249
 
                pb_open_tab->ot_thread = self;
2250
 
 
2251
 
                /* {TABLE-STATS} */
2252
 
                if (!pb_open_tab->ot_table->tab_ind_stat_calc_time) {
2253
 
#ifdef LOAD_TABLE_ON_OPEN
2254
 
                        xt_tab_load_table(self, pb_open_tab);
2255
 
#else
2256
 
                        xt_tab_load_row_pointers(self, pb_open_tab);
2257
 
#endif
2258
 
 
2259
 
                        xt_ind_set_index_selectivity(pb_open_tab, self);
2260
 
#ifdef XT_ROW_COUNT_CORRECTED
2261
 
                        /* {CORRECTED-ROW-COUNT} */
2262
 
                        pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 - pb_share->sh_table->tab_row_fnum) < 150;
2263
 
#else
2264
 
                        /* {FREE-ROWS-BAD} */
2265
 
                        pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150;
2266
 
#endif
2267
 
                }
2268
 
 
2269
 
                init_auto_increment(0);
2270
 
        }
2271
 
        catch_(a) {
2272
 
                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
2273
 
                internal_close(thd, self);
2274
 
        }
2275
 
        cont_(a);
2276
 
 
2277
 
        if (!err)
2278
 
                info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
2279
 
 
2280
 
        pb_ex_in_use = 0;
2281
 
        if (pb_share) {
2282
 
                /* Someone may be waiting for me to complete: */
2283
 
                if (pb_share->sh_table_lock)
2284
 
                        xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
2285
 
        }
2286
 
        return err;
2287
 
}
2288
 
 
2289
 
 
2290
 
/*
2291
 
        Closes a table. We call the free_share() function to free any resources
2292
 
        that we have allocated in the "shared" structure.
2293
 
 
2294
 
        Called from sql_base.cc, sql_select.cc, and table.cc.
2295
 
        In sql_select.cc it is only used to close up temporary tables or during
2296
 
        the process where a temporary table is converted over to being a
2297
 
        myisam table.
2298
 
        For sql_base.cc look at close_data_tables().
2299
 
*/
2300
 
int ha_pbxt::close(void)
2301
 
{
2302
 
        THD                                             *thd = current_thd;
2303
 
        volatile int                    err = 0;
2304
 
        volatile XTThreadPtr    self;
2305
 
 
2306
 
        if (thd)
2307
 
                self = ha_set_current_thread(thd, (int *) &err);
2308
 
        else {
2309
 
                XTExceptionRec e;
2310
 
 
2311
 
                if (!(self = xt_create_thread("TempForClose", FALSE, TRUE, &e))) {
2312
 
                        xt_log_exception(NULL, &e, XT_LOG_DEFAULT);
2313
 
                        return 0;
2314
 
                }
2315
 
        }
2316
 
 
2317
 
        XT_PRINT1(self, "close (%s)\n", pb_share && pb_share->sh_table_path->ps_path ? pb_share->sh_table_path->ps_path : "unknown");
2318
 
 
2319
 
        if (self) {
2320
 
                try_(a) {
2321
 
                        internal_close(thd, self);
2322
 
                }
2323
 
                catch_(a) {
2324
 
                        err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
2325
 
                }
2326
 
                cont_(a);
2327
 
 
2328
 
                if (!thd)
2329
 
                        xt_free_thread(self);
2330
 
        }
2331
 
        else
2332
 
                xt_log(XT_NS_CONTEXT, XT_LOG_WARNING, "Unable to release table reference\n");
2333
 
                
2334
 
        return err;
2335
 
}
2336
 
 
2337
 
void ha_pbxt::init_auto_increment(xtWord8 min_auto_inc)
2338
 
{
2339
 
        XTTableHPtr     tab;
2340
 
        xtWord8         nr = 0;
2341
 
        int                     err;
2342
 
 
2343
 
        /* Get the value of the auto-increment value by
2344
 
         * loading the highest value from the index...
2345
 
         */
2346
 
        tab = pb_open_tab->ot_table;
2347
 
 
2348
 
        /* Cannot do this if the index version is bad! */
2349
 
        if (tab->tab_dic.dic_disable_index)
2350
 
                return;
2351
 
 
2352
 
        xt_spinlock_lock(&tab->tab_ainc_lock);
2353
 
        if (getTable()->found_next_number_field && !tab->tab_auto_inc) {
2354
 
                Field           *tmp_fie = getTable()->next_number_field;
2355
 
                THD                     *tmp_thd = getTable()->in_use;
2356
 
                xtBool          xn_started = FALSE;
2357
 
                XTThreadPtr     self = pb_open_tab->ot_thread;
2358
 
 
2359
 
                /*
2360
 
                 * A table may be opened by a thread with a running
2361
 
                 * transaction!
2362
 
                 * Since get_auto_increment() does not do an update,
2363
 
                 * it should be OK to use the transaction we already
2364
 
                 * have to get the next auto-increment value.
2365
 
                 */
2366
 
                if (!self->st_xact_data) {
2367
 
                        self->st_xact_mode = XT_XACT_REPEATABLE_READ;
2368
 
                        self->st_ignore_fkeys = FALSE;
2369
 
                        self->st_auto_commit = TRUE;
2370
 
                        self->st_table_trans = FALSE;
2371
 
                        self->st_abort_trans = FALSE;
2372
 
                        self->st_stat_ended = FALSE;
2373
 
                        self->st_stat_trans = FALSE;
2374
 
                        self->st_is_update = NULL;
2375
 
                        if (!xt_xn_begin(self)) {
2376
 
                                xt_spinlock_unlock(&tab->tab_ainc_lock);
2377
 
                                xt_throw(self);
2378
 
                        }
2379
 
                        xn_started = TRUE;
2380
 
                }
2381
 
 
2382
 
                /* Setup the conditions for the next call! */
2383
 
                getTable()->in_use = current_thd;
2384
 
                getTable()->next_number_field = getTable()->found_next_number_field;
2385
 
 
2386
 
                extra(HA_EXTRA_KEYREAD);
2387
 
                getTable()->mark_columns_used_by_index_no_reset(getTable()->getShare()->next_number_index, *getTable()->read_set);
2388
 
                column_bitmaps_signal();
2389
 
                doStartIndexScan(getTable()->getShare()->next_number_index, 0);
2390
 
                if (!getTable()->getShare()->next_number_key_offset) {
2391
 
                        // Autoincrement at key-start
2392
 
                        err = index_last(getTable()->getUpdateRecord());
2393
 
                        if (!err && !getTable()->next_number_field->is_null(getTable()->getShare()->rec_buff_length)) {
2394
 
                                /* {PRE-INC} */
2395
 
                                nr = (xtWord8) getTable()->next_number_field->val_int_offset(getTable()->getShare()->rec_buff_length);
2396
 
                        }
2397
 
                }
2398
 
                else {
2399
 
                        /* Do an index scan to find the largest value! */
2400
 
                        /* The standard method will not work because it forces
2401
 
                         * us to lock that table!
2402
 
                         */
2403
 
                        xtWord8 val;
2404
 
 
2405
 
                        err = index_first(getTable()->getUpdateRecord());
2406
 
                        while (!err) {
2407
 
                                /* {PRE-INC} */
2408
 
                                val = (xtWord8) getTable()->next_number_field->val_int_offset(getTable()->getShare()->rec_buff_length);
2409
 
                                if (val > nr)
2410
 
                                        nr = val;
2411
 
                                err = index_next(getTable()->getUpdateRecord());
2412
 
                        }
2413
 
                }
2414
 
 
2415
 
                doEndIndexScan();
2416
 
                extra(HA_EXTRA_NO_KEYREAD);
2417
 
 
2418
 
                /* {PRE-INC}
2419
 
                 * I have changed this from post increment to pre-increment!
2420
 
                 * The reason is:
2421
 
                 * When using post increment we are not able to return
2422
 
                 * the last valid value in the range.
2423
 
                 *
2424
 
                 * Here the test example:
2425
 
                 *
2426
 
                 * drop table if exists t1;
2427
 
                 * create table t1 (i tinyint unsigned not null auto_increment primary key) engine=pbxt;
2428
 
                 * insert into t1 set i = 254;
2429
 
                 * insert into t1 set i = null;
2430
 
                 *
2431
 
                 * With post-increment, this last insert fails because on post increment
2432
 
                 * the value overflows!
2433
 
                 *
2434
 
                 * Pre-increment means we store the current max, and increment
2435
 
                 * before returning the next value.
2436
 
                 *
2437
 
                 * This will work in this situation.
2438
 
                 */
2439
 
                tab->tab_auto_inc = nr;
2440
 
                if (tab->tab_auto_inc < tab->tab_dic.dic_min_auto_inc)
2441
 
                        tab->tab_auto_inc = tab->tab_dic.dic_min_auto_inc-1;
2442
 
                if (tab->tab_auto_inc < min_auto_inc)
2443
 
                        tab->tab_auto_inc = min_auto_inc-1;
2444
 
 
2445
 
                /* Restore the changed values: */
2446
 
                getTable()->next_number_field = tmp_fie;
2447
 
                getTable()->in_use = tmp_thd;
2448
 
 
2449
 
                if (xn_started) {
2450
 
                        XT_PRINT0(self, "xt_xn_commit in init_auto_increment\n");
2451
 
                        xt_xn_commit(self);
2452
 
                }
2453
 
        }
2454
 
        xt_spinlock_unlock(&tab->tab_ainc_lock);
2455
 
}
2456
 
 
2457
 
void ha_pbxt::get_auto_increment(MX_ULONGLONG_T offset, MX_ULONGLONG_T increment,
2458
 
                                 MX_ULONGLONG_T XT_UNUSED(nb_desired_values),
2459
 
                                 MX_ULONGLONG_T *first_value,
2460
 
                                 MX_ULONGLONG_T *nb_reserved_values)
2461
 
{
2462
 
        register XTTableHPtr    tab;
2463
 
        MX_ULONGLONG_T                  nr, nr_less_inc;
2464
 
 
2465
 
        ASSERT_NS(pb_ex_in_use);
2466
 
 
2467
 
        tab = pb_open_tab->ot_table;
2468
 
 
2469
 
        /* {PRE-INC}
2470
 
         * Assume that nr contains the last value returned!
2471
 
         * We will increment and then return the value.
2472
 
         */
2473
 
        xt_spinlock_lock(&tab->tab_ainc_lock);
2474
 
        nr = (MX_ULONGLONG_T) tab->tab_auto_inc;
2475
 
        nr_less_inc = nr;
2476
 
        if (nr < offset)
2477
 
                nr = offset;
2478
 
        else if (increment > 1 && ((nr - offset) % increment) != 0)
2479
 
                nr += increment - ((nr - offset) % increment);
2480
 
        else
2481
 
                nr += increment;
2482
 
        if (getTable()->next_number_field->cmp((const unsigned char *)&nr_less_inc, (const unsigned char *)&nr) < 0)
2483
 
                tab->tab_auto_inc = (xtWord8) (nr);
2484
 
        else
2485
 
                nr = ~0;        /* indicate error to the caller */
2486
 
        xt_spinlock_unlock(&tab->tab_ainc_lock);
2487
 
 
2488
 
        *first_value = nr;
2489
 
        *nb_reserved_values = 1;
2490
 
}
2491
 
 
2492
 
/* GOTCHA: We need to use signed value here because of the test
2493
 
 * (from auto_increment.test):
2494
 
 * create table t1 (a int not null auto_increment primary key);
2495
 
 * insert into t1 values (NULL);
2496
 
 * insert into t1 values (-1);
2497
 
 * insert into t1 values (NULL);
2498
 
 */
2499
 
xtPublic void ha_set_auto_increment(XTOpenTablePtr ot, Field *nr)
2500
 
{
2501
 
        register XTTableHPtr    tab;
2502
 
        MX_ULONGLONG_T                  nr_int_val;
2503
 
        
2504
 
        nr_int_val = nr->val_int();
2505
 
        tab = ot->ot_table;
2506
 
 
2507
 
        if (nr->cmp_internal((const unsigned char *)&tab->tab_auto_inc) > 0) {
2508
 
                xt_spinlock_lock(&tab->tab_ainc_lock);
2509
 
 
2510
 
                if (nr->cmp_internal((const unsigned char *)&tab->tab_auto_inc) > 0) {
2511
 
                  /* {PRE-INC}
2512
 
                   * We increment later, so just set the value!
2513
 
                   MX_ULONGLONG_T nr_int_val_plus_one = nr_int_val + 1;
2514
 
                   if (nr->cmp((const unsigned char *)&nr_int_val_plus_one) < 0)
2515
 
                   tab->tab_auto_inc = nr_int_val_plus_one;
2516
 
                   else
2517
 
                 */
2518
 
                  tab->tab_auto_inc = nr_int_val;
2519
 
                }
2520
 
                xt_spinlock_unlock(&tab->tab_ainc_lock);
2521
 
        }
2522
 
 
2523
 
        if (xt_db_auto_increment_mode == 1) {
2524
 
                if (nr_int_val > (MX_ULONGLONG_T) tab->tab_dic.dic_min_auto_inc) {
2525
 
                        /* Do this every 100 calls: */
2526
 
#ifdef DEBUG
2527
 
                        tab->tab_dic.dic_min_auto_inc = nr_int_val + 5;
2528
 
#else
2529
 
                        tab->tab_dic.dic_min_auto_inc = nr_int_val + 100;
2530
 
#endif
2531
 
                        ot->ot_thread = xt_get_self();
2532
 
                        if (!xt_tab_write_min_auto_inc(ot))
2533
 
                                xt_log_and_clear_exception(ot->ot_thread);
2534
 
                }
2535
 
        }
2536
 
}
2537
 
 
2538
 
/*
2539
 
static void dump_buf(unsigned char *buf, int len)
2540
 
{
2541
 
        int i;
2542
 
        
2543
 
        for (i=0; i<len; i++) printf("%2c", buf[i] <= 127 ? buf[i] : '.');
2544
 
        printf("\n");
2545
 
        for (i=0; i<len; i++) printf("%02x", buf[i]);
2546
 
        printf("\n");
2547
 
}
2548
 
*/
2549
 
 
2550
 
/*
2551
 
 * doInsertRecord() inserts a row. No extra() hint is given currently if a bulk load
2552
 
 * is happeneding. buf() is a byte array of data. You can use the field
2553
 
 * information to extract the data from the native byte array type.
2554
 
 * Example of this would be:
2555
 
 * for (Field **field=table->field ; *field ; field++)
2556
 
 * {
2557
 
 *              ...
2558
 
 * }
2559
 
 
2560
 
 * See ha_tina.cc for an example of extracting all of the data as strings.
2561
 
 * ha_berekly.cc has an example of how to store it intact by "packing" it
2562
 
 * for ha_berkeley's own native storage type.
2563
 
 
2564
 
 * See the note for doUpdateRecord() on auto_increments and timestamps. This
2565
 
 * case also applied to doInsertRecord().
2566
 
 
2567
 
 * Called from item_sum.cc, item_sum.cc, sql_acl.cc, sql_insert.cc,
2568
 
 * sql_insert.cc, sql_select.cc, sql_table.cc, sql_udf.cc, and sql_update.cc.
2569
 
 */
2570
 
int ha_pbxt::doInsertRecord(byte *buf)
2571
 
{
2572
 
        int err = 0;
2573
 
 
2574
 
        ASSERT_NS(pb_ex_in_use);
2575
 
 
2576
 
        XT_PRINT1(pb_open_tab->ot_thread, "doInsertRecord (%s)\n", pb_share->sh_table_path->ps_path);
2577
 
        XT_DISABLED_TRACE(("INSERT tx=%d val=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(&buf[1])));
2578
 
        //statistic_increment(ha_write_count,&LOCK_status);
2579
 
#ifdef PBMS_ENABLED
2580
 
        PBMSResultRec result;
2581
 
        err = pbms_doInsertRecord_blobs(table, buf, &result);
2582
 
        if (err) {
2583
 
                xt_logf(XT_NT_ERROR, "pbms_doInsertRecord_blobs() Error: %s", result.mr_message);
2584
 
                return err;
2585
 
        }
2586
 
#endif
2587
 
 
2588
 
        /* {START-STAT-HACK} previously position of start statement hack. */
2589
 
        xt_xlog_check_long_writer(pb_open_tab->ot_thread);
2590
 
 
2591
 
        if (pb_open_tab->ot_thread->st_import_stat) {
2592
 
                if (pb_import_row_count >= XT_IMPORT_ROW_COUNT) {
2593
 
                        /* Commit and restart the transaction. */
2594
 
                        XTThreadPtr thread = pb_open_tab->ot_thread;
2595
 
 
2596
 
                        XT_PRINT0(thread, "xt_xn_commit in doInsertRecord\n");
2597
 
                        if (!xt_xn_commit(thread)) {
2598
 
                                err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, thread, pb_ignore_dup_key);
2599
 
                                return err;
2600
 
                        }
2601
 
                        XT_PRINT0(thread, "xt_xn_begin in doInsertRecord\n");
2602
 
                        if (!xt_xn_begin(thread)) {
2603
 
                                err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, thread, pb_ignore_dup_key);
2604
 
                                return err;
2605
 
                        }
2606
 
                        pb_import_row_count = 0;
2607
 
                }
2608
 
                else
2609
 
                        pb_import_row_count++;
2610
 
        }
2611
 
 
2612
 
        if (getTable()->next_number_field && buf == getTable()->getInsertRecord()) {
2613
 
                int update_err = update_auto_increment();
2614
 
                if (update_err) {
2615
 
                        ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2616
 
                        err = update_err;
2617
 
                        goto done;
2618
 
                }
2619
 
                ha_set_auto_increment(pb_open_tab, getTable()->next_number_field);
2620
 
        }
2621
 
 
2622
 
        if (!xt_tab_new_record(pb_open_tab, (xtWord1 *) buf)) {
2623
 
                err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2624
 
 
2625
 
                /*
2626
 
                 * This is needed to allow the same row to be updated multiple times in case of bulk REPLACE.
2627
 
                 * This happens during execution of LOAD DATA...REPLACE MySQL first tries to INSERT the row 
2628
 
                 * and if it gets dup-key error it tries UPDATE, so the same row can be overwriten multiple 
2629
 
                 * times within the same statement
2630
 
                 */
2631
 
                if (err == HA_ERR_FOUND_DUPP_KEY && pb_open_tab->ot_thread->st_is_update) {
2632
 
                        /* Pop the update stack: */
2633
 
                        //pb_open_tab->ot_thread->st_update_id++;
2634
 
                        XTOpenTablePtr curr = pb_open_tab->ot_thread->st_is_update;
2635
 
 
2636
 
                        pb_open_tab->ot_thread->st_is_update = curr->ot_prev_update;
2637
 
                        curr->ot_prev_update = NULL;
2638
 
                }
2639
 
        }
2640
 
 
2641
 
        done:
2642
 
#ifdef PBMS_ENABLED
2643
 
        pbms_completed(table, (err == 0));
2644
 
#endif
2645
 
        return err;
2646
 
}
2647
 
 
2648
 
#ifdef UNUSED_CODE
2649
 
static int equ_bin(const byte *a, const char *b)
2650
 
{
2651
 
        while (*a && *b) {
2652
 
                if (*a != *b)
2653
 
                        return 0;
2654
 
                a++;
2655
 
                b++;
2656
 
        }
2657
 
        return 1;
2658
 
}
2659
 
static void dump_bin(const byte *a_in, int offset, int len_in)
2660
 
{
2661
 
        const byte      *a = a_in;
2662
 
        int                     len = len_in;
2663
 
        
2664
 
        a += offset;
2665
 
        while (len > 0) {
2666
 
                xt_trace("%02X", (int) *a);
2667
 
                a++;
2668
 
                len--;
2669
 
        }
2670
 
        xt_trace("==");
2671
 
        a = a_in;
2672
 
        len = len_in;
2673
 
        a += offset;
2674
 
        while (len > 0) {
2675
 
                xt_trace("%c", (*a > 8 && *a < 127) ? *a : '.');
2676
 
                a++;
2677
 
                len--;
2678
 
        }
2679
 
        xt_trace("\n");
2680
 
}
2681
 
#endif
2682
 
 
2683
 
/*
2684
 
 * Yes, doUpdateRecord() does what you expect, it updates a row. old_data will have
2685
 
 * the previous row record in it, while new_data will have the newest data in
2686
 
 * it. Keep in mind that the server can do updates based on ordering if an ORDER BY
2687
 
 * clause was used. Consecutive ordering is not guarenteed.
2688
 
 *
2689
 
 * Called from sql_select.cc, sql_acl.cc, sql_update.cc, and sql_insert.cc.
2690
 
 */
2691
 
int ha_pbxt::doUpdateRecord(const byte * old_data, byte * new_data)
2692
 
{
2693
 
        int                                             err = 0;
2694
 
        register XTThreadPtr    self = pb_open_tab->ot_thread;
2695
 
 
2696
 
        ASSERT_NS(pb_ex_in_use);
2697
 
 
2698
 
        XT_PRINT1(self, "update_row (%s)\n", pb_share->sh_table_path->ps_path);
2699
 
        XT_DISABLED_TRACE(("UPDATE tx=%d val=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(&new_data[1])));
2700
 
        //statistic_increment(ha_update_count,&LOCK_status);
2701
 
 
2702
 
        /* {START-STAT-HACK} previously position of start statement hack. */
2703
 
 
2704
 
        xt_xlog_check_long_writer(self);
2705
 
 
2706
 
        /* {UPDATE-STACK} */
2707
 
        if (self->st_is_update != pb_open_tab) {
2708
 
                /* Push the update stack: */
2709
 
                pb_open_tab->ot_prev_update = self->st_is_update;
2710
 
                self->st_is_update = pb_open_tab;
2711
 
                pb_open_tab->ot_update_id++;
2712
 
        }
2713
 
 
2714
 
#ifdef PBMS_ENABLED
2715
 
        PBMSResultRec result;
2716
 
 
2717
 
        err = pbms_delete_row_blobs(table, old_data, &result);
2718
 
        if (err) {
2719
 
                xt_logf(XT_NT_ERROR, "update_row:pbms_delete_row_blobs() Error: %s", result.mr_message);
2720
 
                return err;
2721
 
        }
2722
 
        err = pbms_doInsertRecord_blobs(table, new_data, &result);
2723
 
        if (err) { 
2724
 
                xt_logf(XT_NT_ERROR, "update_row:pbms_doInsertRecord_blobs() Error: %s", result.mr_message);
2725
 
                goto pbms_done;
2726
 
        }
2727
 
#endif
2728
 
 
2729
 
        /* GOTCHA: We need to check the auto-increment value on update
2730
 
         * because of the following test (which fails for InnoDB) -
2731
 
         * auto_increment.test:
2732
 
         * create table t1 (a int not null auto_increment primary key, val int);
2733
 
         * insert into t1 (val) values (1);
2734
 
         * update t1 set a=2 where a=1;
2735
 
         * insert into t1 (val) values (1);
2736
 
         */
2737
 
        if (getTable()->found_next_number_field && new_data == getTable()->getInsertRecord()) {
2738
 
                MX_LONGLONG_T   nr;
2739
 
        const boost::dynamic_bitset<>& old_bitmap= getTable()->use_all_columns(*getTable()->read_set);
2740
 
                nr = getTable()->found_next_number_field->val_int();
2741
 
                ha_set_auto_increment(pb_open_tab, getTable()->found_next_number_field);
2742
 
        getTable()->restore_column_map(old_bitmap);
2743
 
        }
2744
 
 
2745
 
        if (!xt_tab_update_record(pb_open_tab, (xtWord1 *) old_data, (xtWord1 *) new_data))
2746
 
                err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2747
 
 
2748
 
        pb_open_tab->ot_table->tab_locks.xt_remove_temp_lock(pb_open_tab, TRUE);
2749
 
        
2750
 
#ifdef PBMS_ENABLED
2751
 
        pbms_done:
2752
 
        pbms_completed(table, (err == 0));
2753
 
#endif
2754
 
 
2755
 
        return err;
2756
 
}
2757
 
 
2758
 
/*
2759
 
 * This will delete a row. buf will contain a copy of the row to be deleted.
2760
 
 * The server will call this right after the current row has been called (from
2761
 
 * either a previous rnd_next() or index call).
2762
 
 *
2763
 
 * Called in sql_acl.cc and sql_udf.cc to manage internal table information.
2764
 
 * Called in sql_delete.cc, sql_insert.cc, and sql_select.cc. In sql_select it is
2765
 
 * used for removing duplicates while in insert it is used for REPLACE calls.
2766
 
*/
2767
 
int ha_pbxt::doDeleteRecord(const byte * buf)
2768
 
{
2769
 
        int err = 0;
2770
 
 
2771
 
        ASSERT_NS(pb_ex_in_use);
2772
 
 
2773
 
        XT_PRINT1(pb_open_tab->ot_thread, "delete_row (%s)\n", pb_share->sh_table_path->ps_path);
2774
 
        XT_DISABLED_TRACE(("DELETE tx=%d val=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(&buf[1])));
2775
 
        //statistic_increment(ha_delete_count,&LOCK_status);
2776
 
 
2777
 
#ifdef PBMS_ENABLED
2778
 
        PBMSResultRec result;
2779
 
 
2780
 
        err = pbms_delete_row_blobs(table, buf, &result);
2781
 
        if (err) {
2782
 
                xt_logf(XT_NT_ERROR, "pbms_delete_row_blobs() Error: %s", result.mr_message);
2783
 
                return err;
2784
 
        }
2785
 
#endif
2786
 
 
2787
 
        /* {START-STAT-HACK} previously position of start statement hack. */
2788
 
 
2789
 
        xt_xlog_check_long_writer(pb_open_tab->ot_thread);
2790
 
 
2791
 
        if (!xt_tab_delete_record(pb_open_tab, (xtWord1 *) buf))
2792
 
                err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2793
 
 
2794
 
        pb_open_tab->ot_table->tab_locks.xt_remove_temp_lock(pb_open_tab, TRUE);
2795
 
 
2796
 
#ifdef PBMS_ENABLED
2797
 
        pbms_completed(table, (err == 0));
2798
 
#endif
2799
 
        return err;
2800
 
}
2801
 
 
2802
 
/*
2803
 
 * -----------------------------------------------------------------------
2804
 
 * INDEX METHODS
2805
 
 */
2806
 
 
2807
 
/*
2808
 
 * This looks like a hack, but actually, it is OK.
2809
 
 * It depends on the setup done by the super-class. It involves an extra
2810
 
 * range check that we need to do if a "new" record is returned during
2811
 
 * an index scan.
2812
 
 *
2813
 
 * A new record is returned if a row is updated (by another transaction)
2814
 
 * during the index scan. If an update is detected, then the scan stops
2815
 
 * and waits for the transaction to end.
2816
 
 *
2817
 
 * If the transaction commits, then the updated row is returned instead
2818
 
 * of the row it would have returned when doing a consistent read
2819
 
 * (repeatable read).
2820
 
 *
2821
 
 * These new records can appear out of index order, and may not even
2822
 
 * belong to the index range that we are concerned with.
2823
 
 *
2824
 
 * Notice that there is no check for the start of the range. It appears
2825
 
 * that this is not necessary, MySQL seems to have no problem ignoring
2826
 
 * such values.
2827
 
 *
2828
 
 * A number of tests are given below which demonstrate the use
2829
 
 * of the function.
2830
 
 *
2831
 
 * They also demonstrate the ORDER BY problem described here: [(11)].
2832
 
 *
2833
 
 * DROP TABLE IF EXISTS test_tab, test_tab_1, test_tab_2;
2834
 
 * CREATE TABLE test_tab (ID int primary key, Value int, Name varchar(20), index(Value, Name)) ENGINE=pbxt;
2835
 
 * INSERT test_tab values(1, 1, 'A');
2836
 
 * INSERT test_tab values(2, 1, 'B');
2837
 
 * INSERT test_tab values(3, 1, 'C');
2838
 
 * INSERT test_tab values(4, 2, 'D');
2839
 
 * INSERT test_tab values(5, 2, 'E');
2840
 
 * INSERT test_tab values(6, 2, 'F');
2841
 
 * INSERT test_tab values(7, 2, 'G');
2842
 
 * 
2843
 
 * select * from test_tab where value = 1 order by value, name for update;
2844
 
 * 
2845
 
 * -- Test: 1
2846
 
 * -- C1
2847
 
 * begin;
2848
 
 * select * from test_tab where id = 5 for update;
2849
 
 * 
2850
 
 * -- C2
2851
 
 * begin;
2852
 
 * select * from test_tab where value = 2 order by value, name for update;
2853
 
 * 
2854
 
 * -- C1
2855
 
 * update test_tab set value = 3 where id = 6;
2856
 
 * commit;
2857
 
 * 
2858
 
 * -- Test: 2
2859
 
 * -- C1
2860
 
 * begin;
2861
 
 * select * from test_tab where id = 5 for update;
2862
 
 * 
2863
 
 * -- C2
2864
 
 * begin;
2865
 
 * select * from test_tab where value >= 2 order by value, name for update;
2866
 
 * 
2867
 
 * -- C1
2868
 
 * update test_tab set value = 3 where id = 6;
2869
 
 * commit;
2870
 
 * 
2871
 
 * -- Test: 3
2872
 
 * -- C1
2873
 
 * begin;
2874
 
 * select * from test_tab where id = 5 for update;
2875
 
 * 
2876
 
 * -- C2
2877
 
 * begin;
2878
 
 * select * from test_tab where value = 2 order by value, name for update;
2879
 
 * 
2880
 
 * -- C1
2881
 
 * update test_tab set value = 1 where id = 6;
2882
 
 * commit;
2883
 
 */
2884
 
 
2885
 
int ha_pbxt::xt_index_in_range(register XTOpenTablePtr XT_UNUSED(ot), register XTIndexPtr ind,
2886
 
        register XTIdxSearchKeyPtr search_key, xtWord1 *buf)
2887
 
{
2888
 
        /* If search key is given, this means we want an exact match. */
2889
 
        if (search_key) {
2890
 
                xtWord1 key_buf[XT_INDEX_MAX_KEY_SIZE];
2891
 
 
2892
 
                myxt_create_key_from_row(ind, key_buf, buf, NULL);
2893
 
                search_key->sk_on_key = myxt_compare_key(ind, search_key->sk_key_value.sv_flags, search_key->sk_key_value.sv_length,
2894
 
                        search_key->sk_key_value.sv_key, key_buf) == 0;
2895
 
                return search_key->sk_on_key;
2896
 
        }
2897
 
 
2898
 
        /* Otherwise, check the end of the range. */
2899
 
        if (end_range)
2900
 
                return compare_key(end_range) <= 0;
2901
 
        return 1;
2902
 
}
2903
 
 
2904
 
int ha_pbxt::xt_index_next_read(register XTOpenTablePtr ot, register XTIndexPtr ind, xtBool key_only,
2905
 
        register XTIdxSearchKeyPtr search_key, byte *buf)
2906
 
{
2907
 
        xt_xlog_check_long_writer(ot->ot_thread);
2908
 
 
2909
 
        if (key_only) {
2910
 
                /* We only need to read the data from the key: */
2911
 
                while (ot->ot_curr_rec_id) {
2912
 
                        if (search_key && !search_key->sk_on_key)
2913
 
                                break;
2914
 
 
2915
 
                        switch (xt_tab_visible(ot)) {
2916
 
                                case FALSE:
2917
 
                                        if (xt_idx_next(ot, ind, search_key))
2918
 
                                                break;
2919
 
                                case XT_ERR:
2920
 
                                        goto failed;
2921
 
                                case XT_NEW:
2922
 
                                        if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
2923
 
                                                goto failed;
2924
 
                                        if (xt_index_in_range(ot, ind, search_key, buf)) {
2925
 
                                                return 0;
2926
 
                                        }
2927
 
                                        if (!xt_idx_next(ot, ind, search_key))
2928
 
                                                goto failed;
2929
 
                                        break;
2930
 
                                case XT_RETRY:
2931
 
                                        /* We cannot start from the beginning again, if we have
2932
 
                                         * already output rows!
2933
 
                                         * And we need the orginal search key.
2934
 
                                         *
2935
 
                                         * The case in which this occurs is:
2936
 
                                         *
2937
 
                                         * T1: UPDATE tbl_file SET GlobalID = 'DBCD5C4514210200825501089884844_6M' WHERE ID = 39
2938
 
                                         * Locks a particular row.
2939
 
                                         *
2940
 
                                         * T2: SELECT ID,Flags FROM tbl_file WHERE SpaceID = 1 AND Path = '/zi/America/' AND 
2941
 
                                         * Name = 'Cuiaba' AND Flags IN ( 0,1,4,5 ) FOR UPDATE
2942
 
                                         * scans the index and stops on the lock (of the before image) above.
2943
 
                                         *
2944
 
                                         * T1 quits, the sweeper deletes the record updated by T1?!
2945
 
                                         * BUG: Cleanup should wait until T2 is complete!
2946
 
                                         *
2947
 
                                         * T2 continues, and returns XT_RETRY.
2948
 
                                         *
2949
 
                                         * At this stage T2 has already returned some rows, so it may not retry from the
2950
 
                                         * start. Instead it tries to locate the last record it tried to lock.
2951
 
                                         * This record is gone (or not visible), so it finds the next one.
2952
 
                                         *
2953
 
                                         * POTENTIAL BUG: If cleanup does not wait until T2 is complete, then
2954
 
                                         * I may miss the update record, if it is moved before the index scan
2955
 
                                         * position.
2956
 
                                         */
2957
 
                                        if (!pb_ind_row_count && search_key) {
2958
 
                                                if (!xt_idx_search(pb_open_tab, ind, search_key))
2959
 
                                                        return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2960
 
                                        }
2961
 
                                        else {
2962
 
                                                if (!xt_idx_research(pb_open_tab, ind))
2963
 
                                                        goto failed;
2964
 
                                        }
2965
 
                                        break;
2966
 
                                default:
2967
 
                                        if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
2968
 
                                                goto failed;
2969
 
                                        return 0;
2970
 
                        }
2971
 
                }
2972
 
        }
2973
 
        else {
2974
 
                while (ot->ot_curr_rec_id) {
2975
 
                        if (search_key && !search_key->sk_on_key)
2976
 
                                break;
2977
 
 
2978
 
                        switch (xt_tab_read_record(ot, (xtWord1 *) buf)) {
2979
 
                                case FALSE:
2980
 
                                        XT_DISABLED_TRACE(("not visi tx=%d rec=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) ot->ot_curr_rec_id));
2981
 
                                        if (xt_idx_next(ot, ind, search_key))
2982
 
                                                break;
2983
 
                                case XT_ERR:
2984
 
                                        goto failed;
2985
 
                                case XT_NEW:
2986
 
                                        if (xt_index_in_range(ot, ind, search_key, buf))
2987
 
                                                return 0;
2988
 
                                        if (!xt_idx_next(ot, ind, search_key))
2989
 
                                                goto failed;
2990
 
                                        break;
2991
 
                                case XT_RETRY:
2992
 
                                        if (!pb_ind_row_count && search_key) {
2993
 
                                                if (!xt_idx_search(pb_open_tab, ind, search_key))
2994
 
                                                        return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
2995
 
                                        }
2996
 
                                        else {
2997
 
                                                if (!xt_idx_research(pb_open_tab, ind))
2998
 
                                                        goto failed;
2999
 
                                        }
3000
 
                                        break;
3001
 
                                default:
3002
 
                                        XT_DISABLED_TRACE(("visible tx=%d rec=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) ot->ot_curr_rec_id));
3003
 
                                        return 0;
3004
 
                        }
3005
 
                }
3006
 
        }
3007
 
        return HA_ERR_END_OF_FILE;
3008
 
 
3009
 
        failed:
3010
 
        return ha_log_pbxt_thread_error_for_mysql(FALSE);
3011
 
}
3012
 
 
3013
 
int ha_pbxt::xt_index_prev_read(XTOpenTablePtr ot, XTIndexPtr ind, xtBool key_only,
3014
 
        register XTIdxSearchKeyPtr search_key, byte *buf)
3015
 
{
3016
 
        if (key_only) {
3017
 
                /* We only need to read the data from the key: */
3018
 
                while (ot->ot_curr_rec_id) {
3019
 
                        if (search_key && !search_key->sk_on_key)
3020
 
                                break;
3021
 
 
3022
 
                        switch (xt_tab_visible(ot)) {
3023
 
                                case FALSE:
3024
 
                                        if (xt_idx_prev(ot, ind, search_key))
3025
 
                                                break;
3026
 
                                case XT_ERR:
3027
 
                                        goto failed;
3028
 
                                case XT_NEW:
3029
 
                                        if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3030
 
                                                goto failed;
3031
 
                                        if (xt_index_in_range(ot, ind, search_key, buf))
3032
 
                                                return 0;
3033
 
                                        if (!xt_idx_next(ot, ind, search_key))
3034
 
                                                goto failed;
3035
 
                                        break;
3036
 
                                case XT_RETRY:
3037
 
                                        if (!pb_ind_row_count && search_key) {
3038
 
                                                if (!xt_idx_search_prev(pb_open_tab, ind, search_key))
3039
 
                                                        return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3040
 
                                        }
3041
 
                                        else {
3042
 
                                                if (!xt_idx_research(pb_open_tab, ind))
3043
 
                                                        goto failed;
3044
 
                                        }
3045
 
                                        break;
3046
 
                                default:
3047
 
                                        if (!xt_idx_read(ot, ind, (xtWord1 *) buf))
3048
 
                                                goto failed;
3049
 
                                        return 0;
3050
 
                        }
3051
 
                }
3052
 
        }
3053
 
        else {
3054
 
                /* We need to read the entire record: */
3055
 
                while (ot->ot_curr_rec_id) {
3056
 
                        if (search_key && !search_key->sk_on_key)
3057
 
                                break;
3058
 
 
3059
 
                        switch (xt_tab_read_record(ot, (xtWord1 *) buf)) {
3060
 
                                case FALSE:
3061
 
                                        if (xt_idx_prev(ot, ind, search_key))
3062
 
                                                break;
3063
 
                                case XT_ERR:
3064
 
                                        goto failed;
3065
 
                                case XT_NEW:
3066
 
                                        if (xt_index_in_range(ot, ind, search_key, buf))
3067
 
                                                return 0;
3068
 
                                        if (!xt_idx_next(ot, ind, search_key))
3069
 
                                                goto failed;
3070
 
                                        break;
3071
 
                                case XT_RETRY:
3072
 
                                        if (!pb_ind_row_count && search_key) {
3073
 
                                                if (!xt_idx_search_prev(pb_open_tab, ind, search_key))
3074
 
                                                        return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3075
 
                                        }
3076
 
                                        else {
3077
 
                                                if (!xt_idx_research(pb_open_tab, ind))
3078
 
                                                        goto failed;
3079
 
                                        }
3080
 
                                        break;
3081
 
                                default:
3082
 
                                        return 0;
3083
 
                        }
3084
 
                }
3085
 
        }
3086
 
        return HA_ERR_END_OF_FILE;
3087
 
 
3088
 
        failed:
3089
 
        return ha_log_pbxt_thread_error_for_mysql(FALSE);
3090
 
}
3091
 
 
3092
 
#ifdef DRIZZLED
3093
 
 
3094
 
/*
 * Render the low bitset_size bits of bitset as a string of '0'/'1'
 * characters, most significant bit first.
 *
 * The result always has exactly bitset_size characters:
 *  - positions beyond the 64 bits of the argument are '0' (zero padding),
 *  - bits of the argument at or above bitset_size are not represented,
 *  - bitset_size == 0 yields an empty string.
 *
 * The length is never derived from an unsigned subtraction, so a
 * bitset_size smaller than the number of significant bits cannot wrap
 * around into an enormous allocation request.
 */
static std::string convert_long_to_bit_string(uint64_t bitset, uint64_t bitset_size)
{
  std::string bits;
  for (uint64_t pos= bitset_size; pos-- > 0; )
  {
    /* Shifting by 64 or more is undefined, and such bits cannot be set. */
    const bool is_set= (pos < 64) && (((bitset >> pos) & 1) != 0);
    bits.push_back(is_set ? '1' : '0');
  }
  return bits;
}
3114
 
#endif
3115
 
 
3116
 
int ha_pbxt::doStartIndexScan(uint idx, bool XT_UNUSED(sorted))
3117
 
{
3118
 
        XTIndexPtr      ind;
3119
 
        XTThreadPtr     thread = pb_open_tab->ot_thread;
3120
 
 
3121
 
        /* select count(*) from smalltab_PBXT;
3122
 
         * ignores the error below, and continues to
3123
 
         * call index_first!
3124
 
         */
3125
 
        active_index = idx;
3126
 
 
3127
 
        if (pb_open_tab->ot_table->tab_dic.dic_disable_index) {
3128
 
                active_index = MAX_KEY;
3129
 
                xt_tab_set_index_error(pb_open_tab->ot_table);
3130
 
                return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3131
 
        }
3132
 
 
3133
 
        /* The number of columns required: */
3134
 
        if (pb_open_tab->ot_is_modify) {
3135
 
 
3136
 
                pb_open_tab->ot_cols_req = getTable()->read_set->MX_BIT_SIZE();
3137
 
#ifdef XT_PRINT_INDEX_OPT
3138
 
                ind = (XTIndexPtr) pb_share->sh_dic_keys[idx];
3139
 
 
3140
 
                printf("index_init %s index %d cols req=%d/%d read_bits=%X write_bits=%X index_bits=%X\n", pb_open_tab->ot_table->tab_name->ps_path, (int) idx, pb_open_tab->ot_cols_req, pb_open_tab->ot_cols_req, (int) *table->read_set->bitmap, (int) *table->write_set->bitmap, (int) *ind->mi_col_map.bitmap);
3141
 
#endif
3142
 
                /* {START-STAT-HACK} previously position of start statement hack,
3143
 
                 * previous comment to code below: */
3144
 
                /* Start a statement based transaction as soon
3145
 
                 * as a read is done for a modify type statement!
3146
 
                 * Previously, this was done too late!
3147
 
                 */
3148
 
        }
3149
 
        else {
3150
 
                //pb_open_tab->ot_cols_req = ha_get_max_bit(table->read_set);
3151
 
                pb_open_tab->ot_cols_req = getTable()->read_set->MX_BIT_SIZE();
3152
 
 
3153
 
                /* Check for index coverage!
3154
 
                 *
3155
 
                 * Given the following table:
3156
 
                 *
3157
 
                 * CREATE TABLE `customer` (
3158
 
                 * `c_id` int(11) NOT NULL DEFAULT '0',
3159
 
                 * `c_d_id` int(11) NOT NULL DEFAULT '0',
3160
 
                 * `c_w_id` int(11) NOT NULL DEFAULT '0',
3161
 
                 * `c_first` varchar(16) DEFAULT NULL,
3162
 
                 * `c_middle` char(2) DEFAULT NULL,
3163
 
                 * `c_last` varchar(16) DEFAULT NULL,
3164
 
                 * `c_street_1` varchar(20) DEFAULT NULL,
3165
 
                 * `c_street_2` varchar(20) DEFAULT NULL,
3166
 
                 * `c_city` varchar(20) DEFAULT NULL,
3167
 
                 * `c_state` char(2) DEFAULT NULL,
3168
 
                 * `c_zip` varchar(9) DEFAULT NULL,
3169
 
                 * `c_phone` varchar(16) DEFAULT NULL,
3170
 
                 * `c_since` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
3171
 
                 * `c_credit` char(2) DEFAULT NULL,
3172
 
                 * `c_credit_lim` decimal(24,12) DEFAULT NULL,
3173
 
                 * `c_discount` double DEFAULT NULL,
3174
 
                 * `c_balance` decimal(24,12) DEFAULT NULL,
3175
 
                 * `c_ytd_payment` decimal(24,12) DEFAULT NULL,
3176
 
                 * `c_payment_cnt` double DEFAULT NULL,
3177
 
                 * `c_delivery_cnt` double DEFAULT NULL,
3178
 
                 * `c_data` text,
3179
 
                 * PRIMARY KEY (`c_w_id`,`c_d_id`,`c_id`),
3180
 
                 * KEY `c_w_id` (`c_w_id`,`c_d_id`,`c_last`,`c_first`,`c_id`)
3181
 
                 * ) ENGINE=PBXT;
3182
 
                 *
3183
 
                 * MySQL does not recognize index coverage on the followin select:
3184
 
                 *
3185
 
                 * SELECT c_id FROM customer WHERE c_w_id = 3 AND c_d_id = 8 AND 
3186
 
                 * c_last = 'EINGATIONANTI' ORDER BY c_first ASC LIMIT 1;
3187
 
                 *
3188
 
                 * TODO: Find out why this is necessary, MyISAM does not
3189
 
                 * seem to have this problem!
3190
 
                 */
3191
 
                ind = (XTIndexPtr) pb_share->sh_dic_keys[idx];
3192
 
#ifdef DRIZZLED
3193
 
        /*
3194
 
         * Need to do this for drizzle because we use boost's dynamic_bitset
3195
 
         * to represent the bitsets and allocating memory for an object of that 
3196
 
         * type does not play well with the memory allocation routines in PBXT.
3197
 
         * For that reason, we just store a uint which represents the bitset
3198
 
         * in the XTIndexPtr structure for PBXT. 
3199
 
         */
3200
 
        std::string bitmap_str= convert_long_to_bit_string(ind->mi_col_map, ind->mi_col_map_size);
3201
 
        MX_BITMAP tmp(bitmap_str);
3202
 
                if (MX_BIT_IS_SUBSET(getTable()->read_set, tmp))
3203
 
#else
3204
 
                if (MX_BIT_IS_SUBSET(getTable()->read_set, ind->mi_col_map))
3205
 
#endif
3206
 
                        pb_key_read = TRUE;
3207
 
#ifdef XT_PRINT_INDEX_OPT
3208
 
                printf("index_init %s index %d cols req=%d/%d read_bits=%X write_bits=%X index_bits=%X converage=%d\n", pb_open_tab->ot_table->tab_name->ps_path, (int) idx, pb_open_tab->ot_cols_req, table->read_set->MX_BIT_SIZE(), (int) *table->read_set->bitmap, (int) *table->write_set->bitmap, (int) *ind->mi_col_map.bitmap, (int) (MX_BIT_IS_SUBSET(table->read_set, &ind->mi_col_map) != 0));
3209
 
#endif
3210
 
        }
3211
 
        
3212
 
        xt_xlog_check_long_writer(thread);
3213
 
 
3214
 
        pb_open_tab->ot_thread->st_statistics.st_scan_index++;
3215
 
        return 0;
3216
 
}
3217
 
 
3218
 
int ha_pbxt::doEndIndexScan()
3219
 
{
3220
 
        int err = 0;
3221
 
 
3222
 
        XT_TRACE_METHOD();
3223
 
 
3224
 
        XTThreadPtr thread = pb_open_tab->ot_thread;
3225
 
 
3226
 
        /*
3227
 
         * the assertion below is not always held, because the sometimes handler is unlocked
3228
 
         * before this function is called
3229
 
         */
3230
 
        /*ASSERT_NS(pb_ex_in_use);*/
3231
 
 
3232
 
        if (pb_open_tab->ot_ind_rhandle) {
3233
 
                xt_ind_release_handle(pb_open_tab->ot_ind_rhandle, FALSE, thread);
3234
 
                pb_open_tab->ot_ind_rhandle = NULL;
3235
 
        }
3236
 
 
3237
 
        /*
3238
 
         * make permanent the lock for the last scanned row
3239
 
         */
3240
 
        if (pb_open_tab)
3241
 
                pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &thread->st_lock_list);
3242
 
 
3243
 
        xt_xlog_check_long_writer(thread);
3244
 
 
3245
 
        active_index = MAX_KEY;
3246
 
        XT_RETURN(err);
3247
 
}
3248
 
 
3249
 
#ifdef XT_TRACK_RETURNED_ROWS
3250
 
void ha_start_scan(XTOpenTablePtr ot, u_int index)
3251
 
{
3252
 
        xt_ttracef(ot->ot_thread, "SCAN %d:%d\n", (int) ot->ot_table->tab_id, (int) index);
3253
 
        ot->ot_rows_ret_curr = 0;
3254
 
        for (u_int i=0; i<ot->ot_rows_ret_max; i++)
3255
 
                ot->ot_rows_returned[i] = 0;
3256
 
}
3257
 
 
3258
 
void ha_return_row(XTOpenTablePtr ot, u_int index)
3259
 
{
3260
 
        xt_ttracef(ot->ot_thread, "%d:%d ROW=%d:%d\n",
3261
 
                (int) ot->ot_table->tab_id, (int) index, (int) ot->ot_curr_row_id, (int) ot->ot_curr_rec_id);
3262
 
        ot->ot_rows_ret_curr++;
3263
 
        if (ot->ot_curr_row_id >= ot->ot_rows_ret_max) {
3264
 
                if (!xt_realloc_ns((void **) &ot->ot_rows_returned, (ot->ot_curr_row_id+1) * sizeof(xtRecordID)))
3265
 
                        ASSERT_NS(FALSE);
3266
 
                memset(&ot->ot_rows_returned[ot->ot_rows_ret_max], 0, (ot->ot_curr_row_id+1 - ot->ot_rows_ret_max) * sizeof(xtRecordID));
3267
 
                ot->ot_rows_ret_max = ot->ot_curr_row_id+1;
3268
 
        }
3269
 
        if (!ot->ot_curr_row_id || !ot->ot_curr_rec_id || ot->ot_rows_returned[ot->ot_curr_row_id]) {
3270
 
                char *sql = *thd_query(current_thd);
3271
 
 
3272
 
                xt_ttracef(ot->ot_thread, "DUP %d:%d %s\n",
3273
 
                        (int) ot->ot_table->tab_id, (int) index, *thd_query(current_thd));
3274
 
                xt_dump_trace();
3275
 
                printf("ERROR: row=%d rec=%d newr=%d, already returned!\n", (int) ot->ot_curr_row_id, (int) ot->ot_rows_returned[ot->ot_curr_row_id], (int) ot->ot_curr_rec_id);
3276
 
                printf("ERROR: %s\n", sql);
3277
 
#ifdef XT_WIN
3278
 
                FatalAppExit(0, "Debug Me!");
3279
 
#endif
3280
 
        }
3281
 
        else
3282
 
                ot->ot_rows_returned[ot->ot_curr_row_id] = ot->ot_curr_rec_id;
3283
 
}
3284
 
#endif
3285
 
 
3286
 
int ha_pbxt::index_read_xt(byte * buf, uint idx, const byte *key, uint key_len, enum ha_rkey_function find_flag)
3287
 
{
3288
 
        int                                     err = 0;
3289
 
        XTIndexPtr                      ind;
3290
 
        int                                     prefix = 0;
3291
 
        XTIdxSearchKeyRec       search_key;
3292
 
 
3293
 
        if (idx == MAX_KEY) {
3294
 
                err = HA_ERR_WRONG_INDEX;
3295
 
                goto done;
3296
 
        }
3297
 
#ifdef XT_TRACK_RETURNED_ROWS
3298
 
        ha_start_scan(pb_open_tab, idx);
3299
 
#endif
3300
 
 
3301
 
        /* This call starts a search on this handler! */
3302
 
        pb_ind_row_count = 0;
3303
 
 
3304
 
        ASSERT_NS(pb_ex_in_use);
3305
 
 
3306
 
        XT_PRINT1(pb_open_tab->ot_thread, "index_read_xt (%s)\n", pb_share->sh_table_path->ps_path);
3307
 
        XT_DISABLED_TRACE(("search tx=%d val=%d update=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(key), pb_modified));
3308
 
        ind = (XTIndexPtr) pb_share->sh_dic_keys[idx];
3309
 
 
3310
 
        switch (find_flag) {
3311
 
                case HA_READ_PREFIX_LAST:
3312
 
                case HA_READ_PREFIX_LAST_OR_PREV:
3313
 
                        prefix = SEARCH_PREFIX;
3314
 
                case HA_READ_BEFORE_KEY:
3315
 
                case HA_READ_KEY_OR_PREV: // I assume you want to be positioned on the last entry in the key duplicate list!! 
3316
 
                        xt_idx_prep_key(ind, &search_key, ((find_flag == HA_READ_BEFORE_KEY) ? 0 : XT_SEARCH_AFTER_KEY) | prefix, (xtWord1 *) key, (size_t) key_len);
3317
 
                        if (!xt_idx_search_prev(pb_open_tab, ind, &search_key))
3318
 
                                err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3319
 
                        else
3320
 
                                err = xt_index_prev_read(pb_open_tab, ind, pb_key_read,
3321
 
                                        (find_flag == HA_READ_PREFIX_LAST) ? &search_key : NULL, buf);
3322
 
                        break;
3323
 
                case HA_READ_PREFIX:
3324
 
                        prefix = SEARCH_PREFIX;
3325
 
                case HA_READ_KEY_EXACT:
3326
 
                case HA_READ_KEY_OR_NEXT:
3327
 
                case HA_READ_AFTER_KEY:
3328
 
                default:
3329
 
                        xt_idx_prep_key(ind, &search_key, ((find_flag == HA_READ_AFTER_KEY) ? XT_SEARCH_AFTER_KEY : 0) | prefix, (xtWord1 *) key, key_len);
3330
 
                        if (!xt_idx_search(pb_open_tab, ind, &search_key))
3331
 
                                err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3332
 
                        else {
3333
 
                                err = xt_index_next_read(pb_open_tab, ind, pb_key_read,
3334
 
                                        (find_flag == HA_READ_KEY_EXACT || find_flag == HA_READ_PREFIX) ? &search_key : NULL, buf);
3335
 
                                if (err == HA_ERR_END_OF_FILE && find_flag == HA_READ_AFTER_KEY)
3336
 
                                        err = HA_ERR_KEY_NOT_FOUND;                     
3337
 
                        }
3338
 
                        break;
3339
 
        }
3340
 
 
3341
 
        pb_ind_row_count++;
3342
 
#ifdef XT_TRACK_RETURNED_ROWS
3343
 
        if (!err)
3344
 
                ha_return_row(pb_open_tab, idx);
3345
 
#endif
3346
 
        XT_DISABLED_TRACE(("search tx=%d val=%d err=%d\n", (int) pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id, (int) XT_GET_DISK_4(key), err));
3347
 
        done:
3348
 
        if (err)
3349
 
                getTable()->status = STATUS_NOT_FOUND;
3350
 
        else {
3351
 
                pb_open_tab->ot_thread->st_statistics.st_row_select++;
3352
 
                getTable()->status = 0;
3353
 
        }
3354
 
        return err;
3355
 
}
3356
 
 
3357
 
/*
3358
 
 * Positions an index cursor to the index specified in the handle. Fetches the
3359
 
 * row if available. If the key value is null, begin at the first key of the
3360
 
 * index.
3361
 
 */
3362
 
int ha_pbxt::index_read(byte * buf, const byte * key, uint key_len, enum ha_rkey_function find_flag)
3363
 
{
3364
 
        //statistic_increment(ha_read_key_count,&LOCK_status);
3365
 
        return index_read_xt(buf, active_index, key, key_len, find_flag);
3366
 
}
3367
 
 
3368
 
int ha_pbxt::index_read_idx(byte * buf, uint idx, const byte *key, uint key_len, enum ha_rkey_function find_flag)
3369
 
{
3370
 
        //statistic_increment(ha_read_key_count,&LOCK_status);
3371
 
        return index_read_xt(buf, idx, key, key_len, find_flag);
3372
 
}
3373
 
 
3374
 
int ha_pbxt::index_read_last(byte * buf, const byte * key, uint key_len)
3375
 
{
3376
 
        //statistic_increment(ha_read_key_count,&LOCK_status);
3377
 
        return index_read_xt(buf, active_index, key, key_len, HA_READ_PREFIX_LAST);
3378
 
}
3379
 
 
3380
 
/*
3381
 
 * Used to read forward through the index.
3382
 
 */
3383
 
int ha_pbxt::index_next(byte * buf)
3384
 
{
3385
 
        int                     err = 0;
3386
 
        XTIndexPtr      ind;
3387
 
 
3388
 
        XT_TRACE_METHOD();
3389
 
        //statistic_increment(ha_read_next_count,&LOCK_status);
3390
 
        ASSERT_NS(pb_ex_in_use);
3391
 
 
3392
 
        if (active_index == MAX_KEY) {
3393
 
                err = HA_ERR_WRONG_INDEX;
3394
 
                goto done;
3395
 
        }
3396
 
        ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3397
 
 
3398
 
        if (!xt_idx_next(pb_open_tab, ind, NULL))
3399
 
                err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3400
 
        else
3401
 
                err = xt_index_next_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3402
 
 
3403
 
        pb_ind_row_count++;
3404
 
#ifdef XT_TRACK_RETURNED_ROWS
3405
 
        if (!err)
3406
 
                ha_return_row(pb_open_tab, active_index);
3407
 
#endif
3408
 
        done:
3409
 
        if (err)
3410
 
                getTable()->status = STATUS_NOT_FOUND;
3411
 
        else {
3412
 
                pb_open_tab->ot_thread->st_statistics.st_row_select++;
3413
 
                getTable()->status = 0;
3414
 
        }
3415
 
        XT_RETURN(err);
3416
 
}
3417
 
 
3418
 
/*
3419
 
 * I have implemented this because there is currently a
3420
 
 * bug in handler::index_next_same().
3421
 
 *
3422
 
 * drop table if exists t1;
3423
 
 * CREATE TABLE t1 (a int, b int, primary key(a,b))
3424
 
 * PARTITION BY KEY(b,a) PARTITIONS 2;
3425
 
 * insert into t1 values (0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6);
3426
 
 * select * from t1 where a = 4;
3427
 
 * 
3428
 
 */
3429
 
int ha_pbxt::index_next_same(byte * buf, const byte *key, uint length)
3430
 
{
3431
 
        int                                     err = 0;
3432
 
        XTIndexPtr                      ind;
3433
 
        XTIdxSearchKeyRec       search_key;
3434
 
 
3435
 
        XT_TRACE_METHOD();
3436
 
        //statistic_increment(ha_read_next_count,&LOCK_status);
3437
 
        ASSERT_NS(pb_ex_in_use);
3438
 
 
3439
 
        if (active_index == MAX_KEY) {
3440
 
                err = HA_ERR_WRONG_INDEX;
3441
 
                goto done;
3442
 
        }
3443
 
        ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3444
 
 
3445
 
        search_key.sk_key_value.sv_flags = HA_READ_KEY_EXACT;
3446
 
        search_key.sk_key_value.sv_rec_id = 0;
3447
 
        search_key.sk_key_value.sv_row_id = 0;
3448
 
        search_key.sk_key_value.sv_key = search_key.sk_key_buf;
3449
 
        search_key.sk_key_value.sv_length = myxt_create_key_from_key(ind, search_key.sk_key_buf, (xtWord1 *) key, (u_int) length);
3450
 
        search_key.sk_on_key = TRUE;
3451
 
 
3452
 
        if (!xt_idx_next(pb_open_tab, ind, &search_key))
3453
 
                err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3454
 
        else
3455
 
                err = xt_index_next_read(pb_open_tab, ind, pb_key_read, &search_key, buf);
3456
 
 
3457
 
        pb_ind_row_count++;
3458
 
#ifdef XT_TRACK_RETURNED_ROWS
3459
 
        if (!err)
3460
 
                ha_return_row(pb_open_tab, active_index);
3461
 
#endif
3462
 
        done:
3463
 
        if (err)
3464
 
                getTable()->status = STATUS_NOT_FOUND;
3465
 
        else {
3466
 
                pb_open_tab->ot_thread->st_statistics.st_row_select++;
3467
 
                getTable()->status = 0;
3468
 
        }
3469
 
        XT_RETURN(err);
3470
 
}
3471
 
 
3472
 
/*
3473
 
 * Used to read backwards through the index.
3474
 
 */
3475
 
int ha_pbxt::index_prev(byte * buf)
3476
 
{
3477
 
        int                     err = 0;
3478
 
        XTIndexPtr      ind;
3479
 
 
3480
 
        XT_TRACE_METHOD();
3481
 
        //statistic_increment(ha_read_prev_count,&LOCK_status);
3482
 
        ASSERT_NS(pb_ex_in_use);
3483
 
 
3484
 
        if (active_index == MAX_KEY) {
3485
 
                err = HA_ERR_WRONG_INDEX;
3486
 
                goto done;
3487
 
        }
3488
 
        ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3489
 
 
3490
 
        if (!xt_idx_prev(pb_open_tab, ind, NULL))
3491
 
                err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3492
 
        else
3493
 
                err = xt_index_prev_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3494
 
 
3495
 
        pb_ind_row_count++;
3496
 
#ifdef XT_TRACK_RETURNED_ROWS
3497
 
        if (!err)
3498
 
                ha_return_row(pb_open_tab, active_index);
3499
 
#endif
3500
 
        done:
3501
 
        if (err)
3502
 
                getTable()->status = STATUS_NOT_FOUND;
3503
 
        else {
3504
 
                pb_open_tab->ot_thread->st_statistics.st_row_select++;
3505
 
                getTable()->status = 0;
3506
 
        }
3507
 
        XT_RETURN(err);
3508
 
}
3509
 
 
3510
 
/*
3511
 
 * index_first() asks for the first key in the index.
3512
 
 */
3513
 
int ha_pbxt::index_first(byte * buf)
3514
 
{
3515
 
        int                                     err = 0;
3516
 
        XTIndexPtr                      ind;
3517
 
        XTIdxSearchKeyRec       search_key;
3518
 
 
3519
 
        XT_TRACE_METHOD();
3520
 
        //statistic_increment(ha_read_first_count,&LOCK_status);
3521
 
        ASSERT_NS(pb_ex_in_use);
3522
 
 
3523
 
        /* This is required because MySQL ignores the error returned
3524
 
         * init init_index sometimes, for example:
3525
 
         *
3526
 
     * if (!table->file->inited)
3527
 
     *    table->file->startIndexScan(tab->index, tab->sorted);
3528
 
     *  if ((error=tab->table->file->index_first(tab->table->getInsertRecord())))
3529
 
         */
3530
 
        if (active_index == MAX_KEY) {
3531
 
                err = HA_ERR_WRONG_INDEX;
3532
 
                goto done;
3533
 
        }
3534
 
 
3535
 
#ifdef XT_TRACK_RETURNED_ROWS
3536
 
        ha_start_scan(pb_open_tab, active_index);
3537
 
#endif
3538
 
        pb_ind_row_count = 0;
3539
 
 
3540
 
        ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3541
 
 
3542
 
        xt_idx_prep_key(ind, &search_key, XT_SEARCH_FIRST_FLAG, NULL, 0);
3543
 
        if (!xt_idx_search(pb_open_tab, ind, &search_key))
3544
 
                err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3545
 
        else
3546
 
                err = xt_index_next_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3547
 
 
3548
 
        pb_ind_row_count++;
3549
 
#ifdef XT_TRACK_RETURNED_ROWS
3550
 
        if (!err)
3551
 
                ha_return_row(pb_open_tab, active_index);
3552
 
#endif
3553
 
        done:
3554
 
        if (err)
3555
 
                getTable()->status = STATUS_NOT_FOUND;
3556
 
        else {
3557
 
                pb_open_tab->ot_thread->st_statistics.st_row_select++;
3558
 
                getTable()->status = 0;
3559
 
        }
3560
 
        XT_RETURN(err);
3561
 
}
3562
 
 
3563
 
/*
3564
 
 * index_last() asks for the last key in the index.
3565
 
 */
3566
 
int ha_pbxt::index_last(byte * buf)
3567
 
{
3568
 
        int                                     err = 0;
3569
 
        XTIndexPtr                      ind;
3570
 
        XTIdxSearchKeyRec       search_key;
3571
 
 
3572
 
        XT_TRACE_METHOD();
3573
 
        //statistic_increment(ha_read_last_count,&LOCK_status);
3574
 
        ASSERT_NS(pb_ex_in_use);
3575
 
 
3576
 
        if (active_index == MAX_KEY) {
3577
 
                err = HA_ERR_WRONG_INDEX;
3578
 
                goto done;
3579
 
        }
3580
 
 
3581
 
#ifdef XT_TRACK_RETURNED_ROWS
3582
 
        ha_start_scan(pb_open_tab, active_index);
3583
 
#endif
3584
 
        pb_ind_row_count = 0;
3585
 
 
3586
 
        ind = (XTIndexPtr) pb_share->sh_dic_keys[active_index];
3587
 
 
3588
 
        xt_idx_prep_key(ind, &search_key, XT_SEARCH_AFTER_LAST_FLAG, NULL, 0);
3589
 
        if (!xt_idx_search_prev(pb_open_tab, ind, &search_key))
3590
 
                err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3591
 
        else
3592
 
                err = xt_index_prev_read(pb_open_tab, ind, pb_key_read, NULL, buf);
3593
 
 
3594
 
        pb_ind_row_count++;
3595
 
#ifdef XT_TRACK_RETURNED_ROWS
3596
 
        if (!err)
3597
 
                ha_return_row(pb_open_tab, active_index);
3598
 
#endif
3599
 
        done:
3600
 
        if (err)
3601
 
                getTable()->status = STATUS_NOT_FOUND;
3602
 
        else {
3603
 
                pb_open_tab->ot_thread->st_statistics.st_row_select++;
3604
 
                getTable()->status = 0;
3605
 
        }
3606
 
        XT_RETURN(err);
3607
 
}
3608
 
 
3609
 
/*
3610
 
 * -----------------------------------------------------------------------
3611
 
 * RANDOM/SEQUENTIAL READ METHODS
3612
 
 */
3613
 
 
3614
 
/*
3615
 
 * doStartTableScan() is called when the system wants the storage engine to do a table
3616
 
 * scan.
3617
 
 * See the example in the introduction at the top of this file to see when
3618
 
 * doStartTableScan() is called.
3619
 
 *
3620
 
 * Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc,
3621
 
 * and sql_update.cc.
3622
 
 */
3623
 
int ha_pbxt::doStartTableScan(bool scan)
3624
 
{
3625
 
        int                     err = 0;
3626
 
        XTThreadPtr     thread = pb_open_tab->ot_thread;
3627
 
 
3628
 
        XT_PRINT1(thread, "rnd_init (%s)\n", pb_share->sh_table_path->ps_path);
3629
 
        XT_DISABLED_TRACE(("seq scan tx=%d\n", (int) thread->st_xact_data->xd_start_xn_id));
3630
 
 
3631
 
        /* Call xt_tab_seq_exit() to make sure the resources used by the previous
3632
 
         * scan are freed. In particular make sure cache page ref count is decremented.
3633
 
         * This is needed as doStartTableScan() can be called mulitple times w/o matching calls 
3634
 
         * to doEndTableScan(). Our experience is that currently this is done in queries like:
3635
 
         *
3636
 
         * SELECT t1.c1,t2.c1 FROM t1 LEFT JOIN t2 USING (c1);
3637
 
         * UPDATE t1 LEFT JOIN t2 USING (c1) SET t1.c1 = t2.c1 WHERE t1.c1 = t2.c1;
3638
 
         *
3639
 
         * when scanning inner tables. It is important to understand that in such case
3640
 
         * multiple calls to doStartTableScan() are not semantically equal to a new query. For
3641
 
         * example we cannot make row locks permanent as we do in doEndTableScan(), as 
3642
 
         * ha_pbxt::unlock_row still can be called.
3643
 
         */
3644
 
        xt_tab_seq_exit(pb_open_tab);
3645
 
 
3646
 
        /* The number of columns required: */
3647
 
        if (pb_open_tab->ot_is_modify) {
3648
 
                pb_open_tab->ot_cols_req = getTable()->read_set->MX_BIT_SIZE();
3649
 
                /* {START-STAT-HACK} previously position of start statement hack,
3650
 
                 * previous comment to code below: */
3651
 
                /* Start a statement based transaction as soon
3652
 
                 * as a read is done for a modify type statement!
3653
 
                 * Previously, this was done too late!
3654
 
                 */
3655
 
        }
3656
 
        else {
3657
 
                //pb_open_tab->ot_cols_req = ha_get_max_bit(table->read_set);
3658
 
                pb_open_tab->ot_cols_req = getTable()->read_set->MX_BIT_SIZE();
3659
 
 
3660
 
                /*
3661
 
                 * in case of queries like SELECT COUNT(*) FROM t
3662
 
                 * table->read_set is empty. Otoh, ot_cols_req == 0 can be treated
3663
 
                 * as "all columns" by some internal code (see e.g. myxt_load_row), 
3664
 
                 * which makes such queries very ineffective for the records with 
3665
 
                 * extended part. Setting column count to 1 makes sure that the 
3666
 
                 * extended part will not be acessed in most cases.
3667
 
                 */
3668
 
 
3669
 
                if (pb_open_tab->ot_cols_req == 0)
3670
 
                        pb_open_tab->ot_cols_req = 1;
3671
 
        }
3672
 
 
3673
 
        ASSERT_NS(pb_ex_in_use);
3674
 
        if (scan) {
3675
 
                if (!xt_tab_seq_init(pb_open_tab))
3676
 
                        err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3677
 
        }
3678
 
        else
3679
 
                xt_tab_seq_reset(pb_open_tab);
3680
 
 
3681
 
        xt_xlog_check_long_writer(thread);
3682
 
 
3683
 
        return err;
3684
 
}
3685
 
 
3686
 
int ha_pbxt::doEndTableScan()
3687
 
{
3688
 
        XT_TRACE_METHOD();
3689
 
 
3690
 
        /*
3691
 
         * make permanent the lock for the last scanned row
3692
 
         */
3693
 
        XTThreadPtr thread = pb_open_tab->ot_thread;
3694
 
        if (pb_open_tab)
3695
 
                pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &thread->st_lock_list);
3696
 
 
3697
 
        xt_xlog_check_long_writer(thread);
3698
 
 
3699
 
        xt_tab_seq_exit(pb_open_tab);
3700
 
        XT_RETURN(0);
3701
 
}
3702
 
 
3703
 
/*
3704
 
 * This is called for each row of the table scan. When you run out of records
3705
 
 * you should return HA_ERR_END_OF_FILE. Fill buff up with the row information.
3706
 
 * The Field structure for the table is the key to getting data into buf
3707
 
 * in a manner that will allow the server to understand it.
3708
 
 *
3709
 
 * Called from filesort.cc, records.cc, sql_handler.cc, sql_select.cc, sql_table.cc,
3710
 
 * and sql_update.cc.
3711
 
 */
3712
 
int ha_pbxt::rnd_next(byte *buf)
3713
 
{
3714
 
        int             err = 0;
3715
 
        xtBool  eof;
3716
 
 
3717
 
        XT_TRACE_METHOD();
3718
 
        ASSERT_NS(pb_ex_in_use);
3719
 
        //statistic_increment(ha_read_rnd_next_count, &LOCK_status);
3720
 
        xt_xlog_check_long_writer(pb_open_tab->ot_thread);
3721
 
 
3722
 
        if (!xt_tab_seq_next(pb_open_tab, (xtWord1 *) buf, &eof))
3723
 
                err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3724
 
        else if (eof)
3725
 
                err = HA_ERR_END_OF_FILE;
3726
 
 
3727
 
        if (err)
3728
 
                getTable()->status = STATUS_NOT_FOUND;
3729
 
        else {
3730
 
                pb_open_tab->ot_thread->st_statistics.st_row_select++;
3731
 
                getTable()->status = 0;
3732
 
        }
3733
 
        XT_RETURN(err);
3734
 
}
3735
 
 
3736
 
/*
3737
 
 * position() is called after each call to rnd_next() if the data needs
3738
 
 * to be ordered. You can do something like the following to store
3739
 
 * the position:
3740
 
 * ha_store_ptr(ref, ref_length, current_position);
3741
 
 *
3742
 
 * The server uses ref to store data. ref_length in the above case is
3743
 
 * the size needed to store current_position. ref is just a byte array
3744
 
 * that the server will maintain. If you are using offsets to mark rows, then
3745
 
 * current_position should be the offset. If it is a primary key like in
3746
 
 * BDB, then it needs to be a primary key.
3747
 
 *
3748
 
 * Called from filesort.cc, sql_select.cc, sql_delete.cc and sql_update.cc.
3749
 
 */
3750
 
void ha_pbxt::position(const byte *XT_UNUSED(record))
3751
 
{
3752
 
        XT_TRACE_METHOD();
3753
 
        ASSERT_NS(pb_ex_in_use);
3754
 
        /*
3755
 
         * I changed this from using little endian to big endian.
3756
 
         *
3757
 
         * The reason is because sometime the pointer are sorted.
3758
 
         * When they are are sorted a binary compare is used.
3759
 
         * A binary compare sorts big endian values correctly!
3760
 
         *
3761
 
         * Take the followin example:
3762
 
         *
3763
 
         * create table t1 (a int, b text);
3764
 
         * insert into t1 values (1, 'aa'), (1, 'bb'), (1, 'cc');
3765
 
         * select group_concat(b) from t1 group by a;
3766
 
         *
3767
 
         * With little endian pointers the result is:
3768
 
         * aa,bb,cc
3769
 
         *
3770
 
         * With big-endian pointer the result is:
3771
 
         * aa,cc,bb
3772
 
         *
3773
 
         */
3774
 
        (void) ASSERT_NS(XT_RECORD_OFFS_SIZE == 4);
3775
 
        mi_int4store((xtWord1 *) ref, pb_open_tab->ot_curr_rec_id);
3776
 
        XT_RETURN_VOID;
3777
 
}
3778
 
 
3779
 
/*
3780
 
 * Given the #ROWID retrieve the record.
3781
 
 *
3782
 
 * Called from filesort.cc records.cc sql_insert.cc sql_select.cc sql_update.cc.
3783
 
 */
3784
 
int ha_pbxt::rnd_pos(byte * buf, byte *pos)
3785
 
{
3786
 
        int err = 0;
3787
 
 
3788
 
        XT_TRACE_METHOD();
3789
 
        ASSERT_NS(pb_ex_in_use);
3790
 
        //statistic_increment(ha_read_rnd_count, &LOCK_status);
3791
 
        XT_PRINT1(pb_open_tab->ot_thread, "rnd_pos (%s)\n", pb_share->sh_table_path->ps_path);
3792
 
 
3793
 
        pb_open_tab->ot_curr_rec_id = mi_uint4korr((xtWord1 *) pos);
3794
 
        switch (xt_tab_dirty_read_record(pb_open_tab, (xtWord1 *) buf)) {
3795
 
                case FALSE:
3796
 
                        err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3797
 
                        break;
3798
 
                default:
3799
 
                        break;
3800
 
        }               
3801
 
 
3802
 
        if (err)
3803
 
                getTable()->status = STATUS_NOT_FOUND;
3804
 
        else {
3805
 
                pb_open_tab->ot_thread->st_statistics.st_row_select++;
3806
 
                getTable()->status = 0;
3807
 
        }
3808
 
        XT_RETURN(err);
3809
 
}
3810
 
 
3811
 
/*
3812
 
 * -----------------------------------------------------------------------
3813
 
 * INFO METHODS
3814
 
 */
3815
 
 
3816
 
/*
3817
 
        ::info() is used to return information to the optimizer.
3818
 
        Currently this table handler doesn't implement most of the fields
3819
 
        really needed. SHOW also makes use of this data
3820
 
        Another note, you will probably want to have the following in your
3821
 
        code:
3822
 
        if (records < 2)
3823
 
                records = 2;
3824
 
        The reason is that the server will optimize for cases of only a single
3825
 
        record. If in a table scan you don't know the number of records
3826
 
        it will probably be better to set records to two so you can return
3827
 
        as many records as you need.
3828
 
        Along with records a few more variables you may wish to set are:
3829
 
                records
3830
 
                deleted
3831
 
                data_file_length
3832
 
                index_file_length
3833
 
                delete_length
3834
 
                check_time
3835
 
        Take a look at the public variables in handler.h for more information.
3836
 
 
3837
 
        Called in:
3838
 
                filesort.cc
3839
 
                ha_heap.cc
3840
 
                item_sum.cc
3841
 
                opt_sum.cc
3842
 
                sql_delete.cc
3843
 
                sql_delete.cc
3844
 
                sql_derived.cc
3845
 
                sql_select.cc
3846
 
                sql_select.cc
3847
 
                sql_select.cc
3848
 
                sql_select.cc
3849
 
                sql_select.cc
3850
 
                sql_show.cc
3851
 
                sql_show.cc
3852
 
                sql_show.cc
3853
 
                sql_show.cc
3854
 
                sql_table.cc
3855
 
                sql_union.cc
3856
 
                sql_update.cc
3857
 
 
3858
 
*/
3859
 
#if MYSQL_VERSION_ID < 50114
3860
 
void ha_pbxt::info(uint flag)
3861
 
#else
3862
 
int ha_pbxt::info(uint flag)
3863
 
#endif
3864
 
{
3865
 
        XTOpenTablePtr  ot;
3866
 
        int                             in_use;
3867
 
 
3868
 
        XT_TRACE_METHOD();
3869
 
        
3870
 
        if (!(in_use = pb_ex_in_use)) {
3871
 
                pb_ex_in_use = 1;
3872
 
                if (pb_share && pb_share->sh_table_lock) {
3873
 
                        /* If some thread has an exclusive lock, then
3874
 
                         * we wait for the lock to be removed:
3875
 
                         */
3876
 
#if MYSQL_VERSION_ID < 50114
3877
 
                        ha_wait_for_shared_use(this, pb_share);
3878
 
                        pb_ex_in_use = 1;
3879
 
#else
3880
 
                        if (!ha_wait_for_shared_use(this, pb_share))
3881
 
                                return ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
3882
 
#endif
3883
 
                }
3884
 
        }
3885
 
 
3886
 
        if ((ot = pb_open_tab)) {
3887
 
                if (flag & HA_STATUS_VARIABLE) {
3888
 
                        register XTTableHPtr tab = ot->ot_table;
3889
 
 
3890
 
                        /* {FREE-ROWS-BAD}
3891
 
                         * Free row count is not reliable, so ignore it.
3892
 
                         * The problem is if tab_row_fnum > tab_row_eof_id - 1 then
3893
 
                         * we have a very bad result.
3894
 
                         *
3895
 
                         * If stats.records+EXTRA_RECORDS == 0 as returned by 
3896
 
                         * estimate_rows_upper_bound(), then filesort will crash here:
3897
 
                         *
3898
 
                         * make_sortkey(param,sort_keys[idx++],ref_pos);
3899
 
                         * 
3900
 
                         * #0   0x000bf69c in Field_long::sort_string at field.cc:3766
3901
 
                         * #1   0x0022e1f1 in make_sortkey at filesort.cc:769
3902
 
                         * #2   0x0022f1cf in find_all_keys at filesort.cc:619
3903
 
                         * #3   0x00230eec in filesort at filesort.cc:243
3904
 
                         * #4   0x001b9d89 in update_query at sql_update.cc:415
3905
 
                         * #5   0x0010db12 in mysql_execute_command at sql_parse.cc:2959
3906
 
                         * #6   0x0011480d in mysql_parse at sql_parse.cc:5787
3907
 
                         * #7   0x00115afb in dispatch_command at sql_parse.cc:1200
3908
 
                         * #8   0x00116de2 in do_command at sql_parse.cc:857
3909
 
                         * #9   0x00101ee4 in handle_one_connection at sql_connect.cc:1115
3910
 
                         *
3911
 
                         * The problem is that sort_keys is allocated to handle just 1 vector.
3912
 
                         * Sorting one vector crashes. Although I could not find a check for
3913
 
                         * the actual number of vectors. But it must assume that it has at
3914
 
                         * least EXTRA_RECORDS vectors.
3915
 
                         */
3916
 
#ifdef XT_ROW_COUNT_CORRECTED
3917
 
                        if (tab->tab_row_eof_id <= tab->tab_row_fnum ||
3918
 
                                (!tab->tab_row_free_id && tab->tab_row_fnum))
3919
 
                                xt_tab_check_free_lists(NULL, ot, false, true);
3920
 
                        stats.records = (ha_rows) tab->tab_row_eof_id - 1;
3921
 
                        if (stats.records >= tab->tab_row_fnum) {
3922
 
                                stats.deleted = tab->tab_row_fnum;
3923
 
                                stats.records -= stats.deleted;
3924
 
                        }
3925
 
                        else {
3926
 
                                stats.deleted = 0;
3927
 
                                stats.records = 2;
3928
 
                        }
3929
 
#else
3930
 
                        stats.deleted = /* tab->tab_row_fnum */ 0;
3931
 
                        stats.records = (ha_rows) (tab->tab_row_eof_id - 1 /* - stats.deleted */);
3932
 
#endif
3933
 
                        stats.data_file_length = xt_rec_id_to_rec_offset(tab, tab->tab_rec_eof_id);
3934
 
                        stats.index_file_length = xt_ind_node_to_offset(tab, tab->tab_ind_eof);
3935
 
                        stats.delete_length = tab->tab_rec_fnum * ot->ot_rec_size;
3936
 
                        //check_time = info.check_time;
3937
 
                        stats.mean_rec_length = (ulong) ot->ot_rec_size;
3938
 
                }
3939
 
 
3940
 
#if 0 // Commented out, I am pretty sure this will blow up on someone since the global share should be treated as being non-mutable
3941
 
                if (flag & HA_STATUS_CONST) {
3942
 
                        ha_rows         rec_per_key;
3943
 
                        XTIndexPtr      ind;
3944
 
                        TABLE_SHARE     *share= TS(table);
3945
 
 
3946
 
                        stats.max_data_file_length = 0x00FFFFFF;
3947
 
                        stats.max_index_file_length = 0x00FFFFFF;
3948
 
                        //stats.create_time = info.create_time;
3949
 
                        ref_length = XT_RECORD_OFFS_SIZE;
3950
 
                        //share->db_options_in_use = info.options;
3951
 
                        stats.block_size = XT_INDEX_PAGE_SIZE;
3952
 
 
3953
 
                        if (share->getType() == message::Table::STANDARD)
3954
 
#ifdef DRIZZLED
3955
 
#define WHICH_MUTEX                     mutex
3956
 
#elif MYSQL_VERSION_ID >= 50404
3957
 
#define WHICH_MUTEX                     LOCK_ha_data
3958
 
#else
3959
 
                        if (share->tmp_table == NO_TMP_TABLE)
3960
 
#define WHICH_MUTEX                     mutex
3961
 
#endif
3962
 
 
3963
 
#ifdef SAFE_MUTEX
3964
 
 
3965
 
#if MYSQL_VERSION_ID < 50404
3966
 
#if MYSQL_VERSION_ID < 50123
3967
 
                                safe_mutex_lock(&share->mutex,__FILE__,__LINE__);
3968
 
#else
3969
 
                                safe_mutex_lock(&share->mutex,0,__FILE__,__LINE__);
3970
 
#endif
3971
 
#else
3972
 
                                safe_mutex_lock(&share->WHICH_MUTEX,0,__FILE__,__LINE__);
3973
 
#endif
3974
 
 
3975
 
#else // SAFE_MUTEX
3976
 
 
3977
 
#ifdef MY_PTHREAD_FASTMUTEX
3978
 
                                my_pthread_fastmutex_lock(&share->WHICH_MUTEX);
3979
 
#else
3980
 
                                share->lock();
3981
 
#endif
3982
 
 
3983
 
#endif // SAFE_MUTEX
3984
 
#ifdef DRIZZLED
3985
 
                        set_prefix(share->keys_in_use, share->keys);
3986
 
                        share->keys_for_keyread&= share->keys_in_use;
3987
 
#else
3988
 
                        share->keys_in_use.set_prefix(share->keys);
3989
 
                        //share->keys_in_use.intersect_extended(info.key_map);
3990
 
                        share->keys_for_keyread.intersect(share->keys_in_use);
3991
 
                        //share->db_record_offset = info.record_offset;
3992
 
#endif
3993
 
                        for (u_int i = 0; i < share->keys; i++) {
3994
 
                                ind = pb_share->sh_dic_keys[i];
3995
 
 
3996
 
                                rec_per_key = 0;
3997
 
                                if (ind->mi_seg_count == 1 && (ind->mi_flags & HA_NOSAME))
3998
 
                                        rec_per_key = 1;
3999
 
                                else {
4000
 
                                        rec_per_key = 1;        
4001
 
                                }
4002
 
                                for (u_int j = 0; j < table->key_info[i].key_parts; j++)
4003
 
                                        table->key_info[i].rec_per_key[j] = (ulong) rec_per_key;
4004
 
                        }
4005
 
#ifdef DRIZZLED
4006
 
                        if (share->getType() == message::Table::STANDARD)
4007
 
#else
4008
 
                        if (share->tmp_table == NO_TMP_TABLE)
4009
 
#endif
4010
 
#ifdef SAFE_MUTEX
4011
 
                                safe_mutex_unlock(&share->WHICH_MUTEX,__FILE__,__LINE__);
4012
 
#else
4013
 
#ifdef MY_PTHREAD_FASTMUTEX
4014
 
                                pthread_mutex_unlock(&share->WHICH_MUTEX.mutex);
4015
 
#else
4016
 
                                share->unlock();
4017
 
#endif
4018
 
#endif
4019
 
                        /*
4020
 
                         Set data_file_name and index_file_name to point at the symlink value
4021
 
                         if table is symlinked (Ie;  Real name is not same as generated name)
4022
 
                        */
4023
 
                        /*
4024
 
                        data_file_name = index_file_name = 0;
4025
 
                        fn_format(name_buff, file->filename, "", MI_NAME_DEXT, 2);
4026
 
                        if (strcmp(name_buff, info.data_file_name))
4027
 
                                data_file_name = info.data_file_name;
4028
 
                        strmov(fn_ext(name_buff), MI_NAME_IEXT);
4029
 
                        if (strcmp(name_buff, info.index_file_name))
4030
 
                                index_file_name = info.index_file_name;
4031
 
                        */
4032
 
                }
4033
 
#endif  // if(0)
4034
 
 
4035
 
                if (flag & HA_STATUS_ERRKEY)
4036
 
                        errkey = ot->ot_err_index_no;
4037
 
 
4038
 
                /* {PRE-INC}
4039
 
                 * We assume they want the next value to be returned!
4040
 
                 *
4041
 
                 * At least, this is what works for the following code:
4042
 
                 *
4043
 
                 * create table t1 (a int auto_increment primary key)
4044
 
                 * auto_increment=100
4045
 
                 * engine=pbxt
4046
 
                 * partition by list (a)
4047
 
                 * (partition p0 values in (1, 98,99, 100, 101));
4048
 
                 * create index inx on t1 (a);
4049
 
                 * insert into t1 values (null);
4050
 
                 * select * from t1;
4051
 
                 */
4052
 
                if (flag & HA_STATUS_AUTO)
4053
 
                        stats.auto_increment_value = (ulonglong) ot->ot_table->tab_auto_inc+1;
4054
 
        }
4055
 
        else
4056
 
                errkey = (uint) -1;
4057
 
 
4058
 
        if (!in_use) {
4059
 
                pb_ex_in_use = 0;
4060
 
                if (pb_share) {
4061
 
                        /* Someone may be waiting for me to complete: */
4062
 
                        if (pb_share->sh_table_lock)
4063
 
                                xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
4064
 
                }
4065
 
        }
4066
 
#if MYSQL_VERSION_ID < 50114
4067
 
        XT_RETURN_VOID;
4068
 
#else
4069
 
        XT_RETURN(0);
4070
 
#endif
4071
 
}
4072
 
 
4073
 
/*
4074
 
 * extra() is called whenever the server wishes to send a hint to
4075
 
 * the storage engine. The myisam engine implements the most hints.
4076
 
 * ha_innodb.cc has the most exhaustive list of these hints.
4077
 
 */
4078
 
int ha_pbxt::extra(enum ha_extra_function operation)
4079
 
{
4080
 
        int err = 0;
4081
 
 
4082
 
        XT_PRINT2(xt_get_self(), "ha_pbxt::extra (%s) operation=%d\n", pb_share->sh_table_path->ps_path, operation);
4083
 
 
4084
 
        switch (operation) {
4085
 
                case HA_EXTRA_RESET_STATE:
4086
 
                        pb_key_read = FALSE;
4087
 
                        pb_ignore_dup_key = 0;
4088
 
                        /* As far as I can tell, this function is called for
4089
 
                         * every table at the end of a statement.
4090
 
                         *
4091
 
                         * So, during a LOCK TABLES ... UNLOCK TABLES, I use
4092
 
                         * this to find the end of a statement.
4093
 
                         * start_stmt() indicates the start of a statement,
4094
 
                         * and is also called once for each table in the
4095
 
                         * statement.
4096
 
                         *
4097
 
                         * So the statement boundary is indicated by 
4098
 
                         * self->st_stat_count == 0
4099
 
                         *
4100
 
                         * GOTCHA: I cannot end the transaction here!
4101
 
                         * I must end it in start_stmt().
4102
 
                         * The reason is because there are situations
4103
 
                         * where this would end a transaction that
4104
 
                         * was begin by external_lock().
4105
 
                         *
4106
 
                         * An example of this is when a function
4107
 
                         * is called when doing CREATE TABLE SELECT.
4108
 
                         */
4109
 
                        if (pb_in_stat) {
4110
 
                                /* NOTE: pb_in_stat is just used to avoid getting
4111
 
                                 * self, if it is not necessary!!
4112
 
                                 */
4113
 
                                XTThreadPtr self;
4114
 
 
4115
 
                                pb_in_stat = FALSE;
4116
 
 
4117
 
                                if (!(self = ha_set_current_thread(pb_mysql_thd, &err)))
4118
 
                                        return xt_ha_pbxt_to_mysql_error(err);
4119
 
 
4120
 
                                if (self->st_stat_count > 0) {
4121
 
                                        self->st_stat_count--;
4122
 
                                        if (self->st_stat_count == 0)
4123
 
                                                self->st_stat_ended = TRUE;
4124
 
                                }
4125
 
 
4126
 
                                /* This is the end of a statement, I can turn any locks into perminant locks now: */
4127
 
                                if (pb_open_tab)
4128
 
                                        pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &self->st_lock_list);
4129
 
                        }
4130
 
                        if (pb_open_tab)
4131
 
                                pb_open_tab->ot_for_update = 0;
4132
 
                        break;
4133
 
                case HA_EXTRA_KEYREAD:
4134
 
                        /* This means we so not need to read the entire record. */
4135
 
                        pb_key_read = TRUE;
4136
 
                        break;
4137
 
                case HA_EXTRA_NO_KEYREAD:
4138
 
                        pb_key_read = FALSE;
4139
 
                        break;
4140
 
                case HA_EXTRA_IGNORE_DUP_KEY:
4141
 
                        /* NOTE!!! Calls to extra(HA_EXTRA_IGNORE_DUP_KEY) can be nested!
4142
 
                         * In fact, the calls are from different threads, so
4143
 
                         * strictly speaking I should protect this variable!!
4144
 
                         * Here is the sequence that produces the duplicate call:
4145
 
                         *
4146
 
                         * drop table if exists t1;
4147
 
                         * CREATE TABLE t1 (x int not null, y int, primary key (x)) engine=pbxt;
4148
 
                         * insert into t1 values (1, 3), (4, 1);
4149
 
                         * replace DELAYED into t1 (x, y) VALUES (4, 2);
4150
 
                         * select * from t1 order by x;
4151
 
                         *
4152
 
                         */
4153
 
                        pb_ignore_dup_key++;
4154
 
                        break;
4155
 
                case HA_EXTRA_NO_IGNORE_DUP_KEY:
4156
 
                        pb_ignore_dup_key--;
4157
 
                        break;
4158
 
                case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
4159
 
                        /* MySQL needs all fields */
4160
 
                        pb_key_read = FALSE;
4161
 
                        break;
4162
 
                default:
4163
 
                        break;
4164
 
        }
4165
 
 
4166
 
        return err;
4167
 
}
4168
 
 
4169
 
 
4170
 
/*
4171
 
 * Deprecated and likely to be removed in the future. Storage engines normally
4172
 
 * just make a call like:
4173
 
 * ha_pbxt::extra(HA_EXTRA_RESET);
4174
 
 * to handle it.
4175
 
 */
4176
 
int ha_pbxt::reset(void)
4177
 
{
4178
 
        XT_TRACE_METHOD();
4179
 
        extra(HA_EXTRA_RESET_STATE);
4180
 
        XT_RETURN(0);
4181
 
}
4182
 
 
4183
 
void ha_pbxt::unlock_row()
4184
 
{
4185
 
        XT_TRACE_METHOD();
4186
 
        if (pb_open_tab)
4187
 
                pb_open_tab->ot_table->tab_locks.xt_remove_temp_lock(pb_open_tab, FALSE);
4188
 
}
4189
 
 
4190
 
/*
4191
 
 * Used to delete all rows in a table. Both for cases of truncate and
4192
 
 * for cases where the optimizer realizes that all rows will be
4193
 
 * removed as a result of a SQL statement.
4194
 
 *
4195
 
 * Called from item_sum.cc by Item_func_group_concat::clear(),
4196
 
 * Item_sum_count_distinct::clear(), and Item_func_group_concat::clear().
4197
 
 * Called from sql_delete.cc by delete_query().
4198
 
 * Called from sql_select.cc by JOIN::reinit().
4199
 
 * Called from sql_union.cc by st_select_lex_unit::exec().
4200
 
 */
4201
 
int ha_pbxt::delete_all_rows()
4202
 
{
4203
 
        THD                             *thd = current_thd;
4204
 
        int                             err = 0;
4205
 
        XTThreadPtr             self;
4206
 
        XTDDTable               *tab_def = NULL;
4207
 
        char                    path[PATH_MAX];
4208
 
 
4209
 
        XT_TRACE_METHOD();
4210
 
 
4211
 
        if (thd_sql_command(thd) != SQLCOM_TRUNCATE) {
4212
 
                /* Just like InnoDB we only handle TRUNCATE TABLE
4213
 
                 * by recreating the table.
4214
 
                 * DELETE FROM t must be handled by deleting
4215
 
                 * each row because it may be part of a transaction,
4216
 
                 * and there may be foreign key actions.
4217
 
                 */
4218
 
                XT_RETURN (errno = HA_ERR_WRONG_COMMAND);
4219
 
        }
4220
 
 
4221
 
        if (!(self = ha_set_current_thread(thd, &err)))
4222
 
                return xt_ha_pbxt_to_mysql_error(err);
4223
 
 
4224
 
        try_(a) {
4225
 
                XTDictionaryRec dic;
4226
 
 
4227
 
                memset(&dic, 0, sizeof(dic));
4228
 
 
4229
 
                dic = pb_share->sh_table->tab_dic;
4230
 
                xt_strcpy(PATH_MAX, path, pb_share->sh_table->tab_name->ps_path);
4231
 
 
4232
 
                if ((tab_def = dic.dic_table))
4233
 
                        tab_def->reference();
4234
 
 
4235
 
                if (!(thd_test_options(thd,OPTION_NO_FOREIGN_KEY_CHECKS)))
4236
 
                        tab_def->deleteAllRows(self);
4237
 
 
4238
 
                /* We should have a table lock! */
4239
 
                //ASSERT(pb_lock_table);
4240
 
                if (!pb_table_locked) {
4241
 
                        ha_aquire_exclusive_use(self, pb_share, this);
4242
 
                        pushr_(ha_release_exclusive_use, pb_share);
4243
 
                }
4244
 
                ha_close_open_tables(self, pb_share, NULL);
4245
 
 
4246
 
                /* This is required in the case of delete_all_rows, because we must
4247
 
                 * ensure that the handlers no longer reference the old
4248
 
                 * table, so that it will not be used again. The table
4249
 
                 * must be re-openned, because the ID has changed!
4250
 
                 *
4251
 
                 * 0.9.86+ Must check if this is still necessary.
4252
 
                 *
4253
 
                 * the ha_close_share(self, pb_share) call was moved from above
4254
 
                 * (before tab_def = dic.dic_table), because of a crash.
4255
 
                 * Test case:
4256
 
                 *
4257
 
                 * set storage_engine = pbxt;
4258
 
                 * create table t1 (s1 int primary key);
4259
 
                 * insert into t1 values (1);
4260
 
                 * create table t2 (s1 int, foreign key (s1) references t1 (s1));
4261
 
                 * insert into t2 values (1); 
4262
 
                 * truncate table t1; -- this should fail because of FK constraint
4263
 
                 * alter table t1 engine = myisam; -- this caused crash
4264
 
                 *
4265
 
                 */
4266
 
                ha_close_share(self, pb_share);
4267
 
 
4268
 
                /* MySQL documentation requires us to reset auto increment value to 1
4269
 
                 * on truncate even if the table was created with a different value. 
4270
 
                 * This is also consistent with other engines.
4271
 
                 */
4272
 
                dic.dic_min_auto_inc = 1;
4273
 
 
4274
 
                xt_create_table(self, (XTPathStrPtr) path, &dic);
4275
 
                if (!pb_table_locked)
4276
 
                        freer_(); // ha_release_exclusive_use(pb_share)
4277
 
        }
4278
 
        catch_(a) {
4279
 
                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4280
 
        }
4281
 
        cont_(a);
4282
 
 
4283
 
        if (tab_def)
4284
 
                tab_def->release(self);
4285
 
 
4286
 
        XT_RETURN(err);
4287
 
}
4288
 
 
4289
 
/*
4290
 
 * TODO: Implement!
4291
 
 * Assuming a key (a,b,c)
4292
 
 * 
4293
 
 * rec_per_key[0] = SELECT COUNT(*)/COUNT(DISTINCT a) FROM t;
4294
 
 * rec_per_key[1] = SELECT COUNT(*)/COUNT(DISTINCT a,b) FROM t;
4295
 
 * rec_per_key[2] = SELECT COUNT(*)/COUNT(DISTINCT a,b,c) FROM t;
4296
 
 *
4297
 
 * After this is implemented, the selectivity can serve as
4298
 
 * a quick estimate of records_in_range().
4299
 
 *
4300
 
 * After you have done this, you need to redo the index_merge*
4301
 
 * tests. Restore the standard result to check if we
4302
 
 * now agree with the MyISAM strategy.
4303
 
 * 
4304
 
 */
4305
 
int ha_pbxt::analyze(THD *thd)
4306
 
{
4307
 
        int                             err = 0;
4308
 
        XTDatabaseHPtr  db;
4309
 
        xtXactID                my_xn_id;
4310
 
        xtXactID                clean_xn_id = 0;
4311
 
        uint                    cnt = 10;
4312
 
 
4313
 
        XT_TRACE_METHOD();
4314
 
 
4315
 
        if (!pb_open_tab) {
4316
 
                if ((err = reopen()))
4317
 
                        XT_RETURN(err);
4318
 
        }
4319
 
 
4320
 
        /* Wait until the sweeper is no longer busy!
4321
 
         * If you want an accurate count(*) value, then call
4322
 
         * ANALYZE TABLE first. This function waits until the
4323
 
         * sweeper has completed.
4324
 
         */
4325
 
        db = pb_open_tab->ot_table->tab_db;
4326
 
        
4327
 
        /*
4328
 
         * Wait until everything is cleaned up before this transaction.
4329
 
         * But this will only work if the we quit out transaction!
4330
 
         *
4331
 
         * GOTCHA: When a PBXT table is partitioned, then analyze() is
4332
 
         * called for each component. The first calls xt_xn_commit().
4333
 
         * All following calls have no transaction!:
4334
 
         *
4335
 
         * CREATE TABLE t1 (a int)
4336
 
         * PARTITION BY LIST (a)
4337
 
         * (PARTITION x1 VALUES IN (10), PARTITION x2 VALUES IN (20));
4338
 
         * 
4339
 
         * analyze table t1;
4340
 
         * 
4341
 
         */
4342
 
        if (pb_open_tab->ot_thread && pb_open_tab->ot_thread->st_xact_data) {
4343
 
                my_xn_id = pb_open_tab->ot_thread->st_xact_data->xd_start_xn_id;
4344
 
                XT_PRINT0(xt_get_self(), "xt_xn_commit\n");
4345
 
                xt_xn_commit(pb_open_tab->ot_thread);
4346
 
        }
4347
 
        else
4348
 
                my_xn_id = db->db_xn_to_clean_id;
4349
 
 
4350
 
        while ((!db->db_sw_idle || xt_xn_is_before(db->db_xn_to_clean_id, my_xn_id)) && not (thd->getKilled())) {
4351
 
                xt_busy_wait();
4352
 
 
4353
 
                /*
4354
 
                 * It is possible that the sweeper gets stuck because
4355
 
                 * it has no dictionary information!
4356
 
                 * As in the example below.
4357
 
                 *
4358
 
                 * create table t4 (
4359
 
                 *   pk_col int auto_increment primary key, a1 char(64), a2 char(64), b char(16), c char(16) not null, d char(16), dummy char(64) default ' '
4360
 
                 * ) engine=pbxt;
4361
 
                 *
4362
 
                 * insert into t4 (a1, a2, b, c, d, dummy) select * from t1;
4363
 
                 * 
4364
 
                 * create index idx12672_0 on t4 (a1);
4365
 
                 * create index idx12672_1 on t4 (a1,a2,b,c);
4366
 
                 * create index idx12672_2 on t4 (a1,a2,b);
4367
 
                 * analyze table t1;
4368
 
                 */
4369
 
                if (db->db_sw_idle) {
4370
 
                        /* This will make sure we don't wait forever: */
4371
 
                        if (clean_xn_id != db->db_xn_to_clean_id) {
4372
 
                                clean_xn_id = db->db_xn_to_clean_id;
4373
 
                                cnt = 10;
4374
 
                        }
4375
 
                        else {
4376
 
                                cnt--;
4377
 
                                if (!cnt)
4378
 
                                        break;
4379
 
                        }
4380
 
                        xt_wakeup_sweeper(db);
4381
 
                }
4382
 
        }
4383
 
 
4384
 
        XT_RETURN(err);
4385
 
}
4386
 
 
4387
 
#ifdef DEBUG
4388
 
extern int pbxt_mysql_trace_on;
4389
 
#endif
4390
 
 
4391
 
int ha_pbxt::check(THD* thd)
4392
 
{
4393
 
        int                             err = 0;
4394
 
        XTThreadPtr             self;
4395
 
 
4396
 
        if (!(self = ha_set_current_thread(thd, &err)))
4397
 
                return xt_ha_pbxt_to_mysql_error(err);
4398
 
        if (self->st_lock_count)
4399
 
                ASSERT(self->st_xact_data);
4400
 
 
4401
 
        if (!pb_table_locked) {
4402
 
                ha_aquire_exclusive_use(self, pb_share, this);
4403
 
                pushr_(ha_release_exclusive_use, pb_share);
4404
 
        }
4405
 
 
4406
 
#ifdef CHECK_TABLE_LOADS
4407
 
        xt_tab_load_table(self, pb_open_tab);
4408
 
#endif
4409
 
        xt_check_table(self, pb_open_tab);
4410
 
 
4411
 
        if (!pb_table_locked)
4412
 
                freer_(); // ha_release_exclusive_use(pb_share)
4413
 
 
4414
 
        //pbxt_mysql_trace_on = TRUE;
4415
 
        return 0;
4416
 
}
4417
 
 
4418
 
/*
4419
 
 * This function is called:
4420
 
 * For each table in LOCK TABLES,
4421
 
 * OR
4422
 
 * For each table in a statement.
4423
 
 *
4424
 
 * It is called with F_UNLCK:
4425
 
 * in UNLOCK TABLES
4426
 
 * OR
4427
 
 * at the end of a statement.
4428
 
 *
4429
 
 */
4430
 
xtPublic int ha_pbxt::external_lock(THD *thd, int lock_type)
4431
 
{
4432
 
        /* Some compiler complain that: variable 'err' might be clobbered by 'longjmp' or 'vfork' */
4433
 
        volatile int                            err = 0;
4434
 
        XTThreadPtr             self;
4435
 
        
4436
 
        if (!(self = ha_set_current_thread(thd, (int *) &err)))
4437
 
                return xt_ha_pbxt_to_mysql_error(err);
4438
 
 
4439
 
        /* F_UNLCK is set when this function is called at end
4440
 
         * of statement or UNLOCK TABLES
4441
 
         */
4442
 
        if (lock_type == F_UNLCK) {
4443
 
                /* This is not TRUE if external_lock() FAILED!
4444
 
                 * Can we rely on external_unlock being called when
4445
 
                 * external_lock() fails? Currently yes, but it does
4446
 
                 * not make sense!
4447
 
                ASSERT_NS(pb_ex_in_use);
4448
 
                */
4449
 
 
4450
 
                XT_PRINT1(self, "EXTERNAL_LOCK (%s) lock_type=UNLOCK\n", pb_share->sh_table_path->ps_path);
4451
 
 
4452
 
                /* Make any temporary locks on this table permanent.
4453
 
                 *
4454
 
                 * This is required here because of the following example:
4455
 
                 * create table t1 (a int NOT NULL, b int, primary key (a));
4456
 
                 * create table t2 (a int NOT NULL, b int, primary key (a));
4457
 
                 * insert into t1 values (0, 10),(1, 11),(2, 12);
4458
 
                 * insert into t2 values (1, 21),(2, 22),(3, 23);
4459
 
                 * update t1 set b= (select b from t2 where t1.a = t2.a);
4460
 
                 * update t1 set b= (select b from t2 where t1.a = t2.a);
4461
 
                 * select * from t1;
4462
 
                 * drop table t1, t2;
4463
 
                 *
4464
 
                 */
4465
 
 
4466
 
                /* GOTCHA! It's weird, but, if this function returns an error
4467
 
                 * on lock, then UNLOCK is called?!
4468
 
                 * This should not be done, because if lock fails, it should be
4469
 
                 * assumed that no UNLOCK is required.
4470
 
                 * Basically, I have to assume that some code will presume this,
4471
 
                 * although the function lock_external() calls unlock, even
4472
 
                 * when lock fails.
4473
 
                 * The result is, that my lock count can go wrong. So I could
4474
 
                 * change the lock method, and increment the lock count, even
4475
 
                 * if it fails. However, the consequences are more serious,
4476
 
                 * if some code decides not to call UNLOCK after lock fails.
4477
 
                 * The result is that I would have a permanent too high lock,
4478
 
                 * count and nothing will work.
4479
 
                 * So instead, I handle the fact that I might get too many unlocks
4480
 
                 * here.
4481
 
                 */
4482
 
                if (self->st_lock_count > 0)
4483
 
                        self->st_lock_count--;
4484
 
                if (!self->st_lock_count) {
4485
 
                        /* This section handles "auto-commit"... */
4486
 
 
4487
 
#ifdef XT_IMPLEMENT_NO_ACTION
4488
 
                        /* {NO-ACTION-BUG}
4489
 
                         * This is required here because it marks the end of a statement.
4490
 
                         * If we are in a non-auto-commit mode, then we cannot
4491
 
                         * wait for st_is_update to be set by the beginning of a new transaction.
4492
 
                         */
4493
 
                        if (self->st_restrict_list.bl_count) {
4494
 
                                if (!xt_tab_restrict_rows(&self->st_restrict_list, self))
4495
 
                                        err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4496
 
                        }
4497
 
#endif
4498
 
 
4499
 
                        if (self->st_xact_data) {
4500
 
                                if (self->st_auto_commit) {
4501
 
                                        /*
4502
 
                                         * Normally I could assume that if the transaction
4503
 
                                         * has not been aborted by now, then it should be committed.
4504
 
                                         *
4505
 
                                         * Unfortunately, this is not the case!
4506
 
                                         *
4507
 
                                         * create table t1 (id int primary key) engine = pbxt;
4508
 
                                         * create table t2 (id int) engine = pbxt;
4509
 
                                         * 
4510
 
                                         * insert into t1 values ( 1 ) ;
4511
 
                                         * insert into t1 values ( 2 ) ;
4512
 
                                         * insert into t2 values ( 1 ) ;
4513
 
                                         * insert into t2 values ( 2 ) ;
4514
 
                                         * 
4515
 
                                         * --This statement returns an error and calls ha_autocommit_or_rollback():
4516
 
                                         * update t1 set t1.id=1 where t1.id=2;
4517
 
                                         * 
4518
 
                                         * --This statement returns no error and calls ha_autocommit_or_rollback():
4519
 
                                         * update t1,t2 set t1.id=3, t2.id=3 where t1.id=2 and t2.id = t1.id;
4520
 
                                         * 
4521
 
                                         * --But this statement returns an error and does not call ha_autocommit_or_rollback():
4522
 
                                         * update t1,t2 set t1.id=1, t2.id=1 where t1.id=3 and t2.id = t1.id;
4523
 
                                         * 
4524
 
                                         * The result is, I cannot rely on ha_autocommit_or_rollback() being called :(
4525
 
                                         * So I have to abort myself here...
4526
 
                                         */
4527
 
                                        if (pb_open_tab)
4528
 
                                                pb_open_tab->ot_table->tab_locks.xt_make_lock_permanent(pb_open_tab, &self->st_lock_list);
4529
 
 
4530
 
                                        if (self->st_abort_trans) {
4531
 
                                                XT_PRINT0(self, "xt_xn_rollback in unlock\n");
4532
 
                                                if (!xt_xn_rollback(self))
4533
 
                                                        err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4534
 
                                        }
4535
 
                                        else {
4536
 
                                                XT_PRINT0(self, "xt_xn_commit in unlock\n");
4537
 
                                                if (!xt_xn_commit(self))
4538
 
                                                        err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4539
 
                                        }
4540
 
                                }
4541
 
                        }
4542
 
 
4543
 
                        /* If the previous statement was "for update", then set the visibility
4544
 
                         * so that non- for update SELECTs will see what the for update select
4545
 
                         * (or update statement) just saw.
4546
 
                         */
4547
 
                        if (pb_open_tab) {
4548
 
                                if (pb_open_tab->ot_for_update) {
4549
 
                                        self->st_visible_time = self->st_database->db_xn_end_time;
4550
 
                                        pb_open_tab->ot_for_update = 0;
4551
 
                                }
4552
 
 
4553
 
                                if (pb_share->sh_recalc_selectivity) {
4554
 
#ifdef XT_ROW_COUNT_CORRECTED
4555
 
                                        /* {CORRECTED-ROW-COUNT} */
4556
 
                                        if ((pb_share->sh_table->tab_row_eof_id - 1 - pb_share->sh_table->tab_row_fnum) >= 200)
4557
 
#else
4558
 
                                        /* {FREE-ROWS-BAD} */
4559
 
                                        if ((pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) >= 200)
4560
 
#endif
4561
 
                                        {
4562
 
                                                /* [**] */
4563
 
                                                pb_share->sh_recalc_selectivity = FALSE;
4564
 
                                                xt_ind_set_index_selectivity(pb_open_tab, self);
4565
 
#ifdef XT_ROW_COUNT_CORRECTED
4566
 
                                                /* {CORRECTED-ROW-COUNT} */
4567
 
                                                pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 - pb_share->sh_table->tab_row_fnum) < 150;
4568
 
#else
4569
 
                                                /* {FREE-ROWS-BAD} */
4570
 
                                                pb_share->sh_recalc_selectivity = (pb_share->sh_table->tab_row_eof_id - 1 /* - pb_share->sh_table->tab_row_fnum */) < 150;
4571
 
#endif
4572
 
                                        }
4573
 
                                }
4574
 
                        }
4575
 
 
4576
 
                        if (self->st_stat_modify)
4577
 
                                self->st_statistics.st_stat_write++;
4578
 
                        else
4579
 
                                self->st_statistics.st_stat_read++;
4580
 
                        self->st_stat_modify = FALSE;
4581
 
                        self->st_import_stat = XT_IMP_NO_IMPORT;
4582
 
                }
4583
 
 
4584
 
                if (pb_table_locked) {
4585
 
                        pb_table_locked--;
4586
 
                        if (!pb_table_locked)
4587
 
                                ha_release_exclusive_use(self, pb_share);
4588
 
                }
4589
 
 
4590
 
                /* No longer in use: */
4591
 
                pb_ex_in_use = 0;
4592
 
                /* Someone may be waiting for me to complete: */
4593
 
                if (pb_share->sh_table_lock)
4594
 
                        xt_broadcast_cond_ns((xt_cond_type *) pb_share->sh_ex_cond);
4595
 
        }
4596
 
        else {
4597
 
                XT_PRINT2(self, "ha_pbxt::EXTERNAL_LOCK (%s) lock_type=%d\n", pb_share->sh_table_path->ps_path, lock_type);
4598
 
                
4599
 
                if (pb_lock_table) {
4600
 
                        pb_ex_in_use = 1;
4601
 
                        try_(a) {
4602
 
                                if (!pb_table_locked)
4603
 
                                        ha_aquire_exclusive_use(self, pb_share, this);
4604
 
                                pb_table_locked++;
4605
 
 
4606
 
                                ha_close_open_tables(self, pb_share, this);
4607
 
 
4608
 
                                if (!pb_share->sh_table) {
4609
 
                                        xt_ha_open_database_of_table(self, pb_share->sh_table_path);
4610
 
 
4611
 
                                        ha_open_share(self, pb_share);
4612
 
                                }
4613
 
                        }
4614
 
                        catch_(a) {
4615
 
                                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4616
 
                                pb_ex_in_use = 0;
4617
 
                                goto complete;
4618
 
                        }
4619
 
                        cont_(a);
4620
 
 
4621
 
                        /* Occurs if you do:
4622
 
                         * truncate table t1;
4623
 
                         * truncate table t1;
4624
 
                         */
4625
 
                        if (!pb_open_tab) {
4626
 
                                if ((err = reopen())) {
4627
 
                                        pb_ex_in_use = 0;
4628
 
                                        goto complete;
4629
 
                                }
4630
 
                        }
4631
 
                }
4632
 
                else {
4633
 
                        pb_ex_in_use = 1;
4634
 
                        if (pb_share->sh_table_lock && !pb_table_locked) {
4635
 
                                /* If some thread has an exclusive lock, then
4636
 
                                 * we wait for the lock to be removed:
4637
 
                                 */
4638
 
                                if (!ha_wait_for_shared_use(this, pb_share)) {
4639
 
                                        err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
4640
 
                                        goto complete;
4641
 
                                }
4642
 
                        }
4643
 
 
4644
 
                        if (!pb_open_tab) {
4645
 
                                if ((err = reopen())) {
4646
 
                                        pb_ex_in_use = 0;
4647
 
                                        goto complete;
4648
 
                                }
4649
 
                        }
4650
 
 
4651
 
                        /* Set the current thread for this open table: */
4652
 
                        pb_open_tab->ot_thread = self;
4653
 
 
4654
 
                        /* If this is a set, then it is in UPDATE/DELETE TABLE ...
4655
 
                         * or SELECT ... FOR UPDATE
4656
 
                         */     
4657
 
                        pb_open_tab->ot_is_modify = FALSE;
4658
 
                        if ((pb_open_tab->ot_for_update = (lock_type == F_WRLCK))) {
4659
 
                                switch ((int) thd_sql_command(thd)) {
4660
 
                                        case SQLCOM_DELETE:
4661
 
#ifndef DRIZZLED
4662
 
                                        case SQLCOM_DELETE_MULTI:
4663
 
#endif
4664
 
                                                /* turn DELETE IGNORE into normal DELETE. The IGNORE option causes problems because 
4665
 
                                                 * when a record is deleted we add an xlog record which we cannot "rollback" later
4666
 
                                                 * when we find that an FK-constraint has failed. 
4667
 
                                                 */
4668
 
                                                thd->lex->ignore = false;
4669
 
                                        case SQLCOM_UPDATE:
4670
 
#ifndef DRIZZLED
4671
 
                                        case SQLCOM_UPDATE_MULTI:
4672
 
#endif
4673
 
                                        case SQLCOM_REPLACE:
4674
 
                                        case SQLCOM_REPLACE_SELECT:
4675
 
                                        case SQLCOM_INSERT:
4676
 
                                        case SQLCOM_INSERT_SELECT:
4677
 
                                                pb_open_tab->ot_is_modify = TRUE;
4678
 
                                                self->st_stat_modify = TRUE;
4679
 
                                                break;
4680
 
                                        case SQLCOM_ALTER_TABLE:
4681
 
                                        case SQLCOM_CREATE_INDEX:
4682
 
#ifndef DRIZZLED
4683
 
                                        case SQLCOM_REPAIR:
4684
 
                                        case SQLCOM_OPTIMIZE:
4685
 
#endif
4686
 
                                        case SQLCOM_DROP_INDEX:
4687
 
                                                self->st_stat_modify = TRUE;
4688
 
                                                self->st_import_stat = XT_IMP_COPY_TABLE;
4689
 
                                                pb_import_row_count = 0;
4690
 
                                                /* Do not read FOR UPDATE!
4691
 
                                                 * this avoids taking locks on the rows that are read
4692
 
                                                 * Which leads to the assertion failure:
4693
 
                                                 * int XTRowLocks::xt_make_lock_permanent(XTOpenTable*, XTRowLockList*)(lock_xt.cc:646) item
4694
 
                                                 * after the transaction is committed in doInsertRecord.
4695
 
                                                 */
4696
 
                                                pb_open_tab->ot_for_update = FALSE;
4697
 
                                                break;
4698
 
                                        case SQLCOM_LOAD:
4699
 
                                                self->st_stat_modify = TRUE;
4700
 
                                                self->st_import_stat = XT_IMP_LOAD_TABLE;
4701
 
                                                pb_import_row_count = 0;
4702
 
                                                pb_open_tab->ot_for_update = FALSE;
4703
 
                                                break;
4704
 
                                        case SQLCOM_CREATE_TABLE:
4705
 
                                        case SQLCOM_TRUNCATE:
4706
 
                                        case SQLCOM_DROP_TABLE:
4707
 
                                                self->st_stat_modify = TRUE;
4708
 
                                                break;
4709
 
                                }
4710
 
                        }
4711
 
 
4712
 
                        if (pb_open_tab->ot_is_modify && pb_open_tab->ot_table->tab_dic.dic_disable_index) {
4713
 
                                xt_tab_set_index_error(pb_open_tab->ot_table);
4714
 
                                err = ha_log_pbxt_thread_error_for_mysql(pb_ignore_dup_key);
4715
 
                                goto complete;
4716
 
                        }
4717
 
                }
4718
 
 
4719
 
                /* Record the associated MySQL thread: */
4720
 
                pb_mysql_thd = thd;
4721
 
 
4722
 
                if (self->st_database != pb_share->sh_table->tab_db) {                          
4723
 
                        try_(b) {
4724
 
                                /* PBXT does not permit multiple databases in one statement,
4725
 
                                 * or in a single transaction!
4726
 
                                 *
4727
 
                                 * Example query:
4728
 
                                 *
4729
 
                                 * update mysqltest_1.t1, mysqltest_2.t2 set a=10,d=10;
4730
 
                                 */
4731
 
                                if (self->st_lock_count > 0)
4732
 
                                        xt_throw_xterr(XT_CONTEXT, XT_ERR_MULTIPLE_DATABASES);
4733
 
 
4734
 
                                xt_ha_open_database_of_table(self, pb_share->sh_table_path);
4735
 
                        }
4736
 
                        catch_(b) {
4737
 
                                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4738
 
                                pb_ex_in_use = 0;
4739
 
                                goto complete;
4740
 
                        }
4741
 
                        cont_(b);
4742
 
                }
4743
 
 
4744
 
                /* See {IS-UPDATE-STAT} and {UPDATE-STACK} */
4745
 
                self->st_is_update = NULL;
4746
 
 
4747
 
                /* Auto begin a transaction (if one is not already running): */
4748
 
                if (!self->st_xact_data) {
4749
 
                        /* Transaction mode numbers must be identical! */
4750
 
                        (void) ASSERT_NS(ISO_READ_UNCOMMITTED == XT_XACT_UNCOMMITTED_READ);
4751
 
                        (void) ASSERT_NS(ISO_SERIALIZABLE == XT_XACT_SERIALIZABLE);
4752
 
 
4753
 
                        thd_init_xact(thd, self, true);
4754
 
                        
4755
 
                        if (!xt_xn_begin(self)) {
4756
 
                                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
4757
 
                                pb_ex_in_use = 0;
4758
 
                                goto complete;
4759
 
                        }
4760
 
                        /*
4761
 
                         * {START-TRANS} GOTCHA: trans_register_ha() is not mentioned in the documentation.
4762
 
                         * It must be called to inform MySQL that we have a transaction (see start_stmt).
4763
 
                         *
4764
 
                         * Here are some tests that confirm whether things are done correctly:
4765
 
                         *
4766
 
                         * drop table if exists t1, t2;
4767
 
                         * create table t1 (c1 int);
4768
 
                         * insert t1 values (1);
4769
 
                         * select * from t1;
4770
 
                         * rename table t1 to t2;
4771
 
                         *
4772
 
                         * rename will generate an error if MySQL thinks a transaction is
4773
 
                         * still running.
4774
 
                         *
4775
 
                         * create table t1 (a text character set utf8, b text character set latin1);
4776
 
                         * insert t1 values (0x4F736E616272C3BC636B, 0x4BF66C6E);
4777
 
                         * select * from t1;
4778
 
                         * --exec $MYSQL_DUMP --tab=$MYSQLTEST_VARDIR/tmp/ test
4779
 
                         * --exec $MYSQL test < $MYSQLTEST_VARDIR/tmp/t1.sql
4780
 
                         * --exec $MYSQL_IMPORT test $MYSQLTEST_VARDIR/tmp/t1.txt
4781
 
                         * select * from t1;
4782
 
                         *
4783
 
                         * This test forces a begin transaction in start_stmt()
4784
 
                         *
4785
 
                         * drop tables if exists t1;
4786
 
                         * create table t1 (c1 int);
4787
 
                         * lock tables t1 write;
4788
 
                         * insert t1 values (1);
4789
 
                         * insert t1 values (2);
4790
 
                         * unlock tables;
4791
 
                         *
4792
 
                         * The second select will return an empty result of the
4793
 
                         * MySQL is not informed that a transaction is running (auto-commit 
4794
 
                         * in external_lock comes too late)!
4795
 
                         *
4796
 
                         */
4797
 
#ifndef DRIZZLED
4798
 
                        if (!self->st_auto_commit) {
4799
 
                                trans_register_ha(thd, TRUE, pbxt_hton);
4800
 
                                XT_PRINT0(self, "CONN START XACT - ha_pbxt::external_lock --> trans_register_ha\n");
4801
 
                        }
4802
 
#endif
4803
 
                }
4804
 
 
4805
 
                /* Start a statement transaction: */
4806
 
                /* {START-STAT-HACK} The problem that ha_commit_trans() is not
4807
 
                 * called by MySQL seems to be fixed (tests confirm this).
4808
 
                 * Here is the previous comment when this code was execute 
4809
 
                 * here {START-STAT-HACK}
4810
 
                 *
4811
 
                 * GOTCHA: I have a huge problem with the transaction statement.
4812
 
                 * It is not ALWAYS committed (I mean ha_commit_trans() is
4813
 
                 * not always called - for example in SELECT).
4814
 
                 *
4815
 
                 * If I call trans_register_ha() but ha_commit_trans() is not called
4816
 
                 * then MySQL thinks a transaction is still running (while
4817
 
                 * I have committed the auto-transaction in ha_pbxt::external_lock()).
4818
 
                 *
4819
 
                 * This causes all kinds of problems, like transactions
4820
 
                 * are killed when they should not be.
4821
 
                 *
4822
 
                 * To prevent this, I only inform MySQL that a transaction
4823
 
                 * has been started when an update is performed. I have determined that
4824
 
                 * ha_commit_trans() is only guaranteed to be called if an update is done.
4825
 
                 * --------
4826
 
                 *
4827
 
                 * So, this is the correct place to start a statement transaction.
4828
 
                 *
4829
 
                 * Note: if trans_register_ha() is not called before insertRecord(), then 
4830
 
                 * PBXT is not registered correctly as a modification transaction.
4831
 
                 * (mark_trx_read_write call in insertRecord).
4832
 
                 * This leads to 2-phase commit not being called as it should when
4833
 
                 * binary logging is enabled.
4834
 
                 */
4835
 
#ifndef DRIZZLED
4836
 
                if (!pb_open_tab->ot_thread->st_stat_trans) {
4837
 
                        trans_register_ha(pb_mysql_thd, FALSE, pbxt_hton);
4838
 
                        XT_PRINT0(pb_open_tab->ot_thread, "STAT START - ha_pbxt::external_lock --> trans_register_ha\n");
4839
 
                        pb_open_tab->ot_thread->st_stat_trans = TRUE;
4840
 
                }
4841
 
#endif
4842
 
                if (lock_type == F_WRLCK || self->st_xact_mode < XT_XACT_REPEATABLE_READ)
4843
 
                        self->st_visible_time = self->st_database->db_xn_end_time;
4844
 
 
4845
 
#ifdef TRACE_STATEMENTS
4846
 
                if (self->st_lock_count == 0)
4847
 
                        STAT_TRACE(self, *thd_query(thd));
4848
 
#endif
4849
 
                self->st_lock_count++;
4850
 
        }
4851
 
 
4852
 
        complete:
4853
 
        return err;
4854
 
}
4855
 
 
4856
 
/*
4857
 
 * This function is called for each table in a statement
4858
 
 * after LOCK TABLES has been used.
4859
 
 *
4860
 
 * Currently I only use this function to set the
4861
 
 * current thread of the table handle. 
4862
 
 *
4863
 
 * GOTCHA: The prototype of start_stmt() has changed
4864
 
 * from version 4.1 to 5.1!
4865
 
 */
4866
 
int ha_pbxt::start_stmt(THD *thd, thr_lock_type lock_type)
4867
 
{
4868
 
        int                             err = 0;
4869
 
        XTThreadPtr             self;
4870
 
 
4871
 
        ASSERT_NS(pb_ex_in_use);
4872
 
 
4873
 
        if (!(self = ha_set_current_thread(thd, &err)))
4874
 
                return xt_ha_pbxt_to_mysql_error(err);
4875
 
 
4876
 
        XT_PRINT2(self, "ha_pbxt::start_stmt (%s) lock_type=%d\n", pb_share->sh_table_path->ps_path, (int) lock_type);
4877
 
 
4878
 
        if (!pb_open_tab) {
4879
 
                if ((err = reopen()))
4880
 
                        goto complete;
4881
 
        }
4882
 
 
4883
 
        ASSERT_NS(pb_open_tab->ot_thread == self);
4884
 
        ASSERT_NS(thd == pb_mysql_thd);
4885
 
        ASSERT_NS(self->st_database == pb_open_tab->ot_table->tab_db);
4886
 
 
4887
 
        if (self->st_stat_ended) {
4888
 
                self->st_stat_ended = FALSE;
4889
 
                self->st_stat_trans = FALSE;
4890
 
 
4891
 
#ifdef XT_IMPLEMENT_NO_ACTION
4892
 
                if (self->st_restrict_list.bl_count) {
4893
 
                        if (!xt_tab_restrict_rows(&self->st_restrict_list, self)) {
4894
 
                                err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, self, pb_ignore_dup_key);
4895
 
                        }
4896
 
                }
4897
 
#endif
4898
 
 
4899
 
                /* This section handles "auto-commit"... */
4900
 
                if (self->st_xact_data && self->st_auto_commit && self->st_table_trans) {
4901
 
                        if (self->st_abort_trans) {
4902
 
                                XT_PRINT0(self, "xt_xn_rollback in start_stmt\n");
4903
 
                                if (!xt_xn_rollback(self))
4904
 
                                        err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, self, pb_ignore_dup_key);
4905
 
                        }
4906
 
                        else {
4907
 
                                XT_PRINT0(self, "xt_xn_commit in start_stmt\n");
4908
 
                                if (!xt_xn_commit(self))
4909
 
                                        err = xt_ha_pbxt_thread_error_for_mysql(pb_mysql_thd, self, pb_ignore_dup_key);
4910
 
                        }
4911
 
                }
4912
 
 
4913
 
                if (self->st_stat_modify)
4914
 
                        self->st_statistics.st_stat_write++;
4915
 
                else
4916
 
                        self->st_statistics.st_stat_read++;
4917
 
                self->st_stat_modify = FALSE;
4918
 
                self->st_import_stat = XT_IMP_NO_IMPORT;
4919
 
 
4920
 
                /* If the previous statement was "for update", then set the visibilty
4921
 
                 * so that non- for update SELECTs will see what the for update select
4922
 
                 * (or update statement) just saw.
4923
 
                 */
4924
 
                if (pb_open_tab->ot_for_update)
4925
 
                        self->st_visible_time = self->st_database->db_xn_end_time;
4926
 
        }
4927
 
 
4928
 
        pb_open_tab->ot_for_update =
4929
 
                (lock_type != TL_READ && 
4930
 
                 lock_type != TL_READ_WITH_SHARED_LOCKS &&
4931
 
#ifndef DRIZZLED
4932
 
                 lock_type != TL_READ_HIGH_PRIORITY && 
4933
 
#endif
4934
 
                 lock_type != TL_READ_NO_INSERT);
4935
 
        pb_open_tab->ot_is_modify = FALSE;
4936
 
        if (pb_open_tab->ot_for_update) {
4937
 
                switch ((int) thd_sql_command(thd)) {
4938
 
                        case SQLCOM_UPDATE:
4939
 
                        case SQLCOM_DELETE:
4940
 
#ifndef DRIZZLED
4941
 
                        case SQLCOM_UPDATE_MULTI:
4942
 
                        case SQLCOM_DELETE_MULTI:
4943
 
#endif
4944
 
                        case SQLCOM_REPLACE:
4945
 
                        case SQLCOM_REPLACE_SELECT:
4946
 
                        case SQLCOM_INSERT:
4947
 
                        case SQLCOM_INSERT_SELECT:
4948
 
                                pb_open_tab->ot_is_modify = TRUE;
4949
 
                                self->st_stat_modify = TRUE;
4950
 
                                break;
4951
 
                        case SQLCOM_CREATE_TABLE:
4952
 
                        case SQLCOM_CREATE_INDEX:
4953
 
                        case SQLCOM_ALTER_TABLE:
4954
 
                        case SQLCOM_TRUNCATE:
4955
 
                        case SQLCOM_DROP_TABLE:
4956
 
                        case SQLCOM_DROP_INDEX:
4957
 
                        case SQLCOM_LOAD:
4958
 
#ifndef DRIZZLED
4959
 
                        case SQLCOM_REPAIR:
4960
 
                        case SQLCOM_OPTIMIZE:
4961
 
#endif
4962
 
                                self->st_stat_modify = TRUE;
4963
 
                                break;
4964
 
                }
4965
 
        }
4966
 
 
4967
 
        /* {IS-UPDATE-STAT} This is required at this level!
4968
 
         * No matter how often it is called, it is still the start of a
4969
 
         * statement. We need to make sure statements that are NOT mistaken
4970
 
         * for different type of statement.
4971
 
         *
4972
 
         * Here is an example:
4973
 
         * select * from t1 where data = getcount("bar")
4974
 
         *
4975
 
         * If the procedure getcount() addresses another table.
4976
 
         * then open and close of the statements in getcount()
4977
 
         * are nested within an open close of the select t1
4978
 
         * statement.
4979
 
         */
4980
 
        /* {UPDATE-STACK}
4981
 
         * Add to this I add the following:
4982
 
         * A trigger in the middle of an update also causes nested
4983
 
         * statements. If I reset st_is_update, then then
4984
 
         * when the trigger returns the system thinks we
4985
 
         * are in a different update statement, and may
4986
 
         * update the same row again.
4987
 
         */
4988
 
        if (self->st_is_update == pb_open_tab) {
4989
 
                /* Pop the update stack: */
4990
 
                XTOpenTablePtr curr = pb_open_tab->ot_thread->st_is_update;
4991
 
 
4992
 
                pb_open_tab->ot_thread->st_is_update = curr->ot_prev_update;
4993
 
                curr->ot_prev_update = NULL;
4994
 
        }
4995
 
 
4996
 
        /* See comment {START-TRANS} */
4997
 
        if (!self->st_xact_data) {
4998
 
 
4999
 
                thd_init_xact(thd, self, false);
5000
 
 
5001
 
                if (!xt_xn_begin(self)) {
5002
 
                        err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
5003
 
                        goto complete;
5004
 
                }
5005
 
#ifndef DRIZZLED
5006
 
                if (!self->st_auto_commit) {
5007
 
                        trans_register_ha(thd, TRUE, pbxt_hton);
5008
 
                        XT_PRINT0(self, "START CONN XACT - ha_pbxt::start_stmt --> trans_register_ha\n");
5009
 
                }
5010
 
#endif
5011
 
        }
5012
 
 
5013
 
        /* Start a statment (see {START-STAT-HACK}): */
5014
 
#ifndef DRIZZLED
5015
 
        if (!pb_open_tab->ot_thread->st_stat_trans) {
5016
 
                trans_register_ha(pb_mysql_thd, FALSE, pbxt_hton);
5017
 
                XT_PRINT0(pb_open_tab->ot_thread, "START STAT - ha_pbxt::start_stmt --> trans_register_ha\n");
5018
 
                pb_open_tab->ot_thread->st_stat_trans = TRUE;
5019
 
        }
5020
 
#endif
5021
 
        if (pb_open_tab->ot_for_update || self->st_xact_mode < XT_XACT_REPEATABLE_READ)
5022
 
                self->st_visible_time = self->st_database->db_xn_end_time;
5023
 
 
5024
 
        pb_in_stat = TRUE;
5025
 
 
5026
 
        self->st_stat_count++;
5027
 
 
5028
 
        complete:
5029
 
        return err;
5030
 
}
5031
 
 
5032
 
/*
5033
 
 * The idea with handler::store_lock() is the following:
5034
 
 *
5035
 
 * The statement decided which locks we should need for the table
5036
 
 * for updates/deletes/inserts we get WRITE locks, for SELECT... we get
5037
 
 * read locks.
5038
 
 *
5039
 
 * Before adding the lock into the table lock handler (see thr_lock.c)
5040
 
 * mysqld calls store lock with the requested locks. Store lock can now
5041
 
 * modify a write lock to a read lock (or some other lock), ignore the
5042
 
 * lock (if we don't want to use MySQL table locks at all) or add locks
5043
 
 * for many tables (like we do when we are using a MERGE handler).
5044
 
 *
5045
 
 * When releasing locks, store_lock() are also called. In this case one
5046
 
 * usually doesn't have to do anything.
5047
 
 *
5048
 
 * In some exceptional cases MySQL may send a request for a TL_IGNORE;
5049
 
 * This means that we are requesting the same lock as last time and this
5050
 
 * should also be ignored. (This may happen when someone does a flush
5051
 
 * table when we have opened a part of the tables, in which case mysqld
5052
 
 * closes and reopens the tables and tries to get the same locks at last
5053
 
 * time). In the future we will probably try to remove this.
5054
 
 *
5055
 
 * Called from lock.cc by get_lock_data().
5056
 
 */
5057
 
THR_LOCK_DATA **ha_pbxt::store_lock(THD *thd, THR_LOCK_DATA **to, enum thr_lock_type lock_type)
5058
 
{
5059
 
        /*
5060
 
         * TL_READ means concurrent INSERTs are allowed. This is a problem as in this mode
5061
 
         * PBXT is not compatible with MyISAM which allows INSERTs but isolates them from
5062
 
         * current "transaction" (started by LOCK TABLES, ended by UNLOCK TABLES). PBXT 
5063
 
         * used to allow INSERTs and made them visible to the locker (on commit). 
5064
 
         * While MySQL manual doesn't state anything regarding row visibility limitations 
5065
 
         * we choose to convert local locks into normal read locks for better compatibility 
5066
 
         * with MyISAM.
5067
 
         */
5068
 
        if (lock_type == TL_READ)
5069
 
                lock_type = TL_READ_NO_INSERT;
5070
 
 
5071
 
        if (lock_type != TL_IGNORE && pb_lock.type == TL_UNLOCK) {
5072
 
                /* Set to TRUE for operations that require a table lock: */
5073
 
                switch (thd_sql_command(thd)) {
5074
 
                        case SQLCOM_TRUNCATE:
5075
 
                                /* GOTCHA:
5076
 
                                 * The problem is, if I do not do this, then
5077
 
                                 * TRUNCATE TABLE deadlocks with a normal update of the table!
5078
 
                                 * The reason is:
5079
 
                                 *
5080
 
                                 * external_lock() is called before MySQL actually locks the
5081
 
                                 * table. In external_lock(), the table is shared locked,
5082
 
                                 * by indicating that the handler is in use.
5083
 
                                 *
5084
 
                                 * Then later, in delete_all_rows(), a exclusive lock must be
5085
 
                                 * obtained. If an UPDATE or INSERT has also gained a shared
5086
 
                                 * lock in the meantime, then TRUNCATE TABLE hangs.
5087
 
                                 *
5088
 
                                 * By setting pb_lock_table we indicate that an exclusive lock
5089
 
                                 * should be gained in external_lock().
5090
 
                                 *
5091
 
                                 * This is the locking behaviour:
5092
 
                                 *
5093
 
                                 * TRUNCATE TABLE:
5094
 
                                 * XT SHARE LOCK (mysql_lock_tables calls external_lock)
5095
 
                                 * MySQL WRITE LOCK (mysql_lock_tables)
5096
 
                                 * ...
5097
 
                                 * XT EXCLUSIVE LOCK (delete_all_rows)
5098
 
                                 *
5099
 
                                 * INSERT:
5100
 
                                 * XT SHARED LOCK (mysql_lock_tables calls external_lock)
5101
 
                                 * MySQL WRITE_ALLOW_WRITE LOCK (mysql_lock_tables)
5102
 
                                 *
5103
 
                                 * If the locking for INSERT is done in the ... phase
5104
 
                                 * above, then we have a deadlock because 
5105
 
                                 * WRITE_ALLOW_WRITE conflicts with WRITE.
5106
 
                                 *
5107
 
                                 * Making TRUNCATE TABLE take a WRITE_ALLOW_WRITE LOCK, will
5108
 
                                 * not solve the problem because then 2 TRUNCATE TABLES
5109
 
                                 * can deadlock due to lock escalation.
5110
 
                                 *
5111
 
                                 * What may work is if MySQL were to lock BEFORE calling
5112
 
                                 * external_lock()!
5113
 
                                 *
5114
 
                                 * However, using this method, TRUNCATE TABLE does deadlock
5115
 
                                 * with other operations such as ALTER TABLE!
5116
 
                                 *
5117
 
                                 * This is handled with a lock timeout. Assuming 
5118
 
                                 * TRUNCATE TABLE will be mixed with DML this is the
5119
 
                                 * best solution!
5120
 
                                 */
5121
 
                                pb_lock_table = TRUE;
5122
 
                                break;
5123
 
                        default:
5124
 
                                pb_lock_table = FALSE;
5125
 
                                break;
5126
 
                }
5127
 
 
5128
 
#ifdef PBXT_HANDLER_TRACE
5129
 
                pb_lock.type = lock_type;
5130
 
#endif
5131
 
                /* GOTCHA: Before it was OK to weaken the lock after just checking
5132
 
                 * that !thd->in_lock_tables. However, when starting a procedure, MySQL
5133
 
                 * simulates a LOCK TABLES statement.
5134
 
                 *
5135
 
                 * So we need to be more specific here, and check what the actual statement
5136
 
                 * type. Before doing this I got a deadlock (undetected) on the following test.
5137
 
                 * However, now we get a failed assertion in ha_rollback_trans():
5138
 
                 * TODO: Check this with InnoDB!
5139
 
                 *
5140
 
                 * DBUG_ASSERT(0);
5141
 
                 * my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
5142
 
                 *
5143
 
                 * drop table if exists t3;
5144
 
                 * create table t3 (a smallint primary key) engine=pbxt;
5145
 
                 * insert into t3 (a) values (40);
5146
 
                 * insert into t3 (a) values (50);
5147
 
                 * 
5148
 
                 * delimiter |
5149
 
                 * 
5150
 
                 * drop function if exists t3_update|
5151
 
                 * 
5152
 
                 * create function t3_update() returns int
5153
 
                 * begin
5154
 
                 *   insert into t3 values (10);
5155
 
                 *   return 100;
5156
 
                 * end|
5157
 
                 * 
5158
 
                 * delimiter ;
5159
 
                 * 
5160
 
                 * CONN 1:
5161
 
                 * 
5162
 
                 * begin;
5163
 
                 * update t3 set a = 5 where a = 50;
5164
 
                 * 
5165
 
                 * CONN 2:
5166
 
                 * 
5167
 
                 * begin;
5168
 
                 * update t3 set a = 4 where a = 40;
5169
 
                 * 
5170
 
                 * CONN 1:
5171
 
                 * 
5172
 
                 * update t3 set a = 4 where a = 40; // Hangs waiting CONN 2.
5173
 
                 * 
5174
 
                 * CONN 2:
5175
 
                 * 
5176
 
                 * select t3_update(); // Hangs waiting for table lock.
5177
 
                 * 
5178
 
                 */
5179
 
                if ((lock_type >= TL_WRITE_CONCURRENT_INSERT && lock_type <= TL_WRITE) && 
5180
 
#ifndef DRIZZLED
5181
 
                        !(thd_in_lock_tables(thd) && thd_sql_command(thd) == SQLCOM_LOCK_TABLES) &&
5182
 
#endif
5183
 
                        !thd_tablespace_op(thd) &&
5184
 
                        thd_sql_command(thd) != SQLCOM_TRUNCATE &&
5185
 
#ifndef DRIZZLED
5186
 
                        thd_sql_command(thd) != SQLCOM_OPTIMIZE &&
5187
 
#endif
5188
 
                        thd_sql_command(thd) != SQLCOM_CREATE_TABLE) {
5189
 
                        lock_type = TL_WRITE_ALLOW_WRITE;
5190
 
                }
5191
 
 
5192
 
                /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
5193
 
                 * MySQL would use the lock TL_READ_NO_INSERT on t2, and that
5194
 
                 * would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
5195
 
                 * to t2. Convert the lock to a normal read lock to allow
5196
 
                 * concurrent inserts to t2.
5197
 
                 * 
5198
 
                 * (This one from InnoDB)
5199
 
 
5200
 
                 * Stewart: removed SQLCOM_CALL, not sure of implications.
5201
 
                 */
5202
 
                if (lock_type == TL_READ_NO_INSERT
5203
 
#ifndef DRIZZLED
5204
 
                        && (!thd_in_lock_tables(thd)
5205
 
                         || thd_sql_command(thd) == SQLCOM_CALL
5206
 
                        )
5207
 
#endif
5208
 
                        )
5209
 
                {
5210
 
                        lock_type = TL_READ;
5211
 
                }
5212
 
 
5213
 
                XT_PRINT3(xt_get_self(), "store_lock (%s) %d->%d\n", pb_share->sh_table_path->ps_path, pb_lock.type, lock_type);
5214
 
                pb_lock.type = lock_type;
5215
 
        }
5216
 
#ifdef PBXT_HANDLER_TRACE
5217
 
        else {
5218
 
                XT_PRINT3(xt_get_self(), "store_lock (%s) %d->%d (ignore/unlock)\n", pb_share->sh_table_path->ps_path, lock_type, lock_type);
5219
 
        }
5220
 
#endif
5221
 
        *to++= &pb_lock;
5222
 
        return to;
5223
 
}
5224
 
 
5225
 
/*
5226
 
 * Used to delete a table. By the time delete_table() has been called all
5227
 
 * opened references to this table will have been closed (and your globally
5228
 
 * shared references released. The variable name will just be the name of
5229
 
 * the table. You will need to remove any files you have created at this point.
5230
 
 *
5231
 
 * Called from handler.cc by delete_table and ha_create_table(). Only used
5232
 
 * during create if the table_flag HA_DROP_BEFORE_CREATE was specified for
5233
 
 * the storage engine.
5234
 
*/
5235
 
int PBXTStorageEngine::doDropTable(Session &, const TableIdentifier& ident)
5236
 
{
5237
 
        const std::string& path = ident.getPath();
5238
 
        const char *table_path = path.c_str();
5239
 
        THD                             *thd = current_thd;
5240
 
        int                             err = 0;
5241
 
        XTThreadPtr             self = NULL;
5242
 
        XTSharePtr              share;
5243
 
 
5244
 
        STAT_TRACE(self, *thd_query(thd));
5245
 
        XT_PRINT1(self, "delete_table (%s)\n", table_path);
5246
 
 
5247
 
        if (XTSystemTableShare::isSystemTable(table_path))
5248
 
                return delete_system_table(table_path);
5249
 
 
5250
 
        if (!(self = ha_set_current_thread(thd, &err)))
5251
 
                return xt_ha_pbxt_to_mysql_error(err);
5252
 
 
5253
 
        self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5254
 
 
5255
 
        try_(a) {
5256
 
                xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
5257
 
 
5258
 
                ASSERT(xt_get_self() == self);
5259
 
                try_(b) {
5260
 
                        /* NOTE: MySQL does not drop a table by first locking it!
5261
 
                         * We also cannot use pb_share because the handler used
5262
 
                         * to delete a table is not openned correctly.
5263
 
                         */
5264
 
                        share = ha_get_share(self, table_path, false);
5265
 
                        pushr_(ha_unget_share, share);
5266
 
                        ha_aquire_exclusive_use(self, share, NULL);
5267
 
                        pushr_(ha_release_exclusive_use, share);
5268
 
                        ha_close_open_tables(self, share, NULL);
5269
 
 
5270
 
                        xt_drop_table(self, (XTPathStrPtr) table_path, thd_sql_command(thd) == SQLCOM_DROP_DB);
5271
 
 
5272
 
                        freer_(); // ha_release_exclusive_use(share)
5273
 
                        freer_(); // ha_unget_share(share)
5274
 
                }
5275
 
                catch_(b) {
5276
 
                        /* In MySQL if the table does not exist, just log the error and continue. This is
5277
 
                         * needed to delete table in the case when CREATE TABLE fails and no PBXT disk
5278
 
                         * structures were created. 
5279
 
                         * Drizzle unlike MySQL iterates over all handlers and tries to delete table. It
5280
 
                         * stops after when a handler returns TRUE, so in Drizzle we need to report error.  
5281
 
                         */
5282
 
#ifndef DRIZZLED
5283
 
                        if (self->t_exception.e_xt_err == XT_ERR_TABLE_NOT_FOUND)
5284
 
                                xt_log_and_clear_exception(self);
5285
 
                        else
5286
 
#endif
5287
 
                                throw_();
5288
 
                }
5289
 
                cont_(b);
5290
 
 
5291
 
                /*
5292
 
                 * If there are no more PBXT tables in the database, we
5293
 
                 * "drop the database", which deletes all PBXT resources
5294
 
                 * in the database.
5295
 
                 */
5296
 
                /* We now only drop the pbxt system data,
5297
 
                 * when the PBXT database is dropped.
5298
 
                 */
5299
 
#ifndef XT_USE_GLOBAL_DB
5300
 
                if (!xt_table_exists(self->st_database)) {
5301
 
                        xt_ha_all_threads_close_database(self, self->st_database);
5302
 
                        xt_drop_database(self, self->st_database);
5303
 
                        xt_unuse_database(self, self);
5304
 
                        xt_ha_close_global_database(self);
5305
 
                }
5306
 
#endif
5307
 
        }
5308
 
        catch_(a) {
5309
 
                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5310
 
#ifdef DRIZZLED
5311
 
                if (err == HA_ERR_NO_SUCH_TABLE)
5312
 
                        err = ENOENT;
5313
 
#endif
5314
 
        }
5315
 
        cont_(a);
5316
 
        
5317
 
#ifdef PBMS_ENABLED
5318
 
        /* Call pbms_delete_table_with_blobs() last because it cannot be undone. */
5319
 
        if (!err) {
5320
 
                PBMSResultRec result;
5321
 
 
5322
 
                if (pbms_delete_table_with_blobs(table_path, &result)) {
5323
 
                        xt_logf(XT_NT_WARNING, "pbms_delete_table_with_blobs() Error: %s", result.mr_message);
5324
 
                }
5325
 
                
5326
 
                pbms_completed(NULL, true);
5327
 
        }
5328
 
#endif
5329
 
 
5330
 
        std::string path2(ident.getPath());
5331
 
        path2.append(DEFAULT_FILE_EXTENSION);
5332
 
        (void)internal::my_delete(path2.c_str(), MYF(0));
5333
 
 
5334
 
        return err;
5335
 
}
5336
 
 
5337
 
#ifdef DRIZZLED
5338
 
int PBXTStorageEngine::delete_system_table(const char *table_path)
5339
 
#else
5340
 
int ha_pbxt::delete_system_table(const char *table_path)
5341
 
#endif
5342
 
{
5343
 
        THD                             *thd = current_thd;
5344
 
        XTExceptionRec  e;
5345
 
        int                             err = 0;
5346
 
        XTThreadPtr             self;
5347
 
 
5348
 
        if (!(self = xt_ha_set_current_thread(thd, &e)))
5349
 
                return xt_ha_pbxt_to_mysql_error(e.e_xt_err);
5350
 
 
5351
 
        try_(a) {
5352
 
                xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
5353
 
 
5354
 
                if (xt_table_exists(self->st_database))
5355
 
                        xt_throw_xterr(XT_CONTEXT, XT_ERR_PBXT_TABLE_EXISTS);
5356
 
 
5357
 
                XTSystemTableShare::setSystemTableDeleted(table_path);
5358
 
 
5359
 
                if (!XTSystemTableShare::doesSystemTableExist()) {
5360
 
                        xt_ha_all_threads_close_database(self, self->st_database);
5361
 
                        xt_drop_database(self, self->st_database);
5362
 
                        xt_unuse_database(self, self);
5363
 
                        xt_ha_close_global_database(self);
5364
 
                }
5365
 
        }
5366
 
        catch_(a) {
5367
 
                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5368
 
        }
5369
 
        cont_(a);
5370
 
 
5371
 
        return err;
5372
 
}
5373
 
 
5374
 
/*
5375
 
 * Renames a table from one name to another from alter table call.
5376
 
 * This function can be used to move a table from one database to
5377
 
 * another.
5378
 
 */
5379
 
int PBXTStorageEngine::doRenameTable(Session&,
5380
 
                                     const TableIdentifier& from_ident,
5381
 
                                     const TableIdentifier& to_ident)
5382
 
{
5383
 
        const char *from = from_ident.getPath().c_str();
5384
 
        const char *to = to_ident.getPath().c_str();
5385
 
 
5386
 
        if (strcmp(from, to) == 0)
5387
 
                return 0;
5388
 
 
5389
 
        THD                             *thd = current_thd;
5390
 
        int                             err = 0;
5391
 
        XTThreadPtr             self;
5392
 
        XTSharePtr              share;
5393
 
        XTDatabaseHPtr  to_db;
5394
 
 
5395
 
        if (XTSystemTableShare::isSystemTable(from))
5396
 
                return rename_system_table(from, to);
5397
 
 
5398
 
        if (!(self = ha_set_current_thread(thd, &err)))
5399
 
                return xt_ha_pbxt_to_mysql_error(err);
5400
 
 
5401
 
        XT_PRINT2(self, "rename_table (%s -> %s)\n", from, to);
5402
 
 
5403
 
#ifdef PBMS_ENABLED
5404
 
        PBMSResultRec result;
5405
 
 
5406
 
        err = pbms_rename_table_with_blobs(from, to, &result);
5407
 
        if (err) {
5408
 
                xt_logf(XT_NT_ERROR, "pbms_rename_table_with_blobs() Error: %s", result.mr_message);
5409
 
                return err;
5410
 
        }
5411
 
#endif
5412
 
 
5413
 
        try_(a) {
5414
 
                xt_ha_open_database_of_table(self, (XTPathStrPtr) to);
5415
 
                to_db = self->st_database;
5416
 
 
5417
 
                xt_ha_open_database_of_table(self, (XTPathStrPtr) from);
5418
 
 
5419
 
                if (self->st_database != to_db)
5420
 
                        xt_throw_xterr(XT_CONTEXT, XT_ERR_CANNOT_CHANGE_DB);
5421
 
 
5422
 
                /*
5423
 
                 * NOTE: MySQL does not lock before calling rename table!
5424
 
                 *
5425
 
                 * We cannot use pb_share because rename_table() is
5426
 
                 * called without correctly initializing
5427
 
                 * the handler!
5428
 
                 */
5429
 
                share = ha_get_share(self, from, true);
5430
 
                pushr_(ha_unget_share, share);
5431
 
                ha_aquire_exclusive_use(self, share, NULL);
5432
 
                pushr_(ha_release_exclusive_use, share);
5433
 
                ha_close_open_tables(self, share, NULL);
5434
 
 
5435
 
                self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5436
 
                xt_rename_table(self, (XTPathStrPtr) from, (XTPathStrPtr) to);
5437
 
 
5438
 
                freer_(); // ha_release_exclusive_use(share)
5439
 
                freer_(); // ha_unget_share(share)
5440
 
 
5441
 
                /*
5442
 
                 * If there are no more PBXT tables in the database, we
5443
 
                 * "drop the database", which deletes all PBXT resources
5444
 
                 * in the database.
5445
 
                 */
5446
 
#ifdef XT_USE_GLOBAL_DB
5447
 
                /* We now only drop the pbxt system data,
5448
 
                 * when the PBXT database is dropped.
5449
 
                 */
5450
 
                if (!xt_table_exists(self->st_database)) {
5451
 
                        xt_ha_all_threads_close_database(self, self->st_database);
5452
 
                        xt_drop_database(self, self->st_database);
5453
 
                }
5454
 
#endif
5455
 
        }
5456
 
        catch_(a) {
5457
 
                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5458
 
        }
5459
 
        cont_(a);
5460
 
        
5461
 
#ifdef PBMS_ENABLED
5462
 
        pbms_completed(NULL, (err == 0));
5463
 
#endif
5464
 
 
5465
 
        if (err == 0)
5466
 
                plugin::StorageEngine::renameDefinitionFromPath(to_ident, from_ident);
5467
 
 
5468
 
        XT_RETURN(err);
5469
 
}
5470
 
 
5471
 
#ifdef DRIZZLED
5472
 
int PBXTStorageEngine::rename_system_table(const char *XT_UNUSED(from), const char *XT_UNUSED(to))
5473
 
#else
5474
 
int ha_pbxt::rename_system_table(const char *XT_UNUSED(from), const char *XT_UNUSED(to))
5475
 
#endif
5476
 
{
5477
 
        return ER_NOT_SUPPORTED_YET;
5478
 
}
5479
 
 
5480
 
uint ha_pbxt::max_supported_key_length() const
5481
 
{
5482
 
        return XT_INDEX_MAX_KEY_SIZE;
5483
 
}
5484
 
 
5485
 
uint ha_pbxt::max_supported_key_part_length() const
5486
 
{
5487
 
        /* There is a little overhead in order to fit! */
5488
 
        return XT_INDEX_MAX_KEY_SIZE-4;
5489
 
}
5490
 
 
5491
 
/*
5492
 
 * Called in test_quick_select to determine if indexes should be used.
5493
 
 *
5494
 
 * As far as I can tell, time is measured in "disk reads". So the
5495
 
 * calculation below means the system reads about 20 rows per read.
5496
 
 *
5497
 
 * For example a sequence scan uses a read buffer which reads a
5498
 
 * number of rows at once, or a sequential scan can make use
5499
 
 * of the cache (so it need to read less).
5500
 
 */
5501
 
double ha_pbxt::scan_time()
5502
 
{
5503
 
        double result = (double) (stats.records + stats.deleted) / 38.0 + 2;
5504
 
        return result;
5505
 
}
5506
 
 
5507
 
/*
5508
 
 * The next method will never be called if you do not implement indexes.
5509
 
 */
5510
 
double ha_pbxt::read_time(uint XT_UNUSED(index), uint ranges, ha_rows rows)
5511
 
{
5512
 
        double result = rows2double(ranges+rows);
5513
 
        return result;
5514
 
}
5515
 
 
5516
 
/*
5517
 
 * Given a starting key, and an ending key estimate the number of rows that
5518
 
 * will exist between the two. end_key may be empty which in case determine
5519
 
 * if start_key matches any rows.
5520
 
 * 
5521
 
 * Called from opt_range.cc by check_quick_keys().
5522
 
 *
5523
 
 */
5524
 
ha_rows ha_pbxt::records_in_range(uint inx, key_range *min_key, key_range *max_key)
5525
 
{
5526
 
        XTIndexPtr              ind;
5527
 
        key_part_map    keypart_map;
5528
 
        u_int                   segement = 0;
5529
 
        ha_rows                 result;
5530
 
 
5531
 
        if (min_key)
5532
 
                keypart_map = min_key->keypart_map;
5533
 
        else if (max_key)
5534
 
                keypart_map = max_key->keypart_map;
5535
 
        else
5536
 
                return 1;
5537
 
        ind = (XTIndexPtr) pb_share->sh_dic_keys[inx];
5538
 
        
5539
 
        while (keypart_map & 1) {
5540
 
                segement++;
5541
 
                keypart_map = keypart_map >> 1;
5542
 
        }
5543
 
 
5544
 
        if (segement < 1 || segement > ind->mi_seg_count)
5545
 
                result = 1;
5546
 
        else
5547
 
                result = ind->mi_seg[segement-1].is_recs_in_range;
5548
 
#ifdef XT_PRINT_INDEX_OPT
5549
 
        printf("records_in_range %s index %d cols req=%d/%d read_bits=%X write_bits=%X index_bits=%X --> %d\n", pb_open_tab->ot_table->tab_name->ps_path, (int) inx, segement, ind->mi_seg_count, (int) *table->read_set->bitmap, (int) *table->write_set->bitmap, (int) *ind->mi_col_map.bitmap, (int) result);
5550
 
#endif
5551
 
        return result;
5552
 
}
5553
 
 
5554
 
/*
5555
 
 * create() is called to create a table/database. The variable name will have the name
5556
 
 * of the table. When create() is called you do not need to worry about opening
5557
 
 * the table. Also, the FRM file will have already been created so adjusting
5558
 
 * create_info will not do you any good. You can overwrite the frm file at this
5559
 
 * point if you wish to change the table definition, but there are no methods
5560
 
 * currently provided for doing that.
5561
 
 
5562
 
 * Called from handle.cc by ha_create_table().
5563
 
*/
5564
 
int PBXTStorageEngine::doCreateTable(Session&, 
5565
 
                                     Table& table_arg, 
5566
 
                                     const TableIdentifier& ident,
5567
 
                                     drizzled::message::Table& proto)
5568
 
{
5569
 
        const std::string& path = ident.getPath();
5570
 
        const char *table_path = path.c_str();
5571
 
        THD                             *thd = current_thd;
5572
 
        int                             err = 0;
5573
 
        XTThreadPtr             self;
5574
 
        XTDDTable               *tab_def = NULL;
5575
 
        XTDictionaryRec dic, source_dic;
5576
 
 
5577
 
        if ((strcmp(table_path, "./pbxt/location") == 0) || 
5578
 
                (strcmp(table_path, "./pbxt/tables") == 0) ||
5579
 
                (strcmp(table_path, "./pbxt/statistics") == 0))
5580
 
                return 0;
5581
 
 
5582
 
        if ((strcmp(table_path, "./pbxt/location") == 0) || (strcmp(table_path, "./pbxt/statistics") == 0))
5583
 
                return 0;
5584
 
 
5585
 
        memset(&dic, 0, sizeof(dic));
5586
 
        memset(&source_dic, 0, sizeof(source_dic));
5587
 
 
5588
 
        if (!(self = ha_set_current_thread(thd, &err)))
5589
 
                return xt_ha_pbxt_to_mysql_error(err);
5590
 
        XT_PRINT2(self, "create (%s) %s\n", table_path, (proto.type() == message::Table::TEMPORARY) ? "temporary" : "");
5591
 
        switch(ident.getType()) {
5592
 
                case message::Table::STANDARD:
5593
 
                        dic.dic_table_type = XT_TABLE_TYPE_STANDARD;
5594
 
                        break;
5595
 
 
5596
 
                case message::Table::TEMPORARY:
5597
 
                        dic.dic_table_type = XT_TABLE_TYPE_TEMPORARY;
5598
 
                        break;
5599
 
 
5600
 
                case message::Table::INTERNAL:
5601
 
                        dic.dic_table_type = XT_TABLE_TYPE_INTERNAL;
5602
 
                        break;
5603
 
 
5604
 
                case message::Table::FUNCTION:
5605
 
                        dic.dic_table_type = XT_TABLE_TYPE_FUNCTION;
5606
 
                        break;
5607
 
        }
5608
 
 
5609
 
        STAT_TRACE(self, *thd_query(thd));
5610
 
 
5611
 
        try_(a) {
5612
 
                xt_ha_open_database_of_table(self, (XTPathStrPtr) table_path);
5613
 
 
5614
 
                for (uint i=0; i<table_arg.getShare()->keys; i++) {
5615
 
                        if (table_arg.key_info[i].key_length > XT_INDEX_MAX_KEY_SIZE)
5616
 
                                xt_throw_sulxterr(XT_CONTEXT, XT_ERR_KEY_TOO_LARGE, table_arg.key_info[i].name, (u_long) XT_INDEX_MAX_KEY_SIZE);
5617
 
                }
5618
 
 
5619
 
                /* ($) auto_increment_value will be zero if 
5620
 
                 * AUTO_INCREMENT is not used. Otherwise
5621
 
                 * Query was ALTER TABLE ... AUTO_INCREMENT = x; or 
5622
 
                 * CREATE TABLE ... AUTO_INCREMENT = x;
5623
 
                 */
5624
 
#ifdef XT_USE_DEFAULT_MEMORY_TABS
5625
 
                if (create_info->storage_media == HA_SM_DEFAULT)
5626
 
                        source_dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
5627
 
#endif
5628
 
 
5629
 
                StorageEngine::writeDefinitionFromPath(ident, proto);
5630
 
 
5631
 
                Session::QueryString query_string(thd->getQueryString());
5632
 
                tab_def = xt_ri_create_table(self, true, (XTPathStrPtr) table_path, const_cast<char *>(query_string->c_str()), myxt_create_table_from_table(self, table_arg.getMutableShare()), &source_dic);
5633
 
                tab_def->checkForeignKeys(self, proto.type() == message::Table::TEMPORARY);
5634
 
 
5635
 
                dic.dic_table = tab_def;
5636
 
                dic.dic_my_table = table_arg.getMutableShare();
5637
 
                dic.dic_tab_flags = source_dic.dic_tab_flags;
5638
 
                //if (create_info.storage_media == HA_SM_MEMORY)
5639
 
                //      dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
5640
 
                if (proto.type() == message::Table::TEMPORARY)
5641
 
                        dic.dic_tab_flags |= XT_TF_REAL_TEMP_TABLE;
5642
 
                if (myxt_temp_table_name(table_path))
5643
 
                        dic.dic_tab_flags |= XT_TF_DDL_TEMP_TABLE;
5644
 
 
5645
 
                dic.dic_min_auto_inc = (xtWord8) proto.options().auto_increment_value(); /* ($) */
5646
 
                dic.dic_def_ave_row_size =  proto.options().avg_row_length();
5647
 
                myxt_setup_dictionary(self, &dic);
5648
 
 
5649
 
                /*
5650
 
                 * We used to ignore the value of foreign_key_checks flag and allowed creation
5651
 
                 * of tables with "hanging" references. Now we validate FKs if foreign_key_checks != 0
5652
 
                 */
5653
 
                self->st_ignore_fkeys = (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5654
 
 
5655
 
                /*
5656
 
                 * Previously I set delete_if_exists=TRUE because
5657
 
                 * CREATE TABLE was being used to TRUNCATE.
5658
 
                 * This was due to the flag HTON_CAN_RECREATE.
5659
 
                 * Now I could set delete_if_exists=FALSE, but
5660
 
                 * leaving it TRUE should not cause any problems.
5661
 
                 */
5662
 
                xt_create_table(self, (XTPathStrPtr) table_path, &dic);
5663
 
        }
5664
 
        catch_(a) {
5665
 
                if (tab_def)
5666
 
                        tab_def->finalize(self);
5667
 
                dic.dic_table = NULL;
5668
 
                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5669
 
        }
5670
 
        cont_(a);
5671
 
 
5672
 
        /* Free the dictionary, but not 'table_arg'! */
5673
 
        dic.dic_my_table = NULL;
5674
 
        myxt_free_dictionary(self, &dic);
5675
 
 
5676
 
        XT_RETURN(err);
5677
 
}
5678
 
 
5679
 
#ifdef DRIZZLED
5680
 
int PBXTStorageEngine::doStartTransaction(Session *thd, start_transaction_option_t XT_UNUSED(options))
5681
 
{
5682
 
        int err = 0;
5683
 
        XTThreadPtr self = ha_set_current_thread(thd, &err);    
5684
 
 
5685
 
        XT_PRINT0(self, "PBXTStorageEngine::doStartTransaction\n");
5686
 
 
5687
 
        /* Transaction mode numbers must be identical! */
5688
 
        (void) ASSERT_NS(ISO_READ_UNCOMMITTED == XT_XACT_UNCOMMITTED_READ);
5689
 
        (void) ASSERT_NS(ISO_SERIALIZABLE == XT_XACT_SERIALIZABLE);
5690
 
 
5691
 
        self->st_xact_mode = thd_tx_isolation(thd) <= ISO_READ_COMMITTED ? XT_XACT_COMMITTED_READ : XT_XACT_REPEATABLE_READ;
5692
 
        self->st_ignore_fkeys = (thd_test_options(thd,OPTION_NO_FOREIGN_KEY_CHECKS)) != 0;
5693
 
        self->st_auto_commit = (thd_test_options(thd, (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) == 0;
5694
 
        self->st_table_trans = FALSE;
5695
 
        self->st_abort_trans = FALSE;
5696
 
        self->st_stat_ended = FALSE;
5697
 
        self->st_stat_trans = FALSE;
5698
 
        xt_xres_wait_for_recovery(self, XT_RECOVER_SWEPT);
5699
 
 
5700
 
        if (!self->st_database)
5701
 
                xt_ha_open_database_of_table(self, NULL);
5702
 
 
5703
 
        assert(!self->st_xact_data); // Check we're not called twice
5704
 
        if (!xt_xn_begin(self)) {
5705
 
          err = xt_ha_pbxt_thread_error_for_mysql(thd, self, /*pb_ignore_dup_key*/false);
5706
 
        }
5707
 
 
5708
 
        return err;
5709
 
}
5710
 
 
5711
 
int PBXTStorageEngine::doSetSavepoint(drizzled::Session* thd, drizzled::NamedSavepoint&)
5712
 
5713
 
        return xt_ha_pbxt_thread_error_for_mysql(thd, xt_ha_thd_to_self(thd), false); 
5714
 
}
5715
 
        
5716
 
int PBXTStorageEngine::doRollbackToSavepoint(drizzled::Session* thd, drizzled::NamedSavepoint&) 
5717
 
{
5718
 
        return xt_ha_pbxt_thread_error_for_mysql(thd, xt_ha_thd_to_self(thd), false);
5719
 
}
5720
 
 
5721
 
int PBXTStorageEngine::doReleaseSavepoint(drizzled::Session* thd, drizzled::NamedSavepoint&) 
5722
 
{
5723
 
        return xt_ha_pbxt_thread_error_for_mysql(thd, xt_ha_thd_to_self(thd), false);
5724
 
}
5725
 
 
5726
 
int PBXTStorageEngine::doCommit(drizzled::Session* thd, bool real_commit)
5727
 
{
5728
 
        int err = 0;
5729
 
        XTThreadPtr self = (XTThreadPtr) *thd->getEngineData(pbxt_hton);
5730
 
 
5731
 
        XT_PRINT1(self, "PBXTStorageEngine::doCommit(real_commit = %s)\n", real_commit ? "true" : "false");
5732
 
 
5733
 
        if (real_commit && self) {
5734
 
                if (!xt_xn_commit(self))
5735
 
                        err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5736
 
        }
5737
 
 
5738
 
        return err;
5739
 
}
5740
 
 
5741
 
int PBXTStorageEngine::doRollback(drizzled::Session* thd, bool real_commit)
5742
 
{
5743
 
        int err = 0;
5744
 
        XTThreadPtr self = (XTThreadPtr) *thd->getEngineData(pbxt_hton);
5745
 
 
5746
 
        XT_PRINT1(self, "PBXTStorageEngine::doRollback(real_commit = %s)\n", real_commit ? "true" : "false");
5747
 
 
5748
 
        if (real_commit && self) {
5749
 
                if (!xt_xn_rollback(self))
5750
 
                        err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
5751
 
        }
5752
 
 
5753
 
        return err;
5754
 
}
5755
 
 
5756
 
#if 0
5757
 
void PBXTStorageEngine::doGetTableIdentifiers(drizzled::CachedDirectory &directory,
5758
 
                                           drizzled::SchemaIdentifier &schema_identifier,
5759
 
                                           drizzled::TableIdentifiers &set_of_identifiers)
5760
 
{
5761
 
  CachedDirectory::Entries entries= directory.getEntries();
5762
 
 
5763
 
  for (CachedDirectory::Entries::iterator entry_iter= entries.begin();
5764
 
       entry_iter != entries.end(); ++entry_iter)
5765
 
  {
5766
 
    CachedDirectory::Entry *entry= *entry_iter;
5767
 
    const std::string *filename= &entry->filename;
5768
 
 
5769
 
    assert(filename->size());
5770
 
 
5771
 
    const char *ext= strchr(filename->c_str(), '.');
5772
 
 
5773
 
    if (ext == NULL || my_strcasecmp(system_charset_info, ext, DEFAULT_FILE_EXTENSION) ||
5774
 
        (filename->compare(0, strlen(TMP_FILE_PREFIX), TMP_FILE_PREFIX) == 0))
5775
 
    { }
5776
 
    else
5777
 
    {
5778
 
      char uname[NAME_LEN + 1];
5779
 
      uint32_t file_name_len;
5780
 
 
5781
 
      file_name_len= filename_to_tablename(filename->c_str(), uname, sizeof(uname));
5782
 
      // TODO: Remove need for memory copy here
5783
 
      uname[file_name_len - sizeof(DEFAULT_FILE_EXTENSION) + 1]= '\0'; // Subtract ending, place NULL 
5784
 
 
5785
 
      set_of_identifiers.push_back(TableIdentifier(schema_identifier, uname));
5786
 
    }
5787
 
  }
5788
 
}
5789
 
 
5790
 
void PBXTStorageEngine::doGetTableNames(
5791
 
        CachedDirectory &directory, 
5792
 
        SchemaIdentifier&, 
5793
 
        std::set<std::string>& set_of_names)
5794
 
{
5795
 
  CachedDirectory::Entries entries= directory.getEntries();
5796
 
 
5797
 
  for (CachedDirectory::Entries::iterator entry_iter= entries.begin();
5798
 
       entry_iter != entries.end(); ++entry_iter)
5799
 
  {
5800
 
    CachedDirectory::Entry *entry= *entry_iter;
5801
 
    const std::string *filename= &entry->filename;
5802
 
 
5803
 
    assert(filename->size());
5804
 
 
5805
 
    const char *ext= strchr(filename->c_str(), '.');
5806
 
 
5807
 
    if (ext == NULL || my_strcasecmp(system_charset_info, ext, DEFAULT_FILE_EXTENSION) ||
5808
 
        (filename->compare(0, strlen(TMP_FILE_PREFIX), TMP_FILE_PREFIX) == 0))
5809
 
    { }
5810
 
    else
5811
 
    {
5812
 
      char uname[NAME_LEN + 1];
5813
 
      uint32_t file_name_len;
5814
 
 
5815
 
      file_name_len= filename_to_tablename(filename->c_str(), uname, sizeof(uname));
5816
 
      // TODO: Remove need for memory copy here
5817
 
      uname[file_name_len - sizeof(DEFAULT_FILE_EXTENSION) + 1]= '\0'; // Subtract ending, place NULL 
5818
 
      set_of_names.insert(uname);
5819
 
    }
5820
 
  }
5821
 
}
5822
 
#endif
5823
 
 
5824
 
bool PBXTStorageEngine::doDoesTableExist(Session&, const TableIdentifier &identifier)
5825
 
{
5826
 
  std::string proto_path(identifier.getPath());
5827
 
  proto_path.append(DEFAULT_FILE_EXTENSION);
5828
 
 
5829
 
  if (access(proto_path.c_str(), F_OK))
5830
 
  {
5831
 
    return false;
5832
 
  }
5833
 
 
5834
 
  return true;
5835
 
}
5836
 
 
5837
 
#endif // DRIZZLED
5838
 
 
5839
 
char *ha_pbxt::get_foreign_key_create_info()
5840
 
{
5841
 
        THD                                     *thd = current_thd;
5842
 
        int                                     err = 0;
5843
 
        XTThreadPtr                     self;
5844
 
        XTStringBufferRec       tab_def = { 0, 0, 0 };
5845
 
 
5846
 
        if (!(self = ha_set_current_thread(thd, &err))) {
5847
 
                xt_ha_pbxt_to_mysql_error(err);
5848
 
                return NULL;
5849
 
        }
5850
 
 
5851
 
        if (!pb_open_tab) {
5852
 
                if ((err = reopen()))
5853
 
                        return NULL;
5854
 
        }
5855
 
 
5856
 
        if (!pb_open_tab->ot_table->tab_dic.dic_table)
5857
 
                return NULL;
5858
 
 
5859
 
        try_(a) {
5860
 
                pb_open_tab->ot_table->tab_dic.dic_table->loadForeignKeyString(self, &tab_def);
5861
 
        }
5862
 
        catch_(a) {
5863
 
                xt_sb_set_size(self, &tab_def, 0);
5864
 
                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
5865
 
        }
5866
 
        cont_(a);
5867
 
 
5868
 
        return tab_def.sb_cstring;
5869
 
}
5870
 
 
5871
 
void ha_pbxt::free_foreign_key_create_info(char* str)
5872
 
{
5873
 
        xt_free(NULL, str);
5874
 
}
5875
 
 
5876
 
bool ha_pbxt::get_error_message(int XT_UNUSED(error), String *buf)
5877
 
{
5878
 
        THD                             *thd = current_thd;
5879
 
        int                             err = 0;
5880
 
        XTThreadPtr             self;
5881
 
 
5882
 
        if (!(self = ha_set_current_thread(thd, &err)))
5883
 
                return FALSE;
5884
 
 
5885
 
        if (!self->t_exception.e_xt_err)
5886
 
                return FALSE;
5887
 
 
5888
 
        buf->copy(self->t_exception.e_err_msg, (uint32_t) strlen(self->t_exception.e_err_msg), system_charset_info);
5889
 
        return TRUE;
5890
 
}
5891
 
 
5892
 
/* 
5893
 
 * get info about FKs of the currently open table
5894
 
 * used in 
5895
 
 * 1. REPLACE; is > 0 if table is referred by a FOREIGN KEY 
5896
 
 * 2. INFORMATION_SCHEMA tables: TABLE_CONSTRAINTS, REFERENTIAL_CONSTRAINTS
5897
 
 * Return value: as of 5.1.24 it's ignored
5898
 
 */
5899
 
#ifdef DRI_IS
5900
 
int ha_pbxt::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
5901
 
{
5902
 
        int err = 0;
5903
 
        XTThreadPtr     self;
5904
 
        const char *action;
5905
 
 
5906
 
        if (!(self = ha_set_current_thread(thd, &err))) {
5907
 
                return xt_ha_pbxt_to_mysql_error(err);
5908
 
        }
5909
 
 
5910
 
        try_(a) {
5911
 
                XTDDTable *table_dic = pb_open_tab->ot_table->tab_dic.dic_table;
5912
 
 
5913
 
                if (table_dic == NULL)
5914
 
                        xt_throw_errno(XT_CONTEXT, XT_ERR_NO_DICTIONARY);
5915
 
 
5916
 
                for (int i = 0, sz = table_dic->dt_fkeys.size(); i < sz; i++) {
5917
 
                        FOREIGN_KEY_INFO *fk_info= new  // assumed that C++ exceptions are disabled
5918
 
                                (thd_alloc(thd, sizeof(FOREIGN_KEY_INFO))) FOREIGN_KEY_INFO;
5919
 
 
5920
 
                        if (fk_info == NULL)
5921
 
                                xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
5922
 
 
5923
 
                        XTDDForeignKey *fk = table_dic->dt_fkeys.itemAt(i);
5924
 
 
5925
 
                        const char *path = fk->fk_ref_tab_name->ps_path;
5926
 
                        const char *ref_tbl_name = path + strlen(path);
5927
 
 
5928
 
                        while (ref_tbl_name != path && !XT_IS_DIR_CHAR(*ref_tbl_name)) 
5929
 
                                ref_tbl_name--;
5930
 
 
5931
 
                        const char * ref_db_name = ref_tbl_name - 1;
5932
 
 
5933
 
                        while (ref_db_name != path && !XT_IS_DIR_CHAR(*ref_db_name)) 
5934
 
                                ref_db_name--;
5935
 
 
5936
 
                        ref_tbl_name++;
5937
 
                        ref_db_name++;
5938
 
 
5939
 
                        fk_info->forein_id = thd_make_lex_string(thd, 0,
5940
 
                                fk->co_name, (uint) strlen(fk->co_name), 1);
5941
 
 
5942
 
                        fk_info->referenced_db = thd_make_lex_string(thd, 0,
5943
 
                                ref_db_name, (uint) (ref_tbl_name - ref_db_name - 1), 1);
5944
 
 
5945
 
                        fk_info->referenced_table = thd_make_lex_string(thd, 0,
5946
 
                                ref_tbl_name, (uint) strlen(ref_tbl_name), 1);
5947
 
 
5948
 
                        fk_info->referenced_key_name = NULL;                    
5949
 
 
5950
 
                        XTIndex *ix = fk->getReferenceIndexPtr();
5951
 
                        if (ix == NULL) /* can be NULL if another thread changes referenced table at the moment */
5952
 
                                continue;
5953
 
                        
5954
 
                        XTDDTable *ref_table = fk->fk_ref_table;
5955
 
 
5956
 
                        // might be a self-reference
5957
 
                        if ((ref_table == NULL) 
5958
 
                                && (xt_tab_compare_names(path, table_dic->dt_table->tab_name->ps_path) == 0)) {
5959
 
                                ref_table = table_dic;
5960
 
                        }
5961
 
 
5962
 
                        if (ref_table != NULL) {
5963
 
                                const XTList<XTDDIndex>& ix_list = ref_table->dt_indexes;
5964
 
                                for (int j = 0, sz2 = ix_list.size(); j < sz2; j++) {
5965
 
                                        XTDDIndex *ddix = ix_list.itemAt(j);
5966
 
                                        if (ddix->in_index ==  ix->mi_index_no) {
5967
 
                                                const char *ix_name = 
5968
 
                                                        ddix->co_name ? ddix->co_name : ddix->co_ind_name;
5969
 
                                                fk_info->referenced_key_name = thd_make_lex_string(thd, 0,
5970
 
                                                        ix_name, (uint) strlen(ix_name), 1);
5971
 
                                                break;
5972
 
                                        }
5973
 
                                }
5974
 
                        }
5975
 
 
5976
 
                        action = XTDDForeignKey::actionTypeToString(fk->fk_on_delete);
5977
 
                        fk_info->delete_method = thd_make_lex_string(thd, 0,
5978
 
                                action, (uint) strlen(action), 1);
5979
 
                        action = XTDDForeignKey::actionTypeToString(fk->fk_on_update);
5980
 
                        fk_info->update_method = thd_make_lex_string(thd, 0,
5981
 
                                action, (uint) strlen(action), 1);
5982
 
 
5983
 
                        const XTList<XTDDColumnRef>& cols = fk->co_cols;
5984
 
                        for (int j = 0, sz2 = cols.size(); j < sz2; j++) {
5985
 
                                XTDDColumnRef *col_ref= cols.itemAt(j);
5986
 
                                fk_info->foreign_fields.push_back(thd_make_lex_string(thd, 0,
5987
 
                                        col_ref->cr_col_name, (uint) strlen(col_ref->cr_col_name), 1));
5988
 
                        }
5989
 
 
5990
 
                        const XTList<XTDDColumnRef>& ref_cols = fk->fk_ref_cols;
5991
 
                        for (int j = 0, sz2 = ref_cols.size(); j < sz2; j++) {
5992
 
                                XTDDColumnRef *col_ref= ref_cols.itemAt(j);
5993
 
                                fk_info->referenced_fields.push_back(thd_make_lex_string(thd, 0,
5994
 
                                        col_ref->cr_col_name, (uint) strlen(col_ref->cr_col_name), 1));
5995
 
                        }
5996
 
 
5997
 
                        f_key_list->push_back(fk_info);
5998
 
                }
5999
 
        }
6000
 
        catch_(a) {
6001
 
                err = xt_ha_pbxt_thread_error_for_mysql(thd, self, pb_ignore_dup_key);
6002
 
        }
6003
 
        cont_(a);
6004
 
 
6005
 
        return err; 
6006
 
}
6007
 
 
6008
 
uint ha_pbxt::referenced_by_foreign_key()
6009
 
{
6010
 
        XTDDTable *table_dic = pb_open_tab->ot_table->tab_dic.dic_table;
6011
 
 
6012
 
        if (!table_dic)
6013
 
                return 0;
6014
 
        /* Check the list of referencing tables: */
6015
 
        return table_dic->dt_trefs ? 1 : 0;
6016
 
}
6017
 
#endif // DRI_IS
6018
 
 
6019
 
#ifndef DRIZZLED
6020
 
struct st_mysql_sys_var
6021
 
{
6022
 
        MYSQL_PLUGIN_VAR_HEADER;
6023
 
};
6024
 
 
6025
 
#if MYSQL_VERSION_ID < 60000
6026
 
#if MYSQL_VERSION_ID >= 50124
6027
 
#define USE_CONST_SAVE
6028
 
#endif
6029
 
#else
6030
 
#if MYSQL_VERSION_ID >= 60005
6031
 
#define USE_CONST_SAVE
6032
 
#endif
6033
 
#endif
6034
 
#endif
6035
 
 
6036
 
#ifdef DRIZZLED
6037
 
#define st_mysql_sys_var drizzled::drizzle_sys_var
6038
 
#endif
6039
 
 
6040
 
#ifndef DRIZZLED
6041
 
#ifdef USE_CONST_SAVE
6042
 
static void pbxt_record_cache_size_func(THD *XT_UNUSED(thd), struct st_mysql_sys_var *var, void *tgt, const void *save)
6043
 
#else
6044
 
static void pbxt_record_cache_size_func(THD *XT_UNUSED(thd), struct st_mysql_sys_var *var, void *tgt, void *save)
6045
 
#endif
6046
 
{
6047
 
        xtInt8  record_cache_size;
6048
 
 
6049
 
        char *old= *(char **) tgt;
6050
 
        *(char **)tgt= *(char **) save;
6051
 
        if (var->flags & PLUGIN_VAR_MEMALLOC)
6052
 
        {
6053
 
                *(char **)tgt= my_strdup(*(char **) save, MYF(0));
6054
 
                my_free(old, MYF(0));
6055
 
        }
6056
 
        record_cache_size = ha_set_variable(&pbxt_record_cache_size, &vp_record_cache_size);
6057
 
        xt_tc_set_cache_size((size_t) record_cache_size);
6058
 
#ifdef DEBUG
6059
 
        char buffer[200];
6060
 
 
6061
 
        sprintf(buffer, "pbxt_record_cache_size=%llu\n", (u_llong) record_cache_size);
6062
 
        xt_logf(XT_NT_INFO, buffer);
6063
 
#endif
6064
 
}
6065
 
 
6066
 
struct st_mysql_storage_engine pbxt_storage_engine = {
6067
 
        MYSQL_HANDLERTON_INTERFACE_VERSION
6068
 
};
6069
 
static st_mysql_information_schema pbxt_statitics = {
6070
 
        MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION
6071
 
};
6072
 
 
6073
 
#if MYSQL_VERSION_ID >= 50118
6074
 
static MYSQL_SYSVAR_STR(index_cache_size, pbxt_index_cache_size,
6075
 
  PLUGIN_VAR_READONLY,
6076
 
  "The amount of memory allocated to the index cache, used only to cache index data.",
6077
 
  NULL, NULL, NULL);
6078
 
 
6079
 
static MYSQL_SYSVAR_STR(record_cache_size, pbxt_record_cache_size,
6080
 
  PLUGIN_VAR_READONLY, // PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC,
6081
 
  "The amount of memory allocated to the record cache used to cache table data.",
6082
 
  NULL, pbxt_record_cache_size_func, NULL);
6083
 
 
6084
 
static MYSQL_SYSVAR_STR(log_cache_size, pbxt_log_cache_size,
6085
 
  PLUGIN_VAR_READONLY,
6086
 
  "The amount of memory allocated to the transaction log cache used to cache transaction log data.",
6087
 
  NULL, NULL, NULL);
6088
 
 
6089
 
static MYSQL_SYSVAR_STR(log_file_threshold, pbxt_log_file_threshold,
6090
 
  PLUGIN_VAR_READONLY,
6091
 
  "The size of a transaction log before rollover, and a new log is created.",
6092
 
  NULL, NULL, NULL);
6093
 
 
6094
 
static MYSQL_SYSVAR_STR(transaction_buffer_size, pbxt_transaction_buffer_size,
6095
 
  PLUGIN_VAR_READONLY,
6096
 
  "The size of the global transaction log buffer (the engine allocates 2 buffers of this size).",
6097
 
  NULL, NULL, NULL);
6098
 
 
6099
 
static MYSQL_SYSVAR_STR(log_buffer_size, pbxt_log_buffer_size,
6100
 
  PLUGIN_VAR_READONLY,
6101
 
  "The size of the buffer used to cache data from transaction and data logs during sequential scans, or when writing a data log.",
6102
 
  NULL, NULL, NULL);
6103
 
 
6104
 
static MYSQL_SYSVAR_STR(checkpoint_frequency, pbxt_checkpoint_frequency,
6105
 
  PLUGIN_VAR_READONLY,
6106
 
  "The size of the transaction data buffer which is allocate by each thread.",
6107
 
  NULL, NULL, NULL);
6108
 
 
6109
 
static MYSQL_SYSVAR_STR(data_log_threshold, pbxt_data_log_threshold,
6110
 
  PLUGIN_VAR_READONLY,
6111
 
  "The maximum size of a data log file.",
6112
 
  NULL, NULL, NULL);
6113
 
 
6114
 
static MYSQL_SYSVAR_STR(data_file_grow_size, pbxt_data_file_grow_size,
6115
 
  PLUGIN_VAR_READONLY,
6116
 
  "The amount by which the handle data files (.xtd) grow.",
6117
 
  NULL, NULL, NULL);
6118
 
 
6119
 
static MYSQL_SYSVAR_STR(row_file_grow_size, pbxt_row_file_grow_size,
6120
 
  PLUGIN_VAR_READONLY,
6121
 
  "The amount by which the row pointer files (.xtr) grow.",
6122
 
  NULL, NULL, NULL);
6123
 
 
6124
 
static MYSQL_SYSVAR_STR(record_write_threshold, pbxt_record_write_threshold,
6125
 
  PLUGIN_VAR_READONLY,
6126
 
  "The amount data written to the record files (.xtd and .xtr) before the changes are applied to the database.",
6127
 
  NULL, NULL, NULL);
6128
 
 
6129
 
static MYSQL_SYSVAR_INT(garbage_threshold, xt_db_garbage_threshold,
6130
 
        PLUGIN_VAR_OPCMDARG,
6131
 
        "The percentage of garbage in a repository file before it is compacted.",
6132
 
        NULL, NULL, XT_DL_DEFAULT_GARBAGE_LEVEL, 0, 100, 1);
6133
 
 
6134
 
static MYSQL_SYSVAR_INT(log_file_count, xt_db_log_file_count,
6135
 
        PLUGIN_VAR_OPCMDARG,
6136
 
        "The minimum number of transaction logs used.",
6137
 
        NULL, NULL, XT_DL_DEFAULT_XLOG_COUNT, 1, 20000, 1);
6138
 
 
6139
 
static MYSQL_SYSVAR_INT(auto_increment_mode, xt_db_auto_increment_mode,
6140
 
        PLUGIN_VAR_OPCMDARG,
6141
 
        "The auto-increment mode, 0 = MySQL standard (default), 1 = previous ID's never reused.",
6142
 
        NULL, NULL, XT_AUTO_INCREMENT_DEF, 0, 1, 1);
6143
 
 
6144
 
/* {RN145} */
6145
 
static MYSQL_SYSVAR_INT(offline_log_function, xt_db_offline_log_function,
6146
 
        PLUGIN_VAR_OPCMDARG,
6147
 
        "Determines what happens to transaction logs when the are moved offline, 0 = recycle logs (default), 1 = delete logs (default on Mac OS X), 2 = keep logs.",
6148
 
        NULL, NULL, XT_OFFLINE_LOG_FUNCTION_DEF, 0, 2, 1);
6149
 
 
6150
 
/* {RN150} */
6151
 
static MYSQL_SYSVAR_INT(sweeper_priority, xt_db_sweeper_priority,
6152
 
        PLUGIN_VAR_OPCMDARG,
6153
 
        "Determines the priority of the background sweeper process, 0 = low (default), 1 = normal (same as user threads), 2 = high.",
6154
 
        NULL, NULL, XT_PRIORITY_LOW, XT_PRIORITY_LOW, XT_PRIORITY_HIGH, 1);
6155
 
 
6156
 
#ifndef DEBUG
6157
 
static MYSQL_SYSVAR_BOOL(support_xa, pbxt_support_xa,
6158
 
        PLUGIN_VAR_OPCMDARG,
6159
 
        "Enable PBXT support for the XA two-phase commit, default is enabled",
6160
 
        NULL, NULL, TRUE);
6161
 
#else
6162
 
static MYSQL_SYSVAR_BOOL(support_xa, pbxt_support_xa,
6163
 
        PLUGIN_VAR_OPCMDARG,
6164
 
        "Enable PBXT support for the XA two-phase commit, default is disabled (due to assertion failure in MySQL)",
6165
 
        /* The problem is, in MySQL an assertion fails in debug mode: 
6166
 
         * Assertion failed: (total_ha_2pc == (ulong) opt_bin_log+1), function ha_recover, file handler.cc, line 1557.
6167
 
     */
6168
 
        NULL, NULL, FALSE);
6169
 
#endif
6170
 
 
6171
 
static MYSQL_SYSVAR_INT(index_dirty_threshold, xt_db_index_dirty_threshold,
6172
 
        PLUGIN_VAR_OPCMDARG,
6173
 
        "The percentage of the index cache that must be dirty before the index cache is flushed.",
6174
 
        NULL, NULL, XT_DL_DEFAULT_INDEX_DIRTY_LEVEL, 0, 100, 1);
6175
 
        
6176
 
static MYSQL_SYSVAR_INT(flush_log_at_trx_commit, xt_db_flush_log_at_trx_commit,
6177
 
        PLUGIN_VAR_OPCMDARG,
6178
 
        "Determines whether the transaction log is written and/or flushed when a transaction is committed (no matter what the setting the log is written and flushed once per second), 0 = no write & no flush, 1 = write & flush (default), 2 = write & no flush.",
6179
 
        NULL, NULL, 1, 0, 2, 1);
6180
 
 
6181
 
static struct st_mysql_sys_var* pbxt_system_variables[] = {
6182
 
  MYSQL_SYSVAR(index_cache_size),
6183
 
  MYSQL_SYSVAR(record_cache_size),
6184
 
  MYSQL_SYSVAR(log_cache_size),
6185
 
  MYSQL_SYSVAR(log_file_threshold),
6186
 
  MYSQL_SYSVAR(transaction_buffer_size),
6187
 
  MYSQL_SYSVAR(log_buffer_size),
6188
 
  MYSQL_SYSVAR(checkpoint_frequency),
6189
 
  MYSQL_SYSVAR(data_log_threshold),
6190
 
  MYSQL_SYSVAR(data_file_grow_size),
6191
 
  MYSQL_SYSVAR(row_file_grow_size),
6192
 
  MYSQL_SYSVAR(record_write_threshold),
6193
 
  MYSQL_SYSVAR(garbage_threshold),
6194
 
  MYSQL_SYSVAR(log_file_count),
6195
 
  MYSQL_SYSVAR(auto_increment_mode),
6196
 
  MYSQL_SYSVAR(offline_log_function),
6197
 
  MYSQL_SYSVAR(sweeper_priority),
6198
 
  MYSQL_SYSVAR(support_xa),
6199
 
  MYSQL_SYSVAR(index_dirty_threshold),
6200
 
  MYSQL_SYSVAR(flush_log_at_trx_commit),
6201
 
  NULL
6202
 
};
6203
 
#endif
6204
 
#endif
6205
 
 
6206
 
#ifdef DRIZZLED
6207
 
DRIZZLE_DECLARE_PLUGIN
6208
 
{
6209
 
        DRIZZLE_VERSION_ID,
6210
 
        "PBXT",
6211
 
        "1.0",
6212
 
        "Paul McCullagh, PrimeBase Technologies GmbH",
6213
 
        "High performance, multi-versioning transactional engine",
6214
 
        PLUGIN_LICENSE_GPL,
6215
 
        pbxt_init, /* Plugin Init */
6216
 
        NULL,          /* depends */
6217
 
        NULL                                            /* config options                  */
6218
 
}
6219
 
DRIZZLE_DECLARE_PLUGIN_END;
6220
 
#else // MySQL case
6221
 
mysql_declare_plugin(pbxt)
6222
 
{
6223
 
        MYSQL_STORAGE_ENGINE_PLUGIN,
6224
 
        &pbxt_storage_engine,
6225
 
        "PBXT",
6226
 
        "Paul McCullagh, PrimeBase Technologies GmbH",
6227
 
        "High performance, multi-versioning transactional engine",
6228
 
        PLUGIN_LICENSE_GPL,
6229
 
        pbxt_init, /* Plugin Init */
6230
 
        pbxt_end, /* Plugin Deinit */
6231
 
        0x0001 /* 0.1 */,
6232
 
        NULL,                       /* status variables                */
6233
 
#if MYSQL_VERSION_ID >= 50118
6234
 
        pbxt_system_variables,          /* depends */
6235
 
#else
6236
 
        NULL,
6237
 
#endif
6238
 
        NULL                                            /* config options                  */
6239
 
}, {
6240
 
        MYSQL_INFORMATION_SCHEMA_PLUGIN,
6241
 
        &pbxt_statitics,
6242
 
        "PBXT_STATISTICS",
6243
 
        "Paul McCullagh, PrimeBase Technologies GmbH",
6244
 
        "PBXT internal system statitics",
6245
 
        PLUGIN_LICENSE_GPL,
6246
 
        pbxt_init_statistics,                                           /* plugin init */
6247
 
        pbxt_exit_statistics,                                           /* plugin deinit */
6248
 
        0x0005,
6249
 
        NULL,                                                                           /* status variables */
6250
 
        NULL,                                                                           /* depends */
6251
 
        NULL                                                                            /* config options */
6252
 
}
6253
 
mysql_declare_plugin_end;
6254
 
#endif
6255
 
 
6256
 
#if defined(XT_WIN) && defined(XT_COREDUMP)
6257
 
 
6258
 
/*
 * WINDOWS CORE DUMP SUPPORT
 *
 * MySQL supports core dumping on Windows with the --core-file command line option.
 * However, it creates dumps with the MiniDumpNormal option, which saves only stack traces.
 *
 * We instead (or in addition) create dumps with the MiniDumpWithPrivateReadWriteMemory
 * option (see core_dump() below), which also includes the process's private
 * read/write memory. To enable core dumping, define XT_COREDUMP at compile time.
 * In addition, pbxt_crash_debug must be set to TRUE, which is the case if
 * XT_CRASH_DEBUG is defined.
 * This switch can also be controlled by creating a file called "no-debug" or
 * "crash-debug" in the pbxt database directory.
 */
6272
 
 
6273
 
/*
 * Dump content flags, declared locally. One of these values is passed as
 * the DumpType argument of the MiniDumpWriteDump function that core_dump()
 * resolves from DBGHELP.DLL. Each flag occupies its own bit.
 */
typedef enum _MINIDUMP_TYPE {
	MiniDumpNormal                         = 0,
	MiniDumpWithDataSegs                   = 1 << 0,
	MiniDumpWithFullMemory                 = 1 << 1,
	MiniDumpWithHandleData                 = 1 << 2,
	MiniDumpFilterMemory                   = 1 << 3,
	MiniDumpScanMemory                     = 1 << 4,
	MiniDumpWithUnloadedModules            = 1 << 5,
	MiniDumpWithIndirectlyReferencedMemory = 1 << 6,
	MiniDumpFilterModulePaths              = 1 << 7,
	MiniDumpWithProcessThreadData          = 1 << 8,
	MiniDumpWithPrivateReadWriteMemory     = 1 << 9
} MINIDUMP_TYPE;
6286
 
 
6287
 
typedef struct _MINIDUMP_EXCEPTION_INFORMATION {
6288
 
    DWORD ThreadId;
6289
 
    PEXCEPTION_POINTERS ExceptionPointers;
6290
 
    BOOL ClientPointers;
6291
 
} MINIDUMP_EXCEPTION_INFORMATION, *PMINIDUMP_EXCEPTION_INFORMATION;
6292
 
 
6293
 
typedef BOOL (WINAPI *MINIDUMPWRITEDUMP)(
6294
 
        HANDLE hProcess, 
6295
 
        DWORD dwPid, 
6296
 
        HANDLE hFile, 
6297
 
        MINIDUMP_TYPE DumpType,
6298
 
        void *ExceptionParam,
6299
 
        void *UserStreamParam,
6300
 
        void *CallbackParam
6301
 
        );
6302
 
 
6303
 
char base_path[_MAX_PATH] = {0};
6304
 
char dump_path[_MAX_PATH] = {0};
6305
 
 
6306
 
void core_dump(struct _EXCEPTION_POINTERS *pExceptionInfo)
6307
 
{
6308
 
        SECURITY_ATTRIBUTES     sa = { sizeof(SECURITY_ATTRIBUTES), 0, 0 };
6309
 
        int i;
6310
 
        HMODULE hDll = NULL;
6311
 
        HANDLE hFile;
6312
 
        MINIDUMPWRITEDUMP pDump;
6313
 
        char *end_ptr = base_path;
6314
 
 
6315
 
        MINIDUMP_EXCEPTION_INFORMATION ExInfo, *ExInfoPtr = NULL;
6316
 
 
6317
 
        if (pExceptionInfo) {
6318
 
                ExInfo.ThreadId = GetCurrentThreadId();
6319
 
                ExInfo.ExceptionPointers = pExceptionInfo;
6320
 
                ExInfo.ClientPointers = NULL;
6321
 
                ExInfoPtr = &ExInfo;
6322
 
        }
6323
 
 
6324
 
        end_ptr = base_path + strlen(base_path);
6325
 
 
6326
 
        strcat(base_path, "DBGHELP.DLL" );
6327
 
        hDll = LoadLibrary(base_path);
6328
 
        *end_ptr = 0;
6329
 
        if (hDll==NULL) {
6330
 
                int err;
6331
 
                err = HRESULT_CODE(GetLastError());
6332
 
                hDll = LoadLibrary( "DBGHELP.DLL" );
6333
 
                if (hDll==NULL) {
6334
 
                        err = HRESULT_CODE(GetLastError());
6335
 
                        return;
6336
 
                }
6337
 
        }
6338
 
 
6339
 
        pDump = (MINIDUMPWRITEDUMP)GetProcAddress( hDll, "MiniDumpWriteDump" );
6340
 
        if (!pDump) {
6341
 
                int err;
6342
 
                err = HRESULT_CODE(GetLastError());
6343
 
                return;
6344
 
        }
6345
 
 
6346
 
        for (i = 1; i < INT_MAX; i++) {
6347
 
                sprintf(dump_path, "%sPBXTCore%08d.dmp", base_path, i);
6348
 
                hFile = CreateFile( dump_path, GENERIC_WRITE, FILE_SHARE_WRITE, NULL, CREATE_NEW,
6349
 
                                                        FILE_ATTRIBUTE_NORMAL, NULL );
6350
 
 
6351
 
                if ( hFile != INVALID_HANDLE_VALUE )
6352
 
                        break;
6353
 
 
6354
 
                if (HRESULT_CODE(GetLastError()) == ERROR_FILE_EXISTS )
6355
 
                        continue;
6356
 
 
6357
 
                return;
6358
 
        }
6359
 
 
6360
 
        // write the dump
6361
 
        BOOL bOK = pDump( GetCurrentProcess(), GetCurrentProcessId(), hFile, 
6362
 
                MiniDumpWithPrivateReadWriteMemory, ExInfoPtr, NULL, NULL );
6363
 
 
6364
 
        CloseHandle(hFile);
6365
 
}
6366
 
 
6367
 
LONG crash_filter( struct _EXCEPTION_POINTERS *pExceptionInfo )
6368
 
{
6369
 
        core_dump(pExceptionInfo);
6370
 
        return EXCEPTION_EXECUTE_HANDLER;
6371
 
}
6372
 
 
6373
 
void register_crash_filter()
6374
 
{
6375
 
        SetUnhandledExceptionFilter( (LPTOP_LEVEL_EXCEPTION_FILTER) crash_filter );
6376
 
}
6377
 
 
6378
 
#endif // XT_WIN && XT_COREDUMP