1
/* Copyright (C) 2005 PrimeBase Technologies GmbH
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
* 2005-02-08 Paul McCullagh
24
#include "xt_config.h"
35
#include <drizzled/common.h>
36
#include <drizzled/dtcollation.h>
38
#include "mysql_priv.h"
42
#include "database_xt.h"
44
#include "strutil_xt.h"
49
#include "systab_xt.h"
52
//#define TRACE_VARIATIONS
53
//#define TRACE_VARIATIONS_IN_DUP_CHECK
54
//#define DUMP_CHECK_TABLE
55
//#define CHECK_INDEX_ON_CHECK_TABLE
56
//#define TRACE_TABLE_IDS
57
//#define TRACE_FLUSH_TABLE
58
//#define TRACE_CREATE_TABLES
61
#define CHECK_TABLE_STATS
63
/* The problem is that this can take a long time
64
* if the table is very large!
66
//#define CHECK_TABLE_READ_DATA_LOG
68
#ifdef TRACE_TABLE_IDS
69
//#define PRINTF xt_ftracef
70
#define PRINTF xt_trace
73
static void tab_init_row_file(XTThreadPtr self, XTOpenFilePtr of_row, XTTableHPtr tab, XTDictionaryPtr dic);
74
static void tab_init_ind_file(XTThreadPtr self, XTOpenFilePtr of_ind, XTTableHPtr tab, XTDictionaryPtr dic);
75
static void tab_init_data_file(XTThreadPtr self, XTOpenFilePtr of_rec, XTTableHPtr tab, XTDictionaryPtr dic, size_t def_len, XTStringBufferPtr tab_def);
76
static void tab_free_ext_records(XTTableHPtr tab);
79
* -----------------------------------------------------------------------
83
#define XT_MAX_TABLE_FILE_NAME_SIZE (XT_TABLE_NAME_SIZE+6+40)
86
* -----------------------------------------------------------------------
87
* Handle Error Detected in a Table
92
xtRecordID ter_rec_id;
95
static int tab_comp_tab_error(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
97
XTTableError *ter_a = ((XTTableError *) a);
98
XTTableError *ter_b = (XTTableError *) b;
100
if (ter_a->ter_tab_id < ter_b->ter_tab_id)
102
if (ter_a->ter_tab_id == ter_b->ter_tab_id) {
103
if (ter_a->ter_rec_id < ter_b->ter_rec_id)
105
if (ter_a->ter_rec_id == ter_b->ter_rec_id)
112
static xtBool tab_record_corrupt(XTOpenTablePtr ot, xtRowID row_id, xtRecordID rec_id, bool not_valid, int where)
114
XTTableHPtr tab = ot->ot_table;
115
XTDatabaseHPtr db = tab->tab_db;
117
XTTableError *ter_ptr;
119
ter.ter_tab_id = tab->tab_id;
120
ter.ter_rec_id = rec_id;
122
xt_sl_lock_ns(db->db_error_list, ot->ot_thread);
123
if (!(ter_ptr = (XTTableError *) xt_sl_find(NULL, db->db_error_list, &ter))) {
125
char table_name[XT_IDENTIFIER_NAME_SIZE*3+3];
127
ok = xt_sl_insert(NULL, db->db_error_list, &ter, &ter);
128
xt_sl_unlock_ns(db->db_error_list);
131
xt_tab_set_table_repair_pending(tab);
132
xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
133
xt_logf(XT_NT_ERROR, "#%d Table %s: row %llu, record %llu, is %s, REPAIR TABLE required.\n", where,
137
not_valid ? "not valid" : "free");
140
xt_sl_unlock_ns(db->db_error_list);
145
* -----------------------------------------------------------------------
149
/* GOTCHA! The problem:
151
* The server uses names like: "./test/my_tab",
152
* the BLOB streaming engine uses: "test/my_tab"
153
* which leads to the same table being loaded twice.
155
/*
 * Compare two table paths using only the part returned by
 * xt_last_2_names_of_path(), so that "./test/my_tab" and "test/my_tab"
 * are treated as the same table. The comparison ignores case when
 * pbxt_ignore_case is set.
 *
 * Returns the strcmp()/strcasecmp() result: zero when the paths match.
 */
xtPublic int xt_tab_compare_paths(char *n1, char *n2)
{
	char *tail_1 = xt_last_2_names_of_path(n1);
	char *tail_2 = xt_last_2_names_of_path(n2);

	return pbxt_ignore_case ? strcasecmp(tail_1, tail_2) : strcmp(tail_1, tail_2);
}
165
* This function compares only the last 2 components of
166
* the path because table names must differ in this area.
168
/*
 * Compare two table names given as paths. Only the part returned by
 * xt_last_2_names_of_path() takes part in the comparison, which is
 * case-insensitive when pbxt_ignore_case is set.
 *
 * Returns the strcmp()/strcasecmp() result: zero when the names match.
 */
xtPublic int xt_tab_compare_names(const char *n1, const char *n2)
{
	const char *lhs = xt_last_2_names_of_path(n1);
	const char *rhs = xt_last_2_names_of_path(n2);

	if (!pbxt_ignore_case)
		return strcmp(lhs, rhs);
	return strcasecmp(lhs, rhs);
}
178
* -----------------------------------------------------------------------
182
/*
 * Case-sensitive match callback for the table hash table: key is a path
 * string, data is a table handle. Returns non-zero when the last two path
 * components of the key equal those of the table's name.
 */
static xtBool tab_list_comp(void *key, void *data)
{
	char *wanted = xt_last_2_names_of_path((char *) key);
	char *actual = xt_last_2_names_of_path(((XTTableHPtr) data)->tab_name->ps_path);

	return strcmp(wanted, actual) == 0;
}
189
/*
 * Hash callback registered with tab_list_comp() in xt_tab_init_db().
 * key_data is hashed with xt_ht_hash() either directly as a path string,
 * or as a table handle whose tab_name->ps_path is used; in both cases only
 * the part returned by xt_last_2_names_of_path() is hashed.
 *
 * NOTE(review): the test that selects between the two returns (presumably
 * on is_key) is not visible in this excerpt - confirm against the full file.
 */
static xtHashValue tab_list_hash(xtBool is_key, void *key_data)
191
XTTableHPtr tab = (XTTableHPtr) key_data;
194
/* key_data is the path string itself: */
return xt_ht_hash(xt_last_2_names_of_path((char *) key_data));
195
/* key_data is a table handle, hash its name: */
return xt_ht_hash(xt_last_2_names_of_path(tab->tab_name->ps_path));
198
/*
 * Case-insensitive match callback for the table hash table: key is a path
 * string, data is a table handle. Returns non-zero when the last two path
 * components of the key equal those of the table's name, ignoring case.
 */
static xtBool tab_list_comp_ci(void *key, void *data)
{
	char *wanted = xt_last_2_names_of_path((char *) key);
	char *actual = xt_last_2_names_of_path(((XTTableHPtr) data)->tab_name->ps_path);

	return strcasecmp(wanted, actual) == 0;
}
205
/*
 * Hash callback registered with tab_list_comp_ci() in xt_tab_init_db().
 * key_data is hashed with xt_ht_casehash() either directly as a path
 * string, or as a table handle whose tab_name->ps_path is used; in both
 * cases only the part returned by xt_last_2_names_of_path() is hashed.
 *
 * NOTE(review): the test that selects between the two returns (presumably
 * on is_key) is not visible in this excerpt - confirm against the full file.
 */
static xtHashValue tab_list_hash_ci(xtBool is_key, void *key_data)
207
XTTableHPtr tab = (XTTableHPtr) key_data;
210
/* key_data is the path string itself: */
return xt_ht_casehash(xt_last_2_names_of_path((char *) key_data));
211
/* key_data is a table handle, hash its name: */
return xt_ht_casehash(xt_last_2_names_of_path(tab->tab_name->ps_path));
214
/*
 * Free callback for the table hash table. Clears the table pointer in the
 * matching entry of the database's ID list, removes the references held by
 * the table's dictionary definition (if any) and releases the table heap
 * object.
 */
static void tab_list_free(XTThreadPtr self, void *data)
{
	XTTableHPtr		table = (XTTableHPtr) data;
	XTTableEntryPtr	entry;

	/* When the table is removed from the name list, the reference
	 * held by the ID list must be cleared as well:
	 */
	entry = (XTTableEntryPtr) xt_sl_find(self, table->tab_db->db_table_by_id, &table->tab_id);
	if (entry != NULL)
		entry->te_table = NULL;

	if (table->tab_dic.dic_table != NULL)
		table->tab_dic.dic_table->removeReferences(self);

	xt_heap_release(self, table);
}
231
/*
 * Release the record, row and index file references held by the table.
 * Each pointer is reset to NULL after its file has been released, so the
 * function may be called more than once.
 */
static void tab_close_files(XTThreadPtr self, XTTableHPtr tab)
{
	/* Record file: */
	if (tab->tab_rec_file != NULL) {
		xt_fs_release_file(self, tab->tab_rec_file);
		tab->tab_rec_file = NULL;
	}

	/* Row file: */
	if (tab->tab_row_file != NULL) {
		xt_fs_release_file(self, tab->tab_row_file);
		tab->tab_row_file = NULL;
	}

	/* Index file: */
	if (tab->tab_ind_file != NULL) {
		xt_fs_release_file(self, tab->tab_ind_file);
		tab->tab_ind_file = NULL;
	}
}
247
/*
 * Finalizer for a table handle; x is the table (XTTableHPtr).
 *
 * In order, it: exits the row locks, calls xt_xres_exit_tab(), frees the
 * index free list, closes the table files, frees the index head and the
 * extended records, frees the table name and the dictionary, frees the
 * locks when tab_free_locks is set, and finally calls tk_exit() on the
 * record and index flush tasks if they exist.
 *
 * NOTE(review): several lines of this function (loop header, closing
 * braces, #endif lines) are not visible in this excerpt.
 */
static void tab_finalize(XTThreadPtr self, void *x)
249
XTTableHPtr tab = (XTTableHPtr) x;
251
xt_exit_row_locks(&tab->tab_locks);
253
xt_xres_exit_tab(self, tab);
255
/* Free every node of the index free list: */
if (tab->tab_ind_free_list) {
256
XTIndFreeListPtr list, flist;
258
list = tab->tab_ind_free_list;
261
/* NOTE(review): the loop header and the assignment of flist are not visible here. */
list = list->fl_next_list;
262
xt_free(self, flist);
264
tab->tab_ind_free_list = NULL;
267
tab_close_files(self, tab);
269
if (tab->tab_index_head) {
270
xt_free(self, tab->tab_index_head);
271
tab->tab_index_head = NULL;
274
tab_free_ext_records(tab);
276
/* NOTE(review): in the trace call below the (int) cast binds to tab->tab_db
 * only, not to the whole conditional expression, so a pointer is cast to int
 * and db_id is passed uncast. "(int) (tab->tab_db ? tab->tab_db->db_id : 0)"
 * looks like what was intended - confirm and fix.
 */
#ifdef TRACE_TABLE_IDS
277
PRINTF("%s: free TABLE: db=%d tab=%d %s\n", self->t_name, (int) tab->tab_db ? tab->tab_db->db_id : 0, (int) tab->tab_id,
278
tab->tab_name ? xt_last_2_names_of_path(tab->tab_name->ps_path) : "?");
281
xt_free(self, tab->tab_name);
282
tab->tab_name = NULL;
284
myxt_free_dictionary(self, &tab->tab_dic);
285
/* The locks are only freed when tab_free_locks is set: */
if (tab->tab_free_locks) {
286
tab->tab_seq.xt_op_seq_exit(self);
287
xt_spinlock_free(self, &tab->tab_mem_lock);
288
xt_spinlock_free(self, &tab->tab_ainc_lock);
289
xt_free_mutex(&tab->tab_rec_flush_lock);
290
xt_free_mutex(&tab->tab_ind_flush_lock);
291
xt_free_mutex(&tab->tab_ind_stat_lock);
292
xt_free_mutex(&tab->tab_dic_field_lock);
293
xt_free_mutex(&tab->tab_row_lock);
294
xt_free_mutex(&tab->tab_ind_lock);
295
xt_free_mutex(&tab->tab_rec_lock);
296
for (u_int i=0; i<XT_ROW_RWLOCKS; i++)
297
XT_TAB_ROW_FREE_LOCK(self, &tab->tab_row_rwlock[i]);
299
/* Buffers used when record writes are sorted (XT_SORT_REC_WRITES builds only): */
#ifdef XT_SORT_REC_WRITES
300
if (tab->tab_rec_dw_writes) {
301
xt_free_sortedlist(self, tab->tab_rec_dw_writes);
302
tab->tab_rec_dw_writes = NULL;
304
if (tab->tab_rec_dw_data)
305
xt_free_ns(tab->tab_rec_dw_data);
307
/* Shut down the flush tasks, if they were created: */
if (tab->tab_rec_flush_task)
308
tab->tab_rec_flush_task->tk_exit();
309
if (tab->tab_ind_flush_task)
310
tab->tab_ind_flush_task->tk_exit();
313
static void tab_onrelease(void *x)
315
XTTableHPtr tab = (XTTableHPtr) x;
317
/* Signal threads waiting for exclusive use of the table: */
318
if (tab->tab_db->db_tables)
319
xt_ht_signal(NULL, tab->tab_db->db_tables);
323
* -----------------------------------------------------------------------
328
* This function sets the table name to "", if the file
329
* does not belong to XT.
331
xtPublic char *xt_tab_file_to_name(size_t size, char *tab_name, char *file_name)
336
file_name = xt_last_name_of_path(file_name);
337
cptr = file_name + strlen(file_name) - 1;
338
while (cptr > file_name && *cptr != '.')
340
if (cptr > file_name && *cptr == '.') {
341
if (strcmp(cptr, ".xtl") == 0 || strcmp(cptr, ".xtr") == 0) {
343
while (cptr > file_name && isdigit(*cptr))
347
const char **ext = pbxt_extensions;
350
if (strcmp(cptr, *ext) == 0)
359
len = cptr - file_name;
363
memcpy(tab_name, file_name, len);
366
/* Return a pointer to what was removed! */
367
return file_name + len;
370
static void tab_get_row_file_name(char *table_name, char *name, xtTableID tab_id)
372
sprintf(table_name, "%s-%lu.xtr", name, (u_long) tab_id);
375
static void tab_get_data_file_name(char *table_name, char *name, xtTableID XT_UNUSED(tab_id))
377
sprintf(table_name, "%s.xtd", name);
380
static void tab_get_index_file_name(char *table_name, char *name, xtTableID XT_UNUSED(tab_id))
382
sprintf(table_name, "%s.xti", name);
385
/*
 * Free callback for items of the table-by-ID sorted list. Frees the table
 * name owned by the entry (if any) and resets the remaining fields.
 */
static void tab_free_by_id(XTThreadPtr self, void *XT_UNUSED(thunk), void *item)
{
	XTTableEntryPtr entry = (XTTableEntryPtr) item;

	if (entry->te_tab_name != NULL) {
		xt_free(self, entry->te_tab_name);
		entry->te_tab_name = NULL;
	}

	/* Leave the entry in a cleared state: */
	entry->te_table = NULL;
	entry->te_heap_tab = FALSE;
	entry->te_tab_id = 0;
}
398
static int tab_comp_by_id(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
400
xtTableID te_id = *((xtTableID *) a);
401
XTTableEntryPtr te_ptr = (XTTableEntryPtr) b;
403
if (te_id < te_ptr->te_tab_id)
405
if (te_id == te_ptr->te_tab_id)
410
/*
 * Free callback for items of the table path sorted list. Each item holds
 * a pointer to a table path record; the record it points to is freed.
 */
static void tab_free_path(XTThreadPtr self, void *XT_UNUSED(thunk), void *item)
{
	XTTablePathPtr *slot = (XTTablePathPtr *) item;

	xt_free(self, *slot);
}
417
/*
 * Compare callback for the table path sorted list: a is the search key
 * (a path string), b is a list item holding a pointer to a table path
 * record. The result is that of xt_tab_compare_paths().
 */
static int tab_comp_path(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
{
	XTTablePathPtr *slot = (XTTablePathPtr *) b;

	return xt_tab_compare_paths((char *) a, (*slot)->tp_path);
}
425
static xtBool tab_get_name_value(XTTableDescPtr td, char **ret_name, char **ret_value)
427
char *ptr = td->x.z.td_curr_ptr;
429
while (*ptr && isspace(*ptr)) ptr++;
431
td->x.z.td_curr_ptr = ptr;
436
while (*ptr && *ptr != '=' && *ptr != '\r' && *ptr != '\n') ptr++;
441
while (*ptr && *ptr != '\r' && *ptr != '\n') ptr++;
454
td->x.z.td_curr_ptr = ptr;
458
xtPublic void xt_describe_tables_init(XTThreadPtr self, XTDatabaseHPtr db, XTTableDescPtr td)
463
xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
464
xt_add_tables_file(PATH_MAX, pbuf);
465
if (xt_fs_exists(pbuf))
466
td->td_type = XT_TD_FROM_TAB_FILE;
468
td->td_type = XT_TD_FROM_DIRECTORY;
470
switch (td->td_type) {
471
case XT_TD_FROM_DIRECTORY:
472
td->x.y.td_path_idx = 0;
473
if (td->x.y.td_path_idx < xt_sl_get_size(db->db_table_paths)) {
474
XTTablePathPtr *tp_ptr;
476
tp_ptr = (XTTablePathPtr *) xt_sl_item_at(db->db_table_paths, td->x.y.td_path_idx);
477
td->td_tab_path = *tp_ptr;
478
td->x.y.td_open_dir = xt_dir_open(self, td->td_tab_path->tp_path, "*.xtr");
481
td->x.y.td_open_dir = NULL;
483
case XT_TD_FROM_TAB_FILE:
490
of = xt_open_file(self, pbuf, XT_FT_STANDARD, XT_FS_DEFAULT, 1024);
491
pushr_(xt_close_file, of);
492
len = (int) xt_seek_eof_file(self, of);
493
buffer = (char *) xt_malloc(self, len + 1);
494
pushr_(xt_free, buffer);
495
if (!xt_pread_file(of, 0, len, len, buffer, NULL, &self->st_statistics.st_x, self))
498
popr_(); // Discard xt_free(buffer)
499
freer_(); // xt_close_file(of)
501
td->x.z.td_table_info = buffer;
502
td->x.z.td_curr_ptr = buffer;
503
while (tab_get_name_value(td, &name, &value)) {
504
if (strcmp(name, "[table]") == 0)
511
xtPublic xtBool xt_describe_tables_next(XTThreadPtr self, XTTableDescPtr td)
518
switch (td->td_type) {
519
case XT_TD_FROM_DIRECTORY:
521
if (!td->x.y.td_open_dir)
524
r = xt_dir_next(self, td->x.y.td_open_dir);
527
xt_describe_tables_exit(self, td);
532
XTTablePathPtr *tp_ptr;
534
if (td->x.y.td_path_idx+1 >= xt_sl_get_size(td->td_db->db_table_paths))
537
if (td->x.y.td_open_dir)
538
xt_dir_close(NULL, td->x.y.td_open_dir);
539
td->x.y.td_open_dir = NULL;
541
td->x.y.td_path_idx++;
542
tp_ptr = (XTTablePathPtr *) xt_sl_item_at(td->td_db->db_table_paths, td->x.y.td_path_idx);
543
td->td_tab_path = *tp_ptr;
544
td->x.y.td_open_dir = xt_dir_open(self, td->td_tab_path->tp_path, "*.xtr");
548
tab_name = xt_dir_name(self, td->x.y.td_open_dir);
549
td->td_tab_id = (xtTableID) xt_file_name_to_id(tab_name);
550
xt_tab_file_to_name(XT_TABLE_NAME_SIZE, td->td_tab_name, tab_name);
551
td->td_heap_tab = FALSE;
553
case XT_TD_FROM_TAB_FILE:
558
while (tab_get_name_value(td, &name, &value)) {
559
if (strcmp(name, "name") == 0)
560
xt_strcpy(XT_TABLE_NAME_SIZE, td->td_tab_name, value);
561
else if (strcmp(name, "id") == 0) {
564
sscanf(value, "%lu", &lvalue);
565
td->td_tab_id = (xtTableID) lvalue;
567
else if (strcmp(name, "storage") == 0) {
568
if (strcmp(value, "heap") == 0)
569
td->td_heap_tab = TRUE;
571
td->td_heap_tab = FALSE;
573
else if (strcmp(name, "location") == 0) {
575
XTTablePathPtr db_path;
580
/* Convert path to WIN path: */
587
if ((tp = (XTTablePathPtr *) xt_sl_find(self, td->td_db->db_table_paths, value)))
593
db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
594
db_path->tp_tab_count = 0;
595
memcpy(db_path->tp_path, value, len);
596
db_path->tp_path[len] = 0;
597
xt_sl_insert(self, td->td_db->db_table_paths, db_path->tp_path, &db_path);
599
td->td_tab_path = db_path;
601
else if (strcmp(name, "type") == 0) {
604
sscanf(value, "%lu", &lvalue);
605
td->td_tab_type = (xtWord1) lvalue;
607
else if (strcmp(name, "[table]") == 0)
617
xtPublic void xt_describe_tables_exit(XTThreadPtr self, XTTableDescPtr td)
619
switch (td->td_type) {
620
case XT_TD_FROM_DIRECTORY:
621
if (td->x.y.td_open_dir)
622
xt_dir_close(NULL, td->x.y.td_open_dir);
623
td->x.y.td_open_dir = NULL;
625
case XT_TD_FROM_TAB_FILE:
626
if (td->x.z.td_table_info) {
627
xt_free(self, td->x.z.td_table_info);
628
td->x.z.td_table_info = NULL;
630
td->x.z.td_curr_ptr = NULL;
633
td->td_tab_path = NULL;
636
xtPublic void xt_tab_init_db(XTThreadPtr self, XTDatabaseHPtr db)
639
XTTableEntryRec te_tab;
640
XTTableEntryPtr te_ptr;
641
XTTablePathPtr db_path;
647
pushr_(xt_tab_exit_db, db);
648
if (pbxt_ignore_case)
649
db->db_tables = xt_new_hashtable(self, tab_list_comp_ci, tab_list_hash_ci, tab_list_free, TRUE, TRUE);
651
db->db_tables = xt_new_hashtable(self, tab_list_comp, tab_list_hash, tab_list_free, TRUE, TRUE);
652
db->db_table_by_id = xt_new_sortedlist(self, sizeof(XTTableEntryRec), 20, 20, tab_comp_by_id, db, tab_free_by_id, FALSE, FALSE);
653
db->db_table_paths = xt_new_sortedlist(self, sizeof(XTTablePathPtr), 20, 20, tab_comp_path, db, tab_free_path, FALSE, FALSE);
654
db->db_error_list = xt_new_sortedlist(self, sizeof(XTTableError), 20, 20, tab_comp_tab_error, db, NULL, TRUE, FALSE);
656
if (db->db_multi_path) {
658
char *buffer, *ptr, *path;
660
xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
661
xt_add_tables_file(PATH_MAX, pbuf);
662
if (!xt_fs_exists(pbuf)) {
663
/* Load the location file, if a tables file does not
666
xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
667
xt_add_location_file(PATH_MAX, pbuf);
668
if (xt_fs_exists(pbuf)) {
669
of = xt_open_file(self, pbuf, XT_FT_STANDARD, XT_FS_DEFAULT, 1024);
670
pushr_(xt_close_file, of);
671
len = (int) xt_seek_eof_file(self, of);
672
buffer = (char *) xt_malloc(self, len + 1);
673
pushr_(xt_free, buffer);
674
if (!xt_pread_file(of, 0, len, len, buffer, NULL, &self->st_statistics.st_x, self))
679
/* Ignore preceding space: */
680
while (*ptr && isspace(*ptr))
683
while (*ptr && *ptr != '\n' && *ptr != '\r') {
685
/* Undo the conversion below: */
691
if (*path != '#' && ptr > path) {
692
len = (int) (ptr - path);
693
db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
694
db_path->tp_tab_count = 0;
695
memcpy(db_path->tp_path, path, len);
696
db_path->tp_path[len] = 0;
697
xt_sl_insert(self, db->db_table_paths, db_path->tp_path, &db_path);
701
freer_(); // xt_free(buffer)
702
freer_(); // xt_close_file(of)
707
len = (int) strlen(db->db_main_path);
708
db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
709
db_path->tp_tab_count = 0;
710
strcpy(db_path->tp_path, db->db_main_path);
711
xt_sl_insert(self, db->db_table_paths, db_path->tp_path, &db_path);
714
xt_describe_tables_init(self, db, &desc);
715
pushr_(xt_describe_tables_exit, &desc);
716
while (xt_describe_tables_next(self, &desc)) {
717
te_tab.te_tab_id = desc.td_tab_id;
718
te_tab.te_heap_tab = desc.td_heap_tab;
720
if (te_tab.te_tab_id > db->db_curr_tab_id)
721
db->db_curr_tab_id = te_tab.te_tab_id;
723
te_tab.te_tab_name = xt_dup_string(self, desc.td_tab_name);
724
te_tab.te_tab_path = desc.td_tab_path;
725
desc.td_tab_path->tp_tab_count++;
726
te_tab.te_table = NULL;
727
te_tab.te_type = desc.td_tab_type;
728
xt_sl_insert(self, db->db_table_by_id, &desc.td_tab_id, &te_tab);
730
freer_(); // xt_describe_tables_exit(&desc)
733
* When we open all tables, we ignore problems with foreign keys.
734
* This must be done or we will not be able to load tables that
735
* were created with foreign key checks off.
737
self->st_ignore_fkeys = 1;
739
* The purpose of this code is to ensure that all tables are opened and cached,
740
* which is actually only required if tables have foreign key references.
742
* In other words, a side effect of this code is that FK references between tables
743
* are registered, and checked.
745
* Unfortunately we don't know if a table is referenced by a FK, so we have to open
748
* Cannot open tables in the loop above because db->db_table_by_id which is built
749
* above is used by xt_use_table_no_lock()
752
* NOTE: The code also led to the statistics failing to work because
753
* the tables were already open when the handler was opened.
754
* Previously we only calculated statistics when a handler was opened
755
* and the underlying table was also opened.
759
xt_enum_tables_init(&edx);
760
while ((te_ptr = xt_enum_tables_next(self, db, &edx))) {
761
xt_strcpy(PATH_MAX, pbuf, te_ptr->te_tab_path->tp_path);
762
xt_add_dir_char(PATH_MAX, pbuf);
763
xt_strcat(PATH_MAX, pbuf, te_ptr->te_tab_name);
764
if ((tab = xt_use_table_no_lock_ns(db, (XTPathStrPtr) pbuf, FALSE, FALSE, NULL)))
765
xt_heap_release_ns(tab);
767
xt_log_and_clear_warning(self);
769
self->st_ignore_fkeys = 0;
771
popr_(); // Discard xt_tab_exit_db(db)
775
static void tab_save_tables(XTThreadPtr self, XTDatabaseHPtr db)
777
XTTableEntryPtr te_ptr;
778
XTStringBufferRec buffer;
782
memset(&buffer, 0, sizeof(buffer));
784
xt_strcpy(PATH_MAX, path, db->db_main_path);
785
xt_add_tables_file(PATH_MAX, path);
787
if (xt_sl_get_size(db->db_table_by_id)) {
788
pushr_(xt_sb_free, &buffer);
789
for (u_int i=0; i<xt_sl_get_size(db->db_table_by_id); i++) {
790
te_ptr = (XTTableEntryPtr) xt_sl_item_at(db->db_table_by_id, i);
791
xt_sb_concat(self, &buffer, "[table]\n");
792
xt_sb_concat(self, &buffer, "id=");
793
xt_sb_concat_int8(self, &buffer, (xtInt8) te_ptr->te_tab_id);
794
xt_sb_concat(self, &buffer, "\n");
795
xt_sb_concat(self, &buffer, "name=");
796
xt_sb_concat(self, &buffer, te_ptr->te_tab_name);
797
xt_sb_concat(self, &buffer, "\n");
798
xt_sb_concat(self, &buffer, "location=");
799
xt_sb_concat(self, &buffer, te_ptr->te_tab_path->tp_path);
800
xt_sb_concat(self, &buffer, "\n");
801
xt_sb_concat(self, &buffer, "storage=");
802
if (te_ptr->te_heap_tab)
803
xt_sb_concat(self, &buffer, "heap\n");
805
xt_sb_concat(self, &buffer, "disk\n");
806
xt_sb_concat(self, &buffer, "type=");
807
xt_sb_concat_int8(self, &buffer, (xtInt8) te_ptr->te_type);
808
xt_sb_concat(self, &buffer, "\n");
812
/* To make the location file cross-platform (at least
813
* as long as relative paths are used) we replace all '\'
817
ptr = buffer.sb_cstring;
824
of = xt_open_file(self, path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 1024);
825
pushr_(xt_close_file, of);
826
if (!xt_pwrite_file(of, 0, strlen(buffer.sb_cstring), buffer.sb_cstring, &self->st_statistics.st_x, self))
828
xt_set_eof_file(self, of, strlen(buffer.sb_cstring));
829
freer_(); // xt_close_file(of)
831
freer_(); // xt_sb_free(&buffer);
834
xt_fs_delete(NULL, path);
837
static void tab_save_table_paths(XTThreadPtr self, XTDatabaseHPtr db)
839
XTTablePathPtr *tp_ptr;
840
XTStringBufferRec buffer;
844
memset(&buffer, 0, sizeof(buffer));
846
xt_strcpy(PATH_MAX, path, db->db_main_path);
847
xt_add_location_file(PATH_MAX, path);
849
if (xt_sl_get_size(db->db_table_paths)) {
850
pushr_(xt_sb_free, &buffer);
851
for (u_int i=0; i<xt_sl_get_size(db->db_table_paths); i++) {
852
tp_ptr = (XTTablePathPtr *) xt_sl_item_at(db->db_table_paths, i);
853
xt_sb_concat(self, &buffer, (*tp_ptr)->tp_path);
854
xt_sb_concat(self, &buffer, "\n");
858
/* To make the location file cross-platform (at least
859
* as long as relative paths are used) we replace all '\'
863
ptr = buffer.sb_cstring;
871
of = xt_open_file(self, path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 1024);
872
pushr_(xt_close_file, of);
873
if (!xt_pwrite_file(of, 0, strlen(buffer.sb_cstring), buffer.sb_cstring, &self->st_statistics.st_x, self))
875
xt_set_eof_file(self, of, strlen(buffer.sb_cstring));
876
freer_(); // xt_close_file(of)
878
freer_(); // xt_sb_free(&buffer);
881
xt_fs_delete(NULL, path);
884
static XTTablePathPtr tab_get_table_path(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr tab_name, xtBool save_it)
886
XTTablePathPtr *tp, tab_path;
889
xt_strcpy(PATH_MAX, path, tab_name->ps_path);
890
xt_remove_last_name_of_path(path);
891
xt_remove_dir_char(path);
892
tp = (XTTablePathPtr *) xt_sl_find(self, db->db_table_paths, path);
896
int len = (int) strlen(path);
898
tab_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
899
tab_path->tp_tab_count = 0;
900
memcpy(tab_path->tp_path, path, len);
901
tab_path->tp_path[len] = 0;
902
xt_sl_insert(self, db->db_table_paths, tab_path->tp_path, &tab_path);
904
tab_save_table_paths(self, db);
905
if (xt_sl_get_size(db->db_table_paths) == 1) {
906
XTSystemTableShare::createSystemTables(self, db);
910
tab_path->tp_tab_count++;
914
/*
 * Drop one table reference from tab_path. When the count reaches zero the
 * path is deleted from db->db_table_paths and the list of table paths is
 * saved again with tab_save_table_paths(). A path whose count is not
 * positive is left untouched.
 */
static void tab_remove_table_path(XTThreadPtr self, XTDatabaseHPtr db, XTTablePathPtr tab_path)
{
	if (tab_path->tp_tab_count <= 0)
		return;

	tab_path->tp_tab_count--;
	if (tab_path->tp_tab_count != 0)
		return;

	/* That was the last table using this path: */
	xt_sl_delete(self, db->db_table_paths, tab_path->tp_path);
	tab_save_table_paths(self, db);
}
925
/*
 * Convenience wrapper around tab_remove_table_path() that uses the
 * database currently attached to the calling thread (self->st_database).
 */
static void tab_free_table_path(XTThreadPtr self, XTTablePathPtr tab_path)
{
	tab_remove_table_path(self, self->st_database, tab_path);
}
932
/*
 * Free the table structures of a database: the table hash table, the
 * table-by-ID list, the table path list and the table error list.
 *
 * Every member is freed only if it is set, and is reset to NULL afterwards.
 * This matters because xt_tab_init_db() pushes this function with pushr_()
 * before any of the structures has been allocated, so it may be run on a
 * partially initialised database; it also makes a second call harmless.
 */
xtPublic void xt_tab_exit_db(XTThreadPtr self, XTDatabaseHPtr db)
{
	if (db->db_tables) {
		xt_free_hashtable(self, db->db_tables);
		db->db_tables = NULL;
	}
	if (db->db_table_by_id) {
		xt_free_sortedlist(self, db->db_table_by_id);
		db->db_table_by_id = NULL;
	}
	if (db->db_table_paths) {
		xt_free_sortedlist(self, db->db_table_paths);
		db->db_table_paths = NULL;
	}
	if (db->db_error_list) {
		xt_free_sortedlist(self, db->db_error_list);
		db->db_error_list = NULL;
	}
}
953
/*
 * Report whether the database has at least one entry in its table-by-ID
 * list. Returns TRUE if so, otherwise FALSE.
 */
xtPublic xtBool xt_table_exists(XTDatabaseHPtr db)
{
	if (xt_sl_get_size(db->db_table_by_id) > 0)
		return TRUE;
	return FALSE;
}
959
* Enumerate all tables in the current database.
962
xtPublic void xt_enum_tables_init(u_int *edx)
967
xtPublic XTTableEntryPtr xt_enum_tables_next(XTThreadPtr XT_UNUSED(self), XTDatabaseHPtr db, u_int *edx)
969
XTTableEntryPtr en_ptr;
971
if (*edx >= xt_sl_get_size(db->db_table_by_id))
973
en_ptr = (XTTableEntryPtr) xt_sl_item_at(db->db_table_by_id, *edx);
978
xtPublic void xt_enum_files_of_tables_init(XTPathStrPtr tab_name, xtTableID tab_id, XTFilesOfTablePtr ft)
981
ft->ft_tab_name = tab_name;
982
ft->ft_tab_id = tab_id;
985
xtPublic xtBool xt_enum_files_of_tables_next(XTFilesOfTablePtr ft)
987
char file_name[XT_MAX_TABLE_FILE_NAME_SIZE];
990
switch (ft->ft_state) {
992
tab_get_row_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
995
tab_get_data_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
998
tab_get_index_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
1005
xt_strcpy(PATH_MAX, ft->ft_file_path, ft->ft_tab_name->ps_path);
1006
xt_remove_last_name_of_path(ft->ft_file_path);
1007
xt_strcat(PATH_MAX, ft->ft_file_path, file_name);
1008
if (!xt_fs_exists(ft->ft_file_path))
1014
static xtBool tab_find_table(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr name, xtTableID *tab_id)
1017
XTTableEntryPtr te_ptr;
1018
char path[PATH_MAX];
1020
xt_enum_tables_init(&edx);
1021
while ((te_ptr = xt_enum_tables_next(self, db, &edx))) {
1022
xt_strcpy(PATH_MAX, path, te_ptr->te_tab_path->tp_path);
1023
xt_add_dir_char(PATH_MAX, path);
1024
xt_strcat(PATH_MAX, path, te_ptr->te_tab_name);
1025
if (xt_tab_compare_names(path, name->ps_path) == 0) {
1026
*tab_id = te_ptr->te_tab_id;
1033
/*
 * Record why the indexes of a table cannot be used and flag the table as
 * needing repair.
 *
 * tab        The table whose dictionary is updated.
 * ind_error  Reason code stored in dic_disable_index; the XT_INDEX_...
 *            values handled by xt_tab_set_index_error() are examples.
 */
xtPublic void xt_tab_disable_index(XTTableHPtr tab, u_int ind_error)
{
	/* Remember the reason first, then mark the table for repair: */
	tab->tab_dic.dic_disable_index = ind_error;

	xt_tab_set_table_repair_pending(tab);
}
1039
xtPublic void xt_tab_set_index_error(XTTableHPtr tab)
1041
switch (tab->tab_dic.dic_disable_index) {
1044
case XT_INDEX_TOO_OLD:
1045
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_OLD_VERSION, tab->tab_name);
1047
case XT_INDEX_TOO_NEW:
1048
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_NEW_VERSION, tab->tab_name);
1050
case XT_INDEX_BAD_BLOCK:
1053
sprintf(number, "%d", (int) tab->tab_index_page_size);
1054
xt_register_i2xterr(XT_REG_CONTEXT, XT_ERR_BAD_IND_BLOCK_SIZE, xt_last_name_of_path(tab->tab_name->ps_path), number);
1056
case XT_INDEX_CORRUPTED:
1057
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, tab->tab_name);
1059
case XT_INDEX_MISSING:
1060
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_MISSING, tab->tab_name);
1062
case XT_INDEX_NOT_RECOVERED:
1063
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_NOT_RECOVERED, tab->tab_name);
1068
static void tab_load_index_header(XTThreadPtr self, XTTableHPtr tab, XTOpenFilePtr file, XTPathStrPtr table_name)
1073
XTIndexFormatDPtr index_fmt;
1075
/* Load the pointers: */
1076
if (tab->tab_index_head)
1077
xt_free_ns(tab->tab_index_head);
1078
tab->tab_index_head = (XTIndexHeadDPtr) xt_calloc(self, XT_INDEX_HEAD_SIZE);
1081
if (!xt_pread_file(file, 0, XT_INDEX_HEAD_SIZE, 0, tab->tab_index_head, NULL, &self->st_statistics.st_ind, self))
1084
tab->tab_index_format_offset = XT_GET_DISK_4(tab->tab_index_head->tp_format_offset_4);
1085
index_fmt = (XTIndexFormatDPtr) (((xtWord1 *) tab->tab_index_head) + tab->tab_index_format_offset);
1087
/* If the table version is less than or equal to an incompatible (unsupported
1088
* version), or greater than the current version, then we cannot open this table
1090
if (XT_GET_DISK_2(index_fmt->if_tab_version_2) <= XT_TAB_INCOMPATIBLE_VERSION ||
1091
XT_GET_DISK_2(index_fmt->if_tab_version_2) > XT_TAB_CURRENT_VERSION) {
1092
switch (XT_GET_DISK_2(index_fmt->if_tab_version_2)) {
1094
xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.91 Beta");
1097
xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.85 Beta");
1100
xt_throw_taberr(XT_CONTEXT, XT_ERR_BAD_TABLE_VERSION, table_name);
1106
tab->tab_dic.dic_index_ver = XT_GET_DISK_2(index_fmt->if_ind_version_2);
1107
tab->tab_dic.dic_disable_index = XT_INDEX_OK;
1109
if (tab->tab_dic.dic_index_ver == 1) {
1110
tab->tab_index_header_size = 1024 * 16;
1111
tab->tab_index_page_size = 1024 * 16;
1114
tab->tab_index_header_size = XT_GET_DISK_4(tab->tab_index_head->tp_header_size_4);
1115
tab->tab_index_page_size = XT_GET_DISK_4(index_fmt->if_page_size_4);
1118
#ifdef XT_USE_LAZY_DELETE
1119
if (tab->tab_dic.dic_index_ver <= XT_IND_NO_LAZY_DELETE)
1120
tab->tab_dic.dic_no_lazy_delete = TRUE;
1122
tab->tab_dic.dic_no_lazy_delete = FALSE;
1124
tab->tab_dic.dic_no_lazy_delete = TRUE;
1127
/* Incorrect version of index is handled by allowing a sequential scan, but no index access.
1128
* Recovery with the wrong index type will not recover the indexes, a REPAIR TABLE
1131
if (tab->tab_dic.dic_index_ver != XT_IND_CURRENT_VERSION) {
1132
switch (tab->tab_dic.dic_index_ver) {
1133
case XT_IND_NO_LAZY_DELETE:
1134
case XT_IND_LAZY_DELETE_OK:
1135
/* I can handle this type of index. */
1138
if (tab->tab_dic.dic_index_ver < XT_IND_CURRENT_VERSION)
1139
xt_tab_disable_index(tab, XT_INDEX_TOO_OLD);
1141
xt_tab_disable_index(tab, XT_INDEX_TOO_NEW);
1145
else if (tab->tab_index_page_size != XT_INDEX_PAGE_SIZE)
1146
xt_tab_disable_index(tab, XT_INDEX_BAD_BLOCK);
1149
memset(tab->tab_index_head, 0, XT_INDEX_HEAD_SIZE);
1150
xt_tab_disable_index(tab, XT_INDEX_MISSING);
1151
tab->tab_index_header_size = XT_INDEX_HEAD_SIZE;
1152
tab->tab_index_page_size = XT_INDEX_PAGE_SIZE;
1153
tab->tab_dic.dic_index_ver = 0;
1154
tab->tab_index_format_offset = 0;
1158
if (tab->tab_dic.dic_disable_index) {
1159
xt_tab_set_index_error(tab);
1160
xt_log_and_clear_exception_ns();
1163
if (tab->tab_dic.dic_disable_index) {
1164
/* Reset, as if we have empty indexes.
1165
* Flush will wipe things out, of course.
1166
* REPAIR TABLE will be required...
1168
XT_NODE_ID(tab->tab_ind_eof) = 1;
1169
XT_NODE_ID(tab->tab_ind_free) = 0;
1171
ind = tab->tab_dic.dic_keys;
1172
for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++)
1173
XT_NODE_ID((*ind)->mi_root) = 0;
1176
XT_NODE_ID(tab->tab_ind_eof) = (xtIndexNodeID) XT_GET_DISK_6(tab->tab_index_head->tp_ind_eof_6);
1177
XT_NODE_ID(tab->tab_ind_free) = (xtIndexNodeID) XT_GET_DISK_6(tab->tab_index_head->tp_ind_free_6);
1179
data = tab->tab_index_head->tp_data;
1180
ind = tab->tab_dic.dic_keys;
1181
for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++) {
1182
(*ind)->mi_root = XT_GET_NODE_REF(tab, data);
1183
data += XT_NODE_REF_SIZE;
1188
static void tab_load_table_format(XTThreadPtr self, XTOpenFilePtr file, XTPathStrPtr table_name, size_t *ret_format_offset, size_t *ret_head_size, XTDictionaryPtr dic)
1190
XTDiskValue4 size_buf;
1192
XTTableFormatDRec tab_fmt;
1195
if (!xt_pread_file(file, 0, 4, 4, &size_buf, NULL, &self->st_statistics.st_rec, self))
1198
head_size = XT_GET_DISK_4(size_buf);
1199
*ret_format_offset = head_size;
1201
/* Load the table format information: */
1202
if (!xt_pread_file(file, head_size, offsetof(XTTableFormatDRec, tf_definition), offsetof(XTTableFormatDRec, tf_tab_version_2) + 2, &tab_fmt, NULL, &self->st_statistics.st_rec, self))
1205
/* If the table version is less than or equal to an incompatible (unsupported
1206
* version), or greater than the current version, then we cannot open this table
1208
if (XT_GET_DISK_2(tab_fmt.tf_tab_version_2) <= XT_TAB_INCOMPATIBLE_VERSION ||
1209
XT_GET_DISK_2(tab_fmt.tf_tab_version_2) > XT_TAB_CURRENT_VERSION) {
1210
switch (XT_GET_DISK_2(tab_fmt.tf_tab_version_2)) {
1212
xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.91 Beta");
1215
xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.85 Beta");
1218
xt_throw_taberr(XT_CONTEXT, XT_ERR_BAD_TABLE_VERSION, table_name);
1224
fmt_size = XT_GET_DISK_4(tab_fmt.tf_format_size_4);
1225
*ret_head_size = XT_GET_DISK_4(tab_fmt.tf_tab_head_size_4);
1226
dic->dic_rec_size = XT_GET_DISK_4(tab_fmt.tf_rec_size_4);
1227
dic->dic_rec_fixed = XT_GET_DISK_1(tab_fmt.tf_rec_fixed_1);
1228
dic->dic_min_auto_inc = XT_GET_DISK_8(tab_fmt.tf_min_auto_inc_8);
1229
if (fmt_size > offsetof(XTTableFormatDRec, tf_definition)) {
1230
size_t def_size = fmt_size - offsetof(XTTableFormatDRec, tf_definition);
1233
pushsr_(def_sql, xt_free, (char *) xt_malloc(self, def_size));
1234
if (!xt_pread_file(file, head_size+offsetof(XTTableFormatDRec, tf_definition), def_size, def_size, def_sql, NULL, &self->st_statistics.st_rec, self))
1236
dic->dic_table = xt_ri_create_table(self, false, table_name, def_sql, myxt_create_table_from_table(self, dic->dic_my_table), NULL);
1237
freer_(); // xt_free(def_sql)
1240
dic->dic_table = myxt_create_table_from_table(self, dic->dic_my_table);
// Read the fixed table header record from the start of the given file and
// copy its fields into the in-memory table handle.
//
// self - calling thread; its st_statistics.st_rec is passed to the read call.
// tab  - table handle whose tab_head_* fields (and tab_wr_op_seq) are set.
// file - open file that is read at offset 0 for sizeof(XTTableHeadDRec) bytes.
//
// NOTE(review): the statement executed when xt_pread_file() fails is not
// visible in this view - presumably an exception is thrown; confirm.
1243
static void tab_load_table_header(XTThreadPtr self, XTTableHPtr tab, XTOpenFilePtr file)
1245
XTTableHeadDRec rec_head;
1247
if (!xt_pread_file(file, 0, sizeof(XTTableHeadDRec), sizeof(XTTableHeadDRec), (xtWord1 *) &rec_head, NULL, &self->st_statistics.st_rec, self))
// Decode the on-disk fields: one 4-byte operation sequence followed by the
// row and record free-list head, EOF and free-count values (6 bytes each).
1250
tab->tab_head_op_seq = XT_GET_DISK_4(rec_head.th_op_seq_4);
1251
tab->tab_head_row_free_id = (xtRowID) XT_GET_DISK_6(rec_head.th_row_free_6);
1252
tab->tab_head_row_eof_id = (xtRowID) XT_GET_DISK_6(rec_head.th_row_eof_6);
1253
tab->tab_head_row_fnum = (xtWord4) XT_GET_DISK_6(rec_head.th_row_fnum_6);
1254
tab->tab_head_rec_free_id = (xtRecordID) XT_GET_DISK_6(rec_head.th_rec_free_6);
1255
tab->tab_head_rec_eof_id = (xtRecordID) XT_GET_DISK_6(rec_head.th_rec_eof_6);
1256
tab->tab_head_rec_fnum = (xtWord4) XT_GET_DISK_6(rec_head.th_rec_fnum_6);
// The written operation sequence starts out equal to the one stored on disk.
1257
tab->tab_wr_op_seq = tab->tab_head_op_seq;
// Encode the header values held in the table handle (tab_head_*) into the
// disk-format header record supplied by the caller.
//
// ot       - open table; only ot->ot_table is read.
// rec_head - destination record; the seven fields set below are overwritten.
//
// Nothing is written to a file by the lines visible here.
1260
xtPublic void xt_tab_store_header(XTOpenTablePtr ot, XTTableHeadDPtr rec_head)
1262
XTTableHPtr tab = ot->ot_table;
1264
XT_SET_DISK_4(rec_head->th_op_seq_4, tab->tab_head_op_seq);
1265
XT_SET_DISK_6(rec_head->th_row_free_6, tab->tab_head_row_free_id);
1266
XT_SET_DISK_6(rec_head->th_row_eof_6, tab->tab_head_row_eof_id);
1267
XT_SET_DISK_6(rec_head->th_row_fnum_6, tab->tab_head_row_fnum);
1268
XT_SET_DISK_6(rec_head->th_rec_free_6, tab->tab_head_rec_free_id);
1269
XT_SET_DISK_6(rec_head->th_rec_eof_6, tab->tab_head_rec_eof_id);
1270
XT_SET_DISK_6(rec_head->th_rec_fnum_6, tab->tab_head_rec_fnum);
// Write the variable part of the table header to the record file and flush it.
//
// ot       - open table whose record file receives the data.
// rec_head - encoded header; bytes are taken starting at th_op_seq_4.
//
// NOTE(review): the return statements are not visible in this view; the
// xtBool result presumably reports whether both the write and flush succeeded.
1273
static xtBool tab_write_header(XTOpenTablePtr ot, XTTableHeadDPtr rec_head)
// 40 bytes are written starting at the file offset of th_op_seq_4.
// NOTE(review): 40 appears to be 4 + 6*6, i.e. the seven fields filled in by
// xt_tab_store_header() - confirm against the XTTableHeadDRec layout.
1275
if (!xt_tab_write_rec(ot, offsetof(XTTableHeadDRec, th_op_seq_4), 40, (xtWord1 *) rec_head->th_op_seq_4))
1277
if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread))
// Persist the table's minimum auto-increment value into the table format
// record of the record file, then flush the file.
//
// ot - open table; the value comes from ot->ot_table->tab_dic.dic_min_auto_inc.
//
// NOTE(review): the declarations of 'value' and 'offset' and the return
// statements are not visible in this view.
1282
xtPublic xtBool xt_tab_write_min_auto_inc(XTOpenTablePtr ot)
1287
XT_SET_DISK_8(value, ot->ot_table->tab_dic.dic_min_auto_inc);
// Target position: start of the format record plus the field's offset in it.
1288
offset = ot->ot_table->tab_table_format_offset + offsetof(XTTableFormatDRec, tf_min_auto_inc_8);
1289
if (!xt_tab_write_rec(ot, offset, 8, value))
1291
if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread))
1296
/* a helper function to remove table from the open tables hash on exception
1297
* used in tab_new_handle() below
1299
#ifdef NO_LONGER_REQ
// Compiled only when NO_LONGER_REQ is defined.
// Removes the table from db->db_tables (keyed by tab->tab_name) and clears the
// te_table pointer of the matching entry in db->db_table_by_id, if one exists.
//
// self - calling thread.
// tab  - table handle; tab_db, tab_id and tab_name are read.
1300
static void xt_del_from_db_tables_ht(XTThreadPtr self, XTTableHPtr tab)
1302
XTTableEntryPtr te_ptr;
1303
XTDatabaseHPtr db = tab->tab_db;
1304
xtTableID tab_id = tab->tab_id;
1306
/* Oops! should use tab->tab_name, instead of tab! */
1307
xt_ht_del(self, db->db_tables, tab->tab_name);
1309
/* Remove the reference from the ID list, when a table is
1310
* removed from the table name list:
1312
if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id)))
1313
te_ptr->te_table = NULL;
// Select the file type used when opening a table's record (data) file.
//
// Returns XT_FT_REWRITE_FLUSH when the configured record file type is
// XT_FT_STANDARD and xt_db_rewrite_flushing is set; otherwise returns
// XT_REC_FILE_TYPE.
//
// NOTE(review): heap_tab is not referenced on the lines visible here; lines
// are missing between the signature and the first test, so it is presumably
// handled there - confirm.
1317
xtPublic XTFileType xt_rec_file_type(xtBool heap_tab)
1321
if (XT_REC_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
1322
return XT_FT_REWRITE_FLUSH;
1323
return XT_REC_FILE_TYPE;
// Select the file type used when opening a table's row-pointer file.
//
// Returns XT_FT_REWRITE_FLUSH when the configured row file type is
// XT_FT_STANDARD and xt_db_rewrite_flushing is set; otherwise returns
// XT_ROW_FILE_TYPE.
//
// NOTE(review): heap_tab is not referenced on the lines visible here; lines
// are missing between the signature and the first test, so it is presumably
// handled there - confirm.
1326
xtPublic XTFileType xt_row_file_type(xtBool heap_tab)
// The test must look at the row-file macro (it previously tested
// XT_REC_FILE_TYPE), so that the condition and the fall-through return agree,
// as they do in xt_rec_file_type() and xt_ind_file_type().
1330
if (XT_ROW_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
1331
return XT_FT_REWRITE_FLUSH;
1332
return XT_ROW_FILE_TYPE;
// Select the file type used when opening a table's index file.
//
// Returns XT_FT_REWRITE_FLUSH when the configured index file type is
// XT_FT_STANDARD and xt_db_rewrite_flushing is set; otherwise returns
// XT_IND_FILE_TYPE.
//
// NOTE(review): heap_tab is not referenced on the lines visible here; lines
// are missing between the signature and the first test, so it is presumably
// handled there - confirm.
1335
xtPublic XTFileType xt_ind_file_type(xtBool heap_tab)
1339
if (XT_IND_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
1340
return XT_FT_REWRITE_FLUSH;
1341
return XT_IND_FILE_TYPE;
1344
#ifdef XT_SORT_REC_WRITES
// Comparison callback for the delayed-write list created in tab_new_handle()
// (tab_rec_dw_writes). Compiled only when XT_SORT_REC_WRITES is defined.
//
// a - points to the xtRecordID being searched for.
// b - points to an XTDelayWriteRec list element; its dw_rec_id is compared.
//
// NOTE(review): the return statements are not visible in this view; the
// equality test followed by the less-than test suggests the usual 0 / negative
// / positive result - confirm.
1345
static int tab_cmp_dw_rec_id(struct XTThread *XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
1347
xtRecordID rec_id = *((xtRecordID *) a);
1348
XTDelayWritePtr dw_ptr = (XTDelayWritePtr) b;
1350
if (rec_id == dw_ptr->dw_rec_id)
1352
if (rec_id < dw_ptr->dw_rec_id)
1359
* Create a new table handle (i.e. open a table).
1360
* Return NULL if the table is missing, and it is OK for the table
// Summary of the steps visible below:
//  1. look up the table entry for tab_id and allocate a new XTTableHRec;
//  2. take over the caller's dictionary (dic) or load one from disk;
//  3. initialise the locks and the two flush tasks of the handle;
//  4. derive the row, data and index file paths and obtain file references;
//  5. heap tables: create and initialise the files if they are empty;
//     other tables: open the existing index and data files;
//  6. load the table format and header, set up the row and record caches;
//  7. publish the handle in db->db_tables and in the ID list entry, then
//     attach foreign key references.
//
// self       - calling thread.
// r_tab      - presumably receives the new handle; the assignment is not
//              visible in this view - confirm.
// db         - database that owns the table.
// tab_id     - ID of the table to open.
// tab_path   - path of the table; also used as the name of the handle.
// missing_ok - when set, the data file is opened with XT_FS_MISSING_OK.
// dic        - dictionary handed to myxt_move_dictionary(); the condition that
//              chooses between moving it and loading one is not visible here.
//
// Returns an int status. XT_TAB_NO_DICTIONARY and XT_TAB_NOT_FOUND are
// returned below; the success value is on a line not visible in this view.
1363
static int tab_new_handle(XTThreadPtr self, XTTableHPtr *r_tab, XTDatabaseHPtr db, xtTableID tab_id, XTPathStrPtr tab_path, xtBool missing_ok, XTDictionaryPtr dic)
1365
char path[PATH_MAX];
1367
char file_name[XT_MAX_TABLE_FILE_NAME_SIZE];
1368
XTOpenFilePtr of_rec, of_ind;
1369
XTTableEntryPtr te_ptr;
1370
size_t tab_format_offset;
1371
size_t tab_head_size = 0;
1375
te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
// The handle is heap-allocated with tab_finalize as its finalizer. The release
// pushed here stays on the cleanup stack until the popr_() further down.
1378
tab = (XTTableHPtr) xt_heap_new(self, sizeof(XTTableHRec), tab_finalize);
1379
pushr_(xt_heap_release, tab);
1381
tab->tab_name = (XTPathStrPtr) xt_dup_string(self, tab_path->ps_path);
1383
tab->tab_id = tab_id;
1384
tab->tab_dic.dic_table_type = te_ptr->te_type;
1385
#ifdef TRACE_TABLE_IDS
1386
PRINTF("%s: allocated TABLE: db=%d tab=%d %s\n", self->t_name, (int) db->db_id, (int) tab->tab_id, xt_last_2_names_of_path(tab->tab_name->ps_path));
1390
myxt_move_dictionary(&tab->tab_dic, dic);
1391
myxt_setup_dictionary(self, &tab->tab_dic);
// Failing to load the dictionary releases the new handle and reports
// XT_TAB_NO_DICTIONARY to the caller.
1394
if (!myxt_load_dictionary(self, &tab->tab_dic, db, tab_path)) {
1395
freer_(); // xt_heap_release(tab)
1396
return_(XT_TAB_NO_DICTIONARY);
1400
/* Do not use the XT_TF_DDL_TEMP_TABLE bit from the given dic
1401
* This bit depends only on the
1402
* name of the table, and must be set explicitly.
1404
if (myxt_temp_table_name(tab_path->ps_path))
1405
tab->tab_dic.dic_tab_flags |= XT_TF_DDL_TEMP_TABLE;
1407
tab->tab_dic.dic_tab_flags &= ~XT_TF_DDL_TEMP_TABLE;
// Initialise the synchronisation objects owned by the handle.
1409
tab->tab_seq.xt_op_seq_init(self);
1410
xt_spinlock_init_with_autoname(self, &tab->tab_ainc_lock);
1411
xt_init_mutex_with_autoname(self, &tab->tab_rec_flush_lock);
1412
xt_init_mutex_with_autoname(self, &tab->tab_ind_flush_lock);
1413
xt_init_mutex_with_autoname(self, &tab->tab_ind_stat_lock);
1414
xt_init_mutex_with_autoname(self, &tab->tab_dic_field_lock);
1415
xt_init_mutex_with_autoname(self, &tab->tab_row_lock);
1416
xt_init_mutex_with_autoname(self, &tab->tab_ind_lock);
1417
xt_init_mutex_with_autoname(self, &tab->tab_rec_lock);
1418
xt_spinlock_init_with_autoname(self, &tab->tab_mem_lock);
// Create the record/row and index flush tasks and point them back at the table.
1419
if (!(tab->tab_rec_flush_task = new XTFlushRecRowTask()))
1420
xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
1421
tab->tab_rec_flush_task->tk_init(self);
1422
tab->tab_rec_flush_task->frt_table = tab;
1423
if (!(tab->tab_ind_flush_task = new XTFlushIndexTask()))
1424
xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
1425
tab->tab_ind_flush_task->tk_init(self);
1426
tab->tab_ind_flush_task->fit_table = tab;
1427
for (u_int i=0; i<XT_ROW_RWLOCKS; i++)
1428
XT_TAB_ROW_INIT_LOCK(self, &tab->tab_row_rwlock[i]);
1429
tab->tab_free_locks = TRUE;
// Build each file path by replacing the last path component of tab_path with
// the row, data and index file name in turn, and get the shared file objects.
1431
xt_strcpy(PATH_MAX, path, tab_path->ps_path);
1432
xt_remove_last_name_of_path(path);
1433
tab_get_row_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
1434
xt_strcat(PATH_MAX, path, file_name);
1435
tab->tab_row_file = xt_fs_get_file(self, path, xt_row_file_type(te_ptr->te_heap_tab));
1437
xt_remove_last_name_of_path(path);
1438
tab_get_data_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
1439
xt_strcat(PATH_MAX, path, file_name);
1440
tab->tab_rec_file = xt_fs_get_file(self, path, xt_rec_file_type(te_ptr->te_heap_tab));
1442
xt_remove_last_name_of_path(path);
1443
tab_get_index_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
1444
xt_strcat(PATH_MAX, path, file_name);
1445
tab->tab_ind_file = xt_fs_get_file(self, path, xt_ind_file_type(te_ptr->te_heap_tab));
// Heap (memory) tables: the files are opened with XT_FS_CREATE and initialised
// here whenever their length is zero.
1447
if (te_ptr->te_heap_tab) {
1448
XTOpenFilePtr of_row;
1450
tab->tab_dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
1451
of_row = xt_open_file(self, tab->tab_row_file->fil_path, xt_row_file_type(TRUE), XT_FS_CREATE, xt_db_row_file_grow_size);
1452
pushr_(xt_close_file, of_row);
1453
if (xt_seek_eof_file(self, of_row) == 0)
1454
tab_init_row_file(self, of_row, tab, &tab->tab_dic);
1455
freer_(); // xt_close_file(of_row)
// NOTE(review): unlike of_row above, of_ind is pushed for cleanup only after
// tab_init_ind_file() has run; if that call can throw, the open file would not
// be closed - confirm.
1457
of_ind = xt_open_file(self, tab->tab_ind_file->fil_path, xt_ind_file_type(TRUE), XT_FS_CREATE, XT_INDEX_PAGE_SIZE*256);
1458
if (xt_seek_eof_file(self, of_ind) == 0)
1459
tab_init_ind_file(self, of_ind, tab, &tab->tab_dic);
1460
pushr_(xt_close_file, of_ind);
1461
tab_load_index_header(self, tab, of_ind, tab_path);
1462
freer_(); // xt_close_file(of_ind)
1464
of_rec = xt_open_file(self, tab->tab_rec_file->fil_path, xt_rec_file_type(te_ptr->te_heap_tab), XT_FS_CREATE, xt_db_data_file_grow_size);
1465
pushr_(xt_close_file, of_rec);
1466
if (xt_seek_eof_file(self, of_rec) == 0)
1467
tab_init_data_file(self, of_rec, tab, &tab->tab_dic, 0, NULL);
1470
#ifdef XT_SORT_REC_WRITES
1471
tab->tab_rec_dw_writes = xt_new_sortedlist(self, sizeof(XTDelayWriteRec), 20, 10, tab_cmp_dw_rec_id, NULL, NULL, TRUE, FALSE);
// Other tables: the index file is opened with XT_FS_MISSING_OK.
// NOTE(review): the branch structure around the two tab_load_index_header()
// calls below is not visible in this view.
1473
of_ind = xt_open_file(self, tab->tab_ind_file->fil_path, xt_ind_file_type(FALSE), XT_FS_MISSING_OK, XT_INDEX_PAGE_SIZE*256);
1475
pushr_(xt_close_file, of_ind);
1476
tab_load_index_header(self, tab, of_ind, tab_path);
1477
freer_(); // xt_close_file(of_ind)
1480
tab_load_index_header(self, tab, of_ind, tab_path);
1482
of_rec = xt_open_file(self, tab->tab_rec_file->fil_path, xt_rec_file_type(FALSE), missing_ok ? XT_FS_MISSING_OK : XT_FS_DEFAULT, xt_db_data_file_grow_size);
// The condition guarding this early exit is not visible; the handle is
// released and XT_TAB_NOT_FOUND is returned.
1484
freer_(); // xt_heap_release(tab)
1485
return_(XT_TAB_NOT_FOUND);
1487
pushr_(xt_close_file, of_rec);
// Read the format record and the header from the data file, then close it.
1490
tab_load_table_format(self, of_rec, tab_path, &tab_format_offset, &tab_head_size, &tab->tab_dic);
1491
tab->tab_table_format_offset = tab_format_offset;
1492
tab->tab_table_head_size = tab_head_size;
1493
tab->tab_dic.dic_table->dt_table = tab;
1494
tab_load_table_header(self, tab, of_rec);
1495
freer_(); // xt_close_file(of_rec)
// Seed the working counters from the values just loaded from the header.
1497
tab->tab_seq.xt_op_seq_set(self, tab->tab_wr_op_seq+1);
1498
tab->tab_row_eof_id = tab->tab_head_row_eof_id;
1499
tab->tab_row_free_id = tab->tab_head_row_free_id;
1500
tab->tab_row_fnum = tab->tab_head_row_fnum;
1501
tab->tab_rec_eof_id = tab->tab_head_rec_eof_id;
1502
tab->tab_rec_free_id = tab->tab_head_rec_free_id;
1503
tab->tab_rec_fnum = tab->tab_head_rec_fnum;
1505
tab->tab_rows.xt_tc_setup(tab, FALSE, sizeof(XTTabRowHeadDRec), sizeof(XTTabRowRefDRec));
1506
tab->tab_recs.xt_tc_setup(tab, TRUE, tab_head_size, tab->tab_dic.dic_rec_size);
1508
xt_xres_init_tab(self, tab);
1510
if (!xt_init_row_locks(&tab->tab_locks))
1513
xt_heap_set_release_callback(tab, tab_onrelease);
1515
tab->tab_repair_pending = xt_tab_is_table_repair_pending(tab);
// From here on the handle is no longer released by the cleanup stack; it is
// stored in the open table hash instead.
1517
popr_(); // Discard xt_heap_release(tab)
1519
xt_ht_put(self, db->db_tables, tab);
1521
/* Add a reference to the ID list, when a table is
1522
* added to the table name list:
1524
te_ptr->te_table = tab;
1526
/* Moved from after xt_init_row_locks() above, so that calling
1527
* xt_use_table_no_lock() with no_load == FALSE from attachReferences()
1528
* will work if we have cyclic foreign key references.
1530
if (tab->tab_dic.dic_table) {
1532
tab->tab_dic.dic_table->attachReferences(self, db);
1535
/* Errors are thrown when: set foreign_key_checks = 1 */
1536
/* Undo everything done above: */
1537
xt_ht_del(self, db->db_tables, tab->tab_name);
1548
* Get a reference to a table in the current database. The table reference is valid,
1549
* as long as the thread is using the database!!!
// Look the table up by name in db->db_tables and, when it is not there and
// no_load is FALSE, open it through tab_new_handle().
//
// self       - calling thread.
// db         - database to search; XT_ERR_NO_DATABASE_IN_USE is thrown below
//              (the guarding condition is not visible in this view).
// name       - path of the table.
// no_load    - when TRUE, a table that is not already open is not loaded.
// missing_ok - passed on to tab_new_handle().
// dic        - passed on to tab_new_handle().
//
// Throws XT_ERR_TABLE_NOT_FOUND, XT_ERR_NO_DICTIONARY or XT_ERR_TABLE_LOCKED
// depending on the lookup and on the tab_new_handle() status.
// NOTE(review): the return statement and the conditions around some of the
// throws and around xt_heap_reference() are not visible in this view.
1551
xtPublic XTTableHPtr xt_use_table_no_lock(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr name, xtBool no_load, xtBool missing_ok, XTDictionaryPtr dic)
1556
xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
1558
tab = (XTTableHPtr) xt_ht_get(self, db->db_tables, name);
1559
if (!tab && !no_load) {
1560
xtTableID tab_id = 0;
1562
if (!tab_find_table(self, db, name, &tab_id)) {
1565
xt_throw_taberr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND, name);
// Translate the status of tab_new_handle() into the matching exception.
1568
switch (tab_new_handle(self, &tab, db, tab_id, name, missing_ok, dic)) {
1569
case XT_TAB_NO_DICTIONARY:
1570
xt_throw_taberr(XT_CONTEXT, XT_ERR_NO_DICTIONARY, name);
1571
case XT_TAB_POOL_CLOSED:
1572
xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TABLE_LOCKED, (u_long) tab_id);
1573
case XT_TAB_NOT_FOUND:
1576
xt_throw_taberr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND, name);
// Take a reference on the handle on behalf of the caller.
1583
xt_heap_reference(self, tab);
// Variant of xt_use_table_no_lock() that takes no thread argument: the calling
// thread is obtained with xt_get_self() and all other arguments are forwarded.
//
// NOTE(review): the "_ns" suffix and the missing lines suggest that exceptions
// are caught here and reported through the return value - the handling code is
// not visible in this view; confirm.
1588
xtPublic XTTableHPtr xt_use_table_no_lock_ns(struct XTDatabase *db, XTPathStrPtr name, xtBool no_load, xtBool missing_ok, XTDictionaryPtr dic)
1591
XTThreadPtr self = xt_get_self();
1594
tab = xt_use_table_no_lock(self, db, name, no_load, missing_ok, dic);
// Release everything an open table handle holds: reserved index resources, the
// record, index and row files, the table reference, the index read handle and
// the row read/write buffers. Each pointer is reset to NULL after release.
//
// ot - open table to tear down. The structure itself is not freed by the
//      lines visible here.
1603
static void tab_close_table(XTOpenTablePtr ot)
1605
xt_ind_free_reserved(ot);
1607
if (ot->ot_rec_file) {
1608
XT_CLOSE_RR_FILE_NS(ot->ot_rec_file);
1609
ot->ot_rec_file = NULL;
1612
if (ot->ot_ind_file) {
1613
xt_close_file_ns(ot->ot_ind_file);
1614
ot->ot_ind_file = NULL;
1617
if (ot->ot_row_file) {
1618
XT_CLOSE_RR_FILE_NS(ot->ot_row_file);
1619
ot->ot_row_file = NULL;
// Drop the reference to the shared table handle.
// NOTE(review): the guard around this release is not visible in this view.
1623
xt_heap_release(xt_get_self(), ot->ot_table);
1624
ot->ot_table = NULL;
1626
if (ot->ot_ind_rhandle) {
1627
xt_ind_release_handle(ot->ot_ind_rhandle, FALSE, ot->ot_thread);
1628
ot->ot_ind_rhandle = NULL;
1630
if (ot->ot_row_rbuffer) {
1631
xt_free_ns(ot->ot_row_rbuffer);
1632
ot->ot_row_rbuf_size = 0;
1633
ot->ot_row_rbuffer = NULL;
1635
if (ot->ot_row_wbuffer) {
1636
xt_free_ns(ot->ot_row_wbuffer);
1637
ot->ot_row_wbuf_size = 0;
1638
ot->ot_row_wbuffer = NULL;
1640
#ifdef XT_TRACK_RETURNED_ROWS
1641
if (ot->ot_rows_returned) {
1642
xt_free_ns(ot->ot_rows_returned);
1643
ot->ot_rows_returned = NULL;
1645
ot->ot_rows_ret_curr = 0;
1646
ot->ot_rows_ret_max = 0;
// Delete every file that belongs to the given table.
//
// self     - calling thread; used to log and clear a failed delete.
// tab_name - path of the table.
// tab_id   - ID of the table.
//
// The files are enumerated with xt_enum_files_of_tables_init/next. A failed
// delete is logged and cleared, and the loop continues with the next file.
1651
static void tab_delete_table_files(XTThreadPtr self, XTPathStrPtr tab_name, xtTableID tab_id)
1653
XTFilesOfTableRec ft;
1655
xt_enum_files_of_tables_init(tab_name, tab_id, &ft);
1656
while (xt_enum_files_of_tables_next(&ft)) {
// xt_fs_delete() is called with NULL instead of a thread and its result is
// tested.
1657
if (!xt_fs_delete(NULL, ft.ft_file_path))
1658
xt_log_and_clear_exception(self);
// Initialise an empty row file: reset the row counters of the table handle and
// write the row file header at offset 0.
//
// self   - calling thread; its st_statistics.st_rec is passed to the write.
// of_row - open row file to write to.
// tab    - table handle whose row counters (working and header copies) are set.
// dic    - unused.
//
// NOTE(review): the statement executed when xt_pwrite_file() fails is not
// visible in this view - presumably an exception is thrown; confirm.
1662
static void tab_init_row_file(XTThreadPtr self, XTOpenFilePtr of_row, XTTableHPtr tab, XTDictionaryPtr XT_UNUSED(dic))
1664
XTTabRowHeadDRec row_head;
// EOF ID 1, an empty free list (head 0) and a free count of 0.
1666
tab->tab_row_eof_id = 1;
1667
tab->tab_row_free_id = 0;
1668
tab->tab_row_fnum = 0;
1670
tab->tab_head_row_eof_id = 1;
1671
tab->tab_head_row_free_id = 0;
1672
tab->tab_head_row_fnum = 0;
// Only the magic number field of the header is set before it is written.
1674
XT_SET_DISK_4(row_head.rh_magic_4, XT_TAB_ROW_MAGIC);
1675
if (!xt_pwrite_file(of_row, 0, sizeof(row_head), &row_head, &self->st_statistics.st_rec, self))
// Initialise an empty data (record) file: compute the size of the file header,
// reset the record counters of the table handle, copy the record layout
// settings from dic, and write the table header, the table format record and
// the table definition string.
//
// self    - calling thread; its st_statistics.st_rec is passed to the writes.
// of_rec  - open data file to write to.
// tab     - table handle that is updated.
// dic     - source of the record size, flags, auto-increment minimum, average
//           row size and table type.
// def_len - number of bytes of definition text to store.
// tab_def - string buffer holding the definition text.
//
// NOTE(review): tab_new_handle() calls this with def_len == 0 and
// tab_def == NULL, while the last write below dereferences tab_def. Lines are
// missing just before it, so presumably the write is guarded - confirm.
// NOTE(review): the statements executed when a write fails are not visible.
1679
static void tab_init_data_file(XTThreadPtr self, XTOpenFilePtr of_rec, XTTableHPtr tab, XTDictionaryPtr dic, size_t def_len, XTStringBufferPtr tab_def)
1682
XTTableHeadDRec rec_head;
1683
XTTableFormatDRec table_fmt;
1685
/* Calculate the offset of the first record in the data handle file. */
1686
eof = sizeof(XTTableHeadDRec) + offsetof(XTTableFormatDRec, tf_definition) + def_len + XT_FORMAT_DEF_SPACE;
1687
eof = (eof + 1024 - 1) / 1024 * 1024; // Round to a value divisible by 1024
// The format record directly follows the table header record.
1689
tab->tab_table_format_offset = sizeof(XTTableHeadDRec);
1690
tab->tab_table_head_size = (size_t) eof;
1692
tab->tab_rec_eof_id = 1; // This is the first record ID!
1693
tab->tab_rec_free_id = 0;
1694
tab->tab_rec_fnum = 0;
1696
tab->tab_head_rec_eof_id = 1; // The first record ID
1697
tab->tab_head_rec_free_id = 0;
1698
tab->tab_head_rec_fnum = 0;
1700
tab->tab_dic.dic_rec_size = dic->dic_rec_size;
1701
tab->tab_dic.dic_rec_fixed = dic->dic_rec_fixed;
1702
tab->tab_dic.dic_tab_flags = dic->dic_tab_flags;
1703
tab->tab_dic.dic_min_auto_inc = dic->dic_min_auto_inc;
1704
tab->tab_dic.dic_def_ave_row_size = dic->dic_def_ave_row_size;
1705
tab->tab_dic.dic_table_type = dic->dic_table_type;
// Encode and write the table header at offset 0. The row values and
// tab_head_op_seq are read from the handle as set up by the caller.
1707
XT_SET_DISK_4(rec_head.th_head_size_4, sizeof(XTTableHeadDRec));
1708
XT_SET_DISK_4(rec_head.th_op_seq_4, tab->tab_head_op_seq);
1709
XT_SET_DISK_6(rec_head.th_row_free_6, tab->tab_head_row_free_id);
1710
XT_SET_DISK_6(rec_head.th_row_eof_6, tab->tab_head_row_eof_id);
1711
XT_SET_DISK_6(rec_head.th_row_fnum_6, tab->tab_head_row_fnum);
1712
XT_SET_DISK_6(rec_head.th_rec_free_6, tab->tab_head_rec_free_id);
1713
XT_SET_DISK_6(rec_head.th_rec_eof_6, tab->tab_head_rec_eof_id);
1714
XT_SET_DISK_6(rec_head.th_rec_fnum_6, tab->tab_head_rec_fnum);
1716
if (!xt_pwrite_file(of_rec, 0, sizeof(XTTableHeadDRec), &rec_head, &self->st_statistics.st_rec, self))
1719
/* Store the table format: */
1720
memset(&table_fmt, 0, offsetof(XTTableFormatDRec, tf_definition));
1721
XT_SET_DISK_4(table_fmt.tf_format_size_4, offsetof(XTTableFormatDRec, tf_definition) + def_len);
1722
XT_SET_DISK_4(table_fmt.tf_tab_head_size_4, eof);
1723
XT_SET_DISK_2(table_fmt.tf_tab_version_2, XT_TAB_CURRENT_VERSION);
1724
XT_SET_DISK_4(table_fmt.tf_rec_size_4, tab->tab_dic.dic_rec_size);
1725
XT_SET_DISK_1(table_fmt.tf_rec_fixed_1, tab->tab_dic.dic_rec_fixed);
1726
XT_SET_DISK_2(table_fmt.tf_tab_unused_2, 0);
1727
XT_SET_DISK_8(table_fmt.tf_min_auto_inc_8, tab->tab_dic.dic_min_auto_inc);
// The fixed part of the format record is written first, the definition text
// follows it at the offset of tf_definition.
1729
if (!xt_pwrite_file(of_rec, sizeof(XTTableHeadDRec), offsetof(XTTableFormatDRec, tf_definition), &table_fmt, &self->st_statistics.st_rec, self))
1732
if (!xt_pwrite_file(of_rec, sizeof(XTTableHeadDRec) + offsetof(XTTableFormatDRec, tf_definition), def_len, tab_def->sb_cstring, &self->st_statistics.st_rec, self))
// Initialise an empty index file: allocate a zeroed in-memory index header for
// the table, fill in the header and index format fields and write the header
// to the start of the file.
//
// self   - calling thread; its st_statistics.st_ind is passed to the write.
// of_ind - open index file to write to.
// tab    - table handle; tab_index_head, tab_index_format_offset, tab_ind_eof
//          and tab_ind_free are set.
// dic    - dictionary; dic_key_count determines where the format record sits.
//
// NOTE(review): the statements executed when the allocation or the write fails
// are not visible in this view - presumably exceptions are thrown; confirm.
1737
static void tab_init_ind_file(XTThreadPtr self, XTOpenFilePtr of_ind, XTTableHPtr tab, XTDictionaryPtr dic)
1739
XTIndexFormatDPtr index_fmt;
1741
/* This is the size of the index header: */
1742
tab->tab_index_format_offset = offsetof(XTIndexHeadDRec, tp_data) + dic->dic_key_count * XT_NODE_REF_SIZE;
1743
if (!(tab->tab_index_head = (XTIndexHeadDPtr) xt_calloc_ns(XT_INDEX_HEAD_SIZE)))
// EOF node ID 1 and an empty free list (0).
1746
XT_NODE_ID(tab->tab_ind_eof) = 1;
1747
XT_NODE_ID(tab->tab_ind_free) = 0;
1749
XT_SET_DISK_4(tab->tab_index_head->tp_header_size_4, XT_INDEX_HEAD_SIZE);
1750
XT_SET_DISK_4(tab->tab_index_head->tp_format_offset_4, tab->tab_index_format_offset);
1751
XT_SET_DISK_6(tab->tab_index_head->tp_ind_eof_6, XT_NODE_ID(tab->tab_ind_eof));
1752
XT_SET_DISK_6(tab->tab_index_head->tp_ind_free_6, XT_NODE_ID(tab->tab_ind_free));
1754
/* Store the index format: */
// The format record lives inside the header buffer at tab_index_format_offset.
1755
index_fmt = (XTIndexFormatDPtr) (((xtWord1 *) tab->tab_index_head) + tab->tab_index_format_offset);
1756
XT_SET_DISK_4(index_fmt->if_format_size_4, sizeof(XTIndexFormatDRec));
1757
XT_SET_DISK_2(index_fmt->if_tab_version_2, XT_TAB_CURRENT_VERSION);
1758
XT_SET_DISK_2(index_fmt->if_ind_version_2, XT_IND_CURRENT_VERSION);
1759
XT_SET_DISK_1(index_fmt->if_node_ref_size_1, XT_NODE_REF_SIZE);
1760
XT_SET_DISK_1(index_fmt->if_rec_ref_size_1, XT_RECORD_REF_SIZE);
1761
XT_SET_DISK_4(index_fmt->if_page_size_4, XT_INDEX_PAGE_SIZE);
1763
/* Save the header: */
1764
if (!xt_pwrite_file(of_ind, 0, XT_INDEX_HEAD_SIZE, tab->tab_index_head, &self->st_statistics.st_ind, self))
// Create the files and directory entry for a new table in the thread's current
// database (self->st_database), replacing an existing table of the same name.
//
// self - calling thread.
// name - path of the table to create; its last component is the table name.
// dic  - dictionary describing the table (flags, type, record layout and,
//        when dic_table is set, the definition text stored in the data file).
//
// Summary of the steps visible below:
//  1. reject names longer than XT_TABLE_NAME_SIZE-1 characters;
//  2. lock the table pool for this name and lock db->db_tables;
//  3. for an existing table (old_tab): delete its extended data, close and
//     delete its files and remove it from db->db_tables;
//  4. insert a directory entry for the new ID (db_curr_tab_id + 1) and save
//     the table list;
//  5. create the row, data and index files (exclusively) and initialise them;
//  6. log the new table ID;
//  7. open the new table once with xt_use_table_no_lock() and release it;
//  8. on failure: delete the new files and the directory entry again;
//  9. finally remove the directory entry of the old table, if any.
//
// NOTE(review): most of the conditions and the exception handling structure
// around these steps are on lines not visible in this view.
1768
xtPublic void xt_create_table(XTThreadPtr self, XTPathStrPtr name, XTDictionaryPtr dic)
1770
char table_name[XT_MAX_TABLE_FILE_NAME_SIZE];
1771
char path[PATH_MAX];
1772
XTDatabaseHPtr db = self->st_database;
1773
XTOpenTablePoolPtr table_pool;
1775
XTTableHPtr old_tab = NULL;
1776
xtTableID old_tab_id = 0;
1777
xtTableID tab_id = 0;
1778
XTStringBufferRec tab_def = { 0, 0, 0 };
1779
XTTableEntryRec te_tab;
1780
XTSortedListInfoRec li_undo;
1782
#ifdef TRACE_CREATE_TABLES
1783
printf("CREATE %s\n", name->ps_path);
1786
if (strlen(xt_last_name_of_path(name->ps_path)) > XT_TABLE_NAME_SIZE-1)
1787
xt_throw_taberr(XT_CONTEXT, XT_ERR_NAME_TOO_LONG, name);
1789
xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
1791
/* Lock to prevent table list change during creation. */
1792
table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, name, FALSE, TRUE, TRUE, &old_tab);
1793
pushr_(xt_db_unlock_table_pool, table_pool);
1794
xt_ht_lock(self, db->db_tables);
1795
pushr_(xt_ht_unlock, db->db_tables);
1796
pushr_(xt_heap_release, old_tab);
1798
/* This must be done before we remove the old table
1799
* from the directory, or we will not be able
1800
* to find the table, which could is require
1803
if (xt_sl_get_size(db->db_table_by_id) >= XT_MAX_TABLES)
1804
xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TOO_MANY_TABLES, (u_long) XT_MAX_TABLES);
// The new table gets the next ID; db_curr_tab_id itself is only advanced
// further down, when the ID is logged.
1806
tab_id = db->db_curr_tab_id + 1;
1809
old_tab_id = old_tab->tab_id;
1810
xt_dl_delete_ext_data(self, old_tab, FALSE, TRUE);
1811
freer_(); // xt_heap_release(self, old_tab)
1813
/* For the Windows version this must be done before we
1814
* start to delete the underlying files!
1816
tab_close_files(self, old_tab);
1818
tab_delete_table_files(self, name, old_tab_id);
1820
/* Remove the PBMS table: */
1821
ASSERT(xt_get_self() == self);
1823
/* Remove the table from the directory. It will get a new
1824
* ID so the handle in the directory will no longer be valid.
1826
xt_ht_del(self, db->db_tables, name);
1829
freer_(); // xt_heap_release(self, old_tab)
1832
/* Add the table to the directory, we'll remove on error! */
1833
li_undo.li_sl = db->db_table_by_id;
1834
li_undo.li_key = &tab_id;
1835
te_tab.te_tab_id = tab_id;
1836
te_tab.te_heap_tab = dic->dic_tab_flags & XT_TF_MEMORY_TABLE;
1837
te_tab.te_tab_name = xt_dup_string(self, xt_last_name_of_path(name->ps_path));
1838
te_tab.te_tab_path = tab_get_table_path(self, db, name, TRUE);
1839
te_tab.te_table = NULL;
1840
te_tab.te_type = dic->dic_table_type;
1841
xt_sl_insert(self, db->db_table_by_id, &tab_id, &te_tab);
1845
XTOpenFilePtr of_row, of_rec, of_ind;
1848
tab_save_tables(self, db);
// A temporary handle is used only to initialise the three files; it is
// released again after the new table ID has been logged.
1850
tab = (XTTableHPtr) xt_heap_new(self, sizeof(XTTableHRec), tab_finalize);
1851
pushr_(xt_heap_release, tab);
1853
/* The length of the foreign key definition: */
1854
if (dic->dic_table) {
1855
dic->dic_table->loadString(self, &tab_def);
// One more than the string length is stored.
// NOTE(review): presumably this covers the terminating NUL byte - confirm.
1856
def_len = tab_def.sb_len + 1;
1859
tab->tab_head_op_seq = 0;
1860
tab->tab_wr_op_seq = 0;
1862
/* This tests operation number overflow. */
1863
//tab->tab_head_op_seq = 0xFFFFFFFF - 12;
1864
//tab->tab_wr_op_seq = 0xFFFFFFFF - 12;
1867
/* ------- ROW FILE: */
1868
xt_strcpy(PATH_MAX, path, name->ps_path);
1869
xt_remove_last_name_of_path(path);
1870
tab_get_row_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
1871
xt_strcat(PATH_MAX, path, table_name);
1872
of_row = xt_open_file(self, path, xt_row_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, xt_db_row_file_grow_size);
1873
pushr_(xt_close_file, of_row);
1874
tab_init_row_file(self, of_row, tab, dic);
1875
freer_(); // xt_close_file(of_row)
1877
(void) ASSERT(sizeof(XTTabRowHeadDRec) == sizeof(XTTabRowRefDRec));
1878
(void) ASSERT(sizeof(XTTabRowRefDRec) == 1 << XT_TAB_ROW_SHIFTS);
1880
/* ------------ DATA FILE: */
1881
xt_remove_last_name_of_path(path);
1882
tab_get_data_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
1883
xt_strcat(PATH_MAX, path, table_name);
1884
of_rec = xt_open_file(self, path, xt_rec_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, xt_db_data_file_grow_size);
1885
pushr_(xt_close_file, of_rec);
1886
tab_init_data_file(self, of_rec, tab, dic, def_len, &tab_def);
1887
freer_(); // xt_close_file(of_rec)
1889
/* ----------- INDEX FILE: */
1890
xt_remove_last_name_of_path(path);
1891
tab_get_index_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
1892
xt_strcat(PATH_MAX, path, table_name);
1893
of_ind = xt_open_file(self, path, xt_ind_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, XT_INDEX_PAGE_SIZE*256);
1894
pushr_(xt_close_file, of_ind);
1895
tab_init_ind_file(self, of_ind, tab, dic);
1896
freer_(); // xt_close_file(of_ind)
1899
/* Log the new table ID! */
// The current ID is advanced first and set back when logging fails.
1900
db->db_curr_tab_id = tab_id;
1901
if (!xt_xn_log_tab_id(self, tab_id)) {
1902
db->db_curr_tab_id = tab_id - 1;
1906
freer_(); // xt_heap_release(tab)
1909
* 2008-12-10: Note, there is another problem, example:
1910
* set storage_engine = pbxt;
1912
* CREATE TABLE t1 (s1 INT PRIMARY KEY, s2 INT);
1913
* CREATE TABLE t2 (s1 INT PRIMARY KEY, FOREIGN KEY (s1) REFERENCES t1 (s1) ON UPDATE CASCADE);
1914
* CREATE TABLE t3 (s1 INT PRIMARY KEY, FOREIGN KEY (s1) REFERENCES t2 (s1) ON UPDATE CASCADE);
1916
* DROP TABLE IF EXISTS t2,t1;
1917
* CREATE TABLE t1 (s1 ENUM('a','b') PRIMARY KEY);
1918
* CREATE TABLE t2 (s1 ENUM('A','B'), FOREIGN KEY (s1) REFERENCES t1 (s1));
1920
* DROP TABLE IF EXISTS t2,t1;
1922
* In the example above. The second create t2 does not fail, although t3 references it,
1923
* and the data types do not match.
1925
* The main problem is that this error comes on DROP TABLE IF EXISTS t2! Which prevents
1926
* the table from being dropped - not good.
1928
* So my idea here is to open the table, and if it fails, then the create table fails
1933
* We pass table type separately and provide NULL for the dic parameter, this is because
1934
* we want to force loading table (which is triggered by dic == NULL) but we still need table type
1938
tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
1939
xt_heap_release(self, tab);
1944
/* Creation failed, delete the table files: */
1947
xt_enter_exception_handler(self, &e);
1949
tab_delete_table_files(self, name, tab_id);
1950
tab_remove_table_path(self, db, te_tab.te_tab_path);
1951
xt_sl_delete(NULL, db->db_table_by_id, &tab_id);
1952
tab_save_tables(self, db);
1953
xt_sb_set_size(self, &tab_def, 0);
1954
xt_exit_exception_handler(self, &e);
// Setting the size to 0 is also done on this path, after the creation steps.
// NOTE(review): presumably this frees the definition buffer - confirm.
1959
xt_sb_set_size(self, &tab_def, 0);
1963
XTTableEntryPtr te_ptr;
// Remove the directory entry that still belongs to the old table ID.
1965
if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &old_tab_id))) {
1966
tab_remove_table_path(self, db, te_ptr->te_tab_path);
1967
xt_sl_delete(self, db->db_table_by_id, &old_tab_id);
1968
tab_save_tables(self, db);
1971
/* Same purpose as above {LOAD-FOR-FKS} (although this should work,
1972
* because this is a TRUNCATE TABLE.
1974
tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
1975
xt_heap_release(self, tab);
1978
/* Log this error, but do not return it, because
1979
* it just involves the cleanup of the old table,
1980
* the new table has been successfully created.
1982
xt_log_and_clear_exception(self);
1987
freer_(); // xt_ht_unlock(db->db_tables)
1988
freer_(); // xt_db_unlock_table_pool(table_pool)
1990
/* I open the table here, because I cannot rely on MySQL to do
1991
* it after a create. This is normally OK, but with foreign keys
1992
* tables can be referenced and then they are not opened
1993
* before use. In this example, the INSERT opens t2, but t1 is
1994
* not opened of the create. As a result the foreign key
1995
* reference is not resolved.
1997
* drop table t1, t2;
2000
* id INT PRIMARY KEY
2006
* CONSTRAINT c1 FOREIGN KEY (v) REFERENCES t1(id)
2010
* INSERT INTO t2 VALUES(2);
2012
/* this code is not needed anymore as we open tables referred by FKs as necessary during checks
2013
xt_ht_lock(self, db->db_tables);
2014
pushr_(xt_ht_unlock, db->db_tables);
2015
tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
2016
freer_(); // xt_ht_unlock(db->db_tables)
2017
xt_heap_release(self, tab);
2018
* CHANGED see {LOAD-FOR-FKS} above.
// Drop a table of the thread's current database (self->st_database): delete
// its extended data and files and remove it from the table directory.
//
// self     - calling thread; st_ignore_fkeys disables the foreign key check.
// tab_name - path of the table to drop.
// drop_db  - passed to checkCanDrop().
//
// Throws XT_ERR_ROW_IS_REFERENCED when the foreign key check does not allow
// the drop. XT_ERR_TABLE_NOT_FOUND is thrown below as well; its guarding
// condition is not visible in this view.
// NOTE(review): several conditions around the steps below are on lines not
// visible here.
2024
xtPublic void xt_drop_table(XTThreadPtr self, XTPathStrPtr tab_name, xtBool drop_db)
2026
XTDatabaseHPtr db = self->st_database;
2027
XTOpenTablePoolPtr table_pool;
2028
XTTableHPtr tab = NULL;
2029
xtTableID tab_id = 0;
2030
xtBool can_drop = TRUE;
2034
#ifdef TRACE_CREATE_TABLES
2035
printf("DROP %s\n", tab_name->ps_path);
// Lock the table pool for this name and the open table hash. The locks are
// released through the cleanup stack at the end of the function.
2038
table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, tab_name, FALSE, FALSE, TRUE, &tab);
2039
pushr_(xt_db_unlock_table_pool, table_pool);
2040
xt_ht_lock(self, db->db_tables);
2041
pushr_(xt_ht_unlock, db->db_tables);
2042
pushr_(xt_heap_release, tab);
2045
tab_id = tab->tab_id; /* tab is not null if returned table_pool is not null */
2046
/* check if other tables refer this */
2047
if (!self->st_ignore_fkeys)
2048
can_drop = tab->tab_dic.dic_table->checkCanDrop(drop_db);
2051
/* See the comment in ha_pbxt::delete_table regarding different implementation of DROP TABLE
2052
* in MySQL and Drizzle
2055
xt_throw_xterr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND);
2061
XTTableEntryPtr te_ptr;
2063
xt_dl_delete_ext_data(self, tab, FALSE, TRUE);
2064
freer_(); // xt_heap_release(self, tab)
2066
/* For the Windows version this must be done before we
2067
* start to delete the underlying files!
2069
tab_close_files(self, tab);
2071
tab_delete_table_files(self, tab_name, tab_id);
2073
ASSERT(xt_get_self() == self);
// Remove the ID list entry of the table and save the table list.
2074
if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id))) {
2075
tab_remove_table_path(self, db, te_ptr->te_tab_path);
2076
xt_sl_delete(self, db->db_table_by_id, &tab_id);
2077
tab_save_tables(self, db);
2081
freer_(); // xt_heap_release(self, tab)
2084
xt_ht_del(self, db->db_tables, tab_name);
2086
else { /* cannot drop table because of FK dependencies */
2087
xt_throw_xterr(XT_CONTEXT, XT_ERR_ROW_IS_REFERENCED);
2090
freer_(); // xt_ht_unlock(db->db_tables)
2091
freer_(); // xt_db_unlock_table_pool(table_pool)
2095
// Verify the record free list and the row free list of the table behind ot.
//
// Each list is walked from its head (tab_rec_free_id / tab_row_free_id) while
// the matching mutex (tab_rec_lock / tab_row_lock) is held. A reference at or
// beyond the end-of-file ID is logged as an error and the table is marked as
// needing repair. The number of entries found is then compared with the
// stored free count (tab_rec_fnum / tab_row_fnum).
//
// Parameters:
//   ot            - open table; ot->ot_thread is used for reads and warnings.
//   check_recs    - NOTE(review): presumably selects whether the record free
//                   list is checked; confirm against the full source.
//   correct_count - when true, a mismatching stored count is overwritten with
//                   the counted value and tab_flush_pending is set; when
//                   false the mismatch is only logged.
xtPublic void xt_tab_check_free_lists(XTThreadPtr self, XTOpenTablePtr ot, bool check_recs, bool correct_count)
2097
char table_name[XT_IDENTIFIER_NAME_SIZE*3+3];
2098
register XTTableHPtr tab = ot->ot_table;
2099
xtRowID prev_row_id;
2101
xtRefID next_row_id;
2104
xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
2106
xtRecordID prev_rec_id;
2108
XTTabRecExtDRec rec_buf;
2110
xt_lock_mutex_ns(&tab->tab_rec_lock);
2111
/* Checking the free list: */
2114
rec_id = tab->tab_rec_free_id;
2116
if (rec_id >= tab->tab_rec_eof_id) {
2117
xt_logf(XT_NT_ERROR, "Table %s: invalid reference on free list: %llu, ", table_name, (u_llong) rec_id);
2119
xt_logf(XT_NT_ERROR, "reference by: %llu\n", (u_llong) prev_rec_id);
2121
xt_logf(XT_NT_ERROR, "reference by list head pointer\n");
2122
xt_tab_set_table_repair_pending(tab);
2125
// Only the fixed-size record header is read; it holds the status bits and
// the link to the next entry on the free list.
if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_FIX_HEADER_SIZE, (xtWord1 *) &rec_buf)) {
2129
xt_log_and_clear_warning(ot->ot_thread);
2132
if ((rec_buf.tr_rec_type_1 & XT_TAB_STATUS_MASK) != XT_TAB_STATUS_FREED)
2133
xt_logf(XT_NT_INFO, "Table %s: record, %llu, on free list is not free\n", table_name, (u_llong) rec_id);
2135
prev_rec_id = rec_id;
2136
rec_id = XT_GET_DISK_4(rec_buf.tr_prev_rec_id_4);
2138
if (free_count != tab->tab_rec_fnum) {
2139
if (correct_count) {
2140
tab->tab_rec_fnum = free_count;
2141
tab->tab_head_rec_fnum = free_count;
2142
tab->tab_flush_pending = TRUE;
2143
// NOTE(review): tab_rec_fnum was already overwritten above, so both
// numbers printed by this message are the new count.
xt_logf(XT_NT_INFO, "Table %s: free record count (%llu) has been set to the number of records on the list: %llu\n", table_name, (u_llong) tab->tab_rec_fnum, (u_llong) free_count);
2146
xt_logf(XT_NT_INFO, "Table %s: free record count (%llu) differs from the number of records on the list: %llu\n", table_name, (u_llong) tab->tab_rec_fnum, (u_llong) free_count);
2148
xt_unlock_mutex_ns(&tab->tab_rec_lock);
2151
/* Check the row free list: */
2152
xt_lock_mutex_ns(&tab->tab_row_lock);
2156
row_id = tab->tab_row_free_id;
2158
if (row_id >= tab->tab_row_eof_id) {
2159
xt_logf(XT_NT_ERROR, "Table %s: invalid reference on free row: %llu, ", table_name, (u_llong) row_id);
2161
xt_logf(XT_NT_ERROR, "reference by: %llu\n", (u_llong) prev_row_id);
2163
xt_logf(XT_NT_ERROR, "reference by list head pointer\n");
2164
xt_tab_set_table_repair_pending(tab);
2167
if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &next_row_id, ot->ot_thread)) {
2171
xt_log_and_clear_warning(ot->ot_thread);
2175
prev_row_id = row_id;
2176
row_id = next_row_id;
2178
if (free_count != tab->tab_row_fnum) {
2179
if (correct_count) {
2180
/* tab_row_fnum is the current value, and tab_head_row_fnum is the value on
2181
* disk. tab_head_row_fnum is set by the writer as the changes are applied
2184
* This is the value then stored in the header of the file. This value
2185
* is in sync with other changes to the file.
2187
* So the fact that I am setting both value means this will not work at
2188
* runtime, unless all changes have been applied by the writer.
2190
* The correct way to do this at run time would be to add the change to the
2191
* transaction log, so that it is applied by the writer.
2193
tab->tab_row_fnum = free_count;
2194
tab->tab_head_row_fnum = free_count;
2195
tab->tab_flush_pending = TRUE;
2196
// NOTE(review): tab_row_fnum was already overwritten above, so both
// numbers printed by this message are the new count.
xt_logf(XT_NT_INFO, "Table %s: free row count (%llu) has been set to the number of rows on the list: %llu\n", table_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count);
2199
xt_logf(XT_NT_INFO, "Table %s: free row count (%llu) differs from the number of rows on the list: %llu\n", table_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count);
2202
xt_unlock_mutex_ns(&tab->tab_row_lock);
2206
* Record buffer size:
2207
* -------------------
2208
* The size of the record buffer used to hold the row
2209
* in memory. This buffer size does not include the BLOB data.
2210
* About 8 bytes (a pointer and a size) is reserved for each BLOB
2213
* The buffer size includes a number of "NULL" bytes followed by
2214
* the data area. The NULL bytes contain 1 bit for every column,
2215
* to indicate if the column is NULL or not.
2217
* The size of the buffer is 4/8-byte aligned, so it may be padded
2220
* Fixed length rec. len.:
2221
* -----------------------
2222
* If the record does not include any BLOBs then this is the size of the
2223
* fixed length record. The size of the data in the data handle record
2224
* need never be bigger than this length, if the record does not
2225
* contain BLOBs. So this should be the maximum size set for
2226
* AVG_ROW_LENGTH in this case.
2228
* Handle data record size:
2229
* ------------------------
2230
* This is the size of the handle data record. It is the data size
2231
* plus the "max header size".
2233
* Min/max header size:
2234
* The min and max header size of the header in the data handle file.
2235
* The larger header is used if a record has an extended data (data log
2238
* Min/avg/max record size:
2239
* ------------------------
2240
* These are variable length records sizes. That is, the size of records
2241
* when stored in the variable length format. Variable length records
2242
* do not have fixed fields sizes, instead the fields are packed one
2243
* after the other, prefixed by a number of size indicator bytes.
2245
* The average is an estimate of the average record size. This estimate
2246
* is used if no AVG_ROW_LENGTH is specifically given.
2248
* If the average estimate is within 20% of the maximum size of the record,
2249
* then the record will be handled as a fixed length record.
2251
* Avg row len set for tab:
2252
* ------------------------
2253
* This is the value set using AVG_ROW_LENGTH when the table is declared.
2255
* Rows fixed length:
2256
* ------------------
2257
* YES if the records of this table are handled as fixed length records.
2258
* In this case the table records will never have an extended record
2261
* The size of the data area in the handle data record is set to the
2262
* size of the MySQL data record ("Fixed length rec. len.").
2264
* It also means that the record format used is identical to the MySQL
2267
* If the records are not fixed, then the variable length record format
2268
* is used. Record sizes are then in the range specified by
2269
* "Min/avg/max record size".
2271
* Maximum fixed size:
2272
* -------------------
2273
* This is the maximum size of a data log record.
2275
* Minimum variable size:
2276
* ------------------------
2277
* Records below this size are handled as a fixed length record size, unless
2278
* the AVG_ROW_LENGTH is specifically set.
2280
// Check the consistency of the table behind the open table handle ot and
// report what is found.
//
// With CHECK_TABLE_STATS defined, dictionary settings and space usage are
// printed to stdout. With DUMP_CHECK_TABLE defined, every record and row is
// printed as well. Inconsistencies are reported through xt_logf().
//
// Locking: db_co_ext_lock is held for the whole function, tab_rec_lock while
// the records and the record free list are examined, and tab_row_lock while
// the rows and the row free list are examined.
//
// Parameters:
//   self - calling thread; owns the cleanup calls registered with pushr_().
//   ot   - open table to check; ot->ot_row_rbuffer is used as the record
//          read buffer.
xtPublic void xt_check_table(XTThreadPtr self, XTOpenTablePtr ot)
2282
XTTableHPtr tab = ot->ot_table;
2284
XTTabRecExtDPtr rec_buf = (XTTabRecExtDPtr) ot->ot_row_rbuffer;
2285
#ifdef CHECK_TABLE_READ_DATA_LOG
2286
XTactExtRecEntryDRec ext_rec;
2289
xtLogOffset log_offset;
2292
xtRecordID prev_rec_id;
2295
u_llong free_rec_count = 0, free_count2 = 0;
2296
u_llong delete_rec_count = 0;
2297
u_llong alloc_rec_count = 0;
2298
u_llong alloc_rec_bytes = 0;
2299
u_llong min_comp_rec_len = 0;
2300
u_llong max_comp_rec_len = 0;
2303
u_llong ext_data_len = 0;
2304
u_llong ext_rec_count = 0;
2306
#if defined(DUMP_CHECK_TABLE) || defined(CHECK_TABLE_STATS)
2307
printf("\nCHECK TABLE: %s\n", tab->tab_name->ps_path);
2310
xt_lock_mutex(self, &tab->tab_db->db_co_ext_lock);
2311
pushr_(xt_unlock_mutex, &tab->tab_db->db_co_ext_lock);
2313
xt_lock_mutex(self, &tab->tab_rec_lock);
2314
pushr_(xt_unlock_mutex, &tab->tab_rec_lock);
2316
// Print the dictionary settings of the table.
#ifdef CHECK_TABLE_STATS
2317
printf("Record buffer size = %lu\n", (u_long) tab->tab_dic.dic_mysql_buf_size);
2318
printf("Fixed length rec. len. = %lu\n", (u_long) tab->tab_dic.dic_mysql_rec_size);
2319
printf("Handle data record size = %lu\n", (u_long) tab->tab_dic.dic_rec_size);
2320
printf("Min/max header size = %d/%d\n", (int) offsetof(XTTabRecFix, rf_data), tab->tab_dic.dic_rec_fixed ? (int) offsetof(XTTabRecFix, rf_data) : (int) offsetof(XTTabRecExtDRec, re_data));
2321
printf("Min/avg/max record size = %llu/%llu/%llu\n", (u_llong) tab->tab_dic.dic_min_row_size, (u_llong) tab->tab_dic.dic_ave_row_size, (u_llong) tab->tab_dic.dic_max_row_size);
2322
if (tab->tab_dic.dic_def_ave_row_size)
2323
printf("Avg row len set for tab = %lu\n", (u_long) tab->tab_dic.dic_def_ave_row_size);
2325
printf("Avg row len set for tab = not specified\n");
2326
printf("Rows fixed length = %s\n", tab->tab_dic.dic_rec_fixed ? "YES" : "NO");
2327
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
2328
printf("Table type = MEMORY\n");
2329
else if (tab->tab_dic.dic_tab_flags & XT_TF_REAL_TEMP_TABLE)
2330
printf("Table type = TEMPORARY\n");
2331
else if (tab->tab_dic.dic_tab_flags & XT_TF_DDL_TEMP_TABLE)
2332
printf("Table type = DDL-TEMPORARY\n");
2333
if (tab->tab_dic.dic_def_ave_row_size)
2334
printf("Maximum fixed size = %lu\n", (u_long) XT_TAB_MAX_FIX_REC_LENGTH_SPEC);
2336
printf("Maximum fixed size = %lu\n", (u_long) XT_TAB_MAX_FIX_REC_LENGTH);
2337
printf("Minimum variable size = %lu\n", (u_long) XT_TAB_MIN_VAR_REC_LENGTH);
2338
printf("Minimum auto-increment = %llu\n", (u_llong) tab->tab_dic.dic_min_auto_inc);
2339
printf("Number of columns = %lu\n", (u_long) tab->tab_dic.dic_no_of_cols);
2340
printf("Number of fixed columns = %lu\n", (u_long) tab->tab_dic.dic_fix_col_count);
2341
printf("Columns req. for index = %lu\n", (u_long) tab->tab_dic.dic_ind_cols_req);
2342
if (tab->tab_dic.dic_ind_rec_len)
2343
printf("Rec len req. for index = %llu\n", (u_llong) tab->tab_dic.dic_ind_rec_len);
2344
printf("Columns req. for blobs = %lu\n", (u_long) tab->tab_dic.dic_blob_cols_req);
2345
printf("Number of blob columns = %lu\n", (u_long) tab->tab_dic.dic_blob_count);
2346
printf("Number of indices = %lu\n", (u_long) tab->tab_dic.dic_key_count);
2349
#ifdef DUMP_CHECK_TABLE
2350
printf("Records:-\n");
2351
printf("Free list: %llu (%llu)\n", (u_llong) tab->tab_rec_free_id, (u_llong) tab->tab_rec_fnum);
2352
printf("EOF: %llu\n", (u_llong) tab->tab_rec_eof_id);
2355
rec_size = XT_REC_EXT_HEADER_SIZE;
2356
if (rec_size > tab->tab_recs.tci_rec_size)
2357
rec_size = tab->tab_recs.tci_rec_size;
2359
// Scan every record up to the end-of-file record ID and classify it by the
// status bits in its header.
while (rec_id < tab->tab_rec_eof_id) {
2360
if (!xt_tab_get_rec_data(ot, rec_id, tab->tab_dic.dic_rec_size, ot->ot_row_rbuffer))
2363
#ifdef DUMP_CHECK_TABLE
2364
printf("%-4llu ", (u_llong) rec_id);
2366
switch (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) {
2367
case XT_TAB_STATUS_FREED:
2368
#ifdef DUMP_CHECK_TABLE
2369
printf("======== ");
2373
case XT_TAB_STATUS_DELETE:
2374
#ifdef DUMP_CHECK_TABLE
2379
case XT_TAB_STATUS_FIXED:
2380
#ifdef DUMP_CHECK_TABLE
2381
printf("record-F ");
2384
// Track total, minimum and maximum row length for the statistics below.
row_size = myxt_store_row_length(ot, (char *) ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE);
2385
alloc_rec_bytes += row_size;
2386
if (!min_comp_rec_len || row_size < min_comp_rec_len)
2387
min_comp_rec_len = row_size;
2388
if (row_size > max_comp_rec_len)
2389
max_comp_rec_len = row_size;
2391
case XT_TAB_STATUS_VARIABLE:
2392
#ifdef DUMP_CHECK_TABLE
2393
printf("record-V ");
2396
row_size = myxt_load_row_length(ot, tab->tab_dic.dic_rec_size, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, NULL);
2397
alloc_rec_bytes += row_size;
2398
if (!min_comp_rec_len || row_size < min_comp_rec_len)
2399
min_comp_rec_len = row_size;
2400
if (row_size > max_comp_rec_len)
2401
max_comp_rec_len = row_size;
2403
case XT_TAB_STATUS_EXT_DLOG:
2404
#ifdef DUMP_CHECK_TABLE
2405
printf("record-X ");
2409
// For extended records the row length is the data log size plus the part
// of the handle record that follows the extended header.
ext_data_len += XT_GET_DISK_4(rec_buf->re_log_dat_siz_4);
2410
row_size = XT_GET_DISK_4(rec_buf->re_log_dat_siz_4) + ot->ot_rec_size - XT_REC_EXT_HEADER_SIZE;
2411
alloc_rec_bytes += row_size;
2412
if (!min_comp_rec_len || row_size < min_comp_rec_len)
2413
min_comp_rec_len = row_size;
2414
if (row_size > max_comp_rec_len)
2415
max_comp_rec_len = row_size;
2418
#ifdef DUMP_CHECK_TABLE
2419
if (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_CLEANED_BIT)
2424
prev_rec_id = XT_GET_DISK_4(rec_buf->tr_prev_rec_id_4);
2425
xn_id = XT_GET_DISK_4(rec_buf->tr_xact_id_4);
2426
row_id = XT_GET_DISK_4(rec_buf->tr_row_id_4);
2427
switch (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) {
2428
case XT_TAB_STATUS_FREED:
2429
#ifdef DUMP_CHECK_TABLE
2430
printf(" prev=%-3llu (xact=%-3llu row=%lu)\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id);
2433
case XT_TAB_STATUS_EXT_DLOG:
2434
#ifdef DUMP_CHECK_TABLE
2435
printf(" prev=%-3llu xact=%-3llu row=%lu Xlog=%lu Xoff=%llu Xsiz=%lu\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id, (u_long) XT_GET_DISK_2(rec_buf->re_log_id_2), (u_llong) XT_GET_DISK_6(rec_buf->re_log_offs_6), (u_long) XT_GET_DISK_4(rec_buf->re_log_dat_siz_4));
2438
// Optionally read the extended record header and verify that its size,
// table ID and record ID match this record.
#ifdef CHECK_TABLE_READ_DATA_LOG
2441
log_size = XT_GET_DISK_4(rec_buf->re_log_dat_siz_4);
2442
XT_GET_LOG_REF(log_id, log_offset, rec_buf);
2443
if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
2444
xt_tab_read_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data), (xtWord1 *) &ext_rec);
2448
if (!(ok = self->st_dlog_buf.dlb_read_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data), (xtWord1 *) &ext_rec, self)))
2449
xt_log_and_clear_exception(self);
2453
xtTableID curr_tab_id;
2454
xtRecordID curr_rec_id;
2456
log_size2 = XT_GET_DISK_4(ext_rec.er_data_size_4);
2457
curr_tab_id = XT_GET_DISK_4(ext_rec.er_tab_id_4);
2458
curr_rec_id = XT_GET_DISK_4(ext_rec.er_rec_id_4);
2459
if (log_size2 != log_size || curr_tab_id != tab->tab_id || curr_rec_id != rec_id) {
2460
xt_logf(XT_INFO, "Table %s: record %llu, extended record %lu:%llu not valid\n", tab->tab_name, (u_llong) rec_id, (u_long) log_id, (u_llong) log_offset);
2466
#ifdef DUMP_CHECK_TABLE
2467
printf(" prev=%-3llu xact=%-3llu row=%lu\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id);
2474
// Print space usage: file sizes come from seeking to the end of the record,
// row and index files.
#ifdef CHECK_TABLE_STATS
2475
u_long rec, row, ind;
2478
rec = xt_seek_eof_file(self, ot->ot_rec_file);
2479
row = xt_seek_eof_file(self, ot->ot_row_file);
2480
ind = xt_seek_eof_file(self, ot->ot_ind_file);
2481
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
2482
if (!tab->tab_dic.dic_rec_fixed) {
2483
xt_int8_to_byte_size((xtInt8) tab->tab_mem_total, value);
2484
printf("Ext. record memory used = %s\n", value);
2485
printf("Extended record count = %llu\n", ext_rec_count);
2487
xt_int8_to_byte_size((xtInt8) ind, value);
2488
printf("Index data memory used = %s\n", value);
2489
xt_int8_to_byte_size((xtInt8) rec + row, value);
2490
printf("Table data memory used = %s\n", value);
2491
xt_int8_to_byte_size((xtInt8) tab->tab_mem_total + rec + row + ind, value);
2492
printf("Total memory used = %s\n", value);
2495
if (!tab->tab_dic.dic_rec_fixed) {
2496
xt_int8_to_byte_size((xtInt8) ext_data_len, value);
2497
printf("Ext. record disk used = %s\n", value);
2498
printf("Extended record count = %llu\n", ext_rec_count);
2500
xt_int8_to_byte_size((xtInt8) ind, value);
2501
printf("Index disk space used = %s\n", value);
2502
xt_int8_to_byte_size((xtInt8) rec + row, value);
2503
printf("Table disk space used = %s\n", value);
2504
xt_int8_to_byte_size((xtInt8) ext_data_len + rec + row + ind, value);
2505
printf("Total disk space used = %s\n", value);
2508
if (alloc_rec_count) {
2509
// NOTE(review): the label below is misspelled ("Minumum"); it is output
// text, so it is left unchanged here.
printf("Minumum comp. rec. len. = %llu\n", (u_llong) min_comp_rec_len);
2510
// Adding 0.5 before the integer conversion rounds the average to nearest.
printf("Average comp. rec. len. = %llu\n", (u_llong) ((double) alloc_rec_bytes / (double) alloc_rec_count + (double) 0.5));
2511
printf("Maximum comp. rec. len. = %llu\n", (u_llong) max_comp_rec_len);
2513
printf("Free record count = %llu\n", (u_llong) free_rec_count);
2514
printf("Deleted record count = %llu\n", (u_llong) delete_rec_count);
2515
printf("Allocated record count = %llu\n", (u_llong) alloc_rec_count);
2518
// Compare the stored free count with the number of freed records seen in
// the scan above.
if (tab->tab_rec_fnum != free_rec_count)
2519
xt_logf(XT_INFO, "Table %s: incorrect number of free blocks, %llu, should be: %llu\n", tab->tab_name, (u_llong) free_rec_count, (u_llong) tab->tab_rec_fnum);
2521
/* Checking the free list: */
2523
rec_id = tab->tab_rec_free_id;
2525
if (rec_id >= tab->tab_rec_eof_id) {
2526
xt_logf(XT_INFO, "Table %s: invalid reference on free list: %llu, ", tab->tab_name, (u_llong) rec_id);
2528
xt_logf(XT_INFO, "reference by: %llu\n", (u_llong) prec_id);
2530
xt_logf(XT_INFO, "reference by list head pointer\n");
2533
if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_FIX_HEADER_SIZE, (xtWord1 *) rec_buf)) {
2534
xt_log_and_clear_exception(self);
2537
if ((rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) != XT_TAB_STATUS_FREED)
2538
xt_logf(XT_INFO, "Table %s: record, %llu, on free list is not free\n", tab->tab_name, (u_llong) rec_id);
2541
rec_id = XT_GET_DISK_4(rec_buf->tr_prev_rec_id_4);
2543
if (free_count2 != free_rec_count)
2544
xt_logf(XT_INFO, "Table %s: not all free blocks (%llu) on free list: %llu\n", tab->tab_name, (u_llong) free_rec_count, (u_llong) free_count2);
2546
freer_(); // xt_unlock_mutex_ns(&tab->tab_rec_lock);
2550
// The row checks below run under the row lock.
xt_lock_mutex(self, &tab->tab_row_lock);
2551
pushr_(xt_unlock_mutex, &tab->tab_row_lock);
2553
#ifdef DUMP_CHECK_TABLE
2555
printf("Free list: %llu (%llu)\n", (u_llong) tab->tab_row_free_id, (u_llong) tab->tab_row_fnum);
2556
printf("EOF: %llu\n", (u_llong) tab->tab_row_eof_id);
2560
// Read every row entry up to the end-of-file row ID.
while (rec_id < tab->tab_row_eof_id) {
2561
if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, rec_id, &ref_id, self))
2563
#ifdef DUMP_CHECK_TABLE
2564
printf("%-3llu ", (u_llong) rec_id);
2566
#ifdef DUMP_CHECK_TABLE
2568
printf("====== 0\n");
2570
printf("in use %llu\n", (u_llong) ref_id);
2577
// Walk the row free list from its head and validate each reference.
row_id = tab->tab_row_free_id;
2579
if (row_id >= tab->tab_row_eof_id) {
2580
xt_logf(XT_INFO, "Table %s: invalid reference on free row: %llu, ", tab->tab_name, (u_llong) row_id);
2582
xt_logf(XT_INFO, "reference by: %llu\n", (u_llong) prec_id);
2584
xt_logf(XT_INFO, "reference by list head pointer\n");
2587
if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &ref_id, self)) {
2588
xt_log_and_clear_exception(self);
2595
if (free_count2 != tab->tab_row_fnum)
2596
xt_logf(XT_INFO, "Table %s: free row count (%llu) differs from the number of row on the list: %llu\n", tab->tab_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count2);
2598
freer_(); // xt_unlock_mutex(&tab->tab_row_lock);
2600
#ifdef CHECK_INDEX_ON_CHECK_TABLE
2601
xt_check_indices(ot);
2603
freer_(); // xt_unlock_mutex(&tab->tab_db->db_co_ext_lock);
2606
// Rename the table old_name to new_name within the database currently used
// by the calling thread.
//
// Steps performed here: lock the table pool and db->db_tables, move the
// table's dictionary into a local copy, close the table's files, remove the
// old name from db->db_tables, rename every file belonging to the table,
// switch the name and path stored in the table entry, save the table list,
// then open the table under the new name with the saved dictionary and mark
// it as repaired.
//
// Throws XT_ERR_NAME_TOO_LONG when the last path component of new_name is
// longer than XT_TABLE_NAME_SIZE-1 characters.
xtPublic void xt_rename_table(XTThreadPtr self, XTPathStrPtr old_name, XTPathStrPtr new_name)
2608
XTDatabaseHPtr db = self->st_database;
2609
XTOpenTablePoolPtr table_pool;
2610
XTTableHPtr tab = NULL;
2611
char table_name[XT_MAX_TABLE_FILE_NAME_SIZE];
2613
XTFilesOfTableRec ft;
2614
XTDictionaryRec dic;
2616
XTTableEntryPtr te_ptr;
2618
XTTablePathPtr te_new_path;
2619
XTTablePathPtr te_old_path;
2620
char to_path[PATH_MAX];
2622
memset(&dic, 0, sizeof(dic));
2624
#ifdef TRACE_CREATE_TABLES
2625
printf("RENAME %s --> %s\n", old_name->ps_path, new_name->ps_path);
2627
if (strlen(xt_last_name_of_path(new_name->ps_path)) > XT_TABLE_NAME_SIZE-1)
2628
xt_throw_taberr(XT_CONTEXT, XT_ERR_NAME_TOO_LONG, new_name);
2630
/* MySQL renames the table while it is in use. Here is
2636
* COPY tab1 -> tmp_tab
2638
* RENAME tab1 -> tmp2_tab
2639
* RENAME tmp_tab -> tab1
2640
* CLOSE tab1 (tmp2_tab)
2644
* Since the table is open when it is renamed, I cannot
2645
* get exclusive use of the table for this operation.
2647
* So instead we just make sure that the sweeper is not
2650
table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, old_name, FALSE, TRUE, FALSE, &tab);
2651
pushr_(xt_db_unlock_table_pool, table_pool);
2652
xt_ht_lock(self, db->db_tables);
2653
pushr_(xt_ht_unlock, db->db_tables);
2654
tab_id = tab->tab_id;
2655
// Keep the dictionary in a local copy so it can be handed to the table
// when it is opened again under the new name.
myxt_move_dictionary(&dic, &tab->tab_dic);
2656
pushr_(myxt_free_dictionary, &dic);
2657
pushr_(xt_heap_release, tab);
2659
/* Unmap the memory mapped table files:
2660
* For windows this must be done before we
2661
* can rename the files.
2663
tab_close_files(self, tab);
2665
freer_(); // xt_heap_release(self, old_tab)
2667
/* Create the new name and path: */
2668
te_new_name = xt_dup_string(self, xt_last_name_of_path(new_name->ps_path));
2669
pushr_(xt_free, te_new_name);
2670
te_new_path = tab_get_table_path(self, db, new_name, FALSE);
2671
pushr_(tab_free_table_path, te_new_path);
2673
te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
2675
/* Remove the table from the Database directory: */
2676
xt_ht_del(self, db->db_tables, old_name);
2678
// Rename each file of the table: the new path is new_name followed by the
// postfix taken from the existing file name.
xt_enum_files_of_tables_init(old_name, tab_id, &ft);
2679
while (xt_enum_files_of_tables_next(&ft)) {
2680
postfix = xt_tab_file_to_name(XT_MAX_TABLE_FILE_NAME_SIZE, table_name, ft.ft_file_path);
2682
xt_strcpy(PATH_MAX, to_path, new_name->ps_path);
2683
xt_strcat(PATH_MAX, to_path, postfix);
2685
// A failed rename is logged and cleared; the loop carries on.
if (!xt_fs_rename(NULL, ft.ft_file_path, to_path))
2686
xt_log_and_clear_exception(self);
2689
/* Switch the table name and path: */
2690
xt_free(self, te_ptr->te_tab_name);
2691
te_ptr->te_tab_name = te_new_name;
2692
te_old_path = te_ptr->te_tab_path;
2693
te_ptr->te_tab_path = te_new_path;
2694
tab_remove_table_path(self, db, te_old_path);
2695
tab_save_tables(self, db);
2697
// The table entry now owns the new name and path, so their cleanup calls
// are discarded instead of run.
popr_(); // Discard tab_free_table_path(te_new_path);
2698
popr_(); // Discard xt_free(te_new_name);
2700
tab = xt_use_table_no_lock(self, db, new_name, FALSE, FALSE, &dic);
2701
/* All renamed tables are considered repaired! */
2702
xt_tab_table_repaired(tab);
2703
xt_heap_release(self, tab);
2705
freer_(); // myxt_free_dictionary(&dic)
2706
freer_(); // xt_ht_unlock(db->db_tables)
2707
freer_(); // xt_db_unlock_table_pool(table_pool)
2710
// Obtain the table handle for name in the database currently used by the
// calling thread. The db->db_tables lock is taken around the lookup, which is
// delegated to xt_use_table_no_lock(); no_load and missing_ok are passed
// through unchanged and no dictionary is supplied.
xtPublic XTTableHPtr xt_use_table(XTThreadPtr self, XTPathStrPtr name, xtBool no_load, xtBool missing_ok)
2713
XTDatabaseHPtr db = self->st_database;
2715
xt_ht_lock(self, db->db_tables);
2716
pushr_(xt_ht_unlock, db->db_tables);
2717
tab = xt_use_table_no_lock(self, db, name, no_load, missing_ok, NULL);
2722
// Flush the table behind ot after giving the sweeper and the writer the
// chance to finish their outstanding work.
//
// The sweeper is woken if it is idle and then waited for until it is idle
// again. The writer is waited for while operations on the table are still
// pending, the transaction log is flushed, and finally xt_flush_table() is
// called.
//
// Parameters:
//   self    - calling thread.
//   ot      - open table to flush.
//   timeout - limit, in seconds, for the writer wait; 0 disables the check.
//             A warning is logged when the limit is exceeded.
xtPublic void xt_sync_flush_table(XTThreadPtr self, XTOpenTablePtr ot, int timeout)
2724
XTTableHPtr tab = ot->ot_table;
2725
XTDatabaseHPtr db = tab->tab_db;
2727
/* Wakeup the sweeper:
2728
* We want the sweeper to check if there is anything to do,
2729
* so we must wake it up.
2730
* Once it has done all it can, it will go back to sleep.
2731
* This should be good enough.
2733
* NOTE: In all cases, we do not wait if the sweeper is in
2736
if (db->db_sw_idle) {
2737
// Snapshot the check counter so a change after the wakeup can be detected.
u_int check_count = db->db_sw_check_count;
2740
xt_wakeup_sweeper(db);
2741
if (!db->db_sw_thread || db->db_sw_idle != XT_THREAD_IDLE || check_count != db->db_sw_check_count)
2743
xt_sleep_milli_second(10);
2747
/* Wait for the sweeper to become idle: */
2748
xt_lock_mutex(self, &db->db_sw_lock);
2749
pushr_(xt_unlock_mutex, &db->db_sw_lock);
2750
while (db->db_sw_thread && !db->db_sw_idle) {
2751
xt_timed_wait_cond(self, &db->db_sw_cond, &db->db_sw_lock, 10);
2753
freer_(); // xt_unlock_mutex(&db->db_sw_lock)
2755
/* Wait for the writer to write out all operations on the table:
2756
* We also do not wait for the writer if it is in
2759
time_t start_time = time(NULL);
2760
while (db->db_wr_thread &&
2761
db->db_wr_idle != XT_THREAD_INERR &&
2762
XTTableSeq::xt_op_is_before(tab->tab_head_op_seq+1, tab->tab_seq.ts_next_seq)) {
2763
if (timeout && time(NULL) > start_time + timeout) {
2764
char name_buf[XT_TABLE_NAME_BUF_SIZE];
2766
xt_tab_make_table_name(tab->tab_name, name_buf, XT_TABLE_NAME_BUF_SIZE);
2767
xt_logf(XT_WARNING, "Timeout waiting for writer while flushing %s\n", name_buf);
2771
/* Flush the log, in case this is holding up the
2774
if (!db->db_xlog.xlog_flush(self))
2777
// Announce this thread as a waiter while holding the writer lock.
xt_lock_mutex(self, &db->db_wr_lock);
2778
pushr_(xt_unlock_mutex, &db->db_wr_lock);
2779
db->db_wr_thread_waiting++;
2781
* Wake the writer if it is sleeping. In order to
2782
* flush a table we must wait for the writer to complete
2783
* committing all the changes in the table to the database.
2785
if (db->db_wr_idle) {
2786
if (!xt_broadcast_cond_ns(&db->db_wr_cond))
2787
xt_log_and_clear_exception_ns();
2790
freer_(); // xt_unlock_mutex(&db->db_wr_lock)
2791
xt_sleep_milli_second(10);
2793
// Withdraw the waiter count again, under the same lock.
xt_lock_mutex(self, &db->db_wr_lock);
2794
pushr_(xt_unlock_mutex, &db->db_wr_lock);
2795
db->db_wr_thread_waiting--;
2796
freer_(); // xt_unlock_mutex(&db->db_wr_lock)
2799
xt_flush_table(self, ot);
2802
// Task body: flush the record and row data of frt_table.
//
// The table is opened from the pool by database and table ID rather than
// through the stored pointer. The flush is skipped, with a warning and the
// checkpoint flush state set to XT_CPT_STATE_DONE_ALL, when the table cannot
// be opened or when the opened table is not the same object as frt_table.
// The open table is returned to the pool after the flush attempt.
xtBool XTFlushRecRowTask::tk_task(XTThreadPtr thread)
2806
/* {TASK-TABLE-GONE}
2807
* If this task was scheduled before the table was deleted
2808
* or renamed, then we may be caught holding an invalid
2809
* table (frt_table) object.
2811
* As a result we just use the ID, to get the open table
2814
* If the tables are not identical, then there is no point
2817
if (!(xt_db_open_pool_table_ns(&ot, frt_table->tab_db, frt_table->tab_id)))
2821
/* Can happen if the table has been dropped: */
2822
if (thread->t_exception.e_xt_err)
2823
xt_log_and_clear_exception(thread);
2824
xt_logf(XT_NT_WARNING, "Checkpoint skipping table (ID) %lu: table was not found\n", (u_long) frt_table->tab_id);
2825
xt_checkpoint_set_flush_state(frt_table->tab_db, frt_table->tab_id, XT_CPT_STATE_DONE_ALL);
2829
if (ot->ot_table != frt_table) {
2830
/* Can happen if the table has been renamed: */
2831
if (thread->t_exception.e_xt_err)
2832
xt_log_and_clear_exception(thread);
2833
xt_logf(XT_NT_WARNING, "Checkpoint skipping table (ID) %lu: table has been renamed\n", (u_long) frt_table->tab_id);
2834
xt_checkpoint_set_flush_state(frt_table->tab_db, frt_table->tab_id, XT_CPT_STATE_DONE_ALL);
2838
if (!xt_flush_record_row(ot, NULL, FALSE)) {
2839
xt_db_return_table_to_pool_ns(ot);
2844
xt_db_return_table_to_pool_ns(ot);
2848
// Take a heap reference on the table this task flushes (frt_table).
void XTFlushRecRowTask::tk_reference()
2850
xt_heap_reference_ns(frt_table);
2853
// Release the heap reference on the table this task flushes (frt_table).
void XTFlushRecRowTask::tk_release()
2855
xt_heap_release_ns(frt_table);
2859
* Start a flush of this file in background.
2861
// Schedule the table's record/row flush task (tab->tab_rec_flush_task) to run
// asynchronously. The task is first checked for already running; otherwise it
// is handed to xt_run_async_task() together with notify_complete, the calling
// thread and the table's database, and that call's result is returned.
xtPublic xtBool xt_async_flush_record_row(XTTableHPtr tab, xtBool notify_complete, XTThreadPtr thread)
2863
if (tab->tab_rec_flush_task->tk_is_running())
2867
return xt_run_async_task(tab->tab_rec_flush_task, notify_complete, FALSE, thread, tab->tab_db);
2870
// Flush the record and row files of the table behind ot and then write the
// table header.
//
// The work is bracketed by xt_begin_checkpoint() / xt_end_checkpoint() and
// runs under tab_rec_flush_lock. The checkpoint flush state of the table is
// set to XT_CPT_STATE_START_REC_ROW at the start, to
// XT_CPT_STATE_DONE_REC_ROW on success and to XT_CPT_STATE_STOP_REC_ROW on
// the failure path.
//
// Parameters:
//   ot              - open table; ot->ot_thread must be the calling thread.
//   bytes_flushed   - incremented by the number of bytes that were pending.
//                     NOTE(review): callers in this file pass NULL, so
//                     confirm that the update is guarded.
//   have_table_lock - passed through to xt_begin_checkpoint().
xtPublic xtBool xt_flush_record_row(XTOpenTablePtr ot, off_t *bytes_flushed, xtBool have_table_lock)
2872
XTTableHeadDRec rec_head;
2873
XTTableHPtr tab = ot->ot_table;
2875
#ifdef TRACE_FLUSH_TABLE
2879
if (!xt_begin_checkpoint(tab->tab_db, have_table_lock, ot->ot_thread))
2882
xt_lock_mutex_ns(&tab->tab_rec_flush_lock);
2883
#ifdef XT_SORT_REC_WRITES
2884
if (!xt_xres_delay_flush(ot, TRUE))
2887
xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_START_REC_ROW);
2889
ASSERT_NS(ot->ot_thread == xt_get_self());
2890
/* Make sure that the table recovery point, in
2891
* particular the operation ID is recorded
2892
* before all other flush activity!
2894
* This is because only operations after the
2895
* recovery point in the header are applied
2896
* to the table on recovery.
2898
* So the operation ID is recorded before the
2899
* flush activity, and written after all is done.
2901
xt_tab_store_header(ot, &rec_head);
2903
/* Write the table header: */
2904
if (tab->tab_flush_pending) {
2905
tab->tab_flush_pending = FALSE;
2907
#ifdef TRACE_FLUSH_TABLE
2909
printf("FLUSH TABLE bytes=%lu %s\n", (u_long) tab->tab_bytes_to_flush, tab->tab_name->ps_path);
2912
// Want to see how much was to be flushed in the debugger:
2913
to_flush = tab->tab_bytes_to_flush;
2914
tab->tab_bytes_to_flush = 0;
2916
*bytes_flushed += to_flush;
2918
#ifdef XT_REC_FLUSH_THRESHOLD
2921
/* Reset the writer's byte level: */
2922
if ((writer = ot->ot_table->tab_db->db_wr_thread))
2923
tab->tab_rec_wr_last_flush = writer->st_statistics.st_rec.ts_write;
2926
/* Flush the table data: */
2927
if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags)) {
2928
if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread) ||
2929
!XT_FLUSH_RR_FILE(ot->ot_row_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread)) {
2930
// The flush failed, so the pending flag is restored.
tab->tab_flush_pending = TRUE;
2935
/* The header includes the operation number which
2936
* must be written AFTER all other data,
2937
* because operations will not be applied again.
2939
if (!tab_write_header(ot, &rec_head)) {
2940
tab->tab_flush_pending = TRUE;
2945
/* Flush the auto-increment: */
2946
if (xt_db_auto_increment_mode == 1) {
2947
if (tab->tab_auto_inc != tab->tab_dic.dic_min_auto_inc) {
2948
tab->tab_dic.dic_min_auto_inc = tab->tab_auto_inc;
2949
if (!xt_tab_write_min_auto_inc(ot))
2954
/* Mark this table as record/row flushed: */
2955
xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_DONE_REC_ROW);
2957
#ifdef TRACE_FLUSH_TABLE
2959
printf("flush table (%d) %s DONE\n", (int) (time(NULL) - tnow), tab->tab_name->ps_path);
2964
xt_unlock_mutex_ns(&tab->tab_rec_flush_lock);
2966
if (!xt_end_checkpoint(tab->tab_db, ot->ot_thread, NULL))
2971
// Failure path: record that the record/row flush stopped and release the
// flush lock.
xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_STOP_REC_ROW);
2973
#ifdef TRACE_FLUSH_TABLE
2975
printf("flush table (%d) %s FAILED\n", (int) (time(NULL) - tnow), tab->tab_name->ps_path);
2980
xt_unlock_mutex_ns(&tab->tab_rec_flush_lock);
2984
// Flush the table behind ot: first the record and row data through
// xt_flush_record_row(), then the indices through xt_flush_indices().
xtPublic void xt_flush_table(XTThreadPtr self, XTOpenTablePtr ot)
2986
/* GOTCHA {FLUSH-BUG}: This bug was difficult to find.
2987
* It occurred on Windows in the multi_update
2990
* What happens is the checkpointer starts to
2991
* flush the table, and gets to the
2992
* XT_FLUSH_RR_FILE part.
2994
* Then a rename occurs, and the user thread
2995
* flushes the table, and goes through and
2996
* writes the table header, with the most
2997
* recent table operation (the last operation
3000
* The checkpointer then completes and
3001
* also writes the header, but with old
3002
* values (as read in xt_tab_store_header()).
3004
* The user thread then continues, and
3005
* reopens the table after rename.
3006
* On reopen, it reads the old value from the header,
3007
* and sets the current operation number.
3009
* Now there is a problem in the table cache,
3010
* because some cache pages have operation numbers
3011
* that are greater than current operation
3014
* This later led to the free-er hanging while
3015
* it waited for an operation to be
3016
* written to the disk that never would be.
3017
* This is because a page can only be freed when
3018
* the head operation number has passed the
3019
* page operation number.
3021
* Which indicates that the page has been written
3025
* As a result I now use mutex so that only one
3026
* thread can flush at a time.
3029
if (!xt_flush_record_row(ot, NULL, FALSE))
3032
/* This was before the table data flush,
3033
* (after xt_tab_store_header() above,
3034
* but I don't think it makes any difference.
3035
* Because in the checkpointer it was at this
3038
if (!xt_flush_indices(ot, NULL, FALSE, NULL))
3043
// Create an open table handle for tab.
//
// An XTOpenTableRec is allocated, its leading part (up to ot_ind_wbuf) is
// zeroed, a heap reference is taken on tab, the row, record and index files
// are opened, and row read and write buffers of dic_rec_size bytes are
// allocated.
// NOTE(review): tab_close_table(ot) at the end looks like the failure
// cleanup path - confirm against the full source.
static XTOpenTablePtr tab_open_table(XTTableHPtr tab)
3045
volatile XTOpenTablePtr ot;
3048
if (!(ot = (XTOpenTablePtr) xt_malloc_ns(sizeof(XTOpenTableRec))))
3050
// Only the fields that precede ot_ind_wbuf are cleared.
memset(ot, 0, offsetof(XTOpenTableRec, ot_ind_wbuf));
3052
ot->ot_seq_page = NULL;
3053
ot->ot_seq_data = NULL;
3055
self = xt_get_self();
3057
xt_heap_reference(self, tab);
3059
ot->ot_row_file = xt_open_file(self, ot->ot_table->tab_row_file->fil_path, xt_row_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_DEFAULT, xt_db_row_file_grow_size);
3060
ot->ot_rec_file = xt_open_file(self, ot->ot_table->tab_rec_file->fil_path, xt_rec_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_DEFAULT, xt_db_data_file_grow_size);
3061
// The index file is opened with XT_FS_MISSING_OK in both variants; direct
// I/O is added when XT_USE_DIRECT_IO_ON_INDEX is defined.
#ifdef XT_USE_DIRECT_IO_ON_INDEX
3062
ot->ot_ind_file = xt_open_file(self, ot->ot_table->tab_ind_file->fil_path, xt_ind_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_MISSING_OK | XT_FS_DIRECT_IO, XT_INDEX_PAGE_SIZE*256);
3064
ot->ot_ind_file = xt_open_file(self, ot->ot_table->tab_ind_file->fil_path, xt_ind_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_MISSING_OK, XT_INDEX_PAGE_SIZE*256);
3072
// The index file is not part of this check.
if (!ot->ot_table || !ot->ot_row_file || !ot->ot_rec_file)
3075
if (!(ot->ot_row_rbuffer = (xtWord1 *) xt_malloc_ns(ot->ot_table->tab_dic.dic_rec_size)))
3077
ot->ot_row_rbuf_size = ot->ot_table->tab_dic.dic_rec_size;
3078
if (!(ot->ot_row_wbuffer = (xtWord1 *) xt_malloc_ns(ot->ot_table->tab_dic.dic_rec_size)))
3080
ot->ot_row_wbuf_size = ot->ot_table->tab_dic.dic_rec_size;
3082
/* Cache this stuff to speed access a bit: */
3083
ot->ot_rec_fixed = ot->ot_table->tab_dic.dic_rec_fixed;
3084
ot->ot_rec_size = ot->ot_table->tab_dic.dic_rec_size;
3089
tab_close_table(ot);
3093
// Public entry point for opening a table handle.
// Simply forwards to tab_open_table() and returns its result.
xtPublic XTOpenTablePtr xt_open_table(XTTableHPtr tab)
3095
return tab_open_table(tab);
3098
// Close an open-table handle.
// Record/row data and the indices are flushed with have_table_lock passed
// through; a failed flush is logged and cleared with
// xt_log_and_clear_exception_ns() instead of being reported to the caller.
// The handle is then handed to tab_close_table().
// NOTE(review): `flush` presumably gates the two flush calls - the test
// itself is not visible here, confirm.
xtPublic void xt_close_table(XTOpenTablePtr ot, xtBool flush, xtBool have_table_lock)
3101
if (!xt_flush_record_row(ot, NULL, have_table_lock))
3102
xt_log_and_clear_exception_ns();
3104
if (!xt_flush_indices(ot, NULL, have_table_lock, NULL))
3105
xt_log_and_clear_exception_ns();
3107
tab_close_table(ot);
3110
// Look up a table by its ID in `db` and hand back a table handle in *r_tab.
// The db->db_tables hash table is locked for the duration of the lookup
// (the unlock is registered with pushr_ and run by freer_).
// If the table entry has no handle yet, the table path is assembled from
// the entry's directory path and table name and tab_new_handle() is called;
// its return value becomes the result code `r`.
// NOTE(review): XT_ERR_NO_DATABASE_IN_USE is presumably thrown when `db`
// is NULL, XT_TAB_NOT_FOUND presumably set when no entry exists, and
// xt_heap_reference() presumably applies to an already-open handle - the
// surrounding conditions are not visible here, confirm.
static int tab_use_table_by_id(XTThreadPtr self, XTTableHPtr *r_tab, XTDatabaseHPtr db, xtTableID tab_id)
3112
XTTableEntryPtr te_ptr;
3113
XTTableHPtr tab = NULL;
3115
char path[PATH_MAX];
3118
xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
3119
xt_ht_lock(self, db->db_tables);
3120
pushr_(xt_ht_unlock, db->db_tables);
3122
te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
3124
if (!(tab = te_ptr->te_table)) {
3125
/* Open the table: */
3126
// path = <table directory> + directory separator + <table name>
xt_strcpy(PATH_MAX, path, te_ptr->te_tab_path->tp_path);
3127
xt_add_dir_char(PATH_MAX, path);
3128
xt_strcat(PATH_MAX, path, te_ptr->te_tab_name);
3129
r = tab_new_handle(self, &tab, db, tab_id, (XTPathStrPtr) path, TRUE, NULL);
3133
r = XT_TAB_NOT_FOUND;
3136
xt_heap_reference(self, tab);
3139
freer_(); // xt_ht_unlock(db->db_tables)
3143
// Public wrapper around tab_use_table_by_id().
// When the lookup does not return XT_TAB_OK the result code is examined:
// XT_TAB_NO_DICTIONARY raises XT_ERR_NO_DICTIONARY and XT_TAB_POOL_CLOSED
// raises XT_ERR_TABLE_LOCKED, both carrying the table ID.
// NOTE(review): how `result` is used, and what happens for
// XT_TAB_NOT_FOUND, is not visible here - presumably the code is reported
// through *result when the caller supplies it; confirm.
xtPublic XTTableHPtr xt_use_table_by_id(XTThreadPtr self, XTDatabaseHPtr db, xtTableID tab_id, int *result)
3148
r = tab_use_table_by_id(self, &tab, db, tab_id);
3150
if (r != XT_TAB_OK) {
3157
case XT_TAB_NOT_FOUND:
3159
case XT_TAB_NO_DICTIONARY:
3160
xt_throw_ulxterr(XT_CONTEXT, XT_ERR_NO_DICTIONARY, (u_long) tab_id);
3161
case XT_TAB_POOL_CLOSED:
3162
xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TABLE_LOCKED, (u_long) tab_id);
3171
// Variant of xt_use_table_by_id() for callers without a thread pointer:
// the current thread is obtained with xt_get_self() and NULL is passed
// for the `result` argument.
// NOTE(review): the "_ns" suffix presumably means exceptions are caught
// here rather than propagated - the handling is not visible, confirm.
xtPublic XTTableHPtr xt_use_table_by_id_ns(XTDatabaseHPtr db, xtTableID tab_id)
3174
XTThreadPtr self = xt_get_self();
3177
tab = xt_use_table_by_id(self, db, tab_id, NULL);
3186
/* The fixed part of the record is already in the row buffer.
3187
* This function loads the extended part, expanding the row
3188
* buffer if necessary.
3190
// Load the extended (data log) part of record load_rec_id and convert the
// complete row into `buffer` with myxt_load_row(), using cols_req.
// The log reference and extended size are taken from the record header
// that is already in ot->ot_row_rbuffer; the row buffer is grown with
// xt_realloc_ns() when fixed part + extended part do not fit.
// The extended entry is read so that its data lands directly behind the
// fixed part, which means the entry header temporarily overwrites the end
// of the fixed part; those bytes are saved first and restored afterwards.
// The entry read is validated against the expected size, table ID and
// record ID; a mismatch registers XT_ERR_BAD_EXT_RECORD.
xtPublic xtBool xt_tab_load_ext_data(XTOpenTablePtr ot, xtRecordID load_rec_id, xtWord1 *buffer, u_int cols_req)
3194
xtLogOffset log_offset;
3195
xtWord1 save_buffer[offsetof(XTactExtRecEntryDRec, er_data)];
3196
xtBool retried = FALSE;
3197
XTactExtRecEntryDPtr ext_data_ptr;
3199
xtTableID curr_tab_id;
3200
xtRecordID curr_rec_id;
3202
log_size = XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->re_log_dat_siz_4);
3203
XT_GET_LOG_REF(log_id, log_offset, (XTTabRecExtDPtr) ot->ot_row_rbuffer);
3205
// Grow the row read buffer if the fixed plus extended part does not fit.
if (ot->ot_rec_size + log_size > ot->ot_row_rbuf_size) {
3206
if (!xt_realloc_ns((void **) &ot->ot_row_rbuffer, ot->ot_rec_size + log_size))
3208
ot->ot_row_rbuf_size = ot->ot_rec_size + log_size;
3211
/* Read the extended part first: */
3212
// Position the entry so that er_data starts at offset ot_rec_size.
ext_data_ptr = (XTactExtRecEntryDPtr) (ot->ot_row_rbuffer + ot->ot_rec_size - offsetof(XTactExtRecEntryDRec, er_data));
3214
/* Save the data which the header will overwrite: */
3215
memcpy(save_buffer, ext_data_ptr, offsetof(XTactExtRecEntryDRec, er_data));
3218
// Memory tables read the extended record from the table itself,
// other tables read it from the data log of the current thread.
if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
3219
xt_tab_read_ext_record(ot->ot_table, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + log_size, (xtWord1 *) ext_data_ptr);
3221
if (!ot->ot_thread->st_dlog_buf.dlb_read_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + log_size, (xtWord1 *) ext_data_ptr, ot->ot_thread))
3225
log_size2 = XT_GET_DISK_4(ext_data_ptr->er_data_size_4);
3226
curr_tab_id = XT_GET_DISK_4(ext_data_ptr->er_tab_id_4);
3227
curr_rec_id = XT_GET_DISK_4(ext_data_ptr->er_rec_id_4);
3229
if (log_size2 != log_size || curr_tab_id != ot->ot_table->tab_id || curr_rec_id != load_rec_id) {
3230
/* [(3)] This can happen in the following circumstances:
3231
* - A new record is created, but the data log is not
3233
* - The server quits.
3234
* - On restart the transaction is rolled back, but the data record
3235
* was not written, so later a new record could be written at this
3237
* - Later the sweeper tries to cleanup this record, and finds
3238
* that a different record has been written at this position.
3240
* NOTE: Index entries can only be written to disk for records
3241
* that have been committed to the disk, because uncommitted
3242
* records may not exist in order to remove the index entry
3245
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_BAD_EXT_RECORD);
3249
/* Restore the saved area: */
3250
memcpy(ext_data_ptr, save_buffer, offsetof(XTactExtRecEntryDRec, er_data));
3253
xt_unlock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
3254
return myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req);
3258
/* (1) It may be that reading the log fails because the garbage collector
3259
* has moved the record since we determined the location.
3260
* We handle this here, by re-reading the data the garbage collector
3261
* would have updated.
3263
* (2) It may also happen that a new record is just being updated or
3264
* inserted. It is possible that the handle part of the record
3265
* has been written, but not yet the overflow.
3266
* This means that repeating the read attempt could work.
3268
* (3) The extended data has been written by another handler and not yet
3269
* flushed. This should not happen because only committed extended
3270
* records are read, and all data should be flushed before
3273
* NOTE: (2) above is not a problem when versioning is working
3274
* correctly. In this case, we should never try to read the extended
3275
* part of an uncommitted record (belonging to some other thread/
3278
XTTabRecExtDRec rec_buf;
3280
xt_lock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
3283
// Re-read the record header to pick up a possibly changed log reference.
if (!xt_tab_get_rec_data(ot, load_rec_id, XT_REC_EXT_HEADER_SIZE, (xtWord1 *) &rec_buf))
3286
XT_GET_LOG_REF(log_id, log_offset, &rec_buf);
3292
xt_unlock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
3296
// Write `size` bytes from `buffer` to record rec_id, starting at offset 0
// of the record, through tab->tab_recs.xt_tc_write() on the record file.
// op_seq is passed straight through to the write call.
// Returns the result of xt_tc_write().
xtPublic xtBool xt_tab_put_rec_data(XTOpenTablePtr ot, xtRecordID rec_id, size_t size, xtWord1 *buffer, xtOpSeqNo *op_seq)
3298
register XTTableHPtr tab = ot->ot_table;
3302
return tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, op_seq, TRUE, ot->ot_thread);
3305
// Write record data and then log the change with xt_xlog_modify_table().
// The operation sequence number produced by the write (local op_seq) is
// the one handed to the log call.
// For XT_LOG_ENT_REC_MOVED the data is written at the offset of the
// re_log_id_2 field inside the record; XT_LOG_ENT_REC_CLEANED_1 is no
// longer expected (assert); the remaining write starts at offset 0.
// Returns the result of xt_xlog_modify_table().
xtPublic xtBool xt_tab_put_log_op_rec_data(XTOpenTablePtr ot, u_int status, xtRecordID free_rec_id, xtRecordID rec_id, size_t size, xtWord1 *buffer)
3307
register XTTableHPtr tab = ot->ot_table;
3312
if (status == XT_LOG_ENT_REC_MOVED) {
3313
if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, offsetof(XTTabRecExtDRec, re_log_id_2), size, buffer, &op_seq, TRUE, ot->ot_thread))
3317
else if (status == XT_LOG_ENT_REC_CLEANED_1) {
3318
ASSERT_NS(0); // shouldn't be used anymore
3322
if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, &op_seq, TRUE, ot->ot_thread))
3326
return xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, free_rec_id, rec_id, size, buffer, ot->ot_thread);
3329
// Same as xt_tab_put_log_op_rec_data(), except that the operation sequence
// number is written to the caller-supplied *op_seq, which is then passed
// to xt_xlog_modify_table().
// For XT_LOG_ENT_REC_MOVED the data is written at the offset of the
// re_log_id_2 field inside the record; the other write starts at offset 0.
// Returns the result of xt_xlog_modify_table().
xtPublic xtBool xt_tab_put_log_rec_data(XTOpenTablePtr ot, u_int status, xtRecordID free_rec_id, xtRecordID rec_id, size_t size, xtWord1 *buffer, xtOpSeqNo *op_seq)
3331
register XTTableHPtr tab = ot->ot_table;
3335
if (status == XT_LOG_ENT_REC_MOVED) {
3336
if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, offsetof(XTTabRecExtDRec, re_log_id_2), size, buffer, op_seq, TRUE, ot->ot_thread))
3340
if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, op_seq, TRUE, ot->ot_thread))
3344
return xt_xlog_modify_table(tab->tab_id, status, *op_seq, 0, free_rec_id, rec_id, size, buffer, ot->ot_thread);
3347
// Read `size` bytes of record rec_id into `buffer` through
// tab->tab_recs.xt_tc_read() on the record file.
// Returns the result of xt_tc_read().
xtPublic xtBool xt_tab_get_rec_data(XTOpenTablePtr ot, xtRecordID rec_id, size_t size, xtWord1 *buffer)
3349
register XTTableHPtr tab = ot->ot_table;
3353
return tab->tab_recs.xt_tc_read(ot->ot_rec_file, rec_id, (size_t) size, buffer, ot->ot_thread);
3357
* Note: this function grants locks even to transactions that
3358
* are not specifically waiting for this transaction.
3359
* This is required, because all threads waiting for
3360
* a lock should be considered "equal". In other words,
3361
* they should not have to wait for the "right" transaction
3362
* before they get the lock, or it will turn into a
3363
* race to wait for the correct transaction.
3365
* A transaction T1 can end up waiting for the wrong transaction
3366
* T2, because T2 has released the lock, and given it to T3.
3367
* Of course, T1 will wake up soon and realize this, but
3368
* it is a matter of timing.
3370
* The main point is that T2 has released the lock because
3371
* it has ended (see {RELEASING-LOCKS} for more details)
3372
* and therefore, there is no danger of it claiming the
3373
* lock again, which can lead to a deadlock if T1 is
3374
* given the lock instead of T3 in the example above.
3375
* Then, if T2 tries to regain the lock before T1
3376
* realizes that it has the lock.
3378
//static xtBool tab_get_lock_after_wait(XTThreadPtr thread, XTLockWaitPtr lw)
3380
// register XTTableHPtr tab = lw->lw_ot->ot_table;
3383
* I don't believe this lock is required. If it is, please explain why!!
3384
* XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[gl->lw_row_id % XT_ROW_RWLOCKS], thread);
3386
* With the old row lock implementation a XT_TAB_ROW_WRITE_LOCK was required because
3387
* the row locking did not have its own locks.
3388
* The new list locking has its own locks. I was using XT_TAB_ROW_READ_LOCK,
3389
* but I don't think this is required.
3391
// return tab->tab_locks.xt_set_temp_lock(lw->lw_ot, lw, &lw->lw_thread->st_lock_list);
3395
* NOTE: Previously this function did not gain the row lock.
3396
* If this change is a problem, please document why!
3397
* The previous implementation did wait until no lock was on the
3400
* I am thinking that it is simply a good idea to grab the lock,
3401
* instead of waiting for no lock, before the retry. But it could
3402
* result in locking more than required!
3404
// Wait for transaction xn_id, which has updated row row_id, to end.
// A temporary row lock is requested first via xt_set_temp_lock(). If the
// row is currently locked (lw_curr_lock is not XT_NO_LOCK) the thread
// waits for the lock and the transaction together, otherwise it waits
// for the transaction only.
// NOTE(review): presumably returns `ok` from xt_xn_wait_for_xact() and
// fails early when the lock request fails - the returns are not visible
// here, confirm.
static xtBool tab_wait_for_update(register XTOpenTablePtr ot, xtRowID row_id, xtXactID xn_id, XTThreadPtr thread)
3410
xw.xw_xn_id = xn_id;
3412
lw.lw_thread = thread;
3414
lw.lw_row_id = row_id;
3415
lw.lw_row_updated = FALSE;
3417
/* First try to get the lock: */
3418
if (!ot->ot_table->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list))
3420
if (lw.lw_curr_lock != XT_NO_LOCK)
3421
/* Wait for the lock, then the transaction: */
3422
ok = xt_xn_wait_for_xact(thread, &xw, &lw);
3424
/* Just wait for the transaction: */
3425
ok = xt_xn_wait_for_xact(thread, &xw, NULL);
3427
#ifdef DEBUG_LOCK_QUEUE
3428
ot->ot_table->tab_locks.rl_check(&lw);
3434
* XT_OLD - The record is old. No longer visible because there is
3435
* newer committed record before it in the record list.
3436
* This is a special case of FALSE (the record is not visible).
3437
* (see {WAIT-FOR} for details).
3438
* It is significant because if we find too many of these when
3439
* searching for records, then we have reason to believe the
3440
* sweeper is far behind. This can happen in a test like this:
3441
* runTest(INCREMENT_TEST, 2, INCREMENT_TEST_UPDATE_COUNT);
3442
* What happens is T1 detects an updated row by T2,
3443
* but T2 has not committed yet.
3444
* It waits for T2. T2 commits and updates again before T1
3447
* Of course if we got a lock on the row when T2 quits, then
3448
* this would not happen!
3452
* Is a record visible?
3453
* Returns TRUE, FALSE, XT_ERR.
3455
* TRUE - The record is visible.
3456
* FALSE - The record is not visible.
3457
* XT_ERR - An exception (error) occurred.
3458
* XT_NEW - The most recent variation of this row has been returned
3459
* and is to be used instead of the input!
3460
* XT_REREAD - Re-read the record, and try again.
3462
* Basically, a record is visible if it was committed on or before
3463
* the transactions "visible time" (st_visible_time), and there
3464
* are no other visible records before this record in the
3465
* variation chain for the record.
3467
* This holds in general, but you don't always get to see the
3468
* visible record (as defined in this sense).
3470
* On any kind of update (SELECT FOR UPDATE, UPDATE or DELETE), you
3471
* get to see the most recent variation of the row!
3473
* So on update, this function will wait if necessary for a recent
3474
* update to be committed.
3476
* So an update is a kind of "committed read" with a wait for
3477
* uncommitted records.
3480
* - INSERTS may not be seen by the update read, depending on when
3482
* - Records may be returned in non-index order.
3483
* - New records returned must be checked again by an index scan
3484
* to make sure they conform to the condition!
3486
* CREATE TABLE test_tab (ID int primary key, Value int, Name varchar(20),
3487
* index(Value, Name)) ENGINE=pbxt;
3488
* INSERT test_tab values(4, 2, 'D');
3489
* INSERT test_tab values(5, 2, 'E');
3490
* INSERT test_tab values(6, 2, 'F');
3491
* INSERT test_tab values(7, 2, 'G');
3495
* select * from test_tab where id = 6 for update;
3498
* select * from test_tab where value = 2 order by value, name for update;
3500
* update test_tab set Name = 'A' where id = 7;
3503
* Result order D, E, F, A.
3505
* But Jim does it like this, so it should be OK.
3507
static int tab_visible(register XTOpenTablePtr ot, XTTabRecHeadDPtr rec_head, xtRecordID *new_rec_id)
3509
XTThreadPtr thread = ot->ot_thread;
3511
XTTabRecHeadDRec var_head;
3513
xtRecordID var_rec_id;
3514
register XTTableHPtr tab;
3515
xtBool wait = FALSE;
3516
xtXactID wait_xn_id = 0;
3517
#ifdef TRACE_VARIATIONS
3523
xtRecordID invalid_rec;
3526
/* It can be that between the time that I read the index,
3527
* and the time that I try to access the
3528
* record, that the record is removed by
3531
if (XT_REC_NOT_VALID(rec_head->tr_rec_type_1))
3534
row_id = XT_GET_DISK_4(rec_head->tr_row_id_4);
3536
/* This can happen if the row has been removed, and
3539
if (ot->ot_curr_row_id && row_id != ot->ot_curr_row_id)
3542
#ifdef TRACE_VARIATIONS
3543
len = sprintf(t_buf, "row=%d rec=%d ", (int) row_id, (int) ot->ot_curr_rec_id);
3545
if (!(rec_clean = XT_REC_IS_CLEAN(rec_head->tr_rec_type_1))) {
3546
/* The record is not clean, which means it has not been swept.
3547
* So we have to check if it is visible.
3549
xn_id = XT_GET_DISK_4(rec_head->tr_xact_id_4);
3550
switch (xt_xn_status(ot, xn_id, ot->ot_curr_rec_id)) {
3553
case XT_XN_NOT_VISIBLE:
3554
if (ot->ot_for_update) {
3555
/* It is visible, only if it is an insert,
3556
* which means it has no previous variation.
3557
* Note, if an insert is updated, the record
3558
* should be overwritten (TODO - check this).
3560
var_rec_id = XT_GET_DISK_4(rec_head->tr_prev_rec_id_4);
3563
#ifdef TRACE_VARIATIONS
3565
len += sprintf(t_buf+len, "OTHER COMMIT (OVERWRITTEN) T%d\n", (int) xn_id);
3566
xt_ttracef(thread, "%s", t_buf);
3569
#ifdef TRACE_VARIATIONS
3572
len += sprintf(t_buf+len, "OTHER COMMIT T%d\n", (int) xn_id);
3573
xt_ttracef(thread, "%s", t_buf);
3577
* The record is not visible, although it has been committed.
3578
* Clean the transaction ASAP.
3580
ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
3584
* Reading an aborted record, this transaction
3585
* must be cleaned up ASAP!
3587
ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
3588
#ifdef TRACE_VARIATIONS
3590
len += sprintf(t_buf+len, "ABORTED T%d\n", (int) xn_id);
3591
xt_ttracef(thread, "%s", t_buf);
3594
case XT_XN_MY_UPDATE:
3595
/* This is a record written by this transaction. */
3596
if (thread->st_is_update) {
3597
/* Check that it was not written by the current update statement: */
3598
if (XT_STAT_ID_MASK(ot->ot_update_id) == rec_head->tr_stat_id_1) {
3599
#ifdef TRACE_VARIATIONS
3601
len += sprintf(t_buf+len, "MY UPDATE IN THIS STATEMENT T%d\n", (int) xn_id);
3602
xt_ttracef(thread, "%s", t_buf);
3607
ot->ot_curr_row_id = row_id;
3608
ot->ot_curr_updated = TRUE;
3609
if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
3611
/* It is visible if it is at the front of the list.
3612
* An update can end up not being at the front of the list
3613
* if it is deleted afterwards!
3615
#ifdef TRACE_VARIATIONS
3617
if (var_rec_id == ot->ot_curr_rec_id)
3618
len += sprintf(t_buf+len, "MY UPDATE T%d\n", (int) xn_id);
3620
len += sprintf(t_buf+len, "MY UPDATE (OVERWRITTEN) T%d\n", (int) xn_id);
3622
xt_ttracef(thread, "%s", t_buf);
3624
return var_rec_id == ot->ot_curr_rec_id;
3625
case XT_XN_OTHER_UPDATE:
3626
if (ot->ot_for_update) {
3627
/* If this is an insert, we are interested!
3628
* Updated values are handled below. This is because
3629
* the changed (new) records returned below are always
3630
* followed (in the version chain) by the record
3631
* we would have returned (if nothing had changed).
3633
* As a result, we only return records here which have
3636
var_rec_id = XT_GET_DISK_4(rec_head->tr_prev_rec_id_4);
3638
#ifdef TRACE_VARIATIONS
3640
len += sprintf(t_buf+len, "OTHER INSERT (WAIT FOR) T%d\n", (int) xn_id);
3641
xt_ttracef(thread, "%s", t_buf);
3643
if (!tab_wait_for_update(ot, row_id, xn_id, thread))
3645
if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
3647
rec_head = &var_head;
3651
#ifdef TRACE_VARIATIONS
3653
len += sprintf(t_buf+len, "OTHER UPDATE T%d\n", (int) xn_id);
3654
xt_ttracef(thread, "%s", t_buf);
3658
#ifdef TRACE_VARIATIONS
3660
len += sprintf(t_buf+len, "REREAD?! T%d\n", (int) xn_id);
3661
xt_ttracef(thread, "%s", t_buf);
3667
/* Follow the variation chain until we come to this record.
3668
* If it is not the first visible variation then
3669
* it is not visible at all. If it is not found on the
3670
* variation chain, it is also not visible.
3676
#ifdef XT_USE_LIST_BASED_ROW_LOCKS
3677
/* The list based row locks use their own locks, so
3678
* it is not necessary to get a write lock here.
3680
XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3682
if (ot->ot_for_update)
3683
XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3685
XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3690
if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
3692
#ifdef TRACE_VARIATIONS
3693
len += sprintf(t_buf+len, "ROW=%d", (int) row_id);
3695
while (var_rec_id != ot->ot_curr_rec_id) {
3697
#ifdef TRACE_VARIATIONS
3698
xt_ttracef(thread, "row=%d rec=%d NOT VISI not found in list\n", (int) row_id, (int) ot->ot_curr_rec_id);
3702
if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
3704
#ifdef TRACE_VARIATIONS
3706
len += sprintf(t_buf+len, " -> %d(%d)", (int) var_rec_id, (int) var_head.tr_rec_type_1);
3708
/* All clean records are visible, by all transactions: */
3709
if (XT_REC_IS_CLEAN(var_head.tr_rec_type_1)) {
3710
#ifdef TRACE_VARIATIONS
3711
xt_ttracef(thread, "row=%d rec=%d NOT VISI clean rec found\n", (int) row_id, (int) ot->ot_curr_rec_id);
3715
if (XT_REC_IS_FREE(var_head.tr_rec_type_1)) {
3716
#ifdef TRACE_VARIATIONS
3717
xt_ttracef(thread, "row=%d rec=%d NOT VISI free rec found?!\n", (int) row_id, (int) ot->ot_curr_rec_id);
3720
* After an analysis we came to the conclusion that this situation is
3721
* possible and valid. It can happen if index scan and row deletion
3724
* Client Thread Sweeper
3725
* ------------- -------
3726
* 1. start index scan, lock the index file.
3727
* 2. start row deletion, wait for index lock
3728
* 3. unlock the index file, start search for
3729
* the valid version of the record
3730
* 4. delete the row, mark record as freed,
3731
* but not yet cleaned by sweeper
3732
* 5. observe the record being freed
3734
* after these steps we can get here, if the record was marked as free after
3735
* the tab_visible was entered by the scanning thread.
3738
if (invalid_rec != var_rec_id) {
3739
/* This was "var_rec_id = invalid_rec", caused an infinite loop (bug #310184!) */
3740
invalid_rec = var_rec_id;
3743
/* Assume end of list. */
3747
/* This can happen if the row has been removed, and
3750
if (row_id != XT_GET_DISK_4(var_head.tr_row_id_4))
3753
xn_id = XT_GET_DISK_4(var_head.tr_xact_id_4);
3754
/* This variation is visible if committed before this
3755
* transaction started, or updated by this transaction.
3757
* We now know that this is the valid variation for
3758
* this record (for this table) for this transaction!
3759
* This will not change, unless the transaction
3760
* updates the record (again).
3762
* So we can store this information as a hint, if
3763
* we see other variations belonging to this record,
3764
* then we can ignore them immediately!
3766
switch (xt_xn_status(ot, xn_id, var_rec_id)) {
3769
* We have encountered a record that has been overwritten, if the
3770
* record has not been cleaned, then the sweeper is too far
3774
ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
3775
#ifdef TRACE_VARIATIONS
3776
xt_ttracef(thread, "row=%d rec=%d NOT VISI committed rec found\n", (int) row_id, (int) ot->ot_curr_rec_id);
3779
case XT_XN_NOT_VISIBLE:
3780
if (ot->ot_for_update) {
3781
/* Substitute this record for the one we
3784
if (result == TRUE) {
3785
if (XT_REC_IS_DELETE(var_head.tr_rec_type_1))
3788
*new_rec_id = var_rec_id;
3795
/* Ignore the record, it will be removed. */
3797
case XT_XN_MY_UPDATE:
3798
#ifdef TRACE_VARIATIONS
3799
xt_ttracef(thread, "row=%d rec=%d NOT VISI my update found\n", (int) row_id, (int) ot->ot_curr_rec_id);
3802
case XT_XN_OTHER_UPDATE:
3803
/* Wait for this update to commit or abort: */
3808
#ifdef TRACE_VARIATIONS
3810
len += sprintf(t_buf+len, "-T%d", (int) wait_xn_id);
3815
* TODO: This is not as "correct" as it could be.
3816
* Such records should be considered to be aborted,
3817
* and removed from the list.
3819
if (invalid_rec != var_rec_id) {
3820
invalid_rec = var_rec_id;
3823
if (!tab_record_corrupt(ot, row_id, var_rec_id, true, 1))
3826
/* Assume end of list. */
3827
#ifdef XT_CRASH_DEBUG
3828
/* Should not happen! */
3833
var_rec_id = XT_GET_DISK_4(var_head.tr_prev_rec_id_4);
3835
#ifdef TRACE_VARIATIONS
3837
sprintf(t_buf+len, " -> %d(%d)\n", (int) var_rec_id, (int) rec_head->tr_rec_type_1);
3839
sprintf(t_buf+len, " ...\n");
3840
//xt_ttracef(thread, "%s", t_buf);
3843
if (ot->ot_for_update) {
3848
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3849
#ifdef TRACE_VARIATIONS
3850
xt_ttracef(thread, "T%d WAIT FOR T%d (will retry)\n", (int) thread->st_xact_data->xd_start_xn_id, (int) wait_xn_id);
3852
if (!tab_wait_for_update(ot, row_id, wait_xn_id, thread))
3857
* Retry in order to try to avoid missing
3858
* any records that we should see in FOR UPDATE
3861
* We also want to take another look at the record
3862
* we just tried to read.
3864
* If it has been updated, then a new record has
3865
* been created. This will be detected when we
3866
* try to read it again, and XT_NEW will be returned.
3868
thread->st_statistics.st_retry_index_scan++;
3872
/* {ROW-LIST-LOCK} */
3873
lw.lw_thread = thread;
3875
lw.lw_row_id = row_id;
3876
lw.lw_row_updated = FALSE;
3877
ok = tab->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list);
3878
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3880
#ifdef DEBUG_LOCK_QUEUE
3881
ot->ot_table->tab_locks.rl_check(&lw);
3885
if (lw.lw_curr_lock != XT_NO_LOCK) {
3886
#ifdef TRACE_VARIATIONS
3887
xt_ttracef(thread, "T%d WAIT FOR LOCK(%s) T%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) lw.lw_curr_lock == XT_TEMP_LOCK ? "temp" : "perm", (int) xn_id);
3889
if (!xt_xn_wait_for_xact(thread, NULL, &lw)) {
3890
#ifdef DEBUG_LOCK_QUEUE
3891
ot->ot_table->tab_locks.rl_check(&lw);
3895
#ifdef DEBUG_LOCK_QUEUE
3896
ot->ot_table->tab_locks.rl_check(&lw);
3898
#ifdef TRACE_VARIATIONS
3899
len = sprintf(t_buf, "(retry): row=%d rec=%d ", (int) row_id, (int) ot->ot_curr_rec_id);
3902
* Reset the result before we go down the list again, to make sure we
3903
* get the latest record!!
3906
thread->st_statistics.st_reread_record_list++;
3909
#ifdef DEBUG_LOCK_QUEUE
3910
ot->ot_table->tab_locks.rl_check(&lw);
3914
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3917
#ifdef TRACE_VARIATIONS
3918
if (result == XT_NEW)
3919
xt_ttracef(thread, "row=%d rec=%d RETURN NEW %d\n", (int) row_id, (int) ot->ot_curr_rec_id, (int) *new_rec_id);
3921
xt_ttracef(thread, "row=%d rec=%d VISIBLE\n", (int) row_id, (int) ot->ot_curr_rec_id);
3923
xt_ttracef(thread, "row=%d rec=%d RETURN NOT VISIBLE (NEW)\n", (int) row_id, (int) ot->ot_curr_rec_id);
3926
ot->ot_curr_row_id = row_id;
3927
ot->ot_curr_updated = FALSE;
3931
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3935
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3940
* Return TRUE if the record has been read, and is visible.
3941
* Return FALSE if the record is not visible.
3942
* Return XT_ERR if an error occurs.
3944
// Check whether the current record of `ot` (ot_curr_rec_id) is visible.
// Fast path: when ot_curr_row_id is already set, the head of the row's
// record list is fetched with xt_tab_get_row(); if the current record is
// that head and the handle is open for update, a temporary row lock is
// requested and, if the row is locked, the thread waits for the lock.
// Otherwise the record header is read and tab_visible() decides; when it
// supplies a new record ID, ot_curr_rec_id is switched to it.
// NOTE(review): read_again presumably limits the re-read case to one
// retry before tab_record_corrupt() is called - the switch labels are not
// visible here, confirm.
xtPublic int xt_tab_visible(XTOpenTablePtr ot)
3947
XTTabRecHeadDRec rec_head;
3948
xtRecordID new_rec_id;
3949
xtBool read_again = FALSE;
3952
if ((row_id = ot->ot_curr_row_id)) {
3953
/* Fast track, do a quick check.
3954
* Row ID is only set if this record has been committed,
3956
* Check if it is the first on the list!
3958
xtRecordID var_rec_id;
3961
if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
3963
if (ot->ot_curr_rec_id == var_rec_id) {
3965
if (ot->ot_for_update) {
3966
XTThreadPtr thread = ot->ot_thread;
3967
XTTableHPtr tab = ot->ot_table;
3970
/* {ROW-LIST-LOCK} */
3971
lw.lw_thread = thread;
3973
lw.lw_row_id = row_id;
3974
lw.lw_row_updated = FALSE;
3975
if (!tab->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list)) {
3976
#ifdef DEBUG_LOCK_QUEUE
3977
ot->ot_table->tab_locks.rl_check(&lw);
3981
// The row is locked by someone else: wait for the lock only
// (no transaction to wait for is passed).
if (lw.lw_curr_lock != XT_NO_LOCK) {
3982
if (!xt_xn_wait_for_xact(thread, NULL, &lw)) {
3983
#ifdef DEBUG_LOCK_QUEUE
3984
ot->ot_table->tab_locks.rl_check(&lw);
3988
#ifdef DEBUG_LOCK_QUEUE
3989
ot->ot_table->tab_locks.rl_check(&lw);
3993
#ifdef DEBUG_LOCK_QUEUE
3994
ot->ot_table->tab_locks.rl_check(&lw);
4002
// Slow path: read the record header and run the full visibility check.
if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
4005
switch ((r = tab_visible(ot, &rec_head, &new_rec_id))) {
4007
ot->ot_curr_rec_id = new_rec_id;
4010
/* Avoid infinite loop: */
4012
/* Should not happen! */
4013
if (!tab_record_corrupt(ot, row_id, ot->ot_curr_rec_id, true, 2))
4015
#ifdef XT_CRASH_DEBUG
4016
/* Generate a core dump! */
4030
* Read a record, and return one of the following:
4031
* TRUE - the record has been read, and is visible.
4032
* FALSE - the record is not visible.
4033
* XT_ERR - an error occurs.
4034
* XT_NEW - Means the expected record has been changed.
4035
* When doing an index scan, the conditions must be checked again!
4037
// Read the current record (ot_curr_rec_id) into the row buffer, check its
// visibility with tab_visible() and, when it is to be returned, convert
// it into `buffer`.
// A thread without transaction data registers XT_ERR_NO_TRANSACTION.
// When tab_visible() supplies a new record ID, that record is read
// instead and becomes ot_curr_rec_id.
// Conversion depends on the record format: fixed-length records are
// copied directly behind the fixed header, variable-length records go
// through myxt_load_row(), and the remaining (extended) case either
// loads from the fixed part alone, when all requested columns are within
// dic_fix_col_count, or calls xt_tab_load_ext_data().
xtPublic int xt_tab_read_record(register XTOpenTablePtr ot, xtWord1 *buffer)
4039
register XTTableHPtr tab = ot->ot_table;
4040
size_t rec_size = tab->tab_dic.dic_rec_size;
4041
xtRecordID new_rec_id;
4043
xtBool read_again = FALSE;
4045
if (!(ot->ot_thread->st_xact_data)) {
4046
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_TRANSACTION);
4051
if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, rec_size, ot->ot_row_rbuffer))
4054
switch (tab_visible(ot, (XTTabRecHeadDPtr) ot->ot_row_rbuffer, &new_rec_id)) {
4060
if (!xt_tab_get_rec_data(ot, new_rec_id, rec_size, ot->ot_row_rbuffer))
4062
ot->ot_curr_rec_id = new_rec_id;
4068
/* Avoid infinite loop: */
4070
/* Should not happen! */
4071
if (!tab_record_corrupt(ot, XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4), ot->ot_curr_rec_id, true, 3))
4073
#ifdef XT_CRASH_DEBUG
4074
/* Generate a core dump! */
4086
if (ot->ot_rec_fixed)
4087
memcpy(buffer, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
4088
else if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
4089
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
4093
u_int cols_req = ot->ot_cols_req;
4095
ASSERT_NS(cols_req);
4096
// All requested columns are in the fixed part: no need to load the
// extended data.
if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
4097
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
4101
if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
4112
* TRUE/OK - record was read.
4113
* FALSE/FAILED - An error occurred.
4115
// Read the current record (ot_curr_rec_id) without any visibility check
// and convert it into `buffer`.
// A record whose type byte is not valid registers XT_ERR_RECORD_DELETED.
// ot_curr_row_id is set from the record header, and ot_curr_updated is
// set when the record's transaction ID equals the start ID of the
// current thread's transaction.
// The conversion into `buffer` follows the same three cases as
// xt_tab_read_record(): fixed-length copy, variable-length load, or
// extended record (fixed part only when the requested columns allow it,
// otherwise xt_tab_load_ext_data()).
// NOTE(review): st_xact_data is dereferenced without a visible NULL
// check - presumably callers guarantee an active transaction; confirm.
xtPublic int xt_tab_dirty_read_record(register XTOpenTablePtr ot, xtWord1 *buffer)
4117
register XTTableHPtr tab = ot->ot_table;
4118
size_t rec_size = tab->tab_dic.dic_rec_size;
4120
if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, rec_size, ot->ot_row_rbuffer))
4123
if (XT_REC_NOT_VALID(ot->ot_row_rbuffer[0])) {
4124
/* Should not happen! */
4125
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_RECORD_DELETED);
4129
ot->ot_curr_row_id = XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4);
4130
ot->ot_curr_updated =
4131
(XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_xact_id_4) == ot->ot_thread->st_xact_data->xd_start_xn_id);
4133
if (ot->ot_rec_fixed)
4134
memcpy(buffer, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
4135
else if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
4136
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
4140
u_int cols_req = ot->ot_cols_req;
4142
ASSERT_NS(cols_req);
4143
if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
4144
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
4148
if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
4156
// TAB_ROW_LOAD_CACHE is the flag that xt_tab_load_row_pointers() hands to
// xt_tc_get_page(); its value depends on XT_USE_ROW_REC_MMAP_FILES.
#ifdef XT_USE_ROW_REC_MMAP_FILES
4157
/* Loading into cache is not required,
4158
* Instead we copy the memory map to load the
4161
#define TAB_ROW_LOAD_CACHE FALSE
4163
// NOTE(review): presumably the alternative branch, used when
// XT_USE_ROW_REC_MMAP_FILES is not defined - confirm against the full file.
#define TAB_ROW_LOAD_CACHE TRUE
4167
* Pull the entire row pointer file into memory.
4169
// Walks the row pointer file of the open table one cache page at a time,
// but only when the table cache looks large enough to hold all of it.
xtPublic void xt_tab_load_row_pointers(XTThreadPtr self, XTOpenTablePtr ot)
4171
XTTableHPtr tab = ot->ot_table;
4172
xtRecordID eof_rec_id = tab->tab_row_eof_id;
4174
xtWord1 *buffer = NULL;
4176
/* Check if there is enough cache: */
4177
// usage ends up as the larger of xt_tc_get_usage() and xt_tc_get_high().
usage = xt_tc_get_usage();
4178
if (xt_tc_get_high() > usage)
4179
usage = xt_tc_get_high();
4180
// Proceed only if usage plus (eof_rec_id * row record size) stays below
// the cache size; the product is computed in xtInt8.
if (usage + ((xtInt8) eof_rec_id * (xtInt8) tab->tab_rows.tci_rec_size) < xt_tc_get_size()) {
4182
size_t poffset, tfer;
4183
off_t offset, end_offset;
4184
XTTabCachePagePtr page;
4186
end_offset = xt_row_id_to_row_offset(tab, eof_rec_id);
4188
while (rec_id < eof_rec_id) {
4189
if (!tab->tab_rows.xt_tc_get_page(ot->ot_row_file, rec_id, TAB_ROW_LOAD_CACHE, &page, &poffset, self))
4192
tab->tab_rows.xt_tc_release_page(ot->ot_row_file, page, self);
4197
// Scratch buffer of one cache page, released by xt_free() at the end.
buffer = (xtWord1 *) xt_malloc(self, tab->tab_rows.tci_page_size);
4198
offset = xt_row_id_to_row_offset(tab, rec_id);
4199
tfer = tab->tab_rows.tci_page_size;
4200
// Clamp the transfer so that it does not run past end_offset.
if (offset + (off_t) tfer > end_offset)
4201
tfer = (size_t) (end_offset - offset);
4202
XT_LOCK_MEMORY_PTR(buff_ptr, ot->ot_row_file, offset, tfer, &self->st_statistics.st_rec, self);
4204
memcpy(buffer, buff_ptr, tfer);
4205
XT_UNLOCK_MEMORY_PTR(ot->ot_row_file, buff_ptr, self);
4208
// Step forward by one page worth of rows.
rec_id += tab->tab_rows.tci_rows_per_page;
4211
xt_free(self, buffer);
4215
// Preloads a table by calling xt_load_pages() and then xt_load_indices()
// for the given open table.
xtPublic void xt_tab_load_table(XTThreadPtr self, XTOpenTablePtr ot)
4217
xt_load_pages(self, ot);
4218
xt_load_indices(self, ot);
4221
// Loads record rec_id into rec_buf, sizing rec_buf with xt_ib_alloc().
// The raw record is staged in ot->ot_row_rbuffer, and ot->ot_curr_row_id and
// ot->ot_curr_updated are refreshed from its header.
//   ot      - open table handle; its row read buffer is overwritten
//   rec_id  - ID of the record to load
//   rec_buf - info buffer that receives the unpacked row data
xtPublic xtBool xt_tab_load_record(register XTOpenTablePtr ot, xtRecordID rec_id, XTInfoBufferPtr rec_buf)
4223
register XTTableHPtr tab = ot->ot_table;
4224
size_t rec_size = tab->tab_dic.dic_rec_size;
4226
// Pull the complete record (dic_rec_size bytes) into ot_row_rbuffer.
if (!xt_tab_get_rec_data(ot, rec_id, rec_size, ot->ot_row_rbuffer))
4229
if (XT_REC_NOT_VALID(ot->ot_row_rbuffer[0])) {
4230
/* Should not happen! */
4231
XTThreadPtr self = ot->ot_thread;
4233
xt_log(XT_WARNING, "Recently updated record invalid\n");
4237
ot->ot_curr_row_id = XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4);
4238
// Set when the transaction ID stored in the record equals the start ID of
// the transaction currently attached to this thread.
ot->ot_curr_updated =
4239
(XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_xact_id_4) == ot->ot_thread->st_xact_data->xd_start_xn_id);
4241
// Fixed length rows: allocate exactly the payload size and copy it out.
if (ot->ot_rec_fixed) {
4242
size_t size = rec_size - XT_REC_FIX_HEADER_SIZE;
4243
if (!xt_ib_alloc(NULL, rec_buf, size))
4245
memcpy(rec_buf->ib_db.db_data, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, size);
4248
// All other row formats are unpacked into a buffer of dic_mysql_buf_size.
if (!xt_ib_alloc(NULL, rec_buf, tab->tab_dic.dic_mysql_buf_size))
4250
if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
4251
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_buf->ib_db.db_data, ot->ot_cols_req))
4255
u_int cols_req = ot->ot_cols_req;
4257
ASSERT_NS(cols_req);
4258
// When every requested column lies within the first dic_fix_col_count
// columns, the data behind the extended header is unpacked directly.
if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
4259
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, rec_buf->ib_db.db_data, cols_req))
4263
// The overflow data must be loaded for the record that was just read into
// ot_row_rbuffer, which is rec_id. The handle field ot_curr_rec_id is not
// necessarily the same record when a caller passes a different rec_id.
if (!xt_tab_load_ext_data(ot, rec_id, rec_buf->ib_db.db_data, cols_req))
4272
// Puts row_id at the head of the free row list of the table and logs the
// change as XT_LOG_ENT_ROW_FREED.
xtPublic xtBool xt_tab_free_row(XTOpenTablePtr ot, XTTableHPtr tab, xtRowID row_id)
4274
XTTabRowRefDRec free_row;
4278
ASSERT_NS(row_id); // Cannot free the header!
4280
// The free list head and counters are changed under tab_row_lock.
xt_lock_mutex_ns(&tab->tab_row_lock);
4281
// The freed slot stores the ID of the previous list head ...
prev_row = tab->tab_row_free_id;
4282
XT_SET_DISK_4(free_row.rr_ref_id_4, prev_row);
4283
if (!tab->tab_rows.xt_tc_write(ot->ot_row_file, row_id, 0, sizeof(XTTabRowRefDRec), (xtWord1 *) &free_row, &op_seq, TRUE, ot->ot_thread)) {
4284
xt_unlock_mutex_ns(&tab->tab_row_lock);
4287
// ... and the freed row becomes the new head.
tab->tab_row_free_id = row_id;
4288
tab->tab_row_fnum++;
4289
ASSERT_NS(tab->tab_row_fnum < tab->tab_row_eof_id);
4290
xt_unlock_mutex_ns(&tab->tab_row_lock);
4292
// The log entry is written after the mutex has been released.
if (!xt_xlog_modify_table(tab->tab_id, XT_LOG_ENT_ROW_FREED, op_seq, 0, 0, row_id, sizeof(XTTabRowRefDRec), (xtWord1 *) &free_row, ot->ot_thread))
4298
// Releases the overflow (extended) data referenced by ext_rec: memory
// tables free their slot, other tables delete the entry from the data log.
static void tab_free_ext_record_on_fail(XTOpenTablePtr ot, xtRecordID rec_id, XTTabRecExtDPtr ext_rec, xtBool log_err)
4300
// Size of the overflow data as stored in the extended record header.
xtWord4 log_over_size = XT_GET_DISK_4(ext_rec->re_log_dat_siz_4);
4302
xtLogOffset log_offset;
4304
// Extract the log ID and offset that the record points to.
XT_GET_LOG_REF(log_id, log_offset, ext_rec);
4306
if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
4307
xt_tab_free_ext_slot(ot->ot_table, log_id, log_offset, log_over_size);
4309
if (!ot->ot_thread->st_dlog_buf.dlb_delete_log(log_id, log_offset, log_over_size, ot->ot_table->tab_id, rec_id, ot->ot_thread)) {
4311
// NOTE(review): presumably guarded by log_err on a line not shown here -
// confirm.
xt_log_and_clear_exception_ns();
4316
// Copies the exception record of the calling thread into *e, so that it can
// be put back later with tab_restore_exception().
static void tab_save_exception(XTExceptionPtr e)
4318
XTThreadPtr self = xt_get_self();
4320
*e = self->t_exception;
4323
// Overwrites the exception record of the calling thread with *e, typically
// a copy taken earlier by tab_save_exception().
static void tab_restore_exception(XTExceptionPtr e)
4325
XTThreadPtr self = xt_get_self();
4327
self->t_exception = *e;
4331
* This function assumes that a record may be partially written.
4332
* It removes all associated data and references to the record.
4334
* This function returns XT_ERR if an error occurs.
4335
* TRUE if the record has been removed, and may be freed.
4336
* FALSE if the record has already been freed.
4339
// Removes record rec_id of row row_id: deletes its index entries, then
// links the record into the free record list and logs the removal.
//   rec_data     - buffer used to hold the unpacked row for index deletion
//   prev_var_id  - receives the previous record ID stored in the record
//   clean_delete - when set, the freed record also carries the cleaned bit
//   xn_id        - marked XT_UNUSED in the signature
xtPublic int xt_tab_remove_record(XTOpenTablePtr ot, xtRecordID rec_id, xtWord1 *rec_data, xtRecordID *prev_var_id, xtBool clean_delete, xtRowID row_id, xtXactID XT_UNUSED(xn_id))
4341
register XTTableHPtr tab = ot->ot_table;
4343
xtWord1 old_rec_type;
4345
u_int cols_in_buffer;
4353
* NOTE: This function uses the read buffer. This should be OK because
4354
* the function is only called by the sweeper. The read buffer
4355
* is REQUIRED because of the call to xt_tab_load_ext_data()!!!
4357
rec_size = tab->tab_dic.dic_rec_size;
4358
if (!xt_tab_get_rec_data(ot, rec_id, rec_size, ot->ot_row_rbuffer))
4360
// Remember the original status byte; it is written back into the buffer
// just before the log entry is created at the end of this function.
old_rec_type = ot->ot_row_rbuffer[0];
4362
/* Check if the record has not already been freed: */
4363
if (XT_REC_IS_FREE(old_rec_type))
4366
/* This record must belong to the given row: */
4367
if (XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_row_id_4) != row_id)
4370
/* The transaction ID of the record must be BEFORE or equal to the given
4373
* No, this does not always hold. Because we wait for updates now,
4374
* a "younger" transaction can update before an older
4376
* Commit order determined the actual order in which the transactions
4377
* should be replicated. This is determined by the log number of
4378
* the commit record!
4379
// NOTE(review): the check below uses xn_id, which the signature marks as
// XT_UNUSED, so it presumably sits inside the surrounding comment - confirm.
if (db->db_xn_curr_id(xn_id, XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_xact_id_4)))
4383
*prev_var_id = XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_prev_rec_id_4);
4385
// With indexes defined, the row data is needed to delete the index
// entries; rec_size is set per record type along the way.
if (tab->tab_dic.dic_key_count) {
4388
switch (old_rec_type) {
4389
case XT_TAB_STATUS_DELETE:
4390
case XT_TAB_STATUS_DEL_CLEAN:
4391
rec_size = sizeof(XTTabRecHeadDRec);
4393
case XT_TAB_STATUS_FIXED:
4394
case XT_TAB_STATUS_FIX_CLEAN:
4395
/* We know that for a fixed length record,
4396
* dic_ind_rec_len <= dic_rec_size! */
4397
rec_size = (size_t) tab->tab_dic.dic_ind_rec_len + XT_REC_FIX_HEADER_SIZE;
4398
// Fixed rows are used in place: rec_data points into the read buffer.
rec_data = ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE;
4400
case XT_TAB_STATUS_VARIABLE:
4401
case XT_TAB_STATUS_VAR_CLEAN:
4402
cols_req = tab->tab_dic.dic_ind_cols_req;
4404
// cols_in_buffer goes in as the number of columns wanted and is compared
// with cols_req afterwards.
cols_in_buffer = cols_req;
4405
rec_size = myxt_load_row_length(ot, rec_size - XT_REC_FIX_HEADER_SIZE, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, &cols_in_buffer);
4406
if (cols_in_buffer < cols_req)
4407
rec_size = tab->tab_dic.dic_rec_size;
4409
rec_size += XT_REC_FIX_HEADER_SIZE;
4410
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_data, cols_req)) {
4411
xt_log_and_clear_exception_ns();
4415
case XT_TAB_STATUS_EXT_DLOG:
4416
case XT_TAB_STATUS_EXT_CLEAN:
4417
cols_req = tab->tab_dic.dic_ind_cols_req;
4419
ASSERT_NS(cols_req);
4420
cols_in_buffer = cols_req;
4421
rec_size = myxt_load_row_length(ot, rec_size - XT_REC_EXT_HEADER_SIZE, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, &cols_in_buffer);
4422
// Fewer columns in the record than the indexes need: load the overflow.
if (cols_in_buffer < cols_req) {
4423
rec_size = tab->tab_dic.dic_rec_size;
4424
if (!xt_tab_load_ext_data(ot, rec_id, rec_data, cols_req)) {
4425
/* This is actually quite possible after recovery, see [(3)] */
4426
// Only errors other than these two are logged here.
if (ot->ot_thread->t_exception.e_xt_err != XT_ERR_BAD_EXT_RECORD &&
4427
ot->ot_thread->t_exception.e_xt_err != XT_ERR_DATA_LOG_NOT_FOUND)
4428
xt_log_and_clear_exception_ns();
4433
/* All the records we require are in the buffer... */
4434
rec_size += XT_REC_EXT_HEADER_SIZE;
4435
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, rec_data, cols_req)) {
4436
xt_log_and_clear_exception_ns();
4445
/* Could this be the case?: This change may only be flushed after the
4446
* operation below has been flushed to the log.
4448
* No, remove records are never "undone". The sweeper will delete
4449
* the record again if it does not land in the log.
4451
* The fact that the index entries have already been removed is not
4454
if (!tab->tab_dic.dic_disable_index) {
4455
ind = tab->tab_dic.dic_keys;
4456
// A failed index delete is logged and cleared; the loop carries on.
for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++) {
4457
if (!xt_idx_delete(ot, *ind, rec_id, rec_data))
4458
xt_log_and_clear_exception_ns();
4464
// NOTE(review): presumably the path taken when the table has no indexes;
// only the header size is kept as rec_size - confirm.
switch (old_rec_type) {
4465
case XT_TAB_STATUS_DELETE:
4466
case XT_TAB_STATUS_DEL_CLEAN:
4467
rec_size = XT_REC_FIX_HEADER_SIZE;
4469
case XT_TAB_STATUS_FIXED:
4470
case XT_TAB_STATUS_FIX_CLEAN:
4471
case XT_TAB_STATUS_VARIABLE:
4472
case XT_TAB_STATUS_VAR_CLEAN:
4473
rec_size = XT_REC_FIX_HEADER_SIZE;
4475
case XT_TAB_STATUS_EXT_DLOG:
4476
case XT_TAB_STATUS_EXT_CLEAN:
4477
rec_size = XT_REC_EXT_HEADER_SIZE;
4483
if (XT_REC_IS_EXT_DLOG(old_rec_type)) {
4484
/* {LOCK-EXT-REC} Lock, and read again to make sure that the
4485
* compactor does not change this record, while
4486
* we are removing it! */
4487
xt_lock_mutex_ns(&tab->tab_db->db_co_ext_lock);
4488
if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_EXT_HEADER_SIZE, ot->ot_row_rbuffer)) {
4489
xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock);
4492
xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock);
4497
/* A record is "clean" deleted if the record was
4498
* XT_TAB_STATUS_DELETE which was committed.
4499
* This makes sure that the record will still invalidate
4500
* following records in a row.
4504
* 1. INSERT A ROW, then DELETE it, assume the sweeper is delayed.
4506
* We now have the sequence row X --> del rec A --> valid rec B.
4508
* 2. A SELECT can still find B. Assume it now goes to check
4509
* if the record is valid, it reads row X, and gets A.
4511
* 3. Now the sweeper gets control and removes X, A and B.
4512
* It frees A with the clean bit.
4514
* 4. Now the SELECT gets control and reads A. Normally a freed record
4515
* would be ignored, and it would go onto B, which would then
4516
* be considered valid (note, even after the free, the next
4517
* pointer is not affected).
4519
* However, because the clean bit has been set, it will stop at A
4520
* and consider B invalid (which is the desired result).
4522
* NOTE: We assume it is not possible for A to be allocated and refer
4523
* to B, because B is freed before A. This means that B may refer to
4524
* A after the next allocation.
4528
// The free record image is built in place, on top of the read buffer.
XTTabRecFreeDPtr free_rec = (XTTabRecFreeDPtr) ot->ot_row_rbuffer;
4529
xtRecordID prev_rec_id;
4530
xtWord1 new_rec_type = XT_TAB_STATUS_FREED | (clean_delete ? XT_TAB_STATUS_CLEANED_BIT : 0);
4531
u_int status = XT_LOG_ENT_REC_REMOVED_BI;
4533
// The free record list is changed under tab_rec_lock.
xt_lock_mutex_ns(&tab->tab_rec_lock);
4534
free_rec->rf_rec_type_1 = new_rec_type;
4535
#ifdef XT_CLUSTER_FREE_RECORDS
4536
XTTabCachePagePtr page;
4539
if (!tab->tab_recs.xt_tc_lock_page(ot->ot_rec_file, &page, rec_id, &offset, ot->ot_thread)) {
4540
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4544
// 0xFFFF in tcp_free_rec marks a page without a free record.
if (page->tcp_free_rec == 0xFFFF) {
4545
/* There is no free record on this page. */
4546
prev_rec_id = tab->tab_rec_free_id;
4547
XT_SET_DISK_4(free_rec->rf_next_rec_id_4, prev_rec_id);
4548
memcpy(page->tcp_data + offset, ot->ot_row_rbuffer, sizeof(XTTabRecFreeDRec));
4549
tab->tab_rec_free_id = rec_id;
4552
// Otherwise the record is linked in behind the free record already known
// on this page, and a different log entry type is used.
XTTabRecFreeDPtr prev_free_rec = (XTTabRecFreeDPtr) (page->tcp_data + page->tcp_free_rec);
4554
status = XT_LOG_ENT_REC_REMOVED_BI_L;
4555
XT_COPY_DISK_4(free_rec->rf_next_rec_id_4, prev_free_rec->rf_next_rec_id_4);
4556
memcpy(page->tcp_data + offset, ot->ot_row_rbuffer, sizeof(XTTabRecFreeDRec));
4558
/* The previous now references the next: */
4559
XT_SET_DISK_4(prev_free_rec->rf_next_rec_id_4, rec_id);
4561
/* This is the record ID of the previous record: */
4562
ASSERT_NS((page->tcp_free_rec % tab->tab_recs.tci_rec_size) == 0);
4563
// Record IDs are 1 based: page index times rows per page, plus the slot
// index within the page, plus one.
prev_rec_id = (page->tcp_page_idx * tab->tab_recs.tci_rows_per_page) + (page->tcp_free_rec / tab->tab_recs.tci_rec_size) + 1;
4564
ASSERT_NS(prev_rec_id != rec_id);
4567
/* Link after this page in future! */
4568
ASSERT_NS((offset % tab->tab_recs.tci_rec_size) == 0);
4569
page->tcp_free_rec = offset;
4570
tab->tab_recs.xt_tc_unlock_page(ot->ot_rec_file, page, &op_seq, ot->ot_thread);
4572
// NOTE(review): presumably the variant compiled when
// XT_CLUSTER_FREE_RECORDS is not defined: push onto the list head - confirm.
prev_rec_id = tab->tab_rec_free_id;
4573
XT_SET_DISK_4(free_rec->rf_next_rec_id_4, prev_rec_id);
4574
if (!xt_tab_put_rec_data(ot, rec_id, sizeof(XTTabRecFreeDRec), ot->ot_row_rbuffer, &op_seq)) {
4575
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4578
tab->tab_rec_free_id = rec_id;
4579
ASSERT_NS(tab->tab_rec_free_id < tab->tab_rec_eof_id);
4581
tab->tab_rec_fnum++;
4582
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4584
// Put the original status byte back, so that the buffer passed to the log
// call starts with the old record type; the new type is passed separately.
free_rec->rf_rec_type_1 = old_rec_type;
4585
if (!xt_xlog_modify_table(tab->tab_id, status, op_seq, new_rec_type, prev_rec_id, rec_id, rec_size, ot->ot_row_rbuffer, ot->ot_thread))
4590
// Allocates a row ID: reuses the head of the free row list when there is
// one, otherwise takes the ID at the current end of the row file. The
// allocation is logged with XT_LOG_ENT_ROW_NEW_FL or XT_LOG_ENT_ROW_NEW.
static xtRowID tab_new_row(XTOpenTablePtr ot, XTTableHPtr tab)
4594
xtRowID next_row_id = 0;
4597
xt_lock_mutex_ns(&tab->tab_row_lock);
4598
// A non-zero free list head means a freed row can be reused.
if ((row_id = tab->tab_row_free_id)) {
4599
status = XT_LOG_ENT_ROW_NEW_FL;
4601
// The freed slot holds the ID of the next free row.
if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &next_row_id, ot->ot_thread)) {
4602
xt_unlock_mutex_ns(&tab->tab_row_lock);
4605
tab->tab_row_free_id = next_row_id;
4606
ASSERT_NS(tab->tab_row_fnum > 0);
4607
tab->tab_row_fnum--;
4610
status = XT_LOG_ENT_ROW_NEW;
4611
row_id = tab->tab_row_eof_id;
4612
// 0xFFFFFFFF is treated as the row ID limit.
if (row_id == 0xFFFFFFFF) {
4613
xt_unlock_mutex_ns(&tab->tab_row_lock);
4614
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_MAX_ROW_COUNT);
4617
// True when row_id is the first row of a cache page.
if (((row_id - 1) % tab->tab_rows.tci_rows_per_page) == 0) {
4618
/* By fetching the page now, we avoid reading it later... */
4619
XTTabCachePagePtr page;
4620
XTTabCacheSegPtr seg;
4623
if (!tab->tab_rows.tc_fetch(ot->ot_row_file, row_id, &seg, &page, &poffset, FALSE, ot->ot_thread)) {
4624
xt_unlock_mutex_ns(&tab->tab_row_lock);
4627
TAB_CAC_UNLOCK(&seg->tcs_lock, ot->ot_thread->t_id);
4629
tab->tab_row_eof_id++;
4631
// The operation sequence number is taken while tab_row_lock is still held.
op_seq = tab->tab_seq.ts_get_op_seq();
4632
xt_unlock_mutex_ns(&tab->tab_row_lock);
4634
if (!xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, next_row_id, row_id, 0, NULL, ot->ot_thread))
4637
XT_DISABLED_TRACE(("new row tx=%d row=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) row_id));
4642
// Reads the 4 byte row pointer entry of row_id into *var_rec_id.
xtPublic xtBool xt_tab_get_row(register XTOpenTablePtr ot, xtRowID row_id, xtRecordID *var_rec_id)
4644
register XTTableHPtr tab = ot->ot_table;
4646
// The 4 byte read below relies on the row reference record being 4 bytes.
(void) ASSERT_NS(sizeof(XTTabRowRefDRec) == 4);
4648
if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, var_rec_id, ot->ot_thread))
4653
// Stores var_rec_id in the row pointer entry of row_id and logs the change
// with the log entry type given in status.
xtPublic xtBool xt_tab_set_row(XTOpenTablePtr ot, u_int status, xtRowID row_id, xtRecordID var_rec_id)
4655
register XTTableHPtr tab = ot->ot_table;
4656
XTTabRowRefDRec row_buf;
4659
ASSERT_NS(var_rec_id < tab->tab_rec_eof_id);
4660
XT_SET_DISK_4(row_buf.rr_ref_id_4, var_rec_id);
4662
if (!tab->tab_rows.xt_tc_write(ot->ot_row_file, row_id, 0, sizeof(XTTabRowRefDRec), (xtWord1 *) &row_buf, &op_seq, TRUE, ot->ot_thread))
4665
// The result of the log call is the result of this function.
return xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, 0, row_id, sizeof(XTTabRowRefDRec), (xtWord1 *) &row_buf, ot->ot_thread);
4668
// Frees row_id on an error path. The current thread exception is saved
// before xt_tab_free_row() and restored afterwards; the result of
// xt_tab_free_row() is not checked here.
static void tab_free_row_on_fail(XTOpenTablePtr ot, XTTableHPtr tab, xtRowID row_id)
4672
tab_save_exception(&e);
4673
xt_tab_free_row(ot, tab, row_id);
4674
tab_restore_exception(&e);
4677
// Debug counters, only compiled with CHECK_CLUSTER_EFFICIENCY. They are
// printed by the debug code in tab_add_record().
// NOTE(review): presumably they count whether the next free record was on
// the same page or on another page - confirm where they are incremented.
#ifdef CHECK_CLUSTER_EFFICIENCY
4678
u_int next_on_page = 0;
4679
u_int next_off_page = 0;
4682
// Writes the overflow part of a record (rec_info->ri_log_buf) to the given
// log position. The header area of ri_log_buf is used to build the log
// entry header and is put back to its previous content afterwards.
static xtBool tab_write_ext_record(XTOpenTablePtr XT_UNUSED(ot), XTTableHPtr tab, XTTabRecInfoPtr rec_info, xtRecordID rec_id, xtLogID log_id, xtLogOffset log_offset, XTThreadPtr thread)
4684
// Holds a copy of the bytes that precede er_data in the log buffer.
xtWord1 tmp_buffer[offsetof(XTactExtRecEntryDRec, er_data)];
4687
memcpy(tmp_buffer, rec_info->ri_log_buf, sizeof(tmp_buffer));
4688
rec_info->ri_log_buf->er_status_1 = XT_LOG_ENT_EXT_REC_OK;
4689
XT_SET_DISK_4(rec_info->ri_log_buf->er_data_size_4, rec_info->ri_log_data_size);
4690
XT_SET_DISK_4(rec_info->ri_log_buf->er_tab_id_4, tab->tab_id);
4691
XT_SET_DISK_4(rec_info->ri_log_buf->er_rec_id_4, rec_id);
4692
// Memory tables store the entry in a slot; other tables append it to the
// data log of the thread. Both write header plus ri_log_data_size bytes.
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
4693
ok = xt_tab_save_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf);
4695
ok = thread->st_dlog_buf.dlb_append_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf, thread);
4696
// Put the saved header bytes back.
memcpy(rec_info->ri_log_buf, tmp_buffer, sizeof(tmp_buffer));
4700
// Writes the record described by rec_info into the record file, reusing a
// free record when one is available and appending at the end of file
// otherwise. The write is logged with the given status, any overflow data
// is written afterwards, and the record ID is stored in rec_info->ri_rec_id.
static xtBool tab_add_record(XTOpenTablePtr ot, XTTabRecInfoPtr rec_info, u_int status)
4702
register XTTableHPtr tab = ot->ot_table;
4703
XTThreadPtr thread = ot->ot_thread;
4706
xtLogOffset log_offset;
4708
xtRecordID next_rec_id = 0;
4710
// Records with overflow data first reserve space for it, so that the
// reference can be stored in the record before the record is written.
if (rec_info->ri_ext_rec) {
4711
/* Determine where the overflow will go... */
4712
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
4713
if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data)))
4717
if (!thread->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data), ot->ot_thread))
4720
XT_SET_LOG_REF(rec_info->ri_ext_rec, log_id, log_offset);
4723
/* Write the record to disk: */
4724
xt_lock_mutex_ns(&tab->tab_rec_lock);
4725
// A non-zero free list head means a freed record can be reused.
if ((rec_id = tab->tab_rec_free_id)) {
4726
ASSERT_NS(rec_id < tab->tab_rec_eof_id);
4727
#ifdef XT_CLUSTER_FREE_RECORDS
4728
XTTabCachePagePtr page;
4730
XTTabRecFreeDPtr free_block;
4732
if (!tab->tab_recs.xt_tc_lock_page(ot->ot_rec_file, &page, rec_id, &offset, ot->ot_thread)) {
4733
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4737
/* Read the data from the old record: */
4738
free_block = (XTTabRecFreeDPtr) (page->tcp_data + offset);
4739
next_rec_id = XT_GET_DISK_4(free_block->rf_next_rec_id_4);
4741
#ifdef CHECK_CLUSTER_EFFICIENCY
4742
xtRecordID dbg_rec_id;
4744
dbg_rec_id = next_rec_id-1;
4745
if (page->tcp_page_idx == dbg_rec_id / tab->tab_recs.tci_rows_per_page)
4749
if ((next_on_page % 1000) == 0)
4750
printf("Next on: %d off: %d\n", next_on_page, next_off_page);
4753
// The reused record is the one this page remembers as its free record,
// so that marker has to be moved on or cleared.
if (page->tcp_free_rec == offset) {
4754
/* Adjust the free record: */
4755
xtRecordID tmp_rec_id;
4757
/* Check if the next record is on the same page: */
4758
tmp_rec_id = next_rec_id-1;
4759
if (page->tcp_page_idx == tmp_rec_id / tab->tab_recs.tci_rows_per_page)
4760
/* This is the next free record on this page: */
4761
page->tcp_free_rec = (xtWord2) ((tmp_rec_id % tab->tab_recs.tci_rows_per_page) * tab->tab_recs.tci_rec_size);
4763
/* Not on the same page, so there are no more free records on this page: */
4764
page->tcp_free_rec = 0xFFFF;
4767
/* Write the new record: */
4768
memcpy(free_block, rec_info->ri_fix_rec_buf, rec_info->ri_rec_buf_size);
4769
tab->tab_recs.xt_tc_unlock_page(ot->ot_rec_file, page, &op_seq, ot->ot_thread);
4771
// NOTE(review): presumably the variant compiled when
// XT_CLUSTER_FREE_RECORDS is not defined - confirm.
XTTabRecFreeDRec free_block;
4773
if (!xt_tab_get_rec_data(ot, rec_id, sizeof(XTTabRecFreeDRec), (xtWord1 *) &free_block)) {
4774
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4777
next_rec_id = XT_GET_DISK_4(free_block.rf_next_rec_id_4);
4778
if (!xt_tab_put_rec_data(ot, rec_id, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, &op_seq)) {
4779
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4783
// Unlink the reused record from the free list.
tab->tab_rec_free_id = next_rec_id;
4784
tab->tab_rec_fnum--;
4786
/* XT_LOG_ENT_UPDATE --> XT_LOG_ENT_UPDATE_FL */
4787
/* XT_LOG_ENT_INSERT --> XT_LOG_ENT_INSERT_FL */
4788
/* XT_LOG_ENT_DELETE --> XT_LOG_ENT_DELETE_FL */
4794
// No free record: take the ID at the end of the record file.
rec_id = tab->tab_rec_eof_id;
4795
tab->tab_rec_eof_id++;
4797
/* If we are writing to a new page (at the EOF)
4798
* then we do not need to read the page from the
4799
* file because it is new.
4801
* Note that this only works because we are holding
4802
* a lock on the record file.
4804
// read is false exactly when rec_id is the first record of a page.
read = ((rec_id - 1) % tab->tab_recs.tci_rows_per_page) != 0;
4806
if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, &op_seq, read, thread)) {
4807
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4811
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4813
// The log entry is written after tab_rec_lock has been released.
if (!xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, next_rec_id, rec_id, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, thread))
4816
if (rec_info->ri_ext_rec) {
4817
/* Write the log buffer overflow: */
4818
if (!tab_write_ext_record(ot, tab, rec_info, rec_id, log_id, log_offset, thread))
4822
XT_DISABLED_TRACE(("new rec tx=%d val=%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) rec_id));
4823
rec_info->ri_rec_id = rec_id;
4827
// Undoes a failed insert: removes the first key_count index entries of the
// record and adds a delete record in front of it in the row. The exception
// that caused the failure is saved first and restored afterwards.
static void tab_delete_record_on_fail(XTOpenTablePtr ot, xtRowID row_id, xtRecordID rec_id, XTTabRecHeadDPtr XT_UNUSED(row_ptr), xtWord1 *rec_data, u_int key_count)
4830
xtBool log_err = TRUE;
4831
XTTabRecInfoRec rec_info;
4833
tab_save_exception(&e);
4835
if (e.e_xt_err == XT_ERR_DUPLICATE_KEY ||
4836
e.e_xt_err == XT_ERR_DUPLICATE_FKEY) {
4837
/* If the error does not cause rollback, then we will ignore the
4838
* error if an error occurs in the UNDO!
4841
tab_restore_exception(&e);
4846
ind = ot->ot_table->tab_dic.dic_keys;
4847
// Only the indexes that were already updated (key_count) are undone.
for (u_int i=0; i<key_count; i++, ind++) {
4848
if (!xt_idx_delete(ot, *ind, rec_id, rec_data)) {
4850
xt_log_and_clear_exception_ns();
4855
/* This is not required because the extended record will be freed
4856
* later when the record is freed!
4857
if (row_ptr->tr_rec_type_1 == XT_TAB_STATUS_EXT_DLOG || row_ptr->tr_rec_type_1 == XT_TAB_STATUS_EXT_CLEAN)
4858
tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) row_ptr, log_err);
4861
// Build a header only delete record in the row write buffer. It refers
// back to rec_id and carries the start ID of the current transaction.
rec_info.ri_fix_rec_buf = (XTTabRecFixDPtr) ot->ot_row_wbuffer;
4862
rec_info.ri_rec_buf_size = offsetof(XTTabRecFixDRec, rf_data);
4863
rec_info.ri_ext_rec = NULL;
4864
rec_info.ri_fix_rec_buf->tr_rec_type_1 = XT_TAB_STATUS_DELETE;
4865
rec_info.ri_fix_rec_buf->tr_stat_id_1 = 0;
4866
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
4867
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, rec_id);
4868
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, ot->ot_thread->st_xact_data->xd_start_xn_id);
4870
if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_DELETE))
4873
// Make the row point at the new delete record.
if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
4877
tab_restore_exception(&e);
4882
xt_log_and_clear_exception_ns();
4884
tab_restore_exception(&e);
4888
* Wait until all the variations between the start of the chain, and
4889
* the given record have been rolled-back.
4890
* If any is committed, register a locked error, and return FAILED.
4892
// Walks the record chain of row_id from its head towards commit_rec_id and
// examines the transaction status of every record on the way.
static xtBool tab_wait_for_rollback(XTOpenTablePtr ot, xtRowID row_id, xtRecordID commit_rec_id)
4894
register XTTableHPtr tab = ot->ot_table;
4895
xtRecordID var_rec_id;
4896
XTTabRecHeadDRec var_head;
4898
xtRecordID invalid_rec = 0;
4902
// Start at the record the row currently points to.
if (!xt_tab_get_row(ot, row_id, &var_rec_id))
4905
while (var_rec_id != commit_rec_id) {
4908
// Only the record header is read for each record in the chain.
if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
4910
if (XT_REC_IS_CLEAN(var_head.tr_rec_type_1))
4912
if (XT_REC_IS_FREE(var_head.tr_rec_type_1)) {
4913
/* Should not happen: */
4914
if (!tab_record_corrupt(ot, row_id, var_rec_id, false, 4))
4916
goto record_invalid;
4918
xn_id = XT_GET_DISK_4(var_head.tr_xact_id_4);
4919
switch (xt_xn_status(ot, xn_id, var_rec_id)) {
4921
case XT_XN_NOT_VISIBLE:
4924
/* Ignore the record, it will be removed. */
4926
case XT_XN_MY_UPDATE:
4927
/* Should not happen: */
4929
case XT_XN_OTHER_UPDATE:
4930
/* Wait for the transaction to commit or rollback: */
4931
// The row lock is released while waiting and taken again as a write lock
// afterwards, whether or not the wait succeeded.
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4932
xw.xw_xn_id = xn_id;
4933
if (!xt_xn_wait_for_xact(ot->ot_thread, &xw, NULL)) {
4934
XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4937
XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4940
if (!tab_record_corrupt(ot, row_id, var_rec_id, true, 5))
4942
goto record_invalid;
4944
// Move on to the previous record in the chain.
var_rec_id = XT_GET_DISK_4(var_head.tr_prev_rec_id_4);
4949
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_RECORD_CHANGED);
4954
/* Prevent an infinite loop due to a bad record: */
4955
// invalid_rec remembers the last record that was found invalid.
if (invalid_rec != var_rec_id) {
4956
invalid_rec = var_rec_id;
4959
/* The record is invalid, it will be "overwritten"... */
4960
#ifdef XT_CRASH_DEBUG
4961
/* Should not happen! */
4967
/* Check if a record may be visible:
4968
* Return TRUE if the record may be visible now.
4969
* Return XT_MAYBE if the record may be visible in the future (set out_xn_id).
4970
* Return FALSE if the record is not valid (freed or is a delete record).
4971
* Return XT_ERR if an error occurred.
4973
// Checks whether record rec_id may be visible. The header of the record is
// examined first, then the record chain of its row is followed from the
// head until rec_id is reached.
//   out_xn_id   - receives wait_xn_id (the transaction to wait for)
//   out_rowid   - receives the row ID taken from the record header
//   out_updated - receives whether the record belongs to the transaction of
//                 the calling thread
xtPublic int xt_tab_maybe_committed(XTOpenTablePtr ot, xtRecordID rec_id, xtXactID *out_xn_id, xtRowID *out_rowid, xtBool *out_updated)
4975
XTTabRecHeadDRec rec_head;
4976
xtXactID rec_xn_id = 0;
4977
xtBool wait = FALSE;
4978
xtXactID wait_xn_id = 0;
4979
xtRowID row_id = 0; // Initialized unnecessarily to satisfy (Drizzle) compile [-Wuninitialized]
4980
xtRecordID var_rec_id;
4982
register XTTableHPtr tab = NULL; // Initialized unnecessarily to satisfy (Drizzle) compile [-Wuninitialized]
4983
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4988
xtRecordID invalid_rec = 0;
4991
// Only the record header is needed for the visibility checks.
if (!xt_tab_get_rec_data(ot, rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
4994
if (XT_REC_NOT_VALID(rec_head.tr_rec_type_1))
4997
// Records that are not clean are classified by the status of the
// transaction that wrote them.
if (!XT_REC_IS_CLEAN(rec_head.tr_rec_type_1)) {
4998
rec_xn_id = XT_GET_DISK_4(rec_head.tr_xact_id_4);
4999
switch (xt_xn_status(ot, rec_xn_id, rec_id)) {
5001
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5005
case XT_XN_NOT_VISIBLE:
5006
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5012
case XT_XN_MY_UPDATE:
5013
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5017
case XT_XN_OTHER_UPDATE:
5018
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5022
// Remember the writing transaction as the one to wait for.
wait_xn_id = rec_xn_id;
5025
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5029
/* Avoid infinite loop: */
5030
if (invalid_rec == rec_id) {
5031
/* Should not happen! */
5032
if (!tab_record_corrupt(ot, XT_GET_DISK_4(rec_head.tr_row_id_4), rec_id, true, 6))
5034
#ifdef XT_CRASH_DEBUG
5035
/* Generate a core dump! */
5040
invalid_rec = rec_id;
5045
/* Follow the variation chain until we come to this record.
5046
* If it is not the first visible variation then
5047
* it is not visible at all. If it is not found on the
5048
* variation chain, it is also not visible.
5050
row_id = XT_GET_DISK_4(rec_head.tr_row_id_4);
5053
// The chain is walked under a read lock on the row; the lock is chosen by
// row_id modulo XT_ROW_RWLOCKS.
XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
5057
if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
5059
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5060
len = sprintf(t_buf, "dup row=%d", (int) row_id);
5062
while (var_rec_id != rec_id) {
5065
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5067
len += sprintf(t_buf+len, " -> %d", (int) var_rec_id);
5069
// rec_head is reused for every record on the chain.
if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
5071
/* All clean records are visible, by all transactions: */
5072
if (XT_REC_IS_CLEAN(rec_head.tr_rec_type_1))
5075
if (XT_REC_IS_FREE(rec_head.tr_rec_type_1)) {
5076
/* Should not happen: */
5077
if (invalid_rec != var_rec_id) {
5078
invalid_rec = var_rec_id;
5081
/* Assume end of list. */
5082
#ifdef XT_CRASH_DEBUG
5083
/* Should not happen! */
5089
xn_id = XT_GET_DISK_4(rec_head.tr_xact_id_4);
5090
switch (xt_xn_status(ot, xn_id, var_rec_id)) {
5092
case XT_XN_NOT_VISIBLE:
5095
/* Ignore the record, it will be removed. */
5096
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5098
len += sprintf(t_buf+len, "(T%d-A)", (int) xn_id);
5101
case XT_XN_MY_UPDATE:
5103
case XT_XN_OTHER_UPDATE:
5104
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5106
len += sprintf(t_buf+len, "(T%d-wait)", (int) xn_id);
5108
/* Wait for this update to commit or abort: */
5116
if (invalid_rec != var_rec_id) {
5117
invalid_rec = var_rec_id;
5120
/* Assume end of list. */
5121
if (!tab_record_corrupt(ot, row_id, invalid_rec, true, 7))
5123
#ifdef XT_CRASH_DEBUG
5124
/* Should not happen! */
5129
// Move on to the previous record in the chain.
var_rec_id = XT_GET_DISK_4(rec_head.tr_prev_rec_id_4);
5131
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5133
sprintf(t_buf+len, " -> %d(T%d-%s)\n", (int) var_rec_id, (int) rec_xn_id, t_type);
5135
sprintf(t_buf+len, " ...(T%d-%s)\n", (int) rec_xn_id, t_type);
5138
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
5140
*out_xn_id = wait_xn_id;
5143
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5144
xt_ttracef(thread, "%s", t_buf);
5147
*out_rowid = row_id;
5148
// Same comparison as the one used for ot_curr_updated elsewhere in this
// file: record transaction ID against the start ID of the current one.
*out_updated = (rec_xn_id == ot->ot_thread->st_xact_data->xd_start_xn_id);
5153
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
5157
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
5161
// Inserts a new row built from rec_buf: packs the row, allocates a row ID,
// writes the record, points the row at it, adds the index entries and runs
// the foreign key checks. Cleanup calls for the failure paths are at the
// end of the function.
xtPublic xtBool xt_tab_new_record(XTOpenTablePtr ot, xtWord1 *rec_buf)
5163
register XTTableHPtr tab = ot->ot_table;
5164
register XTThreadPtr self = ot->ot_thread;
5165
XTTabRecInfoRec rec_info;
5170
/* A non-temporary table has been updated: */
5171
if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
5172
self->st_non_temp_updated = TRUE;
5174
// Pack the row into its stored form; rec_info describes the result.
if (!myxt_store_row(ot, &rec_info, (char *) rec_buf))
5177
/* Get a new row ID: */
5178
// tab_new_row() yields 0 when no row ID could be allocated.
if (!(row_id = tab_new_row(ot, tab)))
5181
// Fill in the record header: statement ID, owning row, no previous record
// (0), and the start ID of the current transaction.
rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
5182
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
5183
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, 0);
5184
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
5186
/* Note, it is important that this record is written BEFORE the row
5187
* due to the problem distributed here [(5)]
5189
if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_INSERT))
5192
#ifdef TRACE_VARIATIONS
5193
xt_ttracef(self, "insert: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) self->st_xact_data->xd_start_xn_id);
5195
// Make the row point at the record that was just written.
if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
5197
XT_DISABLED_TRACE(("set new tx=%d row=%d rec=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
5199
/* Add the index references: */
5200
// idx_cnt counts the indexes already updated; it is later handed to
// tab_delete_record_on_fail() so that only those are undone.
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5201
if (!xt_idx_insert(ot, *ind, 0, rec_info.ri_rec_id, rec_buf, NULL, FALSE)) {
5202
// Remember which index reported the failure.
ot->ot_err_index_no = (*ind)->mi_index_no;
5207
/* Do the foreign key stuff: */
5208
if (ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
5209
if (!ot->ot_table->tab_dic.dic_table->insertRow(ot, rec_buf))
5213
self->st_statistics.st_row_insert++;
5217
/* Once the row has been inserted, it is too late to remove it!
5218
* Now all we can do is delete it!
5220
tab_delete_record_on_fail(ot, row_id, rec_info.ri_rec_id, (XTTabRecHeadDPtr) rec_info.ri_fix_rec_buf, rec_buf, idx_cnt);
5224
tab_free_row_on_fail(ot, tab, row_id);
5230
/* We cannot remove a change we have made to a row while a transaction
5231
* is running, so we have to undo what we have done by
5232
* overwriting the record we just created with
5235
/*
 * Failure-path helper: puts a record back the way it was after an
 * update or delete could not be completed.
 *
 * Steps that are visible in this block:
 *  - free the extended (overflow) record of the new image, if there is one;
 *  - delete the first idx_cnt index entries that were added for after_buf;
 *  - rebuild the old image from before_buf with myxt_store_row(), keeping
 *    the record header that was saved in prev_rec_head;
 *  - if the old image needs overflow space, reserve it (an ext slot for
 *    XT_TF_MEMORY_TABLE tables, otherwise an offset in the thread's data
 *    log buffer);
 *  - write the record back with an XT_LOG_ENT_REC_MODIFIED operation and
 *    write the overflow data;
 *  - re-insert the index entries for before_buf.
 *
 * Callers in this file pass (before_buf, after_buf, idx_cnt) from an
 * update, and (rec_buf, NULL, 0) from a delete.
 *
 * NOTE(review): several short lines (braces, labels, return statements,
 * the condition guarding XT_ERR_NO_BEFORE_IMAGE) are not present in this
 * excerpt, so the exact control flow and the returned value must be
 * confirmed against the complete file.
 */
static xtBool tab_overwrite_record_on_fail(XTOpenTablePtr ot, XTTabRecInfoPtr rec_info, xtWord1 *before_buf, xtWord1 *after_buf, u_int idx_cnt)
5237
register XTTableHPtr tab = ot->ot_table;
5238
XTTabRecHeadDRec prev_rec_head;
5241
XTThreadPtr thread = ot->ot_thread;
5243
xtLogOffset log_offset;
5244
xtRecordID rec_id = rec_info->ri_rec_id;
5246
/* Remove the new extended record: */
5247
if (rec_info->ri_ext_rec)
5248
tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) rec_info->ri_fix_rec_buf, TRUE);
5250
/* Undo index entries of the new record: */
5252
for (i=0, ind=tab->tab_dic.dic_keys; i<idx_cnt; i++, ind++) {
5253
if (!xt_idx_delete(ot, *ind, rec_id, after_buf))
5258
memcpy(&prev_rec_head, rec_info->ri_fix_rec_buf, sizeof(XTTabRecHeadDRec));
5261
/* Can happen if the delete was called from some cascaded action.
5262
* And this is better than a crash...
5264
* TODO: to make sure the change will not be applied in case the
5265
* transaction will be committed, we'd need to add a log entry to
5266
* restore the record like it's done for top-level operation. In
5267
* order to do this we'd need to read the before-image of the
5268
* record before modifying it.
5270
if (!thread->t_exception.e_xt_err)
5271
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_BEFORE_IMAGE);
5275
/* Restore the previous record! */
5276
if (!myxt_store_row(ot, rec_info, (char *) before_buf))
5279
memcpy(rec_info->ri_fix_rec_buf, &prev_rec_head, sizeof(XTTabRecHeadDRec));
5281
if (rec_info->ri_ext_rec) {
5282
/* Determine where the overflow will go... */
5283
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
5284
if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data)))
5288
if (!thread->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data), thread))
5291
XT_SET_LOG_REF(rec_info->ri_ext_rec, log_id, log_offset);
5294
if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf))
5297
if (rec_info->ri_ext_rec) {
5298
/* Write the log buffer overflow: */
5299
if (!tab_write_ext_record(ot, tab, rec_info, rec_id, log_id, log_offset, thread))
5303
/* Put the index entries back: */
5304
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5305
if (!xt_idx_insert(ot, *ind, 0, rec_id, before_buf, after_buf, TRUE))
5306
/* Incomplete restore, there will be a rollback... */
5315
* If a transaction updates the same record over again, we should update
5316
* in place. This prevents producing unnecessary variations!
5318
/*
 * Update the current record (ot->ot_curr_rec_id) in place instead of
 * creating a new record. xt_tab_update_record() calls this when
 * ot->ot_curr_updated is set and a before image is available.
 *
 * Steps that are visible in this block:
 *  - note that a non-temporary table was changed;
 *  - build the new record image from after_buf with myxt_store_row();
 *  - read the header of the record that is about to be overwritten;
 *  - reserve overflow space if the new image has an extended part;
 *  - fill in the record header (statement ID, row ID, the previous
 *    record ID copied from the old header, transaction ID);
 *  - delete the index entries of before_buf, write the record with an
 *    XT_LOG_ENT_REC_MODIFIED operation, write the overflow data, then
 *    insert the index entries of after_buf;
 *  - run the foreign key updateRow() check when the table has references
 *    or foreign keys;
 *  - free the overflow area of the overwritten record.
 * The second half of the block is the failure path: it removes what was
 * added for after_buf and writes the before image back.
 *
 * NOTE(review): labels, goto/return statements and braces are missing
 * from this excerpt; confirm the exact control flow and return values
 * against the complete file.
 */
static xtBool tab_overwrite_record(XTOpenTablePtr ot, xtWord1 *before_buf, xtWord1 *after_buf)
5320
register XTTableHPtr tab = ot->ot_table;
5321
xtRowID row_id = ot->ot_curr_row_id;
5322
register XTThreadPtr self = ot->ot_thread;
5323
xtRecordID rec_id = ot->ot_curr_rec_id;
5324
XTTabRecExtDRec prev_rec_head;
5325
XTTabRecInfoRec rec_info;
5326
u_int idx_cnt = 0, i;
5329
xtLogOffset log_offset;
5330
xtBool prev_ext_rec;
5332
/* A non-temporary table has been updated: */
5333
if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
5334
self->st_non_temp_updated = TRUE;
5336
if (!myxt_store_row(ot, &rec_info, (char *) after_buf))
5339
/* Read before we overwrite! */
5340
if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_EXT_HEADER_SIZE, (xtWord1 *) &prev_rec_head))
5343
prev_ext_rec = prev_rec_head.tr_rec_type_1 & XT_TAB_STATUS_EXT_DLOG;
5345
if (rec_info.ri_ext_rec) {
5346
/* Determine where the overflow will go... */
5347
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
5348
if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size))
5352
if (!self->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size, self))
5355
XT_SET_LOG_REF(rec_info.ri_ext_rec, log_id, log_offset);
5358
rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
5359
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
5360
XT_COPY_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, prev_rec_head.tr_prev_rec_id_4);
5361
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
5363
/* Remove the index references, that have changed: */
5364
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5365
if (!xt_idx_delete(ot, *ind, rec_id, before_buf)) {
5370
#ifdef TRACE_VARIATIONS
5371
xt_ttracef(self, "overwrite: row=%d rec=%d T%d\n", (int) row_id, (int) rec_id, (int) self->st_xact_data->xd_start_xn_id);
5373
/* Overwrite the record: */
5374
if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info.ri_rec_buf_size, (xtWord1 *) rec_info.ri_fix_rec_buf))
5377
if (rec_info.ri_ext_rec) {
5378
/* Write the log buffer overflow: */
5379
if (!tab_write_ext_record(ot, tab, &rec_info, rec_id, log_id, log_offset, self))
5383
/* Add the index references that have changed: */
5384
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5385
if (!xt_idx_insert(ot, *ind, 0, rec_id, after_buf, before_buf, FALSE)) {
5386
ot->ot_err_index_no = (*ind)->mi_index_no;
5391
/* Do the foreign key stuff: */
5392
if (ot->ot_table->tab_dic.dic_table->dt_trefs || ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
5393
if (!ot->ot_table->tab_dic.dic_table->updateRow(ot, before_buf, after_buf))
5397
/* Delete the previous overflow area: */
5399
tab_free_ext_record_on_fail(ot, rec_id, &prev_rec_head, TRUE);
5404
/* Remove the new extended record: */
5405
if (rec_info.ri_ext_rec)
5406
tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) rec_info.ri_fix_rec_buf, TRUE);
5408
/* Restore the previous record! */
5409
/* Undo index entries: */
5410
for (i=0, ind=tab->tab_dic.dic_keys; i<idx_cnt; i++, ind++) {
5411
if (!xt_idx_delete(ot, *ind, rec_id, after_buf))
5415
/* Restore the record: */
5416
if (!myxt_store_row(ot, &rec_info, (char *) before_buf))
5419
if (rec_info.ri_ext_rec)
5420
memcpy(rec_info.ri_fix_rec_buf, &prev_rec_head, XT_REC_EXT_HEADER_SIZE);
5422
memcpy(rec_info.ri_fix_rec_buf, &prev_rec_head, sizeof(XTTabRecHeadDRec));
5424
if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info.ri_rec_buf_size, (xtWord1 *) rec_info.ri_fix_rec_buf))
5427
/* Put the index entries back: */
5428
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5429
if (!xt_idx_insert(ot, *ind, 0, rec_id, before_buf, after_buf, TRUE))
5430
/* Incomplete restore, there will be a rollback... */
5434
/* The previous record has now been restored. */
5438
/* The old record is overwritten, I must free the previous extended record: */
5440
tab_free_ext_record_on_fail(ot, rec_id, &prev_rec_head, TRUE);
5446
/*
 * Update the current row of an open table (ot->ot_curr_row_id /
 * ot->ot_curr_rec_id) from before_buf to after_buf.
 *
 * If the record was already updated by this transaction
 * (ot->ot_curr_updated) and a before image is supplied, the work is
 * delegated to tab_overwrite_record(), which updates in place.
 *
 * Otherwise the visible statements create a new record variation:
 *  - build the record from after_buf, with the previous-record field set
 *    to ot->ot_curr_rec_id, and add it with XT_LOG_ENT_UPDATE;
 *  - under the row write lock, read the row's current record ID; if it
 *    is not ot->ot_curr_rec_id, tab_wait_for_rollback() is called;
 *  - point the row at the new record with xt_tab_set_row();
 *  - insert the index entries, run the foreign key updateRow() check and
 *    count the update in the thread statistics.
 * On failure tab_overwrite_record_on_fail() is used to undo the change.
 *
 * before_buf may be NULL; per the comment inside the function this only
 * happens during a cascaded update of a child table.
 *
 * NOTE(review): labels, goto/return statements and braces are missing
 * from this excerpt; confirm the exact control flow and return values
 * against the complete file.
 */
xtPublic xtBool xt_tab_update_record(XTOpenTablePtr ot, xtWord1 *before_buf, xtWord1 *after_buf)
5448
register XTTableHPtr tab;
5450
register XTThreadPtr self;
5451
xtRecordID curr_var_rec_id;
5452
XTTabRecInfoRec rec_info;
5457
* Originally only the flag ot->ot_curr_updated was checked, and if it was on, then
5458
* tab_overwrite_record() was called, but this caused crashes in some cases like:
5460
* set @@autocommit = 0;
5461
* create table t1 (s1 int primary key);
5462
* create table t2 (s1 int primary key, foreign key (s1) references t1 (s1) on update cascade);
5463
* insert into t1 values (1);
5464
* insert into t2 values (1);
5465
* update t1 set s1 = 1;
5467
* the last update lead to a crash on t2 cascade update because before_buf argument is NULL
5468
* in the call below. It is NULL only during cascade update of child table. In that case we
5469
* cannot pass before_buf value from XTDDTableRef::modifyRow as the before_buf is the original
5470
* row for the parent (t1) table and it would be used to update any existing indexes
5471
* in the child table which would be wrong of course.
5473
* Alternative solution would be to copy the after_info in the XTDDTableRef::modifyRow():
5476
* if (!xt_tab_load_record(ot, ot->ot_curr_rec_id, &after_info))
5480
* here the xt_tab_load_record() loads the original row, so we can copy it from there, but in
5481
* that case we'd need to allocate a new (possibly up to 65536 bytes long) buffer, which makes
5482
* the optimization questionable
5485
if (ot->ot_curr_updated && before_buf)
5486
/* This record has already been updated by this transaction.
5487
* Do the update in place!
5489
return tab_overwrite_record(ot, before_buf, after_buf);
5492
row_id = ot->ot_curr_row_id;
5493
self = ot->ot_thread;
5495
/* A non-temporary table has been updated: */
5496
if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
5497
self->st_non_temp_updated = TRUE;
5499
if (!myxt_store_row(ot, &rec_info, (char *) after_buf))
5502
rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
5503
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
5504
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, ot->ot_curr_rec_id);
5505
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
5507
/* Create the new record: */
5508
if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_UPDATE))
5511
/* Link the new variation into the list: */
5512
XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], self);
5514
if (!xt_tab_get_row(ot, row_id, &curr_var_rec_id))
5517
if (curr_var_rec_id != ot->ot_curr_rec_id) {
5518
/* If the transaction does not rollback, I will get an
5521
if (!tab_wait_for_rollback(ot, row_id, ot->ot_curr_rec_id))
5523
/* [(4)] This is the situation when we overwrite the
5524
* reference to curr_var_rec_id!
5525
* When curr_var_rec_id is cleaned up by the sweeper, the
5526
* sweeper will notice that the record is no longer in
5531
#ifdef TRACE_VARIATIONS
5532
xt_ttracef(self, "update: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) self->st_xact_data->xd_start_xn_id);
5534
if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
5536
XT_DISABLED_TRACE(("set upd tx=%d row=%d rec=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
5538
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], self);
5540
/* Add the index references: */
5541
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5542
if (!xt_idx_insert(ot, *ind, 0, rec_info.ri_rec_id, after_buf, before_buf, FALSE)) {
5543
ot->ot_err_index_no = (*ind)->mi_index_no;
5548
if (ot->ot_table->tab_dic.dic_table->dt_trefs || ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
5549
if (!ot->ot_table->tab_dic.dic_table->updateRow(ot, before_buf, after_buf))
5553
self->st_statistics.st_row_update++;
5557
tab_overwrite_record_on_fail(ot, &rec_info, before_buf, after_buf, idx_cnt);
5561
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], self);
5567
/*
 * Delete the current row of an open table (ot->ot_curr_row_id /
 * ot->ot_curr_rec_id). rec_buf is the row image that is handed to the
 * foreign key deleteRow() check and to the failure handler.
 *
 * Steps that are visible in this block:
 *  - note that a non-temporary table was changed;
 *  - build a header-only record of type XT_TAB_STATUS_DELETE in
 *    ot->ot_row_wbuffer whose previous-record field is the current
 *    record, and add it with XT_LOG_ENT_DELETE;
 *  - under the row write lock, read the row's current record ID; if it
 *    is not ot->ot_curr_rec_id, tab_wait_for_rollback() is called;
 *  - point the row at the delete record with xt_tab_set_row();
 *  - if other tables reference this one (dt_trefs), run deleteRow();
 *  - count the delete in the thread statistics.
 * On failure tab_overwrite_record_on_fail() is called with no after
 * image and an index count of 0.
 *
 * NOTE(review): labels, goto/return statements and braces are missing
 * from this excerpt; confirm the exact control flow and return values
 * against the complete file.
 */
xtPublic xtBool xt_tab_delete_record(XTOpenTablePtr ot, xtWord1 *rec_buf)
5569
register XTTableHPtr tab = ot->ot_table;
5570
register XTThreadPtr thread = ot->ot_thread;
5571
xtRowID row_id = ot->ot_curr_row_id;
5572
xtRecordID curr_var_rec_id;
5573
XTTabRecInfoRec rec_info;
5575
/* A non-temporary table has been updated: */
5576
if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
5577
thread->st_non_temp_updated = TRUE;
5579
/* Setup a delete record: */
5580
rec_info.ri_fix_rec_buf = (XTTabRecFixDPtr) ot->ot_row_wbuffer;
5581
rec_info.ri_rec_buf_size = offsetof(XTTabRecFixDRec, rf_data);
5582
rec_info.ri_ext_rec = NULL;
5583
rec_info.ri_fix_rec_buf->tr_rec_type_1 = XT_TAB_STATUS_DELETE;
5584
rec_info.ri_fix_rec_buf->tr_stat_id_1 = 0;
5585
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
5586
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, ot->ot_curr_rec_id);
5587
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, thread->st_xact_data->xd_start_xn_id);
5589
if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_DELETE))
5592
XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
5594
if (!xt_tab_get_row(ot, row_id, &curr_var_rec_id))
5597
if (curr_var_rec_id != ot->ot_curr_rec_id) {
5598
if (!tab_wait_for_rollback(ot, row_id, ot->ot_curr_rec_id))
5602
#ifdef TRACE_VARIATIONS
5603
xt_ttracef(thread, "update: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) thread->st_xact_data->xd_start_xn_id);
5605
if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
5607
XT_DISABLED_TRACE(("del row tx=%d row=%d rec=%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
5609
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
5611
if (ot->ot_table->tab_dic.dic_table->dt_trefs) {
5612
if (!ot->ot_table->tab_dic.dic_table->deleteRow(ot, rec_buf))
5616
thread->st_statistics.st_row_delete++;
5620
tab_overwrite_record_on_fail(ot, &rec_info, rec_buf, NULL, 0);
5624
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
5628
/*
 * Run the checkNoAction() foreign key check for every item in list.
 *
 * For each XTRestrictItemPtr in the list a pool table is opened for
 * item->ri_tab_id in the thread's database; the table that is already
 * open is compared by table ID first so that it can be kept for the next
 * item. checkNoAction() is then called with item->ri_rec_id. Items whose
 * table cannot be opened are skipped. At the end the open table is
 * returned to the pool and the list itself is freed with xt_bl_free().
 *
 * NOTE(review): the result variable, the labels and the return statement
 * are missing from this excerpt; confirm the returned value against the
 * complete file.
 */
xtPublic xtBool xt_tab_restrict_rows(XTBasicListPtr list, XTThreadPtr thread)
5631
XTRestrictItemPtr item;
5632
XTOpenTablePtr pot = NULL;
5633
XTDatabaseHPtr db = thread->st_database;
5636
for (i=0; i<list->bl_count; i++) {
5637
item = (XTRestrictItemPtr) xt_bl_item_at(list, i);
5640
if (pot->ot_table->tab_id == item->ri_tab_id)
5642
xt_db_return_table_to_pool_ns(pot);
5646
if (!xt_db_open_pool_table_ns(&pot, db, item->ri_tab_id)) {
5647
/* Should not happen, but just in case, we just don't
5648
* remove the lock. We will probably end up with a deadlock
5651
xt_log_and_clear_exception_ns();
5652
goto skip_check_action;
5655
/* Can happen if the table has been dropped: */
5656
goto skip_check_action;
5659
if (!pot->ot_table->tab_dic.dic_table->checkNoAction(pot, item->ri_rec_id)) {
5667
xt_db_return_table_to_pool_ns(pot);
5668
xt_bl_free(NULL, list);
5673
/*
 * Prepare an open table for a sequential scan.
 *
 * Resets the scan position and the "current record" fields of ot, notes
 * the table's current record EOF as the scan limit, and starts the scan
 * at record 1.
 *
 * Returns FAILED (after registering XT_ERR_NO_TRANSACTION) if the thread
 * has no transaction; in that case the scan is positioned at EOF.
 * Returns OK otherwise.
 */
xtPublic xtBool xt_tab_seq_init(XTOpenTablePtr ot)
{
	XTTableHPtr tab = ot->ot_table;

	/* No page may still be held from a previous scan. */
	ASSERT_NS(!ot->ot_seq_page);
	ot->ot_seq_page = NULL;
	ot->ot_seq_data = NULL;
	ot->ot_on_page = FALSE;
	ot->ot_seq_offset = 0;

	ot->ot_curr_rec_id = 0;			// 0 is an invalid position!
	ot->ot_curr_row_id = 0;			// 0 is an invalid row ID!
	ot->ot_curr_updated = FALSE;

	/* We note the current EOF before we start a sequential scan.
	 * It is basically possible to update the same record more than
	 * once because an updated record creates a new record which
	 * has a new position which may be in the area that is
	 * still to be scanned.
	 *
	 * By noting the EOF before we start a sequential scan we
	 * reduce the possibility of this.
	 *
	 * However, the possibility still remains, but it should
	 * not be a problem because a record is not modified
	 * if there is nothing to change, which is the case
	 * if the record has already been changed!
	 *
	 * NOTE (2008-01-29) There is no longer a problem with updating a
	 * record twice because records are marked by an update.
	 *
	 * [(10)] I have changed this (see below). I now check the
	 * current EOF of the table.
	 *
	 * The reason is that committed read must be able to see the
	 * changes that occur during the table scan.
	 */
	ot->ot_seq_eof_id = tab->tab_rec_eof_id;

	if (!ot->ot_thread->st_xact_data) {
		/* MySQL ignores this error, so we set up the sequential
		 * scan so that it is already at EOF.
		 */
		ot->ot_seq_rec_id = ot->ot_seq_eof_id;
		xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_TRANSACTION);
		return FAILED;
	}

	ot->ot_seq_rec_id = 1;
	ot->ot_thread->st_statistics.st_scan_table++;
	return OK;
}
5727
/*
 * Clear the sequential scan state of an open table.
 *
 * Both the scan position and the scan limit are set to 0, and the page,
 * data pointer, on-page flag and offset are cleared. No page may be held
 * when this is called (asserted); use xt_tab_seq_exit() to release one.
 */
xtPublic void xt_tab_seq_reset(XTOpenTablePtr ot)
{
	ot->ot_seq_rec_id = 0;
	ot->ot_seq_eof_id = 0;
	ASSERT_NS(!ot->ot_seq_page);
	ot->ot_seq_page = NULL;
	ot->ot_seq_data = NULL;
	ot->ot_on_page = FALSE;
	ot->ot_seq_offset = 0;
}
5738
/*
 * End a sequential scan: give back whatever the scan is still holding.
 *
 * A cache page (ot_seq_page) is released through the table's record
 * cache, a locked memory pointer (ot_seq_data) is unlocked, and the scan
 * is marked as not being on a page.
 */
xtPublic void xt_tab_seq_exit(XTOpenTablePtr ot)
{
	XTTableHPtr tab = ot->ot_table;

	if (ot->ot_seq_page) {
		tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
		ot->ot_seq_page = NULL;
	}
	if (ot->ot_seq_data) {
		XT_UNLOCK_MEMORY_PTR(ot->ot_rec_file, ot->ot_seq_data, ot->ot_thread);
		ot->ot_seq_data = NULL;
	}
	ot->ot_on_page = FALSE;
}
5753
/*
 * TAB_SEQ_LOAD_CACHE is the flag that xt_tab_seq_next() passes to
 * xt_tc_get_page(). It is TRUE only when memory mapped row/record files
 * are not in use and XT_SEQ_SCAN_LOADS_CACHE is defined.
 * NOTE(review): presumably this decides whether a sequential scan loads
 * the pages it reads into the record cache -- confirm against
 * xt_tc_get_page().
 */
#ifdef XT_USE_ROW_REC_MMAP_FILES
#define TAB_SEQ_LOAD_CACHE				FALSE
#else
#ifdef XT_SEQ_SCAN_LOADS_CACHE
#define TAB_SEQ_LOAD_CACHE				TRUE
#else
#define TAB_SEQ_LOAD_CACHE				FALSE
#endif
#endif
5763
/*
 * Step the sequential scan back by one record, so that the next call to
 * xt_tab_seq_next() looks at the same record again. Both the record ID
 * and the offset within the page are moved back by one record.
 */
xtPublic void xt_tab_seq_repeat(XTOpenTablePtr ot)
{
	ot->ot_seq_rec_id--;
	ot->ot_seq_offset -= ot->ot_table->tab_dic.dic_rec_size;
}
5769
/*
 * Fetch the next record of a sequential scan into buffer.
 *
 * Steps that are visible in this block:
 *  - if the scan is not on a page, get the page that holds
 *    ot->ot_seq_rec_id from the record cache, or lock a memory pointer
 *    to it when no cache page is returned;
 *  - compare the scan position with ot->ot_seq_eof_id (the scan limit);
 *  - when the offset runs past the end of the page, release the page or
 *    memory pointer so that the next page is fetched;
 *  - make the record the current record, advance the scan position, and
 *    ask tab_visible() what to do with it; one outcome reads a different
 *    record (new_rec_id) into ot->ot_row_rbuffer, another re-reads the
 *    page once and otherwise reports the record through
 *    tab_record_corrupt();
 *  - copy the row out according to the record type: fixed records are
 *    copied with memcpy(), variable records are unpacked with
 *    myxt_load_row(), extended records are loaded with
 *    xt_tab_load_ext_data() unless all requested columns are in the
 *    fixed part.
 *
 * NOTE(review): the case labels, goto statements, labels, the
 * assignments to *eof and the return statements are missing from this
 * excerpt. Presumably *eof is set when the scan limit is reached --
 * confirm against the complete file.
 */
xtPublic xtBool xt_tab_seq_next(XTOpenTablePtr ot, xtWord1 *buffer, xtBool *eof)
5771
register XTTableHPtr tab = ot->ot_table;
5772
register size_t rec_size = tab->tab_dic.dic_rec_size;
5774
xtRecordID new_rec_id;
5775
xtRecordID invalid_rec = 0;
5778
if (!ot->ot_on_page) {
5779
ASSERT_NS(!ot->ot_seq_page);
5780
if (!(ot->ot_on_page = tab->tab_recs.xt_tc_get_page(ot->ot_rec_file, ot->ot_seq_rec_id, TAB_SEQ_LOAD_CACHE, &ot->ot_seq_page, &ot->ot_seq_offset, ot->ot_thread)))
5782
if (!ot->ot_seq_page) {
5783
XT_LOCK_MEMORY_PTR(ot->ot_seq_data, ot->ot_rec_file, xt_rec_id_to_rec_offset(tab, ot->ot_seq_rec_id), tab->tab_rows.tci_page_size, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread);
5784
if (!ot->ot_seq_data)
5786
ot->ot_on_page = TRUE;
5787
ot->ot_seq_offset = 0;
5792
/* [(10)] The current EOF is used: */
5793
if (ot->ot_seq_rec_id >= ot->ot_seq_eof_id) {
5798
if (ot->ot_seq_offset >= tab->tab_recs.tci_page_size) {
5799
if (ot->ot_seq_page) {
5800
tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
5801
ot->ot_seq_page = NULL;
5803
if (ot->ot_seq_data) {
5804
/* NULL here means that in the case of non-memory mapped
5805
* files we "keep" the lock.
5807
XT_UNLOCK_MEMORY_PTR(ot->ot_rec_file, ot->ot_seq_data, ot->ot_thread);
5808
ot->ot_seq_data = NULL;
5810
ot->ot_on_page = FALSE;
5814
if (ot->ot_seq_page)
5815
buff_ptr = ot->ot_seq_page->tcp_data + ot->ot_seq_offset;
5817
buff_ptr = ot->ot_seq_data + ot->ot_seq_offset;
5819
/* This is the current record: */
5820
ot->ot_curr_rec_id = ot->ot_seq_rec_id;
5821
ot->ot_curr_row_id = 0;
5823
/* Move to the next record: */
5824
ot->ot_seq_rec_id++;
5825
ot->ot_seq_offset += rec_size;
5828
switch (tab_visible(ot, (XTTabRecHeadDPtr) buff_ptr, &new_rec_id)) {
5834
buff_ptr = ot->ot_row_rbuffer;
5835
if (!xt_tab_get_rec_data(ot, new_rec_id, rec_size, ot->ot_row_rbuffer))
5837
ot->ot_curr_rec_id = new_rec_id;
5842
if (invalid_rec != ot->ot_curr_rec_id) {
5843
/* Don't re-read for the same record twice: */
5844
invalid_rec = ot->ot_curr_rec_id;
5846
/* Undo move to next: */
5847
ot->ot_seq_rec_id--;
5848
ot->ot_seq_offset -= rec_size;
5850
/* Prepare to reread the page: */
5851
if (ot->ot_seq_page) {
5852
tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
5853
ot->ot_seq_page = NULL;
5855
ot->ot_on_page = FALSE;
5858
if (!tab_record_corrupt(ot, XT_GET_DISK_4(((XTTabRecHeadDPtr) buff_ptr)->tr_row_id_4), invalid_rec, true, 8))
5860
#ifdef XT_CRASH_DEBUG
5861
/* Should not happen! */
5864
/* Continue, and skip the record... */
5871
switch (*buff_ptr) {
5872
case XT_TAB_STATUS_FIXED:
5873
case XT_TAB_STATUS_FIX_CLEAN:
5874
memcpy(buffer, buff_ptr + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
5876
case XT_TAB_STATUS_VARIABLE:
5877
case XT_TAB_STATUS_VAR_CLEAN:
5878
if (!myxt_load_row(ot, buff_ptr + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
5881
case XT_TAB_STATUS_EXT_DLOG:
5882
case XT_TAB_STATUS_EXT_CLEAN: {
5883
u_int cols_req = ot->ot_cols_req;
5885
ASSERT_NS(cols_req);
5886
if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
5887
if (!myxt_load_row(ot, buff_ptr + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
5891
if (buff_ptr != ot->ot_row_rbuffer)
5892
memcpy(ot->ot_row_rbuffer, buff_ptr, rec_size);
5893
if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
5910
* -----------------------------------------------------------------------
5918
/*
 * Maintain the "repair-pending" file of a database. The callers in this
 * file pass REP_FIND, REP_DEL or REP_ADD as "what", together with the
 * printable name of the table.
 *
 * Steps that are visible in this block:
 *  - build the path of the file from db->db_main_path;
 *  - open the file (created, with its path, for REP_ADD; allowed to be
 *    missing otherwise) and read it completely into a buffer;
 *  - walk the buffer line by line and compare each name with table_name
 *    using xt_tab_compare_names();
 *  - to add a name, a newline is written if the file does not already
 *    end in one, then the name is appended and the EOF is set;
 *  - to remove a name, the rest of the buffer is moved over it with
 *    memmove(), the buffer is written back and the EOF is set;
 *  - xt_fs_delete() is called on the file on one path (NOTE(review):
 *    presumably when the file has become empty -- the condition is not
 *    visible here).
 * Errors are logged and cleared with xt_log_and_clear_exception().
 *
 * NOTE(review): labels, loop headers, goto/return statements and braces
 * are missing from this excerpt; confirm the control flow and the
 * meaning of the returned value against the complete file.
 */
static xtBool tab_exec_repair_pending(XTDatabaseHPtr db, int what, char *table_name)
5920
XTThreadPtr thread = xt_get_self();
5921
char file_path[PATH_MAX];
5922
XTOpenFilePtr of = NULL;
5924
char *buffer = NULL, *ptr, *name;
5926
xtBool found = FALSE;
5928
xt_strcpy(PATH_MAX, file_path, db->db_main_path);
5929
xt_add_pbxt_file(PATH_MAX, file_path, "repair-pending");
5931
if (what == REP_ADD) {
5932
if (!xt_open_file_ns(&of, file_path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 0))
5936
if (!xt_open_file_ns(&of, file_path, XT_FT_STANDARD, XT_FS_DEFAULT | XT_FS_MISSING_OK, 0))
5942
len = (int) xt_seek_eof_file(NULL, of);
5944
if (!(buffer = (char *) xt_malloc_ns(len + 1)))
5947
if (!xt_pread_file(of, 0, len, len, buffer, NULL, &thread->st_statistics.st_x, thread))
5954
while (*ptr && *ptr != '\n' && *ptr != '\r')
5959
if (xt_tab_compare_names(name, table_name) == 0) {
5974
/* Remove any trailing empty lines: */
5976
if (buffer[len-1] != '\n' && buffer[len-1] != '\r')
5981
if (!xt_pwrite_file(of, len, 1, (void *) "\n", &thread->st_statistics.st_x, thread))
5985
if (!xt_pwrite_file(of, len, strlen(table_name), table_name, &thread->st_statistics.st_x, thread))
5987
len += strlen(table_name);
5988
if (!xt_set_eof_file(NULL, of, len))
5996
memmove(name, ptr, len - (ptr - buffer));
5997
len = len - (ptr - name);
5999
/* Remove trailing empty lines: */
6001
if (buffer[len-1] != '\n' && buffer[len-1] != '\r')
6007
if (!xt_pwrite_file(of, 0, len, buffer, &thread->st_statistics.st_x, thread))
6009
if (!xt_set_eof_file(NULL, of, len))
6016
xt_close_file_ns(of);
6020
xt_fs_delete(NULL, file_path);
6025
xt_close_file_ns(of);
6028
xt_log_and_clear_exception(thread);
6032
/*
 * Build a printable table name from a table path and store it in
 * table_name, which has room for size bytes.
 *
 * The result is "<2nd last path component>.<last path component>".
 * For names that start with "#sql" (temporary tables) both parts are
 * copied as they are. Otherwise both parts are converted with
 * myxt_static_convert_file_name(), and a partition extension in the
 * last component is handled separately: the text from "#P#" on is split
 * at "#SP#" and appended in the form " (<partition> - <sub-partition>)".
 *
 * NOTE(review): the local declarations, else branches and braces are
 * missing from this excerpt, so the exact conditions under which the
 * partition text is appended must be confirmed against the complete
 * file.
 */
xtPublic void xt_tab_make_table_name(XTPathStrPtr tab_path, char *table_name, size_t size)
6036
nptr = xt_last_name_of_path(tab_path->ps_path);
6037
if (xt_starts_with(nptr, "#sql")) {
6038
/* {INVALID-OLD-TABLE-FIX}
6039
* Temporary files can have strange paths, for example
6040
* ..../var/tmp/mysqld.1/#sqldaec_1_6
6041
* This occurs, for example, when the temp_table.test is
6042
* run using the PBXT suite in MariaDB:
6043
* ./mtr --suite=pbxt --do-test=temp_table
6045
* Calling myxt_static_convert_file_name, with a '.', in the name
6047
* [ERROR] Invalid (old?) table or database name 'mysqld.1'
6048
* To prevent this, we do not convert the temporary
6049
* table names using the mysql functions.
6051
* Note, this bug was found by Monty, and fixed by modifying
6052
* xt_2nd_last_name_of_path(), see {INVALID-OLD-TABLE-FIX}.
6055
xt_2nd_last_name_of_path(size, table_name, tab_path->ps_path);
6056
xt_strcat(size, table_name, ".");
6057
xt_strcat(size, table_name, nptr);
6060
char name_buf[XT_TABLE_NAME_SIZE*3+3];
6064
xt_2nd_last_name_of_path(sizeof(name_buf), name_buf, tab_path->ps_path);
6065
myxt_static_convert_file_name(name_buf, table_name, size);
6066
xt_strcat(size, table_name, ".");
6068
/* Handle partition extensions to table names: */
6069
if ((part_ptr = strstr(nptr, "#P#")))
6070
xt_strncpy(sizeof(name_buf), name_buf, nptr, part_ptr - nptr);
6072
xt_strcpy(sizeof(name_buf), name_buf, nptr);
6074
len = strlen(table_name);
6075
myxt_static_convert_file_name(name_buf, table_name + len, size - len);
6078
/* Add the partition extension (which is relevant to the engine). */
6082
if ((sub_part_ptr = strstr(part_ptr, "#SP#")))
6083
xt_strncpy(sizeof(name_buf), name_buf, part_ptr, sub_part_ptr - part_ptr);
6085
xt_strcpy(sizeof(name_buf), name_buf, part_ptr);
6087
xt_strcat(size, table_name, " (");
6088
len = strlen(table_name);
6089
myxt_static_convert_file_name(name_buf, table_name + len, size - len);
6094
xt_strcat(size, table_name, " - ");
6095
len = strlen(table_name);
6096
myxt_static_convert_file_name(sub_part_ptr, table_name + len, size - len);
6099
xt_strcat(size, table_name, ")");
6104
/*
 * Look the table up in the database's "repair-pending" file.
 *
 * The printable table name is built from tab->tab_name and passed to
 * tab_exec_repair_pending() with REP_FIND; that function's result is
 * returned unchanged.
 */
xtPublic xtBool xt_tab_is_table_repair_pending(XTTableHPtr tab)
{
	char table_name[XT_TABLE_NAME_BUF_SIZE];

	xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
	return tab_exec_repair_pending(tab->tab_db, REP_FIND, table_name);
}
6112
/*
 * Note that a table has been repaired.
 *
 * Does nothing unless tab->tab_repair_pending is set. Otherwise the flag
 * is cleared and the table's name is removed from the "repair-pending"
 * file (REP_DEL). The result of the file operation is not checked.
 */
xtPublic void xt_tab_table_repaired(XTTableHPtr tab)
{
	if (tab->tab_repair_pending) {
		char table_name[XT_TABLE_NAME_BUF_SIZE];

		tab->tab_repair_pending = FALSE;
		xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
		tab_exec_repair_pending(tab->tab_db, REP_DEL, table_name);
	}
}
6123
/*
 * Note that a table needs to be repaired.
 *
 * Does nothing if tab->tab_repair_pending is already set. Otherwise the
 * flag is set and the table's name is added to the "repair-pending" file
 * (REP_ADD). The result of the file operation is not checked.
 */
xtPublic void xt_tab_set_table_repair_pending(XTTableHPtr tab)
{
	if (!tab->tab_repair_pending) {
		char table_name[XT_TABLE_NAME_BUF_SIZE];

		tab->tab_repair_pending = TRUE;
		xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
		tab_exec_repair_pending(tab->tab_db, REP_ADD, table_name);
	}
}
6135
* -----------------------------------------------------------------------
6136
* EXTENDED DATA FOR RAM TABLES
6139
/*
 * Reserve a slot for an extended record in the in-memory index of a
 * table (tab->tab_mem_index).
 *
 * Free slots form a list: tab_mem_ind_free holds "slot number + 1" of
 * the first free slot (0 means the list is empty), and the index entry
 * of a free slot holds the same kind of value for the next one. If the
 * list is empty a new slot is taken at the end of the index, which is
 * grown by 100 entries when it is full.
 *
 * The slot is returned through *log_offset as "slot number + 1".
 * req_size is not used.
 *
 * Returns OK on success and FAILED if the index could not be grown.
 *
 * The lock is now released on the failure path as well, and the new
 * slot is cleared before the lock is given up, so that no other thread
 * can reallocate the index in between.
 */
xtPublic xtBool xt_tab_get_ext_slot(XTTableHPtr tab, xtLogID *log_id, xtLogOffset *log_offset, size_t XT_UNUSED(req_size))
{
	size_t new_slot;

	xt_spinlock_lock(&tab->tab_mem_lock);
	if (tab->tab_mem_ind_free) {
		/* Take the first slot off the free list: */
		new_slot = tab->tab_mem_ind_free - 1;
		tab->tab_mem_ind_free = (size_t) tab->tab_mem_index[new_slot];
	}
	else {
		if (tab->tab_mem_ind_usage == tab->tab_mem_ind_size) {
			/* Grow the index: */
			if (!xt_realloc_ns((void **) &tab->tab_mem_index, (tab->tab_mem_ind_size + 100) * sizeof(xtWord1 *))) {
				xt_spinlock_unlock(&tab->tab_mem_lock);
				return FAILED;
			}
			tab->tab_mem_ind_size += 100;
		}
		new_slot = tab->tab_mem_ind_usage;
		tab->tab_mem_ind_usage++;
	}
	tab->tab_mem_index[new_slot] = NULL;
	xt_spinlock_unlock(&tab->tab_mem_lock);

	/* NOTE(review): the assignment to *log_id is not visible in the
	 * excerpt this block was restored from; a fixed ID of 1 is
	 * assumed -- confirm against the complete file.
	 */
	*log_id = 1;
	*log_offset = new_slot + 1;
	return OK;
}
6165
/*
 * Store a copy of an extended record in the in-memory index of a table.
 *
 * log_offset is the value handed out by xt_tab_get_ext_slot(), i.e.
 * "slot number + 1"; log_id is not used. size bytes are copied from data
 * into newly allocated memory, the pointer is stored in the slot, and
 * size is added to tab->tab_mem_total.
 *
 * Returns OK on success and FAILED if the memory could not be allocated.
 */
xtPublic xtBool xt_tab_save_ext_record(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size, xtWord1 *data)
{
	size_t	slot = ((size_t) log_offset) - 1;
	xtWord1	*rec_data;

	/* Allocate and copy outside of the lock: */
	if (!(rec_data = (xtWord1 *) xt_malloc_ns(size)))
		return FAILED;
	memcpy(rec_data, data, size);

	xt_spinlock_lock(&tab->tab_mem_lock);
	tab->tab_mem_total += size;
	tab->tab_mem_index[slot] = rec_data;
	xt_spinlock_unlock(&tab->tab_mem_lock);
	return OK;
}
6180
/*
 * Copy an extended record out of the in-memory index of a table.
 *
 * log_offset is "slot number + 1"; log_id is not used. If the slot is
 * within the used part of the index and holds a pointer, size bytes are
 * copied from it into data. Otherwise data is filled with zeros.
 */
xtPublic void xt_tab_read_ext_record(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size, xtWord1 *data)
{
	size_t slot = ((size_t) log_offset) - 1;

	if (slot < tab->tab_mem_ind_usage && tab->tab_mem_index[slot])
		memcpy(data, tab->tab_mem_index[slot], size);
	else
		memset(data, 0, size);
}
6190
/*
 * Free an extended record slot in the in-memory index of a table.
 *
 * log_offset is "slot number + 1"; log_id is not used. If the slot holds
 * data, the data is freed and size is subtracted from
 * tab->tab_mem_total. The slot is then put at the front of the free
 * list: its entry takes over the old list head and tab_mem_ind_free is
 * set to "slot number + 1".
 */
xtPublic void xt_tab_free_ext_slot(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size)
{
	size_t slot = ((size_t) log_offset) - 1;

	xt_spinlock_lock(&tab->tab_mem_lock);
	if (tab->tab_mem_index[slot]) {
		xt_free_ns(tab->tab_mem_index[slot]);
		tab->tab_mem_total -= size;
	}
	/* Link the slot into the free list: */
	tab->tab_mem_index[slot] = (xtWord1 *) tab->tab_mem_ind_free;
	tab->tab_mem_ind_free = slot + 1;
	xt_spinlock_unlock(&tab->tab_mem_lock);
}
6204
static void tab_free_ext_records(XTTableHPtr tab)
6208
if (!tab->tab_mem_index)
6211
i = tab->tab_mem_ind_free;
6213
next = (size_t) tab->tab_mem_index[i-1];
6214
tab->tab_mem_index[i-1] = NULL;
6218
for (i=0; i<tab->tab_mem_ind_usage; i++) {
6219
if (tab->tab_mem_index[i])
6220
xt_free_ns(tab->tab_mem_index[i]);
6223
xt_free_ns(tab->tab_mem_index);