1
/* Copyright (c) 2005 PrimeBase Technologies GmbH
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
* 2005-02-08 Paul McCullagh
24
#include "xt_config.h"
35
#include <drizzled/common.h>
36
#include <drizzled/dtcollation.h>
38
#include "mysql_priv.h"
42
#include "database_xt.h"
44
#include "strutil_xt.h"
49
#include "systab_xt.h"
52
//#define TRACE_VARIATIONS
53
//#define TRACE_VARIATIONS_IN_DUP_CHECK
54
//#define DUMP_CHECK_TABLE
55
//#define CHECK_INDEX_ON_CHECK_TABLE
56
//#define TRACE_TABLE_IDS
57
//#define TRACE_FLUSH_TABLE
58
//#define TRACE_CREATE_TABLES
61
#define CHECK_TABLE_STATS
63
#ifdef TRACE_TABLE_IDS
64
//#define PRINTF xt_ftracef
65
#define PRINTF xt_trace
68
static void tab_init_row_file(XTThreadPtr self, XTOpenFilePtr of_row, XTTableHPtr tab, XTDictionaryPtr dic);
69
static void tab_init_ind_file(XTThreadPtr self, XTOpenFilePtr of_ind, XTTableHPtr tab, XTDictionaryPtr dic);
70
static void tab_init_data_file(XTThreadPtr self, XTOpenFilePtr of_rec, XTTableHPtr tab, XTDictionaryPtr dic, size_t def_len, XTStringBufferPtr tab_def);
71
static void tab_free_ext_records(XTTableHPtr tab);
74
* -----------------------------------------------------------------------
78
#define XT_MAX_TABLE_FILE_NAME_SIZE (XT_TABLE_NAME_SIZE+6+40)
81
* -----------------------------------------------------------------------
85
/* GOTCHA! The problem:
87
* The server uses names like: "./test/my_tab",
88
* the BLOB streaming engine uses: "test/my_tab"
89
* which leads to the same table being loaded twice.
91
/* Compare two table paths using only what xt_last_2_names_of_path() returns for each. */
/* Two returns are visible below: a case-insensitive one (strcasecmp) and a */
/* case-sensitive one (strcmp). */
/* NOTE(review): the statement that selects between the two returns is not */
/* visible in this listing -- confirm against the full file. */
xtPublic int xt_tab_compare_paths(char *n1, char *n2)
93
n1 = xt_last_2_names_of_path(n1);
94
n2 = xt_last_2_names_of_path(n2);
96
return strcasecmp(n1, n2);
97
return strcmp(n1, n2);
101
* This function compares only the last 2 components of
102
* the path because table names must differ in this area.
104
xtPublic int xt_tab_compare_names(const char *n1, const char *n2)
{
	/* Reduce both arguments to their last two path components first. */
	const char *lhs = xt_last_2_names_of_path(n1);
	const char *rhs = xt_last_2_names_of_path(n2);

	/* pbxt_ignore_case switches the comparison to the case-insensitive form. */
	return pbxt_ignore_case ? strcasecmp(lhs, rhs) : strcmp(lhs, rhs);
}
114
* -----------------------------------------------------------------------
118
/*
 * Case-sensitive comparison callback for the table hash table.
 * key is a path string, data is a table handle; the result is non-zero
 * when the last two path components of both are identical.
 */
static xtBool tab_list_comp(void *key, void *data)
{
	const char *wanted = xt_last_2_names_of_path((char *) key);
	const char *stored = xt_last_2_names_of_path(((XTTableHPtr) data)->tab_name->ps_path);

	return strcmp(wanted, stored) == 0;
}
125
/* Hash callback for the table hash table (xt_ht_hash variant). */
/* Both visible returns hash the result of xt_last_2_names_of_path(): the first */
/* treats key_data as a path string, the second as a table handle and uses */
/* its tab_name->ps_path. */
/* NOTE(review): the test that chooses between the two returns (presumably on */
/* is_key) is not visible in this listing -- confirm against the full file. */
static xtHashValue tab_list_hash(xtBool is_key, void *key_data)
127
XTTableHPtr tab = (XTTableHPtr) key_data;
130
return xt_ht_hash(xt_last_2_names_of_path((char *) key_data));
131
return xt_ht_hash(xt_last_2_names_of_path(tab->tab_name->ps_path));
134
/*
 * Case-insensitive comparison callback for the table hash table.
 * key is a path string, data is a table handle; the result is non-zero
 * when the last two path components of both match, ignoring case.
 */
static xtBool tab_list_comp_ci(void *key, void *data)
{
	const char *wanted = xt_last_2_names_of_path((char *) key);
	const char *stored = xt_last_2_names_of_path(((XTTableHPtr) data)->tab_name->ps_path);

	return strcasecmp(wanted, stored) == 0;
}
141
/* Hash callback for the table hash table (xt_ht_casehash variant). */
/* Both visible returns hash the result of xt_last_2_names_of_path(): the first */
/* treats key_data as a path string, the second as a table handle and uses */
/* its tab_name->ps_path. */
/* NOTE(review): the test that chooses between the two returns (presumably on */
/* is_key) is not visible in this listing -- confirm against the full file. */
static xtHashValue tab_list_hash_ci(xtBool is_key, void *key_data)
143
XTTableHPtr tab = (XTTableHPtr) key_data;
146
return xt_ht_casehash(xt_last_2_names_of_path((char *) key_data));
147
return xt_ht_casehash(xt_last_2_names_of_path(tab->tab_name->ps_path));
150
/*
 * Free callback for entries of the table hash table.
 * Clears the back-pointer held in the database's by-ID list, drops the
 * dictionary table's references (if any) and releases the table handle.
 */
static void tab_list_free(XTThreadPtr self, void *data)
{
	XTTableHPtr		table = (XTTableHPtr) data;
	XTTableEntryPtr	entry;

	/* When the table leaves the name list, the by-ID entry must no
	 * longer point at it:
	 */
	entry = (XTTableEntryPtr) xt_sl_find(self, table->tab_db->db_table_by_id, &table->tab_id);
	if (entry != NULL)
		entry->te_table = NULL;

	if (table->tab_dic.dic_table != NULL)
		table->tab_dic.dic_table->removeReferences(self);

	xt_heap_release(self, table);
}
167
/*
 * Release the record, row and index file references held by the table
 * (in that order) and reset each pointer to NULL afterwards.
 */
static void tab_close_files(XTThreadPtr self, XTTableHPtr tab)
{
	/* Record file */
	if (tab->tab_rec_file != NULL) {
		xt_fs_release_file(self, tab->tab_rec_file);
		tab->tab_rec_file = NULL;
	}

	/* Row file */
	if (tab->tab_row_file != NULL) {
		xt_fs_release_file(self, tab->tab_row_file);
		tab->tab_row_file = NULL;
	}

	/* Index file */
	if (tab->tab_ind_file != NULL) {
		xt_fs_release_file(self, tab->tab_ind_file);
		tab->tab_ind_file = NULL;
	}
}
183
static void tab_finalize(XTThreadPtr self, void *x)
185
XTTableHPtr tab = (XTTableHPtr) x;
187
xt_exit_row_locks(&tab->tab_locks);
189
xt_xres_exit_tab(self, tab);
191
if (tab->tab_ind_free_list) {
192
XTIndFreeListPtr list, flist;
194
list = tab->tab_ind_free_list;
197
list = list->fl_next_list;
198
xt_free(self, flist);
200
tab->tab_ind_free_list = NULL;
203
tab_close_files(self, tab);
205
if (tab->tab_index_head) {
206
xt_free(self, tab->tab_index_head);
207
tab->tab_index_head = NULL;
210
tab_free_ext_records(tab);
212
#ifdef TRACE_TABLE_IDS
213
PRINTF("%s: free TABLE: db=%d tab=%d %s\n", self->t_name, (int) tab->tab_db ? tab->tab_db->db_id : 0, (int) tab->tab_id,
214
tab->tab_name ? xt_last_2_names_of_path(tab->tab_name->ps_path) : "?");
217
xt_free(self, tab->tab_name);
218
tab->tab_name = NULL;
220
myxt_free_dictionary(self, &tab->tab_dic);
221
if (tab->tab_free_locks) {
222
tab->tab_seq.xt_op_seq_exit(self);
223
xt_spinlock_free(self, &tab->tab_mem_lock);
224
xt_spinlock_free(self, &tab->tab_ainc_lock);
225
xt_free_mutex(&tab->tab_rec_flush_lock);
226
xt_free_mutex(&tab->tab_ind_flush_lock);
227
xt_free_mutex(&tab->tab_ind_stat_lock);
228
xt_free_mutex(&tab->tab_dic_field_lock);
229
xt_free_mutex(&tab->tab_row_lock);
230
xt_free_mutex(&tab->tab_ind_lock);
231
xt_free_mutex(&tab->tab_rec_lock);
232
for (u_int i=0; i<XT_ROW_RWLOCKS; i++)
233
XT_TAB_ROW_FREE_LOCK(self, &tab->tab_row_rwlock[i]);
235
#ifdef XT_SORT_REC_WRITES
236
if (tab->tab_rec_dw_writes) {
237
xt_free_sortedlist(self, tab->tab_rec_dw_writes);
238
tab->tab_rec_dw_writes = NULL;
240
if (tab->tab_rec_dw_data)
241
xt_free_ns(tab->tab_rec_dw_data);
243
if (tab->tab_rec_flush_task)
244
tab->tab_rec_flush_task->tk_exit();
245
if (tab->tab_ind_flush_task)
246
tab->tab_ind_flush_task->tk_exit();
249
static void tab_onrelease(void *x)
251
XTTableHPtr tab = (XTTableHPtr) x;
253
/* Signal threads waiting for exclusive use of the table: */
254
if (tab->tab_db->db_tables)
255
xt_ht_signal(NULL, tab->tab_db->db_tables);
259
* -----------------------------------------------------------------------
264
* This function sets the table name to "", if the file
265
* does not belong to XT.
267
xtPublic char *xt_tab_file_to_name(size_t size, char *tab_name, char *file_name)
272
file_name = xt_last_name_of_path(file_name);
273
cptr = file_name + strlen(file_name) - 1;
274
while (cptr > file_name && *cptr != '.')
276
if (cptr > file_name && *cptr == '.') {
277
if (strcmp(cptr, ".xtl") == 0 || strcmp(cptr, ".xtr") == 0) {
279
while (cptr > file_name && isdigit(*cptr))
283
const char **ext = pbxt_extensions;
286
if (strcmp(cptr, *ext) == 0)
295
len = cptr - file_name;
299
memcpy(tab_name, file_name, len);
302
/* Return a pointer to what was removed! */
303
return file_name + len;
306
static void tab_get_row_file_name(char *table_name, char *name, xtTableID tab_id)
308
sprintf(table_name, "%s-%lu.xtr", name, (u_long) tab_id);
311
static void tab_get_data_file_name(char *table_name, char *name, xtTableID XT_UNUSED(tab_id))
313
sprintf(table_name, "%s.xtd", name);
316
static void tab_get_index_file_name(char *table_name, char *name, xtTableID XT_UNUSED(tab_id))
318
sprintf(table_name, "%s.xti", name);
321
/*
 * Item-free callback of the by-ID table list: frees the entry's name
 * string (when set) and resets the remaining fields of the entry.
 */
static void tab_free_by_id(XTThreadPtr self, void *XT_UNUSED(thunk), void *item)
{
	XTTableEntryPtr entry = (XTTableEntryPtr) item;

	if (entry->te_tab_name != NULL) {
		xt_free(self, entry->te_tab_name);
		entry->te_tab_name = NULL;
	}

	/* Leave the entry in a cleared state. */
	entry->te_table = NULL;
	entry->te_heap_tab = FALSE;
	entry->te_tab_id = 0;
}
334
/* Comparison callback of the by-ID table list. */
/* a points to an xtTableID key; b points to a table entry whose te_tab_id */
/* is compared against that key. */
/* NOTE(review): the values returned for the "less", "equal" and remaining */
/* cases are not visible in this listing -- confirm against the full file. */
static int tab_comp_by_id(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
336
xtTableID te_id = *((xtTableID *) a);
337
XTTableEntryPtr te_ptr = (XTTableEntryPtr) b;
339
if (te_id < te_ptr->te_tab_id)
341
if (te_id == te_ptr->te_tab_id)
346
/*
 * Item-free callback of the table path list. Each list item stores a
 * pointer to a path record; that record is what gets freed.
 */
static void tab_free_path(XTThreadPtr self, void *XT_UNUSED(thunk), void *item)
{
	XTTablePathPtr *slot = (XTTablePathPtr *) item;
	XTTablePathPtr path_rec = *slot;

	xt_free(self, path_rec);
}
353
/*
 * Comparison callback of the table path list: a is the search path
 * string, b is a list item holding a pointer to a path record. The
 * ordering is delegated to xt_tab_compare_paths().
 */
static int tab_comp_path(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
{
	XTTablePathPtr entry = *((XTTablePathPtr *) b);

	return xt_tab_compare_paths((char *) a, entry->tp_path);
}
361
static xtBool tab_get_name_value(XTTableDescPtr td, char **ret_name, char **ret_value)
363
char *ptr = td->x.z.td_curr_ptr;
365
while (*ptr && isspace(*ptr)) ptr++;
367
td->x.z.td_curr_ptr = ptr;
372
while (*ptr && *ptr != '=' && *ptr != '\r' && *ptr != '\n') ptr++;
377
while (*ptr && *ptr != '\r' && *ptr != '\n') ptr++;
390
td->x.z.td_curr_ptr = ptr;
394
xtPublic void xt_describe_tables_init(XTThreadPtr self, XTDatabaseHPtr db, XTTableDescPtr td)
399
xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
400
xt_add_tables_file(PATH_MAX, pbuf);
401
if (xt_fs_exists(pbuf))
402
td->td_type = XT_TD_FROM_TAB_FILE;
404
td->td_type = XT_TD_FROM_DIRECTORY;
406
switch (td->td_type) {
407
case XT_TD_FROM_DIRECTORY:
408
td->x.y.td_path_idx = 0;
409
if (td->x.y.td_path_idx < xt_sl_get_size(db->db_table_paths)) {
410
XTTablePathPtr *tp_ptr;
412
tp_ptr = (XTTablePathPtr *) xt_sl_item_at(db->db_table_paths, td->x.y.td_path_idx);
413
td->td_tab_path = *tp_ptr;
414
td->x.y.td_open_dir = xt_dir_open(self, td->td_tab_path->tp_path, "*.xtr");
417
td->x.y.td_open_dir = NULL;
419
case XT_TD_FROM_TAB_FILE:
426
of = xt_open_file(self, pbuf, XT_FT_STANDARD, XT_FS_DEFAULT, 1024);
427
pushr_(xt_close_file, of);
428
len = (int) xt_seek_eof_file(self, of);
429
buffer = (char *) xt_malloc(self, len + 1);
430
pushr_(xt_free, buffer);
431
if (!xt_pread_file(of, 0, len, len, buffer, NULL, &self->st_statistics.st_x, self))
434
popr_(); // Discard xt_free(buffer)
435
freer_(); // xt_close_file(of)
437
td->x.z.td_table_info = buffer;
438
td->x.z.td_curr_ptr = buffer;
439
while (tab_get_name_value(td, &name, &value)) {
440
if (strcmp(name, "[table]") == 0)
447
xtPublic xtBool xt_describe_tables_next(XTThreadPtr self, XTTableDescPtr td)
454
switch (td->td_type) {
455
case XT_TD_FROM_DIRECTORY:
457
if (!td->x.y.td_open_dir)
460
r = xt_dir_next(self, td->x.y.td_open_dir);
463
xt_describe_tables_exit(self, td);
468
XTTablePathPtr *tp_ptr;
470
if (td->x.y.td_path_idx+1 >= xt_sl_get_size(td->td_db->db_table_paths))
473
if (td->x.y.td_open_dir)
474
xt_dir_close(NULL, td->x.y.td_open_dir);
475
td->x.y.td_open_dir = NULL;
477
td->x.y.td_path_idx++;
478
tp_ptr = (XTTablePathPtr *) xt_sl_item_at(td->td_db->db_table_paths, td->x.y.td_path_idx);
479
td->td_tab_path = *tp_ptr;
480
td->x.y.td_open_dir = xt_dir_open(self, td->td_tab_path->tp_path, "*.xtr");
484
tab_name = xt_dir_name(self, td->x.y.td_open_dir);
485
td->td_tab_id = (xtTableID) xt_file_name_to_id(tab_name);
486
xt_tab_file_to_name(XT_TABLE_NAME_SIZE, td->td_tab_name, tab_name);
487
td->td_heap_tab = FALSE;
489
case XT_TD_FROM_TAB_FILE:
494
while (tab_get_name_value(td, &name, &value)) {
495
if (strcmp(name, "name") == 0)
496
xt_strcpy(XT_TABLE_NAME_SIZE, td->td_tab_name, value);
497
else if (strcmp(name, "id") == 0) {
500
sscanf(value, "%lu", &lvalue);
501
td->td_tab_id = (xtTableID) lvalue;
503
else if (strcmp(name, "storage") == 0) {
504
if (strcmp(value, "heap") == 0)
505
td->td_heap_tab = TRUE;
507
td->td_heap_tab = FALSE;
509
else if (strcmp(name, "location") == 0) {
511
XTTablePathPtr db_path;
516
/* Convert path to WIN path: */
523
if ((tp = (XTTablePathPtr *) xt_sl_find(self, td->td_db->db_table_paths, value)))
529
db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
530
db_path->tp_tab_count = 0;
531
memcpy(db_path->tp_path, value, len);
532
db_path->tp_path[len] = 0;
533
xt_sl_insert(self, td->td_db->db_table_paths, db_path->tp_path, &db_path);
535
td->td_tab_path = db_path;
537
else if (strcmp(name, "type") == 0) {
540
sscanf(value, "%lu", &lvalue);
541
td->td_tab_type = (xtWord1) lvalue;
543
else if (strcmp(name, "[table]") == 0)
553
/* Release whatever a table description holds, depending on td->td_type. */
/* NOTE(review): the statements that end each case (presumably break) and the */
/* closing braces are not visible in this listing -- confirm against the full file. */
xtPublic void xt_describe_tables_exit(XTThreadPtr self, XTTableDescPtr td)
555
switch (td->td_type) {
556
case XT_TD_FROM_DIRECTORY:
557
// Directory variant: close the open directory handle, if there is one.
if (td->x.y.td_open_dir)
558
xt_dir_close(NULL, td->x.y.td_open_dir);
559
td->x.y.td_open_dir = NULL;
561
case XT_TD_FROM_TAB_FILE:
562
// Tables-file variant: free the buffered table info and reset the cursor into it.
if (td->x.z.td_table_info) {
563
xt_free(self, td->x.z.td_table_info);
564
td->x.z.td_table_info = NULL;
566
td->x.z.td_curr_ptr = NULL;
569
// The path pointer is only cleared here; nothing in this function frees it.
td->td_tab_path = NULL;
572
xtPublic void xt_tab_init_db(XTThreadPtr self, XTDatabaseHPtr db)
575
XTTableEntryRec te_tab;
576
XTTableEntryPtr te_ptr;
577
XTTablePathPtr db_path;
583
pushr_(xt_tab_exit_db, db);
584
if (pbxt_ignore_case)
585
db->db_tables = xt_new_hashtable(self, tab_list_comp_ci, tab_list_hash_ci, tab_list_free, TRUE, TRUE);
587
db->db_tables = xt_new_hashtable(self, tab_list_comp, tab_list_hash, tab_list_free, TRUE, TRUE);
588
db->db_table_by_id = xt_new_sortedlist(self, sizeof(XTTableEntryRec), 20, 20, tab_comp_by_id, db, tab_free_by_id, FALSE, FALSE);
589
db->db_table_paths = xt_new_sortedlist(self, sizeof(XTTablePathPtr), 20, 20, tab_comp_path, db, tab_free_path, FALSE, FALSE);
591
if (db->db_multi_path) {
593
char *buffer, *ptr, *path;
595
xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
596
xt_add_tables_file(PATH_MAX, pbuf);
597
if (!xt_fs_exists(pbuf)) {
598
/* Load the location file, if a tables file does not
601
xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
602
xt_add_location_file(PATH_MAX, pbuf);
603
if (xt_fs_exists(pbuf)) {
604
of = xt_open_file(self, pbuf, XT_FT_STANDARD, XT_FS_DEFAULT, 1024);
605
pushr_(xt_close_file, of);
606
len = (int) xt_seek_eof_file(self, of);
607
buffer = (char *) xt_malloc(self, len + 1);
608
pushr_(xt_free, buffer);
609
if (!xt_pread_file(of, 0, len, len, buffer, NULL, &self->st_statistics.st_x, self))
614
/* Ignore preceding space: */
615
while (*ptr && isspace(*ptr))
618
while (*ptr && *ptr != '\n' && *ptr != '\r') {
620
/* Undo the conversion below: */
626
if (*path != '#' && ptr > path) {
627
len = (int) (ptr - path);
628
db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
629
db_path->tp_tab_count = 0;
630
memcpy(db_path->tp_path, path, len);
631
db_path->tp_path[len] = 0;
632
xt_sl_insert(self, db->db_table_paths, db_path->tp_path, &db_path);
636
freer_(); // xt_free(buffer)
637
freer_(); // xt_close_file(of)
642
len = (int) strlen(db->db_main_path);
643
db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
644
db_path->tp_tab_count = 0;
645
strcpy(db_path->tp_path, db->db_main_path);
646
xt_sl_insert(self, db->db_table_paths, db_path->tp_path, &db_path);
649
xt_describe_tables_init(self, db, &desc);
650
pushr_(xt_describe_tables_exit, &desc);
651
while (xt_describe_tables_next(self, &desc)) {
652
te_tab.te_tab_id = desc.td_tab_id;
653
te_tab.te_heap_tab = desc.td_heap_tab;
655
if (te_tab.te_tab_id > db->db_curr_tab_id)
656
db->db_curr_tab_id = te_tab.te_tab_id;
658
te_tab.te_tab_name = xt_dup_string(self, desc.td_tab_name);
659
te_tab.te_tab_path = desc.td_tab_path;
660
desc.td_tab_path->tp_tab_count++;
661
te_tab.te_table = NULL;
662
te_tab.te_type = desc.td_tab_type;
663
xt_sl_insert(self, db->db_table_by_id, &desc.td_tab_id, &te_tab);
665
freer_(); // xt_describe_tables_exit(&desc)
668
* The purpose of this code is to ensure that all tables are opened and cached,
669
* which is actually only required if tables have foreign key references.
671
* In other words, a side effect of this code is that FK references between tables
672
* are registered, and checked.
674
* Unfortunately we don't know if a table is referenced by a FK, so we have to open
677
* Cannot open tables in the loop above because db->db_table_by_id which is built
678
* above is used by xt_use_table_no_lock()
681
* NOTE: The code also led to the statistics failing to work because
682
* the tables were already open when the handler was opened.
683
* Previously we only calculated statistics when a handler was opened
684
* and the underlying table was also opened.
688
xt_enum_tables_init(&edx);
689
while ((te_ptr = xt_enum_tables_next(self, db, &edx))) {
690
xt_strcpy(PATH_MAX, pbuf, te_ptr->te_tab_path->tp_path);
691
xt_add_dir_char(PATH_MAX, pbuf);
692
xt_strcat(PATH_MAX, pbuf, te_ptr->te_tab_name);
693
if ((tab = xt_use_table_no_lock_ns(db, (XTPathStrPtr) pbuf, FALSE, FALSE, NULL)))
694
xt_heap_release_ns(tab);
696
xt_log_and_clear_warning(self);
699
popr_(); // Discard xt_tab_exit_db(db)
703
static void tab_save_tables(XTThreadPtr self, XTDatabaseHPtr db)
705
XTTableEntryPtr te_ptr;
706
XTStringBufferRec buffer;
710
memset(&buffer, 0, sizeof(buffer));
712
xt_strcpy(PATH_MAX, path, db->db_main_path);
713
xt_add_tables_file(PATH_MAX, path);
715
if (xt_sl_get_size(db->db_table_by_id)) {
716
pushr_(xt_sb_free, &buffer);
717
for (u_int i=0; i<xt_sl_get_size(db->db_table_by_id); i++) {
718
te_ptr = (XTTableEntryPtr) xt_sl_item_at(db->db_table_by_id, i);
719
xt_sb_concat(self, &buffer, "[table]\n");
720
xt_sb_concat(self, &buffer, "id=");
721
xt_sb_concat_int8(self, &buffer, (xtInt8) te_ptr->te_tab_id);
722
xt_sb_concat(self, &buffer, "\n");
723
xt_sb_concat(self, &buffer, "name=");
724
xt_sb_concat(self, &buffer, te_ptr->te_tab_name);
725
xt_sb_concat(self, &buffer, "\n");
726
xt_sb_concat(self, &buffer, "location=");
727
xt_sb_concat(self, &buffer, te_ptr->te_tab_path->tp_path);
728
xt_sb_concat(self, &buffer, "\n");
729
xt_sb_concat(self, &buffer, "storage=");
730
if (te_ptr->te_heap_tab)
731
xt_sb_concat(self, &buffer, "heap\n");
733
xt_sb_concat(self, &buffer, "disk\n");
734
xt_sb_concat(self, &buffer, "type=");
735
xt_sb_concat_int8(self, &buffer, (xtInt8) te_ptr->te_type);
736
xt_sb_concat(self, &buffer, "\n");
740
/* To make the location file cross-platform (at least
741
* as long as relative paths are used) we replace all '\'
745
ptr = buffer.sb_cstring;
752
of = xt_open_file(self, path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 1024);
753
pushr_(xt_close_file, of);
754
if (!xt_pwrite_file(of, 0, strlen(buffer.sb_cstring), buffer.sb_cstring, &self->st_statistics.st_x, self))
756
xt_set_eof_file(self, of, strlen(buffer.sb_cstring));
757
freer_(); // xt_close_file(of)
759
freer_(); // xt_sb_free(&buffer);
762
xt_fs_delete(NULL, path);
765
static void tab_save_table_paths(XTThreadPtr self, XTDatabaseHPtr db)
767
XTTablePathPtr *tp_ptr;
768
XTStringBufferRec buffer;
772
memset(&buffer, 0, sizeof(buffer));
774
xt_strcpy(PATH_MAX, path, db->db_main_path);
775
xt_add_location_file(PATH_MAX, path);
777
if (xt_sl_get_size(db->db_table_paths)) {
778
pushr_(xt_sb_free, &buffer);
779
for (u_int i=0; i<xt_sl_get_size(db->db_table_paths); i++) {
780
tp_ptr = (XTTablePathPtr *) xt_sl_item_at(db->db_table_paths, i);
781
xt_sb_concat(self, &buffer, (*tp_ptr)->tp_path);
782
xt_sb_concat(self, &buffer, "\n");
786
/* To make the location file cross-platform (at least
787
* as long as relative paths are used) we replace all '\'
791
ptr = buffer.sb_cstring;
799
of = xt_open_file(self, path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 1024);
800
pushr_(xt_close_file, of);
801
if (!xt_pwrite_file(of, 0, strlen(buffer.sb_cstring), buffer.sb_cstring, &self->st_statistics.st_x, self))
803
xt_set_eof_file(self, of, strlen(buffer.sb_cstring));
804
freer_(); // xt_close_file(of)
806
freer_(); // xt_sb_free(&buffer);
809
xt_fs_delete(NULL, path);
812
static XTTablePathPtr tab_get_table_path(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr tab_name, xtBool save_it)
814
XTTablePathPtr *tp, tab_path;
817
xt_strcpy(PATH_MAX, path, tab_name->ps_path);
818
xt_remove_last_name_of_path(path);
819
xt_remove_dir_char(path);
820
tp = (XTTablePathPtr *) xt_sl_find(self, db->db_table_paths, path);
824
int len = (int) strlen(path);
826
tab_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
827
tab_path->tp_tab_count = 0;
828
memcpy(tab_path->tp_path, path, len);
829
tab_path->tp_path[len] = 0;
830
xt_sl_insert(self, db->db_table_paths, tab_path->tp_path, &tab_path);
832
tab_save_table_paths(self, db);
833
if (xt_sl_get_size(db->db_table_paths) == 1) {
834
XTSystemTableShare::createSystemTables(self, db);
838
tab_path->tp_tab_count++;
842
/*
 * Drop one table reference from a path record. When the count reaches
 * zero the path is deleted from the database's path list and the
 * location file is rewritten via tab_save_table_paths().
 */
static void tab_remove_table_path(XTThreadPtr self, XTDatabaseHPtr db, XTTablePathPtr tab_path)
{
	/* Nothing to do when the path is not referenced. */
	if (!(tab_path->tp_tab_count > 0))
		return;

	tab_path->tp_tab_count--;
	if (tab_path->tp_tab_count != 0)
		return;

	/* Last reference gone: remove the path and persist the new list. */
	xt_sl_delete(self, db->db_table_paths, tab_path->tp_path);
	tab_save_table_paths(self, db);
}
853
/*
 * Two-argument form of tab_remove_table_path() that takes the database
 * from the calling thread (self->st_database).
 */
static void tab_free_table_path(XTThreadPtr self, XTTablePathPtr tab_path)
{
	tab_remove_table_path(self, self->st_database, tab_path);
}
860
/* Tear down the per-database table structures: the table hash table, the */
/* by-ID list and the path list. Each pointer is reset to NULL after its */
/* container has been freed. */
/* NOTE(review): whether the hash table free is guarded by a NULL check like */
/* the two lists is not visible in this listing -- confirm against the full file. */
xtPublic void xt_tab_exit_db(XTThreadPtr self, XTDatabaseHPtr db)
863
xt_free_hashtable(self, db->db_tables);
864
db->db_tables = NULL;
866
if (db->db_table_by_id) {
867
xt_free_sortedlist(self, db->db_table_by_id);
868
db->db_table_by_id = NULL;
870
if (db->db_table_paths) {
871
xt_free_sortedlist(self, db->db_table_paths);
872
db->db_table_paths = NULL;
877
/*
 * Report whether the database's by-ID table list contains at least
 * one entry.
 */
xtPublic xtBool xt_table_exists(XTDatabaseHPtr db)
{
	if (xt_sl_get_size(db->db_table_by_id) > 0)
		return TRUE;
	return FALSE;
}
883
* Enumerate all tables in the current database.
886
xtPublic void xt_enum_tables_init(u_int *edx)
891
xtPublic XTTableEntryPtr xt_enum_tables_next(XTThreadPtr XT_UNUSED(self), XTDatabaseHPtr db, u_int *edx)
893
XTTableEntryPtr en_ptr;
895
if (*edx >= xt_sl_get_size(db->db_table_by_id))
897
en_ptr = (XTTableEntryPtr) xt_sl_item_at(db->db_table_by_id, *edx);
902
xtPublic void xt_enum_files_of_tables_init(XTPathStrPtr tab_name, xtTableID tab_id, XTFilesOfTablePtr ft)
905
ft->ft_tab_name = tab_name;
906
ft->ft_tab_id = tab_id;
909
xtPublic xtBool xt_enum_files_of_tables_next(XTFilesOfTablePtr ft)
911
char file_name[XT_MAX_TABLE_FILE_NAME_SIZE];
914
switch (ft->ft_state) {
916
tab_get_row_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
919
tab_get_data_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
922
tab_get_index_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
929
xt_strcpy(PATH_MAX, ft->ft_file_path, ft->ft_tab_name->ps_path);
930
xt_remove_last_name_of_path(ft->ft_file_path);
931
xt_strcat(PATH_MAX, ft->ft_file_path, file_name);
932
if (!xt_fs_exists(ft->ft_file_path))
938
static xtBool tab_find_table(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr name, xtTableID *tab_id)
941
XTTableEntryPtr te_ptr;
944
xt_enum_tables_init(&edx);
945
while ((te_ptr = xt_enum_tables_next(self, db, &edx))) {
946
xt_strcpy(PATH_MAX, path, te_ptr->te_tab_path->tp_path);
947
xt_add_dir_char(PATH_MAX, path);
948
xt_strcat(PATH_MAX, path, te_ptr->te_tab_name);
949
if (xt_tab_compare_names(path, name->ps_path) == 0) {
950
*tab_id = te_ptr->te_tab_id;
957
/*
 * Record ind_error as the reason the table's indexes are disabled
 * (stored in tab_dic.dic_disable_index) and mark the table as having
 * a repair pending.
 */
xtPublic void xt_tab_disable_index(XTTableHPtr tab, u_int ind_error)
{
	/* Remember why the index cannot be used ... */
	tab->tab_dic.dic_disable_index = ind_error;

	/* ... and flag the table for repair. */
	xt_tab_set_table_repair_pending(tab);
}
963
xtPublic void xt_tab_set_index_error(XTTableHPtr tab)
965
switch (tab->tab_dic.dic_disable_index) {
968
case XT_INDEX_TOO_OLD:
969
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_OLD_VERSION, tab->tab_name);
971
case XT_INDEX_TOO_NEW:
972
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_NEW_VERSION, tab->tab_name);
974
case XT_INDEX_BAD_BLOCK:
977
sprintf(number, "%d", (int) tab->tab_index_page_size);
978
xt_register_i2xterr(XT_REG_CONTEXT, XT_ERR_BAD_IND_BLOCK_SIZE, xt_last_name_of_path(tab->tab_name->ps_path), number);
980
case XT_INDEX_CORRUPTED:
981
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, tab->tab_name);
983
case XT_INDEX_MISSING:
984
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_MISSING, tab->tab_name);
986
case XT_INDEX_NOT_RECOVERED:
987
xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_NOT_RECOVERED, tab->tab_name);
992
static void tab_load_index_header(XTThreadPtr self, XTTableHPtr tab, XTOpenFilePtr file, XTPathStrPtr table_name)
997
XTIndexFormatDPtr index_fmt;
999
/* Load the pointers: */
1000
if (tab->tab_index_head)
1001
xt_free_ns(tab->tab_index_head);
1002
tab->tab_index_head = (XTIndexHeadDPtr) xt_calloc(self, XT_INDEX_HEAD_SIZE);
1005
if (!xt_pread_file(file, 0, XT_INDEX_HEAD_SIZE, 0, tab->tab_index_head, NULL, &self->st_statistics.st_ind, self))
1008
tab->tab_index_format_offset = XT_GET_DISK_4(tab->tab_index_head->tp_format_offset_4);
1009
index_fmt = (XTIndexFormatDPtr) (((xtWord1 *) tab->tab_index_head) + tab->tab_index_format_offset);
1011
/* If the table version is less than or equal to an incompatible (unsupported
1012
* version), or greater than the current version, then we cannot open this table
1014
if (XT_GET_DISK_2(index_fmt->if_tab_version_2) <= XT_TAB_INCOMPATIBLE_VERSION ||
1015
XT_GET_DISK_2(index_fmt->if_tab_version_2) > XT_TAB_CURRENT_VERSION) {
1016
switch (XT_GET_DISK_2(index_fmt->if_tab_version_2)) {
1018
xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.91 Beta");
1021
xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.85 Beta");
1024
xt_throw_taberr(XT_CONTEXT, XT_ERR_BAD_TABLE_VERSION, table_name);
1030
tab->tab_dic.dic_index_ver = XT_GET_DISK_2(index_fmt->if_ind_version_2);
1031
tab->tab_dic.dic_disable_index = XT_INDEX_OK;
1033
if (tab->tab_dic.dic_index_ver == 1) {
1034
tab->tab_index_header_size = 1024 * 16;
1035
tab->tab_index_page_size = 1024 * 16;
1038
tab->tab_index_header_size = XT_GET_DISK_4(tab->tab_index_head->tp_header_size_4);
1039
tab->tab_index_page_size = XT_GET_DISK_4(index_fmt->if_page_size_4);
1042
#ifdef XT_USE_LAZY_DELETE
1043
if (tab->tab_dic.dic_index_ver <= XT_IND_NO_LAZY_DELETE)
1044
tab->tab_dic.dic_no_lazy_delete = TRUE;
1046
tab->tab_dic.dic_no_lazy_delete = FALSE;
1048
tab->tab_dic.dic_no_lazy_delete = TRUE;
1051
/* Incorrect version of index is handled by allowing a sequential scan, but no index access.
1052
* Recovery with the wrong index type will not recover the indexes, a REPAIR TABLE
1055
if (tab->tab_dic.dic_index_ver != XT_IND_CURRENT_VERSION) {
1056
switch (tab->tab_dic.dic_index_ver) {
1057
case XT_IND_NO_LAZY_DELETE:
1058
case XT_IND_LAZY_DELETE_OK:
1059
/* I can handle this type of index. */
1062
if (tab->tab_dic.dic_index_ver < XT_IND_CURRENT_VERSION)
1063
xt_tab_disable_index(tab, XT_INDEX_TOO_OLD);
1065
xt_tab_disable_index(tab, XT_INDEX_TOO_NEW);
1069
else if (tab->tab_index_page_size != XT_INDEX_PAGE_SIZE)
1070
xt_tab_disable_index(tab, XT_INDEX_BAD_BLOCK);
1073
memset(tab->tab_index_head, 0, XT_INDEX_HEAD_SIZE);
1074
xt_tab_disable_index(tab, XT_INDEX_MISSING);
1075
tab->tab_index_header_size = XT_INDEX_HEAD_SIZE;
1076
tab->tab_index_page_size = XT_INDEX_PAGE_SIZE;
1077
tab->tab_dic.dic_index_ver = 0;
1078
tab->tab_index_format_offset = 0;
1082
if (tab->tab_dic.dic_disable_index) {
1083
xt_tab_set_index_error(tab);
1084
xt_log_and_clear_exception_ns();
1087
if (tab->tab_dic.dic_disable_index) {
1088
/* Reset, as if we have empty indexes.
1089
* Flush will wipe things out, of course.
1090
* REPAIR TABLE will be required...
1092
XT_NODE_ID(tab->tab_ind_eof) = 1;
1093
XT_NODE_ID(tab->tab_ind_free) = 0;
1095
ind = tab->tab_dic.dic_keys;
1096
for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++)
1097
XT_NODE_ID((*ind)->mi_root) = 0;
1100
XT_NODE_ID(tab->tab_ind_eof) = (xtIndexNodeID) XT_GET_DISK_6(tab->tab_index_head->tp_ind_eof_6);
1101
XT_NODE_ID(tab->tab_ind_free) = (xtIndexNodeID) XT_GET_DISK_6(tab->tab_index_head->tp_ind_free_6);
1103
data = tab->tab_index_head->tp_data;
1104
ind = tab->tab_dic.dic_keys;
1105
for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++) {
1106
(*ind)->mi_root = XT_GET_NODE_REF(tab, data);
1107
data += XT_NODE_REF_SIZE;
1112
static void tab_load_table_format(XTThreadPtr self, XTOpenFilePtr file, XTPathStrPtr table_name, size_t *ret_format_offset, size_t *ret_head_size, XTDictionaryPtr dic)
1114
XTDiskValue4 size_buf;
1116
XTTableFormatDRec tab_fmt;
1119
if (!xt_pread_file(file, 0, 4, 4, &size_buf, NULL, &self->st_statistics.st_rec, self))
1122
head_size = XT_GET_DISK_4(size_buf);
1123
*ret_format_offset = head_size;
1125
/* Load the table format information: */
1126
if (!xt_pread_file(file, head_size, offsetof(XTTableFormatDRec, tf_definition), offsetof(XTTableFormatDRec, tf_tab_version_2) + 2, &tab_fmt, NULL, &self->st_statistics.st_rec, self))
1129
/* If the table version is less than or equal to an incompatible (unsupported
1130
* version), or greater than the current version, then we cannot open this table
1132
if (XT_GET_DISK_2(tab_fmt.tf_tab_version_2) <= XT_TAB_INCOMPATIBLE_VERSION ||
1133
XT_GET_DISK_2(tab_fmt.tf_tab_version_2) > XT_TAB_CURRENT_VERSION) {
1134
switch (XT_GET_DISK_2(tab_fmt.tf_tab_version_2)) {
1136
xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.91 Beta");
1139
xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.85 Beta");
1142
xt_throw_taberr(XT_CONTEXT, XT_ERR_BAD_TABLE_VERSION, table_name);
1148
fmt_size = XT_GET_DISK_4(tab_fmt.tf_format_size_4);
1149
*ret_head_size = XT_GET_DISK_4(tab_fmt.tf_tab_head_size_4);
1150
dic->dic_rec_size = XT_GET_DISK_4(tab_fmt.tf_rec_size_4);
1151
dic->dic_rec_fixed = XT_GET_DISK_1(tab_fmt.tf_rec_fixed_1);
1152
dic->dic_min_auto_inc = XT_GET_DISK_8(tab_fmt.tf_min_auto_inc_8);
1153
if (fmt_size > offsetof(XTTableFormatDRec, tf_definition)) {
1154
size_t def_size = fmt_size - offsetof(XTTableFormatDRec, tf_definition);
1157
pushsr_(def_sql, xt_free, (char *) xt_malloc(self, def_size));
1158
if (!xt_pread_file(file, head_size+offsetof(XTTableFormatDRec, tf_definition), def_size, def_size, def_sql, NULL, &self->st_statistics.st_rec, self))
1160
dic->dic_table = xt_ri_create_table(self, false, table_name, def_sql, myxt_create_table_from_table(self, dic->dic_my_table), NULL);
1161
freer_(); // xt_free(def_sql)
1164
dic->dic_table = myxt_create_table_from_table(self, dic->dic_my_table);
1167
/* Read the table header record from offset 0 of file and copy its */
/* fields into the in-memory table handle. */
/* NOTE(review): the statement executed when the read fails is not visible */
/* in this listing -- confirm against the full file. */
static void tab_load_table_header(XTThreadPtr self, XTTableHPtr tab, XTOpenFilePtr file)
1169
XTTableHeadDRec rec_head;
1171
// Read exactly sizeof(XTTableHeadDRec) bytes; the same value is passed as the minimum size.
if (!xt_pread_file(file, 0, sizeof(XTTableHeadDRec), sizeof(XTTableHeadDRec), (xtWord1 *) &rec_head, NULL, &self->st_statistics.st_rec, self))
1174
// Decode the on-disk fields (4-byte op sequence, then 6-byte values) into the handle.
tab->tab_head_op_seq = XT_GET_DISK_4(rec_head.th_op_seq_4);
1175
tab->tab_head_row_free_id = (xtRowID) XT_GET_DISK_6(rec_head.th_row_free_6);
1176
tab->tab_head_row_eof_id = (xtRowID) XT_GET_DISK_6(rec_head.th_row_eof_6);
1177
tab->tab_head_row_fnum = (xtWord4) XT_GET_DISK_6(rec_head.th_row_fnum_6);
1178
tab->tab_head_rec_free_id = (xtRecordID) XT_GET_DISK_6(rec_head.th_rec_free_6);
1179
tab->tab_head_rec_eof_id = (xtRecordID) XT_GET_DISK_6(rec_head.th_rec_eof_6);
1180
tab->tab_head_rec_fnum = (xtWord4) XT_GET_DISK_6(rec_head.th_rec_fnum_6);
1181
// The write-side operation sequence starts from the value just loaded.
tab->tab_wr_op_seq = tab->tab_head_op_seq;
1184
/*
 * Encode the in-memory header values of the open table's handle into
 * the on-disk header record rec_head. Nothing is written to a file here.
 */
xtPublic void xt_tab_store_header(XTOpenTablePtr ot, XTTableHeadDPtr rec_head)
{
	XTTableHPtr table = ot->ot_table;

	/* Operation sequence (4 bytes on disk). */
	XT_SET_DISK_4(rec_head->th_op_seq_4, table->tab_head_op_seq);

	/* tab_head_row_* values (6 bytes each on disk). */
	XT_SET_DISK_6(rec_head->th_row_free_6, table->tab_head_row_free_id);
	XT_SET_DISK_6(rec_head->th_row_eof_6, table->tab_head_row_eof_id);
	XT_SET_DISK_6(rec_head->th_row_fnum_6, table->tab_head_row_fnum);

	/* tab_head_rec_* values (6 bytes each on disk). */
	XT_SET_DISK_6(rec_head->th_rec_free_6, table->tab_head_rec_free_id);
	XT_SET_DISK_6(rec_head->th_rec_eof_6, table->tab_head_rec_eof_id);
	XT_SET_DISK_6(rec_head->th_rec_fnum_6, table->tab_head_rec_fnum);
}
1197
// Writes the counter portion of the table header held in 'rec_head' to the
// record file of 'ot', then flushes that file.
// NOTE(review): the statements executed when either check fails, and the
// final return value, are not visible in this excerpt.
static xtBool tab_write_header(XTOpenTablePtr ot, XTTableHeadDPtr rec_head)
1199
// 40 bytes starting at th_op_seq_4. 40 == 4 + 6*6, i.e. the op-seq field plus
// six 6-byte fields -- presumably these are laid out contiguously; TODO confirm
// against the XTTableHeadDRec declaration.
if (!xt_tab_write_rec(ot, offsetof(XTTableHeadDRec, th_op_seq_4), 40, (xtWord1 *) rec_head->th_op_seq_4))
1201
if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread))
1206
// Persists the table's minimum auto-increment value: encodes
// tab_dic.dic_min_auto_inc as 8 disk-format bytes, writes them over the
// tf_min_auto_inc_8 field of the table format record, and flushes the record file.
// NOTE(review): the declarations of 'value' and 'offset', the failure branches
// and the return statements are not visible in this excerpt.
xtPublic xtBool xt_tab_write_min_auto_inc(XTOpenTablePtr ot)
1211
XT_SET_DISK_8(value, ot->ot_table->tab_dic.dic_min_auto_inc);
1212
// The format record starts at tab_table_format_offset within the record file.
offset = ot->ot_table->tab_table_format_offset + offsetof(XTTableFormatDRec, tf_min_auto_inc_8);
1213
if (!xt_tab_write_rec(ot, offset, 8, value))
1215
if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread))
1220
/* a helper function to remove table from the open tables hash on exception
1221
* used in tab_new_handle() below
1223
// Compiled only when NO_LONGER_REQ is defined.
#ifdef NO_LONGER_REQ
1224
// Removes 'tab' from its database's table-name hash (db_tables) and clears
// the back-pointer to it in the by-ID list entry, if such an entry exists.
static void xt_del_from_db_tables_ht(XTThreadPtr self, XTTableHPtr tab)
1226
XTTableEntryPtr te_ptr;
1227
XTDatabaseHPtr db = tab->tab_db;
1228
// Copy the ID first: it is used as the lookup key after the hash removal below.
xtTableID tab_id = tab->tab_id;
1230
/* Oops! should use tab->tab_name, instead of tab! */
1231
xt_ht_del(self, db->db_tables, tab->tab_name);
1233
/* Remove the reference from the ID list, when a table is
1234
* removed from the table name list:
1236
if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id)))
1237
te_ptr->te_table = NULL;
1241
// Returns the file type used for a table's record (data) file.
// When XT_REC_FILE_TYPE is the standard type and xt_db_rewrite_flushing is
// set, XT_FT_REWRITE_FLUSH is returned; otherwise XT_REC_FILE_TYPE.
// NOTE(review): heap_tab is not referenced by the lines visible here; any
// handling of it lies outside this excerpt.
xtPublic XTFileType xt_rec_file_type(xtBool heap_tab)
1245
if (XT_REC_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
1246
return XT_FT_REWRITE_FLUSH;
1247
return XT_REC_FILE_TYPE;
1250
// Returns the file type used for a table's row-pointer file.
// When XT_ROW_FILE_TYPE is the standard type and xt_db_rewrite_flushing is
// set, XT_FT_REWRITE_FLUSH is returned; otherwise XT_ROW_FILE_TYPE.
// The condition tests XT_ROW_FILE_TYPE (it used to test XT_REC_FILE_TYPE) so
// that the test and the fallback return refer to the same file class, exactly
// as xt_rec_file_type() and xt_ind_file_type() do for theirs.
// NOTE(review): heap_tab is not referenced by the lines visible here; any
// handling of it lies outside this excerpt.
xtPublic XTFileType xt_row_file_type(xtBool heap_tab)
1254
if (XT_ROW_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
1255
return XT_FT_REWRITE_FLUSH;
1256
return XT_ROW_FILE_TYPE;
1259
// Returns the file type used for a table's index file.
// When XT_IND_FILE_TYPE is the standard type and xt_db_rewrite_flushing is
// set, XT_FT_REWRITE_FLUSH is returned; otherwise XT_IND_FILE_TYPE.
// NOTE(review): heap_tab is not referenced by the lines visible here; any
// handling of it lies outside this excerpt.
xtPublic XTFileType xt_ind_file_type(xtBool heap_tab)
1263
if (XT_IND_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
1264
return XT_FT_REWRITE_FLUSH;
1265
return XT_IND_FILE_TYPE;
1268
#ifdef XT_SORT_REC_WRITES
1269
// Comparison callback for the delayed-write sorted list (it is the comparator
// passed to xt_new_sortedlist() in tab_new_handle()). 'a' is read as a pointer
// to the record ID being looked up and 'b' as a pointer to a list entry
// (XTDelayWritePtr); 'self' and 'thunk' are unused.
static int tab_cmp_dw_rec_id(struct XTThread *XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
1271
xtRecordID rec_id = *((xtRecordID *) a);
1272
XTDelayWritePtr dw_ptr = (XTDelayWritePtr) b;
1274
// Equality is tested first, then "key is smaller than the entry".
// NOTE(review): the value returned for each case is not visible in this excerpt.
if (rec_id == dw_ptr->dw_rec_id)
1276
if (rec_id < dw_ptr->dw_rec_id)
1283
* Create a new table handle (i.e. open a table).
1284
* Return NULL if the table is missing, and it is OK for the table
1287
// Builds a new in-memory table handle for (tab_id, tab_path) in database 'db':
// allocates the XTTableHRec, sets up its dictionary, locks and flush tasks,
// resolves the row/record/index file objects, loads the index and table
// headers, and finally registers the handle in db->db_tables and in the
// by-ID entry (te_ptr->te_table).
//
// Return codes visible here: XT_TAB_NO_DICTIONARY when the dictionary cannot
// be loaded and XT_TAB_NOT_FOUND on the record-file path guarded by
// 'missing_ok'. NOTE(review): the success return, the assignment to *r_tab and
// several guarding conditions/else branches are not visible in this excerpt.
//
// Change in this revision: in the heap-table branch the index file is now put
// on the cleanup stack immediately after it is opened, before
// tab_init_ind_file() is called. Previously pushr_() came after the init call,
// so an exception thrown while initialising the index file left 'of_ind' open.
// The row and record files already followed the open-then-push order.
static int tab_new_handle(XTThreadPtr self, XTTableHPtr *r_tab, XTDatabaseHPtr db, xtTableID tab_id, XTPathStrPtr tab_path, xtBool missing_ok, XTDictionaryPtr dic)
1289
char path[PATH_MAX];
1291
char file_name[XT_MAX_TABLE_FILE_NAME_SIZE];
1292
XTOpenFilePtr of_rec, of_ind;
1293
XTTableEntryPtr te_ptr;
1294
size_t tab_format_offset;
1295
size_t tab_head_size = 0;
1299
// Look up the by-ID entry; its te_type and te_heap_tab fields are used below.
// NOTE(review): any NULL check on te_ptr is not visible in this excerpt.
te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
1302
// The handle is released automatically on exception until popr_() near the end.
tab = (XTTableHPtr) xt_heap_new(self, sizeof(XTTableHRec), tab_finalize);
1303
pushr_(xt_heap_release, tab);
1305
tab->tab_name = (XTPathStrPtr) xt_dup_string(self, tab_path->ps_path);
1307
tab->tab_id = tab_id;
1308
tab->tab_dic.dic_table_type = te_ptr->te_type;
1309
#ifdef TRACE_TABLE_IDS
1310
PRINTF("%s: allocated TABLE: db=%d tab=%d %s\n", self->t_name, (int) db->db_id, (int) tab->tab_id, xt_last_2_names_of_path(tab->tab_name->ps_path));
1314
// Either adopt the caller-supplied dictionary or load it from disk.
// NOTE(review): the condition selecting between the two is not visible in this excerpt.
myxt_move_dictionary(&tab->tab_dic, dic);
1315
myxt_setup_dictionary(self, &tab->tab_dic);
1318
if (!myxt_load_dictionary(self, &tab->tab_dic, db, tab_path)) {
1319
freer_(); // xt_heap_release(tab)
1320
return_(XT_TAB_NO_DICTIONARY);
1324
/* Do not use the XT_TF_DDL_TEMP_TABLE bit from the given dic
1325
* This bit depends only on the
1326
* name of the table, and must be set explicitly.
1328
if (myxt_temp_table_name(tab_path->ps_path))
1329
tab->tab_dic.dic_tab_flags |= XT_TF_DDL_TEMP_TABLE;
1331
tab->tab_dic.dic_tab_flags &= ~XT_TF_DDL_TEMP_TABLE;
1333
// Initialise the operation sequencer, the per-table locks and the two flush tasks.
tab->tab_seq.xt_op_seq_init(self);
1334
xt_spinlock_init_with_autoname(self, &tab->tab_ainc_lock);
1335
xt_init_mutex_with_autoname(self, &tab->tab_rec_flush_lock);
1336
xt_init_mutex_with_autoname(self, &tab->tab_ind_flush_lock);
1337
xt_init_mutex_with_autoname(self, &tab->tab_ind_stat_lock);
1338
xt_init_mutex_with_autoname(self, &tab->tab_dic_field_lock);
1339
xt_init_mutex_with_autoname(self, &tab->tab_row_lock);
1340
xt_init_mutex_with_autoname(self, &tab->tab_ind_lock);
1341
xt_init_mutex_with_autoname(self, &tab->tab_rec_lock);
1342
xt_spinlock_init_with_autoname(self, &tab->tab_mem_lock);
1343
if (!(tab->tab_rec_flush_task = new XTFlushRecRowTask()))
1344
xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
1345
tab->tab_rec_flush_task->tk_init(self);
1346
tab->tab_rec_flush_task->frt_table = tab;
1347
if (!(tab->tab_ind_flush_task = new XTFlushIndexTask()))
1348
xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
1349
tab->tab_ind_flush_task->tk_init(self);
1350
tab->tab_ind_flush_task->fit_table = tab;
1351
for (u_int i=0; i<XT_ROW_RWLOCKS; i++)
1352
XT_TAB_ROW_INIT_LOCK(self, &tab->tab_row_rwlock[i]);
1353
// Records that the locks above have been initialised.
// NOTE(review): presumably consulted when the handle is finalised -- confirm in tab_finalize.
tab->tab_free_locks = TRUE;
1355
// Build the three file paths: directory of tab_path + per-table file name.
// 'path' is reused: the last name is stripped and the next file name appended.
xt_strcpy(PATH_MAX, path, tab_path->ps_path);
1356
xt_remove_last_name_of_path(path);
1357
tab_get_row_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
1358
xt_strcat(PATH_MAX, path, file_name);
1359
tab->tab_row_file = xt_fs_get_file(self, path, xt_row_file_type(te_ptr->te_heap_tab));
1361
xt_remove_last_name_of_path(path);
1362
tab_get_data_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
1363
xt_strcat(PATH_MAX, path, file_name);
1364
tab->tab_rec_file = xt_fs_get_file(self, path, xt_rec_file_type(te_ptr->te_heap_tab));
1366
xt_remove_last_name_of_path(path);
1367
tab_get_index_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
1368
xt_strcat(PATH_MAX, path, file_name);
1369
tab->tab_ind_file = xt_fs_get_file(self, path, xt_ind_file_type(te_ptr->te_heap_tab));
1371
// Heap (memory) tables: open each file with XT_FS_CREATE and initialise it
// when it is empty (EOF position 0).
if (te_ptr->te_heap_tab) {
1372
XTOpenFilePtr of_row;
1374
tab->tab_dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
1375
of_row = xt_open_file(self, tab->tab_row_file->fil_path, xt_row_file_type(TRUE), XT_FS_CREATE, xt_db_row_file_grow_size);
1376
pushr_(xt_close_file, of_row);
1377
if (xt_seek_eof_file(self, of_row) == 0)
1378
tab_init_row_file(self, of_row, tab, &tab->tab_dic);
1379
freer_(); // xt_close_file(of_row)
1381
of_ind = xt_open_file(self, tab->tab_ind_file->fil_path, xt_ind_file_type(TRUE), XT_FS_CREATE, XT_INDEX_PAGE_SIZE*256);
1382
// Register the close handler before any call that can throw, so that a
// failure in tab_init_ind_file() does not leak the open file.
pushr_(xt_close_file, of_ind);
1383
if (xt_seek_eof_file(self, of_ind) == 0)
1384
tab_init_ind_file(self, of_ind, tab, &tab->tab_dic);
1385
tab_load_index_header(self, tab, of_ind, tab_path);
1386
freer_(); // xt_close_file(of_ind)
1388
of_rec = xt_open_file(self, tab->tab_rec_file->fil_path, xt_rec_file_type(te_ptr->te_heap_tab), XT_FS_CREATE, xt_db_data_file_grow_size);
1389
pushr_(xt_close_file, of_rec);
1390
if (xt_seek_eof_file(self, of_rec) == 0)
1391
// No foreign-key definition text is stored for this case (def_len 0, tab_def NULL).
tab_init_data_file(self, of_rec, tab, &tab->tab_dic, 0, NULL);
1394
#ifdef XT_SORT_REC_WRITES
1395
tab->tab_rec_dw_writes = xt_new_sortedlist(self, sizeof(XTDelayWriteRec), 20, 10, tab_cmp_dw_rec_id, NULL, NULL, TRUE, FALSE);
1397
// Non-heap tables: the index file may be absent (XT_FS_MISSING_OK).
// NOTE(review): the test on of_ind that separates the two
// tab_load_index_header() calls below is not visible in this excerpt.
of_ind = xt_open_file(self, tab->tab_ind_file->fil_path, xt_ind_file_type(FALSE), XT_FS_MISSING_OK, XT_INDEX_PAGE_SIZE*256);
1399
pushr_(xt_close_file, of_ind);
1400
tab_load_index_header(self, tab, of_ind, tab_path);
1401
freer_(); // xt_close_file(of_ind)
1404
tab_load_index_header(self, tab, of_ind, tab_path);
1406
// A missing record file is tolerated only when the caller passed missing_ok.
of_rec = xt_open_file(self, tab->tab_rec_file->fil_path, xt_rec_file_type(FALSE), missing_ok ? XT_FS_MISSING_OK : XT_FS_DEFAULT, xt_db_data_file_grow_size);
1408
// NOTE(review): presumably taken when of_rec could not be opened; the guarding condition is not visible here.
freer_(); // xt_heap_release(tab)
1409
return_(XT_TAB_NOT_FOUND);
1411
pushr_(xt_close_file, of_rec);
1414
// Load the format record and the header counters from the record file.
tab_load_table_format(self, of_rec, tab_path, &tab_format_offset, &tab_head_size, &tab->tab_dic);
1415
tab->tab_table_format_offset = tab_format_offset;
1416
tab->tab_table_head_size = tab_head_size;
1417
tab->tab_dic.dic_table->dt_table = tab;
1418
tab_load_table_header(self, tab, of_rec);
1419
freer_(); // xt_close_file(of_rec)
1421
// Seed the working counters from the header values that were just loaded.
tab->tab_seq.xt_op_seq_set(self, tab->tab_wr_op_seq+1);
1422
tab->tab_row_eof_id = tab->tab_head_row_eof_id;
1423
tab->tab_row_free_id = tab->tab_head_row_free_id;
1424
tab->tab_row_fnum = tab->tab_head_row_fnum;
1425
tab->tab_rec_eof_id = tab->tab_head_rec_eof_id;
1426
tab->tab_rec_free_id = tab->tab_head_rec_free_id;
1427
tab->tab_rec_fnum = tab->tab_head_rec_fnum;
1429
// Cache geometry: header size and fixed item size for the row and record files.
tab->tab_rows.xt_tc_setup(tab, FALSE, sizeof(XTTabRowHeadDRec), sizeof(XTTabRowRefDRec));
1430
tab->tab_recs.xt_tc_setup(tab, TRUE, tab_head_size, tab->tab_dic.dic_rec_size);
1432
xt_xres_init_tab(self, tab);
1434
if (!xt_init_row_locks(&tab->tab_locks))
1437
xt_heap_set_release_callback(tab, tab_onrelease);
1439
tab->tab_repair_pending = xt_tab_is_table_repair_pending(tab);
1441
// From here on the handle is handed to the table hash, so the automatic release is dropped.
popr_(); // Discard xt_heap_release(tab)
1443
xt_ht_put(self, db->db_tables, tab);
1445
/* Add a reference to the ID list, when a table is
1446
* added to the table name list:
1448
te_ptr->te_table = tab;
1450
/* Moved from after xt_init_row_locks() above, so that calling
1451
* xt_use_table_no_lock() with no_load == FALSE from attachReferences()
1452
* will work if we have cyclic foreign key references.
1454
if (tab->tab_dic.dic_table) {
1456
tab->tab_dic.dic_table->attachReferences(self, db);
1458
/* ignore problems of referenced tables */
1459
xt_log_and_clear_warning(self);
1468
* Get a reference to a table in the current database. The table reference is valid,
1469
* as long as the thread is using the database!!!
1471
// Looks up the table 'name' in db->db_tables. When it is not there and
// no_load is FALSE, the table ID is resolved with tab_find_table() and a new
// handle is created with tab_new_handle(); its status codes are turned into
// exceptions. A table that was found or loaded gets an extra heap reference.
// The caller is expected to hold whatever lock protects db->db_tables (this
// function takes none itself in the lines visible here).
// NOTE(review): the declaration of 'tab', the guard before the first throw,
// the handling of missing_ok, the switch 'break's and the return statement
// are not visible in this excerpt.
xtPublic XTTableHPtr xt_use_table_no_lock(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr name, xtBool no_load, xtBool missing_ok, XTDictionaryPtr dic)
1476
xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
1478
tab = (XTTableHPtr) xt_ht_get(self, db->db_tables, name);
1479
// Not yet open: load it unless the caller asked for already-loaded tables only.
if (!tab && !no_load) {
1480
xtTableID tab_id = 0;
1482
if (!tab_find_table(self, db, name, &tab_id)) {
1485
xt_throw_taberr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND, name);
1488
// Map tab_new_handle() status codes to errors.
switch (tab_new_handle(self, &tab, db, tab_id, name, missing_ok, dic)) {
1489
case XT_TAB_NO_DICTIONARY:
1490
xt_throw_taberr(XT_CONTEXT, XT_ERR_NO_DICTIONARY, name);
1491
case XT_TAB_POOL_CLOSED:
1492
xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TABLE_LOCKED, (u_long) tab_id);
1493
case XT_TAB_NOT_FOUND:
1496
xt_throw_taberr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND, name);
1503
// The returned pointer carries its own reference; callers in this file pair
// it with xt_heap_release().
xt_heap_reference(self, tab);
1508
// Variant of xt_use_table_no_lock() that takes no thread argument: the calling
// thread is obtained with xt_get_self() and all other arguments are forwarded.
// NOTE(review): how an exception raised by the wrapped call is handled, and
// the return statement, are not visible in this excerpt.
xtPublic XTTableHPtr xt_use_table_no_lock_ns(struct XTDatabase *db, XTPathStrPtr name, xtBool no_load, xtBool missing_ok, XTDictionaryPtr dic)
1511
XTThreadPtr self = xt_get_self();
1514
tab = xt_use_table_no_lock(self, db, name, no_load, missing_ok, dic);
1523
// Releases everything an open-table handle holds: reserved index resources,
// the record/index/row file handles, the reference on the table, the index
// read handle, and the row read/write buffers. Each pointer is reset to NULL
// (and each buffer size to 0) after it has been released, so the handle can be
// passed through here again without double-freeing.
static void tab_close_table(XTOpenTablePtr ot)
1525
xt_ind_free_reserved(ot);
1527
// The record and row files are closed with XT_CLOSE_RR_FILE_NS, the index file with xt_close_file_ns.
if (ot->ot_rec_file) {
1528
XT_CLOSE_RR_FILE_NS(ot->ot_rec_file);
1529
ot->ot_rec_file = NULL;
1532
if (ot->ot_ind_file) {
1533
xt_close_file_ns(ot->ot_ind_file);
1534
ot->ot_ind_file = NULL;
1537
if (ot->ot_row_file) {
1538
XT_CLOSE_RR_FILE_NS(ot->ot_row_file);
1539
ot->ot_row_file = NULL;
1543
// Drop this handle's reference on the shared table structure.
// NOTE(review): the guard around this release is not visible in this excerpt.
xt_heap_release(xt_get_self(), ot->ot_table);
1544
ot->ot_table = NULL;
1546
if (ot->ot_ind_rhandle) {
1547
xt_ind_release_handle(ot->ot_ind_rhandle, FALSE, ot->ot_thread);
1548
ot->ot_ind_rhandle = NULL;
1550
if (ot->ot_row_rbuffer) {
1551
xt_free_ns(ot->ot_row_rbuffer);
1552
ot->ot_row_rbuf_size = 0;
1553
ot->ot_row_rbuffer = NULL;
1555
if (ot->ot_row_wbuffer) {
1556
xt_free_ns(ot->ot_row_wbuffer);
1557
ot->ot_row_wbuf_size = 0;
1558
ot->ot_row_wbuffer = NULL;
1560
// Optional bookkeeping of returned rows, present only in builds with XT_TRACK_RETURNED_ROWS.
#ifdef XT_TRACK_RETURNED_ROWS
1561
if (ot->ot_rows_returned) {
1562
xt_free_ns(ot->ot_rows_returned);
1563
ot->ot_rows_returned = NULL;
1565
ot->ot_rows_ret_curr = 0;
1566
ot->ot_rows_ret_max = 0;
1571
// Deletes every file enumerated for the table identified by (tab_name, tab_id).
// A failed delete is logged and cleared instead of being propagated, so the
// loop carries on with the remaining files.
static void tab_delete_table_files(XTThreadPtr self, XTPathStrPtr tab_name, xtTableID tab_id)
1573
XTFilesOfTableRec ft;
1575
xt_enum_files_of_tables_init(tab_name, tab_id, &ft);
1576
while (xt_enum_files_of_tables_next(&ft)) {
1577
// NULL is passed as the thread argument and the boolean result is checked.
// NOTE(review): presumably NULL selects a non-throwing mode -- confirm in xt_fs_delete.
if (!xt_fs_delete(NULL, ft.ft_file_path))
1578
xt_log_and_clear_exception(self);
1582
// Initialises an empty row-pointer file: resets the table's row counters
// (both the working copies and the tab_head_* copies) and writes a row-file
// header carrying XT_TAB_ROW_MAGIC at offset 0. 'dic' is unused.
static void tab_init_row_file(XTThreadPtr self, XTOpenFilePtr of_row, XTTableHPtr tab, XTDictionaryPtr XT_UNUSED(dic))
1584
XTTabRowHeadDRec row_head;
1586
// EOF id starts at 1 and the free list is empty (id 0, count 0).
tab->tab_row_eof_id = 1;
1587
tab->tab_row_free_id = 0;
1588
tab->tab_row_fnum = 0;
1590
tab->tab_head_row_eof_id = 1;
1591
tab->tab_head_row_free_id = 0;
1592
tab->tab_head_row_fnum = 0;
1594
// Only the magic field of row_head is set by the lines visible here.
XT_SET_DISK_4(row_head.rh_magic_4, XT_TAB_ROW_MAGIC);
1595
// NOTE(review): the statement executed when the write fails is not visible in this excerpt.
if (!xt_pwrite_file(of_row, 0, sizeof(row_head), &row_head, &self->st_statistics.st_rec, self))
1599
// Initialises an empty record (data) file. It computes the size of the file
// header area, resets the record counters of 'tab', copies the record-layout
// settings from 'dic' into tab->tab_dic, and writes three pieces to the file:
// the table header (XTTableHeadDRec) at offset 0, the fixed part of the table
// format record directly after it, and 'def_len' bytes of definition text
// taken from tab_def->sb_cstring after that.
// NOTE(review): the declaration of 'eof' and the failure branches of the three
// writes are not visible in this excerpt.
static void tab_init_data_file(XTThreadPtr self, XTOpenFilePtr of_rec, XTTableHPtr tab, XTDictionaryPtr dic, size_t def_len, XTStringBufferPtr tab_def)
1602
XTTableHeadDRec rec_head;
1603
XTTableFormatDRec table_fmt;
1605
/* Calculate the offset of the first record in the data handle file. */
1606
// Header + fixed format fields + definition text + XT_FORMAT_DEF_SPACE extra bytes.
eof = sizeof(XTTableHeadDRec) + offsetof(XTTableFormatDRec, tf_definition) + def_len + XT_FORMAT_DEF_SPACE;
1607
eof = (eof + 1024 - 1) / 1024 * 1024; // Round to a value divisible by 1024
1609
tab->tab_table_format_offset = sizeof(XTTableHeadDRec);
1610
tab->tab_table_head_size = (size_t) eof;
1612
tab->tab_rec_eof_id = 1; // This is the first record ID!
1613
tab->tab_rec_free_id = 0;
1614
tab->tab_rec_fnum = 0;
1616
tab->tab_head_rec_eof_id = 1; // The first record ID
1617
tab->tab_head_rec_free_id = 0;
1618
tab->tab_head_rec_fnum = 0;
1620
// Take over the record-layout settings chosen by the caller's dictionary.
tab->tab_dic.dic_rec_size = dic->dic_rec_size;
1621
tab->tab_dic.dic_rec_fixed = dic->dic_rec_fixed;
1622
tab->tab_dic.dic_tab_flags = dic->dic_tab_flags;
1623
tab->tab_dic.dic_min_auto_inc = dic->dic_min_auto_inc;
1624
tab->tab_dic.dic_def_ave_row_size = dic->dic_def_ave_row_size;
1625
tab->tab_dic.dic_table_type = dic->dic_table_type;
1627
// Encode the header. The row counters used here are the tab_head_row_* values
// already present in 'tab'; this function does not set them.
XT_SET_DISK_4(rec_head.th_head_size_4, sizeof(XTTableHeadDRec));
1628
XT_SET_DISK_4(rec_head.th_op_seq_4, tab->tab_head_op_seq);
1629
XT_SET_DISK_6(rec_head.th_row_free_6, tab->tab_head_row_free_id);
1630
XT_SET_DISK_6(rec_head.th_row_eof_6, tab->tab_head_row_eof_id);
1631
XT_SET_DISK_6(rec_head.th_row_fnum_6, tab->tab_head_row_fnum);
1632
XT_SET_DISK_6(rec_head.th_rec_free_6, tab->tab_head_rec_free_id);
1633
XT_SET_DISK_6(rec_head.th_rec_eof_6, tab->tab_head_rec_eof_id);
1634
XT_SET_DISK_6(rec_head.th_rec_fnum_6, tab->tab_head_rec_fnum);
1636
if (!xt_pwrite_file(of_rec, 0, sizeof(XTTableHeadDRec), &rec_head, &self->st_statistics.st_rec, self))
1639
/* Store the table format: */
1640
// Only the fixed-size part (everything before tf_definition) is zeroed and written from table_fmt.
memset(&table_fmt, 0, offsetof(XTTableFormatDRec, tf_definition));
1641
XT_SET_DISK_4(table_fmt.tf_format_size_4, offsetof(XTTableFormatDRec, tf_definition) + def_len);
1642
XT_SET_DISK_4(table_fmt.tf_tab_head_size_4, eof);
1643
XT_SET_DISK_2(table_fmt.tf_tab_version_2, XT_TAB_CURRENT_VERSION);
1644
XT_SET_DISK_4(table_fmt.tf_rec_size_4, tab->tab_dic.dic_rec_size);
1645
XT_SET_DISK_1(table_fmt.tf_rec_fixed_1, tab->tab_dic.dic_rec_fixed);
1646
XT_SET_DISK_2(table_fmt.tf_tab_unused_2, 0);
1647
XT_SET_DISK_8(table_fmt.tf_min_auto_inc_8, tab->tab_dic.dic_min_auto_inc);
1649
if (!xt_pwrite_file(of_rec, sizeof(XTTableHeadDRec), offsetof(XTTableFormatDRec, tf_definition), &table_fmt, &self->st_statistics.st_rec, self))
1652
// NOTE(review): tab_new_handle() calls this function with def_len == 0 and
// tab_def == NULL. Any guard around this write is not visible in this excerpt;
// confirm tab_def is not dereferenced in that case.
if (!xt_pwrite_file(of_rec, sizeof(XTTableHeadDRec) + offsetof(XTTableFormatDRec, tf_definition), def_len, tab_def->sb_cstring, &self->st_statistics.st_rec, self))
1657
// Initialises an empty index file. It allocates a zero-filled in-memory index
// header of XT_INDEX_HEAD_SIZE bytes (kept in tab->tab_index_head), resets the
// index EOF/free node IDs, fills in the header and the index format record
// embedded in it, and writes the whole header at offset 0 of 'of_ind'.
// NOTE(review): the failure branches of the allocation and of the write are
// not visible in this excerpt.
static void tab_init_ind_file(XTThreadPtr self, XTOpenFilePtr of_ind, XTTableHPtr tab, XTDictionaryPtr dic)
1659
XTIndexFormatDPtr index_fmt;
1661
/* This is the size of the index header: */
1662
// One XT_NODE_REF_SIZE slot per key follows the fixed fields; the format record starts after them.
tab->tab_index_format_offset = offsetof(XTIndexHeadDRec, tp_data) + dic->dic_key_count * XT_NODE_REF_SIZE;
1663
if (!(tab->tab_index_head = (XTIndexHeadDPtr) xt_calloc_ns(XT_INDEX_HEAD_SIZE)))
1666
// Node ID 1 is the first node to allocate; the free list is empty.
XT_NODE_ID(tab->tab_ind_eof) = 1;
1667
XT_NODE_ID(tab->tab_ind_free) = 0;
1669
XT_SET_DISK_4(tab->tab_index_head->tp_header_size_4, XT_INDEX_HEAD_SIZE);
1670
XT_SET_DISK_4(tab->tab_index_head->tp_format_offset_4, tab->tab_index_format_offset);
1671
XT_SET_DISK_6(tab->tab_index_head->tp_ind_eof_6, XT_NODE_ID(tab->tab_ind_eof));
1672
XT_SET_DISK_6(tab->tab_index_head->tp_ind_free_6, XT_NODE_ID(tab->tab_ind_free));
1674
/* Store the index format: */
1675
// index_fmt points inside the header buffer, tab_index_format_offset bytes from its start.
index_fmt = (XTIndexFormatDPtr) (((xtWord1 *) tab->tab_index_head) + tab->tab_index_format_offset);
1676
XT_SET_DISK_4(index_fmt->if_format_size_4, sizeof(XTIndexFormatDRec));
1677
XT_SET_DISK_2(index_fmt->if_tab_version_2, XT_TAB_CURRENT_VERSION);
1678
XT_SET_DISK_2(index_fmt->if_ind_version_2, XT_IND_CURRENT_VERSION);
1679
XT_SET_DISK_1(index_fmt->if_node_ref_size_1, XT_NODE_REF_SIZE);
1680
XT_SET_DISK_1(index_fmt->if_rec_ref_size_1, XT_RECORD_REF_SIZE);
1681
XT_SET_DISK_4(index_fmt->if_page_size_4, XT_INDEX_PAGE_SIZE);
1683
/* Save the header: */
1684
if (!xt_pwrite_file(of_ind, 0, XT_INDEX_HEAD_SIZE, tab->tab_index_head, &self->st_statistics.st_ind, self))
1688
// Creates the table 'name' in the thread's current database using the
// dictionary 'dic'. Visible steps: validate the name length, lock the table
// pool and the table hash, remove an existing table of the same name (its
// extended data, open files, files on disk and hash entry), register a new
// by-ID entry under the next table ID, create and initialise the row, data
// and index files, log the new table ID, and finally open the new table once
// so that foreign-key problems surface at create time. On failure the new
// files and the by-ID entry are removed again.
// NOTE(review): the try/catch structure, the if/else that distinguishes
// "old table exists" from "no old table", and several declarations (tab,
// def_len, e) are not visible in this excerpt.
xtPublic void xt_create_table(XTThreadPtr self, XTPathStrPtr name, XTDictionaryPtr dic)
1690
char table_name[XT_MAX_TABLE_FILE_NAME_SIZE];
1691
char path[PATH_MAX];
1692
XTDatabaseHPtr db = self->st_database;
1693
XTOpenTablePoolPtr table_pool;
1695
XTTableHPtr old_tab = NULL;
1696
xtTableID old_tab_id = 0;
1697
xtTableID tab_id = 0;
1698
XTStringBufferRec tab_def = { 0, 0, 0 };
1699
XTTableEntryRec te_tab;
1700
XTSortedListInfoRec li_undo;
1702
#ifdef TRACE_CREATE_TABLES
1703
printf("CREATE %s\n", name->ps_path);
1706
// The last path component is the table name; it must fit in XT_TABLE_NAME_SIZE-1 characters.
if (strlen(xt_last_name_of_path(name->ps_path)) > XT_TABLE_NAME_SIZE-1)
1707
xt_throw_taberr(XT_CONTEXT, XT_ERR_NAME_TOO_LONG, name);
1709
xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
1711
/* Lock to prevent table list change during creation. */
1712
table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, name, FALSE, TRUE, TRUE, &old_tab);
1713
pushr_(xt_db_unlock_table_pool, table_pool);
1714
xt_ht_lock(self, db->db_tables);
1715
pushr_(xt_ht_unlock, db->db_tables);
1716
pushr_(xt_heap_release, old_tab);
1718
/* This must be done before we remove the old table
1719
* from the directory, or we will not be able
1720
* to find the table, which could be required
1723
if (xt_sl_get_size(db->db_table_by_id) >= XT_MAX_TABLES)
1724
xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TOO_MANY_TABLES, (u_long) XT_MAX_TABLES);
1726
// The new table gets the next ID; db_curr_tab_id itself is only advanced once the files exist.
tab_id = db->db_curr_tab_id + 1;
1729
old_tab_id = old_tab->tab_id;
1730
xt_dl_delete_ext_data(self, old_tab, FALSE, TRUE);
1731
freer_(); // xt_heap_release(self, old_tab)
1733
/* For the Windows version this must be done before we
1734
* start to delete the underlying files!
1736
tab_close_files(self, old_tab);
1738
tab_delete_table_files(self, name, old_tab_id);
1740
/* Remove the PBMS table: */
1741
ASSERT(xt_get_self() == self);
1743
/* Remove the table from the directory. It will get a new
1744
* ID so the handle in the directory will no longer be valid.
1746
xt_ht_del(self, db->db_tables, name);
1749
freer_(); // xt_heap_release(self, old_tab)
1752
/* Add the table to the directory, we will remove it on error! */
1753
li_undo.li_sl = db->db_table_by_id;
1754
li_undo.li_key = &tab_id;
1755
te_tab.te_tab_id = tab_id;
1756
te_tab.te_heap_tab = dic->dic_tab_flags & XT_TF_MEMORY_TABLE;
1757
te_tab.te_tab_name = xt_dup_string(self, xt_last_name_of_path(name->ps_path));
1758
te_tab.te_tab_path = tab_get_table_path(self, db, name, TRUE);
1759
te_tab.te_table = NULL;
1760
te_tab.te_type = dic->dic_table_type;
1761
xt_sl_insert(self, db->db_table_by_id, &tab_id, &te_tab);
1765
XTOpenFilePtr of_row, of_rec, of_ind;
1768
tab_save_tables(self, db);
1770
// Temporary handle used only while the three files are initialised; it is released further down.
tab = (XTTableHPtr) xt_heap_new(self, sizeof(XTTableHRec), tab_finalize);
1771
pushr_(xt_heap_release, tab);
1773
/* The length of the foreign key definition: */
1774
if (dic->dic_table) {
1775
dic->dic_table->loadString(self, &tab_def);
1776
// One extra byte beyond sb_len -- presumably the terminating NUL; TODO confirm.
def_len = tab_def.sb_len + 1;
1779
tab->tab_head_op_seq = 0;
1780
tab->tab_wr_op_seq = 0;
1782
/* This tests operation number overflow. */
1783
//tab->tab_head_op_seq = 0xFFFFFFFF - 12;
1784
//tab->tab_wr_op_seq = 0xFFFFFFFF - 12;
1787
/* ------- ROW FILE: */
1788
// Each file is opened with XT_FS_CREATE | XT_FS_EXCLUSIVE.
// NOTE(review): presumably so that an already existing file is treated as an error -- confirm in xt_open_file.
xt_strcpy(PATH_MAX, path, name->ps_path);
1789
xt_remove_last_name_of_path(path);
1790
tab_get_row_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
1791
xt_strcat(PATH_MAX, path, table_name);
1792
of_row = xt_open_file(self, path, xt_row_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, xt_db_row_file_grow_size);
1793
pushr_(xt_close_file, of_row);
1794
tab_init_row_file(self, of_row, tab, dic);
1795
freer_(); // xt_close_file(of_row)
1797
(void) ASSERT(sizeof(XTTabRowHeadDRec) == sizeof(XTTabRowRefDRec));
1798
(void) ASSERT(sizeof(XTTabRowRefDRec) == 1 << XT_TAB_ROW_SHIFTS);
1800
/* ------------ DATA FILE: */
1801
xt_remove_last_name_of_path(path);
1802
tab_get_data_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
1803
xt_strcat(PATH_MAX, path, table_name);
1804
of_rec = xt_open_file(self, path, xt_rec_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, xt_db_data_file_grow_size);
1805
pushr_(xt_close_file, of_rec);
1806
tab_init_data_file(self, of_rec, tab, dic, def_len, &tab_def);
1807
freer_(); // xt_close_file(of_rec)
1809
/* ----------- INDEX FILE: */
1810
xt_remove_last_name_of_path(path);
1811
tab_get_index_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
1812
xt_strcat(PATH_MAX, path, table_name);
1813
of_ind = xt_open_file(self, path, xt_ind_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, XT_INDEX_PAGE_SIZE*256);
1814
pushr_(xt_close_file, of_ind);
1815
tab_init_ind_file(self, of_ind, tab, dic);
1816
freer_(); // xt_close_file(of_ind)
1819
/* Log the new table ID! */
1820
// The counter is advanced first and rolled back if logging the ID fails.
db->db_curr_tab_id = tab_id;
1821
if (!xt_xn_log_tab_id(self, tab_id)) {
1822
db->db_curr_tab_id = tab_id - 1;
1826
freer_(); // xt_heap_release(tab)
1829
* 2008-12-10: Note, there is another problem, example:
1830
* set storage_engine = pbxt;
1832
* CREATE TABLE t1 (s1 INT PRIMARY KEY, s2 INT);
1833
* CREATE TABLE t2 (s1 INT PRIMARY KEY, FOREIGN KEY (s1) REFERENCES t1 (s1) ON UPDATE CASCADE);
1834
* CREATE TABLE t3 (s1 INT PRIMARY KEY, FOREIGN KEY (s1) REFERENCES t2 (s1) ON UPDATE CASCADE);
1836
* DROP TABLE IF EXISTS t2,t1;
1837
* CREATE TABLE t1 (s1 ENUM('a','b') PRIMARY KEY);
1838
* CREATE TABLE t2 (s1 ENUM('A','B'), FOREIGN KEY (s1) REFERENCES t1 (s1));
1840
* DROP TABLE IF EXISTS t2,t1;
1842
* In the example above. The second create t2 does not fail, although t3 references it,
1843
* and the data types do not match.
1845
* The main problem is that this error comes on DROP TABLE IF EXISTS t2! Which prevents
1846
* the table from being dropped - not good.
1848
* So my idea here is to open the table, and if it fails, then the create table fails
1853
* We pass table type separately and provide NULL for the dic parameter, this is because
1854
* we want to force loading table (which is triggered by dic == NULL) but we still need table type
1858
tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
1859
xt_heap_release(self, tab);
1864
/* Creation failed, delete the table files: */
1867
xt_enter_exception_handler(self, &e);
1869
tab_delete_table_files(self, name, tab_id);
1870
tab_remove_table_path(self, db, te_tab.te_tab_path);
1871
xt_sl_delete(NULL, db->db_table_by_id, &tab_id);
1872
tab_save_tables(self, db);
1873
xt_sb_set_size(self, &tab_def, 0);
1874
xt_exit_exception_handler(self, &e);
1879
xt_sb_set_size(self, &tab_def, 0);
1883
XTTableEntryPtr te_ptr;
1885
if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &old_tab_id))) {
1886
tab_remove_table_path(self, db, te_ptr->te_tab_path);
1887
xt_sl_delete(self, db->db_table_by_id, &old_tab_id);
1888
tab_save_tables(self, db);
1891
/* Same purpose as above {LOAD-FOR-FKS} (although this should work,
1892
* because this is a TRUNCATE TABLE.
1894
tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
1895
xt_heap_release(self, tab);
1898
/* Log this error, but do not return it, because
1899
* it just involves the cleanup of the old table,
1900
* the new table has been successfully created.
1902
xt_log_and_clear_exception(self);
1907
freer_(); // xt_ht_unlock(db->db_tables)
1908
freer_(); // xt_db_unlock_table_pool(table_pool)
1910
/* I open the table here, because I cannot rely on MySQL to do
1911
* it after a create. This is normally OK, but with foreign keys
1912
* tables can be referenced and then they are not opened
1913
* before use. In this example, the INSERT opens t2, but t1 is
1914
* not opened of the create. As a result the foreign key
1915
* reference is not resolved.
1917
* drop table t1, t2;
1920
* id INT PRIMARY KEY
1926
* CONSTRAINT c1 FOREIGN KEY (v) REFERENCES t1(id)
1930
* INSERT INTO t2 VALUES(2);
1932
/* this code is not needed anymore as we open tables referred by FKs as necessary during checks
1933
xt_ht_lock(self, db->db_tables);
1934
pushr_(xt_ht_unlock, db->db_tables);
1935
tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
1936
freer_(); // xt_ht_unlock(db->db_tables)
1937
xt_heap_release(self, tab);
1938
* CHANGED see {LOAD-FOR-FKS} above.
1944
// Drops the table 'tab_name' from the thread's current database. With the
// table pool and the table hash locked, it checks (unless the thread ignores
// foreign keys) that no other table still references this one, then deletes
// the extended data, closes and deletes the table files, removes the by-ID
// entry and saves the table list, and removes the name from db->db_tables.
// If the foreign-key check refuses the drop, XT_ERR_ROW_IS_REFERENCED is thrown.
// NOTE(review): the conditions guarding the table-not-found throw and the
// if/else around the two freer_() calls for 'tab' are not visible in this excerpt.
xtPublic void xt_drop_table(XTThreadPtr self, XTPathStrPtr tab_name, xtBool drop_db)
1946
XTDatabaseHPtr db = self->st_database;
1947
XTOpenTablePoolPtr table_pool;
1948
XTTableHPtr tab = NULL;
1949
xtTableID tab_id = 0;
1950
xtBool can_drop = TRUE;
1954
#ifdef TRACE_CREATE_TABLES
1955
printf("DROP %s\n", tab_name->ps_path);
1958
// Cleanup-stack order: pool unlock, hash unlock, table release (released in reverse).
table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, tab_name, FALSE, FALSE, TRUE, &tab);
1959
pushr_(xt_db_unlock_table_pool, table_pool);
1960
xt_ht_lock(self, db->db_tables);
1961
pushr_(xt_ht_unlock, db->db_tables);
1962
pushr_(xt_heap_release, tab);
1965
tab_id = tab->tab_id; /* tab is not null if returned table_pool is not null */
1966
/* check if other tables refer this */
1967
if (!self->st_ignore_fkeys)
1968
can_drop = tab->tab_dic.dic_table->checkCanDrop(drop_db);
1971
/* See the comment in ha_pbxt::delete_table regarding different implementation of DROP TABLE
1972
* in MySQL and Drizzle
1975
xt_throw_xterr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND);
1981
XTTableEntryPtr te_ptr;
1983
xt_dl_delete_ext_data(self, tab, FALSE, TRUE);
1984
freer_(); // xt_heap_release(self, tab)
1986
/* For the Windows version this must be done before we
1987
* start to delete the underlying files!
1989
tab_close_files(self, tab);
1991
tab_delete_table_files(self, tab_name, tab_id);
1993
ASSERT(xt_get_self() == self);
1994
// Remove the by-ID entry and persist the updated table list.
if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id))) {
1995
tab_remove_table_path(self, db, te_ptr->te_tab_path);
1996
xt_sl_delete(self, db->db_table_by_id, &tab_id);
1997
tab_save_tables(self, db);
2001
freer_(); // xt_heap_release(self, tab)
2004
xt_ht_del(self, db->db_tables, tab_name);
2006
else { /* cannot drop table because of FK dependencies */
2007
xt_throw_xterr(XT_CONTEXT, XT_ERR_ROW_IS_REFERENCED);
2010
freer_(); // xt_ht_unlock(db->db_tables)
2011
freer_(); // xt_db_unlock_table_pool(table_pool)
2016
* Record buffer size:
2017
* -------------------
2018
* The size of the record buffer used to hold the row
2019
* in memory. This buffer size does not include the BLOB data.
2020
* About 8 bytes (a pointer and a size) is reserved for each BLOB
2023
* The buffer size includes a number of "NULL" bytes followed by
2024
* the data area. The NULL bytes contain 1 bit for every column,
2025
* to indicate if the column is NULL or not.
2027
* The size of the buffer is 4/8-byte aligned, so it may be padded
2030
* Fixed length rec. len.:
2031
* -----------------------
2032
* If the record does not include any BLOBs then this is the size of the
2033
* fixed length record. The size of the data in the data handle record
2034
* need never be bigger than this length, if the record does not
2035
* contain BLOBs. So this should be the maximum size set for
2036
* AVG_ROW_LENGTH in this case.
2038
* Handle data record size:
2039
* ------------------------
2040
* This is the size of the handle data record. It is the data size
2041
* plus the "max header size".
2043
* Min/max header size:
2044
* The min and max header size of the header in the data handle file.
2045
* The larger header is used if a record has an extended data (data log
2048
* Min/avg/max record size:
2049
* ------------------------
2050
* These are variable length records sizes. That is, the size of records
2051
* when stored in the variable length format. Variable length records
2052
* do not have fixed fields sizes, instead the fields are packed one
2053
* after the other, prefixed by a number of size indicator bytes.
2055
* The average is an estimate of the average record size. This estimate
2056
* is used if no AVG_ROW_LENGTH is specifically given.
2058
* If the average estimate is within 20% of the maximum size of the record,
2059
* then the record will be handled as a fixed length record.
2061
* Avg row len set for tab:
2062
* ------------------------
2063
* This is the value set using AVG_ROW_LENGTH when the table is declared.
2065
* Rows fixed length:
2066
* ------------------
2067
* YES if the records of this table are handled as fixed length records.
2068
* In this case the table records will never have an extended record
2071
* The size of the data area in the handle data record is set to the
2072
* size of the MySQL data record ("Fixed length rec. len.").
2074
* It also means that the record format used is identical to the MySQL
2077
* If the records are not fixed, then the variable length record format
2078
* is used. Records size are then in the range specified by
2079
* "Min/avg/max record size".
2081
* Maximum fixed size:
2082
* -------------------
2083
* This is the maximum size of a data log record.
2085
* Minimum variable size:
2086
* ------------------------
2087
* Records below this size are handled as a fixed length record size, unless
2088
* the AVG_ROW_LENGTH is specifically set.
2090
xtPublic void xt_check_table(XTThreadPtr self, XTOpenTablePtr ot)
2092
XTTableHPtr tab = ot->ot_table;
2094
XTTabRecExtDPtr rec_buf = (XTTabRecExtDPtr) ot->ot_row_rbuffer;
2095
XTactExtRecEntryDRec ext_rec;
2098
xtLogOffset log_offset;
2100
xtRecordID prev_rec_id;
2103
u_llong free_rec_count = 0, free_count2 = 0;
2104
u_llong delete_rec_count = 0;
2105
u_llong alloc_rec_count = 0;
2106
u_llong alloc_rec_bytes = 0;
2107
u_llong min_comp_rec_len = 0;
2108
u_llong max_comp_rec_len = 0;
2111
u_llong ext_data_len = 0;
2113
#if defined(DUMP_CHECK_TABLE) || defined(CHECK_TABLE_STATS)
2114
printf("\nCHECK TABLE: %s\n", tab->tab_name->ps_path);
2117
xt_lock_mutex(self, &tab->tab_db->db_co_ext_lock);
2118
pushr_(xt_unlock_mutex, &tab->tab_db->db_co_ext_lock);
2120
xt_lock_mutex(self, &tab->tab_rec_lock);
2121
pushr_(xt_unlock_mutex, &tab->tab_rec_lock);
2123
#ifdef CHECK_TABLE_STATS
2124
printf("Record buffer size = %lu\n", (u_long) tab->tab_dic.dic_mysql_buf_size);
2125
printf("Fixed length rec. len. = %lu\n", (u_long) tab->tab_dic.dic_mysql_rec_size);
2126
printf("Handle data record size = %lu\n", (u_long) tab->tab_dic.dic_rec_size);
2127
printf("Min/max header size = %d/%d\n", (int) offsetof(XTTabRecFix, rf_data), tab->tab_dic.dic_rec_fixed ? (int) offsetof(XTTabRecFix, rf_data) : (int) offsetof(XTTabRecExtDRec, re_data));
2128
printf("Min/avg/max record size = %llu/%llu/%llu\n", (u_llong) tab->tab_dic.dic_min_row_size, (u_llong) tab->tab_dic.dic_ave_row_size, (u_llong) tab->tab_dic.dic_max_row_size);
2129
if (tab->tab_dic.dic_def_ave_row_size)
2130
printf("Avg row len set for tab = %lu\n", (u_long) tab->tab_dic.dic_def_ave_row_size);
2132
printf("Avg row len set for tab = not specified\n");
2133
printf("Rows fixed length = %s\n", tab->tab_dic.dic_rec_fixed ? "YES" : "NO");
2134
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
2135
printf("Table type = MEMORY\n");
2136
else if (tab->tab_dic.dic_tab_flags & XT_TF_REAL_TEMP_TABLE)
2137
printf("Table type = TEMPORARY\n");
2138
else if (tab->tab_dic.dic_tab_flags & XT_TF_DDL_TEMP_TABLE)
2139
printf("Table type = DDL-TEMPORARY\n");
2140
if (tab->tab_dic.dic_def_ave_row_size)
2141
printf("Maximum fixed size = %lu\n", (u_long) XT_TAB_MAX_FIX_REC_LENGTH_SPEC);
2143
printf("Maximum fixed size = %lu\n", (u_long) XT_TAB_MAX_FIX_REC_LENGTH);
2144
printf("Minimum variable size = %lu\n", (u_long) XT_TAB_MIN_VAR_REC_LENGTH);
2145
printf("Minimum auto-increment = %llu\n", (u_llong) tab->tab_dic.dic_min_auto_inc);
2146
printf("Number of columns = %lu\n", (u_long) tab->tab_dic.dic_no_of_cols);
2147
printf("Number of fixed columns = %lu\n", (u_long) tab->tab_dic.dic_fix_col_count);
2148
printf("Columns req. for index = %lu\n", (u_long) tab->tab_dic.dic_ind_cols_req);
2149
if (tab->tab_dic.dic_ind_rec_len)
2150
printf("Rec len req. for index = %llu\n", (u_llong) tab->tab_dic.dic_ind_rec_len);
2151
printf("Columns req. for blobs = %lu\n", (u_long) tab->tab_dic.dic_blob_cols_req);
2152
printf("Number of blob columns = %lu\n", (u_long) tab->tab_dic.dic_blob_count);
2153
printf("Number of indices = %lu\n", (u_long) tab->tab_dic.dic_key_count);
2156
#ifdef DUMP_CHECK_TABLE
2157
printf("Records:-\n");
2158
printf("Free list: %llu (%llu)\n", (u_llong) tab->tab_rec_free_id, (u_llong) tab->tab_rec_fnum);
2159
printf("EOF: %llu\n", (u_llong) tab->tab_rec_eof_id);
2162
rec_size = XT_REC_EXT_HEADER_SIZE;
2163
if (rec_size > tab->tab_recs.tci_rec_size)
2164
rec_size = tab->tab_recs.tci_rec_size;
2166
while (rec_id < tab->tab_rec_eof_id) {
2167
if (!xt_tab_get_rec_data(ot, rec_id, tab->tab_dic.dic_rec_size, ot->ot_row_rbuffer))
2170
#ifdef DUMP_CHECK_TABLE
2171
printf("%-4llu ", (u_llong) rec_id);
2173
switch (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) {
2174
case XT_TAB_STATUS_FREED:
2175
#ifdef DUMP_CHECK_TABLE
2176
printf("======== ");
2180
case XT_TAB_STATUS_DELETE:
2181
#ifdef DUMP_CHECK_TABLE
2186
case XT_TAB_STATUS_FIXED:
2187
#ifdef DUMP_CHECK_TABLE
2188
printf("record-F ");
2191
row_size = myxt_store_row_length(ot, (char *) ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE);
2192
alloc_rec_bytes += row_size;
2193
if (!min_comp_rec_len || row_size < min_comp_rec_len)
2194
min_comp_rec_len = row_size;
2195
if (row_size > max_comp_rec_len)
2196
max_comp_rec_len = row_size;
2198
case XT_TAB_STATUS_VARIABLE:
2199
#ifdef DUMP_CHECK_TABLE
2200
printf("record-V ");
2203
row_size = myxt_load_row_length(ot, tab->tab_dic.dic_rec_size, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, NULL);
2204
alloc_rec_bytes += row_size;
2205
if (!min_comp_rec_len || row_size < min_comp_rec_len)
2206
min_comp_rec_len = row_size;
2207
if (row_size > max_comp_rec_len)
2208
max_comp_rec_len = row_size;
2210
case XT_TAB_STATUS_EXT_DLOG:
2211
#ifdef DUMP_CHECK_TABLE
2212
printf("record-X ");
2215
ext_data_len += XT_GET_DISK_4(rec_buf->re_log_dat_siz_4);
2216
row_size = XT_GET_DISK_4(rec_buf->re_log_dat_siz_4) + ot->ot_rec_size - XT_REC_EXT_HEADER_SIZE;
2217
alloc_rec_bytes += row_size;
2218
if (!min_comp_rec_len || row_size < min_comp_rec_len)
2219
min_comp_rec_len = row_size;
2220
if (row_size > max_comp_rec_len)
2221
max_comp_rec_len = row_size;
2224
#ifdef DUMP_CHECK_TABLE
2225
if (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_CLEANED_BIT)
2230
prev_rec_id = XT_GET_DISK_4(rec_buf->tr_prev_rec_id_4);
2231
xn_id = XT_GET_DISK_4(rec_buf->tr_xact_id_4);
2232
row_id = XT_GET_DISK_4(rec_buf->tr_row_id_4);
2233
switch (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) {
2234
case XT_TAB_STATUS_FREED:
2235
#ifdef DUMP_CHECK_TABLE
2236
printf(" prev=%-3llu (xact=%-3llu row=%lu)\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id);
2239
case XT_TAB_STATUS_EXT_DLOG:
2242
#ifdef DUMP_CHECK_TABLE
2243
printf(" prev=%-3llu xact=%-3llu row=%lu Xlog=%lu Xoff=%llu Xsiz=%lu\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id, (u_long) XT_GET_DISK_2(rec_buf->re_log_id_2), (u_llong) XT_GET_DISK_6(rec_buf->re_log_offs_6), (u_long) XT_GET_DISK_4(rec_buf->re_log_dat_siz_4));
2246
log_size = XT_GET_DISK_4(rec_buf->re_log_dat_siz_4);
2247
XT_GET_LOG_REF(log_id, log_offset, rec_buf);
2248
if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
2249
xt_tab_read_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data), (xtWord1 *) &ext_rec);
2253
if (!(ok = self->st_dlog_buf.dlb_read_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data), (xtWord1 *) &ext_rec, self)))
2254
xt_log_and_clear_exception(self);
2258
xtTableID curr_tab_id;
2259
xtRecordID curr_rec_id;
2261
log_size2 = XT_GET_DISK_4(ext_rec.er_data_size_4);
2262
curr_tab_id = XT_GET_DISK_4(ext_rec.er_tab_id_4);
2263
curr_rec_id = XT_GET_DISK_4(ext_rec.er_rec_id_4);
2264
if (log_size2 != log_size || curr_tab_id != tab->tab_id || curr_rec_id != rec_id) {
2265
xt_logf(XT_INFO, "Table %s: record %llu, extended record %lu:%llu not valid\n", tab->tab_name, (u_llong) rec_id, (u_long) log_id, (u_llong) log_offset);
2270
#ifdef DUMP_CHECK_TABLE
2271
printf(" prev=%-3llu xact=%-3llu row=%lu\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id);
2278
#ifdef CHECK_TABLE_STATS
2279
u_long rec, row, ind;
2282
rec = xt_seek_eof_file(self, ot->ot_rec_file);
2283
row = xt_seek_eof_file(self, ot->ot_row_file);
2284
ind = xt_seek_eof_file(self, ot->ot_ind_file);
2285
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
2286
if (!tab->tab_dic.dic_rec_fixed) {
2287
xt_int8_to_byte_size((xtInt8) tab->tab_mem_total, value);
2288
printf("Ext. record memory used = %s\n", value);
2290
xt_int8_to_byte_size((xtInt8) ind, value);
2291
printf("Index data memory used = %s\n", value);
2292
xt_int8_to_byte_size((xtInt8) rec + row, value);
2293
printf("Table data memory used = %s\n", value);
2294
xt_int8_to_byte_size((xtInt8) tab->tab_mem_total + rec + row + ind, value);
2295
printf("Total memory used = %s\n", value);
2298
if (!tab->tab_dic.dic_rec_fixed) {
2299
xt_int8_to_byte_size((xtInt8) ext_data_len, value);
2300
printf("Ext. record disk used = %s\n", value);
2302
xt_int8_to_byte_size((xtInt8) ind, value);
2303
printf("Index disk space used = %s\n", value);
2304
xt_int8_to_byte_size((xtInt8) rec + row, value);
2305
printf("Table disk space used = %s\n", value);
2306
xt_int8_to_byte_size((xtInt8) ext_data_len + rec + row + ind, value);
2307
printf("Total disk space used = %s\n", value);
2310
if (alloc_rec_count) {
2311
printf("Minumum comp. rec. len. = %llu\n", (u_llong) min_comp_rec_len);
2312
printf("Average comp. rec. len. = %llu\n", (u_llong) ((double) alloc_rec_bytes / (double) alloc_rec_count + (double) 0.5));
2313
printf("Maximum comp. rec. len. = %llu\n", (u_llong) max_comp_rec_len);
2315
printf("Free record count = %llu\n", (u_llong) free_rec_count);
2316
printf("Deleted record count = %llu\n", (u_llong) delete_rec_count);
2317
printf("Allocated record count = %llu\n", (u_llong) alloc_rec_count);
2320
if (tab->tab_rec_fnum != free_rec_count)
2321
xt_logf(XT_INFO, "Table %s: incorrect number of free blocks, %llu, should be: %llu\n", tab->tab_name, (u_llong) free_rec_count, (u_llong) tab->tab_rec_fnum);
2323
/* Checking the free list: */
2325
rec_id = tab->tab_rec_free_id;
2327
if (rec_id >= tab->tab_rec_eof_id) {
2328
xt_logf(XT_INFO, "Table %s: invalid reference on free list: %llu, ", tab->tab_name, (u_llong) rec_id);
2330
xt_logf(XT_INFO, "reference by: %llu\n", (u_llong) prec_id);
2332
xt_logf(XT_INFO, "reference by list head pointer\n");
2335
if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_FIX_HEADER_SIZE, (xtWord1 *) rec_buf)) {
2336
xt_log_and_clear_exception(self);
2339
if ((rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) != XT_TAB_STATUS_FREED)
2340
xt_logf(XT_INFO, "Table %s: record, %llu, on free list is not free\n", tab->tab_name, (u_llong) rec_id);
2343
rec_id = XT_GET_DISK_4(rec_buf->tr_prev_rec_id_4);
2345
if (free_count2 < free_rec_count)
2346
xt_logf(XT_INFO, "Table %s: not all free blocks (%llu) on free list: %llu\n", tab->tab_name, (u_llong) free_rec_count, (u_llong) free_count2);
2348
freer_(); // xt_unlock_mutex_ns(&tab->tab_rec_lock);
2352
xt_lock_mutex(self, &tab->tab_row_lock);
2353
pushr_(xt_unlock_mutex, &tab->tab_row_lock);
2355
#ifdef DUMP_CHECK_TABLE
2357
printf("Free list: %llu (%llu)\n", (u_llong) tab->tab_row_free_id, (u_llong) tab->tab_row_fnum);
2358
printf("EOF: %llu\n", (u_llong) tab->tab_row_eof_id);
2362
while (rec_id < tab->tab_row_eof_id) {
2363
if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, rec_id, &ref_id, self))
2365
#ifdef DUMP_CHECK_TABLE
2366
printf("%-3llu ", (u_llong) rec_id);
2368
#ifdef DUMP_CHECK_TABLE
2370
printf("====== 0\n");
2372
printf("in use %llu\n", (u_llong) ref_id);
2377
freer_(); // xt_unlock_mutex(&tab->tab_row_lock);
2379
#ifdef CHECK_INDEX_ON_CHECK_TABLE
2380
xt_check_indices(ot);
2382
freer_(); // xt_unlock_mutex(&tab->tab_db->db_co_ext_lock);
2385
// Rename a table from old_name to new_name within the current database of
// the calling thread (self->st_database).
//
// Steps visible in this function:
//  - a new name whose last path component is longer than
//    XT_TABLE_NAME_SIZE-1 characters is rejected with XT_ERR_NAME_TOO_LONG;
//  - the table pool is locked by name and the db_tables hash is locked;
//  - the dictionary is moved out of the table handle with
//    myxt_move_dictionary() and the table files are closed;
//  - the old name is removed from db_tables and every file enumerated for
//    the table is renamed to new_name plus the file postfix;
//  - the table entry found by ID gets the new name and path, the old path
//    is passed to tab_remove_table_path() and tab_save_tables() is called;
//  - the table is re-opened under the new name with the saved dictionary
//    and marked as repaired.
//
// Cleanup handlers are registered with pushr_() and later run with freer_()
// or discarded with popr_(); the trailing comment on each of those calls
// names the handler it pairs with.
xtPublic void xt_rename_table(XTThreadPtr self, XTPathStrPtr old_name, XTPathStrPtr new_name)
2387
XTDatabaseHPtr db = self->st_database;
2388
XTOpenTablePoolPtr table_pool;
2389
XTTableHPtr tab = NULL;
2390
char table_name[XT_MAX_TABLE_FILE_NAME_SIZE];
2392
XTFilesOfTableRec ft;
2393
XTDictionaryRec dic;
2395
XTTableEntryPtr te_ptr;
2397
XTTablePathPtr te_new_path;
2398
XTTablePathPtr te_old_path;
2399
char to_path[PATH_MAX];
2401
// Start from an all-zero dictionary record; it is filled by
// myxt_move_dictionary() further down.
memset(&dic, 0, sizeof(dic));
2403
#ifdef TRACE_CREATE_TABLES
2404
printf("RENAME %s --> %s\n", old_name->ps_path, new_name->ps_path);
2406
// Only the last path component of the new name is length-checked.
if (strlen(xt_last_name_of_path(new_name->ps_path)) > XT_TABLE_NAME_SIZE-1)
2407
xt_throw_taberr(XT_CONTEXT, XT_ERR_NAME_TOO_LONG, new_name);
2409
/* MySQL renames the table while it is in use. Here is
2415
* COPY tab1 -> tmp_tab
2417
* RENAME tab1 -> tmp2_tab
2418
* RENAME tmp_tab -> tab1
2419
* CLOSE tab1 (tmp2_tab)
2423
* Since the table is open when it is renamed, I cannot
2424
* get exclusive use of the table for this operation.
2426
* So instead we just make sure that the sweeper is not
2429
table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, old_name, FALSE, TRUE, FALSE, &tab);
2430
pushr_(xt_db_unlock_table_pool, table_pool);
2431
xt_ht_lock(self, db->db_tables);
2432
pushr_(xt_ht_unlock, db->db_tables);
2433
tab_id = tab->tab_id;
2434
myxt_move_dictionary(&dic, &tab->tab_dic);
2435
pushr_(myxt_free_dictionary, &dic);
2436
pushr_(xt_heap_release, tab);
2438
/* Unmap the memory mapped table files:
2439
* For windows this must be done before we
2440
* can rename the files.
2442
tab_close_files(self, tab);
2444
freer_(); // xt_heap_release(self, old_tab)
2446
/* Create the new name and path: */
2447
te_new_name = xt_dup_string(self, xt_last_name_of_path(new_name->ps_path));
2448
pushr_(xt_free, te_new_name);
2449
te_new_path = tab_get_table_path(self, db, new_name, FALSE);
2450
pushr_(tab_free_table_path, te_new_path);
2452
// The entry is looked up by table ID; its name and path are switched below.
te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
2454
/* Remove the table from the Database directory: */
2455
xt_ht_del(self, db->db_tables, old_name);
2457
xt_enum_files_of_tables_init(old_name, tab_id, &ft);
2458
while (xt_enum_files_of_tables_next(&ft)) {
2459
postfix = xt_tab_file_to_name(XT_MAX_TABLE_FILE_NAME_SIZE, table_name, ft.ft_file_path);
2461
// Target path is the new table path followed by the postfix of this file.
xt_strcpy(PATH_MAX, to_path, new_name->ps_path);
2462
xt_strcat(PATH_MAX, to_path, postfix);
2464
// A failed rename of a single file is logged and cleared, not thrown.
if (!xt_fs_rename(NULL, ft.ft_file_path, to_path))
2465
xt_log_and_clear_exception(self);
2468
/* Switch the table name and path: */
2469
xt_free(self, te_ptr->te_tab_name);
2470
te_ptr->te_tab_name = te_new_name;
2471
te_old_path = te_ptr->te_tab_path;
2472
te_ptr->te_tab_path = te_new_path;
2473
tab_remove_table_path(self, db, te_old_path);
2474
tab_save_tables(self, db);
2476
// The new name and path are now referenced by the table entry, so their
// cleanup handlers are discarded instead of being run.
popr_(); // Discard tab_free_table_path(te_new_path);
2477
popr_(); // Discard xt_free(te_new_name);
2479
// Re-open under the new name, handing in the dictionary saved above.
tab = xt_use_table_no_lock(self, db, new_name, FALSE, FALSE, &dic);
2480
/* All renamed tables are considered repaired! */
2481
xt_tab_table_repaired(tab);
2482
xt_heap_release(self, tab);
2484
freer_(); // myxt_free_dictionary(&dic)
2485
freer_(); // xt_ht_unlock(db->db_tables)
2486
freer_(); // xt_db_unlock_table_pool(table_pool)
2489
// Look up the table called name in the current database of self
// (self->st_database).
// The db_tables hash is locked around the call to xt_use_table_no_lock(),
// which receives no_load and missing_ok unchanged and NULL as its last
// argument.
// NOTE(review): the handle stored in tab is presumably the return value -
// confirm.
xtPublic XTTableHPtr xt_use_table(XTThreadPtr self, XTPathStrPtr name, xtBool no_load, xtBool missing_ok)
2492
XTDatabaseHPtr db = self->st_database;
2494
xt_ht_lock(self, db->db_tables);
2495
pushr_(xt_ht_unlock, db->db_tables);
2496
tab = xt_use_table_no_lock(self, db, name, no_load, missing_ok, NULL);
2501
// Synchronously flush the table behind ot.
//
// The function wakes the sweeper and waits for it to become idle, then
// waits while the head operation sequence of the table is still before the
// next sequence number (the writer has not yet written everything), and
// finally calls xt_flush_table().
//
// timeout: compared, in seconds as returned by time(), against the time
//     spent waiting for the writer. A value of 0 disables the check. When
//     the limit is exceeded a warning naming the table is logged.
xtPublic void xt_sync_flush_table(XTThreadPtr self, XTOpenTablePtr ot, int timeout)
2503
XTTableHPtr tab = ot->ot_table;
2504
XTDatabaseHPtr db = tab->tab_db;
2506
/* Wakeup the sweeper:
2507
* We want the sweeper to check if there is anything to do,
2508
* so we must wake it up.
2509
* Once it has done all it can, it will go back to sleep.
2510
* This should be good enough.
2512
* NOTE: In all cases, we do not wait if the sweeper is in
2515
if (db->db_sw_idle) {
2516
u_int check_count = db->db_sw_check_count;
2519
xt_wakeup_sweeper(db);
2520
if (!db->db_sw_thread || db->db_sw_idle != XT_THREAD_IDLE || check_count != db->db_sw_check_count)
2522
xt_sleep_milli_second(10);
2526
/* Wait for the sweeper to become idle: */
2527
xt_lock_mutex(self, &db->db_sw_lock);
2528
pushr_(xt_unlock_mutex, &db->db_sw_lock);
2529
while (db->db_sw_thread && !db->db_sw_idle) {
2530
xt_timed_wait_cond(self, &db->db_sw_cond, &db->db_sw_lock, 10);
2532
freer_(); // xt_unlock_mutex(&db->db_sw_lock)
2534
/* Wait for the writer to write out all operations on the table:
2535
* We also do not wait for the writer if it is in
2538
time_t start_time = time(NULL);
2539
// Loop while a writer thread exists, it is not in the error state, and
// operations on this table are still outstanding.
while (db->db_wr_thread &&
2540
db->db_wr_idle != XT_THREAD_INERR &&
2541
XTTableSeq::xt_op_is_before(tab->tab_head_op_seq+1, tab->tab_seq.ts_next_seq)) {
2542
if (timeout && time(NULL) > start_time + timeout) {
2543
char name_buf[XT_TABLE_NAME_BUF_SIZE];
2545
xt_tab_make_table_name(tab->tab_name, name_buf, XT_TABLE_NAME_BUF_SIZE);
2546
xt_logf(XT_WARNING, "Timeout waiting for writer while flushing %s\n", name_buf);
2550
/* Flush the log, in case this is holding up the
2553
if (!db->db_xlog.xlog_flush(self))
2556
xt_lock_mutex(self, &db->db_wr_lock);
2557
pushr_(xt_unlock_mutex, &db->db_wr_lock);
2558
// The waiting counter is raised here and lowered again after the sleep,
// both times while holding db_wr_lock.
db->db_wr_thread_waiting++;
2560
* Wake the writer if it is sleeping. In order to
2561
* flush a table we must wait for the writer to complete
2562
* committing all the changes in the table to the database.
2564
if (db->db_wr_idle) {
2565
if (!xt_broadcast_cond_ns(&db->db_wr_cond))
2566
xt_log_and_clear_exception_ns();
2569
freer_(); // xt_unlock_mutex(&db->db_wr_lock)
2570
xt_sleep_milli_second(10);
2572
xt_lock_mutex(self, &db->db_wr_lock);
2573
pushr_(xt_unlock_mutex, &db->db_wr_lock);
2574
db->db_wr_thread_waiting--;
2575
freer_(); // xt_unlock_mutex(&db->db_wr_lock)
2578
xt_flush_table(self, ot);
2581
// Task body of the background record/row flush for frt_table.
//
// The table is opened from the pool using the database and table ID taken
// from frt_table. If it cannot be opened, or the opened table is not the
// same object as frt_table, a warning is logged and the checkpoint flush
// state for that table ID is set to XT_CPT_STATE_DONE_ALL.
// Otherwise xt_flush_record_row() is called; the open table is handed back
// to the pool both when that call fails and when it succeeds.
xtBool XTFlushRecRowTask::tk_task(XTThreadPtr thread)
2585
/* {TASK-TABLE-GONE}
2586
* If this task was scheduled before the table was deleted
2587
* or renamed, then we may be caught holding an invalid
2588
* table (frt_table) object.
2590
* As a result we just use the ID, to get the open table
2593
* If the tables are not identical, then there is no point
2596
if (!(xt_db_open_pool_table_ns(&ot, frt_table->tab_db, frt_table->tab_id)))
2600
/* Can happen if the table has been dropped: */
2601
if (thread->t_exception.e_xt_err)
2602
xt_log_and_clear_exception(thread);
2603
xt_logf(XT_NT_WARNING, "Checkpoint skipping table (ID) %lu: table was not found\n", (u_long) frt_table->tab_id);
2604
xt_checkpoint_set_flush_state(frt_table->tab_db, frt_table->tab_id, XT_CPT_STATE_DONE_ALL);
2608
// Pointer comparison: the handle obtained by ID must be the very object
// this task was created for.
if (ot->ot_table != frt_table) {
2609
/* Can happen if the table has been renamed: */
2610
if (thread->t_exception.e_xt_err)
2611
xt_log_and_clear_exception(thread);
2612
xt_logf(XT_NT_WARNING, "Checkpoint skipping table (ID) %lu: table has been renamed\n", (u_long) frt_table->tab_id);
2613
xt_checkpoint_set_flush_state(frt_table->tab_db, frt_table->tab_id, XT_CPT_STATE_DONE_ALL);
2617
if (!xt_flush_record_row(ot, NULL, FALSE)) {
2618
xt_db_return_table_to_pool_ns(ot);
2623
xt_db_return_table_to_pool_ns(ot);
2627
// Take a heap reference on frt_table, the table this task flushes.
// Counterpart of tk_release().
void XTFlushRecRowTask::tk_reference()
2629
xt_heap_reference_ns(frt_table);
2632
// Release the heap reference on frt_table taken by tk_reference().
void XTFlushRecRowTask::tk_release()
2634
xt_heap_release_ns(frt_table);
2638
* Start a flush of this file in background.
2640
// Schedule the record/row flush task of tab (tab->tab_rec_flush_task) via
// xt_run_async_task(), forwarding notify_complete and thread and passing
// the database of the table. The result of xt_run_async_task() is returned.
// The task is first tested with tk_is_running().
// NOTE(review): presumably the function returns early without scheduling
// when the task is already running - confirm.
xtPublic xtBool xt_async_flush_record_row(XTTableHPtr tab, xtBool notify_complete, XTThreadPtr thread)
2642
if (tab->tab_rec_flush_task->tk_is_running())
2646
return xt_run_async_task(tab->tab_rec_flush_task, notify_complete, FALSE, thread, tab->tab_db);
2649
// Flush the record and row files of the table behind ot and write the
// table header.
//
// Visible sequence: begin a checkpoint, take tab_rec_flush_lock, set the
// flush state to XT_CPT_STATE_START_REC_ROW, capture the header with
// xt_tab_store_header(), test and clear tab_flush_pending, flush the
// record and row files (not done for temporary tables), write the header
// with tab_write_header(), write the minimum auto-increment value when
// xt_db_auto_increment_mode is 1 and the value changed, set the state to
// XT_CPT_STATE_DONE_REC_ROW, release the lock and end the checkpoint.
//
// bytes_flushed: incremented by the pending byte count
//     (tab_bytes_to_flush). Callers in this file pass NULL, so the update
//     is presumably guarded by a NULL check - confirm.
// have_table_lock: forwarded to xt_begin_checkpoint().
//
// When a file flush or the header write fails, tab_flush_pending is set
// back to TRUE. The failure path sets XT_CPT_STATE_STOP_REC_ROW and
// releases tab_rec_flush_lock.
xtPublic xtBool xt_flush_record_row(XTOpenTablePtr ot, off_t *bytes_flushed, xtBool have_table_lock)
2651
XTTableHeadDRec rec_head;
2652
XTTableHPtr tab = ot->ot_table;
2654
#ifdef TRACE_FLUSH_TABLE
2658
if (!xt_begin_checkpoint(tab->tab_db, have_table_lock, ot->ot_thread))
2661
xt_lock_mutex_ns(&tab->tab_rec_flush_lock);
2662
#ifdef XT_SORT_REC_WRITES
2663
if (!xt_xres_delay_flush(ot, TRUE))
2666
xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_START_REC_ROW);
2668
ASSERT_NS(ot->ot_thread == xt_get_self());
2669
/* Make sure that the table recovery point, in
2670
* particular the operation ID is recorded
2671
* before all other flush activity!
2673
* This is because only operations after the
2674
* recovery point in the header are applied
2675
* to the table on recovery.
2677
* So the operation ID is recorded before the
2678
* flush activity, and written after all is done.
2680
xt_tab_store_header(ot, &rec_head);
2682
/* Write the table header: */
2683
if (tab->tab_flush_pending) {
2684
tab->tab_flush_pending = FALSE;
2686
#ifdef TRACE_FLUSH_TABLE
2688
printf("FLUSH TABLE bytes=%lu %s\n", (u_long) tab->tab_bytes_to_flush, tab->tab_name->ps_path);
2691
// Want to see how much was to be flushed in the debugger:
2692
to_flush = tab->tab_bytes_to_flush;
2693
tab->tab_bytes_to_flush = 0;
2695
*bytes_flushed += to_flush;
2697
#ifdef XT_REC_FLUSH_THRESHOLD
2700
/* Reset the writer's byte level: */
2701
if ((writer = ot->ot_table->tab_db->db_wr_thread))
2702
tab->tab_rec_wr_last_flush = writer->st_statistics.st_rec.ts_write;
2705
/* Flush the table data: */
2706
if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags)) {
2707
if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread) ||
2708
!XT_FLUSH_RR_FILE(ot->ot_row_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread)) {
2709
// Flush failed: mark the table as still needing a flush.
tab->tab_flush_pending = TRUE;
2714
/* The header includes the operation number which
2715
* must be written AFTER all other data,
2716
* because operations will not be applied again.
2718
if (!tab_write_header(ot, &rec_head)) {
2719
tab->tab_flush_pending = TRUE;
2724
/* Flush the auto-increment: */
2725
if (xt_db_auto_increment_mode == 1) {
2726
if (tab->tab_auto_inc != tab->tab_dic.dic_min_auto_inc) {
2727
tab->tab_dic.dic_min_auto_inc = tab->tab_auto_inc;
2728
if (!xt_tab_write_min_auto_inc(ot))
2733
/* Mark this table as record/row flushed: */
2734
xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_DONE_REC_ROW);
2736
#ifdef TRACE_FLUSH_TABLE
2738
printf("flush table (%d) %s DONE\n", (int) (time(NULL) - tnow), tab->tab_name->ps_path);
2743
xt_unlock_mutex_ns(&tab->tab_rec_flush_lock);
2745
if (!xt_end_checkpoint(tab->tab_db, ot->ot_thread, NULL))
2750
xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_STOP_REC_ROW);
2752
#ifdef TRACE_FLUSH_TABLE
2754
printf("flush table (%d) %s FAILED\n", (int) (time(NULL) - tnow), tab->tab_name->ps_path);
2759
xt_unlock_mutex_ns(&tab->tab_rec_flush_lock);
2763
// Flush the table behind ot: first the record and row data through
// xt_flush_record_row(), then the indices through xt_flush_indices().
// Both calls pass NULL for the byte counter and FALSE for have_table_lock.
// The long note below records why flushing is serialised by a mutex.
xtPublic void xt_flush_table(XTThreadPtr self, XTOpenTablePtr ot)
2765
/* GOTCHA {FLUSH-BUG}: This bug was difficult to find.
2766
* It occured on Windows in the multi_update
2769
* What happens is the checkpointer starts to
2770
* flush the table, and gets to the
2771
* XT_FLUSH_RR_FILE part.
2773
* Then a rename occurs, and the user thread
2774
* flushes the table, and goes through and
2775
* writes the table header, with the most
2776
* recent table operation (the last operation
2779
* The checkpointer the completes and
2780
* also writes the header, but with old
2781
* values (as read in xt_tab_store_header()).
2783
* The then user thread continues, and
2784
* reopens the table after rename.
2785
* On reopen, it reads the old value from the header,
2786
* and sets the current operation number.
2788
* Now there is a problem in the table cache,
2789
* because some cache pages have operation numbers
2790
* that are greater than current operation
2793
* This later lead to the free-er hanging while
2794
* it waited for an operation to be
2795
* written to the disk that never would be.
2796
* This is because a page can only be freed when
2797
* the head operation number has passed the
2798
* page operation number.
2800
* Which indicates that the page has been written
2804
* As a result I now use mutex so that only one
2805
* thread can flush at a time.
2808
if (!xt_flush_record_row(ot, NULL, FALSE))
2811
/* This was before the table data flush,
2812
* (after xt_tab_store_header() above,
2813
* but I don't think it makes any difference.
2814
* Because in the checkpointer it was at this
2817
if (!xt_flush_indices(ot, NULL, FALSE, NULL))
2822
// Allocate and initialise an open-table record for tab.
//
// The record is allocated with xt_malloc_ns() and zeroed up to the
// ot_ind_wbuf member. A heap reference on tab is taken, the row, record
// and index files are opened (the index file with XT_FS_MISSING_OK, plus
// XT_FS_DIRECT_IO when XT_USE_DIRECT_IO_ON_INDEX is defined), and separate
// read and write row buffers of dic_rec_size bytes are allocated.
// NOTE(review): tab_close_table() at the end looks like the failure path -
// confirm.
static XTOpenTablePtr tab_open_table(XTTableHPtr tab)
2824
volatile XTOpenTablePtr ot;
2827
if (!(ot = (XTOpenTablePtr) xt_malloc_ns(sizeof(XTOpenTableRec))))
2829
// Only the part of the record in front of ot_ind_wbuf is cleared.
memset(ot, 0, offsetof(XTOpenTableRec, ot_ind_wbuf));
2831
ot->ot_seq_page = NULL;
2832
ot->ot_seq_data = NULL;
2834
self = xt_get_self();
2836
xt_heap_reference(self, tab);
2838
ot->ot_row_file = xt_open_file(self, ot->ot_table->tab_row_file->fil_path, xt_row_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_DEFAULT, xt_db_row_file_grow_size);
2839
ot->ot_rec_file = xt_open_file(self, ot->ot_table->tab_rec_file->fil_path, xt_rec_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_DEFAULT, xt_db_data_file_grow_size);
2840
#ifdef XT_USE_DIRECT_IO_ON_INDEX
2841
ot->ot_ind_file = xt_open_file(self, ot->ot_table->tab_ind_file->fil_path, xt_ind_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_MISSING_OK | XT_FS_DIRECT_IO, XT_INDEX_PAGE_SIZE*256);
2843
ot->ot_ind_file = xt_open_file(self, ot->ot_table->tab_ind_file->fil_path, xt_ind_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_MISSING_OK, XT_INDEX_PAGE_SIZE*256);
2851
// The index file is not part of this check; it was opened with
// XT_FS_MISSING_OK.
if (!ot->ot_table || !ot->ot_row_file || !ot->ot_rec_file)
2854
if (!(ot->ot_row_rbuffer = (xtWord1 *) xt_malloc_ns(ot->ot_table->tab_dic.dic_rec_size)))
2856
ot->ot_row_rbuf_size = ot->ot_table->tab_dic.dic_rec_size;
2857
if (!(ot->ot_row_wbuffer = (xtWord1 *) xt_malloc_ns(ot->ot_table->tab_dic.dic_rec_size)))
2859
ot->ot_row_wbuf_size = ot->ot_table->tab_dic.dic_rec_size;
2861
/* Cache this stuff to speed access a bit: */
2862
ot->ot_rec_fixed = ot->ot_table->tab_dic.dic_rec_fixed;
2863
ot->ot_rec_size = ot->ot_table->tab_dic.dic_rec_size;
2868
tab_close_table(ot);
2872
// Public entry point for opening a table: returns whatever the file-local
// tab_open_table() returns for tab.
xtPublic XTOpenTablePtr xt_open_table(XTTableHPtr tab)
2874
return tab_open_table(tab);
2877
// Close an open table.
// Record/row data and indices are flushed before tab_close_table() is
// called; a failure of either flush is logged and cleared rather than
// propagated. have_table_lock is forwarded to both flush calls.
// NOTE(review): the flushes are presumably performed only when flush is
// set - confirm.
xtPublic void xt_close_table(XTOpenTablePtr ot, xtBool flush, xtBool have_table_lock)
2880
if (!xt_flush_record_row(ot, NULL, have_table_lock))
2881
xt_log_and_clear_exception_ns();
2883
if (!xt_flush_indices(ot, NULL, have_table_lock, NULL))
2884
xt_log_and_clear_exception_ns();
2886
tab_close_table(ot);
2889
// Find the table with ID tab_id in db while holding the db_tables lock.
//
// The table entry is looked up in db->db_table_by_id. When the entry has
// no table handle yet (te_table is NULL), the table path is built from the
// entry path, a directory separator and the entry name, and a new handle
// is created with tab_new_handle(). A handle that already exists gets an
// additional heap reference. XT_TAB_NOT_FOUND is one of the result codes
// assigned to r.
//
// XT_ERR_NO_DATABASE_IN_USE is thrown near the top.
// NOTE(review): presumably only when db is NULL, and the handle is
// presumably passed back through r_tab with r as return value - confirm.
static int tab_use_table_by_id(XTThreadPtr self, XTTableHPtr *r_tab, XTDatabaseHPtr db, xtTableID tab_id)
2891
XTTableEntryPtr te_ptr;
2892
XTTableHPtr tab = NULL;
2894
char path[PATH_MAX];
2897
xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
2898
xt_ht_lock(self, db->db_tables);
2899
pushr_(xt_ht_unlock, db->db_tables);
2901
te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
2903
if (!(tab = te_ptr->te_table)) {
2904
/* Open the table: */
2905
xt_strcpy(PATH_MAX, path, te_ptr->te_tab_path->tp_path);
2906
xt_add_dir_char(PATH_MAX, path);
2907
xt_strcat(PATH_MAX, path, te_ptr->te_tab_name);
2908
r = tab_new_handle(self, &tab, db, tab_id, (XTPathStrPtr) path, TRUE, NULL);
2912
r = XT_TAB_NOT_FOUND;
2915
xt_heap_reference(self, tab);
2918
freer_(); // xt_ht_unlock(db->db_tables)
2922
// Wrapper around tab_use_table_by_id() that handles results other than
// XT_TAB_OK. Among the visible cases, XT_TAB_NO_DICTIONARY throws
// XT_ERR_NO_DICTIONARY and XT_TAB_POOL_CLOSED throws XT_ERR_TABLE_LOCKED,
// both carrying tab_id; XT_TAB_NOT_FOUND has its own case.
// NOTE(review): result presumably receives the code r when it is not NULL -
// confirm.
xtPublic XTTableHPtr xt_use_table_by_id(XTThreadPtr self, XTDatabaseHPtr db, xtTableID tab_id, int *result)
2927
r = tab_use_table_by_id(self, &tab, db, tab_id);
2929
if (r != XT_TAB_OK) {
2936
case XT_TAB_NOT_FOUND:
2938
case XT_TAB_NO_DICTIONARY:
2939
xt_throw_ulxterr(XT_CONTEXT, XT_ERR_NO_DICTIONARY, (u_long) tab_id);
2940
case XT_TAB_POOL_CLOSED:
2941
xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TABLE_LOCKED, (u_long) tab_id);
2950
// Variant of xt_use_table_by_id() for callers that do not pass a thread:
// the current thread is fetched with xt_get_self() and NULL is passed as
// the result pointer.
xtPublic XTTableHPtr xt_use_table_by_id_ns(XTDatabaseHPtr db, xtTableID tab_id)
2953
XTThreadPtr self = xt_get_self();
2956
tab = xt_use_table_by_id(self, db, tab_id, NULL);
2965
/* The fixed part of the record is already in the row buffer.
2966
* This function loads the extended part, expanding the row
2967
* buffer if necessary.
2969
// Load the extended part of record load_rec_id and unpack the row into
// buffer through myxt_load_row(), requesting cols_req columns.
//
// The log reference (ID, offset and data size) is read from the extended
// record header already held in ot->ot_row_rbuffer. The row buffer is grown
// with xt_realloc_ns() when ot_rec_size + log_size does not fit.
// The extended entry is read to an address that lies
// offsetof(XTactExtRecEntryDRec, er_data) bytes in front of the end of the
// fixed part, so its data follows the fixed part directly. The bytes the
// entry header overwrites are saved in save_buffer first and copied back
// afterwards.
//
// Memory tables are read with xt_tab_read_ext_record(); other tables read
// through the data log buffer of the thread. The entry is accepted only if
// its size, table ID and record ID match the expected values, otherwise
// XT_ERR_BAD_EXT_RECORD is registered.
xtPublic xtBool xt_tab_load_ext_data(XTOpenTablePtr ot, xtRecordID load_rec_id, xtWord1 *buffer, u_int cols_req)
2973
xtLogOffset log_offset;
2974
xtWord1 save_buffer[offsetof(XTactExtRecEntryDRec, er_data)];
2975
xtBool retried = FALSE;
2976
XTactExtRecEntryDPtr ext_data_ptr;
2978
xtTableID curr_tab_id;
2979
xtRecordID curr_rec_id;
2981
log_size = XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->re_log_dat_siz_4);
2982
XT_GET_LOG_REF(log_id, log_offset, (XTTabRecExtDPtr) ot->ot_row_rbuffer);
2984
// Grow the read buffer so the fixed part plus the extended data fit.
if (ot->ot_rec_size + log_size > ot->ot_row_rbuf_size) {
2985
if (!xt_realloc_ns((void **) &ot->ot_row_rbuffer, ot->ot_rec_size + log_size))
2987
ot->ot_row_rbuf_size = ot->ot_rec_size + log_size;
2990
/* Read the extended part first: */
2991
ext_data_ptr = (XTactExtRecEntryDPtr) (ot->ot_row_rbuffer + ot->ot_rec_size - offsetof(XTactExtRecEntryDRec, er_data));
2993
/* Save the data which the header will overwrite: */
2994
memcpy(save_buffer, ext_data_ptr, offsetof(XTactExtRecEntryDRec, er_data));
2997
if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
2998
xt_tab_read_ext_record(ot->ot_table, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + log_size, (xtWord1 *) ext_data_ptr);
3000
if (!ot->ot_thread->st_dlog_buf.dlb_read_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + log_size, (xtWord1 *) ext_data_ptr, ot->ot_thread))
3004
// Identity fields stored in the entry header that was just read.
log_size2 = XT_GET_DISK_4(ext_data_ptr->er_data_size_4);
3005
curr_tab_id = XT_GET_DISK_4(ext_data_ptr->er_tab_id_4);
3006
curr_rec_id = XT_GET_DISK_4(ext_data_ptr->er_rec_id_4);
3008
if (log_size2 != log_size || curr_tab_id != ot->ot_table->tab_id || curr_rec_id != load_rec_id) {
3009
/* [(3)] This can happen in the following circumstances:
3010
* - A new record is created, but the data log is not
3012
* - The server quits.
3013
* - On restart the transaction is rolled back, but the data record
3014
* was not written, so later a new record could be written at this
3016
* - Later the sweeper tries to cleanup this record, and finds
3017
* that a different record has been written at this position.
3019
* NOTE: Index entries can only be written to disk for records
3020
* that have been committed to the disk, because uncommitted
3021
* records may not exist in order to remove the index entry
3024
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_BAD_EXT_RECORD);
3028
/* Restore the saved area: */
3029
memcpy(ext_data_ptr, save_buffer, offsetof(XTactExtRecEntryDRec, er_data));
3032
xt_unlock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
3033
return myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req);
3037
/* (1) It may be that reading the log fails because the garbage collector
3038
* has moved the record since we determined the location.
3039
* We handle this here, by re-reading the data the garbage collector
3040
* would have updated.
3042
* (2) It may also happen that a new record is just being updated or
3043
* inserted. It is possible that the handle part of the record
3044
* has been written, but not yet the overflow.
3045
* This means that repeating the read attempt could work.
3047
* (3) The extended data has been written by another handler and not yet
3048
* flushed. This should not happen because on committed extended
3049
* records are read, and all data should be flushed before
3052
* NOTE: (2) above is not a problem when versioning is working
3053
* correctly. In this case, we should never try to read the extended
3054
* part of an uncommitted record (belonging to some other thread/
3057
XTTabRecExtDRec rec_buf;
3059
xt_lock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
3062
// Re-read the extended record header to pick up a fresh log reference.
if (!xt_tab_get_rec_data(ot, load_rec_id, XT_REC_EXT_HEADER_SIZE, (xtWord1 *) &rec_buf))
3065
XT_GET_LOG_REF(log_id, log_offset, &rec_buf);
3071
xt_unlock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
3075
// Write size bytes from buffer to record rec_id of the table behind ot by
// calling xt_tc_write() on tab_recs with 0 as third argument, and return
// its result. op_seq is handed to xt_tc_write() unchanged.
xtPublic xtBool xt_tab_put_rec_data(XTOpenTablePtr ot, xtRecordID rec_id, size_t size, xtWord1 *buffer, xtOpSeqNo *op_seq)
3077
register XTTableHPtr tab = ot->ot_table;
3081
return tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, op_seq, TRUE, ot->ot_thread);
3084
// Write record data with xt_tc_write() and then log the change with
// xt_xlog_modify_table(), using an operation sequence number local to this
// function.
//
// status selects the write:
//  - XT_LOG_ENT_REC_MOVED passes offsetof(XTTabRecExtDRec, re_log_id_2) as
//    third argument of xt_tc_write() instead of 0 (presumably the byte
//    offset inside the record - confirm);
//  - XT_LOG_ENT_REC_CLEANED_1 asserts, it should no longer be used;
//  - a further write passes 0 as third argument.
xtPublic xtBool xt_tab_put_log_op_rec_data(XTOpenTablePtr ot, u_int status, xtRecordID free_rec_id, xtRecordID rec_id, size_t size, xtWord1 *buffer)
3086
register XTTableHPtr tab = ot->ot_table;
3091
if (status == XT_LOG_ENT_REC_MOVED) {
3092
if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, offsetof(XTTabRecExtDRec, re_log_id_2), size, buffer, &op_seq, TRUE, ot->ot_thread))
3096
else if (status == XT_LOG_ENT_REC_CLEANED_1) {
3097
ASSERT_NS(0); // shouldn't be used anymore
3101
if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, &op_seq, TRUE, ot->ot_thread))
3105
return xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, free_rec_id, rec_id, size, buffer, ot->ot_thread);
3108
// Same pattern as xt_tab_put_log_op_rec_data(), but the operation sequence
// number is supplied by the caller through op_seq: it is handed to
// xt_tc_write() and its value is then passed to xt_xlog_modify_table().
// For XT_LOG_ENT_REC_MOVED the third argument of xt_tc_write() is
// offsetof(XTTabRecExtDRec, re_log_id_2); the other write uses 0.
xtPublic xtBool xt_tab_put_log_rec_data(XTOpenTablePtr ot, u_int status, xtRecordID free_rec_id, xtRecordID rec_id, size_t size, xtWord1 *buffer, xtOpSeqNo *op_seq)
3110
register XTTableHPtr tab = ot->ot_table;
3114
if (status == XT_LOG_ENT_REC_MOVED) {
3115
if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, offsetof(XTTabRecExtDRec, re_log_id_2), size, buffer, op_seq, TRUE, ot->ot_thread))
3119
if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, op_seq, TRUE, ot->ot_thread))
3123
return xt_xlog_modify_table(tab->tab_id, status, *op_seq, 0, free_rec_id, rec_id, size, buffer, ot->ot_thread);
3126
// Read size bytes of record rec_id of the table behind ot into buffer by
// calling xt_tc_read() on tab_recs, and return its result.
xtPublic xtBool xt_tab_get_rec_data(XTOpenTablePtr ot, xtRecordID rec_id, size_t size, xtWord1 *buffer)
3128
register XTTableHPtr tab = ot->ot_table;
3132
return tab->tab_recs.xt_tc_read(ot->ot_rec_file, rec_id, (size_t) size, buffer, ot->ot_thread);
3136
* Note: this function grants locks even to transactions that
3137
* are not specifically waiting for this transaction.
3138
* This is required, because all threads waiting for
3139
* a lock should be considered "equal". In other words,
3140
* they should not have to wait for the "right" transaction
3141
* before they get the lock, or it will turn into a
3142
* race to wait for the correct transaction.
3144
* A transaction T1 can end up waiting for the wrong transaction
3145
* T2, because T2 has released the lock, and given it to T3.
3146
* Of course, T1 will wake up soon and realize this, but
3147
* it is a matter of timing.
3149
* The main point is that T2 has released the lock because
3150
* it has ended (see {RELEASING-LOCKS} for more details)
3151
* and therefore, there is no danger of it claiming the
3152
* lock again, which can lead to a deadlock if T1 is
3153
* given the lock instead of T3 in the example above.
3154
* Then, if T2 tries to regain the lock before T1
3155
* realizes that it has the lock.
3157
//static xtBool tab_get_lock_after_wait(XTThreadPtr thread, XTLockWaitPtr lw)
3159
// register XTTableHPtr tab = lw->lw_ot->ot_table;
3162
* I don't believe this lock is required. If it is, please explain why!!
3163
* XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[gl->lw_row_id % XT_ROW_RWLOCKS], thread);
3165
* With the old row lock implementation a XT_TAB_ROW_WRITE_LOCK was required because
3166
* the row locking did not have its own locks.
3167
* The new list locking has its own locks. I was using XT_TAB_ROW_READ_LOCK,
3168
* but I don't think this is required.
3170
// return tab->tab_locks.xt_set_temp_lock(lw->lw_ot, lw, &lw->lw_thread->st_lock_list);
3174
* NOTE: Previously this function did not gain the row lock.
3175
* If this change is a problem, please document why!
3176
* The previous implementation did wait until no lock was on the
3179
* I am thinking that it is simply a good idea to grab the lock,
3180
* instead of waiting for no lock, before the retry. But it could
3181
* result in locking more than required!
3183
// Wait, on behalf of thread, for transaction xn_id, which has updated row
// row_id of the table behind ot.
//
// A temporary lock on the row is requested first with xt_set_temp_lock().
// If lw.lw_curr_lock is then different from XT_NO_LOCK, the lock wait
// record is handed to xt_xn_wait_for_xact() so that both the lock and the
// transaction are waited for. Otherwise NULL is passed and only the
// transaction is waited for. The outcome is stored in ok.
static xtBool tab_wait_for_update(register XTOpenTablePtr ot, xtRowID row_id, xtXactID xn_id, XTThreadPtr thread)
3189
xw.xw_xn_id = xn_id;
3191
lw.lw_thread = thread;
3193
lw.lw_row_id = row_id;
3194
lw.lw_row_updated = FALSE;
3196
/* First try to get the lock: */
3197
if (!ot->ot_table->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list))
3199
if (lw.lw_curr_lock != XT_NO_LOCK)
3200
/* Wait for the lock, then the transaction: */
3201
ok = xt_xn_wait_for_xact(thread, &xw, &lw);
3203
/* Just wait for the transaction: */
3204
ok = xt_xn_wait_for_xact(thread, &xw, NULL);
3206
#ifdef DEBUG_LOCK_QUEUE
3207
ot->ot_table->tab_locks.rl_check(&lw);
3213
* XT_OLD - The record is old. No longer visible because there is
3214
* newer committed record before it in the record list.
3215
* This is a special case of FALSE (the record is not visible).
3216
* (see {WAIT-FOR} for details).
3217
* It is significant because if we find too many of these when
3218
* searching for records, then we have reason to believe the
3219
* sweeper is far behind. This can happen in a test like this:
3220
* runTest(INCREMENT_TEST, 2, INCREMENT_TEST_UPDATE_COUNT);
3221
* What happens is T1 detects an updated row by T2,
3222
* but T2 has not committed yet.
3223
* It waits for T2. T2 commits and updates again before T1
3226
* Of course if we got a lock on the row when T2 quits, then
3227
* this would not happen!
3231
* Is a record visible?
3232
* Returns TRUE, FALSE, XT_ERR.
3234
* TRUE - The record is visible.
3235
* FALSE - The record is not visible.
3236
* XT_ERR - An exception (error) occurred.
3237
* XT_NEW - The most recent variation of this row has been returned
3238
* and is to be used instead of the input!
3239
* XT_REREAD - Re-read the record, and try again.
3241
* Basically, a record is visible if it was committed on or before
3242
* the transactions "visible time" (st_visible_time), and there
3243
* are no other visible records before this record in the
3244
* variation chain for the record.
3246
* This holds in general, but you don't always get to see the
3247
* visible record (as defined in this sense).
3249
* On any kind of update (SELECT FOR UPDATE, UPDATE or DELETE), you
3250
* get to see the most recent variation of the row!
3252
* So on update, this function will wait if necessary for a recent
3253
* update to be committed.
3255
* So an update is a kind of "committed read" with a wait for
3256
* uncommitted records.
3259
* - INSERTs may not be seen by the update read, depending on when
3261
* - Records may be returned in non-index order.
3262
* - New records returned must be checked again by an index scan
3263
* to make sure they conform to the condition!
3265
* CREATE TABLE test_tab (ID int primary key, Value int, Name varchar(20),
3266
* index(Value, Name)) ENGINE=pbxt;
3267
* INSERT test_tab values(4, 2, 'D');
3268
* INSERT test_tab values(5, 2, 'E');
3269
* INSERT test_tab values(6, 2, 'F');
3270
* INSERT test_tab values(7, 2, 'G');
3274
* select * from test_tab where id = 6 for update;
3277
* select * from test_tab where value = 2 order by value, name for update;
3279
* update test_tab set Name = 'A' where id = 7;
3282
* Result order D, E, F, A.
3284
* But Jim does it like this, so it should be OK.
3286
/*
 * Decide whether the record whose header is 'rec_head' (the record at
 * ot->ot_curr_rec_id) is visible to the calling thread's transaction.
 *
 * The row ID and transaction ID are read from the record header. A record
 * that is not marked clean is classified with xt_xn_status(). The row's
 * variation chain is then followed, starting at the row pointer returned
 * by xt_tab_get_row() and continuing through tr_prev_rec_id_4, until
 * ot->ot_curr_rec_id is reached.
 *
 * When ot->ot_for_update is set the function may wait for another
 * transaction (tab_wait_for_update()), request a temporary row lock
 * (xt_set_temp_lock()), and hand back a substitute record ID through
 * '*new_rec_id'.
 *
 * Side effects: ot->ot_curr_row_id and ot->ot_curr_updated are set, and
 * db_sw_faster is flagged with XT_SW_DIRTY_RECORD_FOUND when an unswept
 * record is encountered.
 */
static int tab_visible(register XTOpenTablePtr ot, XTTabRecHeadDPtr rec_head, xtRecordID *new_rec_id)
3288
XTThreadPtr thread = ot->ot_thread;
3290
XTTabRecHeadDRec var_head;
3292
xtRecordID var_rec_id;
3293
register XTTableHPtr tab;
3294
xtBool wait = FALSE;
3295
xtXactID wait_xn_id = 0;
3296
#ifdef TRACE_VARIATIONS
3302
xtRecordID invalid_rec;
3305
/* It can be that between the time that I read the index,
3306
* and the time that I try to access the
3307
* record, that the record is removed by
3310
if (XT_REC_NOT_VALID(rec_head->tr_rec_type_1))
3313
row_id = XT_GET_DISK_4(rec_head->tr_row_id_4);
3315
/* This can happen if the row has been removed, and
3318
if (ot->ot_curr_row_id && row_id != ot->ot_curr_row_id)
3321
#ifdef TRACE_VARIATIONS
3322
len = sprintf(t_buf, "row=%d rec=%d ", (int) row_id, (int) ot->ot_curr_rec_id);
3324
if (!(rec_clean = XT_REC_IS_CLEAN(rec_head->tr_rec_type_1))) {
3325
/* The record is not clean, which means it has not been swept.
3326
* So we have to check if it is visible.
3328
xn_id = XT_GET_DISK_4(rec_head->tr_xact_id_4);
3329
switch (xt_xn_status(ot, xn_id, ot->ot_curr_rec_id)) {
3332
case XT_XN_NOT_VISIBLE:
3333
if (ot->ot_for_update) {
3334
/* It is visible, only if it is an insert,
3335
* which means if has no previous variation.
3336
* Note, if an insert is updated, the record
3337
* should be overwritten (TODO - check this).
3339
var_rec_id = XT_GET_DISK_4(rec_head->tr_prev_rec_id_4);
3342
#ifdef TRACE_VARIATIONS
3344
len += sprintf(t_buf+len, "OTHER COMMIT (OVERWRITTEN) T%d\n", (int) xn_id);
3345
xt_ttracef(thread, "%s", t_buf);
3348
#ifdef TRACE_VARIATIONS
3351
len += sprintf(t_buf+len, "OTHER COMMIT T%d\n", (int) xn_id);
3352
xt_ttracef(thread, "%s", t_buf);
3356
* The record is not visible, although it has been committed.
3357
* Clean the transaction ASAP.
3359
ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
3363
* Reading an aborted record, this transaction
3364
* must be cleaned up ASAP!
3366
ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
3367
#ifdef TRACE_VARIATIONS
3369
len += sprintf(t_buf+len, "ABORTED T%d\n", (int) xn_id);
3370
xt_ttracef(thread, "%s", t_buf);
3373
case XT_XN_MY_UPDATE:
3374
/* This is a record written by this transaction. */
3375
if (thread->st_is_update) {
3376
/* Check that it was not written by the current update statement: */
3377
if (XT_STAT_ID_MASK(ot->ot_update_id) == rec_head->tr_stat_id_1) {
3378
#ifdef TRACE_VARIATIONS
3380
len += sprintf(t_buf+len, "MY UPDATE IN THIS STATEMENT T%d\n", (int) xn_id);
3381
xt_ttracef(thread, "%s", t_buf);
3386
ot->ot_curr_row_id = row_id;
3387
ot->ot_curr_updated = TRUE;
3388
if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
3390
/* It is visible if it is at the front of the list.
3391
* An update can end up not being at the front of the list
3392
* if it is deleted afterwards!
3394
#ifdef TRACE_VARIATIONS
3396
if (var_rec_id == ot->ot_curr_rec_id)
3397
len += sprintf(t_buf+len, "MY UPDATE T%d\n", (int) xn_id);
3399
len += sprintf(t_buf+len, "MY UPDATE (OVERWRITTEN) T%d\n", (int) xn_id);
3401
xt_ttracef(thread, "%s", t_buf);
3403
return var_rec_id == ot->ot_curr_rec_id;
3404
case XT_XN_OTHER_UPDATE:
3405
if (ot->ot_for_update) {
3406
/* If this is an insert, we are interested!
3407
* Updated values are handled below. This is because
3408
* the changed (new) records returned below are always
3409
* followed (in the version chain) by the record
3410
* we would have returned (if nothing had changed).
3412
* As a result, we only return records here which have
3415
var_rec_id = XT_GET_DISK_4(rec_head->tr_prev_rec_id_4);
3417
#ifdef TRACE_VARIATIONS
3419
len += sprintf(t_buf+len, "OTHER INSERT (WAIT FOR) T%d\n", (int) xn_id);
3420
xt_ttracef(thread, "%s", t_buf);
3422
if (!tab_wait_for_update(ot, row_id, xn_id, thread))
3424
if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
3426
rec_head = &var_head;
3430
#ifdef TRACE_VARIATIONS
3432
len += sprintf(t_buf+len, "OTHER UPDATE T%d\n", (int) xn_id);
3433
xt_ttracef(thread, "%s", t_buf);
3437
#ifdef TRACE_VARIATIONS
3439
len += sprintf(t_buf+len, "REREAD?! T%d\n", (int) xn_id);
3440
xt_ttracef(thread, "%s", t_buf);
3446
/* Follow the variation chain until we come to this record.
3447
* If it is not the first visible variation then
3448
* it is not visible at all. If it in not found on the
3449
* variation chain, it is also not visible.
3455
#ifdef XT_USE_LIST_BASED_ROW_LOCKS
3456
/* The list based row locks used there own locks, so
3457
* it is not necessary to get a write lock here.
3459
XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3461
if (ot->ot_for_update)
3462
XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3464
XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3469
if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
3471
#ifdef TRACE_VARIATIONS
3472
len += sprintf(t_buf+len, "ROW=%d", (int) row_id);
3474
while (var_rec_id != ot->ot_curr_rec_id) {
3476
#ifdef TRACE_VARIATIONS
3477
xt_ttracef(thread, "row=%d rec=%d NOT VISI not found in list\n", (int) row_id, (int) ot->ot_curr_rec_id);
3481
if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
3483
#ifdef TRACE_VARIATIONS
3485
len += sprintf(t_buf+len, " -> %d(%d)", (int) var_rec_id, (int) var_head.tr_rec_type_1);
3487
/* All clean records are visible, by all transactions: */
3488
if (XT_REC_IS_CLEAN(var_head.tr_rec_type_1)) {
3489
#ifdef TRACE_VARIATIONS
3490
xt_ttracef(thread, "row=%d rec=%d NOT VISI clean rec found\n", (int) row_id, (int) ot->ot_curr_rec_id);
3494
if (XT_REC_IS_FREE(var_head.tr_rec_type_1)) {
3495
#ifdef TRACE_VARIATIONS
3496
xt_ttracef(thread, "row=%d rec=%d NOT VISI free rec found?!\n", (int) row_id, (int) ot->ot_curr_rec_id);
3499
* After an analysis we came to conclusion that this situation is
3500
* possible and valid. It can happen if index scan and row deletion
3503
* Client Thread Sweeper
3504
* ------------- -------
3505
* 1. start index scan, lock the index file.
3506
* 2. start row deletion, wait for index lock
3507
* 3. unlock the index file, start search for
3508
* the valid version of the record
3509
* 4. delete the row, mark record as freed,
3510
* but not yet cleaned by sweeper
3511
* 5. observe the record being freed
3513
* after these steps we can get here, if the record was marked as free after
3514
* the tab_visible was entered by the scanning thread.
3517
if (invalid_rec != var_rec_id) {
3518
/* This was "var_rec_id = invalid_rec", caused an infinite loop (bug #310184!) */
3519
invalid_rec = var_rec_id;
3522
/* Assume end of list. */
3526
/* This can happen if the row has been removed, and
3529
if (row_id != XT_GET_DISK_4(var_head.tr_row_id_4))
3532
xn_id = XT_GET_DISK_4(var_head.tr_xact_id_4);
3533
/* This variation is visibleif committed before this
3534
* transaction started, or updated by this transaction.
3536
* We now know that this is the valid variation for
3537
* this record (for this table) for this transaction!
3538
* This will not change, unless the transaction
3539
* updates the record (again).
3541
* So we can store this information as a hint, if
3542
* we see other variations belonging to this record,
3543
* then we can ignore them immediately!
3545
switch (xt_xn_status(ot, xn_id, var_rec_id)) {
3548
* We have encountered a record that has been overwritten, if the
3549
* record has not been cleaned, then the sweeper is too far
3553
ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
3554
#ifdef TRACE_VARIATIONS
3555
xt_ttracef(thread, "row=%d rec=%d NOT VISI committed rec found\n", (int) row_id, (int) ot->ot_curr_rec_id);
3558
case XT_XN_NOT_VISIBLE:
3559
if (ot->ot_for_update) {
3560
/* Substitute this record for the one we
3563
if (result == TRUE) {
3564
if (XT_REC_IS_DELETE(var_head.tr_rec_type_1))
3567
*new_rec_id = var_rec_id;
3574
/* Ignore the record, it will be removed. */
3576
case XT_XN_MY_UPDATE:
3577
#ifdef TRACE_VARIATIONS
3578
xt_ttracef(thread, "row=%d rec=%d NOT VISI my update found\n", (int) row_id, (int) ot->ot_curr_rec_id);
3581
case XT_XN_OTHER_UPDATE:
3582
/* Wait for this update to commit or abort: */
3587
#ifdef TRACE_VARIATIONS
3589
len += sprintf(t_buf+len, "-T%d", (int) wait_xn_id);
3593
if (invalid_rec != var_rec_id) {
3594
invalid_rec = var_rec_id;
3597
/* Assume end of list. */
3598
#ifdef XT_CRASH_DEBUG
3599
/* Should not happen! */
3604
var_rec_id = XT_GET_DISK_4(var_head.tr_prev_rec_id_4);
3606
#ifdef TRACE_VARIATIONS
3608
sprintf(t_buf+len, " -> %d(%d)\n", (int) var_rec_id, (int) rec_head->tr_rec_type_1);
3610
sprintf(t_buf+len, " ...\n");
3611
//xt_ttracef(thread, "%s", t_buf);
3614
if (ot->ot_for_update) {
3619
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3620
#ifdef TRACE_VARIATIONS
3621
xt_ttracef(thread, "T%d WAIT FOR T%d (will retry)\n", (int) thread->st_xact_data->xd_start_xn_id, (int) wait_xn_id);
3623
if (!tab_wait_for_update(ot, row_id, wait_xn_id, thread))
3628
* Retry in order to try to avoid missing
3629
* any records that we should see in FOR UPDATE
3632
* We also want to take another look at the record
3633
* we just tried to read.
3635
* If it has been updated, then a new record has
3636
* been created. This will be detected when we
3637
* try to read it again, and XT_NEW will be returned.
3639
thread->st_statistics.st_retry_index_scan++;
3643
/* {ROW-LIST-LOCK} */
3644
lw.lw_thread = thread;
3646
lw.lw_row_id = row_id;
3647
lw.lw_row_updated = FALSE;
3648
ok = tab->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list);
3649
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3651
#ifdef DEBUG_LOCK_QUEUE
3652
ot->ot_table->tab_locks.rl_check(&lw);
3656
if (lw.lw_curr_lock != XT_NO_LOCK) {
3657
#ifdef TRACE_VARIATIONS
3658
xt_ttracef(thread, "T%d WAIT FOR LOCK(%s) T%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) lw.lw_curr_lock == XT_TEMP_LOCK ? "temp" : "perm", (int) xn_id);
3660
if (!xt_xn_wait_for_xact(thread, NULL, &lw)) {
3661
#ifdef DEBUG_LOCK_QUEUE
3662
ot->ot_table->tab_locks.rl_check(&lw);
3666
#ifdef DEBUG_LOCK_QUEUE
3667
ot->ot_table->tab_locks.rl_check(&lw);
3669
#ifdef TRACE_VARIATIONS
3670
len = sprintf(t_buf, "(retry): row=%d rec=%d ", (int) row_id, (int) ot->ot_curr_rec_id);
3673
* Reset the result before we go down the list again, to make sure we
3674
* get the latest record!!
3677
thread->st_statistics.st_reread_record_list++;
3680
#ifdef DEBUG_LOCK_QUEUE
3681
ot->ot_table->tab_locks.rl_check(&lw);
3685
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3688
#ifdef TRACE_VARIATIONS
3689
if (result == XT_NEW)
3690
xt_ttracef(thread, "row=%d rec=%d RETURN NEW %d\n", (int) row_id, (int) ot->ot_curr_rec_id, (int) *new_rec_id);
3692
xt_ttracef(thread, "row=%d rec=%d VISIBLE\n", (int) row_id, (int) ot->ot_curr_rec_id);
3694
xt_ttracef(thread, "row=%d rec=%d RETURN NOT VISIBLE (NEW)\n", (int) row_id, (int) ot->ot_curr_rec_id);
3697
ot->ot_curr_row_id = row_id;
3698
ot->ot_curr_updated = FALSE;
3702
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3706
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3711
* Return TRUE if the record has been read, and is visible.
3712
* Return FALSE if the record is not visible.
3713
* Return XT_ERR if an error occurs.
3715
/*
 * Check whether the record at ot->ot_curr_rec_id is visible.
 *
 * Fast track: when ot->ot_curr_row_id is set, the row pointer is read
 * with xt_tab_get_row() and compared with ot->ot_curr_rec_id. For update
 * reads (ot->ot_for_update) a temporary row lock is requested and, if a
 * lock is currently held on the row (lw_curr_lock != XT_NO_LOCK), the
 * thread waits via xt_xn_wait_for_xact().
 *
 * General path: the record header is read and passed to tab_visible().
 * ot->ot_curr_rec_id may be replaced by the record ID that tab_visible()
 * hands back through its 'new_rec_id' argument. The 'read_again' flag
 * guards against re-reading the record indefinitely.
 */
xtPublic int xt_tab_visible(XTOpenTablePtr ot)
3718
XTTabRecHeadDRec rec_head;
3719
xtRecordID new_rec_id;
3720
xtBool read_again = FALSE;
3723
if ((row_id = ot->ot_curr_row_id)) {
3724
/* Fast track, do a quick check.
3725
* Row ID is only set if this record has been committed,
3727
* Check if it is the first on the list!
3729
xtRecordID var_rec_id;
3732
if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
3734
if (ot->ot_curr_rec_id == var_rec_id) {
3736
if (ot->ot_for_update) {
3737
XTThreadPtr thread = ot->ot_thread;
3738
XTTableHPtr tab = ot->ot_table;
3741
/* {ROW-LIST-LOCK} */
3742
lw.lw_thread = thread;
3744
lw.lw_row_id = row_id;
3745
lw.lw_row_updated = FALSE;
3746
if (!tab->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list)) {
3747
#ifdef DEBUG_LOCK_QUEUE
3748
ot->ot_table->tab_locks.rl_check(&lw);
3752
if (lw.lw_curr_lock != XT_NO_LOCK) {
3753
if (!xt_xn_wait_for_xact(thread, NULL, &lw)) {
3754
#ifdef DEBUG_LOCK_QUEUE
3755
ot->ot_table->tab_locks.rl_check(&lw);
3759
#ifdef DEBUG_LOCK_QUEUE
3760
ot->ot_table->tab_locks.rl_check(&lw);
3764
#ifdef DEBUG_LOCK_QUEUE
3765
ot->ot_table->tab_locks.rl_check(&lw);
3773
if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
3776
switch ((r = tab_visible(ot, &rec_head, &new_rec_id))) {
3778
ot->ot_curr_rec_id = new_rec_id;
3781
/* Avoid infinite loop: */
3783
/* Should not happen! */
3784
#ifdef XT_CRASH_DEBUG
3785
/* Generate a core dump! */
3799
* Read a record, and return one of the following:
3800
* TRUE - the record has been read, and is visible.
3801
* FALSE - the record is not visible.
3802
* XT_ERR - an error occurs.
3803
* XT_NEW - Means the expected record has been changed.
3804
* When doing an index scan, the conditions must be checked again!
3806
/*
 * Read the record at ot->ot_curr_rec_id into ot->ot_row_rbuffer, check its
 * visibility with tab_visible(), and unpack the row into 'buffer'.
 *
 * XT_ERR_NO_TRANSACTION is registered if the thread has no transaction
 * data (ot->ot_thread->st_xact_data). If tab_visible() supplies a
 * replacement record ID, that record is read into the row buffer and
 * ot->ot_curr_rec_id is updated to it.
 *
 * Unpacking depends on the record format:
 *  - fixed-length records (ot->ot_rec_fixed) are copied with memcpy(),
 *    skipping XT_REC_FIX_HEADER_SIZE bytes of header;
 *  - variable records (XT_TAB_STATUS_VARIABLE / XT_TAB_STATUS_VAR_CLEAN)
 *    are loaded with myxt_load_row();
 *  - for other records, the row is loaded from behind the extended header
 *    when the requested column count (ot->ot_cols_req) is within
 *    dic_fix_col_count; xt_tab_load_ext_data() is presumably the
 *    alternative when it is not.
 */
xtPublic int xt_tab_read_record(register XTOpenTablePtr ot, xtWord1 *buffer)
3808
register XTTableHPtr tab = ot->ot_table;
3809
size_t rec_size = tab->tab_dic.dic_rec_size;
3810
xtRecordID new_rec_id;
3812
xtBool read_again = FALSE;
3814
if (!(ot->ot_thread->st_xact_data)) {
3815
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_TRANSACTION);
3820
if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, rec_size, ot->ot_row_rbuffer))
3823
switch (tab_visible(ot, (XTTabRecHeadDPtr) ot->ot_row_rbuffer, &new_rec_id)) {
3829
if (!xt_tab_get_rec_data(ot, new_rec_id, rec_size, ot->ot_row_rbuffer))
3831
ot->ot_curr_rec_id = new_rec_id;
3837
/* Avoid infinite loop: */
3839
/* Should not happen! */
3840
#ifdef XT_CRASH_DEBUG
3841
/* Generate a core dump! */
3853
if (ot->ot_rec_fixed)
3854
memcpy(buffer, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
3855
else if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
3856
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
3860
u_int cols_req = ot->ot_cols_req;
3862
ASSERT_NS(cols_req);
3863
if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
3864
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
3868
if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
3879
* TRUE/OK - record was read.
3880
* FALSE/FAILED - An error occurred.
3882
/*
 * Read the record at ot->ot_curr_rec_id without any visibility check and
 * unpack it into 'buffer'.
 *
 * The raw record is read into ot->ot_row_rbuffer. ot->ot_curr_row_id is
 * set from the record header, and ot->ot_curr_updated is TRUE when the
 * record's transaction ID equals the start ID of the calling thread's
 * transaction.
 *
 * Returns OK if the record was read, FAILED if an error occurred
 * (XT_ERR_RECORD_DELETED is registered when the record is not valid).
 */
xtPublic int xt_tab_dirty_read_record(register XTOpenTablePtr ot, xtWord1 *buffer)
{
	register XTTableHPtr	tab = ot->ot_table;
	size_t					rec_size = tab->tab_dic.dic_rec_size;
	XTTabRecHeadDPtr		head;
	u_int					cols_req;

	if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, rec_size, ot->ot_row_rbuffer))
		return FAILED;

	if (XT_REC_NOT_VALID(ot->ot_row_rbuffer[0])) {
		/* Should not happen! */
		xt_register_xterr(XT_REG_CONTEXT, XT_ERR_RECORD_DELETED);
		return FAILED;
	}

	head = (XTTabRecHeadDPtr) ot->ot_row_rbuffer;
	ot->ot_curr_row_id = XT_GET_DISK_4(head->tr_row_id_4);
	ot->ot_curr_updated =
		(XT_GET_DISK_4(head->tr_xact_id_4) == ot->ot_thread->st_xact_data->xd_start_xn_id);

	/* Fixed length rows are stored as-is behind the header: */
	if (ot->ot_rec_fixed) {
		memcpy(buffer, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
		return OK;
	}

	/* Variable length rows that fit completely into the record: */
	if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
		if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
			return FAILED;
		return OK;
	}

	/* Extended rows: the in-record part suffices if it holds all required columns. */
	cols_req = ot->ot_cols_req;
	ASSERT_NS(cols_req);
	if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
		if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
			return FAILED;
		return OK;
	}

	if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
		return FAILED;
	return OK;
}
3923
#ifdef XT_USE_ROW_REC_MMAP_FILES
3924
/* Loading into cache is not required,
3925
* Instead we copy the memory map to load the
3928
#define TAB_ROW_LOAD_CACHE FALSE
3930
#define TAB_ROW_LOAD_CACHE TRUE
3934
* Pull the entire row pointer file into memory.
3936
/*
 * Pull the row pointer file of the table into memory, page by page.
 *
 * Loading is only attempted when the current cache usage (the larger of
 * xt_tc_get_usage() and xt_tc_get_high()) plus the total size of the row
 * pointers (tab_row_eof_id * tci_rec_size) is below xt_tc_get_size().
 *
 * Each page is requested with xt_tc_get_page() and released again with
 * xt_tc_release_page(). The alternative path copies the page's byte range
 * out of the row file into a scratch buffer of tci_page_size bytes, which
 * is allocated with xt_malloc() and released with xt_free().
 */
xtPublic void xt_tab_load_row_pointers(XTThreadPtr self, XTOpenTablePtr ot)
3938
XTTableHPtr tab = ot->ot_table;
3939
xtRecordID eof_rec_id = tab->tab_row_eof_id;
3941
xtWord1 *buffer = NULL;
3943
/* Check if there is enough cache: */
3944
usage = xt_tc_get_usage();
3945
if (xt_tc_get_high() > usage)
3946
usage = xt_tc_get_high();
3947
if (usage + ((xtInt8) eof_rec_id * (xtInt8) tab->tab_rows.tci_rec_size) < xt_tc_get_size()) {
3949
size_t poffset, tfer;
3950
off_t offset, end_offset;
3951
XTTabCachePagePtr page;
3953
end_offset = xt_row_id_to_row_offset(tab, eof_rec_id);
3955
while (rec_id < eof_rec_id) {
3956
if (!tab->tab_rows.xt_tc_get_page(ot->ot_row_file, rec_id, TAB_ROW_LOAD_CACHE, &page, &poffset, self))
3959
tab->tab_rows.xt_tc_release_page(ot->ot_row_file, page, self);
3964
buffer = (xtWord1 *) xt_malloc(self, tab->tab_rows.tci_page_size);
3965
offset = xt_row_id_to_row_offset(tab, rec_id);
3966
tfer = tab->tab_rows.tci_page_size;
3967
if (offset + (off_t) tfer > end_offset)
3968
tfer = (size_t) (end_offset - offset);
3969
XT_LOCK_MEMORY_PTR(buff_ptr, ot->ot_row_file, offset, tfer, &self->st_statistics.st_rec, self);
3971
memcpy(buffer, buff_ptr, tfer);
3972
XT_UNLOCK_MEMORY_PTR(ot->ot_row_file, buff_ptr, self);
3975
rec_id += tab->tab_rows.tci_rows_per_page;
3978
xt_free(self, buffer);
3982
/*
 * Load an open table: calls xt_load_pages() and then xt_load_indices()
 * for 'ot' on behalf of thread 'self'.
 */
xtPublic void xt_tab_load_table(XTThreadPtr self, XTOpenTablePtr ot)
{
	xt_load_pages(self, ot);
	xt_load_indices(self, ot);
}
3988
/*
 * Load record 'rec_id' and unpack its row into 'rec_buf'.
 *
 * The raw record is read into ot->ot_row_rbuffer. A warning is logged if
 * the record is not valid. ot->ot_curr_row_id is set from the record
 * header, and ot->ot_curr_updated is TRUE when the record's transaction ID
 * equals the start ID of the calling thread's transaction.
 *
 * 'rec_buf' is sized with xt_ib_alloc(): to the row length without header
 * for fixed-length records, otherwise to dic_mysql_buf_size. The row is
 * then copied (fixed) or unpacked with myxt_load_row() /
 * xt_tab_load_ext_data().
 *
 * NOTE(review): the xt_tab_load_ext_data() call passes ot->ot_curr_rec_id
 * rather than the 'rec_id' parameter, although the row buffer was filled
 * from 'rec_id'. This looks wrong whenever the two differ -- confirm that
 * all callers pass ot->ot_curr_rec_id, or switch the call to 'rec_id'.
 */
xtPublic xtBool xt_tab_load_record(register XTOpenTablePtr ot, xtRecordID rec_id, XTInfoBufferPtr rec_buf)
3990
register XTTableHPtr tab = ot->ot_table;
3991
size_t rec_size = tab->tab_dic.dic_rec_size;
3993
if (!xt_tab_get_rec_data(ot, rec_id, rec_size, ot->ot_row_rbuffer))
3996
if (XT_REC_NOT_VALID(ot->ot_row_rbuffer[0])) {
3997
/* Should not happen! */
3998
XTThreadPtr self = ot->ot_thread;
4000
xt_log(XT_WARNING, "Recently updated record invalid\n");
4004
ot->ot_curr_row_id = XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4);
4005
ot->ot_curr_updated =
4006
(XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_xact_id_4) == ot->ot_thread->st_xact_data->xd_start_xn_id);
4008
if (ot->ot_rec_fixed) {
4009
size_t size = rec_size - XT_REC_FIX_HEADER_SIZE;
4010
if (!xt_ib_alloc(NULL, rec_buf, size))
4012
memcpy(rec_buf->ib_db.db_data, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, size);
4015
if (!xt_ib_alloc(NULL, rec_buf, tab->tab_dic.dic_mysql_buf_size))
4017
if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
4018
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_buf->ib_db.db_data, ot->ot_cols_req))
4022
u_int cols_req = ot->ot_cols_req;
4024
ASSERT_NS(cols_req);
4025
if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
4026
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, rec_buf->ib_db.db_data, cols_req))
4030
if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, rec_buf->ib_db.db_data, cols_req))
4039
/*
 * Put row 'row_id' on the front of the table's free row list.
 *
 * Under tab_row_lock, the row slot is overwritten with a reference to the
 * previous head of the free list, then tab_row_free_id and tab_row_fnum
 * are updated. After the lock is released the change is logged as
 * XT_LOG_ENT_ROW_FREED.
 *
 * Returns OK on success, FAILED if the cache write or the log write fails.
 */
xtPublic xtBool xt_tab_free_row(XTOpenTablePtr ot, XTTableHPtr tab, xtRowID row_id)
{
	XTTabRowRefDRec	free_row;
	xtRowID			old_free_head;
	xtOpSeqNo		op_seq;
	xtBool			row_written;

	ASSERT_NS(row_id); // Cannot free the header!

	xt_lock_mutex_ns(&tab->tab_row_lock);
	old_free_head = tab->tab_row_free_id;
	XT_SET_DISK_4(free_row.rr_ref_id_4, old_free_head);
	row_written = tab->tab_rows.xt_tc_write(ot->ot_row_file, row_id, 0, sizeof(XTTabRowRefDRec), (xtWord1 *) &free_row, &op_seq, TRUE, ot->ot_thread);
	if (row_written) {
		/* Only link the row into the free list once it is on disk/cache: */
		tab->tab_row_free_id = row_id;
		tab->tab_row_fnum++;
	}
	xt_unlock_mutex_ns(&tab->tab_row_lock);

	if (!row_written)
		return FAILED;

	if (!xt_xlog_modify_table(tab->tab_id, XT_LOG_ENT_ROW_FREED, op_seq, 0, 0, row_id, sizeof(XTTabRowRefDRec), (xtWord1 *) &free_row, ot->ot_thread))
		return FAILED;

	return OK;
}
4064
/*
 * Release the extended data referenced by 'ext_rec' (record 'rec_id').
 *
 * For memory tables (XT_TF_MEMORY_TABLE) the slot is released with
 * xt_tab_free_ext_slot(). Otherwise the data log entry is deleted through
 * the thread's data log buffer; if that fails, the exception is logged and
 * cleared only when 'log_err' is set.
 *
 * NOTE(review): judging by the name this is cleanup after a failed
 * operation -- confirm against the callers.
 */
static void tab_free_ext_record_on_fail(XTOpenTablePtr ot, xtRecordID rec_id, XTTabRecExtDPtr ext_rec, xtBool log_err)
{
	xtWord4		ext_size = XT_GET_DISK_4(ext_rec->re_log_dat_siz_4);
	xtLogID		ext_log_id;
	xtLogOffset	ext_log_offset;

	XT_GET_LOG_REF(ext_log_id, ext_log_offset, ext_rec);

	if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
		xt_tab_free_ext_slot(ot->ot_table, ext_log_id, ext_log_offset, ext_size);
		return;
	}

	if (ot->ot_thread->st_dlog_buf.dlb_delete_log(ext_log_id, ext_log_offset, ext_size, ot->ot_table->tab_id, rec_id, ot->ot_thread))
		return;

	/* The delete failed; report it only if the caller asked for that. */
	if (log_err)
		xt_log_and_clear_exception_ns();
}
4082
/* Copy the calling thread's current exception (t_exception) into '*e'. */
static void tab_save_exception(XTExceptionPtr e)
{
	*e = xt_get_self()->t_exception;
}
4089
/* Overwrite the calling thread's current exception (t_exception) with '*e'. */
static void tab_restore_exception(XTExceptionPtr e)
{
	xt_get_self()->t_exception = *e;
}
4097
* This function assumes that a record may be partially written.
4098
* It removes all associated data and references to the record.
4100
* This function return XT_ERR if an error occurs.
4101
* TRUE if the record has been removed, and may be freed.
4102
* FALSE if the record has already been freed.
4105
/*
 * Remove record 'rec_id', which must belong to row 'row_id', and put it on
 * the table's free record list.
 *
 * The record is read into ot->ot_row_rbuffer. The ID of the previous
 * variation is handed back in '*prev_var_id'. If the table has keys, the
 * row is unpacked into 'rec_data' as far as the indexed columns require
 * and xt_idx_delete() is called for every index (unless dic_disable_index
 * is set). A record of an extended type is re-read while holding
 * db_co_ext_lock.
 *
 * Under tab_rec_lock the record is then rewritten as a free record of type
 * XT_TAB_STATUS_FREED, with XT_TAB_STATUS_CLEANED_BIT added when
 * 'clean_delete' is set, and linked into the free list. The removal is
 * logged with xt_xlog_modify_table(), whose result is returned.
 *
 * 'xn_id' is declared XT_UNUSED.
 */
xtPublic int xt_tab_remove_record(XTOpenTablePtr ot, xtRecordID rec_id, xtWord1 *rec_data, xtRecordID *prev_var_id, xtBool clean_delete, xtRowID row_id, xtXactID XT_UNUSED(xn_id))
4107
register XTTableHPtr tab = ot->ot_table;
4109
xtWord1 old_rec_type;
4111
u_int cols_in_buffer;
4119
* NOTE: This function uses the read buffer. This should be OK because
4120
* the function is only called by the sweeper. The read buffer
4121
* is REQUIRED because of the call to xt_tab_load_ext_data()!!!
4123
rec_size = tab->tab_dic.dic_rec_size;
4124
if (!xt_tab_get_rec_data(ot, rec_id, rec_size, ot->ot_row_rbuffer))
4126
old_rec_type = ot->ot_row_rbuffer[0];
4128
/* Check of the record has not already been freed: */
4129
if (XT_REC_IS_FREE(old_rec_type))
4132
/* This record must belong to the given row: */
4133
if (XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_row_id_4) != row_id)
4136
/* The transaction ID of the record must be BEFORE or equal to the given
4139
* No, this does not always hold. Because we wait for updates now,
4140
* a "younger" transaction can update before an older
4142
* Commit order determined the actual order in which the transactions
4143
* should be replicated. This is determined by the log number of
4144
* the commit record!
4145
if (db->db_xn_curr_id(xn_id, XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_xact_id_4)))
4149
*prev_var_id = XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_prev_rec_id_4);
4151
if (tab->tab_dic.dic_key_count) {
4154
switch (old_rec_type) {
4155
case XT_TAB_STATUS_DELETE:
4156
case XT_TAB_STATUS_DEL_CLEAN:
4157
rec_size = sizeof(XTTabRecHeadDRec);
4159
case XT_TAB_STATUS_FIXED:
4160
case XT_TAB_STATUS_FIX_CLEAN:
4161
/* We know that for a fixed length record,
4162
* dic_ind_rec_len <= dic_rec_size! */
4163
rec_size = (size_t) tab->tab_dic.dic_ind_rec_len + XT_REC_FIX_HEADER_SIZE;
4164
rec_data = ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE;
4166
case XT_TAB_STATUS_VARIABLE:
4167
case XT_TAB_STATUS_VAR_CLEAN:
4168
cols_req = tab->tab_dic.dic_ind_cols_req;
4170
cols_in_buffer = cols_req;
4171
rec_size = myxt_load_row_length(ot, rec_size - XT_REC_FIX_HEADER_SIZE, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, &cols_in_buffer);
4172
if (cols_in_buffer < cols_req)
4173
rec_size = tab->tab_dic.dic_rec_size;
4175
rec_size += XT_REC_FIX_HEADER_SIZE;
4176
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_data, cols_req)) {
4177
xt_log_and_clear_exception_ns();
4181
case XT_TAB_STATUS_EXT_DLOG:
4182
case XT_TAB_STATUS_EXT_CLEAN:
4183
cols_req = tab->tab_dic.dic_ind_cols_req;
4185
ASSERT_NS(cols_req);
4186
cols_in_buffer = cols_req;
4187
rec_size = myxt_load_row_length(ot, rec_size - XT_REC_EXT_HEADER_SIZE, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, &cols_in_buffer);
4188
if (cols_in_buffer < cols_req) {
4189
rec_size = tab->tab_dic.dic_rec_size;
4190
if (!xt_tab_load_ext_data(ot, rec_id, rec_data, cols_req)) {
4191
/* This is actually quite possible after recovery, see [(3)] */
4192
if (ot->ot_thread->t_exception.e_xt_err != XT_ERR_BAD_EXT_RECORD &&
4193
ot->ot_thread->t_exception.e_xt_err != XT_ERR_DATA_LOG_NOT_FOUND)
4194
xt_log_and_clear_exception_ns();
4199
/* All the records we require are in the buffer... */
4200
rec_size += XT_REC_EXT_HEADER_SIZE;
4201
if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, rec_data, cols_req)) {
4202
xt_log_and_clear_exception_ns();
4211
/* Could this be the case?: This change may only be flushed after the
4212
* operation below has been flushed to the log.
4214
* No, remove records are never "undone". The sweeper will delete
4215
* the record again if it does not land in the log.
4217
* The fact that the index entries have already been removed is not
4220
if (!tab->tab_dic.dic_disable_index) {
4221
ind = tab->tab_dic.dic_keys;
4222
for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++) {
4223
if (!xt_idx_delete(ot, *ind, rec_id, rec_data))
4224
xt_log_and_clear_exception_ns();
4230
switch (old_rec_type) {
4231
case XT_TAB_STATUS_DELETE:
4232
case XT_TAB_STATUS_DEL_CLEAN:
4233
rec_size = XT_REC_FIX_HEADER_SIZE;
4235
case XT_TAB_STATUS_FIXED:
4236
case XT_TAB_STATUS_FIX_CLEAN:
4237
case XT_TAB_STATUS_VARIABLE:
4238
case XT_TAB_STATUS_VAR_CLEAN:
4239
rec_size = XT_REC_FIX_HEADER_SIZE;
4241
case XT_TAB_STATUS_EXT_DLOG:
4242
case XT_TAB_STATUS_EXT_CLEAN:
4243
rec_size = XT_REC_EXT_HEADER_SIZE;
4249
if (XT_REC_IS_EXT_DLOG(old_rec_type)) {
4250
/* {LOCK-EXT-REC} Lock, and read again to make sure that the
4251
* compactor does not change this record, while
4252
* we are removing it! */
4253
xt_lock_mutex_ns(&tab->tab_db->db_co_ext_lock);
4254
if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_EXT_HEADER_SIZE, ot->ot_row_rbuffer)) {
4255
xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock);
4258
xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock);
4263
/* A record is "clean" deleted if the record was
4264
* XT_TAB_STATUS_DELETE which was comitted.
4265
* This makes sure that the record will still invalidate
4266
* following records in a row.
4270
* 1. INSERT A ROW, then DELETE it, assume the sweeper is delayed.
4272
* We now have the sequence row X --> del rec A --> valid rec B.
4274
* 2. A SELECT can still find B. Assume it now goes to check
4275
* if the record is valid, it reads row X, and gets A.
4277
* 3. Now the sweeper gets control and removes X, A and B.
4278
* It frees A with the clean bit.
4280
* 4. Now the SELECT gets control and reads A. Normally a freed record
4281
* would be ignored, and it would go onto B, which would then
4282
* be considered valid (note, even after the free, the next
4283
* pointer is not affected).
4285
* However, because the clean bit has been set, it will stop at A
4286
* and consider B invalid (which is the desired result).
4288
* NOTE: We assume it is not possible for A to be allocated and refer
4289
* to B, because B is freed before A. This means that B may refer to
4290
* A after the next allocation.
4294
XTTabRecFreeDPtr free_rec = (XTTabRecFreeDPtr) ot->ot_row_rbuffer;
4295
xtRecordID prev_rec_id;
4296
xtWord1 new_rec_type = XT_TAB_STATUS_FREED | (clean_delete ? XT_TAB_STATUS_CLEANED_BIT : 0);
4297
u_int status = XT_LOG_ENT_REC_REMOVED_BI;
4299
xt_lock_mutex_ns(&tab->tab_rec_lock);
4300
free_rec->rf_rec_type_1 = new_rec_type;
4301
#ifdef XT_CLUSTER_FREE_RECORDS
4302
XTTabCachePagePtr page;
4305
if (!tab->tab_recs.xt_tc_lock_page(ot->ot_rec_file, &page, rec_id, &offset, ot->ot_thread)) {
4306
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4310
if (page->tcp_free_rec == 0xFFFF) {
4311
/* There is no free record on this page. */
4312
prev_rec_id = tab->tab_rec_free_id;
4313
XT_SET_DISK_4(free_rec->rf_next_rec_id_4, prev_rec_id);
4314
memcpy(page->tcp_data + offset, ot->ot_row_rbuffer, sizeof(XTTabRecFreeDRec));
4315
tab->tab_rec_free_id = rec_id;
4318
XTTabRecFreeDPtr prev_free_rec = (XTTabRecFreeDPtr) (page->tcp_data + page->tcp_free_rec);
4320
status = XT_LOG_ENT_REC_REMOVED_BI_L;
4321
XT_COPY_DISK_4(free_rec->rf_next_rec_id_4, prev_free_rec->rf_next_rec_id_4);
4322
memcpy(page->tcp_data + offset, ot->ot_row_rbuffer, sizeof(XTTabRecFreeDRec));
4324
/* The previous now references the next: */
4325
XT_SET_DISK_4(prev_free_rec->rf_next_rec_id_4, rec_id);
4327
/* This is the record ID of the previous record: */
4328
ASSERT_NS((page->tcp_free_rec % tab->tab_recs.tci_rec_size) == 0);
4329
prev_rec_id = (page->tcp_page_idx * tab->tab_recs.tci_rows_per_page) + (page->tcp_free_rec / tab->tab_recs.tci_rec_size) + 1;
4330
ASSERT_NS(prev_rec_id != rec_id);
4333
/* Link after this page in future! */
4334
ASSERT_NS((offset % tab->tab_recs.tci_rec_size) == 0);
4335
page->tcp_free_rec = offset;
4336
tab->tab_recs.xt_tc_unlock_page(ot->ot_rec_file, page, &op_seq, ot->ot_thread);
4338
prev_rec_id = tab->tab_rec_free_id;
4339
XT_SET_DISK_4(free_rec->rf_next_rec_id_4, prev_rec_id);
4340
if (!xt_tab_put_rec_data(ot, rec_id, sizeof(XTTabRecFreeDRec), ot->ot_row_rbuffer, &op_seq)) {
4341
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4344
tab->tab_rec_free_id = rec_id;
4345
ASSERT_NS(tab->tab_rec_free_id < tab->tab_rec_eof_id);
4347
tab->tab_rec_fnum++;
4348
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4350
free_rec->rf_rec_type_1 = old_rec_type;
4351
return xt_xlog_modify_table(tab->tab_id, status, op_seq, new_rec_type, prev_rec_id, rec_id, rec_size, ot->ot_row_rbuffer, ot->ot_thread);
4354
/*
 * Allocate a row ID for the table and log the allocation.
 *
 * While holding tab->tab_row_lock the function either:
 *  - takes the head of the free-row list (tab_row_free_id), reads the
 *    ID of the next free row from the row file, makes that the new
 *    list head and decrements tab_row_fnum (status
 *    XT_LOG_ENT_ROW_NEW_FL), or
 *  - uses tab_row_eof_id and then increments it (status
 *    XT_LOG_ENT_ROW_NEW). XT_ERR_MAX_ROW_COUNT is registered when the
 *    EOF row ID has reached 0xFFFFFFFF.
 *
 * The operation sequence number is taken before the lock is released,
 * and the change is then written with xt_xlog_modify_table().
 *
 * xt_tab_new_record() tests the result with "!(row_id = ...)", so a
 * zero result is treated as failure by that caller.
 */
static xtRowID tab_new_row(XTOpenTablePtr ot, XTTableHPtr tab)
4358
xtRowID next_row_id = 0;
4361
xt_lock_mutex_ns(&tab->tab_row_lock);
4362
/* A non-zero free ID means a previously freed row can be reused. */
if ((row_id = tab->tab_row_free_id)) {
4363
status = XT_LOG_ENT_ROW_NEW_FL;
4365
if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &next_row_id, ot->ot_thread)) {
4366
xt_unlock_mutex_ns(&tab->tab_row_lock);
4369
tab->tab_row_free_id = next_row_id;
4370
tab->tab_row_fnum--;
4373
status = XT_LOG_ENT_ROW_NEW;
4374
row_id = tab->tab_row_eof_id;
4375
if (row_id == 0xFFFFFFFF) {
4376
xt_unlock_mutex_ns(&tab->tab_row_lock);
4377
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_MAX_ROW_COUNT);
4380
/* Row IDs start at 1, so this tests for the first row of a page. */
if (((row_id - 1) % tab->tab_rows.tci_rows_per_page) == 0) {
4381
/* By fetching the page now, we avoid reading it later... */
4382
XTTabCachePagePtr page;
4383
XTTabCacheSegPtr seg;
4386
if (!tab->tab_rows.tc_fetch(ot->ot_row_file, row_id, &seg, &page, &poffset, FALSE, ot->ot_thread)) {
4387
xt_unlock_mutex_ns(&tab->tab_row_lock);
4390
TAB_CAC_UNLOCK(&seg->tcs_lock, ot->ot_thread->t_id);
4392
tab->tab_row_eof_id++;
4394
op_seq = tab->tab_seq.ts_get_op_seq();
4395
xt_unlock_mutex_ns(&tab->tab_row_lock);
4397
if (!xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, next_row_id, row_id, 0, NULL, ot->ot_thread))
4400
XT_DISABLED_TRACE(("new row tx=%d row=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) row_id));
4405
/*
 * Read the record reference stored for a row.
 *
 * ot          Open table; supplies the table, the row file and the thread.
 * row_id      Row whose reference is read.
 * var_rec_id  Receives the 4-byte value read by xt_tc_read_4(). Callers
 *             in this file use it as the first record of the row's
 *             variation chain.
 *
 * Callers test the xtBool result with "!" to detect a failed read.
 */
xtPublic xtBool xt_tab_get_row(register XTOpenTablePtr ot, xtRowID row_id, xtRecordID *var_rec_id)
4407
register XTTableHPtr tab = ot->ot_table;
4409
/* xt_tc_read_4() transfers exactly 4 bytes, so the on-disk row reference must be that size. */
(void) ASSERT_NS(sizeof(XTTabRowRefDRec) == 4);
4411
if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, var_rec_id, ot->ot_thread))
4416
/*
 * Store var_rec_id as the record reference of row_id and log the change.
 *
 * ot          Open table; supplies the table, the row file and the thread.
 * status      Log entry type passed through to xt_xlog_modify_table()
 *             (callers in this file pass XT_LOG_ENT_ROW_ADD_REC).
 * row_id      Row to update.
 * var_rec_id  Record ID to store; asserted to be below tab_rec_eof_id.
 *
 * The reference is first written through the row cache, which also
 * yields the operation sequence number used for the log entry. The
 * result of xt_xlog_modify_table() is returned on the visible path.
 */
xtPublic xtBool xt_tab_set_row(XTOpenTablePtr ot, u_int status, xtRowID row_id, xtRecordID var_rec_id)
4418
register XTTableHPtr tab = ot->ot_table;
4419
XTTabRowRefDRec row_buf;
4422
ASSERT_NS(var_rec_id < tab->tab_rec_eof_id);
4423
XT_SET_DISK_4(row_buf.rr_ref_id_4, var_rec_id);
4425
if (!tab->tab_rows.xt_tc_write(ot->ot_row_file, row_id, 0, sizeof(XTTabRowRefDRec), (xtWord1 *) &row_buf, &op_seq, TRUE, ot->ot_thread))
4428
return xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, 0, row_id, sizeof(XTTabRowRefDRec), (xtWord1 *) &row_buf, ot->ot_thread);
4431
/*
 * Release a row ID on an error path (used by xt_tab_new_record()).
 *
 * The exception is saved before xt_tab_free_row() is called and
 * restored afterwards, so the error that triggered the cleanup is the
 * one left in place for the caller.
 */
static void tab_free_row_on_fail(XTOpenTablePtr ot, XTTableHPtr tab, xtRowID row_id)
4435
tab_save_exception(&e);
4436
xt_tab_free_row(ot, tab, row_id);
4437
tab_restore_exception(&e);
4440
/*
 * Debug counters, only compiled when CHECK_CLUSTER_EFFICIENCY is
 * defined. tab_add_record() prints both values ("Next on: ... off: ...")
 * from its own CHECK_CLUSTER_EFFICIENCY section.
 */
#ifdef CHECK_CLUSTER_EFFICIENCY
4441
u_int next_on_page = 0;
4442
u_int next_off_page = 0;
4445
/*
 * Write the overflow (extended) part of a record to the location
 * reserved for it.
 *
 * ot          Unused.
 * tab         Table the record belongs to.
 * rec_info    Supplies ri_log_buf and ri_log_data_size.
 * rec_id      Record ID stored in the entry header.
 * log_id      Target log, as reserved earlier by the caller.
 * log_offset  Offset within that log.
 * thread      Thread whose data log buffer is used for non-memory tables.
 *
 * The entry header (status XT_LOG_ENT_EXT_REC_OK, data size, table ID,
 * record ID) is written into the start of ri_log_buf. The bytes it
 * replaces are copied to tmp_buffer first and copied back after the
 * write, so ri_log_buf is left as it was found.
 *
 * Memory tables (XT_TF_MEMORY_TABLE) are written with
 * xt_tab_save_ext_record(); all others are appended through
 * dlb_append_log(). The result of the write is kept in "ok".
 */
static xtBool tab_write_ext_record(XTOpenTablePtr XT_UNUSED(ot), XTTableHPtr tab, XTTabRecInfoPtr rec_info, xtRecordID rec_id, xtLogID log_id, xtLogOffset log_offset, XTThreadPtr thread)
4447
xtWord1 tmp_buffer[offsetof(XTactExtRecEntryDRec, er_data)];
4450
memcpy(tmp_buffer, rec_info->ri_log_buf, sizeof(tmp_buffer));
4451
rec_info->ri_log_buf->er_status_1 = XT_LOG_ENT_EXT_REC_OK;
4452
XT_SET_DISK_4(rec_info->ri_log_buf->er_data_size_4, rec_info->ri_log_data_size);
4453
XT_SET_DISK_4(rec_info->ri_log_buf->er_tab_id_4, tab->tab_id);
4454
XT_SET_DISK_4(rec_info->ri_log_buf->er_rec_id_4, rec_id);
4455
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
4456
ok = xt_tab_save_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf);
4458
ok = thread->st_dlog_buf.dlb_append_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf, thread);
4459
memcpy(rec_info->ri_log_buf, tmp_buffer, sizeof(tmp_buffer));
4463
/*
 * Write the record prepared in rec_info as a new record of the table.
 *
 * ot        Open table; supplies the table, the record file and the thread.
 * rec_info  ri_fix_rec_buf / ri_rec_buf_size describe the record image.
 *           When ri_ext_rec is set, ri_log_buf / ri_log_data_size
 *           describe the overflow data. On the visible success path
 *           ri_rec_id is set to the ID the record was written to.
 * status    Log entry type handed to xt_xlog_modify_table().
 *
 * Steps, in order:
 *  1. If the record has an overflow part, reserve space for it (an
 *     extended slot for memory tables, otherwise an offset in the
 *     thread's data log) and store the reference in the record with
 *     XT_SET_LOG_REF.
 *  2. Under tab->tab_rec_lock choose the record ID: the head of the
 *     free list (tab_rec_free_id) when there is one, otherwise
 *     tab_rec_eof_id, which is then incremented. When a free record is
 *     reused, its next pointer becomes the new list head and
 *     tab_rec_fnum is decremented. With XT_CLUSTER_FREE_RECORDS the
 *     page's tcp_free_rec hint is also adjusted.
 *  3. Log the change, then write the overflow part with
 *     tab_write_ext_record().
 */
static xtBool tab_add_record(XTOpenTablePtr ot, XTTabRecInfoPtr rec_info, u_int status)
4465
register XTTableHPtr tab = ot->ot_table;
4466
XTThreadPtr thread = ot->ot_thread;
4469
xtLogOffset log_offset;
4471
xtRecordID next_rec_id = 0;
4473
if (rec_info->ri_ext_rec) {
4474
/* Determine where the overflow will go... */
4475
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
4476
if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data)))
4480
if (!thread->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data), ot->ot_thread))
4483
XT_SET_LOG_REF(rec_info->ri_ext_rec, log_id, log_offset);
4486
/* Write the record to disk: */
4487
xt_lock_mutex_ns(&tab->tab_rec_lock);
4488
if ((rec_id = tab->tab_rec_free_id)) {
4489
ASSERT_NS(rec_id < tab->tab_rec_eof_id);
4490
#ifdef XT_CLUSTER_FREE_RECORDS
4491
XTTabCachePagePtr page;
4493
XTTabRecFreeDPtr free_block;
4495
if (!tab->tab_recs.xt_tc_lock_page(ot->ot_rec_file, &page, rec_id, &offset, ot->ot_thread)) {
4496
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4500
/* Read the data from the old record: */
4501
free_block = (XTTabRecFreeDPtr) (page->tcp_data + offset);
4502
next_rec_id = XT_GET_DISK_4(free_block->rf_next_rec_id_4);
4504
#ifdef CHECK_CLUSTER_EFFICIENCY
4505
xtRecordID dbg_rec_id;
4507
dbg_rec_id = next_rec_id-1;
4508
if (page->tcp_page_idx == dbg_rec_id / tab->tab_recs.tci_rows_per_page)
4512
if ((next_on_page % 1000) == 0)
4513
printf("Next on: %d off: %d\n", next_on_page, next_off_page);
4516
if (page->tcp_free_rec == offset) {
4517
/* Adjust the free record: */
4518
xtRecordID tmp_rec_id;
4520
/* Check if the next record is on the same page: */
4521
tmp_rec_id = next_rec_id-1;
4522
if (page->tcp_page_idx == tmp_rec_id / tab->tab_recs.tci_rows_per_page)
4523
/* This is the next free record on this page: */
4524
page->tcp_free_rec = (xtWord2) ((tmp_rec_id % tab->tab_recs.tci_rows_per_page) * tab->tab_recs.tci_rec_size);
4526
/* Not on the same page, so there are no more free records on this page: */
4527
page->tcp_free_rec = 0xFFFF;
4530
/* Write the new record: */
4531
memcpy(free_block, rec_info->ri_fix_rec_buf, rec_info->ri_rec_buf_size);
4532
tab->tab_recs.xt_tc_unlock_page(ot->ot_rec_file, page, &op_seq, ot->ot_thread);
4534
XTTabRecFreeDRec free_block;
4536
if (!xt_tab_get_rec_data(ot, rec_id, sizeof(XTTabRecFreeDRec), (xtWord1 *) &free_block)) {
4537
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4540
next_rec_id = XT_GET_DISK_4(free_block.rf_next_rec_id_4);
4541
if (!xt_tab_put_rec_data(ot, rec_id, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, &op_seq)) {
4542
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4546
tab->tab_rec_free_id = next_rec_id;
4547
tab->tab_rec_fnum--;
4549
/* XT_LOG_ENT_UPDATE --> XT_LOG_ENT_UPDATE_FL */
4550
/* XT_LOG_ENT_INSERT --> XT_LOG_ENT_INSERT_FL */
4551
/* XT_LOG_ENT_DELETE --> XT_LOG_ENT_DELETE_FL */
4557
rec_id = tab->tab_rec_eof_id;
4558
tab->tab_rec_eof_id++;
4560
/* If we are writing to a new page (at the EOF)
4561
* then we do not need to read the page from the
4562
* file because it is new.
4564
* Note that this only works because we are holding
4565
* a lock on the record file.
4567
read = ((rec_id - 1) % tab->tab_recs.tci_rows_per_page) != 0;
4569
if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, &op_seq, read, thread)) {
4570
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4574
xt_unlock_mutex_ns(&tab->tab_rec_lock);
4576
if (!xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, next_rec_id, rec_id, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, thread))
4579
if (rec_info->ri_ext_rec) {
4580
/* Write the log buffer overflow: */
4581
if (!tab_write_ext_record(ot, tab, rec_info, rec_id, log_id, log_offset, thread))
4585
XT_DISABLED_TRACE(("new rec tx=%d val=%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) rec_id));
4586
rec_info->ri_rec_id = rec_id;
4590
/*
 * Undo helper for a failed insert (called from xt_tab_new_record()).
 *
 * ot         Open table.
 * row_id     Row that was created for the insert.
 * rec_id     Record that was written for the insert.
 * row_ptr    Unused.
 * rec_data   Row image used to locate the index entries to delete.
 * key_count  Number of leading indexes of dic_keys that already
 *            received an entry for this record.
 *
 * The pending exception is saved on entry and restored on the visible
 * exit paths. The function deletes the index entries that were added,
 * then writes an XT_TAB_STATUS_DELETE record whose previous-record
 * field is rec_id and makes the row reference that delete record.
 */
static void tab_delete_record_on_fail(XTOpenTablePtr ot, xtRowID row_id, xtRecordID rec_id, XTTabRecHeadDPtr XT_UNUSED(row_ptr), xtWord1 *rec_data, u_int key_count)
4593
xtBool log_err = TRUE;
4594
XTTabRecInfoRec rec_info;
4596
tab_save_exception(&e);
4598
if (e.e_xt_err == XT_ERR_DUPLICATE_KEY ||
4599
e.e_xt_err == XT_ERR_DUPLICATE_FKEY) {
4600
/* If the error does not cause rollback, then we will ignore the
4601
* error if an error occurs in the UNDO!
4604
tab_restore_exception(&e);
4609
ind = ot->ot_table->tab_dic.dic_keys;
4610
for (u_int i=0; i<key_count; i++, ind++) {
4611
if (!xt_idx_delete(ot, *ind, rec_id, rec_data)) {
4613
xt_log_and_clear_exception_ns();
4618
/* This is not required because the extended record will be free
4619
* later when the record is freed!
4620
if (row_ptr->tr_rec_type_1 == XT_TAB_STATUS_EXT_DLOG || row_ptr->tr_rec_type_1 == XT_TAB_STATUS_EXT_CLEAN)
4621
tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) row_ptr, log_err);
4624
rec_info.ri_fix_rec_buf = (XTTabRecFixDPtr) ot->ot_row_wbuffer;
4625
rec_info.ri_rec_buf_size = offsetof(XTTabRecFixDRec, rf_data);
4626
rec_info.ri_ext_rec = NULL;
4627
rec_info.ri_fix_rec_buf->tr_rec_type_1 = XT_TAB_STATUS_DELETE;
4628
rec_info.ri_fix_rec_buf->tr_stat_id_1 = 0;
4629
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
4630
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, rec_id);
4631
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, ot->ot_thread->st_xact_data->xd_start_xn_id);
4633
if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_DELETE))
4636
if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
4640
tab_restore_exception(&e);
4645
xt_log_and_clear_exception_ns();
4647
tab_restore_exception(&e);
4651
* Wait until all the variations between the start of the chain, and
4652
* the given record have been rolled-back.
4653
* If any is committed, register a locked error, and return FAILED.
4655
/*
 * ot             Open table.
 * row_id         Row whose variation chain is examined.
 * commit_rec_id  Record at which the walk stops.
 *
 * The walk starts at the record returned by xt_tab_get_row() and follows
 * tr_prev_rec_id_4 until commit_rec_id is reached. For a record in
 * state XT_XN_OTHER_UPDATE the row lock is released, the function waits
 * for that transaction with xt_xn_wait_for_xact(), and the row write
 * lock is taken again on both the failure and the success path.
 * Callers in this file hold the row write lock when calling.
 * XT_ERR_RECORD_CHANGED is the error registered by the visible code.
 *
 * NOTE(review): the "infinite loop" guard assigns
 * "var_rec_id = invalid_rec", and no visible line ever assigns
 * invalid_rec after its initialisation to 0. The assignment looks
 * reversed (compare "invalid_rec = rec_id" in xt_tab_maybe_committed()),
 * in which case the guard would never stop a repeated retry - confirm.
 */
static xtBool tab_wait_for_rollback(XTOpenTablePtr ot, xtRowID row_id, xtRecordID commit_rec_id)
4657
register XTTableHPtr tab = ot->ot_table;
4658
xtRecordID var_rec_id;
4659
XTTabRecHeadDRec var_head;
4661
xtRecordID invalid_rec = 0;
4665
if (!xt_tab_get_row(ot, row_id, &var_rec_id))
4668
while (var_rec_id != commit_rec_id) {
4671
if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
4673
if (XT_REC_IS_CLEAN(var_head.tr_rec_type_1))
4675
if (XT_REC_IS_FREE(var_head.tr_rec_type_1))
4676
/* Should not happen: */
4677
goto record_invalid;
4678
xn_id = XT_GET_DISK_4(var_head.tr_xact_id_4);
4679
switch (xt_xn_status(ot, xn_id, var_rec_id)) {
4681
case XT_XN_NOT_VISIBLE:
4684
/* Ignore the record, it will be removed. */
4686
case XT_XN_MY_UPDATE:
4687
/* Should not happen: */
4689
case XT_XN_OTHER_UPDATE:
4690
/* Wait for the transaction to commit or rollback: */
4691
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4692
xw.xw_xn_id = xn_id;
4693
if (!xt_xn_wait_for_xact(ot->ot_thread, &xw, NULL)) {
4694
XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4697
XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4700
goto record_invalid;
4702
var_rec_id = XT_GET_DISK_4(var_head.tr_prev_rec_id_4);
4707
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_RECORD_CHANGED);
4711
/* Prevent an infinite loop due to a bad record: */
4712
if (invalid_rec != var_rec_id) {
4713
var_rec_id = invalid_rec;
4716
/* The record is invalid, it will be "overwritten"... */
4717
#ifdef XT_CRASH_DEBUG
4718
/* Should not happen! */
4724
/* Check if a record may be visible:
4725
* Return TRUE if the record may be visible now.
4726
* Return XT_MAYBE if the record may be visible in the future (set out_xn_id).
4727
* Return FALSE if the record is not valid (freed or is a delete record).
4728
* Return XT_ERR if an error occurred.
4730
/*
 * ot           Open table.
 * rec_id       Record to examine.
 * out_xn_id    Written with wait_xn_id, which is set to the record's
 *              transaction ID in the XT_XN_OTHER_UPDATE case.
 * out_rowid    Written with the row ID read from the record header.
 * out_updated  Written with TRUE when the record's transaction ID equals
 *              the calling thread's xd_start_xn_id.
 *
 * Which return path performs each of these writes cannot be seen in the
 * visible lines - confirm before relying on an output for a given
 * return value.
 *
 * The function first classifies the record itself (validity, clean
 * state, transaction status). It then takes the row read lock and walks
 * the row's variation chain from its head until it reaches rec_id. The
 * row lock is released on each of the three visible exit paths.
 *
 * NOTE(review): inside the chain walk the retry guards assign
 * "var_rec_id = invalid_rec" although the test is
 * "invalid_rec != var_rec_id"; the earlier guard for rec_id uses
 * "invalid_rec = rec_id". The assignments in the walk look reversed -
 * confirm.
 */
xtPublic int xt_tab_maybe_committed(XTOpenTablePtr ot, xtRecordID rec_id, xtXactID *out_xn_id, xtRowID *out_rowid, xtBool *out_updated)
4732
XTTabRecHeadDRec rec_head;
4733
xtXactID rec_xn_id = 0;
4734
xtBool wait = FALSE;
4735
xtXactID wait_xn_id = 0;
4737
xtRecordID var_rec_id;
4739
register XTTableHPtr tab;
4740
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4745
xtRecordID invalid_rec = 0;
4748
if (!xt_tab_get_rec_data(ot, rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
4751
if (XT_REC_NOT_VALID(rec_head.tr_rec_type_1))
4754
if (!XT_REC_IS_CLEAN(rec_head.tr_rec_type_1)) {
4755
rec_xn_id = XT_GET_DISK_4(rec_head.tr_xact_id_4);
4756
switch (xt_xn_status(ot, rec_xn_id, rec_id)) {
4758
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4762
case XT_XN_NOT_VISIBLE:
4763
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4769
case XT_XN_MY_UPDATE:
4770
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4774
case XT_XN_OTHER_UPDATE:
4775
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4779
wait_xn_id = rec_xn_id;
4782
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4785
/* Avoid infinite loop: */
4786
if (invalid_rec == rec_id) {
4787
/* Should not happen! */
4788
#ifdef XT_CRASH_DEBUG
4789
/* Generate a core dump! */
4794
invalid_rec = rec_id;
4799
/* Follow the variation chain until we come to this record.
4800
* If it is not the first visible variation then
4801
* it is not visible at all. If it is not found on the
4802
* variation chain, it is also not visible.
4804
row_id = XT_GET_DISK_4(rec_head.tr_row_id_4);
4807
XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4811
if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
4813
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4814
len = sprintf(t_buf, "dup row=%d", (int) row_id);
4816
while (var_rec_id != rec_id) {
4819
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4821
len += sprintf(t_buf+len, " -> %d", (int) var_rec_id);
4823
if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
4825
/* All clean records are visible, by all transactions: */
4826
if (XT_REC_IS_CLEAN(rec_head.tr_rec_type_1))
4829
if (XT_REC_IS_FREE(rec_head.tr_rec_type_1)) {
4830
/* Should not happen: */
4831
if (invalid_rec != var_rec_id) {
4832
var_rec_id = invalid_rec;
4835
/* Assume end of list. */
4836
#ifdef XT_CRASH_DEBUG
4837
/* Should not happen! */
4843
xn_id = XT_GET_DISK_4(rec_head.tr_xact_id_4);
4844
switch (xt_xn_status(ot, xn_id, var_rec_id)) {
4846
case XT_XN_NOT_VISIBLE:
4849
/* Ignore the record, it will be removed. */
4850
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4852
len += sprintf(t_buf+len, "(T%d-A)", (int) xn_id);
4855
case XT_XN_MY_UPDATE:
4857
case XT_XN_OTHER_UPDATE:
4858
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4860
len += sprintf(t_buf+len, "(T%d-wait)", (int) xn_id);
4862
/* Wait for this update to commit or abort: */
4869
if (invalid_rec != var_rec_id) {
4870
var_rec_id = invalid_rec;
4873
/* Assume end of list. */
4874
#ifdef XT_CRASH_DEBUG
4875
/* Should not happen! */
4880
var_rec_id = XT_GET_DISK_4(rec_head.tr_prev_rec_id_4);
4882
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4884
sprintf(t_buf+len, " -> %d(T%d-%s)\n", (int) var_rec_id, (int) rec_xn_id, t_type);
4886
sprintf(t_buf+len, " ...(T%d-%s)\n", (int) rec_xn_id, t_type);
4889
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4891
*out_xn_id = wait_xn_id;
4894
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4895
xt_ttracef(thread, "%s", t_buf);
4898
*out_rowid = row_id;
4899
*out_updated = (rec_xn_id == ot->ot_thread->st_xact_data->xd_start_xn_id);
4904
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4908
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4912
/*
 * Insert the row image rec_buf into the table.
 *
 * ot       Open table; supplies the table, the thread and ot_update_id.
 * rec_buf  Row image handed to myxt_store_row() and to the index and
 *          foreign key code.
 *
 * Order of work:
 *  1. Flag the thread as having updated a non-temporary table, if so.
 *  2. Convert the row into rec_info with myxt_store_row().
 *  3. Allocate a row ID with tab_new_row() and fill in the record
 *     header (statement ID, row ID, previous record 0, transaction ID).
 *  4. Write the record (XT_LOG_ENT_INSERT), then point the row at it.
 *  5. Insert an entry into every index; on failure ot_err_index_no
 *     records the index that failed.
 *  6. Run the foreign key insertRow() check when the table has foreign
 *     keys, then count the insert in st_row_insert.
 *
 * The error paths visible at the end use tab_delete_record_on_fail()
 * with idx_cnt (the number of indexes already updated) and
 * tab_free_row_on_fail().
 */
xtPublic xtBool xt_tab_new_record(XTOpenTablePtr ot, xtWord1 *rec_buf)
4914
register XTTableHPtr tab = ot->ot_table;
4915
register XTThreadPtr self = ot->ot_thread;
4916
XTTabRecInfoRec rec_info;
4921
/* A non-temporary table has been updated: */
4922
if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
4923
self->st_non_temp_updated = TRUE;
4925
if (!myxt_store_row(ot, &rec_info, (char *) rec_buf))
4928
/* Get a new row ID: */
4929
if (!(row_id = tab_new_row(ot, tab)))
4932
rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
4933
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
4934
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, 0);
4935
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
4937
/* Note, it is important that this record is written BEFORE the row
4938
* due to the problem distributed here [(5)]
4940
if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_INSERT))
4943
#ifdef TRACE_VARIATIONS
4944
xt_ttracef(self, "insert: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) self->st_xact_data->xd_start_xn_id);
4946
if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
4948
XT_DISABLED_TRACE(("set new tx=%d row=%d rec=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
4950
/* Add the index references: */
4951
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
4952
if (!xt_idx_insert(ot, *ind, 0, rec_info.ri_rec_id, rec_buf, NULL, FALSE)) {
4953
ot->ot_err_index_no = (*ind)->mi_index_no;
4958
/* Do the foreign key stuff: */
4959
if (ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
4960
if (!ot->ot_table->tab_dic.dic_table->insertRow(ot, rec_buf))
4964
self->st_statistics.st_row_insert++;
4968
/* Once the row has been inserted, it is to late to remove it!
4969
* Now all we can do is delete it!
4971
tab_delete_record_on_fail(ot, row_id, rec_info.ri_rec_id, (XTTabRecHeadDPtr) rec_info.ri_fix_rec_buf, rec_buf, idx_cnt);
4975
tab_free_row_on_fail(ot, tab, row_id);
4981
/* We cannot remove a change we have made to a row while a transaction
4982
* is running, so we have to undo what we have done by
4983
* overwriting the record we just created with
4986
/*
 * Undo helper for a failed update or delete (called from
 * xt_tab_update_record() and xt_tab_delete_record()).
 *
 * ot          Open table.
 * rec_info    Describes the record that was just written; ri_rec_id is
 *             the record that gets overwritten.
 * before_buf  Row image from before the change. It is handed to
 *             myxt_store_row() and to the index re-insert.
 * after_buf   Row image from after the change, used to delete the index
 *             entries that were added. xt_tab_delete_record() passes NULL.
 * idx_cnt     Number of leading indexes that already received an entry
 *             for the new record.
 *
 * The function frees the new extended record (if any), removes the new
 * index entries, rebuilds the record image from before_buf while
 * keeping the header that was saved from the new record, writes it over
 * rec_id as XT_LOG_ENT_REC_MODIFIED, and re-inserts the index entries.
 * XT_ERR_NO_BEFORE_IMAGE is registered on the path described by the
 * "cascaded action" comment, when no other error is pending.
 */
static xtBool tab_overwrite_record_on_fail(XTOpenTablePtr ot, XTTabRecInfoPtr rec_info, xtWord1 *before_buf, xtWord1 *after_buf, u_int idx_cnt)
4988
register XTTableHPtr tab = ot->ot_table;
4989
XTTabRecHeadDRec prev_rec_head;
4992
XTThreadPtr thread = ot->ot_thread;
4994
xtLogOffset log_offset;
4995
xtRecordID rec_id = rec_info->ri_rec_id;
4997
/* Remove the new extended record: */
4998
if (rec_info->ri_ext_rec)
4999
tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) rec_info->ri_fix_rec_buf, TRUE);
5001
/* Undo index entries of the new record: */
5003
for (i=0, ind=tab->tab_dic.dic_keys; i<idx_cnt; i++, ind++) {
5004
if (!xt_idx_delete(ot, *ind, rec_id, after_buf))
5009
memcpy(&prev_rec_head, rec_info->ri_fix_rec_buf, sizeof(XTTabRecHeadDRec));
5012
/* Can happen if the delete was called from some cascaded action.
5013
* And this is better than a crash...
5015
* TODO: to make sure the change will not be applied in case the
5016
* transaction will be committed, we'd need to add a log entry to
5017
* restore the record like it's done for top-level operation. In
5018
* order to do this we'd need to read the before-image of the
5019
* record before modifying it.
5021
if (!thread->t_exception.e_xt_err)
5022
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_BEFORE_IMAGE);
5026
/* Restore the previous record! */
5027
if (!myxt_store_row(ot, rec_info, (char *) before_buf))
5030
memcpy(rec_info->ri_fix_rec_buf, &prev_rec_head, sizeof(XTTabRecHeadDRec));
5032
if (rec_info->ri_ext_rec) {
5033
/* Determine where the overflow will go... */
5034
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
5035
if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data)))
5039
if (!thread->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data), thread))
5042
XT_SET_LOG_REF(rec_info->ri_ext_rec, log_id, log_offset);
5045
if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf))
5048
if (rec_info->ri_ext_rec) {
5049
/* Write the log buffer overflow: */
5050
if (!tab_write_ext_record(ot, tab, rec_info, rec_id, log_id, log_offset, thread))
5054
/* Put the index entries back: */
5055
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5056
if (!xt_idx_insert(ot, *ind, 0, rec_id, before_buf, after_buf, TRUE))
5057
/* Incomplete restore, there will be a rollback... */
5066
* If a transaction updates the same record over again, we should update
5067
* in place. This prevents producing unnecessary variations!
5069
/*
 * Update the current record (ot->ot_curr_rec_id) in place.
 *
 * ot          Open table; ot_curr_row_id / ot_curr_rec_id identify the
 *             record being overwritten.
 * before_buf  Row image before the update (used to delete old index
 *             entries and to restore the record on failure).
 * after_buf   Row image after the update.
 *
 * Order of work:
 *  1. Build the new record image from after_buf.
 *  2. Read the extended header of the existing record so that its
 *     previous-record link can be carried over and its overflow area
 *     can be freed or restored later.
 *  3. Reserve overflow space for the new image if it needs it.
 *  4. Delete the old index entries, overwrite the record
 *     (XT_LOG_ENT_REC_MODIFIED), write the new overflow data, insert
 *     the new index entries and run the foreign key updateRow() check.
 *  5. Free the overflow area of the previous image.
 *
 * The second half of the body is the failure path: it frees the new
 * extended record, removes the index entries added so far (idx_cnt),
 * rebuilds the record from before_buf with the saved header, writes it
 * back and re-inserts the index entries.
 */
static xtBool tab_overwrite_record(XTOpenTablePtr ot, xtWord1 *before_buf, xtWord1 *after_buf)
5071
register XTTableHPtr tab = ot->ot_table;
5072
xtRowID row_id = ot->ot_curr_row_id;
5073
register XTThreadPtr self = ot->ot_thread;
5074
xtRecordID rec_id = ot->ot_curr_rec_id;
5075
XTTabRecExtDRec prev_rec_head;
5076
XTTabRecInfoRec rec_info;
5077
u_int idx_cnt = 0, i;
5080
xtLogOffset log_offset;
5081
xtBool prev_ext_rec;
5083
/* A non-temporary table has been updated: */
5084
if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
5085
self->st_non_temp_updated = TRUE;
5087
if (!myxt_store_row(ot, &rec_info, (char *) after_buf))
5090
/* Read before we overwrite! */
5091
if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_EXT_HEADER_SIZE, (xtWord1 *) &prev_rec_head))
5094
prev_ext_rec = prev_rec_head.tr_rec_type_1 & XT_TAB_STATUS_EXT_DLOG;
5096
if (rec_info.ri_ext_rec) {
5097
/* Determine where the overflow will go... */
5098
if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
5099
if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size))
5103
if (!self->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size, self))
5106
XT_SET_LOG_REF(rec_info.ri_ext_rec, log_id, log_offset);
5109
rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
5110
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
5111
/* Keep the existing link to the previous variation. */
XT_COPY_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, prev_rec_head.tr_prev_rec_id_4);
5112
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
5114
/* Remove the index references, that have changed: */
5115
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5116
if (!xt_idx_delete(ot, *ind, rec_id, before_buf)) {
5121
#ifdef TRACE_VARIATIONS
5122
xt_ttracef(self, "overwrite: row=%d rec=%d T%d\n", (int) row_id, (int) rec_id, (int) self->st_xact_data->xd_start_xn_id);
5124
/* Overwrite the record: */
5125
if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info.ri_rec_buf_size, (xtWord1 *) rec_info.ri_fix_rec_buf))
5128
if (rec_info.ri_ext_rec) {
5129
/* Write the log buffer overflow: */
5130
if (!tab_write_ext_record(ot, tab, &rec_info, rec_id, log_id, log_offset, self))
5134
/* Add the index references that have changed: */
5135
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5136
if (!xt_idx_insert(ot, *ind, 0, rec_id, after_buf, before_buf, FALSE)) {
5137
ot->ot_err_index_no = (*ind)->mi_index_no;
5142
/* Do the foreign key stuff: */
5143
if (ot->ot_table->tab_dic.dic_table->dt_trefs || ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
5144
if (!ot->ot_table->tab_dic.dic_table->updateRow(ot, before_buf, after_buf))
5148
/* Delete the previous overflow area: */
5150
tab_free_ext_record_on_fail(ot, rec_id, &prev_rec_head, TRUE);
5155
/* Remove the new extended record: */
5156
if (rec_info.ri_ext_rec)
5157
tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) rec_info.ri_fix_rec_buf, TRUE);
5159
/* Restore the previous record! */
5160
/* Undo index entries: */
5161
for (i=0, ind=tab->tab_dic.dic_keys; i<idx_cnt; i++, ind++) {
5162
if (!xt_idx_delete(ot, *ind, rec_id, after_buf))
5166
/* Restore the record: */
5167
if (!myxt_store_row(ot, &rec_info, (char *) before_buf))
5170
if (rec_info.ri_ext_rec)
5171
memcpy(rec_info.ri_fix_rec_buf, &prev_rec_head, XT_REC_EXT_HEADER_SIZE);
5173
memcpy(rec_info.ri_fix_rec_buf, &prev_rec_head, sizeof(XTTabRecHeadDRec));
5175
if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info.ri_rec_buf_size, (xtWord1 *) rec_info.ri_fix_rec_buf))
5178
/* Put the index entries back: */
5179
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5180
if (!xt_idx_insert(ot, *ind, 0, rec_id, before_buf, after_buf, TRUE))
5181
/* Incomplete restore, there will be a rollback... */
5185
/* The previous record has now been restored. */
5189
/* The old record is overwritten, I must free the previous extended record: */
5191
tab_free_ext_record_on_fail(ot, rec_id, &prev_rec_head, TRUE);
5197
/*
 * Update the current row (ot->ot_curr_row_id / ot->ot_curr_rec_id).
 *
 * ot          Open table.
 * before_buf  Row image before the update. May be NULL during a cascaded
 *             update of a child table, as explained in the comment
 *             inside the function.
 * after_buf   Row image after the update.
 *
 * When the record was already updated by this transaction
 * (ot_curr_updated) and before_buf is available, the update is done in
 * place by tab_overwrite_record(). Otherwise a new variation is
 * created:
 *  1. Build the record from after_buf, with its previous-record link
 *     set to ot_curr_rec_id, and write it (XT_LOG_ENT_UPDATE).
 *  2. Under the row write lock, check that the row still references
 *     ot_curr_rec_id. If it does not, call tab_wait_for_rollback().
 *     Then point the row at the new record.
 *  3. Insert the index entries and run the foreign key updateRow()
 *     check, then count the update in st_row_update.
 *
 * The visible error paths call tab_overwrite_record_on_fail() with the
 * number of indexes already updated, and release the row lock.
 */
xtPublic xtBool xt_tab_update_record(XTOpenTablePtr ot, xtWord1 *before_buf, xtWord1 *after_buf)
5199
register XTTableHPtr tab;
5201
register XTThreadPtr self;
5202
xtRecordID curr_var_rec_id;
5203
XTTabRecInfoRec rec_info;
5208
* Originally only the flag ot->ot_curr_updated was checked, and if it was on, then
5209
* tab_overwrite_record() was called, but this caused crashes in some cases like:
5211
* set @@autocommit = 0;
5212
* create table t1 (s1 int primary key);
5213
* create table t2 (s1 int primary key, foreign key (s1) references t1 (s1) on update cascade);
5214
* insert into t1 values (1);
5215
* insert into t2 values (1);
5216
* update t1 set s1 = 1;
5218
* the last update lead to a crash on t2 cascade update because before_buf argument is NULL
5219
* in the call below. It is NULL only during cascade update of child table. In that case we
5220
* cannot pass before_buf value from XTDDTableRef::modifyRow as the before_buf is the original
5221
* row for the parent (t1) table and it would be used to update any existing indexes
5222
* in the child table which would be wrong of course.
5224
* Alternative solution would be to copy the after_info in the XTDDTableRef::modifyRow():
5227
* if (!xt_tab_load_record(ot, ot->ot_curr_rec_id, &after_info))
5231
* here the xt_tab_load_record() loads the original row, so we can copy it from there, but in
5232
* that case we'd need to allocate a new (possibly up to 65536 bytes long) buffer, which makes
5233
* the optimization questionable
5236
if (ot->ot_curr_updated && before_buf)
5237
/* This record has already been updated by this transaction.
5238
* Do the update in place!
5240
return tab_overwrite_record(ot, before_buf, after_buf);
5243
row_id = ot->ot_curr_row_id;
5244
self = ot->ot_thread;
5246
/* A non-temporary table has been updated: */
5247
if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
5248
self->st_non_temp_updated = TRUE;
5250
if (!myxt_store_row(ot, &rec_info, (char *) after_buf))
5253
rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
5254
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
5255
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, ot->ot_curr_rec_id);
5256
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
5258
/* Create the new record: */
5259
if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_UPDATE))
5262
/* Link the new variation into the list: */
5263
XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], self);
5265
if (!xt_tab_get_row(ot, row_id, &curr_var_rec_id))
5268
if (curr_var_rec_id != ot->ot_curr_rec_id) {
5269
/* If the transaction does not rollback, I will get an
5272
if (!tab_wait_for_rollback(ot, row_id, ot->ot_curr_rec_id))
5274
/* [(4)] This is the situation when we overwrite the
5275
* reference to curr_var_rec_id!
5276
* When curr_var_rec_id is cleaned up by the sweeper, the
5277
* sweeper will notice that the record is no longer in
5282
#ifdef TRACE_VARIATIONS
5283
xt_ttracef(self, "update: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) self->st_xact_data->xd_start_xn_id);
5285
if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
5287
XT_DISABLED_TRACE(("set upd tx=%d row=%d rec=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
5289
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], self);
5291
/* Add the index references: */
5292
for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5293
if (!xt_idx_insert(ot, *ind, 0, rec_info.ri_rec_id, after_buf, before_buf, FALSE)) {
5294
ot->ot_err_index_no = (*ind)->mi_index_no;
5299
if (ot->ot_table->tab_dic.dic_table->dt_trefs || ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
5300
if (!ot->ot_table->tab_dic.dic_table->updateRow(ot, before_buf, after_buf))
5304
self->st_statistics.st_row_update++;
5308
tab_overwrite_record_on_fail(ot, &rec_info, before_buf, after_buf, idx_cnt);
5312
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], self);
5318
/*
 * Delete the current row (ot->ot_curr_row_id / ot->ot_curr_rec_id).
 *
 * ot       Open table.
 * rec_buf  Image of the row being deleted. It is handed to the deleteRow()
 *          reference check and, on failure, to
 *          tab_overwrite_record_on_fail() as the before-image.
 *
 * A delete is recorded as a new variation of type XT_TAB_STATUS_DELETE:
 *  1. Build a header-only record (size offsetof(XTTabRecFixDRec,
 *     rf_data)) whose previous-record link is ot_curr_rec_id and write
 *     it (XT_LOG_ENT_DELETE).
 *  2. Under the row write lock, check that the row still references
 *     ot_curr_rec_id. If it does not, call tab_wait_for_rollback().
 *     Then point the row at the delete record.
 *  3. If other tables reference this one (dt_trefs), run deleteRow().
 *  4. Count the delete in st_row_delete.
 *
 * NOTE(review): the TRACE_VARIATIONS message in this function is
 * labelled "update:", the same text as in xt_tab_update_record(); it
 * looks copied - confirm whether it should read "delete:".
 */
xtPublic xtBool xt_tab_delete_record(XTOpenTablePtr ot, xtWord1 *rec_buf)
5320
register XTTableHPtr tab = ot->ot_table;
5321
register XTThreadPtr thread = ot->ot_thread;
5322
xtRowID row_id = ot->ot_curr_row_id;
5323
xtRecordID curr_var_rec_id;
5324
XTTabRecInfoRec rec_info;
5326
/* A non-temporary table has been updated: */
5327
if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
5328
thread->st_non_temp_updated = TRUE;
5330
/* Setup a delete record: */
5331
rec_info.ri_fix_rec_buf = (XTTabRecFixDPtr) ot->ot_row_wbuffer;
5332
rec_info.ri_rec_buf_size = offsetof(XTTabRecFixDRec, rf_data);
5333
rec_info.ri_ext_rec = NULL;
5334
rec_info.ri_fix_rec_buf->tr_rec_type_1 = XT_TAB_STATUS_DELETE;
5335
rec_info.ri_fix_rec_buf->tr_stat_id_1 = 0;
5336
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
5337
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, ot->ot_curr_rec_id);
5338
XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, thread->st_xact_data->xd_start_xn_id);
5340
if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_DELETE))
5343
XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
5345
if (!xt_tab_get_row(ot, row_id, &curr_var_rec_id))
5348
if (curr_var_rec_id != ot->ot_curr_rec_id) {
5349
if (!tab_wait_for_rollback(ot, row_id, ot->ot_curr_rec_id))
5353
#ifdef TRACE_VARIATIONS
5354
xt_ttracef(thread, "update: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) thread->st_xact_data->xd_start_xn_id);
5356
if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
5358
XT_DISABLED_TRACE(("del row tx=%d row=%d rec=%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
5360
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
5362
if (ot->ot_table->tab_dic.dic_table->dt_trefs) {
5363
if (!ot->ot_table->tab_dic.dic_table->deleteRow(ot, rec_buf))
5367
thread->st_statistics.st_row_delete++;
5371
tab_overwrite_record_on_fail(ot, &rec_info, rec_buf, NULL, 0);
5375
XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
5379
/*
 * Run the deferred checkNoAction() test for every XTRestrictItem in list.
 *
 * list    Basic list of XTRestrictItem entries (ri_tab_id, ri_rec_id).
 * thread  Calling thread; its st_database is used to open tables.
 *
 * For each item the table ri_tab_id is obtained from the table pool with
 * xt_db_open_pool_table_ns(). The currently open pool table (pot) is
 * compared with the table id of the item first and is handed back with
 * xt_db_return_table_to_pool_ns() before another table is opened. Items
 * whose table cannot be opened jump to skip_check_action. The visible code
 * ends by returning pot to the pool and freeing the list with xt_bl_free().
 *
 * NOTE(review): this listing has lost lines (braces, declarations such as
 * the loop index, return statements), so the result value and the handling
 * of a failed checkNoAction() cannot be confirmed from here.
 */
xtPublic xtBool xt_tab_restrict_rows(XTBasicListPtr list, XTThreadPtr thread)
5382
XTRestrictItemPtr item;
5383
XTOpenTablePtr pot = NULL;
5384
XTDatabaseHPtr db = thread->st_database;
5387
for (i=0; i<list->bl_count; i++) {
5388
item = (XTRestrictItemPtr) xt_bl_item_at(list, i);
5391
if (pot->ot_table->tab_id == item->ri_tab_id)
5393
xt_db_return_table_to_pool_ns(pot);
5397
if (!xt_db_open_pool_table_ns(&pot, db, item->ri_tab_id)) {
5398
/* Should not happen, but just in case, we just don't
5399
* remove the lock. We will probably end up with a deadlock
5402
xt_log_and_clear_exception_ns();
5403
goto skip_check_action;
5406
/* Can happen if the table has been dropped: */
5407
goto skip_check_action;
5410
if (!pot->ot_table->tab_dic.dic_table->checkNoAction(pot, item->ri_rec_id)) {
5418
xt_db_return_table_to_pool_ns(pot);
5419
xt_bl_free(NULL, list);
5424
/*
 * Prepare an open table for a sequential scan.
 *
 * Clears the cached scan page and data pointers and the page offset,
 * invalidates ot_curr_rec_id and ot_curr_row_id (0 is invalid for both),
 * and copies the current record EOF of the table (tab->tab_rec_eof_id)
 * into ot->ot_seq_eof_id.
 * When the thread has no transaction (st_xact_data is NULL) the scan
 * position is set to the EOF id and XT_ERR_NO_TRANSACTION is registered.
 * The visible code otherwise starts the scan at record id 1 and increments
 * the st_scan_table statistic.
 *
 * NOTE(review): this listing has lost lines (braces, return statements,
 * comment terminators), so the return values cannot be confirmed from here.
 */
xtPublic xtBool xt_tab_seq_init(XTOpenTablePtr ot)
5426
register XTTableHPtr tab = ot->ot_table;
5428
ASSERT_NS(!ot->ot_seq_page);
5429
ot->ot_seq_page = NULL;
5430
ot->ot_seq_data = NULL;
5431
ot->ot_on_page = FALSE;
5432
ot->ot_seq_offset = 0;
5434
ot->ot_curr_rec_id = 0; // 0 is an invalid position!
5435
ot->ot_curr_row_id = 0; // 0 is an invalid row ID!
5436
ot->ot_curr_updated = FALSE;
5438
/* We note the current EOF before we start a sequential scan.
5439
* It is basically possible to update the same record more than
5440
* once because an updated record creates a new record which
5441
* has a new position which may be in the area that is
5442
* still to be scanned.
5444
* By noting the EOF before we start a sequential scan we
5445
* reduce the possibility of this.
5447
* However, the possibility still remains, but it should
5448
* not be a problem because a record is not modified
5449
* if there is nothing to change, which is the case
5450
* if the record has already been changed!
5452
* NOTE (2008-01-29) There is no longer a problem with updating a
5453
* record twice because records are marked by an update.
5455
* [(10)] I have changed this (see below). I now check the
5456
* current EOF of the table.
5458
* The reason is that committed read must be able to see the
5459
* changes that occur during table table scan. *
5461
ot->ot_seq_eof_id = tab->tab_rec_eof_id;
5463
if (!ot->ot_thread->st_xact_data) {
5464
/* MySQL ignores this error, so we
5465
* setup the sequential scan so that it will
5468
ot->ot_seq_rec_id = ot->ot_seq_eof_id;
5469
xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_TRANSACTION);
5473
ot->ot_seq_rec_id = 1;
5474
ot->ot_thread->st_statistics.st_scan_table++;
5478
/*
 * Reset the sequential scan state of an open table: the scan position and
 * the scan EOF id are set to 0, and the cached page pointer, data pointer,
 * on-page flag and page offset are cleared.
 * Asserts that no scan page is still held (ot_seq_page must be NULL).
 */
xtPublic void xt_tab_seq_reset(XTOpenTablePtr ot)
5480
ot->ot_seq_rec_id = 0;
5481
ot->ot_seq_eof_id = 0;
5482
ASSERT_NS(!ot->ot_seq_page);
5483
ot->ot_seq_page = NULL;
5484
ot->ot_seq_data = NULL;
5485
ot->ot_on_page = FALSE;
5486
ot->ot_seq_offset = 0;
5489
/*
 * End a sequential scan: give back whatever the scan is still holding.
 * A cached record page (ot_seq_page) is released through
 * tab_recs.xt_tc_release_page(), a locked memory pointer (ot_seq_data) is
 * released with XT_UNLOCK_MEMORY_PTR(), both pointers are set to NULL and
 * ot_on_page is cleared.
 */
xtPublic void xt_tab_seq_exit(XTOpenTablePtr ot)
5491
register XTTableHPtr tab = ot->ot_table;
5493
if (ot->ot_seq_page) {
5494
tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
5495
ot->ot_seq_page = NULL;
5497
if (ot->ot_seq_data) {
5498
XT_UNLOCK_MEMORY_PTR(ot->ot_rec_file, ot->ot_seq_data, ot->ot_thread);
5499
ot->ot_seq_data = NULL;
5501
ot->ot_on_page = FALSE;
5504
/*
 * TAB_SEQ_LOAD_CACHE is the flag that xt_tab_seq_next() passes to
 * tab_recs.xt_tc_get_page(). It is FALSE when XT_USE_ROW_REC_MMAP_FILES is
 * defined; the remaining definitions are selected by
 * XT_SEQ_SCAN_LOADS_CACHE.
 * NOTE(review): the #else and #endif lines of this conditional are missing
 * from this listing.
 */
#ifdef XT_USE_ROW_REC_MMAP_FILES
5505
#define TAB_SEQ_LOAD_CACHE FALSE
5507
#ifdef XT_SEQ_SCAN_LOADS_CACHE
5508
#define TAB_SEQ_LOAD_CACHE TRUE
5510
#define TAB_SEQ_LOAD_CACHE FALSE
5514
/*
 * Step the sequential scan back by one record: the scan record id is
 * decremented and the page offset is reduced by one record size
 * (tab_dic.dic_rec_size).
 */
xtPublic void xt_tab_seq_repeat(XTOpenTablePtr ot)
5516
ot->ot_seq_rec_id--;
5517
ot->ot_seq_offset -= ot->ot_table->tab_dic.dic_rec_size;
5520
/*
 * Fetch the next record of a sequential scan into buffer.
 *
 * ot      Open table holding the scan state (ot_seq_rec_id, ot_seq_offset,
 *         ot_seq_page or ot_seq_data, ot_seq_eof_id).
 * buffer  Destination for the row data.
 * eof     Out parameter; presumably set when ot_seq_rec_id reaches
 *         ot_seq_eof_id (the assignment is not visible in this listing).
 *
 * Visible steps:
 *  - If the scan is not on a page, obtain one with xt_tc_get_page(); when
 *    no page is returned, lock the record file memory instead.
 *  - Compare the scan position with ot_seq_eof_id.
 *  - When the offset passes the end of the page, release the page or the
 *    memory lock and clear ot_on_page.
 *  - Make the record at the scan position the current record, advance the
 *    scan position, and ask tab_visible() about the record. One visible
 *    branch reads another record (new_rec_id) into ot_row_rbuffer; another
 *    undoes the advance and rereads the page, at most once per record.
 *  - Copy the row out according to the record status byte: fixed records
 *    with memcpy(), variable records with myxt_load_row(), extended
 *    records with myxt_load_row() when only fixed columns are required and
 *    with xt_tab_load_ext_data() otherwise.
 *
 * NOTE(review): this listing has lost lines (braces, case labels, return
 * and goto statements, the buff_ptr declaration), so the control flow
 * between these steps and the return values cannot be confirmed from here.
 */
xtPublic xtBool xt_tab_seq_next(XTOpenTablePtr ot, xtWord1 *buffer, xtBool *eof)
5522
register XTTableHPtr tab = ot->ot_table;
5523
register size_t rec_size = tab->tab_dic.dic_rec_size;
5525
xtRecordID new_rec_id;
5526
xtRecordID invalid_rec = 0;
5529
if (!ot->ot_on_page) {
5530
ASSERT_NS(!ot->ot_seq_page);
5531
if (!(ot->ot_on_page = tab->tab_recs.xt_tc_get_page(ot->ot_rec_file, ot->ot_seq_rec_id, TAB_SEQ_LOAD_CACHE, &ot->ot_seq_page, &ot->ot_seq_offset, ot->ot_thread)))
5533
if (!ot->ot_seq_page) {
5534
// NOTE(review): the length passed here is tab_rows.tci_page_size although
// the file is the record file and the page-end test further down uses
// tab_recs.tci_page_size; confirm which page size is intended.
XT_LOCK_MEMORY_PTR(ot->ot_seq_data, ot->ot_rec_file, xt_rec_id_to_rec_offset(tab, ot->ot_seq_rec_id), tab->tab_rows.tci_page_size, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread);
5535
if (!ot->ot_seq_data)
5537
ot->ot_on_page = TRUE;
5538
ot->ot_seq_offset = 0;
5543
/* [(10)] The current EOF is used: */
5544
if (ot->ot_seq_rec_id >= ot->ot_seq_eof_id) {
5549
if (ot->ot_seq_offset >= tab->tab_recs.tci_page_size) {
5550
if (ot->ot_seq_page) {
5551
tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
5552
ot->ot_seq_page = NULL;
5554
if (ot->ot_seq_data) {
5555
/* NULL here means that in the case of non-memory mapped
5556
* files we "keep" the lock.
5558
XT_UNLOCK_MEMORY_PTR(ot->ot_rec_file, ot->ot_seq_data, ot->ot_thread);
5559
ot->ot_seq_data = NULL;
5561
ot->ot_on_page = FALSE;
5565
if (ot->ot_seq_page)
5566
buff_ptr = ot->ot_seq_page->tcp_data + ot->ot_seq_offset;
5568
buff_ptr = ot->ot_seq_data + ot->ot_seq_offset;
5570
/* This is the current record: */
5571
ot->ot_curr_rec_id = ot->ot_seq_rec_id;
5572
ot->ot_curr_row_id = 0;
5574
/* Move to the next record: */
5575
ot->ot_seq_rec_id++;
5576
ot->ot_seq_offset += rec_size;
5579
switch (tab_visible(ot, (XTTabRecHeadDPtr) buff_ptr, &new_rec_id)) {
5585
buff_ptr = ot->ot_row_rbuffer;
5586
if (!xt_tab_get_rec_data(ot, new_rec_id, rec_size, ot->ot_row_rbuffer))
5588
ot->ot_curr_rec_id = new_rec_id;
5593
if (invalid_rec != ot->ot_curr_rec_id) {
5594
/* Don't re-read for the same record twice: */
5595
invalid_rec = ot->ot_curr_rec_id;
5597
/* Undo move to next: */
5598
ot->ot_seq_rec_id--;
5599
ot->ot_seq_offset -= rec_size;
5601
/* Prepare to reread the page: */
5602
if (ot->ot_seq_page) {
5603
tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
5604
ot->ot_seq_page = NULL;
5606
ot->ot_on_page = FALSE;
5609
#ifdef XT_CRASH_DEBUG
5610
/* Should not happen! */
5613
/* Continue, and skip the record... */
5620
switch (*buff_ptr) {
5621
case XT_TAB_STATUS_FIXED:
5622
case XT_TAB_STATUS_FIX_CLEAN:
5623
memcpy(buffer, buff_ptr + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
5625
case XT_TAB_STATUS_VARIABLE:
5626
case XT_TAB_STATUS_VAR_CLEAN:
5627
if (!myxt_load_row(ot, buff_ptr + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
5630
case XT_TAB_STATUS_EXT_DLOG:
5631
case XT_TAB_STATUS_EXT_CLEAN: {
5632
u_int cols_req = ot->ot_cols_req;
5634
ASSERT_NS(cols_req);
5635
if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
5636
if (!myxt_load_row(ot, buff_ptr + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
5640
if (buff_ptr != ot->ot_row_rbuffer)
5641
memcpy(ot->ot_row_rbuffer, buff_ptr, rec_size);
5642
if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
5659
* -----------------------------------------------------------------------
5667
/*
 * Look up, add or remove a table name in the repair-pending file of a
 * database.
 *
 * db          Database; the file path is built from db->db_main_path.
 * what        Operation selector. REP_ADD opens the file with
 *             XT_FS_CREATE | XT_FS_MAKE_PATH; the other operations open it
 *             with XT_FS_MISSING_OK. Callers in this file also pass
 *             REP_FIND and REP_DEL.
 * table_name  Name to search for, one name per line in the file.
 *
 * The visible code reads the whole file into a heap buffer, walks it line
 * by line (lines end at a newline or carriage return) and compares each
 * name with xt_tab_compare_names(). One path appends table_name, preceded
 * by a newline when the file does not already end with one, and sets the
 * new EOF. Another path removes the matching line from the buffer with
 * memmove(), rewrites the file from offset 0 and sets the new EOF. The file
 * is deleted with xt_fs_delete() on one path (presumably when nothing is
 * left in it; confirm). Errors are logged and cleared with
 * xt_log_and_clear_exception().
 *
 * NOTE(review): this listing has lost lines (braces, declarations such as
 * len, return and goto statements, the frees of buffer), so the return
 * value and which statements belong to which operation cannot be confirmed
 * from here. In particular, whether buffer[len-1] is guarded against
 * len == 0 is not visible.
 */
static xtBool tab_exec_repair_pending(XTDatabaseHPtr db, int what, char *table_name)
5669
XTThreadPtr thread = xt_get_self();
5670
char file_path[PATH_MAX];
5671
XTOpenFilePtr of = NULL;
5673
char *buffer = NULL, *ptr, *name;
5675
xtBool found = FALSE;
5677
xt_strcpy(PATH_MAX, file_path, db->db_main_path);
5678
xt_add_pbxt_file(PATH_MAX, file_path, "repair-pending");
5680
if (what == REP_ADD) {
5681
if (!xt_open_file_ns(&of, file_path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 0))
5685
if (!xt_open_file_ns(&of, file_path, XT_FT_STANDARD, XT_FS_DEFAULT | XT_FS_MISSING_OK, 0))
5691
len = (int) xt_seek_eof_file(NULL, of);
5693
if (!(buffer = (char *) xt_malloc_ns(len + 1)))
5696
if (!xt_pread_file(of, 0, len, len, buffer, NULL, &thread->st_statistics.st_x, thread))
5703
while (*ptr && *ptr != '\n' && *ptr != '\r')
5708
if (xt_tab_compare_names(name, table_name) == 0) {
5723
/* Remove any trailing empty lines: */
5725
if (buffer[len-1] != '\n' && buffer[len-1] != '\r')
5730
if (!xt_pwrite_file(of, len, 1, (void *) "\n", &thread->st_statistics.st_x, thread))
5734
if (!xt_pwrite_file(of, len, strlen(table_name), table_name, &thread->st_statistics.st_x, thread))
5736
len += strlen(table_name);
5737
if (!xt_set_eof_file(NULL, of, len))
5745
memmove(name, ptr, len - (ptr - buffer));
5746
len = len - (ptr - name);
5748
/* Remove trailing empty lines: */
5750
if (buffer[len-1] != '\n' && buffer[len-1] != '\r')
5756
if (!xt_pwrite_file(of, 0, len, buffer, &thread->st_statistics.st_x, thread))
5758
if (!xt_set_eof_file(NULL, of, len))
5765
xt_close_file_ns(of);
5769
xt_fs_delete(NULL, file_path);
5774
xt_close_file_ns(of);
5777
xt_log_and_clear_exception(thread);
5781
/*
 * Build a printable name of the form database.table from a table path.
 *
 * tab_path    Path of the table; the last path component is the table file
 *             name and the second last is the database directory.
 * table_name  Output buffer.
 * size        Size of table_name in bytes.
 *
 * Names whose last component starts with #sql (temporary tables) are
 * assembled from the raw path components without conversion. All other
 * names are converted with myxt_static_convert_file_name(). A partition
 * suffix introduced by #P# is stripped from the table part and appended in
 * parentheses; a sub-partition introduced by #SP# is appended after a
 * separating dash inside the same parentheses.
 *
 * NOTE(review): this listing has lost lines (braces, else branches, local
 * declarations such as nptr, part_ptr, sub_part_ptr and len, and the
 * pointer adjustments that skip the #P# and #SP# markers), so the exact
 * branch structure cannot be confirmed from here.
 */
xtPublic void xt_tab_make_table_name(XTPathStrPtr tab_path, char *table_name, size_t size)
5785
nptr = xt_last_name_of_path(tab_path->ps_path);
5786
if (xt_starts_with(nptr, "#sql")) {
5787
/* {INVALID-OLD-TABLE-FIX}
5788
* Temporary files can have strange paths, for example
5789
* ..../var/tmp/mysqld.1/#sqldaec_1_6
5790
* This occurs, for example, when the temp_table.test is
5791
* run using the PBXT suite in MariaDB:
5792
* ./mtr --suite=pbxt --do-test=temp_table
5794
* Calling myxt_static_convert_file_name, with a '.', in the name
5796
* [ERROR] Invalid (old?) table or database name 'mysqld.1'
5797
* To prevent this, we do not convert the temporary
5798
* table names using the mysql functions.
5800
* Note, this bug was found by Monty, and fixed by modifying
5801
* xt_2nd_last_name_of_path(), see {INVALID-OLD-TABLE-FIX}.
5804
xt_2nd_last_name_of_path(size, table_name, tab_path->ps_path);
5805
xt_strcat(size, table_name, ".");
5806
xt_strcat(size, table_name, nptr);
5809
char name_buf[XT_TABLE_NAME_SIZE*3+3];
5813
xt_2nd_last_name_of_path(sizeof(name_buf), name_buf, tab_path->ps_path);
5814
myxt_static_convert_file_name(name_buf, table_name, size);
5815
xt_strcat(size, table_name, ".");
5817
/* Handle partition extensions to table names: */
5818
if ((part_ptr = strstr(nptr, "#P#")))
5819
xt_strncpy(sizeof(name_buf), name_buf, nptr, part_ptr - nptr);
5821
xt_strcpy(sizeof(name_buf), name_buf, nptr);
5823
len = strlen(table_name);
5824
myxt_static_convert_file_name(name_buf, table_name + len, size - len);
5827
/* Add the partition extension (which is relevant to the engine). */
5831
if ((sub_part_ptr = strstr(part_ptr, "#SP#")))
5832
xt_strncpy(sizeof(name_buf), name_buf, part_ptr, sub_part_ptr - part_ptr);
5834
xt_strcpy(sizeof(name_buf), name_buf, part_ptr);
5836
xt_strcat(size, table_name, " (");
5837
len = strlen(table_name);
5838
myxt_static_convert_file_name(name_buf, table_name + len, size - len);
5843
xt_strcat(size, table_name, " - ");
5844
len = strlen(table_name);
5845
myxt_static_convert_file_name(sub_part_ptr, table_name + len, size - len);
5848
xt_strcat(size, table_name, ")");
5853
/*
 * Report whether the table is listed as repair pending: builds the
 * database.table name of tab with xt_tab_make_table_name() and returns the
 * result of tab_exec_repair_pending() with REP_FIND for that name.
 */
xtPublic xtBool xt_tab_is_table_repair_pending(XTTableHPtr tab)
5855
char table_name[XT_TABLE_NAME_BUF_SIZE];
5857
xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
5858
return tab_exec_repair_pending(tab->tab_db, REP_FIND, table_name);
5861
/*
 * Clear the repair-pending state of a table. Does nothing unless
 * tab->tab_repair_pending is set; otherwise the flag is cleared and the
 * table name is removed via tab_exec_repair_pending() with REP_DEL.
 * The result of tab_exec_repair_pending() is ignored.
 */
xtPublic void xt_tab_table_repaired(XTTableHPtr tab)
5863
if (tab->tab_repair_pending) {
5864
char table_name[XT_TABLE_NAME_BUF_SIZE];
5866
tab->tab_repair_pending = FALSE;
5867
xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
5868
tab_exec_repair_pending(tab->tab_db, REP_DEL, table_name);
5872
/*
 * Mark a table as needing repair. Does nothing when
 * tab->tab_repair_pending is already set; otherwise the flag is set and
 * the table name is recorded via tab_exec_repair_pending() with REP_ADD.
 * The result of tab_exec_repair_pending() is ignored.
 */
xtPublic void xt_tab_set_table_repair_pending(XTTableHPtr tab)
5874
if (!tab->tab_repair_pending) {
5875
char table_name[XT_TABLE_NAME_BUF_SIZE];
5877
tab->tab_repair_pending = TRUE;
5878
xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
5879
tab_exec_repair_pending(tab->tab_db, REP_ADD, table_name);
5884
* -----------------------------------------------------------------------
5885
* EXTENDED DATA FOR RAM TABLES
5888
/*
 * Reserve a slot in the in-memory extended record index of a table.
 *
 * tab         Table owning tab_mem_index.
 * log_id      Out parameter (its assignment is not visible in this listing).
 * log_offset  Out parameter; receives the slot number plus one.
 * req_size    Unused.
 *
 * Under tab_mem_lock a slot is taken from the free list when
 * tab_mem_ind_free is non-zero. The free list head stores slot index plus
 * one, and a free slot stores the next head in place of a data pointer.
 * Otherwise the next unused slot (tab_mem_ind_usage) is taken, growing the
 * index by 100 entries with xt_realloc_ns() when it is full.
 *
 * NOTE(review): this listing has lost lines (braces, else, the new_slot
 * declaration, return statements and the failure path of xt_realloc_ns),
 * so the return values cannot be confirmed from here.
 */
xtPublic xtBool xt_tab_get_ext_slot(XTTableHPtr tab, xtLogID *log_id, xtLogOffset *log_offset, size_t XT_UNUSED(req_size))
5892
xt_spinlock_lock(&tab->tab_mem_lock);
5893
if (tab->tab_mem_ind_free) {
5894
new_slot = tab->tab_mem_ind_free - 1;
5895
tab->tab_mem_ind_free = (size_t) tab->tab_mem_index[new_slot];
5898
if (tab->tab_mem_ind_usage == tab->tab_mem_ind_size) {
5899
/* Grow the index: */
5900
if (!xt_realloc_ns((void **) &tab->tab_mem_index, (tab->tab_mem_ind_size + 100) * sizeof(xtWord1 *)))
5902
tab->tab_mem_ind_size += 100;
5904
new_slot = tab->tab_mem_ind_usage;
5905
tab->tab_mem_ind_usage++;
5907
xt_spinlock_unlock(&tab->tab_mem_lock);
5908
// NOTE(review): this store happens after the spinlock has been released,
// while another thread may be growing tab_mem_index with xt_realloc_ns();
// confirm whether it should move above the unlock.
tab->tab_mem_index[new_slot] = NULL;
5910
*log_offset = new_slot + 1;
5914
/*
 * Store a copy of an extended record in a previously reserved slot.
 *
 * tab         Table owning tab_mem_index.
 * log_id      Unused.
 * log_offset  Slot number plus one.
 * size        Number of bytes to copy from data.
 * data        Source bytes.
 *
 * A private copy of data is allocated with xt_malloc_ns(); under
 * tab_mem_lock, size is added to tab_mem_total and the copy is stored in
 * the slot. The previous content of the slot is overwritten without
 * being freed in the visible code.
 *
 * NOTE(review): this listing has lost lines (braces, the rec_data
 * declaration, return statements), so the return values cannot be
 * confirmed from here.
 */
xtPublic xtBool xt_tab_save_ext_record(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size, xtWord1 *data)
5916
size_t slot = ((size_t) log_offset) - 1;
5919
if (!(rec_data = (xtWord1 *) xt_malloc_ns(size)))
5921
memcpy(rec_data, data, size);
5922
xt_spinlock_lock(&tab->tab_mem_lock);
5923
tab->tab_mem_total += size;
5924
tab->tab_mem_index[slot] = rec_data;
5925
xt_spinlock_unlock(&tab->tab_mem_lock);
5929
/*
 * Copy an in-memory extended record out of its slot.
 *
 * tab         Table owning tab_mem_index.
 * log_id      Unused.
 * log_offset  Slot number plus one.
 * size        Number of bytes to produce in data.
 * data        Destination buffer.
 *
 * When the slot is within tab_mem_ind_usage and holds a pointer, size bytes
 * are copied from it; the memset() fills data with zeros (presumably the
 * else branch, whose keyword line is missing from this listing).
 * The visible code does not take tab_mem_lock.
 */
xtPublic void xt_tab_read_ext_record(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size, xtWord1 *data)
5931
size_t slot = ((size_t) log_offset) - 1;
5933
if (slot < tab->tab_mem_ind_usage && tab->tab_mem_index[slot])
5934
memcpy(data, tab->tab_mem_index[slot], size);
5936
memset(data, 0, size);
5939
/*
 * Release an in-memory extended record slot.
 *
 * tab         Table owning tab_mem_index.
 * log_id      Unused.
 * log_offset  Slot number plus one.
 * size        Size of the stored record; subtracted from tab_mem_total when
 *             the slot holds data.
 *
 * Under tab_mem_lock the data of the slot is freed if present, and the slot
 * is pushed onto the free list: the slot stores the old free list head and
 * tab_mem_ind_free becomes slot plus one.
 */
xtPublic void xt_tab_free_ext_slot(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size)
5941
size_t slot = ((size_t) log_offset) - 1;
5943
xt_spinlock_lock(&tab->tab_mem_lock);
5944
if (tab->tab_mem_index[slot]) {
5945
xt_free_ns(tab->tab_mem_index[slot]);
5946
tab->tab_mem_total -= size;
5948
tab->tab_mem_index[slot] = (xtWord1 *) tab->tab_mem_ind_free;
5949
tab->tab_mem_ind_free = slot + 1;
5950
xt_spinlock_unlock(&tab->tab_mem_lock);
5953
static void tab_free_ext_records(XTTableHPtr tab)
5957
if (!tab->tab_mem_index)
5960
i = tab->tab_mem_ind_free;
5962
next = (size_t) tab->tab_mem_index[i-1];
5963
tab->tab_mem_index[i-1] = NULL;
5967
for (i=0; i<tab->tab_mem_ind_usage; i++) {
5968
if (tab->tab_mem_index[i])
5969
xt_free_ns(tab->tab_mem_index[i]);
5972
xt_free_ns(tab->tab_mem_index);