~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to plugin/pbxt/src/table_xt.cc

  • Committer: Olaf van der Spek
  • Date: 2011-02-12 18:24:24 UTC
  • mto: (2167.1.2 build) (2172.1.4 build)
  • mto: This revision was merged to the branch mainline in revision 2168.
  • Revision ID: olafvdspek@gmail.com-20110212182424-kgnm9osi7qo97at2
casts

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/* Copyright (C) 2005 PrimeBase Technologies GmbH
 
2
 *
 
3
 * PrimeBase XT
 
4
 *
 
5
 * This program is free software; you can redistribute it and/or modify
 
6
 * it under the terms of the GNU General Public License as published by
 
7
 * the Free Software Foundation; either version 2 of the License, or
 
8
 * (at your option) any later version.
 
9
 *
 
10
 * This program is distributed in the hope that it will be useful,
 
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
13
 * GNU General Public License for more details.
 
14
 *
 
15
 * You should have received a copy of the GNU General Public License
 
16
 * along with this program; if not, write to the Free Software
 
17
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 
18
 *
 
19
 * 2005-02-08   Paul McCullagh
 
20
 *
 
21
 * H&G2JCtL
 
22
 */
 
23
 
 
24
#include "xt_config.h"
 
25
 
 
26
#include <string.h>
 
27
#include <stdio.h>
 
28
#ifndef XT_WIN
 
29
#include <strings.h>
 
30
#endif
 
31
#include <ctype.h>
 
32
#include <time.h>
 
33
 
 
34
#ifdef DRIZZLED
 
35
#include <drizzled/common.h>
 
36
#include <drizzled/dtcollation.h>
 
37
#else
 
38
#include "mysql_priv.h"
 
39
#endif
 
40
 
 
41
#include "table_xt.h"
 
42
#include "database_xt.h"
 
43
#include "heap_xt.h"
 
44
#include "strutil_xt.h"
 
45
#include "myxt_xt.h"
 
46
#include "cache_xt.h"
 
47
#include "trace_xt.h"
 
48
#include "index_xt.h"
 
49
#include "systab_xt.h"
 
50
 
 
51
#ifdef DEBUG
 
52
//#define TRACE_VARIATIONS
 
53
//#define TRACE_VARIATIONS_IN_DUP_CHECK
 
54
//#define DUMP_CHECK_TABLE
 
55
//#define CHECK_INDEX_ON_CHECK_TABLE
 
56
//#define TRACE_TABLE_IDS
 
57
//#define TRACE_FLUSH_TABLE
 
58
//#define TRACE_CREATE_TABLES
 
59
#endif
 
60
 
 
61
#define CHECK_TABLE_STATS
 
62
 
 
63
/* The problem is that this can take a long time
 
64
 * if the table is very large!
 
65
 */
 
66
//#define CHECK_TABLE_READ_DATA_LOG
 
67
 
 
68
#ifdef TRACE_TABLE_IDS
 
69
//#define PRINTF                xt_ftracef
 
70
#define PRINTF          xt_trace
 
71
#endif
 
72
 
 
73
static void tab_init_row_file(XTThreadPtr self, XTOpenFilePtr of_row, XTTableHPtr tab, XTDictionaryPtr dic);
 
74
static void tab_init_ind_file(XTThreadPtr self, XTOpenFilePtr of_ind, XTTableHPtr tab, XTDictionaryPtr dic);
 
75
static void tab_init_data_file(XTThreadPtr self, XTOpenFilePtr of_rec, XTTableHPtr tab, XTDictionaryPtr dic, size_t def_len, XTStringBufferPtr tab_def);
 
76
static void tab_free_ext_records(XTTableHPtr tab);
 
77
 
 
78
/*
 
79
 * -----------------------------------------------------------------------
 
80
 * Internal structures
 
81
 */
 
82
 
 
83
#define XT_MAX_TABLE_FILE_NAME_SIZE             (XT_TABLE_NAME_SIZE+6+40)
 
84
 
 
85
/*
 
86
 * -----------------------------------------------------------------------
 
87
 * Handle Error Detected in a Table
 
88
 */
 
89
 
 
90
struct XTTableError {
 
91
        xtTableID               ter_tab_id;
 
92
        xtRecordID              ter_rec_id;
 
93
};
 
94
 
 
95
static int tab_comp_tab_error(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
 
96
{
 
97
        XTTableError    *ter_a = ((XTTableError *) a);
 
98
        XTTableError    *ter_b = (XTTableError *) b;
 
99
 
 
100
        if (ter_a->ter_tab_id < ter_b->ter_tab_id)
 
101
                return -1;
 
102
        if (ter_a->ter_tab_id == ter_b->ter_tab_id) {
 
103
                if (ter_a->ter_rec_id < ter_b->ter_rec_id)
 
104
                        return -1;
 
105
                if (ter_a->ter_rec_id == ter_b->ter_rec_id)
 
106
                        return 0;
 
107
                return 1;
 
108
        }
 
109
        return 1;
 
110
}
 
111
 
 
112
static xtBool tab_record_corrupt(XTOpenTablePtr ot, xtRowID row_id, xtRecordID rec_id, bool not_valid, int where)
 
113
{
 
114
        XTTableHPtr             tab = ot->ot_table;
 
115
        XTDatabaseHPtr  db = tab->tab_db;
 
116
        XTTableError    ter;
 
117
        XTTableError    *ter_ptr;
 
118
        
 
119
        ter.ter_tab_id = tab->tab_id;
 
120
        ter.ter_rec_id = rec_id;
 
121
        
 
122
        xt_sl_lock_ns(db->db_error_list, ot->ot_thread);
 
123
        if (!(ter_ptr = (XTTableError *) xt_sl_find(NULL, db->db_error_list, &ter))) {
 
124
                xtBool  ok;
 
125
                char    table_name[XT_IDENTIFIER_NAME_SIZE*3+3];
 
126
 
 
127
                ok = xt_sl_insert(NULL, db->db_error_list, &ter, &ter);
 
128
                xt_sl_unlock_ns(db->db_error_list);
 
129
                if (!ok)
 
130
                        return FAILED;
 
131
                xt_tab_set_table_repair_pending(tab);
 
132
                xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
 
133
                xt_logf(XT_NT_ERROR, "#%d Table %s: row %llu, record %llu, is %s, REPAIR TABLE required.\n", where,
 
134
                        table_name, 
 
135
                        (u_llong) row_id,
 
136
                        (u_llong) rec_id,
 
137
                        not_valid ? "not valid" : "free");
 
138
        }
 
139
        else
 
140
                xt_sl_unlock_ns(db->db_error_list);
 
141
        return OK;
 
142
}
 
143
 
 
144
/*
 
145
 * -----------------------------------------------------------------------
 
146
 * Compare paths:
 
147
 */
 
148
 
 
149
/* GOTCHA! The problem:
 
150
 *
 
151
 * The server uses names like: "./test/my_tab",
 
152
 * the BLOB streaming engine uses: "test/my_tab"
 
153
 * which leads to the same table being loaded twice.
 
154
 */
 
155
xtPublic int xt_tab_compare_paths(char *n1, char *n2)
 
156
{
 
157
        n1 = xt_last_2_names_of_path(n1);
 
158
        n2 = xt_last_2_names_of_path(n2);
 
159
        if (pbxt_ignore_case)
 
160
                return strcasecmp(n1, n2);
 
161
        return strcmp(n1, n2);
 
162
}
 
163
 
 
164
/*
 
165
 * This function compares only the last 2 components of
 
166
 * the path because table names must differ in this area.
 
167
 */
 
168
xtPublic int xt_tab_compare_names(const char *n1, const char *n2)
 
169
{
 
170
        n1 = xt_last_2_names_of_path(n1);
 
171
        n2 = xt_last_2_names_of_path(n2);
 
172
        if (pbxt_ignore_case)
 
173
                return strcasecmp(n1, n2);
 
174
        return strcmp(n1, n2);
 
175
}
 
176
 
 
177
/*
 
178
 * -----------------------------------------------------------------------
 
179
 * Private utilities
 
180
 */
 
181
 
 
182
static xtBool tab_list_comp(void *key, void *data)
 
183
{
 
184
        XTTableHPtr     tab = (XTTableHPtr) data;
 
185
 
 
186
        return strcmp(xt_last_2_names_of_path((char *) key), xt_last_2_names_of_path(tab->tab_name->ps_path)) == 0;
 
187
}
 
188
 
 
189
static xtHashValue tab_list_hash(xtBool is_key, void *key_data)
 
190
{
 
191
        XTTableHPtr     tab = (XTTableHPtr) key_data;
 
192
 
 
193
        if (is_key)
 
194
                return xt_ht_hash(xt_last_2_names_of_path((char *) key_data));
 
195
        return xt_ht_hash(xt_last_2_names_of_path(tab->tab_name->ps_path));
 
196
}
 
197
 
 
198
static xtBool tab_list_comp_ci(void *key, void *data)
 
199
{
 
200
        XTTableHPtr     tab = (XTTableHPtr) data;
 
201
 
 
202
        return strcasecmp(xt_last_2_names_of_path((char *) key), xt_last_2_names_of_path(tab->tab_name->ps_path)) == 0;
 
203
}
 
204
 
 
205
static xtHashValue tab_list_hash_ci(xtBool is_key, void *key_data)
 
206
{
 
207
        XTTableHPtr     tab = (XTTableHPtr) key_data;
 
208
 
 
209
        if (is_key)
 
210
                return xt_ht_casehash(xt_last_2_names_of_path((char *) key_data));
 
211
        return xt_ht_casehash(xt_last_2_names_of_path(tab->tab_name->ps_path));
 
212
}
 
213
 
 
214
static void tab_list_free(XTThreadPtr self, void *data)
 
215
{
 
216
        XTTableHPtr             tab = (XTTableHPtr) data;
 
217
        XTDatabaseHPtr  db = tab->tab_db;
 
218
        XTTableEntryPtr te_ptr;
 
219
 
 
220
        /* Remove the reference from the ID list, whem the table is
 
221
         * removed from the name list:
 
222
         */
 
223
        if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab->tab_id)))
 
224
                te_ptr->te_table = NULL;
 
225
 
 
226
        if (tab->tab_dic.dic_table)
 
227
                tab->tab_dic.dic_table->removeReferences(self);
 
228
        xt_heap_release(self, tab);
 
229
}
 
230
 
 
231
static void tab_close_files(XTThreadPtr self, XTTableHPtr tab)
 
232
{
 
233
        if (tab->tab_rec_file) {
 
234
                xt_fs_release_file(self, tab->tab_rec_file);
 
235
                tab->tab_rec_file = NULL;
 
236
        }
 
237
        if (tab->tab_row_file) {
 
238
                xt_fs_release_file(self, tab->tab_row_file);
 
239
                tab->tab_row_file = NULL;
 
240
        }
 
241
        if (tab->tab_ind_file) {
 
242
                xt_fs_release_file(self, tab->tab_ind_file);
 
243
                tab->tab_ind_file = NULL;
 
244
        }
 
245
}
 
246
 
 
247
static void tab_finalize(XTThreadPtr self, void *x)
 
248
{
 
249
        XTTableHPtr     tab = (XTTableHPtr) x;
 
250
 
 
251
        xt_exit_row_locks(&tab->tab_locks);
 
252
 
 
253
        xt_xres_exit_tab(self, tab);
 
254
 
 
255
        if (tab->tab_ind_free_list) {
 
256
                XTIndFreeListPtr list, flist;
 
257
                
 
258
                list = tab->tab_ind_free_list;
 
259
                while (list) {
 
260
                        flist = list;
 
261
                        list = list->fl_next_list;
 
262
                        xt_free(self, flist);
 
263
                }
 
264
                tab->tab_ind_free_list = NULL;
 
265
        }
 
266
 
 
267
        tab_close_files(self, tab);
 
268
 
 
269
        if (tab->tab_index_head) {
 
270
                xt_free(self, tab->tab_index_head);
 
271
                tab->tab_index_head = NULL;
 
272
        }
 
273
 
 
274
        tab_free_ext_records(tab);
 
275
 
 
276
#ifdef TRACE_TABLE_IDS
 
277
        PRINTF("%s: free TABLE: db=%d tab=%d %s\n", self->t_name, (int) tab->tab_db ? tab->tab_db->db_id : 0, (int) tab->tab_id, 
 
278
                tab->tab_name ? xt_last_2_names_of_path(tab->tab_name->ps_path) : "?");
 
279
#endif
 
280
        if (tab->tab_name) {
 
281
                xt_free(self, tab->tab_name);
 
282
                tab->tab_name = NULL;
 
283
        }
 
284
        myxt_free_dictionary(self, &tab->tab_dic);
 
285
        if (tab->tab_free_locks) {
 
286
                tab->tab_seq.xt_op_seq_exit(self);
 
287
                xt_spinlock_free(self, &tab->tab_mem_lock);
 
288
                xt_spinlock_free(self, &tab->tab_ainc_lock);
 
289
                xt_free_mutex(&tab->tab_rec_flush_lock);
 
290
                xt_free_mutex(&tab->tab_ind_flush_lock);
 
291
                xt_free_mutex(&tab->tab_ind_stat_lock);
 
292
                xt_free_mutex(&tab->tab_dic_field_lock);
 
293
                xt_free_mutex(&tab->tab_row_lock);
 
294
                xt_free_mutex(&tab->tab_ind_lock);
 
295
                xt_free_mutex(&tab->tab_rec_lock);
 
296
                for (u_int i=0; i<XT_ROW_RWLOCKS; i++)
 
297
                        XT_TAB_ROW_FREE_LOCK(self, &tab->tab_row_rwlock[i]);
 
298
        }
 
299
#ifdef XT_SORT_REC_WRITES
 
300
        if (tab->tab_rec_dw_writes) {
 
301
                xt_free_sortedlist(self, tab->tab_rec_dw_writes);
 
302
                tab->tab_rec_dw_writes = NULL;
 
303
        }
 
304
        if (tab->tab_rec_dw_data)
 
305
                xt_free_ns(tab->tab_rec_dw_data);
 
306
#endif
 
307
        if (tab->tab_rec_flush_task)
 
308
                tab->tab_rec_flush_task->tk_exit();
 
309
        if (tab->tab_ind_flush_task)
 
310
                tab->tab_ind_flush_task->tk_exit();
 
311
}
 
312
 
 
313
static void tab_onrelease(void *x)
 
314
{
 
315
        XTTableHPtr     tab = (XTTableHPtr) x;
 
316
 
 
317
        /* Signal threads waiting for exclusive use of the table: */
 
318
        if (tab->tab_db->db_tables)
 
319
                xt_ht_signal(NULL, tab->tab_db->db_tables);
 
320
}
 
321
 
 
322
/*
 
323
 * -----------------------------------------------------------------------
 
324
 * PUBLIC METHODS
 
325
 */
 
326
 
 
327
/*
 
328
 * This function sets the table name to "", if the file
 
329
 * does not belong to XT.
 
330
 */
 
331
xtPublic char *xt_tab_file_to_name(size_t size, char *tab_name, char *file_name)
 
332
{
 
333
        char    *cptr;
 
334
        size_t  len;
 
335
 
 
336
        file_name = xt_last_name_of_path(file_name);
 
337
        cptr = file_name + strlen(file_name) - 1;
 
338
        while (cptr > file_name && *cptr != '.')
 
339
                cptr--;
 
340
        if (cptr > file_name && *cptr == '.') {
 
341
                if (strcmp(cptr, ".xtl") == 0 || strcmp(cptr, ".xtr") == 0) {
 
342
                        cptr--;
 
343
                        while (cptr > file_name && isdigit(*cptr))
 
344
                                cptr--;
 
345
                }
 
346
                else {
 
347
                        const char **ext = pbxt_extensions;
 
348
                        
 
349
                        while (*ext) {
 
350
                                if (strcmp(cptr, *ext) == 0)
 
351
                                        goto ret_name;
 
352
                                ext++;
 
353
                        }
 
354
                        cptr = file_name;
 
355
                }
 
356
        }
 
357
 
 
358
        ret_name:
 
359
        len = cptr - file_name;
 
360
        if (len > size-1)
 
361
                len = size-1;
 
362
 
 
363
        memcpy(tab_name, file_name, len);
 
364
        tab_name[len] = 0;
 
365
 
 
366
        /* Return a pointer to what was removed! */
 
367
        return file_name + len;
 
368
}
 
369
 
 
370
static void tab_get_row_file_name(char *table_name, char *name, xtTableID tab_id)
 
371
{
 
372
        sprintf(table_name, "%s-%lu.xtr", name, (u_long) tab_id);
 
373
}
 
374
 
 
375
static void tab_get_data_file_name(char *table_name, char *name, xtTableID XT_UNUSED(tab_id))
 
376
{
 
377
        sprintf(table_name, "%s.xtd", name);
 
378
}
 
379
 
 
380
static void tab_get_index_file_name(char *table_name, char *name, xtTableID XT_UNUSED(tab_id))
 
381
{
 
382
        sprintf(table_name, "%s.xti", name);
 
383
}
 
384
 
 
385
static void tab_free_by_id(XTThreadPtr self, void *XT_UNUSED(thunk), void *item)
 
386
{
 
387
        XTTableEntryPtr te_ptr = (XTTableEntryPtr) item;
 
388
 
 
389
        if (te_ptr->te_tab_name) {
 
390
                xt_free(self, te_ptr->te_tab_name);
 
391
                te_ptr->te_tab_name = NULL;
 
392
        }
 
393
        te_ptr->te_tab_id = 0;
 
394
        te_ptr->te_heap_tab = FALSE;
 
395
        te_ptr->te_table = NULL;
 
396
}
 
397
 
 
398
static int tab_comp_by_id(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
 
399
{
 
400
        xtTableID               te_id = *((xtTableID *) a);
 
401
        XTTableEntryPtr te_ptr = (XTTableEntryPtr) b;
 
402
 
 
403
        if (te_id < te_ptr->te_tab_id)
 
404
                return -1;
 
405
        if (te_id == te_ptr->te_tab_id)
 
406
                return 0;
 
407
        return 1;
 
408
}
 
409
 
 
410
static void tab_free_path(XTThreadPtr self, void *XT_UNUSED(thunk), void *item)
 
411
{
 
412
        XTTablePathPtr  tp_ptr = *((XTTablePathPtr *) item);
 
413
 
 
414
        xt_free(self, tp_ptr);
 
415
}
 
416
 
 
417
static int tab_comp_path(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
 
418
{
 
419
        char                    *path = (char *) a;
 
420
        XTTablePathPtr  tp_ptr = *((XTTablePathPtr *) b);
 
421
 
 
422
        return xt_tab_compare_paths(path, tp_ptr->tp_path);
 
423
}
 
424
 
 
425
static xtBool tab_get_name_value(XTTableDescPtr td, char **ret_name, char **ret_value)
 
426
{
 
427
        char *ptr = td->x.z.td_curr_ptr;
 
428
 
 
429
        while (*ptr && isspace(*ptr)) ptr++;
 
430
        if (!*ptr) {
 
431
                td->x.z.td_curr_ptr = ptr;
 
432
                return FALSE;
 
433
        }
 
434
 
 
435
        *ret_name = ptr;
 
436
        while (*ptr && *ptr != '=' && *ptr != '\r' && *ptr != '\n') ptr++;
 
437
        if (*ptr == '=') {
 
438
                *ptr = 0;
 
439
                ptr++;
 
440
                *ret_value = ptr;
 
441
                while (*ptr && *ptr != '\r' && *ptr != '\n') ptr++;
 
442
                if (*ptr) {
 
443
                        *ptr = 0;
 
444
                        ptr++;
 
445
                }
 
446
        }
 
447
        else {
 
448
                if (*ptr) {
 
449
                        *ptr = 0;
 
450
                        ptr++;
 
451
                }
 
452
                *ret_value = NULL;
 
453
        }
 
454
        td->x.z.td_curr_ptr = ptr;
 
455
        return TRUE;
 
456
}
 
457
 
 
458
xtPublic void xt_describe_tables_init(XTThreadPtr self, XTDatabaseHPtr db, XTTableDescPtr td)
 
459
{
 
460
        char pbuf[PATH_MAX];
 
461
 
 
462
        td->td_db = db;
 
463
        xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
 
464
        xt_add_tables_file(PATH_MAX, pbuf);
 
465
        if (xt_fs_exists(pbuf))
 
466
                td->td_type = XT_TD_FROM_TAB_FILE;
 
467
        else
 
468
                td->td_type = XT_TD_FROM_DIRECTORY;
 
469
 
 
470
        switch (td->td_type) {
 
471
                case XT_TD_FROM_DIRECTORY:
 
472
                        td->x.y.td_path_idx = 0;
 
473
                        if (td->x.y.td_path_idx < xt_sl_get_size(db->db_table_paths)) {
 
474
                                XTTablePathPtr *tp_ptr;
 
475
 
 
476
                                tp_ptr = (XTTablePathPtr *) xt_sl_item_at(db->db_table_paths, td->x.y.td_path_idx);
 
477
                                td->td_tab_path = *tp_ptr;
 
478
                                td->x.y.td_open_dir = xt_dir_open(self, td->td_tab_path->tp_path, "*.xtr");
 
479
                        }
 
480
                        else
 
481
                                td->x.y.td_open_dir = NULL;
 
482
                        break;
 
483
                case XT_TD_FROM_TAB_FILE:
 
484
                        XTOpenFilePtr   of;
 
485
                        int                             len;
 
486
                        char                    *buffer;
 
487
                        char                    *name;
 
488
                        char                    *value;
 
489
 
 
490
                        of = xt_open_file(self, pbuf, XT_FT_STANDARD, XT_FS_DEFAULT, 1024);
 
491
                        pushr_(xt_close_file, of);
 
492
                        len = (int) xt_seek_eof_file(self, of);
 
493
                        buffer = (char *) xt_malloc(self, len + 1);
 
494
                        pushr_(xt_free, buffer);
 
495
                        if (!xt_pread_file(of, 0, len, len, buffer, NULL, &self->st_statistics.st_x, self))
 
496
                                xt_throw(self);
 
497
                        buffer[len] = 0;
 
498
                        popr_(); // Discard xt_free(buffer)
 
499
                        freer_(); // xt_close_file(of)
 
500
 
 
501
                        td->x.z.td_table_info = buffer;
 
502
                        td->x.z.td_curr_ptr = buffer;
 
503
                        while (tab_get_name_value(td, &name, &value)) {
 
504
                                if (strcmp(name, "[table]") == 0)
 
505
                                        break;
 
506
                        }
 
507
                        break;
 
508
        }
 
509
}
 
510
 
 
511
xtPublic xtBool xt_describe_tables_next(XTThreadPtr self, XTTableDescPtr td)
 
512
{
 
513
        char    *tab_name;
 
514
        xtBool  r = FALSE;
 
515
 
 
516
        enter_();
 
517
 
 
518
        switch (td->td_type) {
 
519
                case XT_TD_FROM_DIRECTORY:
 
520
                        retry:
 
521
                        if (!td->x.y.td_open_dir)
 
522
                                return_(FALSE);
 
523
                        try_(a) {
 
524
                                r = xt_dir_next(self, td->x.y.td_open_dir);
 
525
                        }
 
526
                        catch_(a) {
 
527
                                xt_describe_tables_exit(self, td);
 
528
                                throw_();
 
529
                        }
 
530
                        cont_(a);
 
531
                        if (!r) {
 
532
                                XTTablePathPtr *tp_ptr;
 
533
 
 
534
                                if (td->x.y.td_path_idx+1 >= xt_sl_get_size(td->td_db->db_table_paths))
 
535
                                        return_(FALSE);
 
536
 
 
537
                                if (td->x.y.td_open_dir)
 
538
                                        xt_dir_close(NULL, td->x.y.td_open_dir);
 
539
                                td->x.y.td_open_dir = NULL;
 
540
 
 
541
                                td->x.y.td_path_idx++;
 
542
                                tp_ptr = (XTTablePathPtr *) xt_sl_item_at(td->td_db->db_table_paths, td->x.y.td_path_idx);
 
543
                                td->td_tab_path = *tp_ptr;
 
544
                                td->x.y.td_open_dir = xt_dir_open(self, td->td_tab_path->tp_path, "*.xtr");
 
545
                                goto retry;
 
546
                        }
 
547
 
 
548
                        tab_name = xt_dir_name(self, td->x.y.td_open_dir);
 
549
                        td->td_tab_id = (xtTableID) xt_file_name_to_id(tab_name);
 
550
                        xt_tab_file_to_name(XT_TABLE_NAME_SIZE, td->td_tab_name, tab_name);
 
551
                        td->td_heap_tab = FALSE;
 
552
                        break;
 
553
                case XT_TD_FROM_TAB_FILE:
 
554
                        char *name;
 
555
                        char *value;
 
556
 
 
557
                        td->td_tab_id = 0;
 
558
                        while (tab_get_name_value(td, &name, &value)) {
 
559
                                if (strcmp(name, "name") == 0)
 
560
                                        xt_strcpy(XT_TABLE_NAME_SIZE, td->td_tab_name, value);
 
561
                                else if (strcmp(name, "id") == 0) {
 
562
                                        u_long lvalue = 0;
 
563
 
 
564
                                        sscanf(value, "%lu", &lvalue);
 
565
                                        td->td_tab_id = (xtTableID) lvalue;
 
566
                                }
 
567
                                else if (strcmp(name, "storage") == 0) {
 
568
                                        if (strcmp(value, "heap") == 0)
 
569
                                                td->td_heap_tab = TRUE;
 
570
                                        else /* disk */
 
571
                                                td->td_heap_tab = FALSE;
 
572
                                }
 
573
                                else if (strcmp(name, "location") == 0) {
 
574
                                        XTTablePathPtr  *tp;
 
575
                                        XTTablePathPtr  db_path;
 
576
 
 
577
#ifdef XT_WIN
 
578
                                        char *ptr = value;
 
579
 
 
580
                                        /* Convert path to WIN path: */
 
581
                                        while (*ptr) {
 
582
                                                if (*ptr == '/')
 
583
                                                        *ptr = '\\';
 
584
                                                ptr++;
 
585
                                        }
 
586
#endif
 
587
                                        if ((tp = (XTTablePathPtr *) xt_sl_find(self, td->td_db->db_table_paths, value)))
 
588
                                                db_path = *tp;
 
589
                                        else {
 
590
                                                size_t                  len;
 
591
 
 
592
                                                len = strlen(value);
 
593
                                                db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
 
594
                                                db_path->tp_tab_count = 0;
 
595
                                                memcpy(db_path->tp_path, value, len);
 
596
                                                db_path->tp_path[len] = 0;
 
597
                                                xt_sl_insert(self, td->td_db->db_table_paths, db_path->tp_path, &db_path);
 
598
                                        }
 
599
                                        td->td_tab_path = db_path;
 
600
                                }
 
601
                                else if (strcmp(name, "type") == 0) {
 
602
                                        u_long lvalue = 0;
 
603
 
 
604
                                        sscanf(value, "%lu", &lvalue);
 
605
                                        td->td_tab_type = (xtWord1) lvalue;
 
606
                                } 
 
607
                                else if (strcmp(name, "[table]") == 0)
 
608
                                        break;
 
609
                        }
 
610
                        if (!td->td_tab_id)
 
611
                                return_(FALSE);
 
612
                        break;
 
613
        }
 
614
        return_(TRUE);
 
615
}
 
616
 
 
617
xtPublic void xt_describe_tables_exit(XTThreadPtr self, XTTableDescPtr td)
 
618
{
 
619
        switch (td->td_type) {
 
620
                case XT_TD_FROM_DIRECTORY:
 
621
                        if (td->x.y.td_open_dir)
 
622
                                xt_dir_close(NULL, td->x.y.td_open_dir);
 
623
                        td->x.y.td_open_dir = NULL;
 
624
                        break;
 
625
                case XT_TD_FROM_TAB_FILE:
 
626
                        if (td->x.z.td_table_info) {
 
627
                                xt_free(self, td->x.z.td_table_info);
 
628
                                td->x.z.td_table_info = NULL;
 
629
                        }
 
630
                        td->x.z.td_curr_ptr = NULL;
 
631
                        break;
 
632
        }
 
633
        td->td_tab_path = NULL;
 
634
}
 
635
 
 
636
xtPublic void xt_tab_init_db(XTThreadPtr self, XTDatabaseHPtr db)
 
637
{
 
638
        XTTableDescRec          desc;
 
639
        XTTableEntryRec         te_tab;
 
640
        XTTableEntryPtr         te_ptr;
 
641
        XTTablePathPtr          db_path;
 
642
        char                            pbuf[PATH_MAX];
 
643
        int                                     len;
 
644
        u_int                           edx;
 
645
 
 
646
        enter_();
 
647
        pushr_(xt_tab_exit_db, db);
 
648
        if (pbxt_ignore_case)
 
649
                db->db_tables = xt_new_hashtable(self, tab_list_comp_ci, tab_list_hash_ci, tab_list_free, TRUE, TRUE);
 
650
        else
 
651
                db->db_tables = xt_new_hashtable(self, tab_list_comp, tab_list_hash, tab_list_free, TRUE, TRUE);
 
652
        db->db_table_by_id = xt_new_sortedlist(self, sizeof(XTTableEntryRec), 20, 20, tab_comp_by_id, db, tab_free_by_id, FALSE, FALSE);
 
653
        db->db_table_paths = xt_new_sortedlist(self, sizeof(XTTablePathPtr), 20, 20, tab_comp_path, db, tab_free_path, FALSE, FALSE);
 
654
        db->db_error_list = xt_new_sortedlist(self, sizeof(XTTableError), 20, 20, tab_comp_tab_error, db, NULL, TRUE, FALSE);
 
655
 
 
656
        if (db->db_multi_path) {
 
657
                XTOpenFilePtr   of;
 
658
                char                    *buffer, *ptr, *path;
 
659
 
 
660
                xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
 
661
                xt_add_tables_file(PATH_MAX, pbuf);
 
662
                if (!xt_fs_exists(pbuf)) {
 
663
                        /* Load the location file, if a tables file does not
 
664
                         * exists:
 
665
                         */
 
666
                        xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
 
667
                        xt_add_location_file(PATH_MAX, pbuf);
 
668
                        if (xt_fs_exists(pbuf)) {
 
669
                                of = xt_open_file(self, pbuf, XT_FT_STANDARD, XT_FS_DEFAULT, 1024);
 
670
                                pushr_(xt_close_file, of);
 
671
                                len = (int) xt_seek_eof_file(self, of);
 
672
                                buffer = (char *) xt_malloc(self, len + 1);
 
673
                                pushr_(xt_free, buffer);
 
674
                                if (!xt_pread_file(of, 0, len, len, buffer, NULL, &self->st_statistics.st_x, self))
 
675
                                        xt_throw(self);
 
676
                                buffer[len] = 0;
 
677
                                ptr = buffer;
 
678
                                while (*ptr) {
 
679
                                        /* Ignore preceeding space: */
 
680
                                        while (*ptr && isspace(*ptr))
 
681
                                                ptr++;
 
682
                                        path = ptr;
 
683
                                        while (*ptr && *ptr != '\n' && *ptr != '\r') {
 
684
#ifdef XT_WIN
 
685
                                                /* Undo the conversion below: */
 
686
                                                if (*ptr == '/')
 
687
                                                        *ptr = '\\';
 
688
#endif
 
689
                                                ptr++;
 
690
                                        }
 
691
                                        if (*path != '#' && ptr > path) {
 
692
                                                len = (int) (ptr - path);
 
693
                                                db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
 
694
                                                db_path->tp_tab_count = 0;
 
695
                                                memcpy(db_path->tp_path, path, len);
 
696
                                                db_path->tp_path[len] = 0;
 
697
                                                xt_sl_insert(self, db->db_table_paths, db_path->tp_path, &db_path);
 
698
                                        }
 
699
                                        ptr++;
 
700
                                }
 
701
                                freer_(); // xt_free(buffer)
 
702
                                freer_(); // xt_close_file(of)
 
703
                        }
 
704
                }
 
705
        }
 
706
        else {
 
707
                len = (int) strlen(db->db_main_path);
 
708
                db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
 
709
                db_path->tp_tab_count = 0;
 
710
                strcpy(db_path->tp_path, db->db_main_path);
 
711
                xt_sl_insert(self, db->db_table_paths, db_path->tp_path, &db_path);
 
712
        }
 
713
 
 
714
        xt_describe_tables_init(self, db, &desc);
 
715
        pushr_(xt_describe_tables_exit, &desc);
 
716
        while (xt_describe_tables_next(self, &desc)) {
 
717
                te_tab.te_tab_id = desc.td_tab_id;
 
718
                te_tab.te_heap_tab = desc.td_heap_tab;
 
719
 
 
720
                if (te_tab.te_tab_id > db->db_curr_tab_id)
 
721
                        db->db_curr_tab_id = te_tab.te_tab_id;
 
722
 
 
723
                te_tab.te_tab_name = xt_dup_string(self, desc.td_tab_name);
 
724
                te_tab.te_tab_path = desc.td_tab_path;
 
725
                desc.td_tab_path->tp_tab_count++;
 
726
                te_tab.te_table = NULL;
 
727
                te_tab.te_type = desc.td_tab_type;
 
728
                xt_sl_insert(self, db->db_table_by_id, &desc.td_tab_id, &te_tab);
 
729
        }
 
730
        freer_(); // xt_describe_tables_exit(&desc)
 
731
 
 
732
        /*
 
733
         * When we open all tables, we ignore problems with foreign keys.
 
734
         * This must be done or we will not be able to load tables that
 
735
         * were created with foreign key checks off.
 
736
         */
 
737
        self->st_ignore_fkeys = 1;
 
738
        /* 
 
739
         * The purpose of this code is to ensure that all tables are opened and cached,
 
740
         * which is actually only required if tables have foreign key references.
 
741
         *
 
742
         * In other words, a side effect of this code is that FK references between tables
 
743
         * are registered, and checked.
 
744
         *
 
745
         * Unfortunately we don't know if a table is referenced by a FK, so we have to open
 
746
         * all tables.
 
747
         * 
 
748
         * Cannot open tables in the loop above because db->db_table_by_id which is built 
 
749
         * above is used by xt_use_table_no_lock() 
 
750
         *
 
751
         * {TABLE-STATS}
 
752
         * NOTE: This code also led to the statistics failing to work because
 
753
         * the tables were already open when the handler was opened.
 
754
         * Previously we only calculated statistics when a handler was opened
 
755
         * and the underlying table was also opened.
 
756
         */
 
757
        XTTableHPtr tab;
 
758
 
 
759
        xt_enum_tables_init(&edx);
 
760
        while ((te_ptr = xt_enum_tables_next(self, db, &edx))) {
 
761
                xt_strcpy(PATH_MAX, pbuf, te_ptr->te_tab_path->tp_path);
 
762
                xt_add_dir_char(PATH_MAX, pbuf);
 
763
                xt_strcat(PATH_MAX, pbuf, te_ptr->te_tab_name);
 
764
                if ((tab = xt_use_table_no_lock_ns(db, (XTPathStrPtr) pbuf, FALSE, FALSE, NULL)))
 
765
                        xt_heap_release_ns(tab);
 
766
                else
 
767
                        xt_log_and_clear_warning(self);
 
768
        }
 
769
        self->st_ignore_fkeys = 0;
 
770
 
 
771
        popr_(); // Discard xt_tab_exit_db(db)
 
772
        exit_();
 
773
}
 
774
 
 
775
static void tab_save_tables(XTThreadPtr self, XTDatabaseHPtr db)
 
776
{
 
777
        XTTableEntryPtr         te_ptr;
 
778
        XTStringBufferRec       buffer;
 
779
        XTOpenFilePtr           of;
 
780
        char                            path[PATH_MAX];
 
781
 
 
782
        memset(&buffer, 0, sizeof(buffer));
 
783
 
 
784
        xt_strcpy(PATH_MAX, path, db->db_main_path);
 
785
        xt_add_tables_file(PATH_MAX, path);
 
786
 
 
787
        if (xt_sl_get_size(db->db_table_by_id)) {
 
788
                pushr_(xt_sb_free, &buffer);
 
789
                for (u_int i=0; i<xt_sl_get_size(db->db_table_by_id); i++) {
 
790
                        te_ptr = (XTTableEntryPtr) xt_sl_item_at(db->db_table_by_id, i);
 
791
                        xt_sb_concat(self, &buffer, "[table]\n");
 
792
                        xt_sb_concat(self, &buffer, "id=");
 
793
                        xt_sb_concat_int8(self, &buffer, (xtInt8) te_ptr->te_tab_id);
 
794
                        xt_sb_concat(self, &buffer, "\n");
 
795
                        xt_sb_concat(self, &buffer, "name=");
 
796
                        xt_sb_concat(self, &buffer, te_ptr->te_tab_name);
 
797
                        xt_sb_concat(self, &buffer, "\n");
 
798
                        xt_sb_concat(self, &buffer, "location=");
 
799
                        xt_sb_concat(self, &buffer, te_ptr->te_tab_path->tp_path);
 
800
                        xt_sb_concat(self, &buffer, "\n");
 
801
                        xt_sb_concat(self, &buffer, "storage=");
 
802
                        if (te_ptr->te_heap_tab)
 
803
                                xt_sb_concat(self, &buffer, "heap\n");
 
804
                        else
 
805
                                xt_sb_concat(self, &buffer, "disk\n");
 
806
                        xt_sb_concat(self, &buffer, "type=");
 
807
                        xt_sb_concat_int8(self, &buffer, (xtInt8) te_ptr->te_type);
 
808
                        xt_sb_concat(self, &buffer, "\n");
 
809
                }
 
810
 
 
811
#ifdef XT_WIN
 
812
                /* To make the location file cross-platform (at least
 
813
                 * as long as relative paths are used) we replace all '\' 
 
814
                 * with '/': */
 
815
                char *ptr;
 
816
                
 
817
                ptr = buffer.sb_cstring;
 
818
                while (*ptr) {
 
819
                        if (*ptr == '\\')
 
820
                                *ptr = '/';
 
821
                        ptr++;
 
822
                }
 
823
#endif
 
824
                of = xt_open_file(self, path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 1024);
 
825
                pushr_(xt_close_file, of);
 
826
                if (!xt_pwrite_file(of, 0, strlen(buffer.sb_cstring), buffer.sb_cstring, &self->st_statistics.st_x, self))
 
827
                        xt_throw(self);
 
828
                xt_set_eof_file(self, of, strlen(buffer.sb_cstring));
 
829
                freer_(); // xt_close_file(of)
 
830
                
 
831
                freer_(); // xt_sb_free(&buffer);
 
832
        }
 
833
        else
 
834
                xt_fs_delete(NULL, path);
 
835
}
 
836
 
 
837
static void tab_save_table_paths(XTThreadPtr self, XTDatabaseHPtr db)
 
838
{
 
839
        XTTablePathPtr          *tp_ptr;
 
840
        XTStringBufferRec       buffer;
 
841
        XTOpenFilePtr           of;
 
842
        char                            path[PATH_MAX];
 
843
 
 
844
        memset(&buffer, 0, sizeof(buffer));
 
845
 
 
846
        xt_strcpy(PATH_MAX, path, db->db_main_path);
 
847
        xt_add_location_file(PATH_MAX, path);
 
848
 
 
849
        if (xt_sl_get_size(db->db_table_paths)) {
 
850
                pushr_(xt_sb_free, &buffer);
 
851
                for (u_int i=0; i<xt_sl_get_size(db->db_table_paths); i++) {
 
852
                        tp_ptr = (XTTablePathPtr *) xt_sl_item_at(db->db_table_paths, i);
 
853
                        xt_sb_concat(self, &buffer, (*tp_ptr)->tp_path);
 
854
                        xt_sb_concat(self, &buffer, "\n");
 
855
                }
 
856
 
 
857
#ifdef XT_WIN
 
858
                /* To make the location file cross-platform (at least
 
859
                 * as long as relative paths are used) we replace all '\' 
 
860
                 * with '/': */
 
861
                char *ptr;
 
862
                
 
863
                ptr = buffer.sb_cstring;
 
864
                while (*ptr) {
 
865
                        if (*ptr == '\\')
 
866
                                *ptr = '/';
 
867
                        ptr++;
 
868
                }
 
869
#endif
 
870
 
 
871
                of = xt_open_file(self, path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 1024);
 
872
                pushr_(xt_close_file, of);
 
873
                if (!xt_pwrite_file(of, 0, strlen(buffer.sb_cstring), buffer.sb_cstring, &self->st_statistics.st_x, self))
 
874
                        xt_throw(self);
 
875
                xt_set_eof_file(self, of, strlen(buffer.sb_cstring));
 
876
                freer_(); // xt_close_file(of)
 
877
                
 
878
                freer_(); // xt_sb_free(&buffer);
 
879
        }
 
880
        else
 
881
                xt_fs_delete(NULL, path);
 
882
}
 
883
 
 
884
static XTTablePathPtr tab_get_table_path(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr tab_name, xtBool save_it)
 
885
{
 
886
        XTTablePathPtr  *tp, tab_path;
 
887
        char                    path[PATH_MAX];
 
888
 
 
889
        xt_strcpy(PATH_MAX, path, tab_name->ps_path);
 
890
        xt_remove_last_name_of_path(path);
 
891
        xt_remove_dir_char(path);
 
892
        tp = (XTTablePathPtr *) xt_sl_find(self, db->db_table_paths, path);
 
893
        if (tp)
 
894
                tab_path = *tp;
 
895
        else {
 
896
                int len = (int) strlen(path);
 
897
 
 
898
                tab_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
 
899
                tab_path->tp_tab_count = 0;
 
900
                memcpy(tab_path->tp_path, path, len);
 
901
                tab_path->tp_path[len] = 0;
 
902
                xt_sl_insert(self, db->db_table_paths, tab_path->tp_path, &tab_path);
 
903
                if (save_it) {
 
904
                        tab_save_table_paths(self, db);
 
905
                        if (xt_sl_get_size(db->db_table_paths) == 1) {
 
906
                                XTSystemTableShare::createSystemTables(self, db);
 
907
                        }
 
908
                }
 
909
        }
 
910
        tab_path->tp_tab_count++;
 
911
        return tab_path;
 
912
}
 
913
 
 
914
static void tab_remove_table_path(XTThreadPtr self, XTDatabaseHPtr db, XTTablePathPtr tab_path)
 
915
{
 
916
        if (tab_path->tp_tab_count > 0) {
 
917
                tab_path->tp_tab_count--;
 
918
                if (tab_path->tp_tab_count == 0) {
 
919
                        xt_sl_delete(self, db->db_table_paths, tab_path->tp_path);
 
920
                        tab_save_table_paths(self, db);
 
921
                }
 
922
        }
 
923
}
 
924
 
 
925
static void tab_free_table_path(XTThreadPtr self, XTTablePathPtr tab_path)
 
926
{
 
927
        XTDatabaseHPtr db = self->st_database;
 
928
 
 
929
        tab_remove_table_path(self, db, tab_path);
 
930
}
 
931
 
 
932
xtPublic void xt_tab_exit_db(XTThreadPtr self, XTDatabaseHPtr db)
 
933
{
 
934
        if (db->db_tables) {
 
935
                xt_free_hashtable(self, db->db_tables);
 
936
                db->db_tables = NULL;
 
937
        }
 
938
        if (db->db_table_by_id) {
 
939
                xt_free_sortedlist(self, db->db_table_by_id);
 
940
                db->db_table_by_id = NULL;
 
941
        }
 
942
        if (db->db_table_paths) {
 
943
                xt_free_sortedlist(self, db->db_table_paths);
 
944
                db->db_table_paths = NULL;
 
945
        }
 
946
        if (db->db_error_list) {
 
947
                xt_free_sortedlist(self, db->db_error_list);
 
948
                db->db_error_list = NULL;
 
949
        }
 
950
}
 
951
 
 
952
 
 
953
xtPublic xtBool xt_table_exists(XTDatabaseHPtr db)
 
954
{
 
955
        return xt_sl_get_size(db->db_table_by_id) > 0;
 
956
}
 
957
 
 
958
/*
 
959
 * Enumerate all tables in the current database.
 
960
 */
 
961
 
 
962
xtPublic void xt_enum_tables_init(u_int *edx)
 
963
{
 
964
        *edx = 0;
 
965
}
 
966
 
 
967
xtPublic XTTableEntryPtr xt_enum_tables_next(XTThreadPtr XT_UNUSED(self), XTDatabaseHPtr db, u_int *edx)
 
968
{
 
969
        XTTableEntryPtr en_ptr;
 
970
 
 
971
        if (*edx >= xt_sl_get_size(db->db_table_by_id))
 
972
                return NULL;
 
973
        en_ptr = (XTTableEntryPtr) xt_sl_item_at(db->db_table_by_id, *edx);
 
974
        (*edx)++;
 
975
        return en_ptr;
 
976
}
 
977
 
 
978
xtPublic void xt_enum_files_of_tables_init(XTPathStrPtr tab_name, xtTableID tab_id, XTFilesOfTablePtr ft)
 
979
{
 
980
        ft->ft_state = 0;
 
981
        ft->ft_tab_name = tab_name;
 
982
        ft->ft_tab_id = tab_id;
 
983
}
 
984
 
 
985
xtPublic xtBool xt_enum_files_of_tables_next(XTFilesOfTablePtr ft)
 
986
{
 
987
        char file_name[XT_MAX_TABLE_FILE_NAME_SIZE];
 
988
 
 
989
        retry:
 
990
        switch (ft->ft_state) {
 
991
                case 0:
 
992
                        tab_get_row_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
 
993
                        break;
 
994
                case 1:
 
995
                        tab_get_data_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
 
996
                        break;
 
997
                case 2:
 
998
                        tab_get_index_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
 
999
                        break;
 
1000
                default:
 
1001
                        return FAILED;
 
1002
        }
 
1003
 
 
1004
        ft->ft_state++;
 
1005
        xt_strcpy(PATH_MAX, ft->ft_file_path, ft->ft_tab_name->ps_path);
 
1006
        xt_remove_last_name_of_path(ft->ft_file_path);
 
1007
        xt_strcat(PATH_MAX, ft->ft_file_path, file_name);
 
1008
        if (!xt_fs_exists(ft->ft_file_path))
 
1009
                goto retry;
 
1010
 
 
1011
        return TRUE;
 
1012
}
 
1013
 
 
1014
static xtBool tab_find_table(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr name, xtTableID *tab_id)
 
1015
{
 
1016
        u_int                   edx;
 
1017
        XTTableEntryPtr te_ptr;
 
1018
        char                    path[PATH_MAX];
 
1019
 
 
1020
        xt_enum_tables_init(&edx);
 
1021
        while ((te_ptr = xt_enum_tables_next(self, db, &edx))) {
 
1022
                xt_strcpy(PATH_MAX, path, te_ptr->te_tab_path->tp_path);
 
1023
                xt_add_dir_char(PATH_MAX, path);
 
1024
                xt_strcat(PATH_MAX, path, te_ptr->te_tab_name);
 
1025
                if (xt_tab_compare_names(path, name->ps_path) == 0) {
 
1026
                        *tab_id = te_ptr->te_tab_id;
 
1027
                        return TRUE;
 
1028
                }
 
1029
        }
 
1030
        return FALSE;
 
1031
}
 
1032
 
 
1033
xtPublic void xt_tab_disable_index(XTTableHPtr tab, u_int ind_error)
 
1034
{
 
1035
        tab->tab_dic.dic_disable_index = ind_error;
 
1036
        xt_tab_set_table_repair_pending(tab);
 
1037
}
 
1038
 
 
1039
xtPublic void xt_tab_set_index_error(XTTableHPtr tab)
 
1040
{
 
1041
        switch (tab->tab_dic.dic_disable_index) {
 
1042
                case XT_INDEX_OK:
 
1043
                        break;
 
1044
                case XT_INDEX_TOO_OLD:
 
1045
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_OLD_VERSION, tab->tab_name);
 
1046
                        break;
 
1047
                case XT_INDEX_TOO_NEW:
 
1048
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_NEW_VERSION, tab->tab_name);
 
1049
                        break;
 
1050
                case XT_INDEX_BAD_BLOCK:
 
1051
                        char number[40];
 
1052
 
 
1053
                        sprintf(number, "%d", (int) tab->tab_index_page_size);
 
1054
                        xt_register_i2xterr(XT_REG_CONTEXT, XT_ERR_BAD_IND_BLOCK_SIZE, xt_last_name_of_path(tab->tab_name->ps_path), number);
 
1055
                        break;
 
1056
                case XT_INDEX_CORRUPTED:
 
1057
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, tab->tab_name);
 
1058
                        break;
 
1059
                case XT_INDEX_MISSING:
 
1060
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_MISSING, tab->tab_name);
 
1061
                        break;
 
1062
                case XT_INDEX_NOT_RECOVERED:
 
1063
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_NOT_RECOVERED, tab->tab_name);
 
1064
                        break;
 
1065
        }
 
1066
}
 
1067
 
 
1068
static void tab_load_index_header(XTThreadPtr self, XTTableHPtr tab, XTOpenFilePtr file, XTPathStrPtr table_name)
 
1069
{
 
1070
        XT_NODE_TEMP;
 
1071
        XTIndexPtr                      *ind;
 
1072
        xtWord1                         *data;
 
1073
        XTIndexFormatDPtr       index_fmt;
 
1074
 
 
1075
        /* Load the pointers: */
 
1076
        if (tab->tab_index_head)
 
1077
                xt_free_ns(tab->tab_index_head);
 
1078
        tab->tab_index_head = (XTIndexHeadDPtr) xt_calloc(self, XT_INDEX_HEAD_SIZE);
 
1079
 
 
1080
        if (file) {
 
1081
                if (!xt_pread_file(file, 0, XT_INDEX_HEAD_SIZE, 0, tab->tab_index_head, NULL, &self->st_statistics.st_ind, self))
 
1082
                        xt_throw(self);
 
1083
 
 
1084
                tab->tab_index_format_offset = XT_GET_DISK_4(tab->tab_index_head->tp_format_offset_4);
 
1085
                index_fmt = (XTIndexFormatDPtr) (((xtWord1 *) tab->tab_index_head) + tab->tab_index_format_offset);
 
1086
 
 
1087
                /* If the table version is less than or equal to an incompatible (unsupported
 
1088
                 * version), or greater than the current version, then we cannot open this table
 
1089
                 */
 
1090
                if (XT_GET_DISK_2(index_fmt->if_tab_version_2) <= XT_TAB_INCOMPATIBLE_VERSION ||
 
1091
                        XT_GET_DISK_2(index_fmt->if_tab_version_2) > XT_TAB_CURRENT_VERSION) {
 
1092
                        switch (XT_GET_DISK_2(index_fmt->if_tab_version_2)) {
 
1093
                                case 4: 
 
1094
                                        xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.91 Beta");
 
1095
                                        break;
 
1096
                                case 3: 
 
1097
                                        xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.85 Beta");
 
1098
                                        break;
 
1099
                                default:
 
1100
                                        xt_throw_taberr(XT_CONTEXT, XT_ERR_BAD_TABLE_VERSION, table_name);
 
1101
                                        break;
 
1102
                        }
 
1103
                        return;
 
1104
                }
 
1105
 
 
1106
                tab->tab_dic.dic_index_ver = XT_GET_DISK_2(index_fmt->if_ind_version_2);
 
1107
                tab->tab_dic.dic_disable_index = XT_INDEX_OK;
 
1108
 
 
1109
                if (tab->tab_dic.dic_index_ver == 1) {
 
1110
                        tab->tab_index_header_size = 1024 * 16;
 
1111
                        tab->tab_index_page_size = 1024 * 16;
 
1112
                }
 
1113
                else {
 
1114
                        tab->tab_index_header_size = XT_GET_DISK_4(tab->tab_index_head->tp_header_size_4);
 
1115
                        tab->tab_index_page_size = XT_GET_DISK_4(index_fmt->if_page_size_4);
 
1116
                }       
 
1117
 
 
1118
#ifdef XT_USE_LAZY_DELETE
 
1119
                if (tab->tab_dic.dic_index_ver <= XT_IND_NO_LAZY_DELETE)
 
1120
                        tab->tab_dic.dic_no_lazy_delete = TRUE;
 
1121
                else
 
1122
                        tab->tab_dic.dic_no_lazy_delete = FALSE;
 
1123
#else
 
1124
                tab->tab_dic.dic_no_lazy_delete = TRUE;
 
1125
#endif
 
1126
 
 
1127
                /* Incorrect version of index is handled by allowing a sequential scan, but no index access.
 
1128
                 * Recovery with the wrong index type will not recover the indexes, a REPAIR TABLE
 
1129
                 * will be required!
 
1130
                 */
 
1131
                if (tab->tab_dic.dic_index_ver != XT_IND_CURRENT_VERSION) {
 
1132
                        switch (tab->tab_dic.dic_index_ver) {
 
1133
                                case XT_IND_NO_LAZY_DELETE:
 
1134
                                case XT_IND_LAZY_DELETE_OK:
 
1135
                                        /* I can handle this type of index. */
 
1136
                                        break;
 
1137
                                default:
 
1138
                                        if (tab->tab_dic.dic_index_ver < XT_IND_CURRENT_VERSION)
 
1139
                                                xt_tab_disable_index(tab, XT_INDEX_TOO_OLD);
 
1140
                                        else
 
1141
                                                xt_tab_disable_index(tab, XT_INDEX_TOO_NEW);
 
1142
                                        break;
 
1143
                        }
 
1144
                }
 
1145
                else if (tab->tab_index_page_size != XT_INDEX_PAGE_SIZE)
 
1146
                        xt_tab_disable_index(tab, XT_INDEX_BAD_BLOCK);
 
1147
        }
 
1148
        else {
 
1149
                memset(tab->tab_index_head, 0, XT_INDEX_HEAD_SIZE);
 
1150
                xt_tab_disable_index(tab, XT_INDEX_MISSING);
 
1151
                tab->tab_index_header_size = XT_INDEX_HEAD_SIZE;
 
1152
                tab->tab_index_page_size = XT_INDEX_PAGE_SIZE;
 
1153
                tab->tab_dic.dic_index_ver = 0;
 
1154
                tab->tab_index_format_offset = 0;
 
1155
        }
 
1156
 
 
1157
        
 
1158
        if (tab->tab_dic.dic_disable_index) {
 
1159
                xt_tab_set_index_error(tab);
 
1160
                xt_log_and_clear_exception_ns();
 
1161
        }
 
1162
 
 
1163
        if (tab->tab_dic.dic_disable_index) {
 
1164
                /* Reset, as if we have empty indexes.
 
1165
                 * Flush will wipe things out, of course.
 
1166
                 * REPAIR TABLE will be required...
 
1167
                 */
 
1168
                XT_NODE_ID(tab->tab_ind_eof) = 1;
 
1169
                XT_NODE_ID(tab->tab_ind_free) = 0;
 
1170
 
 
1171
                ind = tab->tab_dic.dic_keys;
 
1172
                for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++)
 
1173
                        XT_NODE_ID((*ind)->mi_root) = 0;
 
1174
        }
 
1175
        else {
 
1176
                XT_NODE_ID(tab->tab_ind_eof) = (xtIndexNodeID) XT_GET_DISK_6(tab->tab_index_head->tp_ind_eof_6);
 
1177
                XT_NODE_ID(tab->tab_ind_free) = (xtIndexNodeID) XT_GET_DISK_6(tab->tab_index_head->tp_ind_free_6);
 
1178
 
 
1179
                data = tab->tab_index_head->tp_data;
 
1180
                ind = tab->tab_dic.dic_keys;
 
1181
                for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++) {
 
1182
                        (*ind)->mi_root = XT_GET_NODE_REF(tab, data);
 
1183
                        data += XT_NODE_REF_SIZE;
 
1184
                }
 
1185
        }
 
1186
}
 
1187
 
 
1188
static void tab_load_table_format(XTThreadPtr self, XTOpenFilePtr file, XTPathStrPtr table_name, size_t *ret_format_offset, size_t *ret_head_size, XTDictionaryPtr dic)
 
1189
{
 
1190
        XTDiskValue4            size_buf;
 
1191
        size_t                          head_size;
 
1192
        XTTableFormatDRec       tab_fmt;
 
1193
        size_t                          fmt_size;
 
1194
 
 
1195
        if (!xt_pread_file(file, 0, 4, 4, &size_buf, NULL, &self->st_statistics.st_rec, self))
 
1196
                xt_throw(self);
 
1197
 
 
1198
        head_size = XT_GET_DISK_4(size_buf);
 
1199
        *ret_format_offset = head_size;
 
1200
 
 
1201
        /* Load the table format information: */
 
1202
        if (!xt_pread_file(file, head_size, offsetof(XTTableFormatDRec, tf_definition), offsetof(XTTableFormatDRec, tf_tab_version_2) + 2, &tab_fmt, NULL, &self->st_statistics.st_rec, self))
 
1203
                xt_throw(self);
 
1204
 
 
1205
        /* If the table version is less than or equal to an incompatible (unsupported
 
1206
         * version), or greater than the current version, then we cannot open this table
 
1207
         */
 
1208
        if (XT_GET_DISK_2(tab_fmt.tf_tab_version_2) <= XT_TAB_INCOMPATIBLE_VERSION ||
 
1209
                XT_GET_DISK_2(tab_fmt.tf_tab_version_2) > XT_TAB_CURRENT_VERSION) {
 
1210
                switch (XT_GET_DISK_2(tab_fmt.tf_tab_version_2)) {
 
1211
                        case 4: 
 
1212
                                xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.91 Beta");
 
1213
                                break;
 
1214
                        case 3: 
 
1215
                                xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.85 Beta");
 
1216
                                break;
 
1217
                        default:
 
1218
                                xt_throw_taberr(XT_CONTEXT, XT_ERR_BAD_TABLE_VERSION, table_name);
 
1219
                                break;
 
1220
                }
 
1221
                return;
 
1222
        }
 
1223
 
 
1224
        fmt_size = XT_GET_DISK_4(tab_fmt.tf_format_size_4);
 
1225
        *ret_head_size = XT_GET_DISK_4(tab_fmt.tf_tab_head_size_4);
 
1226
        dic->dic_rec_size = XT_GET_DISK_4(tab_fmt.tf_rec_size_4);
 
1227
        dic->dic_rec_fixed = XT_GET_DISK_1(tab_fmt.tf_rec_fixed_1);
 
1228
        dic->dic_min_auto_inc = XT_GET_DISK_8(tab_fmt.tf_min_auto_inc_8);
 
1229
        if (fmt_size > offsetof(XTTableFormatDRec, tf_definition)) {
 
1230
                size_t  def_size = fmt_size - offsetof(XTTableFormatDRec, tf_definition);
 
1231
                char    *def_sql;
 
1232
 
 
1233
                pushsr_(def_sql, xt_free, (char *) xt_malloc(self, def_size));
 
1234
                if (!xt_pread_file(file, head_size+offsetof(XTTableFormatDRec, tf_definition), def_size, def_size, def_sql, NULL, &self->st_statistics.st_rec, self))
 
1235
                        xt_throw(self);
 
1236
                dic->dic_table = xt_ri_create_table(self, false, table_name, def_sql, myxt_create_table_from_table(self, dic->dic_my_table), NULL);
 
1237
                freer_(); // xt_free(def_sql)
 
1238
        }
 
1239
        else
 
1240
                dic->dic_table = myxt_create_table_from_table(self, dic->dic_my_table);
 
1241
}
 
1242
 
 
1243
static void tab_load_table_header(XTThreadPtr self, XTTableHPtr tab, XTOpenFilePtr file)
 
1244
{
 
1245
        XTTableHeadDRec rec_head;
 
1246
 
 
1247
        if (!xt_pread_file(file, 0, sizeof(XTTableHeadDRec), sizeof(XTTableHeadDRec), (xtWord1 *) &rec_head, NULL, &self->st_statistics.st_rec, self))
 
1248
                xt_throw(self);
 
1249
 
 
1250
        tab->tab_head_op_seq = XT_GET_DISK_4(rec_head.th_op_seq_4);
 
1251
        tab->tab_head_row_free_id = (xtRowID) XT_GET_DISK_6(rec_head.th_row_free_6);
 
1252
        tab->tab_head_row_eof_id = (xtRowID) XT_GET_DISK_6(rec_head.th_row_eof_6);
 
1253
        tab->tab_head_row_fnum = (xtWord4) XT_GET_DISK_6(rec_head.th_row_fnum_6);
 
1254
        tab->tab_head_rec_free_id = (xtRecordID) XT_GET_DISK_6(rec_head.th_rec_free_6);
 
1255
        tab->tab_head_rec_eof_id = (xtRecordID) XT_GET_DISK_6(rec_head.th_rec_eof_6);
 
1256
        tab->tab_head_rec_fnum = (xtWord4) XT_GET_DISK_6(rec_head.th_rec_fnum_6);
 
1257
        tab->tab_wr_op_seq = tab->tab_head_op_seq;
 
1258
}
 
1259
 
 
1260
xtPublic void xt_tab_store_header(XTOpenTablePtr ot, XTTableHeadDPtr rec_head)
 
1261
{
 
1262
        XTTableHPtr tab = ot->ot_table;
 
1263
 
 
1264
        XT_SET_DISK_4(rec_head->th_op_seq_4, tab->tab_head_op_seq);
 
1265
        XT_SET_DISK_6(rec_head->th_row_free_6, tab->tab_head_row_free_id);
 
1266
        XT_SET_DISK_6(rec_head->th_row_eof_6, tab->tab_head_row_eof_id);
 
1267
        XT_SET_DISK_6(rec_head->th_row_fnum_6, tab->tab_head_row_fnum);
 
1268
        XT_SET_DISK_6(rec_head->th_rec_free_6, tab->tab_head_rec_free_id);
 
1269
        XT_SET_DISK_6(rec_head->th_rec_eof_6, tab->tab_head_rec_eof_id);
 
1270
        XT_SET_DISK_6(rec_head->th_rec_fnum_6, tab->tab_head_rec_fnum);
 
1271
}
 
1272
 
 
1273
static xtBool tab_write_header(XTOpenTablePtr ot, XTTableHeadDPtr rec_head)
 
1274
{
 
1275
        if (!xt_tab_write_rec(ot, offsetof(XTTableHeadDRec, th_op_seq_4), 40, (xtWord1 *) rec_head->th_op_seq_4))
 
1276
                return FAILED;
 
1277
        if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread))
 
1278
                return FAILED;
 
1279
        return OK;
 
1280
}
 
1281
 
 
1282
xtPublic xtBool xt_tab_write_min_auto_inc(XTOpenTablePtr ot)
 
1283
{
 
1284
        xtWord1         value[8];
 
1285
        off_t           offset;
 
1286
 
 
1287
        XT_SET_DISK_8(value, ot->ot_table->tab_dic.dic_min_auto_inc);
 
1288
        offset = ot->ot_table->tab_table_format_offset + offsetof(XTTableFormatDRec, tf_min_auto_inc_8);
 
1289
        if (!xt_tab_write_rec(ot, offset, 8, value))
 
1290
                return FAILED;
 
1291
        if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread))
 
1292
                return FAILED;
 
1293
        return OK;
 
1294
}
 
1295
 
 
1296
/* a helper function to remove table from the open tables hash on exception
 
1297
 * used in tab_new_handle() below
 
1298
 */
 
1299
 #ifdef NO_LONGER_REQ
 
1300
static void xt_del_from_db_tables_ht(XTThreadPtr self, XTTableHPtr tab)
 
1301
{
 
1302
        XTTableEntryPtr te_ptr;
 
1303
        XTDatabaseHPtr  db = tab->tab_db;
 
1304
        xtTableID               tab_id = tab->tab_id;
 
1305
 
 
1306
        /* Oops! should use tab->tab_name, instead of tab! */
 
1307
        xt_ht_del(self, db->db_tables, tab->tab_name);
 
1308
 
 
1309
        /* Remove the reference from the ID list, when a table is
 
1310
         * removed from the table name list:
 
1311
         */
 
1312
        if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id)))
 
1313
                te_ptr->te_table = NULL;
 
1314
}
 
1315
#endif
 
1316
 
 
1317
xtPublic XTFileType xt_rec_file_type(xtBool heap_tab)
 
1318
{
 
1319
        if (heap_tab)
 
1320
                return XT_FT_HEAP;
 
1321
        if (XT_REC_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
 
1322
                return XT_FT_REWRITE_FLUSH;
 
1323
        return XT_REC_FILE_TYPE;
 
1324
}
 
1325
 
 
1326
xtPublic XTFileType xt_row_file_type(xtBool heap_tab)
 
1327
{
 
1328
        if (heap_tab)
 
1329
                return XT_FT_HEAP;
 
1330
        if (XT_REC_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
 
1331
                return XT_FT_REWRITE_FLUSH;
 
1332
        return XT_ROW_FILE_TYPE;
 
1333
}
 
1334
 
 
1335
xtPublic XTFileType xt_ind_file_type(xtBool heap_tab)
 
1336
{
 
1337
        if (heap_tab)
 
1338
                return XT_FT_HEAP;
 
1339
        if (XT_IND_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
 
1340
                return XT_FT_REWRITE_FLUSH;
 
1341
        return XT_IND_FILE_TYPE;
 
1342
}
 
1343
 
 
1344
#ifdef XT_SORT_REC_WRITES
 
1345
static int tab_cmp_dw_rec_id(struct XTThread *XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
 
1346
{
 
1347
        xtRecordID              rec_id = *((xtRecordID *) a);
 
1348
        XTDelayWritePtr dw_ptr = (XTDelayWritePtr) b;
 
1349
 
 
1350
        if (rec_id == dw_ptr->dw_rec_id)
 
1351
                return 0;
 
1352
        if (rec_id < dw_ptr->dw_rec_id)
 
1353
                return -1;
 
1354
        return 1;
 
1355
}
 
1356
#endif
 
1357
 
 
1358
/*
 
1359
 * Create a new table handle (i.e. open a table).
 
1360
 * Return NULL if the table is missing, and it is OK for the table
 
1361
 * to be missing.
 
1362
 */
 
1363
static int tab_new_handle(XTThreadPtr self, XTTableHPtr *r_tab, XTDatabaseHPtr db, xtTableID tab_id, XTPathStrPtr tab_path, xtBool missing_ok, XTDictionaryPtr dic)
 
1364
{
 
1365
        char                    path[PATH_MAX];
 
1366
        XTTableHPtr             tab;
 
1367
        char                    file_name[XT_MAX_TABLE_FILE_NAME_SIZE];
 
1368
        XTOpenFilePtr   of_rec, of_ind;
 
1369
        XTTableEntryPtr te_ptr;
 
1370
        size_t                  tab_format_offset;
 
1371
        size_t                  tab_head_size = 0;
 
1372
 
 
1373
        enter_();
 
1374
 
 
1375
        te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
 
1376
        ASSERT(te_ptr);
 
1377
 
 
1378
        tab = (XTTableHPtr) xt_heap_new(self, sizeof(XTTableHRec), tab_finalize);
 
1379
        pushr_(xt_heap_release, tab);
 
1380
 
 
1381
        tab->tab_name = (XTPathStrPtr) xt_dup_string(self, tab_path->ps_path);
 
1382
        tab->tab_db = db;
 
1383
        tab->tab_id = tab_id;
 
1384
        tab->tab_dic.dic_table_type = te_ptr->te_type;
 
1385
#ifdef TRACE_TABLE_IDS
 
1386
        PRINTF("%s: allocated TABLE: db=%d tab=%d %s\n", self->t_name, (int) db->db_id, (int) tab->tab_id, xt_last_2_names_of_path(tab->tab_name->ps_path));
 
1387
#endif
 
1388
 
 
1389
        if (dic) {
 
1390
                myxt_move_dictionary(&tab->tab_dic, dic);
 
1391
                myxt_setup_dictionary(self, &tab->tab_dic);
 
1392
        }
 
1393
        else {
 
1394
                if (!myxt_load_dictionary(self, &tab->tab_dic, db, tab_path)) {
 
1395
                        freer_(); // xt_heap_release(tab)
 
1396
                        return_(XT_TAB_NO_DICTIONARY);
 
1397
                }
 
1398
        }
 
1399
 
 
1400
        /* Do not use the XT_TF_DDL_TEMP_TABLE bit from the given dic
 
1401
         * This bit depends only on the
 
1402
         * name of the table, and must be set explicitly.
 
1403
         */
 
1404
        if (myxt_temp_table_name(tab_path->ps_path))
 
1405
                tab->tab_dic.dic_tab_flags |= XT_TF_DDL_TEMP_TABLE;
 
1406
        else
 
1407
                tab->tab_dic.dic_tab_flags &= ~XT_TF_DDL_TEMP_TABLE;
 
1408
 
 
1409
        tab->tab_seq.xt_op_seq_init(self);
 
1410
        xt_spinlock_init_with_autoname(self, &tab->tab_ainc_lock);
 
1411
        xt_init_mutex_with_autoname(self, &tab->tab_rec_flush_lock);
 
1412
        xt_init_mutex_with_autoname(self, &tab->tab_ind_flush_lock);
 
1413
        xt_init_mutex_with_autoname(self, &tab->tab_ind_stat_lock);
 
1414
        xt_init_mutex_with_autoname(self, &tab->tab_dic_field_lock);
 
1415
        xt_init_mutex_with_autoname(self, &tab->tab_row_lock);
 
1416
        xt_init_mutex_with_autoname(self, &tab->tab_ind_lock);
 
1417
        xt_init_mutex_with_autoname(self, &tab->tab_rec_lock);
 
1418
        xt_spinlock_init_with_autoname(self, &tab->tab_mem_lock);
 
1419
        if (!(tab->tab_rec_flush_task = new XTFlushRecRowTask()))
 
1420
                xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
 
1421
        tab->tab_rec_flush_task->tk_init(self);
 
1422
        tab->tab_rec_flush_task->frt_table = tab;
 
1423
        if (!(tab->tab_ind_flush_task = new XTFlushIndexTask()))
 
1424
                xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
 
1425
        tab->tab_ind_flush_task->tk_init(self);
 
1426
        tab->tab_ind_flush_task->fit_table = tab;
 
1427
        for (u_int i=0; i<XT_ROW_RWLOCKS; i++)
 
1428
                XT_TAB_ROW_INIT_LOCK(self, &tab->tab_row_rwlock[i]);
 
1429
        tab->tab_free_locks = TRUE;
 
1430
 
 
1431
        xt_strcpy(PATH_MAX, path, tab_path->ps_path);
 
1432
        xt_remove_last_name_of_path(path);
 
1433
        tab_get_row_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
 
1434
        xt_strcat(PATH_MAX, path, file_name);
 
1435
        tab->tab_row_file = xt_fs_get_file(self, path, xt_row_file_type(te_ptr->te_heap_tab));
 
1436
 
 
1437
        xt_remove_last_name_of_path(path);
 
1438
        tab_get_data_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
 
1439
        xt_strcat(PATH_MAX, path, file_name);
 
1440
        tab->tab_rec_file = xt_fs_get_file(self, path, xt_rec_file_type(te_ptr->te_heap_tab));
 
1441
 
 
1442
        xt_remove_last_name_of_path(path);
 
1443
        tab_get_index_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
 
1444
        xt_strcat(PATH_MAX, path, file_name);
 
1445
        tab->tab_ind_file = xt_fs_get_file(self, path, xt_ind_file_type(te_ptr->te_heap_tab));
 
1446
 
 
1447
        if (te_ptr->te_heap_tab) {
 
1448
                XTOpenFilePtr   of_row;
 
1449
 
 
1450
                tab->tab_dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
 
1451
                of_row = xt_open_file(self, tab->tab_row_file->fil_path, xt_row_file_type(TRUE), XT_FS_CREATE, xt_db_row_file_grow_size);
 
1452
                pushr_(xt_close_file, of_row);
 
1453
                if (xt_seek_eof_file(self, of_row) == 0)
 
1454
                        tab_init_row_file(self, of_row, tab, &tab->tab_dic);
 
1455
                freer_(); // xt_close_file(of_row)
 
1456
 
 
1457
                of_ind = xt_open_file(self, tab->tab_ind_file->fil_path, xt_ind_file_type(TRUE), XT_FS_CREATE, XT_INDEX_PAGE_SIZE*256);
 
1458
                if (xt_seek_eof_file(self, of_ind) == 0)
 
1459
                        tab_init_ind_file(self, of_ind, tab, &tab->tab_dic);
 
1460
                pushr_(xt_close_file, of_ind);
 
1461
                tab_load_index_header(self, tab, of_ind, tab_path);
 
1462
                freer_(); // xt_close_file(of_ind)
 
1463
 
 
1464
                of_rec = xt_open_file(self, tab->tab_rec_file->fil_path, xt_rec_file_type(te_ptr->te_heap_tab), XT_FS_CREATE, xt_db_data_file_grow_size);
 
1465
                pushr_(xt_close_file, of_rec);
 
1466
                if (xt_seek_eof_file(self, of_rec) == 0)
 
1467
                        tab_init_data_file(self, of_rec, tab, &tab->tab_dic, 0, NULL);
 
1468
        }
 
1469
        else {
 
1470
#ifdef XT_SORT_REC_WRITES
 
1471
                tab->tab_rec_dw_writes = xt_new_sortedlist(self, sizeof(XTDelayWriteRec), 20, 10, tab_cmp_dw_rec_id, NULL, NULL, TRUE, FALSE);
 
1472
#endif
 
1473
                of_ind = xt_open_file(self, tab->tab_ind_file->fil_path, xt_ind_file_type(FALSE), XT_FS_MISSING_OK, XT_INDEX_PAGE_SIZE*256);
 
1474
                if (of_ind) {
 
1475
                        pushr_(xt_close_file, of_ind);
 
1476
                        tab_load_index_header(self, tab, of_ind, tab_path);
 
1477
                        freer_(); // xt_close_file(of_ind)
 
1478
                }
 
1479
                else
 
1480
                        tab_load_index_header(self, tab, of_ind, tab_path);
 
1481
 
 
1482
                of_rec = xt_open_file(self, tab->tab_rec_file->fil_path, xt_rec_file_type(FALSE), missing_ok ? XT_FS_MISSING_OK : XT_FS_DEFAULT, xt_db_data_file_grow_size);
 
1483
                if (!of_rec) {
 
1484
                        freer_(); // xt_heap_release(tab)
 
1485
                        return_(XT_TAB_NOT_FOUND);
 
1486
                }
 
1487
                pushr_(xt_close_file, of_rec);
 
1488
        }
 
1489
 
 
1490
        tab_load_table_format(self, of_rec, tab_path, &tab_format_offset, &tab_head_size, &tab->tab_dic);
 
1491
        tab->tab_table_format_offset = tab_format_offset;
 
1492
        tab->tab_table_head_size = tab_head_size;
 
1493
        tab->tab_dic.dic_table->dt_table = tab;
 
1494
        tab_load_table_header(self, tab, of_rec);
 
1495
        freer_(); // xt_close_file(of_rec)
 
1496
 
 
1497
        tab->tab_seq.xt_op_seq_set(self, tab->tab_wr_op_seq+1);
 
1498
        tab->tab_row_eof_id = tab->tab_head_row_eof_id;
 
1499
        tab->tab_row_free_id = tab->tab_head_row_free_id;
 
1500
        tab->tab_row_fnum = tab->tab_head_row_fnum;
 
1501
        tab->tab_rec_eof_id = tab->tab_head_rec_eof_id;
 
1502
        tab->tab_rec_free_id = tab->tab_head_rec_free_id;
 
1503
        tab->tab_rec_fnum = tab->tab_head_rec_fnum;
 
1504
 
 
1505
        tab->tab_rows.xt_tc_setup(tab, FALSE, sizeof(XTTabRowHeadDRec), sizeof(XTTabRowRefDRec));
 
1506
        tab->tab_recs.xt_tc_setup(tab, TRUE, tab_head_size, tab->tab_dic.dic_rec_size);
 
1507
 
 
1508
        xt_xres_init_tab(self, tab);
 
1509
 
 
1510
        if (!xt_init_row_locks(&tab->tab_locks))
 
1511
                xt_throw(self);
 
1512
 
 
1513
        xt_heap_set_release_callback(tab, tab_onrelease);
 
1514
 
 
1515
        tab->tab_repair_pending = xt_tab_is_table_repair_pending(tab);
 
1516
 
 
1517
        popr_(); // Discard xt_heap_release(tab)
 
1518
 
 
1519
        xt_ht_put(self, db->db_tables, tab);
 
1520
 
 
1521
        /* Add a reference to the ID list, when a table is
 
1522
         * added to the table name list:
 
1523
         */
 
1524
        te_ptr->te_table = tab;
 
1525
 
 
1526
    /* Moved from after xt_init_row_locks() above, so that calling
 
1527
     * xt_use_table_no_lock() with no_load == FALSE from attachReferences()
 
1528
     * will work if we have cyclic foreign key references.
 
1529
     */ 
 
1530
        if (tab->tab_dic.dic_table) {
 
1531
                try_(a) {
 
1532
                        tab->tab_dic.dic_table->attachReferences(self, db);
 
1533
                }
 
1534
                catch_(a) {
 
1535
                        /* Errors are thrown when: set foreign_key_checks = 1 */
 
1536
                        /* Undo everything done above: */
 
1537
                        xt_ht_del(self, db->db_tables, tab->tab_name);
 
1538
                        xt_throw(self);
 
1539
                }
 
1540
                cont_(a);
 
1541
        }
 
1542
 
 
1543
        *r_tab = tab;
 
1544
        return_(XT_TAB_OK);
 
1545
}
 
1546
 
 
1547
/*
 
1548
 * Get a reference to a table in the current database. The table reference is valid,
 
1549
 * as long as the thread is using the database!!!
 
1550
 */
 
1551
xtPublic XTTableHPtr xt_use_table_no_lock(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr name, xtBool no_load, xtBool missing_ok, XTDictionaryPtr dic)
 
1552
{
 
1553
        XTTableHPtr tab;
 
1554
 
 
1555
        if (!db)
 
1556
                xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
 
1557
 
 
1558
        tab = (XTTableHPtr) xt_ht_get(self, db->db_tables, name);
 
1559
        if (!tab && !no_load) {
 
1560
                xtTableID       tab_id = 0;
 
1561
 
 
1562
                if (!tab_find_table(self, db, name, &tab_id)) {
 
1563
                        if (missing_ok)
 
1564
                                return NULL;
 
1565
                        xt_throw_taberr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND, name);
 
1566
                }
 
1567
 
 
1568
                switch (tab_new_handle(self, &tab, db, tab_id, name, missing_ok, dic)) {
 
1569
                        case XT_TAB_NO_DICTIONARY:
 
1570
                                xt_throw_taberr(XT_CONTEXT, XT_ERR_NO_DICTIONARY, name);
 
1571
                        case XT_TAB_POOL_CLOSED:
 
1572
                                xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TABLE_LOCKED, (u_long) tab_id);
 
1573
                        case XT_TAB_NOT_FOUND:
 
1574
                                if (missing_ok)
 
1575
                                        return NULL;
 
1576
                                xt_throw_taberr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND, name);
 
1577
                        default:
 
1578
                                break;
 
1579
                }
 
1580
        }
 
1581
        
 
1582
        if (tab)
 
1583
                xt_heap_reference(self, tab);
 
1584
 
 
1585
        return tab;
 
1586
}
 
1587
 
 
1588
xtPublic XTTableHPtr xt_use_table_no_lock_ns(struct XTDatabase *db, XTPathStrPtr name, xtBool no_load, xtBool missing_ok, XTDictionaryPtr dic)
 
1589
{
 
1590
        XTTableHPtr     tab;
 
1591
        XTThreadPtr     self = xt_get_self();
 
1592
 
 
1593
        try_(a) {
 
1594
                tab = xt_use_table_no_lock(self, db, name, no_load, missing_ok, dic);
 
1595
        }
 
1596
        catch_(a) {
 
1597
                tab = NULL;
 
1598
        }
 
1599
        cont_(a);
 
1600
        return tab;
 
1601
}
 
1602
 
 
1603
static void tab_close_table(XTOpenTablePtr ot)
 
1604
{
 
1605
        xt_ind_free_reserved(ot);
 
1606
 
 
1607
        if (ot->ot_rec_file) {
 
1608
                XT_CLOSE_RR_FILE_NS(ot->ot_rec_file);
 
1609
                ot->ot_rec_file = NULL;
 
1610
                
 
1611
        }
 
1612
        if (ot->ot_ind_file) {
 
1613
                xt_close_file_ns(ot->ot_ind_file);
 
1614
                ot->ot_ind_file = NULL;
 
1615
                
 
1616
        }
 
1617
        if (ot->ot_row_file) {
 
1618
                XT_CLOSE_RR_FILE_NS(ot->ot_row_file);
 
1619
                ot->ot_row_file = NULL;
 
1620
                
 
1621
        }
 
1622
        if (ot->ot_table) {
 
1623
                xt_heap_release(xt_get_self(), ot->ot_table);
 
1624
                ot->ot_table = NULL;
 
1625
        }
 
1626
        if (ot->ot_ind_rhandle) {
 
1627
                xt_ind_release_handle(ot->ot_ind_rhandle, FALSE, ot->ot_thread);
 
1628
                ot->ot_ind_rhandle = NULL;
 
1629
        }
 
1630
        if (ot->ot_row_rbuffer) {
 
1631
                xt_free_ns(ot->ot_row_rbuffer);
 
1632
                ot->ot_row_rbuf_size = 0;
 
1633
                ot->ot_row_rbuffer = NULL;
 
1634
        }
 
1635
        if (ot->ot_row_wbuffer) {
 
1636
                xt_free_ns(ot->ot_row_wbuffer);
 
1637
                ot->ot_row_wbuf_size = 0;
 
1638
                ot->ot_row_wbuffer = NULL;
 
1639
        }
 
1640
#ifdef XT_TRACK_RETURNED_ROWS
 
1641
        if (ot->ot_rows_returned) {
 
1642
                xt_free_ns(ot->ot_rows_returned);
 
1643
                ot->ot_rows_returned = NULL;
 
1644
        }
 
1645
        ot->ot_rows_ret_curr = 0;
 
1646
        ot->ot_rows_ret_max = 0;
 
1647
#endif
 
1648
        xt_free(NULL, ot);
 
1649
}
 
1650
 
 
1651
static void tab_delete_table_files(XTThreadPtr self, XTPathStrPtr tab_name, xtTableID tab_id)
 
1652
{
 
1653
        XTFilesOfTableRec       ft;
 
1654
 
 
1655
        xt_enum_files_of_tables_init(tab_name, tab_id, &ft);
 
1656
        while (xt_enum_files_of_tables_next(&ft)) {
 
1657
                if (!xt_fs_delete(NULL, ft.ft_file_path))
 
1658
                        xt_log_and_clear_exception(self);
 
1659
        }
 
1660
}
 
1661
 
 
1662
static void tab_init_row_file(XTThreadPtr self, XTOpenFilePtr of_row, XTTableHPtr tab, XTDictionaryPtr XT_UNUSED(dic))
 
1663
{
 
1664
        XTTabRowHeadDRec        row_head;
 
1665
 
 
1666
        tab->tab_row_eof_id = 1;
 
1667
        tab->tab_row_free_id = 0;
 
1668
        tab->tab_row_fnum = 0;
 
1669
 
 
1670
        tab->tab_head_row_eof_id = 1;
 
1671
        tab->tab_head_row_free_id = 0;
 
1672
        tab->tab_head_row_fnum  = 0;
 
1673
 
 
1674
        XT_SET_DISK_4(row_head.rh_magic_4, XT_TAB_ROW_MAGIC);
 
1675
        if (!xt_pwrite_file(of_row, 0, sizeof(row_head), &row_head, &self->st_statistics.st_rec, self))
 
1676
                xt_throw(self);
 
1677
}
 
1678
 
 
1679
static void tab_init_data_file(XTThreadPtr self, XTOpenFilePtr of_rec, XTTableHPtr tab, XTDictionaryPtr dic, size_t def_len, XTStringBufferPtr tab_def)
 
1680
{
 
1681
        off_t                           eof;
 
1682
        XTTableHeadDRec         rec_head;
 
1683
        XTTableFormatDRec       table_fmt;
 
1684
 
 
1685
        /* Calculate the offset of the first record in the data handle file. */
 
1686
        eof = sizeof(XTTableHeadDRec) + offsetof(XTTableFormatDRec, tf_definition) + def_len + XT_FORMAT_DEF_SPACE;
 
1687
        eof = (eof + 1024 - 1) / 1024 * 1024;           // Round to a value divisible by 1024
 
1688
 
 
1689
        tab->tab_table_format_offset = sizeof(XTTableHeadDRec);
 
1690
        tab->tab_table_head_size = (size_t) eof;
 
1691
 
 
1692
        tab->tab_rec_eof_id = 1;                                                // This is the first record ID!
 
1693
        tab->tab_rec_free_id = 0;
 
1694
        tab->tab_rec_fnum = 0;
 
1695
        
 
1696
        tab->tab_head_rec_eof_id = 1;                                   // The first record ID
 
1697
        tab->tab_head_rec_free_id = 0;
 
1698
        tab->tab_head_rec_fnum = 0;
 
1699
 
 
1700
        tab->tab_dic.dic_rec_size = dic->dic_rec_size;
 
1701
        tab->tab_dic.dic_rec_fixed = dic->dic_rec_fixed;
 
1702
        tab->tab_dic.dic_tab_flags = dic->dic_tab_flags;
 
1703
        tab->tab_dic.dic_min_auto_inc = dic->dic_min_auto_inc;
 
1704
        tab->tab_dic.dic_def_ave_row_size = dic->dic_def_ave_row_size;
 
1705
        tab->tab_dic.dic_table_type = dic->dic_table_type;
 
1706
 
 
1707
        XT_SET_DISK_4(rec_head.th_head_size_4, sizeof(XTTableHeadDRec));
 
1708
        XT_SET_DISK_4(rec_head.th_op_seq_4, tab->tab_head_op_seq);
 
1709
        XT_SET_DISK_6(rec_head.th_row_free_6, tab->tab_head_row_free_id);
 
1710
        XT_SET_DISK_6(rec_head.th_row_eof_6, tab->tab_head_row_eof_id);
 
1711
        XT_SET_DISK_6(rec_head.th_row_fnum_6, tab->tab_head_row_fnum);
 
1712
        XT_SET_DISK_6(rec_head.th_rec_free_6, tab->tab_head_rec_free_id);
 
1713
        XT_SET_DISK_6(rec_head.th_rec_eof_6, tab->tab_head_rec_eof_id);
 
1714
        XT_SET_DISK_6(rec_head.th_rec_fnum_6, tab->tab_head_rec_fnum);
 
1715
 
 
1716
        if (!xt_pwrite_file(of_rec, 0, sizeof(XTTableHeadDRec), &rec_head, &self->st_statistics.st_rec, self))
 
1717
                xt_throw(self);
 
1718
 
 
1719
        /* Store the table format: */
 
1720
        memset(&table_fmt, 0, offsetof(XTTableFormatDRec, tf_definition));
 
1721
        XT_SET_DISK_4(table_fmt.tf_format_size_4, offsetof(XTTableFormatDRec, tf_definition) + def_len);
 
1722
        XT_SET_DISK_4(table_fmt.tf_tab_head_size_4, eof);
 
1723
        XT_SET_DISK_2(table_fmt.tf_tab_version_2, XT_TAB_CURRENT_VERSION);
 
1724
        XT_SET_DISK_4(table_fmt.tf_rec_size_4, tab->tab_dic.dic_rec_size);
 
1725
        XT_SET_DISK_1(table_fmt.tf_rec_fixed_1, tab->tab_dic.dic_rec_fixed);
 
1726
        XT_SET_DISK_2(table_fmt.tf_tab_unused_2, 0);
 
1727
        XT_SET_DISK_8(table_fmt.tf_min_auto_inc_8, tab->tab_dic.dic_min_auto_inc);
 
1728
 
 
1729
        if (!xt_pwrite_file(of_rec, sizeof(XTTableHeadDRec), offsetof(XTTableFormatDRec, tf_definition), &table_fmt, &self->st_statistics.st_rec, self))
 
1730
                xt_throw(self);
 
1731
        if (def_len) {
 
1732
                if (!xt_pwrite_file(of_rec, sizeof(XTTableHeadDRec) + offsetof(XTTableFormatDRec, tf_definition), def_len, tab_def->sb_cstring, &self->st_statistics.st_rec, self))
 
1733
                        xt_throw(self);
 
1734
        }
 
1735
}
 
1736
 
 
1737
static void tab_init_ind_file(XTThreadPtr self, XTOpenFilePtr of_ind, XTTableHPtr tab, XTDictionaryPtr dic)
 
1738
{
 
1739
        XTIndexFormatDPtr       index_fmt;
 
1740
 
 
1741
        /* This is the size of the index header: */
 
1742
        tab->tab_index_format_offset = offsetof(XTIndexHeadDRec, tp_data) + dic->dic_key_count * XT_NODE_REF_SIZE;
 
1743
        if (!(tab->tab_index_head = (XTIndexHeadDPtr) xt_calloc_ns(XT_INDEX_HEAD_SIZE)))
 
1744
                xt_throw(self);
 
1745
 
 
1746
        XT_NODE_ID(tab->tab_ind_eof) = 1;
 
1747
        XT_NODE_ID(tab->tab_ind_free) = 0;
 
1748
 
 
1749
        XT_SET_DISK_4(tab->tab_index_head->tp_header_size_4, XT_INDEX_HEAD_SIZE);
 
1750
        XT_SET_DISK_4(tab->tab_index_head->tp_format_offset_4, tab->tab_index_format_offset);
 
1751
        XT_SET_DISK_6(tab->tab_index_head->tp_ind_eof_6, XT_NODE_ID(tab->tab_ind_eof));
 
1752
        XT_SET_DISK_6(tab->tab_index_head->tp_ind_free_6, XT_NODE_ID(tab->tab_ind_free));
 
1753
 
 
1754
        /* Store the index format: */
 
1755
        index_fmt = (XTIndexFormatDPtr) (((xtWord1 *) tab->tab_index_head) + tab->tab_index_format_offset);
 
1756
        XT_SET_DISK_4(index_fmt->if_format_size_4, sizeof(XTIndexFormatDRec));
 
1757
        XT_SET_DISK_2(index_fmt->if_tab_version_2, XT_TAB_CURRENT_VERSION);
 
1758
        XT_SET_DISK_2(index_fmt->if_ind_version_2, XT_IND_CURRENT_VERSION);
 
1759
        XT_SET_DISK_1(index_fmt->if_node_ref_size_1, XT_NODE_REF_SIZE);
 
1760
        XT_SET_DISK_1(index_fmt->if_rec_ref_size_1, XT_RECORD_REF_SIZE);
 
1761
        XT_SET_DISK_4(index_fmt->if_page_size_4, XT_INDEX_PAGE_SIZE);
 
1762
 
 
1763
        /* Save the header: */
 
1764
        if (!xt_pwrite_file(of_ind, 0, XT_INDEX_HEAD_SIZE, tab->tab_index_head, &self->st_statistics.st_ind, self))
 
1765
                xt_throw(self);
 
1766
}
 
1767
 
 
1768
xtPublic void xt_create_table(XTThreadPtr self, XTPathStrPtr name, XTDictionaryPtr dic)
 
1769
{
 
1770
        char                            table_name[XT_MAX_TABLE_FILE_NAME_SIZE];
 
1771
        char                            path[PATH_MAX];
 
1772
        XTDatabaseHPtr          db = self->st_database;
 
1773
        XTOpenTablePoolPtr      table_pool;
 
1774
        XTTableHPtr                     tab;
 
1775
        XTTableHPtr                     old_tab = NULL;
 
1776
        xtTableID                       old_tab_id = 0;
 
1777
        xtTableID                       tab_id = 0;
 
1778
        XTStringBufferRec       tab_def = { 0, 0, 0 };
 
1779
        XTTableEntryRec         te_tab;
 
1780
        XTSortedListInfoRec     li_undo;
 
1781
 
 
1782
#ifdef TRACE_CREATE_TABLES
 
1783
        printf("CREATE %s\n", name->ps_path);
 
1784
#endif
 
1785
        enter_();
 
1786
        if (strlen(xt_last_name_of_path(name->ps_path)) > XT_TABLE_NAME_SIZE-1)
 
1787
                xt_throw_taberr(XT_CONTEXT, XT_ERR_NAME_TOO_LONG, name);
 
1788
        if (!db)
 
1789
                xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
 
1790
 
 
1791
        /* Lock to prevent table list change during creation. */
 
1792
        table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, name, FALSE, TRUE, TRUE, &old_tab);
 
1793
        pushr_(xt_db_unlock_table_pool, table_pool);
 
1794
        xt_ht_lock(self, db->db_tables);
 
1795
        pushr_(xt_ht_unlock, db->db_tables);
 
1796
        pushr_(xt_heap_release, old_tab);
 
1797
 
 
1798
        /* This must be done before we remove the old table
 
1799
         * from the directory, or we will not be able
 
1800
         * to find the table, which could be required
 
1801
         * for TRUNCATE!
 
1802
         */
 
1803
        if (xt_sl_get_size(db->db_table_by_id) >= XT_MAX_TABLES)
 
1804
                xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TOO_MANY_TABLES, (u_long) XT_MAX_TABLES);
 
1805
 
 
1806
        tab_id = db->db_curr_tab_id + 1;                
 
1807
 
 
1808
        if (old_tab) {
 
1809
                old_tab_id = old_tab->tab_id;           
 
1810
                xt_dl_delete_ext_data(self, old_tab, FALSE, TRUE);
 
1811
                freer_(); // xt_heap_release(self, old_tab)
 
1812
 
 
1813
                /* For the Windows version this must be done before we
 
1814
                 * start to delete the underlying files!
 
1815
                 */
 
1816
                tab_close_files(self, old_tab);
 
1817
 
 
1818
                tab_delete_table_files(self, name, old_tab_id);
 
1819
 
 
1820
                /* Remove the PBMS table: */
 
1821
                ASSERT(xt_get_self() == self);
 
1822
 
 
1823
                /* Remove the table from the directory. It will get a new
 
1824
                 * ID so the handle in the directory will no longer be valid.
 
1825
                 */
 
1826
                xt_ht_del(self, db->db_tables, name);
 
1827
        }
 
1828
        else {
 
1829
                freer_(); // xt_heap_release(self, old_tab)
 
1830
        }
 
1831
 
 
1832
        /* Add the table to the directory; we'll remove it on error! */
 
1833
        li_undo.li_sl = db->db_table_by_id;
 
1834
        li_undo.li_key = &tab_id;
 
1835
        te_tab.te_tab_id = tab_id;
 
1836
        te_tab.te_heap_tab = dic->dic_tab_flags & XT_TF_MEMORY_TABLE;
 
1837
        te_tab.te_tab_name = xt_dup_string(self, xt_last_name_of_path(name->ps_path));
 
1838
        te_tab.te_tab_path = tab_get_table_path(self, db, name, TRUE);
 
1839
        te_tab.te_table = NULL;
 
1840
        te_tab.te_type = dic->dic_table_type;  
 
1841
        xt_sl_insert(self, db->db_table_by_id, &tab_id, &te_tab);
 
1842
 
 
1843
        *path = 0;
 
1844
        try_(a) {
 
1845
                XTOpenFilePtr   of_row, of_rec, of_ind;
 
1846
                size_t                  def_len = 0;
 
1847
 
 
1848
                tab_save_tables(self, db);
 
1849
 
 
1850
                tab = (XTTableHPtr) xt_heap_new(self, sizeof(XTTableHRec), tab_finalize);
 
1851
                pushr_(xt_heap_release, tab);
 
1852
 
 
1853
                /* The length of the foreign key definition: */
 
1854
                if (dic->dic_table) {
 
1855
                        dic->dic_table->loadString(self, &tab_def);
 
1856
                        def_len = tab_def.sb_len + 1;
 
1857
                }
 
1858
 
 
1859
                tab->tab_head_op_seq = 0;
 
1860
                tab->tab_wr_op_seq = 0;
 
1861
#ifdef DEBUG
 
1862
                /* This tests operation number overflow. */
 
1863
                //tab->tab_head_op_seq = 0xFFFFFFFF - 12;
 
1864
                //tab->tab_wr_op_seq = 0xFFFFFFFF - 12;
 
1865
#endif
 
1866
 
 
1867
                /* ------- ROW FILE: */
 
1868
                xt_strcpy(PATH_MAX, path, name->ps_path);
 
1869
                xt_remove_last_name_of_path(path);
 
1870
                tab_get_row_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
 
1871
                xt_strcat(PATH_MAX, path, table_name);
 
1872
                of_row = xt_open_file(self, path, xt_row_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, xt_db_row_file_grow_size);
 
1873
                pushr_(xt_close_file, of_row);
 
1874
                tab_init_row_file(self, of_row, tab, dic);
 
1875
                freer_(); // xt_close_file(of_row)
 
1876
 
 
1877
                (void) ASSERT(sizeof(XTTabRowHeadDRec) == sizeof(XTTabRowRefDRec));
 
1878
                (void) ASSERT(sizeof(XTTabRowRefDRec) == 1 << XT_TAB_ROW_SHIFTS);
 
1879
 
 
1880
                /* ------------ DATA FILE: */
 
1881
                xt_remove_last_name_of_path(path);
 
1882
                tab_get_data_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
 
1883
                xt_strcat(PATH_MAX, path, table_name);
 
1884
                of_rec = xt_open_file(self, path, xt_rec_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, xt_db_data_file_grow_size);
 
1885
                pushr_(xt_close_file, of_rec);
 
1886
                tab_init_data_file(self, of_rec, tab, dic, def_len, &tab_def);
 
1887
                freer_(); // xt_close_file(of_rec)
 
1888
 
 
1889
                /* ----------- INDEX FILE: */
 
1890
                xt_remove_last_name_of_path(path);
 
1891
                tab_get_index_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
 
1892
                xt_strcat(PATH_MAX, path, table_name);
 
1893
                of_ind = xt_open_file(self, path, xt_ind_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, XT_INDEX_PAGE_SIZE*256);
 
1894
                pushr_(xt_close_file, of_ind);
 
1895
                tab_init_ind_file(self, of_ind, tab, dic);
 
1896
                freer_(); // xt_close_file(of_ind)
 
1897
 
 
1898
                /* ------------ */
 
1899
                /* Log the new table ID! */
 
1900
                db->db_curr_tab_id = tab_id;
 
1901
                if (!xt_xn_log_tab_id(self, tab_id)) {
 
1902
                        db->db_curr_tab_id = tab_id - 1;
 
1903
                        xt_throw(self);
 
1904
                }
 
1905
 
 
1906
                freer_(); // xt_heap_release(tab)
 
1907
 
 
1908
                /* {LOAD-FOR-FKS}
 
1909
                 * 2008-12-10: Note, there is another problem, example:
 
1910
                 * set storage_engine = pbxt;
 
1911
                 * 
 
1912
                 * CREATE TABLE t1 (s1 INT PRIMARY KEY, s2 INT);
 
1913
                 * CREATE TABLE t2 (s1 INT PRIMARY KEY, FOREIGN KEY (s1) REFERENCES t1 (s1) ON UPDATE CASCADE);
 
1914
                 * CREATE TABLE t3 (s1 INT PRIMARY KEY, FOREIGN KEY (s1) REFERENCES t2 (s1) ON UPDATE CASCADE);
 
1915
                 * 
 
1916
                 * DROP TABLE IF EXISTS t2,t1;
 
1917
                 * CREATE TABLE t1 (s1 ENUM('a','b') PRIMARY KEY);
 
1918
                 * CREATE TABLE t2 (s1 ENUM('A','B'), FOREIGN KEY (s1) REFERENCES t1 (s1));
 
1919
                 * 
 
1920
                 * DROP TABLE IF EXISTS t2,t1;
 
1921
                 * 
 
1922
                 * In the example above. The second create t2 does not fail, although t3 references it,
 
1923
                 * and the data types do not match.
 
1924
                 * 
 
1925
                 * The main problem is that this error comes on DROP TABLE IF EXISTS t2! Which prevents
 
1926
                 * the table from being dropped - not good.
 
1927
                 *
 
1928
                 * So my idea here is to open the table, and if it fails, then the create table fails
 
1929
                 * as well.
 
1930
                 */
 
1931
                /*
 
1932
                 * Drizzle-specific:
 
1933
                 * We pass table type separately and provide NULL for the dic parameter, this is because
 
1934
                 * we want to force loading table (which is triggered by dic == NULL) but we still need table type
 
1935
                 */
 
1936
                if (!old_tab_id) {
 
1937
#ifndef DRIZZLED
 
1938
                        tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
 
1939
                        xt_heap_release(self, tab);
 
1940
#endif
 
1941
                }
 
1942
        }
 
1943
        catch_(a) {
 
1944
                /* Creation failed, delete the table files: */
 
1945
                XTException e;
 
1946
 
 
1947
                xt_enter_exception_handler(self, &e);
 
1948
                if (*path)
 
1949
                        tab_delete_table_files(self, name, tab_id);
 
1950
                tab_remove_table_path(self, db, te_tab.te_tab_path);
 
1951
                xt_sl_delete(NULL, db->db_table_by_id, &tab_id);
 
1952
                tab_save_tables(self, db);
 
1953
                xt_sb_set_size(self, &tab_def, 0);
 
1954
                xt_exit_exception_handler(self, &e);
 
1955
                xt_throw(self);
 
1956
        }
 
1957
        cont_(a);
 
1958
 
 
1959
        xt_sb_set_size(self, &tab_def, 0);
 
1960
 
 
1961
        if (old_tab_id) {
 
1962
                try_(b) {
 
1963
                        XTTableEntryPtr te_ptr;
 
1964
 
 
1965
                        if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &old_tab_id))) {
 
1966
                                tab_remove_table_path(self, db, te_ptr->te_tab_path);
 
1967
                                xt_sl_delete(self, db->db_table_by_id, &old_tab_id);
 
1968
                                tab_save_tables(self, db);
 
1969
                        }
 
1970
 
 
1971
                        /* Same purpose as above {LOAD-FOR-FKS} (although this should work, 
 
1972
                         * because this is a TRUNCATE TABLE).
 
1973
                         */
 
1974
                        tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
 
1975
                        xt_heap_release(self, tab);
 
1976
                }
 
1977
                catch_(b) {
 
1978
                        /* Log this error, but do not return it, because
 
1979
                         * it just involves the cleanup of the old table,
 
1980
                         * the new table has been successfully created.
 
1981
                         */
 
1982
                        xt_log_and_clear_exception(self);
 
1983
                }
 
1984
                cont_(b);
 
1985
        }
 
1986
 
 
1987
        freer_(); // xt_ht_unlock(db->db_tables)
 
1988
        freer_(); // xt_db_unlock_table_pool(table_pool)
 
1989
 
 
1990
        /* I open the table here, because I cannot rely on MySQL to do
 
1991
         * it after a create. This is normally OK, but with foreign keys
 
1992
         * tables can be referenced and then they are not opened
 
1993
         * before use. In this example, the INSERT opens t2, but t1 is
 
1994
         * not opened after the create. As a result the foreign key
 
1995
         * reference is not resolved.
 
1996
         *
 
1997
         * drop table t1, t2;
 
1998
         * CREATE TABLE t1
 
1999
         * (
 
2000
         *  id INT PRIMARY KEY
 
2001
         * ) ENGINE=pbxt;
 
2002
         * 
 
2003
         * CREATE TABLE t2
 
2004
         * (
 
2005
         *  v INT,
 
2006
         *  CONSTRAINT c1 FOREIGN KEY (v) REFERENCES t1(id)
 
2007
         * ) ENGINE=pbxt;
 
2008
         * 
 
2009
         * --error 1452
 
2010
         * INSERT INTO t2 VALUES(2);
 
2011
         */
 
2012
        /* this code is not needed anymore as we open tables referred by FKs as necessary during checks
 
2013
        xt_ht_lock(self, db->db_tables);
 
2014
        pushr_(xt_ht_unlock, db->db_tables);
 
2015
        tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
 
2016
        freer_(); // xt_ht_unlock(db->db_tables)
 
2017
        xt_heap_release(self, tab);
 
2018
        * CHANGED see {LOAD-FOR-FKS} above.
 
2019
        */
 
2020
 
 
2021
        exit_();
 
2022
}
 
2023
 
 
2024
/*
 * Drop the table named by tab_name from the calling thread's current
 * database (self->st_database).
 *
 * self     - calling thread; supplies the database and the st_ignore_fkeys flag.
 * tab_name - path/name of the table to drop.
 * drop_db  - passed through to checkCanDrop() of the table's dictionary;
 *            presumably TRUE when the whole database is being dropped
 *            (TODO confirm against callers).
 *
 * The table pool for the name and the db_tables hash table are locked for
 * the duration of the call. If the table was found and foreign keys are not
 * being ignored, the dictionary is asked whether the table may be dropped.
 * When dropping is allowed, the extended data, the open files, the table
 * files, the path entry and the id entry are removed, the table list is
 * saved, and the name is removed from db_tables.
 *
 * Throws XT_ERR_ROW_IS_REFERENCED when the drop is refused, and (DRIZZLED
 * builds only) XT_ERR_TABLE_NOT_FOUND when no table pool was returned.
 */
xtPublic void xt_drop_table(XTThreadPtr self, XTPathStrPtr tab_name, xtBool drop_db)
 
2025
{
 
2026
        XTDatabaseHPtr          db = self->st_database;
 
2027
        XTOpenTablePoolPtr      table_pool;
 
2028
        XTTableHPtr                     tab = NULL;
 
2029
        xtTableID                       tab_id = 0;
 
2030
        xtBool                          can_drop = TRUE;
 
2031
 
 
2032
        enter_();
 
2033
 
 
2034
#ifdef TRACE_CREATE_TABLES
 
2035
        printf("DROP %s\n", tab_name->ps_path);
 
2036
#endif
 
2037
 
 
        /* Three cleanup entries are registered below. The freer_() calls
         * further down are annotated with the entry each one releases:
         * first the table reference, then the hash table lock, then the
         * table pool lock.
         */
2038
        table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, tab_name, FALSE, FALSE, TRUE, &tab);
 
2039
        pushr_(xt_db_unlock_table_pool, table_pool);
 
2040
        xt_ht_lock(self, db->db_tables);
 
2041
        pushr_(xt_ht_unlock, db->db_tables);
 
2042
        pushr_(xt_heap_release, tab);
 
2043
 
 
2044
        if (table_pool) {
 
2045
                tab_id = tab->tab_id;   /* tab is not null if returned table_pool is not null */
 
2046
                /* check if other tables refer this */
 
2047
                if (!self->st_ignore_fkeys) 
 
2048
                        can_drop = tab->tab_dic.dic_table->checkCanDrop(drop_db);
 
2049
        }
 
2050
#ifdef DRIZZLED 
 
2051
        /* See the comment in ha_pbxt::delete_table regarding different implementation of DROP TABLE
 
2052
         * in MySQL and Drizzle
 
2053
         */
 
2054
        else {
 
2055
                xt_throw_xterr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND);
 
2056
        }
 
2057
#endif
 
2058
 
 
2059
        if (can_drop) {
 
2060
                if (tab_id) {
 
2061
                        XTTableEntryPtr te_ptr;
 
2062
 
 
2063
                        xt_dl_delete_ext_data(self, tab, FALSE, TRUE);
 
2064
                        freer_(); // xt_heap_release(self, tab)
 
2065
 
 
2066
                        /* For the Windows version this must be done before we
 
2067
                         * start to delete the underlying files!
 
2068
                         */
 
                        /* NOTE(review): tab is still used here after the reference
                         * taken by this function has been released above; presumably
                         * another holder keeps the table alive while db_tables is
                         * locked - confirm.
                         */
2069
                        tab_close_files(self, tab);
 
2070
 
 
2071
                        tab_delete_table_files(self, tab_name, tab_id);
 
2072
 
 
2073
                        ASSERT(xt_get_self() == self);
 
                        /* Remove the path and id entries only if the id is still listed. */
2074
                        if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id))) {
 
2075
                                tab_remove_table_path(self, db, te_ptr->te_tab_path);
 
2076
                                xt_sl_delete(self, db->db_table_by_id, &tab_id);
 
2077
                                tab_save_tables(self, db);
 
2078
                        }
 
2079
                }
 
2080
                else {
 
                        /* No table id was recorded: only pop the table reference entry. */
2081
                        freer_(); // xt_heap_release(self, tab)
 
2082
                }
 
2083
 
 
2084
                xt_ht_del(self, db->db_tables, tab_name);
 
2085
        }
 
2086
        else {  /* cannot drop table because of FK dependencies */
 
2087
                xt_throw_xterr(XT_CONTEXT, XT_ERR_ROW_IS_REFERENCED);
 
2088
        }
 
2089
 
 
2090
        freer_(); // xt_ht_unlock(db->db_tables)
 
2091
        freer_(); // xt_db_unlock_table_pool(table_pool)
 
2092
        exit_();
 
2093
}
 
2094
 
 
2095
xtPublic void xt_tab_check_free_lists(XTThreadPtr self, XTOpenTablePtr ot, bool check_recs, bool correct_count)
 
2096
{
 
2097
        char                                    table_name[XT_IDENTIFIER_NAME_SIZE*3+3];
 
2098
        register XTTableHPtr    tab = ot->ot_table;
 
2099
        xtRowID                                 prev_row_id;
 
2100
        xtRowID                                 row_id;
 
2101
        xtRefID                                 next_row_id;
 
2102
        u_llong                                 free_count;
 
2103
 
 
2104
        xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
 
2105
        if (check_recs) {
 
2106
                xtRecordID              prev_rec_id;
 
2107
                xtRecordID              rec_id;
 
2108
                XTTabRecExtDRec rec_buf;
 
2109
 
 
2110
                xt_lock_mutex_ns(&tab->tab_rec_lock);
 
2111
                /* Checking the free list: */
 
2112
                prev_rec_id = 0;
 
2113
                free_count = 0;
 
2114
                rec_id = tab->tab_rec_free_id;
 
2115
                while (rec_id) {
 
2116
                        if (rec_id >= tab->tab_rec_eof_id) {
 
2117
                                xt_logf(XT_NT_ERROR, "Table %s: invalid reference on free list: %llu, ", table_name, (u_llong) rec_id);
 
2118
                                if (prev_rec_id)
 
2119
                                        xt_logf(XT_NT_ERROR, "reference by: %llu\n", (u_llong) prev_rec_id);
 
2120
                                else
 
2121
                                        xt_logf(XT_NT_ERROR, "reference by list head pointer\n");
 
2122
                                xt_tab_set_table_repair_pending(tab);
 
2123
                                break;
 
2124
                        }
 
2125
                        if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_FIX_HEADER_SIZE, (xtWord1 *) &rec_buf)) {
 
2126
                                if (self)
 
2127
                                        xt_throw(self);
 
2128
                                else
 
2129
                                        xt_log_and_clear_warning(ot->ot_thread);
 
2130
                                break;
 
2131
                        }
 
2132
                        if ((rec_buf.tr_rec_type_1 & XT_TAB_STATUS_MASK) != XT_TAB_STATUS_FREED)
 
2133
                                xt_logf(XT_NT_INFO, "Table %s: record, %llu, on free list is not free\n", table_name, (u_llong) rec_id);
 
2134
                        free_count++;
 
2135
                        prev_rec_id = rec_id;
 
2136
                        rec_id = XT_GET_DISK_4(rec_buf.tr_prev_rec_id_4);
 
2137
                }
 
2138
                if (free_count != tab->tab_rec_fnum) {
 
2139
                        if (correct_count) {
 
2140
                                tab->tab_rec_fnum = free_count;
 
2141
                                tab->tab_head_rec_fnum = free_count;
 
2142
                                tab->tab_flush_pending = TRUE;
 
2143
                                xt_logf(XT_NT_INFO, "Table %s: free record count (%llu) has been set to the number of records on the list: %llu\n", table_name, (u_llong) tab->tab_rec_fnum, (u_llong) free_count);
 
2144
                        }
 
2145
                        else
 
2146
                                xt_logf(XT_NT_INFO, "Table %s: free record count (%llu) differs from the number of records on the list: %llu\n", table_name, (u_llong) tab->tab_rec_fnum, (u_llong) free_count);
 
2147
                }
 
2148
                xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
2149
        }
 
2150
 
 
2151
        /* Check the row free list: */
 
2152
        xt_lock_mutex_ns(&tab->tab_row_lock);
 
2153
 
 
2154
        prev_row_id = 0;
 
2155
        free_count = 0;
 
2156
        row_id = tab->tab_row_free_id;
 
2157
        while (row_id) {
 
2158
                if (row_id >= tab->tab_row_eof_id) {
 
2159
                        xt_logf(XT_NT_ERROR, "Table %s: invalid reference on free row: %llu, ", table_name, (u_llong) row_id);
 
2160
                        if (prev_row_id)
 
2161
                                xt_logf(XT_NT_ERROR, "reference by: %llu\n", (u_llong) prev_row_id);
 
2162
                        else
 
2163
                                xt_logf(XT_NT_ERROR, "reference by list head pointer\n");
 
2164
                        xt_tab_set_table_repair_pending(tab);
 
2165
                        break;
 
2166
                }
 
2167
                if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &next_row_id, ot->ot_thread)) {
 
2168
                        if (self)
 
2169
                                xt_throw(self);
 
2170
                        else
 
2171
                                xt_log_and_clear_warning(ot->ot_thread);
 
2172
                        break;
 
2173
                }
 
2174
                free_count++;
 
2175
                prev_row_id = row_id;
 
2176
                row_id = next_row_id;
 
2177
        }
 
2178
        if (free_count != tab->tab_row_fnum) {
 
2179
                if (correct_count) {
 
2180
                        /* tab_row_fnum is the current value, and tab_head_row_fnum is the value on
 
2181
                         * disk. tab_head_row_fnum is set by the writer as the changes are applied
 
2182
                         * to the database.
 
2183
                         *
 
2184
                         * This is the value then stored in the header of the file. This value
 
2185
                         * is in sync with other changes to the file.
 
2186
                         *
 
2187
                         * So the fact that I am setting both value means this will not work at
 
2188
                         * runtime, unless all changes have been applied by the writer.
 
2189
                         *
 
2190
                         * The correct way to do this at run time would be to add the change to the
 
2191
                         * transaction log, so that it is applied by the writer.
 
2192
                         */
 
2193
                        tab->tab_row_fnum = free_count;
 
2194
                        tab->tab_head_row_fnum = free_count;
 
2195
                        tab->tab_flush_pending = TRUE;
 
2196
                        xt_logf(XT_NT_INFO, "Table %s: free row count (%llu) has been set to the number of rows on the list: %llu\n", table_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count);
 
2197
                }
 
2198
                else
 
2199
                        xt_logf(XT_NT_INFO, "Table %s: free row count (%llu) differs from the number of rows on the list: %llu\n", table_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count);
 
2200
        }
 
2201
 
 
2202
        xt_unlock_mutex_ns(&tab->tab_row_lock);
 
2203
}
 
2204
 
 
2205
/*
 
2206
 * Record buffer size:
 
2207
 * -------------------
 
2208
 * The size of the record buffer used to hold the row
 
2209
 * in memory. This buffer size does not include the BLOB data.
 
2210
 * About 8 bytes (a pointer and a size) is reserved for each BLOB
 
2211
 * in this buffer.
 
2212
 *
 
2213
 * The buffer size includes a number of "NULL" bytes followed by
 
2214
 * the data area. The NULL bytes contain 1 bit for every column,
 
2215
 * to indicate whether the column is NULL or not.
 
2216
 *
 
2217
 * The size of the buffer is 4/8-byte aligned, so it may be padded
 
2218
 * at the end.
 
2219
 *
 
2220
 * Fixed length rec. len.:
 
2221
 * -----------------------
 
2222
 * If the record does not include any BLOBs then this is the size of the
 
2223
 * fixed length record. The size of the data in the data handle record
 
2224
 * need never be bigger than this length, if the record does not
 
2225
 * contain BLOBs. So this should be the maximum size set for
 
2226
 * AVG_ROW_LENGTH in this case.
 
2227
 *
 
2228
 * Handle data record size:
 
2229
 * ------------------------
 
2230
 * This is the size of the handle data record. It is the data size
 
2231
 * plus the "max header size".
 
2232
 *
 
2233
 * Min/max header size:
 
2234
 * The min and max header size of the header in the data handle file.
 
2235
 * The larger header is used if a record has an extended data (data log
 
2236
 * file) component.
 
2237
 *
 
2238
 * Min/avg/max record size:
 
2239
 * ------------------------
 
2240
 * These are variable length record sizes. That is, the size of records
 
2241
 * when stored in the variable length format. Variable length records
 
2242
 * do not have fixed fields sizes, instead the fields are packed one
 
2243
 * after the other, prefixed by a number of size indicator bytes.
 
2244
 *
 
2245
 * The average is an estimate of the average record size. This estimate
 
2246
 * is used if no AVG_ROW_LENGTH is specifically given.
 
2247
 *
 
2248
 * If the average estimate is within 20% of the maximum size of the record,
 
2249
 * then the record will be handled as a fixed length record.
 
2250
 *
 
2251
 * Avg row len set for tab:
 
2252
 * ------------------------
 
2253
 * This is the value set using AVG_ROW_LENGTH when the table is declared.
 
2254
 *
 
2255
 * Rows fixed length:
 
2256
 * ------------------
 
2257
 * YES if the records of this table are handled as fixed length records.
 
2258
 * In this case the table records will never have an extended record
 
2259
 * component.
 
2260
 *
 
2261
 * The size of the data area in the handle data record is set to the
 
2262
 * size of the MySQL data record ("Fixed length rec. len.").
 
2263
 *
 
2264
 * It also means that the record format used is identical to the MySQL
 
2265
 * record format.
 
2266
 *
 
2267
 * If the records are not fixed, then the variable length record format
 
2268
 * is used. Record sizes are then in the range specified by
 
2269
 * "Min/avg/max record size".
 
2270
 *
 
2271
 * Maximum fixed size:
 
2272
 * -------------------
 
2273
 * This is the maximum size of a data log record.
 
2274
 *
 
2275
 * Minimum variable size:
 
2276
 * ------------------------
 
2277
 * Records below this size are handled as a fixed length record size, unless
 
2278
 * the AVG_ROW_LENGTH is specifically set.
 
2279
 */
 
2280
xtPublic void xt_check_table(XTThreadPtr self, XTOpenTablePtr ot)
 
2281
{
 
2282
        XTTableHPtr                             tab = ot->ot_table;
 
2283
        xtRecordID                              prec_id;
 
2284
        XTTabRecExtDPtr                 rec_buf = (XTTabRecExtDPtr) ot->ot_row_rbuffer;
 
2285
#ifdef CHECK_TABLE_READ_DATA_LOG
 
2286
        XTactExtRecEntryDRec    ext_rec;
 
2287
        size_t                                  log_size;
 
2288
        xtLogID                                 log_id;
 
2289
        xtLogOffset                             log_offset;
 
2290
#endif
 
2291
        xtRecordID                              rec_id;
 
2292
        xtRecordID                              prev_rec_id;
 
2293
        xtXactID                                xn_id;
 
2294
        xtRowID                                 row_id;
 
2295
        u_llong                                 free_rec_count = 0, free_count2 = 0;
 
2296
        u_llong                                 delete_rec_count = 0;
 
2297
        u_llong                                 alloc_rec_count = 0;
 
2298
        u_llong                                 alloc_rec_bytes = 0;
 
2299
        u_llong                                 min_comp_rec_len = 0;
 
2300
        u_llong                                 max_comp_rec_len = 0;
 
2301
        size_t                                  rec_size;
 
2302
        size_t                                  row_size;
 
2303
        u_llong                                 ext_data_len = 0;
 
2304
        u_llong                                 ext_rec_count = 0;
 
2305
 
 
2306
#if defined(DUMP_CHECK_TABLE) || defined(CHECK_TABLE_STATS)
 
2307
        printf("\nCHECK TABLE: %s\n", tab->tab_name->ps_path);
 
2308
#endif
 
2309
 
 
2310
        xt_lock_mutex(self, &tab->tab_db->db_co_ext_lock);
 
2311
        pushr_(xt_unlock_mutex, &tab->tab_db->db_co_ext_lock);
 
2312
 
 
2313
        xt_lock_mutex(self, &tab->tab_rec_lock);
 
2314
        pushr_(xt_unlock_mutex, &tab->tab_rec_lock);
 
2315
 
 
2316
#ifdef CHECK_TABLE_STATS
 
2317
        printf("Record buffer size      = %lu\n", (u_long) tab->tab_dic.dic_mysql_buf_size);
 
2318
        printf("Fixed length rec. len.  = %lu\n", (u_long) tab->tab_dic.dic_mysql_rec_size);
 
2319
        printf("Handle data record size = %lu\n", (u_long) tab->tab_dic.dic_rec_size);
 
2320
        printf("Min/max header size     = %d/%d\n", (int) offsetof(XTTabRecFix, rf_data), tab->tab_dic.dic_rec_fixed ? (int) offsetof(XTTabRecFix, rf_data) : (int) offsetof(XTTabRecExtDRec, re_data));
 
2321
        printf("Min/avg/max record size = %llu/%llu/%llu\n", (u_llong) tab->tab_dic.dic_min_row_size, (u_llong) tab->tab_dic.dic_ave_row_size, (u_llong) tab->tab_dic.dic_max_row_size);
 
2322
        if (tab->tab_dic.dic_def_ave_row_size)
 
2323
                printf("Avg row len set for tab = %lu\n", (u_long) tab->tab_dic.dic_def_ave_row_size);
 
2324
        else
 
2325
                printf("Avg row len set for tab = not specified\n");
 
2326
        printf("Rows fixed length       = %s\n", tab->tab_dic.dic_rec_fixed ? "YES" : "NO");
 
2327
        if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
 
2328
                printf("Table type              = MEMORY\n");
 
2329
        else if (tab->tab_dic.dic_tab_flags & XT_TF_REAL_TEMP_TABLE)
 
2330
                printf("Table type              = TEMPORARY\n");
 
2331
        else if (tab->tab_dic.dic_tab_flags & XT_TF_DDL_TEMP_TABLE)
 
2332
                printf("Table type              = DDL-TEMPORARY\n");
 
2333
        if (tab->tab_dic.dic_def_ave_row_size)
 
2334
                printf("Maximum fixed size      = %lu\n", (u_long) XT_TAB_MAX_FIX_REC_LENGTH_SPEC);
 
2335
        else
 
2336
                printf("Maximum fixed size      = %lu\n", (u_long) XT_TAB_MAX_FIX_REC_LENGTH);
 
2337
        printf("Minimum variable size   = %lu\n", (u_long) XT_TAB_MIN_VAR_REC_LENGTH);
 
2338
        printf("Minimum auto-increment  = %llu\n", (u_llong) tab->tab_dic.dic_min_auto_inc);
 
2339
        printf("Number of columns       = %lu\n", (u_long) tab->tab_dic.dic_no_of_cols);
 
2340
        printf("Number of fixed columns = %lu\n", (u_long) tab->tab_dic.dic_fix_col_count);
 
2341
        printf("Columns req. for index  = %lu\n", (u_long) tab->tab_dic.dic_ind_cols_req);
 
2342
        if (tab->tab_dic.dic_ind_rec_len)
 
2343
                printf("Rec len req. for index  = %llu\n", (u_llong) tab->tab_dic.dic_ind_rec_len);
 
2344
        printf("Columns req. for blobs  = %lu\n", (u_long) tab->tab_dic.dic_blob_cols_req);
 
2345
        printf("Number of blob columns  = %lu\n", (u_long) tab->tab_dic.dic_blob_count);
 
2346
        printf("Number of indices       = %lu\n", (u_long) tab->tab_dic.dic_key_count);
 
2347
#endif
 
2348
 
 
2349
#ifdef DUMP_CHECK_TABLE
 
2350
        printf("Records:-\n");
 
2351
        printf("Free list: %llu (%llu)\n", (u_llong) tab->tab_rec_free_id, (u_llong) tab->tab_rec_fnum);
 
2352
        printf("EOF:       %llu\n", (u_llong) tab->tab_rec_eof_id);
 
2353
#endif
 
2354
 
 
2355
        rec_size = XT_REC_EXT_HEADER_SIZE;
 
2356
        if (rec_size > tab->tab_recs.tci_rec_size)
 
2357
                rec_size = tab->tab_recs.tci_rec_size;
 
2358
        rec_id = 1;
 
2359
        while (rec_id < tab->tab_rec_eof_id) {
 
2360
                if (!xt_tab_get_rec_data(ot, rec_id, tab->tab_dic.dic_rec_size, ot->ot_row_rbuffer))
 
2361
                        xt_throw(self);
 
2362
 
 
2363
#ifdef DUMP_CHECK_TABLE
 
2364
                printf("%-4llu ", (u_llong) rec_id);
 
2365
#endif
 
2366
                switch (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) {
 
2367
                        case XT_TAB_STATUS_FREED:
 
2368
#ifdef DUMP_CHECK_TABLE
 
2369
                                printf("======== ");
 
2370
#endif
 
2371
                                free_rec_count++;
 
2372
                                break;
 
2373
                        case XT_TAB_STATUS_DELETE:
 
2374
#ifdef DUMP_CHECK_TABLE
 
2375
                                printf("delete   ");
 
2376
#endif
 
2377
                                delete_rec_count++;
 
2378
                                break;
 
2379
                        case XT_TAB_STATUS_FIXED:
 
2380
#ifdef DUMP_CHECK_TABLE
 
2381
                                printf("record-F ");
 
2382
#endif
 
2383
                                alloc_rec_count++;
 
2384
                                row_size = myxt_store_row_length(ot, (char *) ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE);
 
2385
                                alloc_rec_bytes += row_size;
 
2386
                                if (!min_comp_rec_len || row_size < min_comp_rec_len)
 
2387
                                        min_comp_rec_len = row_size;
 
2388
                                if (row_size > max_comp_rec_len)
 
2389
                                        max_comp_rec_len = row_size;
 
2390
                                break;
 
2391
                        case XT_TAB_STATUS_VARIABLE:
 
2392
#ifdef DUMP_CHECK_TABLE
 
2393
                                printf("record-V ");
 
2394
#endif
 
2395
                                alloc_rec_count++;
 
2396
                                row_size = myxt_load_row_length(ot, tab->tab_dic.dic_rec_size, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, NULL);
 
2397
                                alloc_rec_bytes += row_size;
 
2398
                                if (!min_comp_rec_len || row_size < min_comp_rec_len)
 
2399
                                        min_comp_rec_len = row_size;
 
2400
                                if (row_size > max_comp_rec_len)
 
2401
                                        max_comp_rec_len = row_size;
 
2402
                                break;
 
2403
                        case XT_TAB_STATUS_EXT_DLOG:
 
2404
#ifdef DUMP_CHECK_TABLE
 
2405
                                printf("record-X ");
 
2406
#endif
 
2407
                                alloc_rec_count++;
 
2408
                                ext_rec_count++;
 
2409
                                ext_data_len += XT_GET_DISK_4(rec_buf->re_log_dat_siz_4);
 
2410
                                row_size = XT_GET_DISK_4(rec_buf->re_log_dat_siz_4) + ot->ot_rec_size - XT_REC_EXT_HEADER_SIZE;
 
2411
                                alloc_rec_bytes += row_size;
 
2412
                                if (!min_comp_rec_len || row_size < min_comp_rec_len)
 
2413
                                        min_comp_rec_len = row_size;
 
2414
                                if (row_size > max_comp_rec_len)
 
2415
                                        max_comp_rec_len = row_size;
 
2416
                                break;
 
2417
                }
 
2418
#ifdef DUMP_CHECK_TABLE
 
2419
                if (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_CLEANED_BIT)
 
2420
                        printf("C");
 
2421
                else
 
2422
                        printf(" ");
 
2423
#endif
 
2424
                prev_rec_id = XT_GET_DISK_4(rec_buf->tr_prev_rec_id_4);
 
2425
                xn_id = XT_GET_DISK_4(rec_buf->tr_xact_id_4);
 
2426
                row_id = XT_GET_DISK_4(rec_buf->tr_row_id_4);
 
2427
                switch (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) {
 
2428
                        case XT_TAB_STATUS_FREED:
 
2429
#ifdef DUMP_CHECK_TABLE
 
2430
                                printf(" prev=%-3llu (xact=%-3llu row=%lu)\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id);
 
2431
#endif
 
2432
                                break;
 
2433
                        case XT_TAB_STATUS_EXT_DLOG:
 
2434
#ifdef DUMP_CHECK_TABLE
 
2435
                                printf(" prev=%-3llu  xact=%-3llu row=%lu  Xlog=%lu Xoff=%llu Xsiz=%lu\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id, (u_long) XT_GET_DISK_2(rec_buf->re_log_id_2), (u_llong) XT_GET_DISK_6(rec_buf->re_log_offs_6), (u_long) XT_GET_DISK_4(rec_buf->re_log_dat_siz_4));
 
2436
#endif
 
2437
 
 
2438
#ifdef CHECK_TABLE_READ_DATA_LOG
 
2439
                                xtBool ok;
 
2440
 
 
2441
                                log_size = XT_GET_DISK_4(rec_buf->re_log_dat_siz_4);
 
2442
                                XT_GET_LOG_REF(log_id, log_offset, rec_buf);
 
2443
                                if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
 
2444
                                        xt_tab_read_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data), (xtWord1 *) &ext_rec);
 
2445
                                        ok = TRUE;
 
2446
                                }
 
2447
                                else {
 
2448
                                        if (!(ok = self->st_dlog_buf.dlb_read_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data), (xtWord1 *) &ext_rec, self)))
 
2449
                                                xt_log_and_clear_exception(self);
 
2450
                                }
 
2451
                                if (ok) {
 
2452
                                        size_t          log_size2;
 
2453
                                        xtTableID       curr_tab_id;
 
2454
                                        xtRecordID      curr_rec_id;
 
2455
 
 
2456
                                        log_size2 = XT_GET_DISK_4(ext_rec.er_data_size_4);
 
2457
                                        curr_tab_id = XT_GET_DISK_4(ext_rec.er_tab_id_4);
 
2458
                                        curr_rec_id = XT_GET_DISK_4(ext_rec.er_rec_id_4);
 
2459
                                        if (log_size2 != log_size || curr_tab_id != tab->tab_id || curr_rec_id != rec_id) {
 
2460
                                                xt_logf(XT_INFO, "Table %s: record %llu, extended record %lu:%llu not valid\n", tab->tab_name, (u_llong) rec_id, (u_long) log_id, (u_llong) log_offset);
 
2461
                                        }
 
2462
                                }
 
2463
#endif
 
2464
                                break;
 
2465
                        default:
 
2466
#ifdef DUMP_CHECK_TABLE
 
2467
                                printf(" prev=%-3llu  xact=%-3llu row=%lu\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id);
 
2468
#endif
 
2469
                                break;
 
2470
                }
 
2471
                rec_id++;
 
2472
        }
 
2473
        
 
2474
#ifdef CHECK_TABLE_STATS
 
2475
        u_long  rec, row, ind;
 
2476
        char    value[50];
 
2477
 
 
2478
        rec = xt_seek_eof_file(self, ot->ot_rec_file);
 
2479
        row = xt_seek_eof_file(self, ot->ot_row_file);
 
2480
        ind = xt_seek_eof_file(self, ot->ot_ind_file);
 
2481
        if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
 
2482
                if (!tab->tab_dic.dic_rec_fixed) {
 
2483
                        xt_int8_to_byte_size((xtInt8) tab->tab_mem_total, value);
 
2484
                        printf("Ext. record memory used = %s\n", value);
 
2485
                        printf("Extended record count   = %llu\n", ext_rec_count);
 
2486
                }
 
2487
                xt_int8_to_byte_size((xtInt8) ind, value);
 
2488
                printf("Index data memory used  = %s\n", value);
 
2489
                xt_int8_to_byte_size((xtInt8) rec + row, value);
 
2490
                printf("Table data memory used  = %s\n", value);
 
2491
                xt_int8_to_byte_size((xtInt8) tab->tab_mem_total + rec + row + ind, value);
 
2492
                printf("Total memory used       = %s\n", value);
 
2493
        }
 
2494
        else {
 
2495
                if (!tab->tab_dic.dic_rec_fixed) {
 
2496
                        xt_int8_to_byte_size((xtInt8) ext_data_len, value);
 
2497
                        printf("Ext. record disk used   = %s\n", value);                
 
2498
                        printf("Extended record count   = %llu\n", ext_rec_count);
 
2499
                }
 
2500
                xt_int8_to_byte_size((xtInt8) ind, value);
 
2501
                printf("Index disk space used   = %s\n", value);
 
2502
                xt_int8_to_byte_size((xtInt8) rec + row, value);
 
2503
                printf("Table disk space used   = %s\n", value);
 
2504
                xt_int8_to_byte_size((xtInt8) ext_data_len + rec + row + ind, value);
 
2505
                printf("Total disk space used   = %s\n", value);
 
2506
        }
 
2507
        
 
2508
        if (alloc_rec_count) {
 
2509
                printf("Minumum comp. rec. len. = %llu\n", (u_llong) min_comp_rec_len);
 
2510
                printf("Average comp. rec. len. = %llu\n", (u_llong) ((double) alloc_rec_bytes / (double) alloc_rec_count + (double) 0.5));
 
2511
                printf("Maximum comp. rec. len. = %llu\n", (u_llong) max_comp_rec_len);
 
2512
        }
 
2513
        printf("Free record count       = %llu\n", (u_llong) free_rec_count);
 
2514
        printf("Deleted record count    = %llu\n", (u_llong) delete_rec_count);
 
2515
        printf("Allocated record count  = %llu\n", (u_llong) alloc_rec_count);
 
2516
 
 
2517
#endif
 
2518
        if (tab->tab_rec_fnum != free_rec_count)
 
2519
                xt_logf(XT_INFO, "Table %s: incorrect number of free blocks, %llu, should be: %llu\n", tab->tab_name, (u_llong) free_rec_count, (u_llong) tab->tab_rec_fnum);
 
2520
 
 
2521
        /* Checking the free list: */
 
2522
        prec_id = 0;
 
2523
        rec_id = tab->tab_rec_free_id;
 
2524
        while (rec_id) {
 
2525
                if (rec_id >= tab->tab_rec_eof_id) {
 
2526
                        xt_logf(XT_INFO, "Table %s: invalid reference on free list: %llu, ", tab->tab_name, (u_llong) rec_id);
 
2527
                        if (prec_id)
 
2528
                                xt_logf(XT_INFO, "reference by: %llu\n", (u_llong) prec_id);
 
2529
                        else
 
2530
                                xt_logf(XT_INFO, "reference by list head pointer\n");
 
2531
                        break;
 
2532
                }
 
2533
                if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_FIX_HEADER_SIZE, (xtWord1 *) rec_buf)) {
 
2534
                        xt_log_and_clear_exception(self);
 
2535
                        break;
 
2536
                }
 
2537
                if ((rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) != XT_TAB_STATUS_FREED)
 
2538
                        xt_logf(XT_INFO, "Table %s: record, %llu, on free list is not free\n", tab->tab_name, (u_llong) rec_id);
 
2539
                free_count2++;
 
2540
                prec_id = rec_id;
 
2541
                rec_id = XT_GET_DISK_4(rec_buf->tr_prev_rec_id_4);
 
2542
        }
 
2543
        if (free_count2 != free_rec_count)
 
2544
                xt_logf(XT_INFO, "Table %s: not all free blocks (%llu) on free list: %llu\n", tab->tab_name, (u_llong) free_rec_count, (u_llong) free_count2);
 
2545
 
 
2546
        freer_(); // xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
2547
 
 
2548
        xtRefID ref_id;
 
2549
 
 
2550
        xt_lock_mutex(self, &tab->tab_row_lock);
 
2551
        pushr_(xt_unlock_mutex, &tab->tab_row_lock);
 
2552
 
 
2553
#ifdef DUMP_CHECK_TABLE
 
2554
        printf("Rows:-\n");
 
2555
        printf("Free list: %llu (%llu)\n", (u_llong) tab->tab_row_free_id, (u_llong) tab->tab_row_fnum);
 
2556
        printf("EOF:       %llu\n", (u_llong) tab->tab_row_eof_id);
 
2557
#endif
 
2558
 
 
2559
        rec_id = 1;
 
2560
        while (rec_id < tab->tab_row_eof_id) {
 
2561
                if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, rec_id, &ref_id, self))
 
2562
                        xt_throw(self);
 
2563
#ifdef DUMP_CHECK_TABLE
 
2564
                printf("%-3llu ", (u_llong) rec_id);
 
2565
#endif
 
2566
#ifdef DUMP_CHECK_TABLE
 
2567
                if (ref_id == 0)
 
2568
                        printf("====== 0\n");
 
2569
                else
 
2570
                        printf("in use %llu\n", (u_llong) ref_id);
 
2571
#endif
 
2572
                rec_id++;
 
2573
        }
 
2574
 
 
2575
        prec_id = 0;
 
2576
        free_count2 = 0;
 
2577
        row_id = tab->tab_row_free_id;
 
2578
        while (row_id) {
 
2579
                if (row_id >= tab->tab_row_eof_id) {
 
2580
                        xt_logf(XT_INFO, "Table %s: invalid reference on free row: %llu, ", tab->tab_name, (u_llong) row_id);
 
2581
                        if (prec_id)
 
2582
                                xt_logf(XT_INFO, "reference by: %llu\n", (u_llong) prec_id);
 
2583
                        else
 
2584
                                xt_logf(XT_INFO, "reference by list head pointer\n");
 
2585
                        break;
 
2586
                }
 
2587
                if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &ref_id, self)) {
 
2588
                        xt_log_and_clear_exception(self);
 
2589
                        break;
 
2590
                }
 
2591
                free_count2++;
 
2592
                prec_id = row_id;
 
2593
                row_id = ref_id;
 
2594
        }
 
2595
        if (free_count2 != tab->tab_row_fnum)
 
2596
                xt_logf(XT_INFO, "Table %s: free row count (%llu) differs from the number of row on the list: %llu\n", tab->tab_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count2);
 
2597
 
 
2598
        freer_(); // xt_unlock_mutex(&tab->tab_row_lock);
 
2599
 
 
2600
#ifdef CHECK_INDEX_ON_CHECK_TABLE
 
2601
        xt_check_indices(ot);
 
2602
#endif
 
2603
        freer_(); // xt_unlock_mutex(&tab->tab_db->db_co_ext_lock);
 
2604
}
 
2605
 
 
2606
xtPublic void xt_rename_table(XTThreadPtr self, XTPathStrPtr old_name, XTPathStrPtr new_name)
 
2607
{
 
2608
        XTDatabaseHPtr          db = self->st_database;
 
2609
        XTOpenTablePoolPtr      table_pool;
 
2610
        XTTableHPtr                     tab = NULL;
 
2611
        char                            table_name[XT_MAX_TABLE_FILE_NAME_SIZE];
 
2612
        char                            *postfix;
 
2613
        XTFilesOfTableRec       ft;
 
2614
        XTDictionaryRec         dic;
 
2615
        xtTableID                       tab_id;
 
2616
        XTTableEntryPtr         te_ptr;
 
2617
        char                            *te_new_name;
 
2618
        XTTablePathPtr          te_new_path;
 
2619
        XTTablePathPtr          te_old_path;
 
2620
        char                            to_path[PATH_MAX];
 
2621
 
 
2622
        memset(&dic, 0, sizeof(dic));
 
2623
 
 
2624
#ifdef TRACE_CREATE_TABLES
 
2625
        printf("RENAME %s --> %s\n", old_name->ps_path, new_name->ps_path);
 
2626
#endif
 
2627
        if (strlen(xt_last_name_of_path(new_name->ps_path)) > XT_TABLE_NAME_SIZE-1)
 
2628
                xt_throw_taberr(XT_CONTEXT, XT_ERR_NAME_TOO_LONG, new_name);
 
2629
 
 
2630
        /* MySQL renames the table while it is in use. Here is
 
2631
         * the sequence:
 
2632
         *
 
2633
         * OPEN tab1
 
2634
         * CREATE tmp_tab
 
2635
         * OPEN tmp_tab
 
2636
         * COPY tab1 -> tmp_tab
 
2637
         * CLOSE tmp_tab
 
2638
         * RENAME tab1 -> tmp2_tab
 
2639
         * RENAME tmp_tab -> tab1
 
2640
         * CLOSE tab1 (tmp2_tab)
 
2641
         * DELETE tmp2_tab
 
2642
         * OPEN tab1
 
2643
         *
 
2644
         * Since the table is open when it is renamed, I cannot
 
2645
         * get exclusive use of the table for this operation.
 
2646
         *
 
2647
         * So instead we just make sure that the sweeper is not
 
2648
         * using the table.
 
2649
         */
 
2650
        table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, old_name, FALSE, TRUE, FALSE, &tab);
 
2651
        pushr_(xt_db_unlock_table_pool, table_pool);
 
2652
        xt_ht_lock(self, db->db_tables);
 
2653
        pushr_(xt_ht_unlock, db->db_tables);
 
2654
        tab_id = tab->tab_id;
 
2655
        myxt_move_dictionary(&dic, &tab->tab_dic);
 
2656
        pushr_(myxt_free_dictionary, &dic);
 
2657
        pushr_(xt_heap_release, tab);
 
2658
 
 
2659
        /* Unmap the memory mapped table files: 
 
2660
         * For windows this must be done before we
 
2661
         * can rename the files.
 
2662
         */
 
2663
        tab_close_files(self, tab);
 
2664
 
 
2665
        freer_(); // xt_heap_release(self, old_tab)
 
2666
 
 
2667
        /* Create the new name and path: */
 
2668
        te_new_name = xt_dup_string(self, xt_last_name_of_path(new_name->ps_path));
 
2669
        pushr_(xt_free, te_new_name);
 
2670
        te_new_path = tab_get_table_path(self, db, new_name, FALSE);
 
2671
        pushr_(tab_free_table_path, te_new_path);
 
2672
 
 
2673
        te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
 
2674
 
 
2675
        /* Remove the table from the Database directory: */
 
2676
        xt_ht_del(self, db->db_tables, old_name);
 
2677
 
 
2678
        xt_enum_files_of_tables_init(old_name, tab_id, &ft);
 
2679
        while (xt_enum_files_of_tables_next(&ft)) {
 
2680
                postfix = xt_tab_file_to_name(XT_MAX_TABLE_FILE_NAME_SIZE, table_name, ft.ft_file_path);
 
2681
 
 
2682
                xt_strcpy(PATH_MAX, to_path, new_name->ps_path);
 
2683
                xt_strcat(PATH_MAX, to_path, postfix);
 
2684
 
 
2685
                if (!xt_fs_rename(NULL, ft.ft_file_path, to_path))
 
2686
                        xt_log_and_clear_exception(self);
 
2687
        }
 
2688
 
 
2689
        /* Switch the table name and path: */
 
2690
        xt_free(self, te_ptr->te_tab_name);
 
2691
        te_ptr->te_tab_name = te_new_name;
 
2692
        te_old_path = te_ptr->te_tab_path;
 
2693
        te_ptr->te_tab_path = te_new_path;
 
2694
        tab_remove_table_path(self, db, te_old_path);
 
2695
        tab_save_tables(self, db);
 
2696
 
 
2697
        popr_(); // Discard tab_free_table_path(te_new_path);
 
2698
        popr_(); // Discard xt_free(te_new_name);
 
2699
 
 
2700
        tab = xt_use_table_no_lock(self, db, new_name, FALSE, FALSE, &dic);
 
2701
        /* All renamed tables are considered repaired! */
 
2702
        xt_tab_table_repaired(tab);
 
2703
        xt_heap_release(self, tab);
 
2704
 
 
2705
        freer_(); // myxt_free_dictionary(&dic)
 
2706
        freer_(); // xt_ht_unlock(db->db_tables)
 
2707
        freer_(); // xt_db_unlock_table_pool(table_pool)
 
2708
}
 
2709
 
 
2710
xtPublic XTTableHPtr xt_use_table(XTThreadPtr self, XTPathStrPtr name, xtBool no_load, xtBool missing_ok)
 
2711
{
 
2712
        XTTableHPtr             tab;
 
2713
        XTDatabaseHPtr  db = self->st_database;
 
2714
 
 
2715
        xt_ht_lock(self, db->db_tables);
 
2716
        pushr_(xt_ht_unlock, db->db_tables);
 
2717
        tab = xt_use_table_no_lock(self, db, name, no_load, missing_ok, NULL);
 
2718
        freer_();
 
2719
        return tab;
 
2720
}
 
2721
 
 
2722
xtPublic void xt_sync_flush_table(XTThreadPtr self, XTOpenTablePtr ot, int timeout)
 
2723
{
 
2724
        XTTableHPtr             tab = ot->ot_table;
 
2725
        XTDatabaseHPtr  db = tab->tab_db;
 
2726
 
 
2727
        /* Wakeup the sweeper:
 
2728
         * We want the sweeper to check if there is anything to do,
 
2729
         * so we must wake it up.
 
2730
         * Once it has done all it can, it will go back to sleep.
 
2731
         * This should be good enough.
 
2732
         *
 
2733
         * NOTE: I all cases, we do not wait if the sweeper is in
 
2734
         * error state.
 
2735
         */
 
2736
        if (db->db_sw_idle) {
 
2737
                u_int check_count = db->db_sw_check_count;
 
2738
 
 
2739
                for (;;) {
 
2740
                        xt_wakeup_sweeper(db);
 
2741
                        if (!db->db_sw_thread || db->db_sw_idle != XT_THREAD_IDLE || check_count != db->db_sw_check_count)
 
2742
                                break;
 
2743
                        xt_sleep_milli_second(10);
 
2744
                }
 
2745
        }
 
2746
 
 
2747
        /* Wait for the sweeper to become idle: */
 
2748
        xt_lock_mutex(self, &db->db_sw_lock);
 
2749
        pushr_(xt_unlock_mutex, &db->db_sw_lock);
 
2750
        while (db->db_sw_thread && !db->db_sw_idle) {
 
2751
                xt_timed_wait_cond(self, &db->db_sw_cond, &db->db_sw_lock, 10);
 
2752
        }
 
2753
        freer_(); // xt_unlock_mutex(&db->db_sw_lock)
 
2754
 
 
2755
        /* Wait for the writer to write out all operations on the table:
 
2756
         * We also do not wait for the writer if it is in
 
2757
         * error state.
 
2758
         */
 
2759
        time_t start_time = time(NULL);
 
2760
        while (db->db_wr_thread && 
 
2761
                db->db_wr_idle != XT_THREAD_INERR &&
 
2762
                XTTableSeq::xt_op_is_before(tab->tab_head_op_seq+1, tab->tab_seq.ts_next_seq)) {
 
2763
                if (timeout && time(NULL) > start_time + timeout) {
 
2764
                        char    name_buf[XT_TABLE_NAME_BUF_SIZE];
 
2765
 
 
2766
                        xt_tab_make_table_name(tab->tab_name, name_buf, XT_TABLE_NAME_BUF_SIZE);
 
2767
                        xt_logf(XT_WARNING, "Timeout waiting for writer while flushing %s\n", name_buf);
 
2768
                        break;
 
2769
                }
 
2770
 
 
2771
                /* Flush the log, in case this is holding up the
 
2772
                 * writer!
 
2773
                 */
 
2774
                if (!db->db_xlog.xlog_flush(self))
 
2775
                        xt_throw(self);
 
2776
 
 
2777
                xt_lock_mutex(self, &db->db_wr_lock);
 
2778
                pushr_(xt_unlock_mutex, &db->db_wr_lock);
 
2779
                db->db_wr_thread_waiting++;
 
2780
                /*
 
2781
                 * Wake the writer if it is sleeping. In order to
 
2782
                 * flush a table we must wait for the writer to complete
 
2783
                 * committing all the changes in the table to the database.
 
2784
                 */
 
2785
                if (db->db_wr_idle) {
 
2786
                        if (!xt_broadcast_cond_ns(&db->db_wr_cond))
 
2787
                                xt_log_and_clear_exception_ns();
 
2788
                }
 
2789
 
 
2790
                freer_(); // xt_unlock_mutex(&db->db_wr_lock)
 
2791
                xt_sleep_milli_second(10);
 
2792
 
 
2793
                xt_lock_mutex(self, &db->db_wr_lock);
 
2794
                pushr_(xt_unlock_mutex, &db->db_wr_lock);
 
2795
                db->db_wr_thread_waiting--;
 
2796
                freer_(); // xt_unlock_mutex(&db->db_wr_lock)
 
2797
        }
 
2798
 
 
2799
        xt_flush_table(self, ot);
 
2800
}
 
2801
 
 
2802
xtBool XTFlushRecRowTask::tk_task(XTThreadPtr thread)
 
2803
{
 
2804
        XTOpenTablePtr ot;
 
2805
 
 
2806
        /* {TASK-TABLE-GONE}
 
2807
         * If this task was scheduled before the table was deleted
 
2808
         * or renamed, then we may be caught holding an invalid
 
2809
         * table (frt_table) object.
 
2810
         *
 
2811
         * As a result we just use the ID, to get the open table
 
2812
         * pointer.
 
2813
         *
 
2814
         * If the tables are not identical, then there is no point
 
2815
         * in proceeding...
 
2816
         */
 
2817
        if (!(xt_db_open_pool_table_ns(&ot, frt_table->tab_db, frt_table->tab_id)))
 
2818
                return FAILED;
 
2819
 
 
2820
        if (!ot) {
 
2821
                /* Can happen if the table has been dropped: */
 
2822
                if (thread->t_exception.e_xt_err)
 
2823
                        xt_log_and_clear_exception(thread);
 
2824
                xt_logf(XT_NT_WARNING, "Checkpoint skipping table (ID) %lu: table was not found\n", (u_long) frt_table->tab_id);
 
2825
                xt_checkpoint_set_flush_state(frt_table->tab_db, frt_table->tab_id, XT_CPT_STATE_DONE_ALL);
 
2826
                return OK;
 
2827
        }
 
2828
 
 
2829
        if (ot->ot_table != frt_table) {
 
2830
                /* Can happen if the table has been renamed: */
 
2831
                if (thread->t_exception.e_xt_err)
 
2832
                        xt_log_and_clear_exception(thread);
 
2833
                xt_logf(XT_NT_WARNING, "Checkpoint skipping table (ID) %lu: table has been renamed\n", (u_long) frt_table->tab_id);
 
2834
                xt_checkpoint_set_flush_state(frt_table->tab_db, frt_table->tab_id, XT_CPT_STATE_DONE_ALL);
 
2835
                goto table_gone;
 
2836
        }
 
2837
 
 
2838
        if (!xt_flush_record_row(ot, NULL, FALSE)) {
 
2839
                xt_db_return_table_to_pool_ns(ot);
 
2840
                return FAILED;
 
2841
        }
 
2842
 
 
2843
        table_gone:
 
2844
        xt_db_return_table_to_pool_ns(ot);
 
2845
        return OK;
 
2846
}
 
2847
 
 
2848
void XTFlushRecRowTask::tk_reference()
 
2849
{
 
2850
        xt_heap_reference_ns(frt_table);
 
2851
}
 
2852
 
 
2853
void XTFlushRecRowTask::tk_release()
 
2854
{
 
2855
        xt_heap_release_ns(frt_table);
 
2856
}
 
2857
 
 
2858
/*
 
2859
 * Start a flush of this file in background.
 
2860
 */
 
2861
xtPublic xtBool xt_async_flush_record_row(XTTableHPtr tab, xtBool notify_complete, XTThreadPtr thread)
 
2862
{
 
2863
        if (tab->tab_rec_flush_task->tk_is_running())
 
2864
                return OK;
 
2865
 
 
2866
        /* Run the task: */
 
2867
        return xt_run_async_task(tab->tab_rec_flush_task, notify_complete, FALSE, thread, tab->tab_db);
 
2868
}
 
2869
 
 
2870
xtPublic xtBool xt_flush_record_row(XTOpenTablePtr ot, off_t *bytes_flushed, xtBool have_table_lock)
 
2871
{
 
2872
        XTTableHeadDRec                 rec_head;
 
2873
        XTTableHPtr                             tab = ot->ot_table;
 
2874
        off_t                                   to_flush;
 
2875
#ifdef TRACE_FLUSH_TABLE
 
2876
        time_t                                  tnow = 0;
 
2877
#endif
 
2878
 
 
2879
        if (!xt_begin_checkpoint(tab->tab_db, have_table_lock, ot->ot_thread))
 
2880
                return FAILED;
 
2881
 
 
2882
        xt_lock_mutex_ns(&tab->tab_rec_flush_lock);
 
2883
#ifdef XT_SORT_REC_WRITES
 
2884
        if (!xt_xres_delay_flush(ot, TRUE))
 
2885
                goto failed;
 
2886
#endif
 
2887
        xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_START_REC_ROW);
 
2888
 
 
2889
        ASSERT_NS(ot->ot_thread == xt_get_self());
 
2890
        /* Make sure that the table recovery point, in
 
2891
         * particular the operation ID is recorded
 
2892
         * before all other flush activity!
 
2893
         *
 
2894
         * This is because only operations after the
 
2895
         * recovery point in the header are applied
 
2896
         * to the table on recovery.
 
2897
         *
 
2898
         * So the operation ID is recorded before the
 
2899
         * flush activity, and written after all is done.
 
2900
         */
 
2901
        xt_tab_store_header(ot, &rec_head);
 
2902
 
 
2903
        /* Write the table header: */
 
2904
        if (tab->tab_flush_pending) {
 
2905
                tab->tab_flush_pending = FALSE;
 
2906
 
 
2907
#ifdef TRACE_FLUSH_TABLE
 
2908
                tnow = time(NULL);
 
2909
                printf("FLUSH TABLE bytes=%lu %s\n", (u_long) tab->tab_bytes_to_flush, tab->tab_name->ps_path);
 
2910
                fflush(stdout);
 
2911
#endif
 
2912
                // Want to see how much was to be flushed in the debugger:
 
2913
                to_flush = tab->tab_bytes_to_flush;
 
2914
                tab->tab_bytes_to_flush = 0;
 
2915
                if (bytes_flushed)
 
2916
                        *bytes_flushed += to_flush;
 
2917
                
 
2918
#ifdef XT_REC_FLUSH_THRESHOLD
 
2919
                XTThreadPtr writer;
 
2920
 
 
2921
                /* Reset the writer's byte level: */
 
2922
                if ((writer = ot->ot_table->tab_db->db_wr_thread))
 
2923
                        tab->tab_rec_wr_last_flush = writer->st_statistics.st_rec.ts_write;
 
2924
#endif
 
2925
 
 
2926
                /* Flush the table data: */
 
2927
                if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags)) {
 
2928
                        if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread) ||
 
2929
                                !XT_FLUSH_RR_FILE(ot->ot_row_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread)) {
 
2930
                                tab->tab_flush_pending = TRUE;
 
2931
                                goto failed;
 
2932
                        }
 
2933
                }
 
2934
 
 
2935
                /* The header includes the operation number which
 
2936
                 * must be written AFTER all other data,
 
2937
                 * because operations will not be applied again.
 
2938
                 */
 
2939
                if (!tab_write_header(ot, &rec_head)) {
 
2940
                        tab->tab_flush_pending = TRUE;
 
2941
                        goto failed;
 
2942
                }
 
2943
        }
 
2944
 
 
2945
        /* Flush the auto-increment: */
 
2946
        if (xt_db_auto_increment_mode == 1) {
 
2947
                if (tab->tab_auto_inc != tab->tab_dic.dic_min_auto_inc) {
 
2948
                        tab->tab_dic.dic_min_auto_inc = tab->tab_auto_inc;
 
2949
                        if (!xt_tab_write_min_auto_inc(ot))
 
2950
                                goto failed;
 
2951
                }
 
2952
        }
 
2953
 
 
2954
        /* Mark this table as record/row flushed: */
 
2955
        xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_DONE_REC_ROW);
 
2956
 
 
2957
#ifdef TRACE_FLUSH_TABLE
 
2958
        if (tnow) {
 
2959
                printf("flush table (%d) %s DONE\n", (int) (time(NULL) - tnow), tab->tab_name->ps_path);
 
2960
                fflush(stdout);
 
2961
        }
 
2962
#endif
 
2963
 
 
2964
        xt_unlock_mutex_ns(&tab->tab_rec_flush_lock);
 
2965
 
 
2966
        if (!xt_end_checkpoint(tab->tab_db, ot->ot_thread, NULL))
 
2967
                return FAILED;
 
2968
        return OK;
 
2969
        
 
2970
        failed:
 
2971
        xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_STOP_REC_ROW);
 
2972
 
 
2973
#ifdef TRACE_FLUSH_TABLE
 
2974
        if (tnow) {
 
2975
                printf("flush table (%d) %s FAILED\n", (int) (time(NULL) - tnow), tab->tab_name->ps_path);
 
2976
                fflush(stdout);
 
2977
        }
 
2978
#endif
 
2979
 
 
2980
        xt_unlock_mutex_ns(&tab->tab_rec_flush_lock);
 
2981
        return FAILED;
 
2982
}
 
2983
 
 
2984
xtPublic void xt_flush_table(XTThreadPtr self, XTOpenTablePtr ot)
 
2985
{
 
2986
        /* GOTCHA {FLUSH-BUG}: This bug was difficult to find.
 
2987
         * It occured on Windows in the multi_update
 
2988
         * test, sometimes.
 
2989
         *
 
2990
         * What happens is the checkpointer starts to
 
2991
         * flush the table, and gets to the 
 
2992
         * XT_FLUSH_RR_FILE part.
 
2993
         *
 
2994
         * Then a rename occurs, and the user thread
 
2995
         * flushes the table, and goes through and
 
2996
         * writes the table header, with the most
 
2997
         * recent table operation (the last operation
 
2998
         * that occurred).
 
2999
         *
 
3000
         * The checkpointer the completes and
 
3001
         * also writes the header, but with old
 
3002
         * values (as read in xt_tab_store_header()).
 
3003
         *
 
3004
         * The then user thread continues, and
 
3005
         * reopens the table after rename.
 
3006
         * On reopen, it reads the old value from the header,
 
3007
         * and sets the current operation number.
 
3008
         *
 
3009
         * Now there is a problem in the table cache,
 
3010
         * because some cache pages have operation numbers
 
3011
         * that are greater than current operation
 
3012
         * number!
 
3013
         *
 
3014
         * This later lead to the free-er hanging while
 
3015
         * it waited for an operation to be 
 
3016
         * written to the disk that never would be.
 
3017
         * This is because a page can only be freed when
 
3018
         * the head operation number has passed the
 
3019
         * page operation number.
 
3020
         *
 
3021
         * Which indicates that the page has been written
 
3022
         * to disk.
 
3023
         *
 
3024
         * THE BUG FIX:
 
3025
         * As a result I now use mutex so that only one
 
3026
         * thread can flush at a time.
 
3027
         */
 
3028
 
 
3029
        if (!xt_flush_record_row(ot, NULL, FALSE))
 
3030
                xt_throw(self);
 
3031
 
 
3032
        /* This was before the table data flush,
 
3033
         * (after xt_tab_store_header() above,
 
3034
         * but I don't think it makes any difference.
 
3035
         * Because in the checkpointer it was at this
 
3036
         * position.
 
3037
         */
 
3038
        if (!xt_flush_indices(ot, NULL, FALSE, NULL))
 
3039
                xt_throw(self);
 
3040
 
 
3041
}
 
3042
 
 
3043
static XTOpenTablePtr tab_open_table(XTTableHPtr tab)
 
3044
{
 
3045
        volatile XTOpenTablePtr ot;
 
3046
        XTThreadPtr                             self;
 
3047
 
 
3048
        if (!(ot = (XTOpenTablePtr) xt_malloc_ns(sizeof(XTOpenTableRec))))
 
3049
                return NULL;
 
3050
        memset(ot, 0, offsetof(XTOpenTableRec, ot_ind_wbuf));
 
3051
 
 
3052
        ot->ot_seq_page = NULL;
 
3053
        ot->ot_seq_data = NULL;
 
3054
 
 
3055
        self = xt_get_self();
 
3056
        try_(a) {
 
3057
                xt_heap_reference(self, tab);
 
3058
                ot->ot_table = tab;
 
3059
                ot->ot_row_file = xt_open_file(self, ot->ot_table->tab_row_file->fil_path, xt_row_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_DEFAULT, xt_db_row_file_grow_size);
 
3060
                ot->ot_rec_file = xt_open_file(self, ot->ot_table->tab_rec_file->fil_path, xt_rec_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_DEFAULT, xt_db_data_file_grow_size);
 
3061
#ifdef XT_USE_DIRECT_IO_ON_INDEX
 
3062
                ot->ot_ind_file = xt_open_file(self, ot->ot_table->tab_ind_file->fil_path, xt_ind_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_MISSING_OK | XT_FS_DIRECT_IO, XT_INDEX_PAGE_SIZE*256);
 
3063
#else
 
3064
                ot->ot_ind_file = xt_open_file(self, ot->ot_table->tab_ind_file->fil_path, xt_ind_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_MISSING_OK, XT_INDEX_PAGE_SIZE*256);
 
3065
#endif
 
3066
        }
 
3067
        catch_(a) {
 
3068
                ;
 
3069
        }
 
3070
        cont_(a);
 
3071
 
 
3072
        if (!ot->ot_table || !ot->ot_row_file || !ot->ot_rec_file)
 
3073
                goto failed;
 
3074
 
 
3075
        if (!(ot->ot_row_rbuffer = (xtWord1 *) xt_malloc_ns(ot->ot_table->tab_dic.dic_rec_size)))
 
3076
                goto failed;
 
3077
        ot->ot_row_rbuf_size = ot->ot_table->tab_dic.dic_rec_size;
 
3078
        if (!(ot->ot_row_wbuffer = (xtWord1 *) xt_malloc_ns(ot->ot_table->tab_dic.dic_rec_size)))
 
3079
                goto failed;
 
3080
        ot->ot_row_wbuf_size = ot->ot_table->tab_dic.dic_rec_size;
 
3081
 
 
3082
        /* Cache this stuff to speed access a bit: */
 
3083
        ot->ot_rec_fixed = ot->ot_table->tab_dic.dic_rec_fixed;
 
3084
        ot->ot_rec_size = ot->ot_table->tab_dic.dic_rec_size;
 
3085
 
 
3086
        return ot;
 
3087
 
 
3088
        failed:
 
3089
        tab_close_table(ot);
 
3090
        return NULL;
 
3091
}
 
3092
 
 
3093
xtPublic XTOpenTablePtr xt_open_table(XTTableHPtr tab)
 
3094
{
 
3095
        return tab_open_table(tab);
 
3096
}
 
3097
 
 
3098
xtPublic void xt_close_table(XTOpenTablePtr ot, xtBool flush, xtBool have_table_lock)
 
3099
{
 
3100
        if (flush) {
 
3101
                if (!xt_flush_record_row(ot, NULL, have_table_lock))
 
3102
                        xt_log_and_clear_exception_ns();
 
3103
 
 
3104
                if (!xt_flush_indices(ot, NULL, have_table_lock, NULL))
 
3105
                        xt_log_and_clear_exception_ns();
 
3106
        }
 
3107
        tab_close_table(ot);
 
3108
}
 
3109
 
 
3110
static int tab_use_table_by_id(XTThreadPtr self, XTTableHPtr *r_tab, XTDatabaseHPtr db, xtTableID tab_id)
 
3111
{
 
3112
        XTTableEntryPtr te_ptr;
 
3113
        XTTableHPtr             tab = NULL;
 
3114
        int                             r = XT_TAB_OK;
 
3115
        char                    path[PATH_MAX];
 
3116
 
 
3117
        if (!db)
 
3118
                xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
 
3119
        xt_ht_lock(self, db->db_tables);
 
3120
        pushr_(xt_ht_unlock, db->db_tables);
 
3121
 
 
3122
        te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
 
3123
        if (te_ptr) {
 
3124
                if (!(tab = te_ptr->te_table)) {
 
3125
                        /* Open the table: */
 
3126
                        xt_strcpy(PATH_MAX, path, te_ptr->te_tab_path->tp_path);
 
3127
                        xt_add_dir_char(PATH_MAX, path);
 
3128
                        xt_strcat(PATH_MAX, path, te_ptr->te_tab_name);
 
3129
                        r = tab_new_handle(self, &tab, db, tab_id, (XTPathStrPtr) path, TRUE, NULL);
 
3130
                }
 
3131
        }
 
3132
        else
 
3133
                r = XT_TAB_NOT_FOUND;
 
3134
 
 
3135
        if (tab)
 
3136
                xt_heap_reference(self, tab);
 
3137
        *r_tab = tab;
 
3138
 
 
3139
        freer_(); // xt_ht_unlock(db->db_tables)
 
3140
        return r;
 
3141
}
 
3142
 
 
3143
xtPublic XTTableHPtr xt_use_table_by_id(XTThreadPtr self, XTDatabaseHPtr db, xtTableID tab_id, int *result)
 
3144
{
 
3145
        XTTableHPtr tab;
 
3146
        int                     r;
 
3147
 
 
3148
        r = tab_use_table_by_id(self, &tab, db, tab_id);
 
3149
        if (result) {
 
3150
                if (r != XT_TAB_OK) {
 
3151
                        *result = r;
 
3152
                        return NULL;
 
3153
                }
 
3154
        }
 
3155
        else {
 
3156
                switch (r) {
 
3157
                        case XT_TAB_NOT_FOUND:
 
3158
                                return NULL;
 
3159
                        case XT_TAB_NO_DICTIONARY:
 
3160
                                xt_throw_ulxterr(XT_CONTEXT, XT_ERR_NO_DICTIONARY, (u_long) tab_id);
 
3161
                        case XT_TAB_POOL_CLOSED:
 
3162
                                xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TABLE_LOCKED, (u_long) tab_id);
 
3163
                        default:
 
3164
                                break;
 
3165
                }
 
3166
        }
 
3167
        
 
3168
        return tab;
 
3169
}
 
3170
 
 
3171
xtPublic XTTableHPtr xt_use_table_by_id_ns(XTDatabaseHPtr db, xtTableID tab_id)
 
3172
{
 
3173
        XTTableHPtr     tab;
 
3174
        XTThreadPtr     self = xt_get_self();
 
3175
 
 
3176
        try_(a) {
 
3177
                tab = xt_use_table_by_id(self, db, tab_id, NULL);
 
3178
        }
 
3179
        catch_(a) {
 
3180
                tab = NULL;
 
3181
        }
 
3182
        cont_(a);
 
3183
        return tab;
 
3184
}
 
3185
 
 
3186
/* The fixed part of the record is already in the row buffer.
 
3187
 * This function loads the extended part, expanding the row
 
3188
 * buffer if necessary.
 
3189
 */
 
3190
xtPublic xtBool xt_tab_load_ext_data(XTOpenTablePtr ot, xtRecordID load_rec_id, xtWord1 *buffer, u_int cols_req)
 
3191
{
 
3192
        size_t                                  log_size;
 
3193
        xtLogID                                 log_id;
 
3194
        xtLogOffset                             log_offset;
 
3195
        xtWord1                                 save_buffer[offsetof(XTactExtRecEntryDRec, er_data)];
 
3196
        xtBool                                  retried = FALSE;
 
3197
        XTactExtRecEntryDPtr    ext_data_ptr;
 
3198
        size_t                                  log_size2;
 
3199
        xtTableID                               curr_tab_id;
 
3200
        xtRecordID                              curr_rec_id;
 
3201
 
 
3202
        log_size = XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->re_log_dat_siz_4);
 
3203
        XT_GET_LOG_REF(log_id, log_offset, (XTTabRecExtDPtr) ot->ot_row_rbuffer);
 
3204
 
 
3205
        if (ot->ot_rec_size + log_size > ot->ot_row_rbuf_size) {
 
3206
                if (!xt_realloc_ns((void **) &ot->ot_row_rbuffer, ot->ot_rec_size + log_size))
 
3207
                        return FAILED;
 
3208
                ot->ot_row_rbuf_size = ot->ot_rec_size + log_size;
 
3209
        }
 
3210
 
 
3211
        /* Read the extended part first: */
 
3212
        ext_data_ptr = (XTactExtRecEntryDPtr) (ot->ot_row_rbuffer + ot->ot_rec_size - offsetof(XTactExtRecEntryDRec, er_data));
 
3213
 
 
3214
        /* Save the data which the header will overwrite: */
 
3215
        memcpy(save_buffer, ext_data_ptr, offsetof(XTactExtRecEntryDRec, er_data));
 
3216
        
 
3217
        reread:
 
3218
        if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
 
3219
                xt_tab_read_ext_record(ot->ot_table, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + log_size, (xtWord1 *) ext_data_ptr);
 
3220
        else {
 
3221
                if (!ot->ot_thread->st_dlog_buf.dlb_read_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + log_size, (xtWord1 *) ext_data_ptr, ot->ot_thread))
 
3222
                        goto retry_read;
 
3223
        }
 
3224
 
 
3225
        log_size2 = XT_GET_DISK_4(ext_data_ptr->er_data_size_4);
 
3226
        curr_tab_id = XT_GET_DISK_4(ext_data_ptr->er_tab_id_4);
 
3227
        curr_rec_id = XT_GET_DISK_4(ext_data_ptr->er_rec_id_4);
 
3228
 
 
3229
        if (log_size2 != log_size || curr_tab_id != ot->ot_table->tab_id || curr_rec_id != load_rec_id) {
 
3230
                /* [(3)] This can happen in the following circumstances:
 
3231
                 * - A new record is created, but the data log is not
 
3232
                 * flushed.
 
3233
                 * - The server quits.
 
3234
                 * - On restart the transaction is rolled back, but the data record
 
3235
                 *   was not written, so later a new record could be written at this
 
3236
                 *   location.
 
3237
                 * - Later the sweeper tries to cleanup this record, and finds
 
3238
                 *   that a different record has been written at this position.
 
3239
                 *
 
3240
                 * NOTE: Index entries can only be written to disk for records
 
3241
                 *       that have been committed to the disk, because uncommitted
 
3242
                 *       records may not exist in order to remove the index entry
 
3243
                 *       on cleanup.
 
3244
                 */
 
3245
                xt_register_xterr(XT_REG_CONTEXT, XT_ERR_BAD_EXT_RECORD);
 
3246
                goto retry_read;
 
3247
        }
 
3248
 
 
3249
        /* Restore the saved area: */
 
3250
        memcpy(ext_data_ptr, save_buffer, offsetof(XTactExtRecEntryDRec, er_data));
 
3251
 
 
3252
        if (retried)
 
3253
                xt_unlock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
 
3254
        return myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req);
 
3255
 
 
3256
        retry_read:
 
3257
        if (!retried) {
 
3258
                /* (1) It may be that reading the log fails because the garbage collector
 
3259
                 * has moved the record since we determined the location.
 
3260
                 * We handle this here, by re-reading the data the garbage collector
 
3261
                 * would have updated.
 
3262
                 *
 
3263
                 * (2) It may also happen that a new record is just being updated or
 
3264
                 * inserted. It is possible that the handle part of the record
 
3265
                 * has been written, but not yet the overflow.
 
3266
                 * This means that repeating the read attempt could work.
 
3267
                 *
 
3268
                 * (3) The extended data has been written by another handler and not yet
 
3269
                 * flushed. This should not happen because on committed extended
 
3270
                 * records are read, and all data should be flushed before
 
3271
                 * commit!
 
3272
                 *
 
3273
                 * NOTE: (2) above is not a problem when versioning is working
 
3274
                 * correctly. In this case, we should never try to read the extended
 
3275
                 * part of an uncommitted record (belonging to some other thread/
 
3276
                 * transaction).
 
3277
                 */
 
3278
                XTTabRecExtDRec rec_buf;
 
3279
 
 
3280
                xt_lock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
 
3281
                retried = TRUE;
 
3282
 
 
3283
                if (!xt_tab_get_rec_data(ot, load_rec_id, XT_REC_EXT_HEADER_SIZE, (xtWord1 *) &rec_buf))
 
3284
                        goto failed;
 
3285
 
 
3286
                XT_GET_LOG_REF(log_id, log_offset, &rec_buf);
 
3287
                goto reread;
 
3288
        }
 
3289
 
 
3290
        failed:
 
3291
        if (retried)
 
3292
                xt_unlock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
 
3293
        return FAILED;
 
3294
}
 
3295
 
 
3296
xtPublic xtBool xt_tab_put_rec_data(XTOpenTablePtr ot, xtRecordID rec_id, size_t size, xtWord1 *buffer, xtOpSeqNo *op_seq)
 
3297
{
 
3298
        register XTTableHPtr    tab = ot->ot_table;
 
3299
 
 
3300
        ASSERT_NS(rec_id);
 
3301
 
 
3302
        return tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, op_seq, TRUE, ot->ot_thread);
 
3303
}
 
3304
 
 
3305
xtPublic xtBool xt_tab_put_log_op_rec_data(XTOpenTablePtr ot, u_int status, xtRecordID free_rec_id, xtRecordID rec_id, size_t size, xtWord1 *buffer)
 
3306
{
 
3307
        register XTTableHPtr    tab = ot->ot_table;
 
3308
        xtOpSeqNo                               op_seq;
 
3309
 
 
3310
        ASSERT_NS(rec_id);
 
3311
 
 
3312
        if (status == XT_LOG_ENT_REC_MOVED) {
 
3313
                if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, offsetof(XTTabRecExtDRec, re_log_id_2), size, buffer, &op_seq, TRUE, ot->ot_thread))
 
3314
                        return FAILED;
 
3315
        }
 
3316
#ifdef DEBUG
 
3317
        else if (status == XT_LOG_ENT_REC_CLEANED_1) {
 
3318
                ASSERT_NS(0);   // shouldn't be used anymore
 
3319
        }
 
3320
#endif
 
3321
        else {
 
3322
                if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, &op_seq, TRUE, ot->ot_thread))
 
3323
                        return FAILED;
 
3324
        }
 
3325
 
 
3326
        return xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, free_rec_id, rec_id, size, buffer, ot->ot_thread);
 
3327
}
 
3328
 
 
3329
xtPublic xtBool xt_tab_put_log_rec_data(XTOpenTablePtr ot, u_int status, xtRecordID free_rec_id, xtRecordID rec_id, size_t size, xtWord1 *buffer, xtOpSeqNo *op_seq)
 
3330
{
 
3331
        register XTTableHPtr    tab = ot->ot_table;
 
3332
 
 
3333
        ASSERT_NS(rec_id);
 
3334
 
 
3335
        if (status == XT_LOG_ENT_REC_MOVED) {
 
3336
                if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, offsetof(XTTabRecExtDRec, re_log_id_2), size, buffer, op_seq, TRUE, ot->ot_thread))
 
3337
                        return FAILED;
 
3338
        }
 
3339
        else {
 
3340
                if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, op_seq, TRUE, ot->ot_thread))
 
3341
                        return FAILED;
 
3342
        }
 
3343
 
 
3344
        return xt_xlog_modify_table(tab->tab_id, status, *op_seq, 0, free_rec_id, rec_id, size, buffer, ot->ot_thread);
 
3345
}
 
3346
 
 
3347
xtPublic xtBool xt_tab_get_rec_data(XTOpenTablePtr ot, xtRecordID rec_id, size_t size, xtWord1 *buffer)
 
3348
{
 
3349
        register XTTableHPtr    tab = ot->ot_table;
 
3350
 
 
3351
        ASSERT_NS(rec_id);
 
3352
 
 
3353
        return tab->tab_recs.xt_tc_read(ot->ot_rec_file, rec_id, (size_t) size, buffer, ot->ot_thread);
 
3354
}
 
3355
 
 
3356
/*
 
3357
 * Note: this function grants locks even to transactions that
 
3358
 * are not specifically waiting for this transaction.
 
3359
 * This is required, because all threads waiting for 
 
3360
 * a lock should be considered "equal". In other words,
 
3361
 * they should not have to wait for the "right" transaction
 
3362
 * before they get the lock, or it will turn into a
 
3363
 * race to wait for the correct transaction.
 
3364
 *
 
3365
 * A transaction T1 can end up waiting for the wrong transaction
 
3366
 * T2, because T2 has released the lock, and given it to T3.
 
3367
 * Of course, T1 will wake up soon and realize this, but
 
3368
 * it is a matter of timing.
 
3369
 *
 
3370
 * The main point is that T2 has released the lock because
 
3371
 * it has ended (see {RELEASING-LOCKS} for more details)
 
3372
 * and therefore, there is no danger of it claiming the
 
3373
 * lock again, which can lead to a deadlock if T1 is
 
3374
 * given the lock instead of T3 in the example above.
 
3375
 * Then, if T2 tries to regain the lock before T1
 
3376
 * realizes that it has the lock.
 
3377
 */
 
3378
//static xtBool tab_get_lock_after_wait(XTThreadPtr thread, XTLockWaitPtr lw)
 
3379
//{
 
3380
//      register XTTableHPtr    tab = lw->lw_ot->ot_table;
 
3381
 
 
3382
        /* {ROW-LIST-LOCK}
 
3383
         * I don't believe this lock is required. If it is, please explain why!!
 
3384
         * XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[gl->lw_row_id % XT_ROW_RWLOCKS], thread);
 
3385
         *
 
3386
         * With the old row lock implementation a XT_TAB_ROW_WRITE_LOCK was required because
 
3387
         * the row locking did not have its own locks.
 
3388
         * The new list locking has its own locks. I was using XT_TAB_ROW_READ_LOCK,
 
3389
         * but i don't think this is required.
 
3390
         */
 
3391
//      return tab->tab_locks.xt_set_temp_lock(lw->lw_ot, lw, &lw->lw_thread->st_lock_list);
 
3392
//}
 
3393
 
 
3394
/*
 
3395
 * NOTE: Previously this function did not gain the row lock.
 
3396
 * If this change is a problem, please document why!
 
3397
 * The previously implementation did wait until no lock was on the
 
3398
 * row.
 
3399
 *
 
3400
 * I am thinking that it is simply a good idea to grab the lock,
 
3401
 * instead of waiting for no lock, before the retry. But it could
 
3402
 * result in locking more than required!
 
3403
 */
 
3404
static xtBool tab_wait_for_update(register XTOpenTablePtr ot, xtRowID row_id, xtXactID xn_id, XTThreadPtr thread)
 
3405
{
 
3406
        XTLockWaitRec   lw;
 
3407
        XTXactWaitRec   xw;
 
3408
        xtBool                  ok;
 
3409
                                
 
3410
        xw.xw_xn_id = xn_id;
 
3411
 
 
3412
        lw.lw_thread = thread;
 
3413
        lw.lw_ot = ot;
 
3414
        lw.lw_row_id = row_id;
 
3415
        lw.lw_row_updated = FALSE;
 
3416
 
 
3417
        /* First try to get the lock: */
 
3418
        if (!ot->ot_table->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list))
 
3419
                return FAILED;
 
3420
        if (lw.lw_curr_lock != XT_NO_LOCK)
 
3421
                /* Wait for the lock, then the transaction: */
 
3422
                ok = xt_xn_wait_for_xact(thread, &xw, &lw);
 
3423
        else
 
3424
                /* Just wait for the transaction: */
 
3425
                ok = xt_xn_wait_for_xact(thread, &xw, NULL);
 
3426
        
 
3427
#ifdef DEBUG_LOCK_QUEUE
 
3428
        ot->ot_table->tab_locks.rl_check(&lw);
 
3429
#endif
 
3430
        return ok;
 
3431
}
 
3432
 
 
3433
/* {WAIT-FOR}
 
3434
 * XT_OLD - The record is old. No longer visible because there is
 
3435
 * newer committed record before it in the record list.
 
3436
 * This is a special case of FALSE (the record is not visible).
 
3437
 * (see {WAIT-FOR} for details).
 
3438
 * It is significant because if we find too many of these when
 
3439
 * searching for records, then we have reason to believe the
 
3440
 * sweeper is far behind. This can happen in a test like this:
 
3441
 * runTest(INCREMENT_TEST, 2, INCREMENT_TEST_UPDATE_COUNT);
 
3442
 * What happens is T1 detects an updated row by T2,
 
3443
 * but T2 has not committed yet.
 
3444
 * It waits for T2. T2 commits and updates again before T1
 
3445
 * can update.
 
3446
 *
 
3447
 * Of course if we got a lock on the row when T2 quits, then
 
3448
 * this would not happen!
 
3449
 */
 
3450
 
 
3451
/*
 
3452
 * Is a record visible?
 
3453
 * Returns TRUE, FALSE, XT_ERR.
 
3454
 *
 
3455
 * TRUE - The record is visible.
 
3456
 * FALSE - The record is not visible.
 
3457
 * XT_ERR - An exception (error) occurred.
 
3458
 * XT_NEW - The most recent variation of this row has been returned
 
3459
 * and is to be used instead of the input!
 
3460
 * XT_REREAD - Re-read the record, and try again.
 
3461
 *
 
3462
 * Basically, a record is visible if it was committed on or before
 
3463
 * the transactions "visible time" (st_visible_time), and there
 
3464
 * are no other visible records before this record in the
 
3465
 * variation chain for the record.
 
3466
 *
 
3467
 * This holds in general, but you don't always get to see the
 
3468
 * visible record (as defined in this sense).
 
3469
 *
 
3470
 * On any kind of update (SELECT FOR UPDATE, UPDATE or DELETE), you
 
3471
 * get to see the most recent variation of the row!
 
3472
 *
 
3473
 * So on update, this function will wait if necessary for a recent
 
3474
 * update to be committed.
 
3475
 *
 
3476
 * So an update is a kind of "committed read" with a wait for
 
3477
 * uncommitted records.
 
3478
 *
 
3479
 * The result:
 
3480
 * - INSERTs may not be seen by the update read, depending on when
 
3481
 *   they occur.
 
3482
 * - Records may be returned in non-index order.
 
3483
 * - New records returned must be checked again by an index scan
 
3484
 *   to make sure they conform to the condition!
 
3485
 * 
 
3486
 * CREATE TABLE test_tab (ID int primary key, Value int, Name varchar(20), 
 
3487
 * index(Value, Name)) ENGINE=pbxt;
 
3488
 * INSERT test_tab values(4, 2, 'D');
 
3489
 * INSERT test_tab values(5, 2, 'E');
 
3490
 * INSERT test_tab values(6, 2, 'F');
 
3491
 * INSERT test_tab values(7, 2, 'G');
 
3492
 * 
 
3493
 * -- C1
 
3494
 * begin;
 
3495
 * select * from test_tab where id = 6 for update;
 
3496
 * -- C2
 
3497
 * begin;
 
3498
 * select * from test_tab where value = 2 order by value, name for update;
 
3499
 * -- C1
 
3500
 * update test_tab set Name = 'A' where id = 7;
 
3501
 * commit;
 
3502
 * -- C2
 
3503
 * Result order D, E, F, A.
 
3504
 *
 
3505
 * But Jim does it like this, so it should be OK.
 
3506
 */
 
3507
static int tab_visible(register XTOpenTablePtr ot, XTTabRecHeadDPtr rec_head, xtRecordID *new_rec_id)
 
3508
{
 
3509
        XTThreadPtr                             thread = ot->ot_thread;
 
3510
        xtXactID                                xn_id;
 
3511
        XTTabRecHeadDRec                var_head;
 
3512
        xtRowID                                 row_id;
 
3513
        xtRecordID                              var_rec_id;
 
3514
        register XTTableHPtr    tab;
 
3515
        xtBool                                  wait = FALSE;
 
3516
        xtXactID                                wait_xn_id = 0;
 
3517
#ifdef TRACE_VARIATIONS
 
3518
        char                                    t_buf[500];
 
3519
        int                                             len;
 
3520
#endif
 
3521
        int                                             result = TRUE;
 
3522
        xtBool                                  rec_clean;
 
3523
        xtRecordID                              invalid_rec;
 
3524
 
 
3525
        retry:
 
3526
        /* It can be that between the time that I read the index,
 
3527
         * and the time that I try to access the
 
3528
         * record, that the record is removed by
 
3529
         * the sweeper!
 
3530
         */
 
3531
        if (XT_REC_NOT_VALID(rec_head->tr_rec_type_1))
 
3532
                return FALSE;
 
3533
 
 
3534
        row_id = XT_GET_DISK_4(rec_head->tr_row_id_4);
 
3535
 
 
3536
        /* This can happen if the row has been removed, and
 
3537
         * reused:
 
3538
         */
 
3539
        if (ot->ot_curr_row_id && row_id != ot->ot_curr_row_id)
 
3540
                return FALSE;
 
3541
 
 
3542
#ifdef TRACE_VARIATIONS
 
3543
        len = sprintf(t_buf, "row=%d rec=%d ", (int) row_id, (int) ot->ot_curr_rec_id);
 
3544
#endif
 
3545
        if (!(rec_clean = XT_REC_IS_CLEAN(rec_head->tr_rec_type_1))) {
 
3546
                /* The record is not clean, which means it has not been swept.
 
3547
                 * So we have to check if it is visible.
 
3548
                 */
 
3549
                xn_id = XT_GET_DISK_4(rec_head->tr_xact_id_4);
 
3550
                switch (xt_xn_status(ot, xn_id, ot->ot_curr_rec_id)) {
 
3551
                        case XT_XN_VISIBLE:
 
3552
                                break;
 
3553
                        case XT_XN_NOT_VISIBLE:
 
3554
                                if (ot->ot_for_update) {
 
3555
                                        /* It is visible, only if it is an insert,
 
3556
                                         * which means it has no previous variation.
 
3557
                                         * Note, if an insert is updated, the record
 
3558
                                         * should be overwritten (TODO - check this).
 
3559
                                         */
 
3560
                                        var_rec_id = XT_GET_DISK_4(rec_head->tr_prev_rec_id_4);
 
3561
                                        if (!var_rec_id)
 
3562
                                                break;
 
3563
#ifdef TRACE_VARIATIONS
 
3564
                                        if (len <= 450)
 
3565
                                                len += sprintf(t_buf+len, "OTHER COMMIT (OVERWRITTEN) T%d\n", (int) xn_id);
 
3566
                                        xt_ttracef(thread, "%s", t_buf);
 
3567
#endif
 
3568
                                }
 
3569
#ifdef TRACE_VARIATIONS
 
3570
                                else {
 
3571
                                        if (len <= 450)
 
3572
                                                len += sprintf(t_buf+len, "OTHER COMMIT T%d\n", (int) xn_id);
 
3573
                                        xt_ttracef(thread, "%s", t_buf);
 
3574
                                }
 
3575
#endif
 
3576
                                /* {WAKE-SW}
 
3577
                                 * The record is not visible, although it has been committed.
 
3578
                                 * Clean the transaction ASAP.
 
3579
                                 */
 
3580
                                ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
 
3581
                                return FALSE;
 
3582
                        case XT_XN_ABORTED:
 
3583
                                /* {WAKE-SW}
 
3584
                                 * Reading an aborted record, this transaction
 
3585
                                 * must be cleaned up ASAP!
 
3586
                                 */
 
3587
                                ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
 
3588
#ifdef TRACE_VARIATIONS
 
3589
                                if (len <= 450)
 
3590
                                        len += sprintf(t_buf+len, "ABORTED T%d\n", (int) xn_id);
 
3591
                                xt_ttracef(thread, "%s", t_buf);
 
3592
#endif
 
3593
                                return FALSE;
 
3594
                        case XT_XN_MY_UPDATE:
 
3595
                                /* This is a record written by this transaction. */
 
3596
                                if (thread->st_is_update) {
 
3597
                                        /* Check that it was not written by the current update statement: */
 
3598
                                        if (XT_STAT_ID_MASK(ot->ot_update_id) == rec_head->tr_stat_id_1) {
 
3599
#ifdef TRACE_VARIATIONS
 
3600
                                                if (len <= 450)
 
3601
                                                        len += sprintf(t_buf+len, "MY UPDATE IN THIS STATEMENT T%d\n", (int) xn_id);
 
3602
                                                xt_ttracef(thread, "%s", t_buf);
 
3603
#endif
 
3604
                                                return FALSE;
 
3605
                                        }
 
3606
                                }
 
3607
                                ot->ot_curr_row_id = row_id;
 
3608
                                ot->ot_curr_updated = TRUE;
 
3609
                                if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
 
3610
                                        return XT_ERR;
 
3611
                                /* It is visible if it is at the front of the list.
 
3612
                                 * An update can end up not being at the front of the list
 
3613
                                 * if it is deleted afterwards!
 
3614
                                 */
 
3615
#ifdef TRACE_VARIATIONS
 
3616
                                if (len <= 450) {
 
3617
                                        if (var_rec_id == ot->ot_curr_rec_id)
 
3618
                                                len += sprintf(t_buf+len, "MY UPDATE T%d\n", (int) xn_id);
 
3619
                                        else
 
3620
                                                len += sprintf(t_buf+len, "MY UPDATE (OVERWRITTEN) T%d\n", (int) xn_id);
 
3621
                                }
 
3622
                                xt_ttracef(thread, "%s", t_buf);
 
3623
#endif
 
3624
                                return var_rec_id == ot->ot_curr_rec_id;
 
3625
                        case XT_XN_OTHER_UPDATE:
 
3626
                                if (ot->ot_for_update) {
 
3627
                                        /* If this is an insert, we are interested!
 
3628
                                         * Updated values are handled below. This is because
 
3629
                                         * the changed (new) records returned below are always
 
3630
                                         * followed (in the version chain) by the record
 
3631
                                         * we would have returned (if nothing had changed).
 
3632
                                         *
 
3633
                                         * As a result, we only return records here which have
 
3634
                                         * no "history". 
 
3635
                                         */
 
3636
                                        var_rec_id = XT_GET_DISK_4(rec_head->tr_prev_rec_id_4);
 
3637
                                        if (!var_rec_id) {
 
3638
#ifdef TRACE_VARIATIONS
 
3639
                                                if (len <= 450)
 
3640
                                                        len += sprintf(t_buf+len, "OTHER INSERT (WAIT FOR) T%d\n", (int) xn_id);
 
3641
                                                xt_ttracef(thread, "%s", t_buf);
 
3642
#endif
 
3643
                                                if (!tab_wait_for_update(ot, row_id, xn_id, thread))
 
3644
                                                        return XT_ERR;
 
3645
                                                if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
 
3646
                                                        return XT_ERR;
 
3647
                                                rec_head = &var_head;
 
3648
                                                goto retry;
 
3649
                                        }
 
3650
                                }
 
3651
#ifdef TRACE_VARIATIONS
 
3652
                                if (len <= 450)
 
3653
                                        len += sprintf(t_buf+len, "OTHER UPDATE T%d\n", (int) xn_id);
 
3654
                                xt_ttracef(thread, "%s", t_buf);
 
3655
#endif
 
3656
                                return FALSE;
 
3657
                        case XT_XN_REREAD:
 
3658
#ifdef TRACE_VARIATIONS
 
3659
                                if (len <= 450)
 
3660
                                        len += sprintf(t_buf+len, "REREAD?! T%d\n", (int) xn_id);
 
3661
                                xt_ttracef(thread, "%s", t_buf);
 
3662
#endif
 
3663
                                return XT_REREAD;
 
3664
                }
 
3665
        }
 
3666
 
 
3667
        /* Follow the variation chain until we come to this record.
 
3668
         * If it is not the first visible variation then
 
3669
         * it is not visible at all. If it is not found on the
 
3670
         * variation chain, it is also not visible.
 
3671
         */
 
3672
        tab = ot->ot_table;
 
3673
 
 
3674
        retry_2:
 
3675
 
 
3676
#ifdef XT_USE_LIST_BASED_ROW_LOCKS
 
3677
        /* The list based row locks use their own locks, so
 
3678
         * it is not necessary to get a write lock here.
 
3679
         */
 
3680
        XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3681
#else
 
3682
        if (ot->ot_for_update)
 
3683
                XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3684
        else
 
3685
                XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3686
#endif
 
3687
 
 
3688
        invalid_rec = 0;
 
3689
        retry_3:
 
3690
        if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
 
3691
                goto failed;
 
3692
#ifdef TRACE_VARIATIONS
 
3693
        len += sprintf(t_buf+len, "ROW=%d", (int) row_id);
 
3694
#endif
 
3695
        while (var_rec_id != ot->ot_curr_rec_id) {
 
3696
                if (!var_rec_id) {
 
3697
#ifdef TRACE_VARIATIONS
 
3698
                        xt_ttracef(thread, "row=%d rec=%d NOT VISI not found in list\n", (int) row_id, (int) ot->ot_curr_rec_id);
 
3699
#endif
 
3700
                        goto not_found;
 
3701
                }
 
3702
                if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
 
3703
                        goto failed;
 
3704
#ifdef TRACE_VARIATIONS
 
3705
                if (len <= 450)
 
3706
                        len += sprintf(t_buf+len, " -> %d(%d)", (int) var_rec_id, (int) var_head.tr_rec_type_1);
 
3707
#endif
 
3708
                /* All clean records are visible, by all transactions: */
 
3709
                if (XT_REC_IS_CLEAN(var_head.tr_rec_type_1)) {
 
3710
#ifdef TRACE_VARIATIONS
 
3711
                        xt_ttracef(thread, "row=%d rec=%d NOT VISI clean rec found\n", (int) row_id, (int) ot->ot_curr_rec_id);
 
3712
#endif
 
3713
                        goto not_found;
 
3714
                }
 
3715
                if (XT_REC_IS_FREE(var_head.tr_rec_type_1)) {
 
3716
#ifdef TRACE_VARIATIONS
 
3717
                        xt_ttracef(thread, "row=%d rec=%d NOT VISI free rec found?!\n", (int) row_id, (int) ot->ot_curr_rec_id);
 
3718
#endif
 
3719
                        /*
 
3720
                         * After an analysis we came to conclusion that this situation is
 
3721
                         * possible and valid. It can happen if index scan and row deletion
 
3722
                         * go in parallel:
 
3723
                         *
 
3724
                         *      Client Thread                                Sweeper
 
3725
                         *      -------------                                -------
 
3726
                         *   1. start index scan, lock the index file.
 
3727
                         *                                                2. start row deletion, wait for index lock
 
3728
                         *   3. unlock the index file, start search for 
 
3729
                         *      the valid version of the record
 
3730
                         *                                                4. delete the row, mark record as freed, 
 
3731
                         *                                                   but not yet cleaned by sweeper
 
3732
                         *   5. observe the record being freed
 
3733
                         *
 
3734
                         * after these steps we can get here, if the record was marked as free after
 
3735
                         * the tab_visible was entered by the scanning thread. 
 
3736
                         *
 
3737
                         */
 
3738
                        if (invalid_rec != var_rec_id) {
 
3739
                                /* This was "var_rec_id = invalid_rec", caused an infinite loop (bug #310184!) */
 
3740
                                invalid_rec = var_rec_id;
 
3741
                                goto retry_3;
 
3742
                        }
 
3743
                        /* Assume end of list. */
 
3744
                        goto not_found;
 
3745
                }
 
3746
 
 
3747
                /* This can happen if the row has been removed, and
 
3748
                 * reused:
 
3749
                 */
 
3750
                if (row_id != XT_GET_DISK_4(var_head.tr_row_id_4))
 
3751
                        goto not_found;
 
3752
 
 
3753
                xn_id = XT_GET_DISK_4(var_head.tr_xact_id_4);
 
3754
                /* This variation is visible if committed before this
 
3755
                 * transaction started, or updated by this transaction.
 
3756
                 *
 
3757
                 * We now know that this is the valid variation for
 
3758
                 * this record (for this table) for this transaction!
 
3759
                 * This will not change, unless the transaction
 
3760
                 * updates the record (again).
 
3761
                 *
 
3762
                 * So we can store this information as a hint, if
 
3763
                 * we see other variations belonging to this record,
 
3764
                 * then we can ignore them immediately!
 
3765
                 */
 
3766
                switch (xt_xn_status(ot, xn_id, var_rec_id)) {
 
3767
                        case XT_XN_VISIBLE:
 
3768
                                /* {WAKE-SW}
 
3769
                                 * We have encountered a record that has been overwritten, if the
 
3770
                                 * record has not been cleaned, then the sweeper is too far
 
3771
                                 * behind!
 
3772
                                 */
 
3773
                                if (!rec_clean)
 
3774
                                        ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
 
3775
#ifdef TRACE_VARIATIONS
 
3776
                                xt_ttracef(thread, "row=%d rec=%d NOT VISI committed rec found\n", (int) row_id, (int) ot->ot_curr_rec_id);
 
3777
#endif
 
3778
                                goto not_found;
 
3779
                        case XT_XN_NOT_VISIBLE:
 
3780
                                if (ot->ot_for_update) {
 
3781
                                        /* Substitute this record for the one we
 
3782
                                         * are reading!!
 
3783
                                         */
 
3784
                                        if (result == TRUE) {
 
3785
                                                if (XT_REC_IS_DELETE(var_head.tr_rec_type_1))
 
3786
                                                        result = FALSE;
 
3787
                                                else {
 
3788
                                                        *new_rec_id = var_rec_id;
 
3789
                                                        result = XT_NEW;
 
3790
                                                }
 
3791
                                        }
 
3792
                                }
 
3793
                                break;
 
3794
                        case XT_XN_ABORTED:
 
3795
                                /* Ignore the record, it will be removed. */
 
3796
                                break;
 
3797
                        case XT_XN_MY_UPDATE:
 
3798
#ifdef TRACE_VARIATIONS
 
3799
                                xt_ttracef(thread, "row=%d rec=%d NOT VISI my update found\n", (int) row_id, (int) ot->ot_curr_rec_id);
 
3800
#endif
 
3801
                                goto not_found;
 
3802
                        case XT_XN_OTHER_UPDATE:
 
3803
                                /* Wait for this update to commit or abort: */
 
3804
                                if (!wait) {
 
3805
                                        wait = TRUE;
 
3806
                                        wait_xn_id = xn_id;
 
3807
                                }
 
3808
#ifdef TRACE_VARIATIONS
 
3809
                                if (len <= 450)
 
3810
                                        len += sprintf(t_buf+len, "-T%d", (int) wait_xn_id);
 
3811
#endif
 
3812
                                break;
 
3813
                        case XT_XN_REREAD:
 
3814
                                /* {RETRY-READ}
 
3815
                                 * TODO: This is not as "correct" as it could be.
 
3816
                                 * Such records should be considered to be aborted,
 
3817
                                 * and removed from the list.
 
3818
                                 */
 
3819
                                if (invalid_rec != var_rec_id) {
 
3820
                                        invalid_rec = var_rec_id;
 
3821
                                        goto retry_3;
 
3822
                                }
 
3823
                                if (!tab_record_corrupt(ot, row_id, var_rec_id, true, 1))
 
3824
                                        goto failed;
 
3825
 
 
3826
                                /* Assume end of list. */
 
3827
#ifdef XT_CRASH_DEBUG
 
3828
                                /* Should not happen! */
 
3829
                                xt_crash_me();
 
3830
#endif
 
3831
                                goto not_found;
 
3832
                }
 
3833
                var_rec_id = XT_GET_DISK_4(var_head.tr_prev_rec_id_4);
 
3834
        }
 
3835
#ifdef TRACE_VARIATIONS
 
3836
        if (len <= 450)
 
3837
                sprintf(t_buf+len, " -> %d(%d)\n", (int) var_rec_id, (int) rec_head->tr_rec_type_1);
 
3838
        else
 
3839
                sprintf(t_buf+len, " ...\n");
 
3840
        //xt_ttracef(thread, "%s", t_buf);
 
3841
#endif
 
3842
 
 
3843
        if (ot->ot_for_update) {
 
3844
                xtBool                  ok;
 
3845
                XTLockWaitRec   lw;
 
3846
 
 
3847
                if (wait) {
 
3848
                        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3849
#ifdef TRACE_VARIATIONS
 
3850
                        xt_ttracef(thread, "T%d WAIT FOR T%d (will retry)\n", (int) thread->st_xact_data->xd_start_xn_id, (int) wait_xn_id);
 
3851
#endif
 
3852
                        if (!tab_wait_for_update(ot, row_id, wait_xn_id, thread))
 
3853
                                return XT_ERR;
 
3854
                        wait = FALSE;
 
3855
                        wait_xn_id = 0;
 
3856
                        /*
 
3857
                         * Retry in order to try to avoid missing
 
3858
                         * any records that we should see in FOR UPDATE
 
3859
                         * mode.
 
3860
                         *
 
3861
                         * We also want to take another look at the record
 
3862
                         * we just tried to read.
 
3863
                         *
 
3864
                         * If it has been updated, then a new record has
 
3865
                         * been created. This will be detected when we
 
3866
                         * try to read it again, and XT_NEW will be returned.
 
3867
                         */
 
3868
                        thread->st_statistics.st_retry_index_scan++;
 
3869
                        return XT_RETRY;
 
3870
                }
 
3871
 
 
3872
                /* {ROW-LIST-LOCK} */
 
3873
                lw.lw_thread = thread;
 
3874
                lw.lw_ot = ot;
 
3875
                lw.lw_row_id = row_id;
 
3876
                lw.lw_row_updated = FALSE;
 
3877
                ok = tab->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list);
 
3878
                XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3879
                if (!ok) {
 
3880
#ifdef DEBUG_LOCK_QUEUE
 
3881
                        ot->ot_table->tab_locks.rl_check(&lw);
 
3882
#endif
 
3883
                        return XT_ERR;
 
3884
                }
 
3885
                if (lw.lw_curr_lock != XT_NO_LOCK) {
 
3886
#ifdef TRACE_VARIATIONS
 
3887
                        xt_ttracef(thread, "T%d WAIT FOR LOCK(%s) T%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) lw.lw_curr_lock == XT_TEMP_LOCK ? "temp" : "perm", (int) xn_id);
 
3888
#endif
 
3889
                        if (!xt_xn_wait_for_xact(thread, NULL, &lw)) {
 
3890
#ifdef DEBUG_LOCK_QUEUE
 
3891
                                ot->ot_table->tab_locks.rl_check(&lw);
 
3892
#endif
 
3893
                                return XT_ERR;
 
3894
                        }
 
3895
#ifdef DEBUG_LOCK_QUEUE
 
3896
                        ot->ot_table->tab_locks.rl_check(&lw);
 
3897
#endif
 
3898
#ifdef TRACE_VARIATIONS
 
3899
                        len = sprintf(t_buf, "(retry): row=%d rec=%d ", (int) row_id, (int) ot->ot_curr_rec_id);
 
3900
#endif
 
3901
                        /* GOTCHA!
 
3902
                         * Reset the result before we go down the list again, to make sure we
 
3903
                         * get the latest record!!
 
3904
                         */
 
3905
                        result = TRUE;
 
3906
                        thread->st_statistics.st_reread_record_list++;
 
3907
                        goto retry_2;
 
3908
                }
 
3909
#ifdef DEBUG_LOCK_QUEUE
 
3910
                ot->ot_table->tab_locks.rl_check(&lw);
 
3911
#endif
 
3912
        }
 
3913
        else {
 
3914
                XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3915
        }
 
3916
 
 
3917
#ifdef TRACE_VARIATIONS
 
3918
        if (result == XT_NEW)
 
3919
                xt_ttracef(thread, "row=%d rec=%d RETURN NEW %d\n", (int) row_id, (int) ot->ot_curr_rec_id, (int) *new_rec_id);
 
3920
        else if (result)
 
3921
                xt_ttracef(thread, "row=%d rec=%d VISIBLE\n", (int) row_id, (int) ot->ot_curr_rec_id);
 
3922
        else
 
3923
                xt_ttracef(thread, "row=%d rec=%d RETURN NOT VISIBLE (NEW)\n", (int) row_id, (int) ot->ot_curr_rec_id);
 
3924
#endif
 
3925
 
 
3926
        ot->ot_curr_row_id = row_id;
 
3927
        ot->ot_curr_updated = FALSE;
 
3928
        return result;
 
3929
 
 
3930
        not_found:
 
3931
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3932
        return FALSE;
 
3933
 
 
3934
        failed:
 
3935
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
3936
        return XT_ERR;
 
3937
}
 
3938
 
 
3939
/*
 
3940
 * Return TRUE if the record has been read, and is visible.
 
3941
 * Return FALSE if the record is not visible.
 
3942
 * Return XT_ERR if an error occurs.
 
3943
 */
 
3944
xtPublic int xt_tab_visible(XTOpenTablePtr ot)
 
3945
{
 
3946
        xtRowID                         row_id;
 
3947
        XTTabRecHeadDRec        rec_head;
 
3948
        xtRecordID                      new_rec_id;
 
3949
        xtBool                          read_again = FALSE;
 
3950
        int                                     r;
 
3951
 
 
3952
        if ((row_id = ot->ot_curr_row_id)) {
 
3953
                /* Fast track, do a quick check.
 
3954
                 * Row ID is only set if this record has been committed,
 
3955
                 * (and swept).
 
3956
                 * Check if it is the first on the list!
 
3957
                 */
 
3958
                xtRecordID var_rec_id;
 
3959
 
 
3960
                retry:
 
3961
                if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
 
3962
                        return XT_ERR;
 
3963
                if (ot->ot_curr_rec_id == var_rec_id) {
 
3964
                        /* Looks good.. */
 
3965
                        if (ot->ot_for_update) {
 
3966
                                XTThreadPtr             thread = ot->ot_thread;
 
3967
                                XTTableHPtr             tab = ot->ot_table;
 
3968
                                XTLockWaitRec   lw;
 
3969
 
 
3970
                                /* {ROW-LIST-LOCK} */
 
3971
                                lw.lw_thread = thread;
 
3972
                                lw.lw_ot = ot;
 
3973
                                lw.lw_row_id = row_id;
 
3974
                                lw.lw_row_updated = FALSE;
 
3975
                                if (!tab->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list)) {
 
3976
#ifdef DEBUG_LOCK_QUEUE
 
3977
                                        ot->ot_table->tab_locks.rl_check(&lw);
 
3978
#endif
 
3979
                                        return XT_ERR;
 
3980
                                }
 
3981
                                if (lw.lw_curr_lock != XT_NO_LOCK) {
 
3982
                                        if (!xt_xn_wait_for_xact(thread, NULL, &lw)) {
 
3983
#ifdef DEBUG_LOCK_QUEUE
 
3984
                                                ot->ot_table->tab_locks.rl_check(&lw);
 
3985
#endif
 
3986
                                                return XT_ERR;
 
3987
                                        }
 
3988
#ifdef DEBUG_LOCK_QUEUE
 
3989
                                        ot->ot_table->tab_locks.rl_check(&lw);
 
3990
#endif
 
3991
                                        goto retry;
 
3992
                                }
 
3993
#ifdef DEBUG_LOCK_QUEUE
 
3994
                                ot->ot_table->tab_locks.rl_check(&lw);
 
3995
#endif
 
3996
                        }
 
3997
                        return TRUE;
 
3998
                }
 
3999
        }
 
4000
 
 
4001
        reread:
 
4002
        if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
 
4003
                return XT_ERR;
 
4004
 
 
4005
        switch ((r = tab_visible(ot, &rec_head, &new_rec_id))) {
 
4006
                case XT_NEW:
 
4007
                        ot->ot_curr_rec_id = new_rec_id;
 
4008
                        break;
 
4009
                case XT_REREAD:
 
4010
                        /* Avoid infinite loop: */
 
4011
                        if (read_again) {
 
4012
                                /* Should not happen! */
 
4013
                                if (!tab_record_corrupt(ot, row_id, ot->ot_curr_rec_id, true, 2))
 
4014
                                        return XT_ERR;
 
4015
#ifdef XT_CRASH_DEBUG
 
4016
                                /* Generate a core dump! */
 
4017
                                xt_crash_me();
 
4018
#endif
 
4019
                                return FALSE;
 
4020
                        }
 
4021
                        read_again = TRUE;
 
4022
                        goto reread;
 
4023
                default:
 
4024
                        break;
 
4025
        }
 
4026
        return r;
 
4027
}
 
4028
 
 
4029
/*
 
4030
 * Read a record, and return one of the following:
 
4031
 * TRUE - the record has been read, and is visible.
 
4032
 * FALSE - the record is not visible.
 
4033
 * XT_ERR - an error occurs.
 
4034
 * XT_NEW - Means the expected record has been changed.
 
4035
 * When doing an index scan, the conditions must be checked again!
 
4036
 */
 
4037
xtPublic int xt_tab_read_record(register XTOpenTablePtr ot, xtWord1 *buffer)
 
4038
{
 
4039
        register XTTableHPtr    tab = ot->ot_table;
 
4040
        size_t                                  rec_size = tab->tab_dic.dic_rec_size;
 
4041
        xtRecordID                              new_rec_id;
 
4042
        int                                             result;
 
4043
        xtBool                                  read_again = FALSE;
 
4044
 
 
4045
        if (!(ot->ot_thread->st_xact_data)) {
 
4046
                xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_TRANSACTION);
 
4047
                return XT_ERR;
 
4048
        }
 
4049
 
 
4050
        reread:
 
4051
        if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, rec_size, ot->ot_row_rbuffer))
 
4052
                return XT_ERR;
 
4053
 
 
4054
        switch (tab_visible(ot, (XTTabRecHeadDPtr) ot->ot_row_rbuffer, &new_rec_id)) {
 
4055
                case FALSE:
 
4056
                        return FALSE;
 
4057
                case XT_ERR:
 
4058
                        return XT_ERR;
 
4059
                case XT_NEW:
 
4060
                        if (!xt_tab_get_rec_data(ot, new_rec_id, rec_size, ot->ot_row_rbuffer))
 
4061
                                return XT_ERR;
 
4062
                        ot->ot_curr_rec_id = new_rec_id;
 
4063
                        result = XT_NEW;
 
4064
                        break;
 
4065
                case XT_RETRY:
 
4066
                        return XT_RETRY;
 
4067
                case XT_REREAD:
 
4068
                        /* Avoid infinite loop: */
 
4069
                        if (read_again) {
 
4070
                                /* Should not happen! */
 
4071
                                if (!tab_record_corrupt(ot, XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4), ot->ot_curr_rec_id, true, 3))
 
4072
                                        return XT_ERR;
 
4073
#ifdef XT_CRASH_DEBUG
 
4074
                                /* Generate a core dump! */
 
4075
                                xt_crash_me();
 
4076
#endif
 
4077
                                return FALSE;
 
4078
                        }
 
4079
                        read_again = TRUE;
 
4080
                        goto reread;
 
4081
                default:
 
4082
                        result = OK;
 
4083
                        break;
 
4084
        }
 
4085
 
 
4086
        if (ot->ot_rec_fixed)
 
4087
                memcpy(buffer, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
 
4088
        else if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
 
4089
                if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
 
4090
                        return XT_ERR;
 
4091
        }
 
4092
        else {
 
4093
                u_int cols_req = ot->ot_cols_req;
 
4094
 
 
4095
                ASSERT_NS(cols_req);
 
4096
                if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
 
4097
                        if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
 
4098
                                return XT_ERR;
 
4099
                }
 
4100
                else {
 
4101
                        if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
 
4102
                                return XT_ERR;
 
4103
                }
 
4104
        }
 
4105
 
 
4106
        return result;
 
4107
}
 
4108
 
 
4109
/*
 
4110
 * Returns:
 
4111
 *
 
4112
 * TRUE/OK - record was read.
 
4113
 * FALSE/FAILED - An error occurred.
 
4114
 */
 
4115
xtPublic int xt_tab_dirty_read_record(register XTOpenTablePtr ot, xtWord1 *buffer)
 
4116
{
 
4117
        register XTTableHPtr    tab = ot->ot_table;
 
4118
        size_t                                  rec_size = tab->tab_dic.dic_rec_size;
 
4119
 
 
4120
        if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, rec_size, ot->ot_row_rbuffer))
 
4121
                return FAILED;
 
4122
 
 
4123
        if (XT_REC_NOT_VALID(ot->ot_row_rbuffer[0])) {
 
4124
                /* Should not happen! */
 
4125
                xt_register_xterr(XT_REG_CONTEXT, XT_ERR_RECORD_DELETED);
 
4126
                return FAILED;
 
4127
        }
 
4128
 
 
4129
        ot->ot_curr_row_id = XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4);
 
4130
        ot->ot_curr_updated =
 
4131
                (XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_xact_id_4) == ot->ot_thread->st_xact_data->xd_start_xn_id);
 
4132
 
 
4133
        if (ot->ot_rec_fixed)
 
4134
                memcpy(buffer, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
 
4135
        else if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
 
4136
                if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
 
4137
                        return FAILED;
 
4138
        }
 
4139
        else {
 
4140
                u_int cols_req = ot->ot_cols_req;
 
4141
 
 
4142
                ASSERT_NS(cols_req);
 
4143
                if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
 
4144
                        if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
 
4145
                                return FAILED;
 
4146
                }
 
4147
                else {
 
4148
                        if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
 
4149
                                return FAILED;
 
4150
                }
 
4151
        }
 
4152
 
 
4153
        return OK;
 
4154
}
 
4155
 
 
4156
#ifdef XT_USE_ROW_REC_MMAP_FILES
 
4157
/* Loading into cache is not required,
 
4158
 * Instead we copy the memory map to load the
 
4159
 * data.
 
4160
 */
 
4161
#define TAB_ROW_LOAD_CACHE              FALSE
 
4162
#else
 
4163
#define TAB_ROW_LOAD_CACHE              TRUE
 
4164
#endif
 
4165
 
 
4166
/*
 
4167
 * Pull the entire row pointer file into memory.
 
4168
 */
 
4169
xtPublic void xt_tab_load_row_pointers(XTThreadPtr self, XTOpenTablePtr ot)
 
4170
{
 
4171
        XTTableHPtr     tab = ot->ot_table;
 
4172
        xtRecordID      eof_rec_id = tab->tab_row_eof_id;
 
4173
        xtInt8          usage;
 
4174
        xtWord1         *buffer = NULL;
 
4175
 
 
4176
        /* Check if there is enough cache: */
 
4177
        usage = xt_tc_get_usage();
 
4178
        if (xt_tc_get_high() > usage)
 
4179
                usage = xt_tc_get_high();
 
4180
        if (usage + ((xtInt8) eof_rec_id * (xtInt8) tab->tab_rows.tci_rec_size) < xt_tc_get_size()) {
 
4181
                xtRecordID                      rec_id;
 
4182
                size_t                          poffset, tfer;
 
4183
                off_t                           offset, end_offset;
 
4184
                XTTabCachePagePtr       page;
 
4185
                
 
4186
                end_offset = xt_row_id_to_row_offset(tab, eof_rec_id);
 
4187
                rec_id = 1;
 
4188
                while (rec_id < eof_rec_id) {
 
4189
                        if (!tab->tab_rows.xt_tc_get_page(ot->ot_row_file, rec_id, TAB_ROW_LOAD_CACHE, &page, &poffset, self))
 
4190
                                xt_throw(self);
 
4191
                        if (page)
 
4192
                                tab->tab_rows.xt_tc_release_page(ot->ot_row_file, page, self);
 
4193
                        else {
 
4194
                                xtWord1 *buff_ptr;
 
4195
 
 
4196
                                if (!buffer)
 
4197
                                        buffer = (xtWord1 *) xt_malloc(self, tab->tab_rows.tci_page_size);
 
4198
                                offset = xt_row_id_to_row_offset(tab, rec_id);
 
4199
                                tfer = tab->tab_rows.tci_page_size;
 
4200
                                if (offset + (off_t) tfer > end_offset)
 
4201
                                        tfer = (size_t) (end_offset - offset);
 
4202
                                XT_LOCK_MEMORY_PTR(buff_ptr, ot->ot_row_file, offset, tfer, &self->st_statistics.st_rec, self);
 
4203
                                if (buff_ptr) {
 
4204
                                        memcpy(buffer, buff_ptr, tfer);
 
4205
                                        XT_UNLOCK_MEMORY_PTR(ot->ot_row_file, buff_ptr, self);
 
4206
                                }
 
4207
                        }
 
4208
                        rec_id += tab->tab_rows.tci_rows_per_page;
 
4209
                }
 
4210
                if (buffer)
 
4211
                        xt_free(self, buffer);
 
4212
        }
 
4213
}
 
4214
 
 
4215
xtPublic void xt_tab_load_table(XTThreadPtr self, XTOpenTablePtr ot)
 
4216
{
 
4217
        xt_load_pages(self, ot);
 
4218
        xt_load_indices(self, ot);
 
4219
}
 
4220
 
 
4221
xtPublic xtBool xt_tab_load_record(register XTOpenTablePtr ot, xtRecordID rec_id, XTInfoBufferPtr rec_buf)
 
4222
{
 
4223
        register XTTableHPtr    tab = ot->ot_table;
 
4224
        size_t                                  rec_size = tab->tab_dic.dic_rec_size;
 
4225
 
 
4226
        if (!xt_tab_get_rec_data(ot, rec_id, rec_size, ot->ot_row_rbuffer))
 
4227
                return FAILED;
 
4228
 
 
4229
        if (XT_REC_NOT_VALID(ot->ot_row_rbuffer[0])) {
 
4230
                /* Should not happen! */
 
4231
                XTThreadPtr self = ot->ot_thread;
 
4232
 
 
4233
                xt_log(XT_WARNING, "Recently updated record invalid\n");
 
4234
                return OK;
 
4235
        }
 
4236
 
 
4237
        ot->ot_curr_row_id = XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4);
 
4238
        ot->ot_curr_updated =
 
4239
                (XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_xact_id_4) == ot->ot_thread->st_xact_data->xd_start_xn_id);
 
4240
 
 
4241
        if (ot->ot_rec_fixed) {
 
4242
                size_t size = rec_size - XT_REC_FIX_HEADER_SIZE;
 
4243
                if (!xt_ib_alloc(NULL, rec_buf, size))
 
4244
                        return FAILED;
 
4245
                memcpy(rec_buf->ib_db.db_data, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, size);
 
4246
        }
 
4247
        else {
 
4248
                if (!xt_ib_alloc(NULL, rec_buf, tab->tab_dic.dic_mysql_buf_size))
 
4249
                        return FAILED;
 
4250
                if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
 
4251
                        if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_buf->ib_db.db_data, ot->ot_cols_req))
 
4252
                                return FAILED;
 
4253
                }
 
4254
                else {
 
4255
                        u_int cols_req = ot->ot_cols_req;
 
4256
 
 
4257
                        ASSERT_NS(cols_req);
 
4258
                        if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
 
4259
                                if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, rec_buf->ib_db.db_data, cols_req))
 
4260
                                        return FAILED;
 
4261
                        }
 
4262
                        else {
 
4263
                                if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, rec_buf->ib_db.db_data, cols_req))
 
4264
                                        return FAILED;
 
4265
                        }
 
4266
                }
 
4267
        }
 
4268
 
 
4269
        return OK;
 
4270
}
 
4271
 
 
4272
xtPublic xtBool xt_tab_free_row(XTOpenTablePtr ot, XTTableHPtr tab, xtRowID row_id)
 
4273
{
 
4274
        XTTabRowRefDRec free_row;
 
4275
        xtRowID                 prev_row;
 
4276
        xtOpSeqNo               op_seq;
 
4277
 
 
4278
        ASSERT_NS(row_id); // Cannot free the header!
 
4279
 
 
4280
        xt_lock_mutex_ns(&tab->tab_row_lock);
 
4281
        prev_row = tab->tab_row_free_id;
 
4282
        XT_SET_DISK_4(free_row.rr_ref_id_4, prev_row);
 
4283
        if (!tab->tab_rows.xt_tc_write(ot->ot_row_file, row_id, 0, sizeof(XTTabRowRefDRec), (xtWord1 *) &free_row, &op_seq, TRUE, ot->ot_thread)) {
 
4284
                xt_unlock_mutex_ns(&tab->tab_row_lock);
 
4285
                return FAILED;
 
4286
        }
 
4287
        tab->tab_row_free_id = row_id;
 
4288
        tab->tab_row_fnum++;
 
4289
        ASSERT_NS(tab->tab_row_fnum < tab->tab_row_eof_id);
 
4290
        xt_unlock_mutex_ns(&tab->tab_row_lock);
 
4291
 
 
4292
        if (!xt_xlog_modify_table(tab->tab_id, XT_LOG_ENT_ROW_FREED, op_seq, 0, 0, row_id, sizeof(XTTabRowRefDRec), (xtWord1 *) &free_row, ot->ot_thread))
 
4293
                return FAILED;
 
4294
 
 
4295
        return OK;
 
4296
}
 
4297
 
 
4298
static void tab_free_ext_record_on_fail(XTOpenTablePtr ot, xtRecordID rec_id, XTTabRecExtDPtr ext_rec, xtBool log_err)
 
4299
{
 
4300
        xtWord4         log_over_size = XT_GET_DISK_4(ext_rec->re_log_dat_siz_4);
 
4301
        xtLogID         log_id;
 
4302
        xtLogOffset     log_offset;
 
4303
 
 
4304
        XT_GET_LOG_REF(log_id, log_offset, ext_rec);
 
4305
 
 
4306
        if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
 
4307
                xt_tab_free_ext_slot(ot->ot_table, log_id, log_offset, log_over_size);
 
4308
        else {
 
4309
                if (!ot->ot_thread->st_dlog_buf.dlb_delete_log(log_id, log_offset, log_over_size, ot->ot_table->tab_id, rec_id, ot->ot_thread)) {
 
4310
                        if (log_err)
 
4311
                                xt_log_and_clear_exception_ns();
 
4312
                }
 
4313
        }
 
4314
}
 
4315
 
 
4316
static void tab_save_exception(XTExceptionPtr e)
 
4317
{
 
4318
        XTThreadPtr self = xt_get_self();
 
4319
 
 
4320
        *e = self->t_exception;
 
4321
}
 
4322
 
 
4323
static void tab_restore_exception(XTExceptionPtr e)
 
4324
{
 
4325
        XTThreadPtr self = xt_get_self();
 
4326
 
 
4327
        self->t_exception = *e;
 
4328
}
 
4329
 
 
4330
/*
 
4331
 * This function assumes that a record may be partially written.
 
4332
 * It removes all associated data and references to the record.
 
4333
 *
 
4334
 * This function returns XT_ERR if an error occurs.
 
4335
 * TRUE if the record has been removed, and may be freed.
 
4336
 * FALSE if the record has already been freed. 
 
4337
 *
 
4338
 */
 
4339
xtPublic int xt_tab_remove_record(XTOpenTablePtr ot, xtRecordID rec_id, xtWord1 *rec_data, xtRecordID *prev_var_id, xtBool clean_delete, xtRowID row_id, xtXactID XT_UNUSED(xn_id))
 
4340
{
 
4341
        register XTTableHPtr    tab = ot->ot_table;
 
4342
        size_t                                  rec_size;
 
4343
        xtWord1                                 old_rec_type;
 
4344
        u_int                                   cols_req;
 
4345
        u_int                                   cols_in_buffer;
 
4346
 
 
4347
        *prev_var_id = 0;
 
4348
 
 
4349
        if (!rec_id)
 
4350
                return FALSE;
 
4351
 
 
4352
        /*
 
4353
         * NOTE: This function uses the read buffer. This should be OK because
 
4354
         * the function is only called by the sweeper. The read buffer
 
4355
         * is REQUIRED because of the call to xt_tab_load_ext_data()!!!
 
4356
         */
 
4357
        rec_size = tab->tab_dic.dic_rec_size;
 
4358
        if (!xt_tab_get_rec_data(ot, rec_id, rec_size, ot->ot_row_rbuffer))
 
4359
                return XT_ERR;
 
4360
        old_rec_type = ot->ot_row_rbuffer[0];
 
4361
 
 
4362
        /* Check of the record has not already been freed: */
 
4363
        if (XT_REC_IS_FREE(old_rec_type))
 
4364
                return FALSE;
 
4365
 
 
4366
        /* This record must belong to the given row: */
 
4367
        if (XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_row_id_4) != row_id)
 
4368
                return FALSE;
 
4369
 
 
4370
        /* The transaction ID of the record must be BEFORE or equal to the given
 
4371
         * transaction ID.
 
4372
         *
 
4373
         * No, this does not always hold. Because we wait for updates now,
 
4374
         * a "younger" transaction can update before an older
 
4375
         * transaction.
 
4376
         * Commit order determined the actual order in which the transactions
 
4377
         * should be replicated. This is determined by the log number of
 
4378
         * the commit record!
 
4379
        if (db->db_xn_curr_id(xn_id, XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_xact_id_4)))
 
4380
                return FALSE;
 
4381
         */
 
4382
 
 
4383
        *prev_var_id = XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_prev_rec_id_4);
 
4384
 
 
4385
        if (tab->tab_dic.dic_key_count) {
 
4386
                XTIndexPtr      *ind;
 
4387
 
 
4388
                switch (old_rec_type) {
 
4389
                        case XT_TAB_STATUS_DELETE:
 
4390
                        case XT_TAB_STATUS_DEL_CLEAN:
 
4391
                                rec_size = sizeof(XTTabRecHeadDRec);
 
4392
                                goto set_removed;
 
4393
                        case XT_TAB_STATUS_FIXED:
 
4394
                        case XT_TAB_STATUS_FIX_CLEAN:
 
4395
                                /* We know that for a fixed length record, 
 
4396
                                 * dic_ind_rec_len <= dic_rec_size! */
 
4397
                                rec_size = (size_t) tab->tab_dic.dic_ind_rec_len + XT_REC_FIX_HEADER_SIZE;
 
4398
                                rec_data = ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE;
 
4399
                                break;
 
4400
                        case XT_TAB_STATUS_VARIABLE:
 
4401
                        case XT_TAB_STATUS_VAR_CLEAN:
 
4402
                                cols_req = tab->tab_dic.dic_ind_cols_req;
 
4403
 
 
4404
                                cols_in_buffer = cols_req;
 
4405
                                rec_size = myxt_load_row_length(ot, rec_size - XT_REC_FIX_HEADER_SIZE, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, &cols_in_buffer);
 
4406
                                if (cols_in_buffer < cols_req)
 
4407
                                        rec_size = tab->tab_dic.dic_rec_size;
 
4408
                                else 
 
4409
                                        rec_size += XT_REC_FIX_HEADER_SIZE;
 
4410
                                if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_data, cols_req)) {
 
4411
                                        xt_log_and_clear_exception_ns();
 
4412
                                        goto set_removed;
 
4413
                                }
 
4414
                                break;
 
4415
                        case XT_TAB_STATUS_EXT_DLOG:
 
4416
                        case XT_TAB_STATUS_EXT_CLEAN:
 
4417
                                cols_req = tab->tab_dic.dic_ind_cols_req;
 
4418
 
 
4419
                                ASSERT_NS(cols_req);
 
4420
                                cols_in_buffer = cols_req;
 
4421
                                rec_size = myxt_load_row_length(ot, rec_size - XT_REC_EXT_HEADER_SIZE, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, &cols_in_buffer);
 
4422
                                if (cols_in_buffer < cols_req) {
 
4423
                                        rec_size = tab->tab_dic.dic_rec_size;
 
4424
                                        if (!xt_tab_load_ext_data(ot, rec_id, rec_data, cols_req)) {
 
4425
                                                /* This is actually quite possible after recovery, see [(3)] */
 
4426
                                                if (ot->ot_thread->t_exception.e_xt_err != XT_ERR_BAD_EXT_RECORD &&
 
4427
                                                        ot->ot_thread->t_exception.e_xt_err != XT_ERR_DATA_LOG_NOT_FOUND)
 
4428
                                                        xt_log_and_clear_exception_ns();
 
4429
                                                goto set_removed;
 
4430
                                        }
 
4431
                                }
 
4432
                                else {
 
4433
                                        /* All the records we require are in the buffer... */
 
4434
                                        rec_size += XT_REC_EXT_HEADER_SIZE;
 
4435
                                        if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, rec_data, cols_req)) {
 
4436
                                                xt_log_and_clear_exception_ns();
 
4437
                                                goto set_removed;
 
4438
                                        }
 
4439
                                }
 
4440
                                break;
 
4441
                        default:
 
4442
                                break;
 
4443
                }
 
4444
 
 
4445
                /* Could this be the case?: This change may only be flushed after the
 
4446
                 * operation below has been flushed to the log.
 
4447
                 *
 
4448
                 * No, remove records are never "undone". The sweeper will delete
 
4449
                 * the record again if it does not land in the log.
 
4450
                 *
 
4451
                 * The fact that the index entries have already been removed is not
 
4452
                 * a problem.
 
4453
                 */
 
4454
                if (!tab->tab_dic.dic_disable_index) {
 
4455
                        ind = tab->tab_dic.dic_keys;
 
4456
                        for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++) {
 
4457
                                if (!xt_idx_delete(ot, *ind, rec_id, rec_data))
 
4458
                                        xt_log_and_clear_exception_ns();
 
4459
                        }
 
4460
                }
 
4461
        }
 
4462
        else {
 
4463
                /* No indices: */
 
4464
                switch (old_rec_type) {
 
4465
                        case XT_TAB_STATUS_DELETE:
 
4466
                        case XT_TAB_STATUS_DEL_CLEAN:
 
4467
                                rec_size = XT_REC_FIX_HEADER_SIZE;
 
4468
                                break;
 
4469
                        case XT_TAB_STATUS_FIXED:
 
4470
                        case XT_TAB_STATUS_FIX_CLEAN:
 
4471
                        case XT_TAB_STATUS_VARIABLE:
 
4472
                        case XT_TAB_STATUS_VAR_CLEAN:
 
4473
                                rec_size = XT_REC_FIX_HEADER_SIZE;
 
4474
                                break;
 
4475
                        case XT_TAB_STATUS_EXT_DLOG:
 
4476
                        case XT_TAB_STATUS_EXT_CLEAN:
 
4477
                                rec_size = XT_REC_EXT_HEADER_SIZE;
 
4478
                                break;
 
4479
                }
 
4480
        }
 
4481
 
 
4482
        set_removed:
 
4483
        if (XT_REC_IS_EXT_DLOG(old_rec_type)) {
 
4484
                /* {LOCK-EXT-REC} Lock, and read again to make sure that the
 
4485
                 * compactor does not change this record, while
 
4486
                 * we are removing it! */
 
4487
                xt_lock_mutex_ns(&tab->tab_db->db_co_ext_lock);
 
4488
                if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_EXT_HEADER_SIZE, ot->ot_row_rbuffer)) {
 
4489
                        xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock);
 
4490
                        return XT_ERR;
 
4491
                }
 
4492
                xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock);
 
4493
 
 
4494
        }
 
4495
 
 
4496
 
 
4497
        /* A record is "clean" deleted if the record was
 
4498
         * XT_TAB_STATUS_DELETE which was committed.
 
4499
         * This makes sure that the record will still invalidate
 
4500
         * following records in a row.
 
4501
         *
 
4502
         * Example:
 
4503
         *
 
4504
         * 1. INSERT A ROW, then DELETE it, assume the sweeper is delayed.
 
4505
         *
 
4506
         * We now have the sequence row X --> del rec A --> valid rec B.
 
4507
         *
 
4508
         * 2. A SELECT can still find B. Assume it now goes to check
 
4509
         *    if the record is valid, it reads row X, and gets A.
 
4510
         *
 
4511
         * 3. Now the sweeper gets control and removes X, A and B.
 
4512
         *    It frees A with the clean bit.
 
4513
         *
 
4514
         * 4. Now the SELECT gets control and reads A. Normally a freed record
 
4515
         *    would be ignored, and it would go onto B, which would then
 
4516
         *    be considered valid (note, even after the free, the next
 
4517
         *    pointer is not affected).
 
4518
         *
 
4519
         * However, because the clean bit has been set, it will stop at A
 
4520
         * and consider B invalid (which is the desired result).
 
4521
         *
 
4522
         * NOTE: We assume it is not possible for A to be allocated and refer
 
4523
         * to B, because B is freed before A. This means that B may refer to
 
4524
         * A after the next allocation.
 
4525
         */
 
4526
 
 
4527
        xtOpSeqNo                       op_seq;
 
4528
        XTTabRecFreeDPtr        free_rec = (XTTabRecFreeDPtr) ot->ot_row_rbuffer;
 
4529
        xtRecordID                      prev_rec_id;
 
4530
        xtWord1                         new_rec_type = XT_TAB_STATUS_FREED | (clean_delete ? XT_TAB_STATUS_CLEANED_BIT : 0);
 
4531
        u_int                           status = XT_LOG_ENT_REC_REMOVED_BI;
 
4532
 
 
4533
        xt_lock_mutex_ns(&tab->tab_rec_lock);
 
4534
        free_rec->rf_rec_type_1 = new_rec_type;
 
4535
#ifdef XT_CLUSTER_FREE_RECORDS
 
4536
        XTTabCachePagePtr       page;
 
4537
        size_t                          offset;
 
4538
 
 
4539
        if (!tab->tab_recs.xt_tc_lock_page(ot->ot_rec_file, &page, rec_id, &offset, ot->ot_thread)) {
 
4540
                xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4541
                return FAILED;
 
4542
        }
 
4543
 
 
4544
        if (page->tcp_free_rec == 0xFFFF) {
 
4545
                /* There is no free record on this page. */
 
4546
                prev_rec_id = tab->tab_rec_free_id;
 
4547
                XT_SET_DISK_4(free_rec->rf_next_rec_id_4, prev_rec_id);
 
4548
                memcpy(page->tcp_data + offset, ot->ot_row_rbuffer, sizeof(XTTabRecFreeDRec));
 
4549
                tab->tab_rec_free_id = rec_id;
 
4550
        }
 
4551
        else {
 
4552
                XTTabRecFreeDPtr        prev_free_rec = (XTTabRecFreeDPtr) (page->tcp_data + page->tcp_free_rec);
 
4553
 
 
4554
                status = XT_LOG_ENT_REC_REMOVED_BI_L;
 
4555
                XT_COPY_DISK_4(free_rec->rf_next_rec_id_4, prev_free_rec->rf_next_rec_id_4);
 
4556
                memcpy(page->tcp_data + offset, ot->ot_row_rbuffer, sizeof(XTTabRecFreeDRec));
 
4557
 
 
4558
                /* The previous now references the next: */
 
4559
                XT_SET_DISK_4(prev_free_rec->rf_next_rec_id_4, rec_id);
 
4560
 
 
4561
                /* This is the record ID of the previous record: */
 
4562
                ASSERT_NS((page->tcp_free_rec % tab->tab_recs.tci_rec_size) == 0);
 
4563
                prev_rec_id = (page->tcp_page_idx * tab->tab_recs.tci_rows_per_page) + (page->tcp_free_rec / tab->tab_recs.tci_rec_size) + 1;
 
4564
                ASSERT_NS(prev_rec_id != rec_id);
 
4565
        }
 
4566
 
 
4567
        /* Link after this page in future! */
 
4568
        ASSERT_NS((offset % tab->tab_recs.tci_rec_size) == 0);
 
4569
        page->tcp_free_rec = offset;
 
4570
        tab->tab_recs.xt_tc_unlock_page(ot->ot_rec_file, page, &op_seq, ot->ot_thread);
 
4571
#else
 
4572
        prev_rec_id = tab->tab_rec_free_id;
 
4573
        XT_SET_DISK_4(free_rec->rf_next_rec_id_4, prev_rec_id);
 
4574
        if (!xt_tab_put_rec_data(ot, rec_id, sizeof(XTTabRecFreeDRec), ot->ot_row_rbuffer, &op_seq)) {
 
4575
                xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4576
                return XT_ERR;
 
4577
        }
 
4578
        tab->tab_rec_free_id = rec_id;
 
4579
        ASSERT_NS(tab->tab_rec_free_id < tab->tab_rec_eof_id);
 
4580
#endif
 
4581
        tab->tab_rec_fnum++;
 
4582
        xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4583
 
 
4584
        free_rec->rf_rec_type_1 = old_rec_type;
 
4585
        if (!xt_xlog_modify_table(tab->tab_id, status, op_seq, new_rec_type, prev_rec_id, rec_id, rec_size, ot->ot_row_rbuffer, ot->ot_thread))
 
4586
                return XT_ERR;
 
4587
        return OK;
 
4588
}
 
4589
 
 
4590
static xtRowID tab_new_row(XTOpenTablePtr ot, XTTableHPtr tab)
 
4591
{
 
4592
        xtRowID                 row_id;
 
4593
        xtOpSeqNo               op_seq;
 
4594
        xtRowID                 next_row_id = 0;
 
4595
        u_int                   status;
 
4596
 
 
4597
        xt_lock_mutex_ns(&tab->tab_row_lock);
 
4598
        if ((row_id = tab->tab_row_free_id)) {
 
4599
                status = XT_LOG_ENT_ROW_NEW_FL;
 
4600
 
 
4601
                if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &next_row_id, ot->ot_thread)) {
 
4602
                        xt_unlock_mutex_ns(&tab->tab_row_lock);
 
4603
                        return 0;
 
4604
                }
 
4605
                tab->tab_row_free_id = next_row_id;
 
4606
                ASSERT_NS(tab->tab_row_fnum > 0);
 
4607
                tab->tab_row_fnum--;
 
4608
        }
 
4609
        else {
 
4610
                status = XT_LOG_ENT_ROW_NEW;
 
4611
                row_id = tab->tab_row_eof_id;
 
4612
                if (row_id == 0xFFFFFFFF) {
 
4613
                        xt_unlock_mutex_ns(&tab->tab_row_lock);
 
4614
                        xt_register_xterr(XT_REG_CONTEXT, XT_ERR_MAX_ROW_COUNT);
 
4615
                        return 0;
 
4616
                }
 
4617
                if (((row_id - 1) % tab->tab_rows.tci_rows_per_page) == 0) {
 
4618
                        /* By fetching the page now, we avoid reading it later... */
 
4619
                        XTTabCachePagePtr       page;
 
4620
                        XTTabCacheSegPtr        seg;
 
4621
                        size_t                          poffset;
 
4622
 
 
4623
                        if (!tab->tab_rows.tc_fetch(ot->ot_row_file, row_id, &seg, &page, &poffset, FALSE, ot->ot_thread)) {
 
4624
                                xt_unlock_mutex_ns(&tab->tab_row_lock);
 
4625
                                return 0;
 
4626
                        }
 
4627
                        TAB_CAC_UNLOCK(&seg->tcs_lock, ot->ot_thread->t_id);
 
4628
                }
 
4629
                tab->tab_row_eof_id++;
 
4630
        }
 
4631
        op_seq = tab->tab_seq.ts_get_op_seq();
 
4632
        xt_unlock_mutex_ns(&tab->tab_row_lock);
 
4633
 
 
4634
        if (!xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, next_row_id, row_id, 0, NULL, ot->ot_thread))
 
4635
                return 0;
 
4636
 
 
4637
        XT_DISABLED_TRACE(("new row tx=%d row=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) row_id));
 
4638
        ASSERT_NS(row_id);
 
4639
        return row_id;
 
4640
}
 
4641
 
 
4642
xtPublic xtBool xt_tab_get_row(register XTOpenTablePtr ot, xtRowID row_id, xtRecordID *var_rec_id)
 
4643
{
 
4644
        register XTTableHPtr    tab = ot->ot_table;
 
4645
 
 
4646
        (void) ASSERT_NS(sizeof(XTTabRowRefDRec) == 4);
 
4647
 
 
4648
        if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, var_rec_id, ot->ot_thread))
 
4649
                return FAILED;
 
4650
        return OK;
 
4651
}
 
4652
 
 
4653
xtPublic xtBool xt_tab_set_row(XTOpenTablePtr ot, u_int status, xtRowID row_id, xtRecordID var_rec_id)
 
4654
{
 
4655
        register XTTableHPtr    tab = ot->ot_table;
 
4656
        XTTabRowRefDRec                 row_buf;
 
4657
        xtOpSeqNo                               op_seq;
 
4658
 
 
4659
        ASSERT_NS(var_rec_id < tab->tab_rec_eof_id);
 
4660
        XT_SET_DISK_4(row_buf.rr_ref_id_4, var_rec_id);
 
4661
 
 
4662
        if (!tab->tab_rows.xt_tc_write(ot->ot_row_file, row_id, 0, sizeof(XTTabRowRefDRec), (xtWord1 *) &row_buf, &op_seq, TRUE, ot->ot_thread))
 
4663
                return FAILED;
 
4664
 
 
4665
        return xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, 0, row_id, sizeof(XTTabRowRefDRec), (xtWord1 *) &row_buf, ot->ot_thread);
 
4666
}
 
4667
 
 
4668
static void tab_free_row_on_fail(XTOpenTablePtr ot, XTTableHPtr tab, xtRowID row_id)
 
4669
{
 
4670
        XTExceptionRec e;
 
4671
 
 
4672
        tab_save_exception(&e);
 
4673
        xt_tab_free_row(ot, tab, row_id);
 
4674
        tab_restore_exception(&e);
 
4675
}
 
4676
 
 
4677
#if defined(CHECK_CLUSTER_EFFICIENCY)
/* Debug counters maintained by tab_add_record(): how often the next
 * free record was found on the same page (next_on_page) or on a
 * different page (next_off_page) when a free record was re-used.
 */
u_int		next_on_page = 0, next_off_page = 0;
#endif
 
4681
 
 
4682
static xtBool tab_write_ext_record(XTOpenTablePtr XT_UNUSED(ot), XTTableHPtr tab, XTTabRecInfoPtr rec_info, xtRecordID rec_id, xtLogID log_id, xtLogOffset log_offset, XTThreadPtr thread)
 
4683
{
 
4684
        xtWord1 tmp_buffer[offsetof(XTactExtRecEntryDRec, er_data)];
 
4685
        xtBool  ok;
 
4686
 
 
4687
        memcpy(tmp_buffer, rec_info->ri_log_buf, sizeof(tmp_buffer));
 
4688
        rec_info->ri_log_buf->er_status_1 = XT_LOG_ENT_EXT_REC_OK;
 
4689
        XT_SET_DISK_4(rec_info->ri_log_buf->er_data_size_4, rec_info->ri_log_data_size);
 
4690
        XT_SET_DISK_4(rec_info->ri_log_buf->er_tab_id_4, tab->tab_id);
 
4691
        XT_SET_DISK_4(rec_info->ri_log_buf->er_rec_id_4, rec_id);
 
4692
        if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
 
4693
                ok = xt_tab_save_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf);
 
4694
        else
 
4695
                ok = thread->st_dlog_buf.dlb_append_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf, thread);
 
4696
        memcpy(rec_info->ri_log_buf, tmp_buffer, sizeof(tmp_buffer));
 
4697
        return ok;
 
4698
}
 
4699
 
 
4700
static xtBool tab_add_record(XTOpenTablePtr ot, XTTabRecInfoPtr rec_info, u_int status)
 
4701
{
 
4702
        register XTTableHPtr    tab = ot->ot_table;
 
4703
        XTThreadPtr                             thread = ot->ot_thread;
 
4704
        xtRecordID                              rec_id;
 
4705
        xtLogID                                 log_id;
 
4706
        xtLogOffset                             log_offset;
 
4707
        xtOpSeqNo                               op_seq;
 
4708
        xtRecordID                              next_rec_id = 0;
 
4709
 
 
4710
        if (rec_info->ri_ext_rec) {
 
4711
                /* Determine where the overflow will go... */
 
4712
                if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
 
4713
                        if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data)))
 
4714
                                return FAILED;
 
4715
                }
 
4716
                else {
 
4717
                        if (!thread->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data), ot->ot_thread))
 
4718
                                return FAILED;
 
4719
                }
 
4720
                XT_SET_LOG_REF(rec_info->ri_ext_rec, log_id, log_offset);
 
4721
        }
 
4722
 
 
4723
        /* Write the record to disk: */
 
4724
        xt_lock_mutex_ns(&tab->tab_rec_lock);
 
4725
        if ((rec_id = tab->tab_rec_free_id)) {
 
4726
                ASSERT_NS(rec_id < tab->tab_rec_eof_id);
 
4727
#ifdef XT_CLUSTER_FREE_RECORDS
 
4728
                XTTabCachePagePtr       page;
 
4729
                size_t                          offset;
 
4730
                XTTabRecFreeDPtr        free_block;
 
4731
 
 
4732
                if (!tab->tab_recs.xt_tc_lock_page(ot->ot_rec_file, &page, rec_id, &offset, ot->ot_thread)) {
 
4733
                        xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4734
                        return FAILED;
 
4735
                }
 
4736
 
 
4737
                /* Read the data from the old record: */
 
4738
                free_block = (XTTabRecFreeDPtr) (page->tcp_data + offset);
 
4739
                next_rec_id = XT_GET_DISK_4(free_block->rf_next_rec_id_4);
 
4740
 
 
4741
#ifdef CHECK_CLUSTER_EFFICIENCY
 
4742
                xtRecordID      dbg_rec_id;
 
4743
 
 
4744
                dbg_rec_id = next_rec_id-1;
 
4745
                if (page->tcp_page_idx == dbg_rec_id / tab->tab_recs.tci_rows_per_page)
 
4746
                        next_on_page++;
 
4747
                else
 
4748
                        next_off_page++;
 
4749
                if ((next_on_page % 1000) == 0)
 
4750
                        printf("Next on: %d off: %d\n", next_on_page, next_off_page);
 
4751
#endif
 
4752
 
 
4753
                if (page->tcp_free_rec == offset) {
 
4754
                        /* Adjust the free record: */
 
4755
                        xtRecordID      tmp_rec_id;
 
4756
 
 
4757
                        /* Check if the next record is on the same page: */
 
4758
                        tmp_rec_id = next_rec_id-1;
 
4759
                        if (page->tcp_page_idx == tmp_rec_id / tab->tab_recs.tci_rows_per_page)
 
4760
                                /* This is the next free record on this page: */
 
4761
                                page->tcp_free_rec = (xtWord2) ((tmp_rec_id % tab->tab_recs.tci_rows_per_page) * tab->tab_recs.tci_rec_size);
 
4762
                        else
 
4763
                                /* Not on the same page, so there are no more free records on this page: */
 
4764
                                page->tcp_free_rec = 0xFFFF;
 
4765
                }
 
4766
 
 
4767
                /* Write the new record: */
 
4768
                memcpy(free_block, rec_info->ri_fix_rec_buf, rec_info->ri_rec_buf_size);
 
4769
                tab->tab_recs.xt_tc_unlock_page(ot->ot_rec_file, page, &op_seq, ot->ot_thread);
 
4770
#else
 
4771
                XTTabRecFreeDRec free_block;
 
4772
 
 
4773
                if (!xt_tab_get_rec_data(ot, rec_id, sizeof(XTTabRecFreeDRec), (xtWord1 *) &free_block)) {
 
4774
                        xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4775
                        return FAILED;
 
4776
                }
 
4777
                next_rec_id = XT_GET_DISK_4(free_block.rf_next_rec_id_4);
 
4778
                if (!xt_tab_put_rec_data(ot, rec_id, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, &op_seq)) {
 
4779
                        xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4780
                        return FAILED;
 
4781
                }
 
4782
#endif
 
4783
                tab->tab_rec_free_id = next_rec_id;                     
 
4784
                tab->tab_rec_fnum--;
 
4785
                
 
4786
                /* XT_LOG_ENT_UPDATE --> XT_LOG_ENT_UPDATE_FL */
 
4787
                /* XT_LOG_ENT_INSERT --> XT_LOG_ENT_INSERT_FL */
 
4788
                /* XT_LOG_ENT_DELETE --> XT_LOG_ENT_DELETE_FL */
 
4789
                status += 2;
 
4790
        }
 
4791
        else {
 
4792
                xtBool read;
 
4793
 
 
4794
                rec_id = tab->tab_rec_eof_id;
 
4795
                tab->tab_rec_eof_id++;
 
4796
 
 
4797
                /* If we are writing to a new page (at the EOF)
 
4798
                 * then we do not need to read the page from the
 
4799
                 * file because it is new.
 
4800
                 *
 
4801
                 * Note that this only works because we are holding
 
4802
                 * a lock on the record file.
 
4803
                 */
 
4804
                read = ((rec_id - 1) % tab->tab_recs.tci_rows_per_page) != 0;
 
4805
 
 
4806
                if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, &op_seq, read, thread)) {
 
4807
                        xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4808
                        return FAILED;
 
4809
                }
 
4810
        }
 
4811
        xt_unlock_mutex_ns(&tab->tab_rec_lock);
 
4812
 
 
4813
        if (!xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, next_rec_id, rec_id,  rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, thread))
 
4814
                return FAILED;
 
4815
 
 
4816
        if (rec_info->ri_ext_rec) {
 
4817
                /* Write the log buffer overflow: */            
 
4818
                if (!tab_write_ext_record(ot, tab, rec_info, rec_id, log_id, log_offset, thread))
 
4819
                        return FAILED;
 
4820
        }
 
4821
 
 
4822
        XT_DISABLED_TRACE(("new rec tx=%d val=%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) rec_id));
 
4823
        rec_info->ri_rec_id = rec_id;
 
4824
        return OK;
 
4825
}
 
4826
 
 
4827
static void tab_delete_record_on_fail(XTOpenTablePtr ot, xtRowID row_id, xtRecordID rec_id, XTTabRecHeadDPtr XT_UNUSED(row_ptr), xtWord1 *rec_data, u_int key_count)
 
4828
{
 
4829
        XTExceptionRec  e;
 
4830
        xtBool                  log_err = TRUE;
 
4831
        XTTabRecInfoRec rec_info;
 
4832
 
 
4833
        tab_save_exception(&e);
 
4834
        
 
4835
        if (e.e_xt_err == XT_ERR_DUPLICATE_KEY || 
 
4836
                e.e_xt_err == XT_ERR_DUPLICATE_FKEY) {
 
4837
                /* If the error does not cause rollback, then we will ignore the
 
4838
                 * error if an error occurs in the UNDO!
 
4839
                 */
 
4840
                log_err = FALSE;
 
4841
                tab_restore_exception(&e);
 
4842
        }
 
4843
        if (key_count) {
 
4844
                XTIndexPtr      *ind;
 
4845
 
 
4846
                ind = ot->ot_table->tab_dic.dic_keys;
 
4847
                for (u_int i=0; i<key_count; i++, ind++) {
 
4848
                        if (!xt_idx_delete(ot, *ind, rec_id, rec_data)) {
 
4849
                                if (log_err)
 
4850
                                        xt_log_and_clear_exception_ns();
 
4851
                        }
 
4852
                }
 
4853
        }
 
4854
 
 
4855
        /* This is not required because the extended record will be free
 
4856
         * later when the record is freed!
 
4857
        if (row_ptr->tr_rec_type_1 == XT_TAB_STATUS_EXT_DLOG || row_ptr->tr_rec_type_1 == XT_TAB_STATUS_EXT_CLEAN)
 
4858
                tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) row_ptr, log_err);
 
4859
         */
 
4860
 
 
4861
        rec_info.ri_fix_rec_buf = (XTTabRecFixDPtr) ot->ot_row_wbuffer;
 
4862
        rec_info.ri_rec_buf_size = offsetof(XTTabRecFixDRec, rf_data);
 
4863
        rec_info.ri_ext_rec = NULL;
 
4864
        rec_info.ri_fix_rec_buf->tr_rec_type_1 = XT_TAB_STATUS_DELETE;
 
4865
        rec_info.ri_fix_rec_buf->tr_stat_id_1 = 0;
 
4866
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
 
4867
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, rec_id);
 
4868
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, ot->ot_thread->st_xact_data->xd_start_xn_id);
 
4869
 
 
4870
        if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_DELETE))
 
4871
                goto failed;
 
4872
 
 
4873
        if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
 
4874
                goto failed;
 
4875
 
 
4876
        if (log_err)
 
4877
                tab_restore_exception(&e);
 
4878
        return;
 
4879
 
 
4880
        failed:
 
4881
        if (log_err)
 
4882
                xt_log_and_clear_exception_ns();
 
4883
        else
 
4884
                tab_restore_exception(&e);
 
4885
}
 
4886
 
 
4887
/*
 
4888
 * Wait until all the variations between the start of the chain, and
 
4889
 * the given record have been rolled-back.
 
4890
 * If any is committed, register a locked error, and return FAILED.
 
4891
 */
 
4892
static xtBool tab_wait_for_rollback(XTOpenTablePtr ot, xtRowID row_id, xtRecordID commit_rec_id)
 
4893
{
 
4894
        register XTTableHPtr    tab = ot->ot_table;
 
4895
        xtRecordID                              var_rec_id;
 
4896
        XTTabRecHeadDRec                var_head;
 
4897
        xtXactID                                xn_id;
 
4898
        xtRecordID                              invalid_rec = 0;
 
4899
        XTXactWaitRec                   xw;
 
4900
 
 
4901
        retry:
 
4902
        if (!xt_tab_get_row(ot, row_id, &var_rec_id))
 
4903
                return FAILED;
 
4904
 
 
4905
        while (var_rec_id != commit_rec_id) {
 
4906
                if (!var_rec_id)
 
4907
                        goto locked;
 
4908
                if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
 
4909
                        return FAILED;
 
4910
                if (XT_REC_IS_CLEAN(var_head.tr_rec_type_1))
 
4911
                        goto locked;
 
4912
                if (XT_REC_IS_FREE(var_head.tr_rec_type_1)) {
 
4913
                        /* Should not happen: */
 
4914
                        if (!tab_record_corrupt(ot, row_id, var_rec_id, false, 4))
 
4915
                                return FAILED;
 
4916
                        goto record_invalid;
 
4917
                }
 
4918
                xn_id = XT_GET_DISK_4(var_head.tr_xact_id_4);
 
4919
                switch (xt_xn_status(ot, xn_id, var_rec_id)) {
 
4920
                        case XT_XN_VISIBLE:
 
4921
                        case XT_XN_NOT_VISIBLE:
 
4922
                                goto locked;
 
4923
                        case XT_XN_ABORTED:
 
4924
                                /* Ingore the record, it will be removed. */
 
4925
                                break;
 
4926
                        case XT_XN_MY_UPDATE:
 
4927
                                /* Should not happen: */
 
4928
                                goto locked;
 
4929
                        case XT_XN_OTHER_UPDATE:
 
4930
                                /* Wait for the transaction to commit or rollback: */
 
4931
                                XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
4932
                                xw.xw_xn_id = xn_id;
 
4933
                                if (!xt_xn_wait_for_xact(ot->ot_thread, &xw, NULL)) {
 
4934
                                        XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
4935
                                        return FAILED;
 
4936
                                }
 
4937
                                XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
4938
                                goto retry;
 
4939
                        case XT_XN_REREAD:
 
4940
                                if (!tab_record_corrupt(ot, row_id, var_rec_id, true, 5))
 
4941
                                        return FAILED;
 
4942
                                goto record_invalid;
 
4943
                }
 
4944
                var_rec_id = XT_GET_DISK_4(var_head.tr_prev_rec_id_4);
 
4945
        }
 
4946
        return OK;
 
4947
 
 
4948
        locked:
 
4949
        xt_register_xterr(XT_REG_CONTEXT, XT_ERR_RECORD_CHANGED);
 
4950
        return FAILED;
 
4951
        
 
4952
        record_invalid:
 
4953
        /* {RETRY-READ} */
 
4954
        /* Prevent an infinite loop due to a bad record: */
 
4955
        if (invalid_rec != var_rec_id) {
 
4956
                invalid_rec = var_rec_id;
 
4957
                goto retry;
 
4958
        }
 
4959
        /* The record is invalid, it will be "overwritten"... */
 
4960
#ifdef XT_CRASH_DEBUG
 
4961
        /* Should not happen! */
 
4962
        xt_crash_me();
 
4963
#endif
 
4964
        return OK;
 
4965
}
 
4966
 
 
4967
/* Check if a record may be visible:
 
4968
 * Return TRUE if the record may be visible now.
 
4969
 * Return XT_MAYBE if the record may be visible in the future (set out_xn_id).
 
4970
 * Return FALSE if the record is not valid (freed or is a delete record).
 
4971
 * Return XT_ERR if an error occurred.
 
4972
 */
 
4973
xtPublic int xt_tab_maybe_committed(XTOpenTablePtr ot, xtRecordID rec_id, xtXactID *out_xn_id, xtRowID *out_rowid, xtBool *out_updated)
 
4974
{
 
4975
        XTTabRecHeadDRec                rec_head;
 
4976
        xtXactID                                rec_xn_id = 0;
 
4977
        xtBool                                  wait = FALSE;
 
4978
        xtXactID                                wait_xn_id = 0;
 
4979
        xtRowID                                 row_id = 0;  // Initialized unnecessarily to satisfy (Drizzle) compile [-Wuninitialized]
 
4980
        xtRecordID                              var_rec_id;
 
4981
        xtXactID                                xn_id;
 
4982
        register XTTableHPtr    tab = NULL; // Initialized unnecessarily to satisfy (Drizzle) compile [-Wuninitialized]
 
4983
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
4984
        char                                    t_buf[500];
 
4985
        int                                             len;
 
4986
        char                                    *t_type = "C";
 
4987
#endif
 
4988
        xtRecordID                              invalid_rec = 0;
 
4989
 
 
4990
        reread:
 
4991
        if (!xt_tab_get_rec_data(ot, rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
 
4992
                return XT_ERR;
 
4993
 
 
4994
        if (XT_REC_NOT_VALID(rec_head.tr_rec_type_1))
 
4995
                return FALSE;
 
4996
 
 
4997
        if (!XT_REC_IS_CLEAN(rec_head.tr_rec_type_1)) {
 
4998
                rec_xn_id = XT_GET_DISK_4(rec_head.tr_xact_id_4);
 
4999
                switch (xt_xn_status(ot, rec_xn_id, rec_id)) {
 
5000
                        case XT_XN_VISIBLE:
 
5001
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
5002
                                t_type="V";
 
5003
#endif
 
5004
                                break;
 
5005
                        case XT_XN_NOT_VISIBLE:
 
5006
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
5007
                                t_type="NV";
 
5008
#endif
 
5009
                                break;
 
5010
                        case XT_XN_ABORTED:
 
5011
                                return FALSE;
 
5012
                        case XT_XN_MY_UPDATE:
 
5013
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
5014
                                t_type="My-Upd";
 
5015
#endif
 
5016
                                break;
 
5017
                        case XT_XN_OTHER_UPDATE:
 
5018
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
5019
                                t_type="Wait";
 
5020
#endif
 
5021
                                wait = TRUE;
 
5022
                                wait_xn_id = rec_xn_id;
 
5023
                                break;
 
5024
                        case XT_XN_REREAD:
 
5025
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
5026
                                t_type="Re-read";
 
5027
#endif
 
5028
                                /* {RETRY-READ} */
 
5029
                                /* Avoid infinite loop: */
 
5030
                                if (invalid_rec == rec_id) {
 
5031
                                        /* Should not happen! */
 
5032
                                        if (!tab_record_corrupt(ot, XT_GET_DISK_4(rec_head.tr_row_id_4), rec_id, true, 6))
 
5033
                                                goto failed;
 
5034
#ifdef XT_CRASH_DEBUG
 
5035
                                        /* Generate a core dump! */
 
5036
                                        xt_crash_me();
 
5037
#endif
 
5038
                                        return FALSE;
 
5039
                                }
 
5040
                                invalid_rec = rec_id;
 
5041
                                goto reread;
 
5042
                }
 
5043
        }
 
5044
 
 
5045
        /* Follow the variation chain until we come to this record.
 
5046
         * If it is not the first visible variation then
 
5047
         * it is not visible at all. If it is not found on the
 
5048
         * variation chain, it is also not visible.
 
5049
         */
 
5050
        row_id = XT_GET_DISK_4(rec_head.tr_row_id_4);
 
5051
 
 
5052
        tab = ot->ot_table;
 
5053
        XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
5054
 
 
5055
        invalid_rec = 0;
 
5056
        retry:
 
5057
        if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
 
5058
                goto failed;
 
5059
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
5060
        len = sprintf(t_buf, "dup row=%d", (int) row_id);
 
5061
#endif
 
5062
        while (var_rec_id != rec_id) {
 
5063
                if (!var_rec_id)
 
5064
                        goto not_found;
 
5065
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
5066
                if (len <= 450)
 
5067
                        len += sprintf(t_buf+len, " -> %d", (int) var_rec_id);
 
5068
#endif
 
5069
                if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
 
5070
                        goto failed;
 
5071
                /* All clean records are visible, by all transactions: */
 
5072
                if (XT_REC_IS_CLEAN(rec_head.tr_rec_type_1))
 
5073
                        goto not_found;
 
5074
 
 
5075
                if (XT_REC_IS_FREE(rec_head.tr_rec_type_1)) {
 
5076
                        /* Should not happen: */
 
5077
                        if (invalid_rec != var_rec_id) {
 
5078
                                invalid_rec = var_rec_id;
 
5079
                                goto retry;
 
5080
                        }
 
5081
                        /* Assume end of list. */
 
5082
#ifdef XT_CRASH_DEBUG
 
5083
                        /* Should not happen! */
 
5084
                        xt_crash_me();
 
5085
#endif
 
5086
                        goto not_found;
 
5087
                }
 
5088
 
 
5089
                xn_id = XT_GET_DISK_4(rec_head.tr_xact_id_4);
 
5090
                switch (xt_xn_status(ot, xn_id, var_rec_id)) {
 
5091
                        case XT_XN_VISIBLE:
 
5092
                        case XT_XN_NOT_VISIBLE:
 
5093
                                goto not_found;
 
5094
                        case XT_XN_ABORTED:
 
5095
                                /* Ignore the record, it will be removed. */
 
5096
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
5097
                                if (len <= 450)
 
5098
                                        len += sprintf(t_buf+len, "(T%d-A)", (int) xn_id);
 
5099
#endif
 
5100
                                break;
 
5101
                        case XT_XN_MY_UPDATE:
 
5102
                                goto not_found;
 
5103
                        case XT_XN_OTHER_UPDATE:
 
5104
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
5105
                                if (len <= 450)
 
5106
                                        len += sprintf(t_buf+len, "(T%d-wait)", (int) xn_id);
 
5107
#endif
 
5108
                                /* Wait for this update to commit or abort: */
 
5109
                                if (!wait) {
 
5110
                                        wait = TRUE;
 
5111
                                        wait_xn_id = xn_id;
 
5112
                                }
 
5113
                                break;
 
5114
                        case XT_XN_REREAD:
 
5115
                                /* {RETRY-READ} */
 
5116
                                if (invalid_rec != var_rec_id) {
 
5117
                                        invalid_rec = var_rec_id;
 
5118
                                        goto retry;
 
5119
                                }
 
5120
                                /* Assume end of list. */
 
5121
                                if (!tab_record_corrupt(ot, row_id, invalid_rec, true, 7))
 
5122
                                        goto failed;
 
5123
#ifdef XT_CRASH_DEBUG
 
5124
                                /* Should not happen! */
 
5125
                                xt_crash_me();
 
5126
#endif
 
5127
                                goto not_found;
 
5128
                }
 
5129
                var_rec_id = XT_GET_DISK_4(rec_head.tr_prev_rec_id_4);
 
5130
        }
 
5131
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
5132
        if (len <= 450)
 
5133
                sprintf(t_buf+len, " -> %d(T%d-%s)\n", (int) var_rec_id, (int) rec_xn_id, t_type);
 
5134
        else
 
5135
                sprintf(t_buf+len, " ...(T%d-%s)\n", (int) rec_xn_id, t_type);
 
5136
#endif
 
5137
 
 
5138
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
5139
        if (wait) {
 
5140
                *out_xn_id = wait_xn_id;
 
5141
                return XT_MAYBE;
 
5142
        }
 
5143
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
 
5144
        xt_ttracef(thread, "%s", t_buf);
 
5145
#endif
 
5146
        if (out_rowid) {
 
5147
                *out_rowid = row_id;
 
5148
                *out_updated = (rec_xn_id == ot->ot_thread->st_xact_data->xd_start_xn_id);
 
5149
        }
 
5150
        return TRUE;
 
5151
 
 
5152
        not_found:
 
5153
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
5154
        return FALSE;
 
5155
 
 
5156
        failed:
 
5157
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
 
5158
        return XT_ERR;
 
5159
}
 
5160
 
 
5161
xtPublic xtBool xt_tab_new_record(XTOpenTablePtr ot, xtWord1 *rec_buf)
 
5162
{
 
5163
        register XTTableHPtr    tab = ot->ot_table;
 
5164
        register XTThreadPtr    self = ot->ot_thread;
 
5165
        XTTabRecInfoRec                 rec_info;
 
5166
        xtRowID                                 row_id;
 
5167
        u_int                                   idx_cnt = 0;
 
5168
        XTIndexPtr                              *ind;
 
5169
 
 
5170
        /* A non-temporary table has been updated: */
 
5171
        if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
 
5172
                self->st_non_temp_updated = TRUE;
 
5173
 
 
5174
        if (!myxt_store_row(ot, &rec_info, (char *) rec_buf))
 
5175
                goto failed_0;
 
5176
 
 
5177
        /* Get a new row ID: */
 
5178
        if (!(row_id = tab_new_row(ot, tab)))
 
5179
                goto failed_0;
 
5180
 
 
5181
        rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
 
5182
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
 
5183
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, 0);
 
5184
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
 
5185
 
 
5186
        /* Note, it is important that this record is written BEFORE the row
 
5187
         * due to the problem distributed here [(5)]
 
5188
         */
 
5189
        if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_INSERT))
 
5190
                goto failed_1;
 
5191
 
 
5192
#ifdef TRACE_VARIATIONS
 
5193
        xt_ttracef(self, "insert: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) self->st_xact_data->xd_start_xn_id);
 
5194
#endif
 
5195
        if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
 
5196
                goto failed_1;
 
5197
        XT_DISABLED_TRACE(("set new tx=%d row=%d rec=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
 
5198
 
 
5199
        /* Add the index references: */
 
5200
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
 
5201
                if (!xt_idx_insert(ot, *ind, 0, rec_info.ri_rec_id, rec_buf, NULL, FALSE)) {
 
5202
                        ot->ot_err_index_no = (*ind)->mi_index_no;
 
5203
                        goto failed_2;
 
5204
                }
 
5205
        }
 
5206
 
 
5207
        /* Do the foreign key stuff: */
 
5208
        if (ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
 
5209
                if (!ot->ot_table->tab_dic.dic_table->insertRow(ot, rec_buf))
 
5210
                        goto failed_2;
 
5211
        }
 
5212
 
 
5213
        self->st_statistics.st_row_insert++;
 
5214
        return OK;      
 
5215
 
 
5216
        failed_2:
 
5217
        /* Once the row has been inserted, it is to late to remove it!
 
5218
         * Now all we can do is delete it!
 
5219
         */
 
5220
        tab_delete_record_on_fail(ot, row_id, rec_info.ri_rec_id, (XTTabRecHeadDPtr) rec_info.ri_fix_rec_buf, rec_buf, idx_cnt);
 
5221
        goto failed_0;
 
5222
 
 
5223
        failed_1:
 
5224
        tab_free_row_on_fail(ot, tab, row_id);
 
5225
 
 
5226
        failed_0:
 
5227
        return FAILED;
 
5228
}
 
5229
 
 
5230
/* We cannot remove a change we have made to a row while a transaction
 
5231
 * is running, so we have to undo what we have done by
 
5232
 * overwriting the record we just created with
 
5233
 * the before image!
 
5234
 */
 
5235
static xtBool tab_overwrite_record_on_fail(XTOpenTablePtr ot, XTTabRecInfoPtr rec_info, xtWord1 *before_buf, xtWord1 *after_buf, u_int idx_cnt)
 
5236
{
 
5237
        register XTTableHPtr    tab = ot->ot_table;
 
5238
        XTTabRecHeadDRec                prev_rec_head;
 
5239
        u_int                                   i;
 
5240
        XTIndexPtr                              *ind;
 
5241
        XTThreadPtr                             thread = ot->ot_thread;
 
5242
        xtLogID                                 log_id;
 
5243
        xtLogOffset                             log_offset;
 
5244
        xtRecordID                              rec_id = rec_info->ri_rec_id;
 
5245
 
 
5246
        /* Remove the new extended record: */
 
5247
        if (rec_info->ri_ext_rec)
 
5248
                tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) rec_info->ri_fix_rec_buf, TRUE);
 
5249
 
 
5250
        /* Undo index entries of the new record: */
 
5251
        if (after_buf) {
 
5252
                for (i=0, ind=tab->tab_dic.dic_keys; i<idx_cnt; i++, ind++) {
 
5253
                        if (!xt_idx_delete(ot, *ind, rec_id, after_buf))
 
5254
                                return FAILED;
 
5255
                }
 
5256
        }
 
5257
 
 
5258
        memcpy(&prev_rec_head, rec_info->ri_fix_rec_buf, sizeof(XTTabRecHeadDRec));
 
5259
 
 
5260
        if (!before_buf) {
 
5261
                /* Can happen if the delete was called from some cascaded action.
 
5262
                 * And this is better than a crash...
 
5263
                 *
 
5264
                 * TODO: to make sure the change will not be applied in case the 
 
5265
                 * transaction will be commited, we'd need to add a log entry to 
 
5266
                 * restore the record like it's done for top-level operation. In 
 
5267
                 * order to do this we'd need to read the before-image of the 
 
5268
                 * record before modifying it.
 
5269
                 */
 
5270
                if (!thread->t_exception.e_xt_err)
 
5271
                        xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_BEFORE_IMAGE);
 
5272
                return FAILED;
 
5273
        }
 
5274
 
 
5275
        /* Restore the previous record! */
 
5276
        if (!myxt_store_row(ot, rec_info, (char *) before_buf))
 
5277
                return FAILED;
 
5278
 
 
5279
        memcpy(rec_info->ri_fix_rec_buf, &prev_rec_head, sizeof(XTTabRecHeadDRec));
 
5280
 
 
5281
        if (rec_info->ri_ext_rec) {
 
5282
                /* Determine where the overflow will go... */
 
5283
                if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
 
5284
                        if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data)))
 
5285
                                return FAILED;
 
5286
                }
 
5287
                else {
 
5288
                        if (!thread->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data), thread))
 
5289
                                return FAILED;
 
5290
                }
 
5291
                XT_SET_LOG_REF(rec_info->ri_ext_rec, log_id, log_offset);
 
5292
        }
 
5293
 
 
5294
        if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf))
 
5295
                return FAILED;
 
5296
 
 
5297
        if (rec_info->ri_ext_rec) {
 
5298
                /* Write the log buffer overflow: */            
 
5299
                if (!tab_write_ext_record(ot, tab, rec_info, rec_id, log_id, log_offset, thread))
 
5300
                        return FAILED;
 
5301
        }
 
5302
 
 
5303
        /* Put the index entries back: */
 
5304
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
 
5305
                if (!xt_idx_insert(ot, *ind, 0, rec_id, before_buf, after_buf, TRUE))
 
5306
                        /* Incomplete restore, there will be a rollback... */
 
5307
                        return FAILED;
 
5308
        }
 
5309
 
 
5310
        return OK;
 
5311
}
 
5312
 
 
5313
/*
 
5314
 * GOTCHA:
 
5315
 * If a transaction updates the same record over again, we should update
 
5316
 * in place. This prevents producing unnecessary variations!
 
5317
 */
 
5318
static xtBool tab_overwrite_record(XTOpenTablePtr ot, xtWord1 *before_buf, xtWord1 *after_buf)
 
5319
{
 
5320
        register XTTableHPtr    tab = ot->ot_table;
 
5321
        xtRowID                                 row_id = ot->ot_curr_row_id;
 
5322
        register XTThreadPtr    self = ot->ot_thread;
 
5323
        xtRecordID                              rec_id = ot->ot_curr_rec_id;
 
5324
        XTTabRecExtDRec                 prev_rec_head;
 
5325
        XTTabRecInfoRec                 rec_info;
 
5326
        u_int                                   idx_cnt = 0, i;
 
5327
        XTIndexPtr                              *ind;
 
5328
        xtLogID                                 log_id;
 
5329
        xtLogOffset                             log_offset;
 
5330
        xtBool                                  prev_ext_rec;
 
5331
 
 
5332
        /* A non-temporary table has been updated: */
 
5333
        if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
 
5334
                self->st_non_temp_updated = TRUE;
 
5335
 
 
5336
        if (!myxt_store_row(ot, &rec_info, (char *) after_buf))
 
5337
                goto failed_0;
 
5338
 
 
5339
        /* Read before we overwrite! */
 
5340
        if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_EXT_HEADER_SIZE, (xtWord1 *) &prev_rec_head))
 
5341
                goto failed_0;
 
5342
 
 
5343
        prev_ext_rec = prev_rec_head.tr_rec_type_1 & XT_TAB_STATUS_EXT_DLOG;
 
5344
 
 
5345
        if (rec_info.ri_ext_rec) {
 
5346
                /* Determine where the overflow will go... */
 
5347
                if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
 
5348
                        if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size))
 
5349
                                goto failed_0;
 
5350
                }
 
5351
                else {
 
5352
                        if (!self->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size, self))
 
5353
                                goto failed_0;
 
5354
                }
 
5355
                XT_SET_LOG_REF(rec_info.ri_ext_rec, log_id, log_offset);
 
5356
        }
 
5357
 
 
5358
        rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
 
5359
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
 
5360
        XT_COPY_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, prev_rec_head.tr_prev_rec_id_4);
 
5361
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
 
5362
 
 
5363
        /* Remove the index references, that have changed: */
 
5364
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
 
5365
                if (!xt_idx_delete(ot, *ind, rec_id, before_buf)) {
 
5366
                        goto failed_0;
 
5367
                }
 
5368
        }
 
5369
 
 
5370
#ifdef TRACE_VARIATIONS
 
5371
        xt_ttracef(self, "overwrite: row=%d rec=%d T%d\n", (int) row_id, (int) rec_id, (int) self->st_xact_data->xd_start_xn_id);
 
5372
#endif
 
5373
        /* Overwrite the record: */
 
5374
        if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info.ri_rec_buf_size, (xtWord1 *) rec_info.ri_fix_rec_buf))
 
5375
                goto failed_0;
 
5376
 
 
5377
        if (rec_info.ri_ext_rec) {
 
5378
                /* Write the log buffer overflow: */            
 
5379
                if (!tab_write_ext_record(ot, tab, &rec_info, rec_id, log_id, log_offset, self))
 
5380
                        goto failed_1;
 
5381
        }
 
5382
 
 
5383
        /* Add the index references that have changed: */
 
5384
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
 
5385
                if (!xt_idx_insert(ot, *ind, 0, rec_id, after_buf, before_buf, FALSE)) {
 
5386
                        ot->ot_err_index_no = (*ind)->mi_index_no;
 
5387
                        goto failed_2;
 
5388
                }
 
5389
        }
 
5390
 
 
5391
        /* Do the foreign key stuff: */
 
5392
        if (ot->ot_table->tab_dic.dic_table->dt_trefs || ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
 
5393
                if (!ot->ot_table->tab_dic.dic_table->updateRow(ot, before_buf, after_buf))
 
5394
                        goto failed_2;
 
5395
        }
 
5396
        
 
5397
        /* Delete the previous overflow area: */
 
5398
        if (prev_ext_rec)
 
5399
                tab_free_ext_record_on_fail(ot, rec_id, &prev_rec_head, TRUE);
 
5400
 
 
5401
        return OK;
 
5402
 
 
5403
        failed_2:
 
5404
        /* Remove the new extended record: */
 
5405
        if (rec_info.ri_ext_rec)
 
5406
                tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) rec_info.ri_fix_rec_buf, TRUE);
 
5407
 
 
5408
        /* Restore the previous record! */
 
5409
        /* Undo index entries: */
 
5410
        for (i=0, ind=tab->tab_dic.dic_keys; i<idx_cnt; i++, ind++) {
 
5411
                if (!xt_idx_delete(ot, *ind, rec_id, after_buf))
 
5412
                        goto failed_1;
 
5413
        }
 
5414
 
 
5415
        /* Restore the record: */
 
5416
        if (!myxt_store_row(ot, &rec_info, (char *) before_buf))
 
5417
                goto failed_1;
 
5418
 
 
5419
        if (rec_info.ri_ext_rec)
 
5420
                memcpy(rec_info.ri_fix_rec_buf, &prev_rec_head, XT_REC_EXT_HEADER_SIZE);
 
5421
        else
 
5422
                memcpy(rec_info.ri_fix_rec_buf, &prev_rec_head, sizeof(XTTabRecHeadDRec));
 
5423
 
 
5424
        if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info.ri_rec_buf_size, (xtWord1 *) rec_info.ri_fix_rec_buf))
 
5425
                goto failed_1;
 
5426
 
 
5427
        /* Put the index entries back: */
 
5428
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
 
5429
                if (!xt_idx_insert(ot, *ind, 0, rec_id, before_buf, after_buf, TRUE))
 
5430
                        /* Incomplete restore, there will be a rollback... */
 
5431
                        goto failed_0;
 
5432
        }
 
5433
 
 
5434
        /* The previous record has now been restored. */
 
5435
        goto failed_0;
 
5436
 
 
5437
        failed_1:
 
5438
        /* The old record is overwritten, I must free the previous extended record: */
 
5439
        if (prev_ext_rec)
 
5440
                tab_free_ext_record_on_fail(ot, rec_id, &prev_rec_head, TRUE);
 
5441
 
 
5442
        failed_0:
 
5443
        return FAILED;
 
5444
}
 
5445
 
 
5446
xtPublic xtBool xt_tab_update_record(XTOpenTablePtr ot, xtWord1 *before_buf, xtWord1 *after_buf)
 
5447
{
 
5448
        register XTTableHPtr    tab;
 
5449
        xtRowID                                 row_id;
 
5450
        register XTThreadPtr    self;
 
5451
        xtRecordID                              curr_var_rec_id;
 
5452
        XTTabRecInfoRec                 rec_info;
 
5453
        u_int                                   idx_cnt = 0;
 
5454
        XTIndexPtr                              *ind;
 
5455
 
 
5456
        /*
 
5457
         * Originally only the flag ot->ot_curr_updated was checked, and if it was on, then
 
5458
         * tab_overwrite_record() was called, but this caused crashes in some cases like:
 
5459
         *
 
5460
         * set @@autocommit = 0;
 
5461
         * create table t1 (s1 int primary key); 
 
5462
         * create table t2 (s1 int primary key, foreign key (s1) references t1 (s1) on update cascade);
 
5463
     * insert into t1 values (1);
 
5464
         * insert into t2 values (1);
 
5465
         * update t1 set s1 = 1;
 
5466
         *
 
5467
         * the last update lead to a crash on t2 cascade update because before_buf argument is NULL 
 
5468
         * in the call below. It is NULL only during cascade update of child table. In that case we 
 
5469
         * cannot pass before_buf value from XTDDTableRef::modifyRow as the before_buf is the original 
 
5470
         * row for the parent (t1) table and it would be used to update any existing indexes
 
5471
         * in the child table which would be wrong of course.
 
5472
         *
 
5473
         * Alternative solution would be to copy the after_info in the XTDDTableRef::modifyRow():
 
5474
         * 
 
5475
         * ...
 
5476
         * if (!xt_tab_load_record(ot, ot->ot_curr_rec_id, &after_info))
 
5477
         *     goto failed_2;
 
5478
         * ...
 
5479
         *
 
5480
         * here the xt_tab_load_record() loads the original row, so we can copy it from there, but in 
 
5481
         * that case we'd need to allocate a new (possibly up to 65536 bytes long) buffer, which makes 
 
5482
         * the optimization questionable
 
5483
         *
 
5484
         */
 
5485
        if (ot->ot_curr_updated && before_buf)
 
5486
                /* This record has already been updated by this transaction.
 
5487
                 * Do the update in place!
 
5488
                 */
 
5489
                return tab_overwrite_record(ot, before_buf, after_buf);
 
5490
 
 
5491
        tab = ot->ot_table;
 
5492
        row_id = ot->ot_curr_row_id;
 
5493
        self = ot->ot_thread;
 
5494
 
 
5495
        /* A non-temporary table has been updated: */
 
5496
        if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
 
5497
                self->st_non_temp_updated = TRUE;
 
5498
 
 
5499
        if (!myxt_store_row(ot, &rec_info, (char *) after_buf))
 
5500
                goto failed_0;
 
5501
 
 
5502
        rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
 
5503
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
 
5504
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, ot->ot_curr_rec_id);
 
5505
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
 
5506
 
 
5507
        /* Create the new record: */
 
5508
        if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_UPDATE))
 
5509
                goto failed_0;
 
5510
 
 
5511
        /* Link the new variation into the list: */
 
5512
        XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], self);
 
5513
 
 
5514
        if (!xt_tab_get_row(ot, row_id, &curr_var_rec_id))
 
5515
                goto failed_1;
 
5516
 
 
5517
        if (curr_var_rec_id != ot->ot_curr_rec_id) {
 
5518
                /* If the transaction does not rollback, I will get an
 
5519
                 * exception here:
 
5520
                 */
 
5521
                if (!tab_wait_for_rollback(ot, row_id, ot->ot_curr_rec_id))
 
5522
                        goto failed_1;
 
5523
                /* [(4)] This is the situation when we overwrite the
 
5524
                 * reference to curr_var_rec_id!
 
5525
                 * When curr_var_rec_id is cleaned up by the sweeper, the
 
5526
                 * sweeper will notice that the record is no longer in
 
5527
                 * the row list.
 
5528
                 */
 
5529
        }
 
5530
 
 
5531
#ifdef TRACE_VARIATIONS
 
5532
        xt_ttracef(self, "update: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) self->st_xact_data->xd_start_xn_id);
 
5533
#endif
 
5534
        if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
 
5535
                goto failed_1;
 
5536
        XT_DISABLED_TRACE(("set upd tx=%d row=%d rec=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
 
5537
 
 
5538
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], self);
 
5539
 
 
5540
        /* Add the index references: */
 
5541
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
 
5542
                if (!xt_idx_insert(ot, *ind, 0, rec_info.ri_rec_id, after_buf, before_buf, FALSE)) {
 
5543
                        ot->ot_err_index_no = (*ind)->mi_index_no;
 
5544
                        goto failed_2;
 
5545
                }
 
5546
        }
 
5547
 
 
5548
        if (ot->ot_table->tab_dic.dic_table->dt_trefs || ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
 
5549
                if (!ot->ot_table->tab_dic.dic_table->updateRow(ot, before_buf, after_buf))
 
5550
                        goto failed_2;
 
5551
        }
 
5552
 
 
5553
        self->st_statistics.st_row_update++;
 
5554
        return OK;
 
5555
 
 
5556
        failed_2:
 
5557
        tab_overwrite_record_on_fail(ot, &rec_info, before_buf, after_buf, idx_cnt);
 
5558
        goto failed_0;
 
5559
 
 
5560
        failed_1:
 
5561
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], self);
 
5562
 
 
5563
        failed_0:
 
5564
        return FAILED;
 
5565
}
 
5566
 
 
5567
xtPublic xtBool xt_tab_delete_record(XTOpenTablePtr ot, xtWord1 *rec_buf)
 
5568
{
 
5569
        register XTTableHPtr    tab = ot->ot_table;
 
5570
        register XTThreadPtr    thread = ot->ot_thread;
 
5571
        xtRowID                                 row_id = ot->ot_curr_row_id;
 
5572
        xtRecordID                              curr_var_rec_id;
 
5573
        XTTabRecInfoRec                 rec_info;
 
5574
 
 
5575
        /* A non-temporary table has been updated: */
 
5576
        if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
 
5577
                thread->st_non_temp_updated = TRUE;
 
5578
 
 
5579
        /* Setup a delete record: */
 
5580
        rec_info.ri_fix_rec_buf = (XTTabRecFixDPtr) ot->ot_row_wbuffer;
 
5581
        rec_info.ri_rec_buf_size = offsetof(XTTabRecFixDRec, rf_data);
 
5582
        rec_info.ri_ext_rec = NULL;
 
5583
        rec_info.ri_fix_rec_buf->tr_rec_type_1 = XT_TAB_STATUS_DELETE;
 
5584
        rec_info.ri_fix_rec_buf->tr_stat_id_1 = 0;
 
5585
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
 
5586
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, ot->ot_curr_rec_id);
 
5587
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, thread->st_xact_data->xd_start_xn_id);
 
5588
 
 
5589
        if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_DELETE))
 
5590
                return FAILED;
 
5591
 
 
5592
        XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
5593
 
 
5594
        if (!xt_tab_get_row(ot, row_id, &curr_var_rec_id))
 
5595
                goto failed_1;
 
5596
 
 
5597
        if (curr_var_rec_id != ot->ot_curr_rec_id) {
 
5598
                if (!tab_wait_for_rollback(ot, row_id, ot->ot_curr_rec_id))
 
5599
                        goto failed_1;          
 
5600
        }
 
5601
 
 
5602
#ifdef TRACE_VARIATIONS
 
5603
        xt_ttracef(thread, "update: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) thread->st_xact_data->xd_start_xn_id);
 
5604
#endif
 
5605
        if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
 
5606
                goto failed_1;
 
5607
        XT_DISABLED_TRACE(("del row tx=%d row=%d rec=%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
 
5608
 
 
5609
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
5610
 
 
5611
        if (ot->ot_table->tab_dic.dic_table->dt_trefs) {
 
5612
                if (!ot->ot_table->tab_dic.dic_table->deleteRow(ot, rec_buf))
 
5613
                        goto failed_2;
 
5614
        }
 
5615
 
 
5616
        thread->st_statistics.st_row_delete++;
 
5617
        return OK;
 
5618
 
 
5619
        failed_2:
 
5620
        tab_overwrite_record_on_fail(ot, &rec_info, rec_buf, NULL, 0);
 
5621
        return FAILED;
 
5622
 
 
5623
        failed_1:
 
5624
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
 
5625
        return FAILED;
 
5626
}
 
5627
 
 
5628
xtPublic xtBool xt_tab_restrict_rows(XTBasicListPtr list, XTThreadPtr thread)
 
5629
{
 
5630
        u_int                           i;
 
5631
        XTRestrictItemPtr       item;
 
5632
        XTOpenTablePtr          pot = NULL;
 
5633
        XTDatabaseHPtr          db = thread->st_database;
 
5634
        xtBool                          ok = TRUE;
 
5635
 
 
5636
        for (i=0; i<list->bl_count; i++) {
 
5637
                item = (XTRestrictItemPtr) xt_bl_item_at(list, i);
 
5638
                if (item)
 
5639
                        if (pot) {
 
5640
                                if (pot->ot_table->tab_id == item->ri_tab_id)
 
5641
                                        goto check_action;
 
5642
                                xt_db_return_table_to_pool_ns(pot);
 
5643
                                pot = NULL;
 
5644
                        }
 
5645
 
 
5646
                        if (!xt_db_open_pool_table_ns(&pot, db, item->ri_tab_id)) {
 
5647
                                /* Should not happen, but just in case, we just don't
 
5648
                                 * remove the lock. We will probably end up with a deadlock
 
5649
                                 * somewhere.
 
5650
                                 */
 
5651
                                xt_log_and_clear_exception_ns();
 
5652
                                goto skip_check_action;
 
5653
                        }
 
5654
                        if (!pot)
 
5655
                                /* Can happen of the table has been dropped: */
 
5656
                                goto skip_check_action;
 
5657
 
 
5658
                        check_action:
 
5659
                        if (!pot->ot_table->tab_dic.dic_table->checkNoAction(pot, item->ri_rec_id)) {
 
5660
                                ok = FALSE;
 
5661
                                break;
 
5662
                        }
 
5663
                        skip_check_action:;
 
5664
        }
 
5665
 
 
5666
        if (pot)
 
5667
                xt_db_return_table_to_pool_ns(pot);
 
5668
        xt_bl_free(NULL, list);
 
5669
        return ok;
 
5670
}
 
5671
 
 
5672
 
 
5673
xtPublic xtBool xt_tab_seq_init(XTOpenTablePtr ot)
 
5674
{
 
5675
        register XTTableHPtr tab = ot->ot_table;
 
5676
        
 
5677
        ASSERT_NS(!ot->ot_seq_page);
 
5678
        ot->ot_seq_page = NULL;
 
5679
        ot->ot_seq_data = NULL;
 
5680
        ot->ot_on_page = FALSE;
 
5681
        ot->ot_seq_offset = 0;
 
5682
 
 
5683
        ot->ot_curr_rec_id = 0;                 // 0 is an invalid position!
 
5684
        ot->ot_curr_row_id = 0;                 // 0 is an invalid row ID!
 
5685
        ot->ot_curr_updated = FALSE;
 
5686
 
 
5687
        /* We note the current EOF before we start a sequential scan.
 
5688
         * It is basically possible to update the same record more than
 
5689
         * once because an updated record creates a new record which
 
5690
         * has a new position which may be in the area that is
 
5691
         * still to be scanned.
 
5692
         *
 
5693
         * By noting the EOF before we start a sequential scan we
 
5694
         * reduce the possibility of this.
 
5695
         *
 
5696
         * However, the possibility still remains, but it should
 
5697
         * not be a problem because a record is not modified
 
5698
         * if there is nothing to change, which is the case
 
5699
         * if the record has already been changed!
 
5700
         *
 
5701
         * NOTE (2008-01-29) There is no longer a problem with updating a
 
5702
         * record twice because records are marked by an update.
 
5703
         *
 
5704
         * [(10)] I have changed this (see below). I now check the
 
5705
         * current EOF of the table.
 
5706
         *
 
5707
         * The reason is that committed read must be able to see the
 
5708
         * changes that occur during table table scan.   * 
 
5709
         */
 
5710
        ot->ot_seq_eof_id = tab->tab_rec_eof_id;
 
5711
 
 
5712
        if (!ot->ot_thread->st_xact_data) {
 
5713
                /* MySQL ignores this error, so we
 
5714
                 * setup the sequential scan so that it will
 
5715
                 * deliver nothing!
 
5716
                 */
 
5717
                ot->ot_seq_rec_id = ot->ot_seq_eof_id;
 
5718
                xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_TRANSACTION);
 
5719
                return FAILED;
 
5720
        }
 
5721
 
 
5722
        ot->ot_seq_rec_id = 1;
 
5723
        ot->ot_thread->st_statistics.st_scan_table++;
 
5724
        return OK;
 
5725
}
 
5726
 
 
5727
xtPublic void xt_tab_seq_reset(XTOpenTablePtr ot)
 
5728
{
 
5729
        ot->ot_seq_rec_id = 0;
 
5730
        ot->ot_seq_eof_id = 0;
 
5731
        ASSERT_NS(!ot->ot_seq_page);
 
5732
        ot->ot_seq_page = NULL;
 
5733
        ot->ot_seq_data = NULL;
 
5734
        ot->ot_on_page = FALSE;
 
5735
        ot->ot_seq_offset = 0;
 
5736
}
 
5737
 
 
5738
xtPublic void xt_tab_seq_exit(XTOpenTablePtr ot)
 
5739
{
 
5740
        register XTTableHPtr    tab = ot->ot_table;
 
5741
 
 
5742
        if (ot->ot_seq_page) {
 
5743
                tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
 
5744
                ot->ot_seq_page = NULL;
 
5745
        }
 
5746
        if (ot->ot_seq_data) {
 
5747
                XT_UNLOCK_MEMORY_PTR(ot->ot_rec_file, ot->ot_seq_data, ot->ot_thread);
 
5748
                ot->ot_seq_data = NULL;
 
5749
        }
 
5750
        ot->ot_on_page = FALSE;
 
5751
}
 
5752
 
 
5753
/*
 * TAB_SEQ_LOAD_CACHE is the "load" argument passed to xt_tc_get_page()
 * by the sequential scan. It is TRUE only when XT_SEQ_SCAN_LOADS_CACHE
 * is defined and XT_USE_ROW_REC_MMAP_FILES is not.
 */
#if !defined(XT_USE_ROW_REC_MMAP_FILES) && defined(XT_SEQ_SCAN_LOADS_CACHE)
#define TAB_SEQ_LOAD_CACHE		TRUE
#else
#define TAB_SEQ_LOAD_CACHE		FALSE
#endif
 
5762
 
 
5763
xtPublic void xt_tab_seq_repeat(XTOpenTablePtr ot)
 
5764
{
 
5765
        ot->ot_seq_rec_id--;
 
5766
        ot->ot_seq_offset -= ot->ot_table->tab_dic.dic_rec_size;
 
5767
}
 
5768
 
 
5769
xtPublic xtBool xt_tab_seq_next(XTOpenTablePtr ot, xtWord1 *buffer, xtBool *eof)
 
5770
{
 
5771
        register XTTableHPtr    tab = ot->ot_table;
 
5772
        register size_t                 rec_size = tab->tab_dic.dic_rec_size;
 
5773
        xtWord1                                 *buff_ptr;
 
5774
        xtRecordID                              new_rec_id;
 
5775
        xtRecordID                              invalid_rec = 0;
 
5776
 
 
5777
        next_page:
 
5778
        if (!ot->ot_on_page) {
 
5779
                ASSERT_NS(!ot->ot_seq_page);
 
5780
                if (!(ot->ot_on_page = tab->tab_recs.xt_tc_get_page(ot->ot_rec_file, ot->ot_seq_rec_id, TAB_SEQ_LOAD_CACHE, &ot->ot_seq_page, &ot->ot_seq_offset, ot->ot_thread)))
 
5781
                        return FAILED;
 
5782
                if (!ot->ot_seq_page) {
 
5783
                        XT_LOCK_MEMORY_PTR(ot->ot_seq_data, ot->ot_rec_file, xt_rec_id_to_rec_offset(tab, ot->ot_seq_rec_id), tab->tab_rows.tci_page_size, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread);
 
5784
                        if (!ot->ot_seq_data)
 
5785
                                return FAILED;
 
5786
                        ot->ot_on_page = TRUE;
 
5787
                        ot->ot_seq_offset = 0;
 
5788
                }
 
5789
        }
 
5790
 
 
5791
        next_record:
 
5792
        /* [(10)] The current EOF is used: */
 
5793
        if (ot->ot_seq_rec_id >= ot->ot_seq_eof_id) {
 
5794
                *eof = TRUE;
 
5795
                return OK;
 
5796
        }
 
5797
 
 
5798
        if (ot->ot_seq_offset >= tab->tab_recs.tci_page_size) {
 
5799
                if (ot->ot_seq_page) {
 
5800
                        tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
 
5801
                        ot->ot_seq_page = NULL;
 
5802
                }
 
5803
                if (ot->ot_seq_data) {
 
5804
                        /* NULL here means that in the case of non-memory mapped
 
5805
                         * files we "keep" the lock.
 
5806
                         */
 
5807
                        XT_UNLOCK_MEMORY_PTR(ot->ot_rec_file, ot->ot_seq_data, ot->ot_thread);
 
5808
                        ot->ot_seq_data = NULL;
 
5809
                }
 
5810
                ot->ot_on_page = FALSE;
 
5811
                goto next_page;
 
5812
        }
 
5813
 
 
5814
        if (ot->ot_seq_page)
 
5815
                buff_ptr = ot->ot_seq_page->tcp_data + ot->ot_seq_offset;
 
5816
        else
 
5817
                buff_ptr = ot->ot_seq_data + ot->ot_seq_offset;
 
5818
 
 
5819
        /* This is the current record: */
 
5820
        ot->ot_curr_rec_id = ot->ot_seq_rec_id;
 
5821
        ot->ot_curr_row_id = 0;
 
5822
 
 
5823
        /* Move to the next record: */
 
5824
        ot->ot_seq_rec_id++;
 
5825
        ot->ot_seq_offset += rec_size;
 
5826
 
 
5827
        retry:
 
5828
        switch (tab_visible(ot, (XTTabRecHeadDPtr) buff_ptr, &new_rec_id)) {
 
5829
                case FALSE:
 
5830
                        goto next_record;
 
5831
                case XT_ERR:
 
5832
                        goto failed;
 
5833
                case XT_NEW:
 
5834
                        buff_ptr = ot->ot_row_rbuffer;
 
5835
                        if (!xt_tab_get_rec_data(ot, new_rec_id, rec_size, ot->ot_row_rbuffer))
 
5836
                                return XT_ERR;
 
5837
                        ot->ot_curr_rec_id = new_rec_id;
 
5838
                        break;
 
5839
                case XT_RETRY:
 
5840
                        goto retry;
 
5841
                case XT_REREAD:
 
5842
                        if (invalid_rec != ot->ot_curr_rec_id) {
 
5843
                                /* Don't re-read for the same record twice: */
 
5844
                                invalid_rec = ot->ot_curr_rec_id;
 
5845
 
 
5846
                                /* Undo move to next: */
 
5847
                                ot->ot_seq_rec_id--;
 
5848
                                ot->ot_seq_offset -= rec_size;
 
5849
                                
 
5850
                                /* Prepare to reread the page: */
 
5851
                                if (ot->ot_seq_page) {
 
5852
                                        tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
 
5853
                                        ot->ot_seq_page = NULL;
 
5854
                                }
 
5855
                                ot->ot_on_page = FALSE;
 
5856
                                goto next_page;
 
5857
                        }
 
5858
                        if (!tab_record_corrupt(ot, XT_GET_DISK_4(((XTTabRecHeadDPtr) buff_ptr)->tr_row_id_4), invalid_rec, true, 8))
 
5859
                                return XT_ERR;
 
5860
#ifdef XT_CRASH_DEBUG
 
5861
                        /* Should not happen! */
 
5862
                        xt_crash_me();
 
5863
#endif
 
5864
                        /* Continue, and skip the record... */
 
5865
                        invalid_rec = 0;
 
5866
                        goto next_record;
 
5867
                default:
 
5868
                        break;
 
5869
        }
 
5870
 
 
5871
        switch (*buff_ptr) {
 
5872
                case XT_TAB_STATUS_FIXED:
 
5873
                case XT_TAB_STATUS_FIX_CLEAN:
 
5874
                        memcpy(buffer, buff_ptr + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
 
5875
                        break;
 
5876
                case XT_TAB_STATUS_VARIABLE:
 
5877
                case XT_TAB_STATUS_VAR_CLEAN:
 
5878
                        if (!myxt_load_row(ot, buff_ptr + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
 
5879
                                goto failed_1;
 
5880
                        break;
 
5881
                case XT_TAB_STATUS_EXT_DLOG:
 
5882
                case XT_TAB_STATUS_EXT_CLEAN: {
 
5883
                        u_int cols_req = ot->ot_cols_req;
 
5884
 
 
5885
                        ASSERT_NS(cols_req);
 
5886
                        if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
 
5887
                                if (!myxt_load_row(ot, buff_ptr + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
 
5888
                                        goto failed_1;
 
5889
                        }
 
5890
                        else {
 
5891
                                if (buff_ptr != ot->ot_row_rbuffer)
 
5892
                                        memcpy(ot->ot_row_rbuffer, buff_ptr, rec_size);
 
5893
                                if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
 
5894
                                        goto failed_1;
 
5895
                        }
 
5896
                        break;
 
5897
                }
 
5898
        }
 
5899
 
 
5900
        *eof = FALSE;
 
5901
        return OK;
 
5902
 
 
5903
        failed_1:
 
5904
 
 
5905
        failed:
 
5906
        return FAILED;
 
5907
}
 
5908
 
 
5909
/*
 
5910
 * -----------------------------------------------------------------------
 
5911
 * REPAIR TABLE
 
5912
 */
 
5913
 
 
5914
/* Operations accepted by tab_exec_repair_pending() ("what" argument): */
#define REP_FIND		0	/* Only look the table name up. */
#define REP_ADD			1	/* Add the table name if it is not listed. */
#define REP_DEL			2	/* Remove the table name if it is listed. */
 
5917
 
 
5918
static xtBool tab_exec_repair_pending(XTDatabaseHPtr db, int what, char *table_name)
 
5919
{
 
5920
        XTThreadPtr                     thread = xt_get_self();
 
5921
        char                            file_path[PATH_MAX];
 
5922
        XTOpenFilePtr           of = NULL;
 
5923
        int                                     len;
 
5924
        char                            *buffer = NULL, *ptr, *name;
 
5925
        char                            ch;
 
5926
        xtBool                          found = FALSE;
 
5927
 
 
5928
        xt_strcpy(PATH_MAX, file_path, db->db_main_path);
 
5929
        xt_add_pbxt_file(PATH_MAX, file_path, "repair-pending");
 
5930
        
 
5931
        if (what == REP_ADD) {
 
5932
                if (!xt_open_file_ns(&of, file_path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 0))
 
5933
                        return FALSE;
 
5934
        }
 
5935
        else {
 
5936
                if (!xt_open_file_ns(&of, file_path, XT_FT_STANDARD, XT_FS_DEFAULT | XT_FS_MISSING_OK, 0))
 
5937
                        return FALSE;
 
5938
        }
 
5939
        if (!of)
 
5940
                return FALSE;
 
5941
 
 
5942
        len = (int) xt_seek_eof_file(NULL, of);
 
5943
        
 
5944
        if (!(buffer = (char *) xt_malloc_ns(len + 1)))
 
5945
                goto failed;
 
5946
 
 
5947
        if (!xt_pread_file(of, 0, len, len, buffer, NULL, &thread->st_statistics.st_x, thread))
 
5948
                goto failed;
 
5949
 
 
5950
        buffer[len] = 0;
 
5951
        ptr = buffer;
 
5952
        for(;;) {
 
5953
                name = ptr;
 
5954
                while (*ptr && *ptr != '\n' && *ptr != '\r')
 
5955
                        ptr++;
 
5956
                if (ptr > name) {
 
5957
                        ch = *ptr;
 
5958
                        *ptr = 0;
 
5959
                        if (xt_tab_compare_names(name, table_name) == 0) {
 
5960
                                *ptr = ch;
 
5961
                                found = TRUE;
 
5962
                                break;
 
5963
                        }       
 
5964
                        *ptr = ch;
 
5965
                }
 
5966
                if (!*ptr)
 
5967
                        break;
 
5968
                ptr++;
 
5969
        }
 
5970
 
 
5971
        switch (what) {
 
5972
                case REP_ADD:
 
5973
                        if (!found) {
 
5974
                                /* Remove any trailing empty lines: */
 
5975
                                while (len > 0) {
 
5976
                                        if (buffer[len-1] != '\n' && buffer[len-1] != '\r')
 
5977
                                                break;
 
5978
                                        len--;
 
5979
                                }
 
5980
                                if (len > 0) {
 
5981
                                        if (!xt_pwrite_file(of, len, 1, (void *) "\n", &thread->st_statistics.st_x, thread))
 
5982
                                                goto failed;
 
5983
                                        len++;
 
5984
                                }
 
5985
                                if (!xt_pwrite_file(of, len, strlen(table_name), table_name, &thread->st_statistics.st_x, thread))
 
5986
                                        goto failed;
 
5987
                                len += strlen(table_name);
 
5988
                                if (!xt_set_eof_file(NULL, of, len))
 
5989
                                        goto failed;
 
5990
                        }
 
5991
                        break;
 
5992
                case REP_DEL:
 
5993
                        if (found) {
 
5994
                                if (*ptr != '\0')
 
5995
                                        ptr++;
 
5996
                                memmove(name, ptr, len - (ptr - buffer));
 
5997
                                len = len - (ptr - name);
 
5998
 
 
5999
                                /* Remove trailing empty lines: */
 
6000
                                while (len > 0) {
 
6001
                                        if (buffer[len-1] != '\n' && buffer[len-1] != '\r')
 
6002
                                                break;
 
6003
                                        len--;
 
6004
                                }
 
6005
 
 
6006
                                if (len > 0) {
 
6007
                                        if (!xt_pwrite_file(of, 0, len, buffer, &thread->st_statistics.st_x, thread))
 
6008
                                                goto failed;
 
6009
                                        if (!xt_set_eof_file(NULL, of, len))
 
6010
                                                goto failed;
 
6011
                                }
 
6012
                        }
 
6013
                        break;
 
6014
        }
 
6015
 
 
6016
        xt_close_file_ns(of);
 
6017
        xt_free_ns(buffer);
 
6018
 
 
6019
        if (len == 0)
 
6020
                xt_fs_delete(NULL, file_path);
 
6021
        return found;
 
6022
 
 
6023
        failed:
 
6024
        if (of)
 
6025
                xt_close_file_ns(of);
 
6026
        if (buffer)
 
6027
                xt_free_ns(buffer);
 
6028
        xt_log_and_clear_exception(thread);
 
6029
        return FALSE;
 
6030
}
 
6031
 
 
6032
xtPublic void xt_tab_make_table_name(XTPathStrPtr tab_path, char *table_name, size_t size)
 
6033
{
 
6034
        char    *nptr;
 
6035
 
 
6036
        nptr = xt_last_name_of_path(tab_path->ps_path);
 
6037
        if (xt_starts_with(nptr, "#sql")) {
 
6038
                /* {INVALID-OLD-TABLE-FIX}
 
6039
                 * Temporary files can have strange paths, for example
 
6040
                 * ..../var/tmp/mysqld.1/#sqldaec_1_6
 
6041
                 * This occurs, for example, occurs when the temp_table.test is
 
6042
                 * run using the PBXT suite in MariaDB:
 
6043
                 * ./mtr --suite=pbxt --do-test=temp_table
 
6044
                 *
 
6045
                 * Calling myxt_static_convert_file_name, with a '.', in the name
 
6046
                 * causes the error:
 
6047
                 * [ERROR] Invalid (old?) table or database name 'mysqld.1'
 
6048
                 * To prevent this, we do not convert the temporary
 
6049
                 * table names using the mysql functions.
 
6050
                 *
 
6051
                 * Note, this bug was found by Monty, and fixed by modifying
 
6052
                 * xt_2nd_last_name_of_path(), see {INVALID-OLD-TABLE-FIX}.
 
6053
                 *
 
6054
                 */
 
6055
                xt_2nd_last_name_of_path(size, table_name, tab_path->ps_path);
 
6056
                xt_strcat(size, table_name, ".");
 
6057
                xt_strcat(size, table_name, nptr);
 
6058
        }
 
6059
        else {
 
6060
                char    name_buf[XT_TABLE_NAME_SIZE*3+3];
 
6061
                char    *part_ptr;
 
6062
                size_t  len;
 
6063
 
 
6064
                xt_2nd_last_name_of_path(sizeof(name_buf), name_buf, tab_path->ps_path);
 
6065
                myxt_static_convert_file_name(name_buf, table_name, size);
 
6066
                xt_strcat(size, table_name, ".");
 
6067
                
 
6068
                /* Handle partition extensions to table names: */
 
6069
                if ((part_ptr = strstr(nptr, "#P#")))
 
6070
                        xt_strncpy(sizeof(name_buf), name_buf, nptr, part_ptr - nptr);
 
6071
                else
 
6072
                        xt_strcpy(sizeof(name_buf), name_buf, nptr);
 
6073
 
 
6074
                len = strlen(table_name);
 
6075
                myxt_static_convert_file_name(name_buf, table_name + len, size - len);
 
6076
 
 
6077
                if (part_ptr) {
 
6078
                        /* Add the partition extension (which is relevant to the engine). */
 
6079
                        char    *sub_part_ptr;
 
6080
 
 
6081
                        part_ptr += 3;
 
6082
                        if ((sub_part_ptr = strstr(part_ptr, "#SP#")))
 
6083
                                xt_strncpy(sizeof(name_buf), name_buf, part_ptr, sub_part_ptr - part_ptr);
 
6084
                        else
 
6085
                                xt_strcpy(sizeof(name_buf), name_buf, part_ptr);
 
6086
                        
 
6087
                        xt_strcat(size, table_name, " (");
 
6088
                        len = strlen(table_name);
 
6089
                        myxt_static_convert_file_name(name_buf, table_name + len, size - len);
 
6090
                        
 
6091
                        if (sub_part_ptr) {
 
6092
                        
 
6093
                                sub_part_ptr += 4;
 
6094
                                xt_strcat(size, table_name, " - ");
 
6095
                                len = strlen(table_name);
 
6096
                                myxt_static_convert_file_name(sub_part_ptr, table_name + len, size - len);
 
6097
                        }
 
6098
 
 
6099
                        xt_strcat(size, table_name, ")");
 
6100
                }
 
6101
        }
 
6102
}
 
6103
 
 
6104
xtPublic xtBool xt_tab_is_table_repair_pending(XTTableHPtr tab)
 
6105
{
 
6106
        char table_name[XT_TABLE_NAME_BUF_SIZE];
 
6107
 
 
6108
        xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
 
6109
        return tab_exec_repair_pending(tab->tab_db, REP_FIND, table_name);
 
6110
}
 
6111
 
 
6112
xtPublic void xt_tab_table_repaired(XTTableHPtr tab)
 
6113
{
 
6114
        if (tab->tab_repair_pending) {
 
6115
                char table_name[XT_TABLE_NAME_BUF_SIZE];
 
6116
 
 
6117
                tab->tab_repair_pending = FALSE;
 
6118
                xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
 
6119
                tab_exec_repair_pending(tab->tab_db, REP_DEL, table_name);
 
6120
        }
 
6121
}
 
6122
 
 
6123
xtPublic void xt_tab_set_table_repair_pending(XTTableHPtr tab)
 
6124
{
 
6125
        if (!tab->tab_repair_pending) {
 
6126
                char table_name[XT_TABLE_NAME_BUF_SIZE];
 
6127
 
 
6128
                tab->tab_repair_pending = TRUE;
 
6129
                xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
 
6130
                tab_exec_repair_pending(tab->tab_db, REP_ADD, table_name);
 
6131
        }
 
6132
}
 
6133
 
 
6134
/*
 
6135
 * -----------------------------------------------------------------------
 
6136
 * EXTENDED DATA FOR RAM TABLES
 
6137
 */
 
6138
 
 
6139
xtPublic xtBool xt_tab_get_ext_slot(XTTableHPtr tab, xtLogID *log_id, xtLogOffset *log_offset, size_t XT_UNUSED(req_size))
 
6140
{
 
6141
        size_t new_slot;
 
6142
 
 
6143
        xt_spinlock_lock(&tab->tab_mem_lock);
 
6144
        if (tab->tab_mem_ind_free) {
 
6145
                new_slot = tab->tab_mem_ind_free - 1;
 
6146
                tab->tab_mem_ind_free = (size_t) tab->tab_mem_index[new_slot];
 
6147
        }
 
6148
        else {
 
6149
                if (tab->tab_mem_ind_usage == tab->tab_mem_ind_size) {
 
6150
                        /* Grow the index: */
 
6151
                        if (!xt_realloc_ns((void **) &tab->tab_mem_index, (tab->tab_mem_ind_size + 100) * sizeof(xtWord1 *)))
 
6152
                                return FAILED;
 
6153
                        tab->tab_mem_ind_size += 100;
 
6154
                }
 
6155
                new_slot = tab->tab_mem_ind_usage;
 
6156
                tab->tab_mem_ind_usage++;
 
6157
        }
 
6158
        xt_spinlock_unlock(&tab->tab_mem_lock);
 
6159
        tab->tab_mem_index[new_slot] = NULL;
 
6160
        *log_id = 1;
 
6161
        *log_offset = new_slot + 1;
 
6162
        return OK;
 
6163
}
 
6164
 
 
6165
xtPublic xtBool xt_tab_save_ext_record(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size, xtWord1 *data)
 
6166
{
 
6167
        size_t  slot = ((size_t) log_offset) - 1;
 
6168
        xtWord1 *rec_data;
 
6169
 
 
6170
        if (!(rec_data = (xtWord1 *) xt_malloc_ns(size)))
 
6171
                return FAILED;
 
6172
        memcpy(rec_data, data, size);
 
6173
        xt_spinlock_lock(&tab->tab_mem_lock);
 
6174
        tab->tab_mem_total += size;
 
6175
        tab->tab_mem_index[slot] = rec_data;
 
6176
        xt_spinlock_unlock(&tab->tab_mem_lock);
 
6177
        return OK;
 
6178
}
 
6179
 
 
6180
xtPublic void xt_tab_read_ext_record(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size, xtWord1 *data)
 
6181
{
 
6182
        size_t  slot = ((size_t) log_offset) - 1;
 
6183
 
 
6184
        if (slot < tab->tab_mem_ind_usage && tab->tab_mem_index[slot])
 
6185
                memcpy(data, tab->tab_mem_index[slot], size);
 
6186
        else
 
6187
                memset(data, 0, size);
 
6188
}
 
6189
 
 
6190
xtPublic void xt_tab_free_ext_slot(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size)
 
6191
{
 
6192
        size_t  slot = ((size_t) log_offset) - 1;
 
6193
 
 
6194
        xt_spinlock_lock(&tab->tab_mem_lock);
 
6195
        if (tab->tab_mem_index[slot]) {
 
6196
                xt_free_ns(tab->tab_mem_index[slot]);
 
6197
                tab->tab_mem_total -= size;
 
6198
        }
 
6199
        tab->tab_mem_index[slot] = (xtWord1 *) tab->tab_mem_ind_free;
 
6200
        tab->tab_mem_ind_free = slot + 1;
 
6201
        xt_spinlock_unlock(&tab->tab_mem_lock);
 
6202
}
 
6203
 
 
6204
static void tab_free_ext_records(XTTableHPtr tab)
 
6205
{
 
6206
        size_t i, next;
 
6207
        
 
6208
        if (!tab->tab_mem_index)
 
6209
                return;
 
6210
 
 
6211
        i = tab->tab_mem_ind_free;
 
6212
        while (i) {
 
6213
                next = (size_t) tab->tab_mem_index[i-1];
 
6214
                tab->tab_mem_index[i-1] = NULL;
 
6215
                i = next;
 
6216
        }
 
6217
 
 
6218
        for (i=0; i<tab->tab_mem_ind_usage; i++) {
 
6219
                if (tab->tab_mem_index[i])
 
6220
                        xt_free_ns(tab->tab_mem_index[i]);
 
6221
        }
 
6222
        
 
6223
        xt_free_ns(tab->tab_mem_index);
 
6224
}
 
6225
 
 
6226
 
 
6227