~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to plugin/pbxt/src/table_xt.cc

  • Committer: Monty Taylor
  • Date: 2008-09-16 00:00:48 UTC
  • mto: This revision was merged to the branch mainline in revision 391.
  • Revision ID: monty@inaugust.com-20080916000048-3rvrv3gv9l0ad3gs
Fixed copyright headers in drizzled/

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/* Copyright (C) 2005 PrimeBase Technologies GmbH
2
 
 *
3
 
 * PrimeBase XT
4
 
 *
5
 
 * This program is free software; you can redistribute it and/or modify
6
 
 * it under the terms of the GNU General Public License as published by
7
 
 * the Free Software Foundation; either version 2 of the License, or
8
 
 * (at your option) any later version.
9
 
 *
10
 
 * This program is distributed in the hope that it will be useful,
11
 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 
 * GNU General Public License for more details.
14
 
 *
15
 
 * You should have received a copy of the GNU General Public License
16
 
 * along with this program; if not, write to the Free Software
17
 
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
 
 *
19
 
 * 2005-02-08   Paul McCullagh
20
 
 *
21
 
 * H&G2JCtL
22
 
 */
23
 
 
24
 
#include "xt_config.h"
25
 
 
26
 
#include <string.h>
27
 
#include <stdio.h>
28
 
#ifndef XT_WIN
29
 
#include <strings.h>
30
 
#endif
31
 
#include <ctype.h>
32
 
#include <time.h>
33
 
 
34
 
#ifdef DRIZZLED
35
 
#include <drizzled/common.h>
36
 
#include <drizzled/dtcollation.h>
37
 
#else
38
 
#include "mysql_priv.h"
39
 
#endif
40
 
 
41
 
#include "table_xt.h"
42
 
#include "database_xt.h"
43
 
#include "heap_xt.h"
44
 
#include "strutil_xt.h"
45
 
#include "myxt_xt.h"
46
 
#include "cache_xt.h"
47
 
#include "trace_xt.h"
48
 
#include "index_xt.h"
49
 
#include "systab_xt.h"
50
 
 
51
 
#ifdef DEBUG
52
 
//#define TRACE_VARIATIONS
53
 
//#define TRACE_VARIATIONS_IN_DUP_CHECK
54
 
//#define DUMP_CHECK_TABLE
55
 
//#define CHECK_INDEX_ON_CHECK_TABLE
56
 
//#define TRACE_TABLE_IDS
57
 
//#define TRACE_FLUSH_TABLE
58
 
//#define TRACE_CREATE_TABLES
59
 
#endif
60
 
 
61
 
#define CHECK_TABLE_STATS
62
 
 
63
 
/* The problem is that this can take a long time
64
 
 * if the table is very large!
65
 
 */
66
 
//#define CHECK_TABLE_READ_DATA_LOG
67
 
 
68
 
#ifdef TRACE_TABLE_IDS
69
 
//#define PRINTF                xt_ftracef
70
 
#define PRINTF          xt_trace
71
 
#endif
72
 
 
73
 
static void tab_init_row_file(XTThreadPtr self, XTOpenFilePtr of_row, XTTableHPtr tab, XTDictionaryPtr dic);
74
 
static void tab_init_ind_file(XTThreadPtr self, XTOpenFilePtr of_ind, XTTableHPtr tab, XTDictionaryPtr dic);
75
 
static void tab_init_data_file(XTThreadPtr self, XTOpenFilePtr of_rec, XTTableHPtr tab, XTDictionaryPtr dic, size_t def_len, XTStringBufferPtr tab_def);
76
 
static void tab_free_ext_records(XTTableHPtr tab);
77
 
 
78
 
/*
79
 
 * -----------------------------------------------------------------------
80
 
 * Internal structures
81
 
 */
82
 
 
83
 
#define XT_MAX_TABLE_FILE_NAME_SIZE             (XT_TABLE_NAME_SIZE+6+40)
84
 
 
85
 
/*
86
 
 * -----------------------------------------------------------------------
87
 
 * Handle Error Detected in a Table
88
 
 */
89
 
 
90
 
struct XTTableError {
91
 
        xtTableID               ter_tab_id;
92
 
        xtRecordID              ter_rec_id;
93
 
};
94
 
 
95
 
static int tab_comp_tab_error(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
96
 
{
97
 
        XTTableError    *ter_a = ((XTTableError *) a);
98
 
        XTTableError    *ter_b = (XTTableError *) b;
99
 
 
100
 
        if (ter_a->ter_tab_id < ter_b->ter_tab_id)
101
 
                return -1;
102
 
        if (ter_a->ter_tab_id == ter_b->ter_tab_id) {
103
 
                if (ter_a->ter_rec_id < ter_b->ter_rec_id)
104
 
                        return -1;
105
 
                if (ter_a->ter_rec_id == ter_b->ter_rec_id)
106
 
                        return 0;
107
 
                return 1;
108
 
        }
109
 
        return 1;
110
 
}
111
 
 
112
 
static xtBool tab_record_corrupt(XTOpenTablePtr ot, xtRowID row_id, xtRecordID rec_id, bool not_valid, int where)
113
 
{
114
 
        XTTableHPtr             tab = ot->ot_table;
115
 
        XTDatabaseHPtr  db = tab->tab_db;
116
 
        XTTableError    ter;
117
 
        XTTableError    *ter_ptr;
118
 
        
119
 
        ter.ter_tab_id = tab->tab_id;
120
 
        ter.ter_rec_id = rec_id;
121
 
        
122
 
        xt_sl_lock_ns(db->db_error_list, ot->ot_thread);
123
 
        if (!(ter_ptr = (XTTableError *) xt_sl_find(NULL, db->db_error_list, &ter))) {
124
 
                xtBool  ok;
125
 
                char    table_name[XT_IDENTIFIER_NAME_SIZE*3+3];
126
 
 
127
 
                ok = xt_sl_insert(NULL, db->db_error_list, &ter, &ter);
128
 
                xt_sl_unlock_ns(db->db_error_list);
129
 
                if (!ok)
130
 
                        return FAILED;
131
 
                xt_tab_set_table_repair_pending(tab);
132
 
                xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
133
 
                xt_logf(XT_NT_ERROR, "#%d Table %s: row %llu, record %llu, is %s, REPAIR TABLE required.\n", where,
134
 
                        table_name, 
135
 
                        (u_llong) row_id,
136
 
                        (u_llong) rec_id,
137
 
                        not_valid ? "not valid" : "free");
138
 
        }
139
 
        else
140
 
                xt_sl_unlock_ns(db->db_error_list);
141
 
        return OK;
142
 
}
143
 
 
144
 
/*
145
 
 * -----------------------------------------------------------------------
146
 
 * Compare paths:
147
 
 */
148
 
 
149
 
/* GOTCHA! The problem:
150
 
 *
151
 
 * The server uses names like: "./test/my_tab",
152
 
 * the BLOB streaming engine uses: "test/my_tab"
153
 
 * which leads to the same table being loaded twice.
154
 
 */
155
 
xtPublic int xt_tab_compare_paths(char *n1, char *n2)
156
 
{
157
 
        n1 = xt_last_2_names_of_path(n1);
158
 
        n2 = xt_last_2_names_of_path(n2);
159
 
        if (pbxt_ignore_case)
160
 
                return strcasecmp(n1, n2);
161
 
        return strcmp(n1, n2);
162
 
}
163
 
 
164
 
/*
165
 
 * This function compares only the last 2 components of
166
 
 * the path because table names must differ in this area.
167
 
 */
168
 
xtPublic int xt_tab_compare_names(const char *n1, const char *n2)
169
 
{
170
 
        n1 = xt_last_2_names_of_path(n1);
171
 
        n2 = xt_last_2_names_of_path(n2);
172
 
        if (pbxt_ignore_case)
173
 
                return strcasecmp(n1, n2);
174
 
        return strcmp(n1, n2);
175
 
}
176
 
 
177
 
/*
178
 
 * -----------------------------------------------------------------------
179
 
 * Private utilities
180
 
 */
181
 
 
182
 
static xtBool tab_list_comp(void *key, void *data)
183
 
{
184
 
        XTTableHPtr     tab = (XTTableHPtr) data;
185
 
 
186
 
        return strcmp(xt_last_2_names_of_path((char *) key), xt_last_2_names_of_path(tab->tab_name->ps_path)) == 0;
187
 
}
188
 
 
189
 
static xtHashValue tab_list_hash(xtBool is_key, void *key_data)
190
 
{
191
 
        XTTableHPtr     tab = (XTTableHPtr) key_data;
192
 
 
193
 
        if (is_key)
194
 
                return xt_ht_hash(xt_last_2_names_of_path((char *) key_data));
195
 
        return xt_ht_hash(xt_last_2_names_of_path(tab->tab_name->ps_path));
196
 
}
197
 
 
198
 
static xtBool tab_list_comp_ci(void *key, void *data)
199
 
{
200
 
        XTTableHPtr     tab = (XTTableHPtr) data;
201
 
 
202
 
        return strcasecmp(xt_last_2_names_of_path((char *) key), xt_last_2_names_of_path(tab->tab_name->ps_path)) == 0;
203
 
}
204
 
 
205
 
static xtHashValue tab_list_hash_ci(xtBool is_key, void *key_data)
206
 
{
207
 
        XTTableHPtr     tab = (XTTableHPtr) key_data;
208
 
 
209
 
        if (is_key)
210
 
                return xt_ht_casehash(xt_last_2_names_of_path((char *) key_data));
211
 
        return xt_ht_casehash(xt_last_2_names_of_path(tab->tab_name->ps_path));
212
 
}
213
 
 
214
 
static void tab_list_free(XTThreadPtr self, void *data)
215
 
{
216
 
        XTTableHPtr             tab = (XTTableHPtr) data;
217
 
        XTDatabaseHPtr  db = tab->tab_db;
218
 
        XTTableEntryPtr te_ptr;
219
 
 
220
 
        /* Remove the reference from the ID list, whem the table is
221
 
         * removed from the name list:
222
 
         */
223
 
        if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab->tab_id)))
224
 
                te_ptr->te_table = NULL;
225
 
 
226
 
        if (tab->tab_dic.dic_table)
227
 
                tab->tab_dic.dic_table->removeReferences(self);
228
 
        xt_heap_release(self, tab);
229
 
}
230
 
 
231
 
static void tab_close_files(XTThreadPtr self, XTTableHPtr tab)
232
 
{
233
 
        if (tab->tab_rec_file) {
234
 
                xt_fs_release_file(self, tab->tab_rec_file);
235
 
                tab->tab_rec_file = NULL;
236
 
        }
237
 
        if (tab->tab_row_file) {
238
 
                xt_fs_release_file(self, tab->tab_row_file);
239
 
                tab->tab_row_file = NULL;
240
 
        }
241
 
        if (tab->tab_ind_file) {
242
 
                xt_fs_release_file(self, tab->tab_ind_file);
243
 
                tab->tab_ind_file = NULL;
244
 
        }
245
 
}
246
 
 
247
 
static void tab_finalize(XTThreadPtr self, void *x)
248
 
{
249
 
        XTTableHPtr     tab = (XTTableHPtr) x;
250
 
 
251
 
        xt_exit_row_locks(&tab->tab_locks);
252
 
 
253
 
        xt_xres_exit_tab(self, tab);
254
 
 
255
 
        if (tab->tab_ind_free_list) {
256
 
                XTIndFreeListPtr list, flist;
257
 
                
258
 
                list = tab->tab_ind_free_list;
259
 
                while (list) {
260
 
                        flist = list;
261
 
                        list = list->fl_next_list;
262
 
                        xt_free(self, flist);
263
 
                }
264
 
                tab->tab_ind_free_list = NULL;
265
 
        }
266
 
 
267
 
        tab_close_files(self, tab);
268
 
 
269
 
        if (tab->tab_index_head) {
270
 
                xt_free(self, tab->tab_index_head);
271
 
                tab->tab_index_head = NULL;
272
 
        }
273
 
 
274
 
        tab_free_ext_records(tab);
275
 
 
276
 
#ifdef TRACE_TABLE_IDS
277
 
        PRINTF("%s: free TABLE: db=%d tab=%d %s\n", self->t_name, (int) tab->tab_db ? tab->tab_db->db_id : 0, (int) tab->tab_id, 
278
 
                tab->tab_name ? xt_last_2_names_of_path(tab->tab_name->ps_path) : "?");
279
 
#endif
280
 
        if (tab->tab_name) {
281
 
                xt_free(self, tab->tab_name);
282
 
                tab->tab_name = NULL;
283
 
        }
284
 
        myxt_free_dictionary(self, &tab->tab_dic);
285
 
        if (tab->tab_free_locks) {
286
 
                tab->tab_seq.xt_op_seq_exit(self);
287
 
                xt_spinlock_free(self, &tab->tab_mem_lock);
288
 
                xt_spinlock_free(self, &tab->tab_ainc_lock);
289
 
                xt_free_mutex(&tab->tab_rec_flush_lock);
290
 
                xt_free_mutex(&tab->tab_ind_flush_lock);
291
 
                xt_free_mutex(&tab->tab_ind_stat_lock);
292
 
                xt_free_mutex(&tab->tab_dic_field_lock);
293
 
                xt_free_mutex(&tab->tab_row_lock);
294
 
                xt_free_mutex(&tab->tab_ind_lock);
295
 
                xt_free_mutex(&tab->tab_rec_lock);
296
 
                for (u_int i=0; i<XT_ROW_RWLOCKS; i++)
297
 
                        XT_TAB_ROW_FREE_LOCK(self, &tab->tab_row_rwlock[i]);
298
 
        }
299
 
#ifdef XT_SORT_REC_WRITES
300
 
        if (tab->tab_rec_dw_writes) {
301
 
                xt_free_sortedlist(self, tab->tab_rec_dw_writes);
302
 
                tab->tab_rec_dw_writes = NULL;
303
 
        }
304
 
        if (tab->tab_rec_dw_data)
305
 
                xt_free_ns(tab->tab_rec_dw_data);
306
 
#endif
307
 
        if (tab->tab_rec_flush_task)
308
 
                tab->tab_rec_flush_task->tk_exit();
309
 
        if (tab->tab_ind_flush_task)
310
 
                tab->tab_ind_flush_task->tk_exit();
311
 
}
312
 
 
313
 
static void tab_onrelease(void *x)
314
 
{
315
 
        XTTableHPtr     tab = (XTTableHPtr) x;
316
 
 
317
 
        /* Signal threads waiting for exclusive use of the table: */
318
 
        if (tab->tab_db->db_tables)
319
 
                xt_ht_signal(NULL, tab->tab_db->db_tables);
320
 
}
321
 
 
322
 
/*
323
 
 * -----------------------------------------------------------------------
324
 
 * PUBLIC METHODS
325
 
 */
326
 
 
327
 
/*
328
 
 * This function sets the table name to "", if the file
329
 
 * does not belong to XT.
330
 
 */
331
 
xtPublic char *xt_tab_file_to_name(size_t size, char *tab_name, char *file_name)
332
 
{
333
 
        char    *cptr;
334
 
        size_t  len;
335
 
 
336
 
        file_name = xt_last_name_of_path(file_name);
337
 
        cptr = file_name + strlen(file_name) - 1;
338
 
        while (cptr > file_name && *cptr != '.')
339
 
                cptr--;
340
 
        if (cptr > file_name && *cptr == '.') {
341
 
                if (strcmp(cptr, ".xtl") == 0 || strcmp(cptr, ".xtr") == 0) {
342
 
                        cptr--;
343
 
                        while (cptr > file_name && isdigit(*cptr))
344
 
                                cptr--;
345
 
                }
346
 
                else {
347
 
                        const char **ext = pbxt_extensions;
348
 
                        
349
 
                        while (*ext) {
350
 
                                if (strcmp(cptr, *ext) == 0)
351
 
                                        goto ret_name;
352
 
                                ext++;
353
 
                        }
354
 
                        cptr = file_name;
355
 
                }
356
 
        }
357
 
 
358
 
        ret_name:
359
 
        len = cptr - file_name;
360
 
        if (len > size-1)
361
 
                len = size-1;
362
 
 
363
 
        memcpy(tab_name, file_name, len);
364
 
        tab_name[len] = 0;
365
 
 
366
 
        /* Return a pointer to what was removed! */
367
 
        return file_name + len;
368
 
}
369
 
 
370
 
static void tab_get_row_file_name(char *table_name, char *name, xtTableID tab_id)
371
 
{
372
 
        sprintf(table_name, "%s-%lu.xtr", name, (u_long) tab_id);
373
 
}
374
 
 
375
 
static void tab_get_data_file_name(char *table_name, char *name, xtTableID XT_UNUSED(tab_id))
376
 
{
377
 
        sprintf(table_name, "%s.xtd", name);
378
 
}
379
 
 
380
 
static void tab_get_index_file_name(char *table_name, char *name, xtTableID XT_UNUSED(tab_id))
381
 
{
382
 
        sprintf(table_name, "%s.xti", name);
383
 
}
384
 
 
385
 
static void tab_free_by_id(XTThreadPtr self, void *XT_UNUSED(thunk), void *item)
386
 
{
387
 
        XTTableEntryPtr te_ptr = (XTTableEntryPtr) item;
388
 
 
389
 
        if (te_ptr->te_tab_name) {
390
 
                xt_free(self, te_ptr->te_tab_name);
391
 
                te_ptr->te_tab_name = NULL;
392
 
        }
393
 
        te_ptr->te_tab_id = 0;
394
 
        te_ptr->te_heap_tab = FALSE;
395
 
        te_ptr->te_table = NULL;
396
 
}
397
 
 
398
 
static int tab_comp_by_id(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
399
 
{
400
 
        xtTableID               te_id = *((xtTableID *) a);
401
 
        XTTableEntryPtr te_ptr = (XTTableEntryPtr) b;
402
 
 
403
 
        if (te_id < te_ptr->te_tab_id)
404
 
                return -1;
405
 
        if (te_id == te_ptr->te_tab_id)
406
 
                return 0;
407
 
        return 1;
408
 
}
409
 
 
410
 
static void tab_free_path(XTThreadPtr self, void *XT_UNUSED(thunk), void *item)
411
 
{
412
 
        XTTablePathPtr  tp_ptr = *((XTTablePathPtr *) item);
413
 
 
414
 
        xt_free(self, tp_ptr);
415
 
}
416
 
 
417
 
static int tab_comp_path(XTThreadPtr XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
418
 
{
419
 
        char                    *path = (char *) a;
420
 
        XTTablePathPtr  tp_ptr = *((XTTablePathPtr *) b);
421
 
 
422
 
        return xt_tab_compare_paths(path, tp_ptr->tp_path);
423
 
}
424
 
 
425
 
static xtBool tab_get_name_value(XTTableDescPtr td, char **ret_name, char **ret_value)
426
 
{
427
 
        char *ptr = td->x.z.td_curr_ptr;
428
 
 
429
 
        while (*ptr && isspace(*ptr)) ptr++;
430
 
        if (!*ptr) {
431
 
                td->x.z.td_curr_ptr = ptr;
432
 
                return FALSE;
433
 
        }
434
 
 
435
 
        *ret_name = ptr;
436
 
        while (*ptr && *ptr != '=' && *ptr != '\r' && *ptr != '\n') ptr++;
437
 
        if (*ptr == '=') {
438
 
                *ptr = 0;
439
 
                ptr++;
440
 
                *ret_value = ptr;
441
 
                while (*ptr && *ptr != '\r' && *ptr != '\n') ptr++;
442
 
                if (*ptr) {
443
 
                        *ptr = 0;
444
 
                        ptr++;
445
 
                }
446
 
        }
447
 
        else {
448
 
                if (*ptr) {
449
 
                        *ptr = 0;
450
 
                        ptr++;
451
 
                }
452
 
                *ret_value = NULL;
453
 
        }
454
 
        td->x.z.td_curr_ptr = ptr;
455
 
        return TRUE;
456
 
}
457
 
 
458
 
xtPublic void xt_describe_tables_init(XTThreadPtr self, XTDatabaseHPtr db, XTTableDescPtr td)
459
 
{
460
 
        char pbuf[PATH_MAX];
461
 
 
462
 
        td->td_db = db;
463
 
        xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
464
 
        xt_add_tables_file(PATH_MAX, pbuf);
465
 
        if (xt_fs_exists(pbuf))
466
 
                td->td_type = XT_TD_FROM_TAB_FILE;
467
 
        else
468
 
                td->td_type = XT_TD_FROM_DIRECTORY;
469
 
 
470
 
        switch (td->td_type) {
471
 
                case XT_TD_FROM_DIRECTORY:
472
 
                        td->x.y.td_path_idx = 0;
473
 
                        if (td->x.y.td_path_idx < xt_sl_get_size(db->db_table_paths)) {
474
 
                                XTTablePathPtr *tp_ptr;
475
 
 
476
 
                                tp_ptr = (XTTablePathPtr *) xt_sl_item_at(db->db_table_paths, td->x.y.td_path_idx);
477
 
                                td->td_tab_path = *tp_ptr;
478
 
                                td->x.y.td_open_dir = xt_dir_open(self, td->td_tab_path->tp_path, "*.xtr");
479
 
                        }
480
 
                        else
481
 
                                td->x.y.td_open_dir = NULL;
482
 
                        break;
483
 
                case XT_TD_FROM_TAB_FILE:
484
 
                        XTOpenFilePtr   of;
485
 
                        int                             len;
486
 
                        char                    *buffer;
487
 
                        char                    *name;
488
 
                        char                    *value;
489
 
 
490
 
                        of = xt_open_file(self, pbuf, XT_FT_STANDARD, XT_FS_DEFAULT, 1024);
491
 
                        pushr_(xt_close_file, of);
492
 
                        len = (int) xt_seek_eof_file(self, of);
493
 
                        buffer = (char *) xt_malloc(self, len + 1);
494
 
                        pushr_(xt_free, buffer);
495
 
                        if (!xt_pread_file(of, 0, len, len, buffer, NULL, &self->st_statistics.st_x, self))
496
 
                                xt_throw(self);
497
 
                        buffer[len] = 0;
498
 
                        popr_(); // Discard xt_free(buffer)
499
 
                        freer_(); // xt_close_file(of)
500
 
 
501
 
                        td->x.z.td_table_info = buffer;
502
 
                        td->x.z.td_curr_ptr = buffer;
503
 
                        while (tab_get_name_value(td, &name, &value)) {
504
 
                                if (strcmp(name, "[table]") == 0)
505
 
                                        break;
506
 
                        }
507
 
                        break;
508
 
        }
509
 
}
510
 
 
511
 
xtPublic xtBool xt_describe_tables_next(XTThreadPtr self, XTTableDescPtr td)
512
 
{
513
 
        char    *tab_name;
514
 
        xtBool  r = FALSE;
515
 
 
516
 
        enter_();
517
 
 
518
 
        switch (td->td_type) {
519
 
                case XT_TD_FROM_DIRECTORY:
520
 
                        retry:
521
 
                        if (!td->x.y.td_open_dir)
522
 
                                return_(FALSE);
523
 
                        try_(a) {
524
 
                                r = xt_dir_next(self, td->x.y.td_open_dir);
525
 
                        }
526
 
                        catch_(a) {
527
 
                                xt_describe_tables_exit(self, td);
528
 
                                throw_();
529
 
                        }
530
 
                        cont_(a);
531
 
                        if (!r) {
532
 
                                XTTablePathPtr *tp_ptr;
533
 
 
534
 
                                if (td->x.y.td_path_idx+1 >= xt_sl_get_size(td->td_db->db_table_paths))
535
 
                                        return_(FALSE);
536
 
 
537
 
                                if (td->x.y.td_open_dir)
538
 
                                        xt_dir_close(NULL, td->x.y.td_open_dir);
539
 
                                td->x.y.td_open_dir = NULL;
540
 
 
541
 
                                td->x.y.td_path_idx++;
542
 
                                tp_ptr = (XTTablePathPtr *) xt_sl_item_at(td->td_db->db_table_paths, td->x.y.td_path_idx);
543
 
                                td->td_tab_path = *tp_ptr;
544
 
                                td->x.y.td_open_dir = xt_dir_open(self, td->td_tab_path->tp_path, "*.xtr");
545
 
                                goto retry;
546
 
                        }
547
 
 
548
 
                        tab_name = xt_dir_name(self, td->x.y.td_open_dir);
549
 
                        td->td_tab_id = (xtTableID) xt_file_name_to_id(tab_name);
550
 
                        xt_tab_file_to_name(XT_TABLE_NAME_SIZE, td->td_tab_name, tab_name);
551
 
                        td->td_heap_tab = FALSE;
552
 
                        break;
553
 
                case XT_TD_FROM_TAB_FILE:
554
 
                        char *name;
555
 
                        char *value;
556
 
 
557
 
                        td->td_tab_id = 0;
558
 
                        while (tab_get_name_value(td, &name, &value)) {
559
 
                                if (strcmp(name, "name") == 0)
560
 
                                        xt_strcpy(XT_TABLE_NAME_SIZE, td->td_tab_name, value);
561
 
                                else if (strcmp(name, "id") == 0) {
562
 
                                        u_long lvalue = 0;
563
 
 
564
 
                                        sscanf(value, "%lu", &lvalue);
565
 
                                        td->td_tab_id = (xtTableID) lvalue;
566
 
                                }
567
 
                                else if (strcmp(name, "storage") == 0) {
568
 
                                        if (strcmp(value, "heap") == 0)
569
 
                                                td->td_heap_tab = TRUE;
570
 
                                        else /* disk */
571
 
                                                td->td_heap_tab = FALSE;
572
 
                                }
573
 
                                else if (strcmp(name, "location") == 0) {
574
 
                                        XTTablePathPtr  *tp;
575
 
                                        XTTablePathPtr  db_path;
576
 
 
577
 
#ifdef XT_WIN
578
 
                                        char *ptr = value;
579
 
 
580
 
                                        /* Convert path to WIN path: */
581
 
                                        while (*ptr) {
582
 
                                                if (*ptr == '/')
583
 
                                                        *ptr = '\\';
584
 
                                                ptr++;
585
 
                                        }
586
 
#endif
587
 
                                        if ((tp = (XTTablePathPtr *) xt_sl_find(self, td->td_db->db_table_paths, value)))
588
 
                                                db_path = *tp;
589
 
                                        else {
590
 
                                                size_t                  len;
591
 
 
592
 
                                                len = strlen(value);
593
 
                                                db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
594
 
                                                db_path->tp_tab_count = 0;
595
 
                                                memcpy(db_path->tp_path, value, len);
596
 
                                                db_path->tp_path[len] = 0;
597
 
                                                xt_sl_insert(self, td->td_db->db_table_paths, db_path->tp_path, &db_path);
598
 
                                        }
599
 
                                        td->td_tab_path = db_path;
600
 
                                }
601
 
                                else if (strcmp(name, "type") == 0) {
602
 
                                        u_long lvalue = 0;
603
 
 
604
 
                                        sscanf(value, "%lu", &lvalue);
605
 
                                        td->td_tab_type = (xtWord1) lvalue;
606
 
                                } 
607
 
                                else if (strcmp(name, "[table]") == 0)
608
 
                                        break;
609
 
                        }
610
 
                        if (!td->td_tab_id)
611
 
                                return_(FALSE);
612
 
                        break;
613
 
        }
614
 
        return_(TRUE);
615
 
}
616
 
 
617
 
xtPublic void xt_describe_tables_exit(XTThreadPtr self, XTTableDescPtr td)
618
 
{
619
 
        switch (td->td_type) {
620
 
                case XT_TD_FROM_DIRECTORY:
621
 
                        if (td->x.y.td_open_dir)
622
 
                                xt_dir_close(NULL, td->x.y.td_open_dir);
623
 
                        td->x.y.td_open_dir = NULL;
624
 
                        break;
625
 
                case XT_TD_FROM_TAB_FILE:
626
 
                        if (td->x.z.td_table_info) {
627
 
                                xt_free(self, td->x.z.td_table_info);
628
 
                                td->x.z.td_table_info = NULL;
629
 
                        }
630
 
                        td->x.z.td_curr_ptr = NULL;
631
 
                        break;
632
 
        }
633
 
        td->td_tab_path = NULL;
634
 
}
635
 
 
636
 
xtPublic void xt_tab_init_db(XTThreadPtr self, XTDatabaseHPtr db)
637
 
{
638
 
        XTTableDescRec          desc;
639
 
        XTTableEntryRec         te_tab;
640
 
        XTTableEntryPtr         te_ptr;
641
 
        XTTablePathPtr          db_path;
642
 
        char                            pbuf[PATH_MAX];
643
 
        int                                     len;
644
 
        u_int                           edx;
645
 
 
646
 
        enter_();
647
 
        pushr_(xt_tab_exit_db, db);
648
 
        if (pbxt_ignore_case)
649
 
                db->db_tables = xt_new_hashtable(self, tab_list_comp_ci, tab_list_hash_ci, tab_list_free, TRUE, TRUE);
650
 
        else
651
 
                db->db_tables = xt_new_hashtable(self, tab_list_comp, tab_list_hash, tab_list_free, TRUE, TRUE);
652
 
        db->db_table_by_id = xt_new_sortedlist(self, sizeof(XTTableEntryRec), 20, 20, tab_comp_by_id, db, tab_free_by_id, FALSE, FALSE);
653
 
        db->db_table_paths = xt_new_sortedlist(self, sizeof(XTTablePathPtr), 20, 20, tab_comp_path, db, tab_free_path, FALSE, FALSE);
654
 
        db->db_error_list = xt_new_sortedlist(self, sizeof(XTTableError), 20, 20, tab_comp_tab_error, db, NULL, TRUE, FALSE);
655
 
 
656
 
        if (db->db_multi_path) {
657
 
                XTOpenFilePtr   of;
658
 
                char                    *buffer, *ptr, *path;
659
 
 
660
 
                xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
661
 
                xt_add_tables_file(PATH_MAX, pbuf);
662
 
                if (!xt_fs_exists(pbuf)) {
663
 
                        /* Load the location file, if a tables file does not
664
 
                         * exists:
665
 
                         */
666
 
                        xt_strcpy(PATH_MAX, pbuf, db->db_main_path);
667
 
                        xt_add_location_file(PATH_MAX, pbuf);
668
 
                        if (xt_fs_exists(pbuf)) {
669
 
                                of = xt_open_file(self, pbuf, XT_FT_STANDARD, XT_FS_DEFAULT, 1024);
670
 
                                pushr_(xt_close_file, of);
671
 
                                len = (int) xt_seek_eof_file(self, of);
672
 
                                buffer = (char *) xt_malloc(self, len + 1);
673
 
                                pushr_(xt_free, buffer);
674
 
                                if (!xt_pread_file(of, 0, len, len, buffer, NULL, &self->st_statistics.st_x, self))
675
 
                                        xt_throw(self);
676
 
                                buffer[len] = 0;
677
 
                                ptr = buffer;
678
 
                                while (*ptr) {
679
 
                                        /* Ignore preceeding space: */
680
 
                                        while (*ptr && isspace(*ptr))
681
 
                                                ptr++;
682
 
                                        path = ptr;
683
 
                                        while (*ptr && *ptr != '\n' && *ptr != '\r') {
684
 
#ifdef XT_WIN
685
 
                                                /* Undo the conversion below: */
686
 
                                                if (*ptr == '/')
687
 
                                                        *ptr = '\\';
688
 
#endif
689
 
                                                ptr++;
690
 
                                        }
691
 
                                        if (*path != '#' && ptr > path) {
692
 
                                                len = (int) (ptr - path);
693
 
                                                db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
694
 
                                                db_path->tp_tab_count = 0;
695
 
                                                memcpy(db_path->tp_path, path, len);
696
 
                                                db_path->tp_path[len] = 0;
697
 
                                                xt_sl_insert(self, db->db_table_paths, db_path->tp_path, &db_path);
698
 
                                        }
699
 
                                        ptr++;
700
 
                                }
701
 
                                freer_(); // xt_free(buffer)
702
 
                                freer_(); // xt_close_file(of)
703
 
                        }
704
 
                }
705
 
        }
706
 
        else {
707
 
                len = (int) strlen(db->db_main_path);
708
 
                db_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
709
 
                db_path->tp_tab_count = 0;
710
 
                strcpy(db_path->tp_path, db->db_main_path);
711
 
                xt_sl_insert(self, db->db_table_paths, db_path->tp_path, &db_path);
712
 
        }
713
 
 
714
 
        xt_describe_tables_init(self, db, &desc);
715
 
        pushr_(xt_describe_tables_exit, &desc);
716
 
        while (xt_describe_tables_next(self, &desc)) {
717
 
                te_tab.te_tab_id = desc.td_tab_id;
718
 
                te_tab.te_heap_tab = desc.td_heap_tab;
719
 
 
720
 
                if (te_tab.te_tab_id > db->db_curr_tab_id)
721
 
                        db->db_curr_tab_id = te_tab.te_tab_id;
722
 
 
723
 
                te_tab.te_tab_name = xt_dup_string(self, desc.td_tab_name);
724
 
                te_tab.te_tab_path = desc.td_tab_path;
725
 
                desc.td_tab_path->tp_tab_count++;
726
 
                te_tab.te_table = NULL;
727
 
                te_tab.te_type = desc.td_tab_type;
728
 
                xt_sl_insert(self, db->db_table_by_id, &desc.td_tab_id, &te_tab);
729
 
        }
730
 
        freer_(); // xt_describe_tables_exit(&desc)
731
 
 
732
 
        /*
733
 
         * When we open all tables, we ignore problems with foreign keys.
734
 
         * This must be done or we will not be able to load tables that
735
 
         * were created with foreign key checks off.
736
 
         */
737
 
        self->st_ignore_fkeys = 1;
738
 
        /* 
739
 
         * The purpose of this code is to ensure that all tables are opened and cached,
740
 
         * which is actually only required if tables have foreign key references.
741
 
         *
742
 
         * In other words, a side affect of this code is that FK references between tables
743
 
         * are registered, and checked.
744
 
         *
745
 
         * Unfortunately we don't know if a table is referenced by a FK, so we have to open
746
 
         * all tables.
747
 
         * 
748
 
         * Cannot open tables in the loop above because db->db_table_by_id which is built 
749
 
         * above is used by xt_use_table_no_lock() 
750
 
         *
751
 
         * {TABLE-STATS}
752
 
         * NOTE: The code also lead to the statistics failing to work because 
753
 
         * the tables were already open when the handler was opened.
754
 
         * Previously we only caclulated statistics when a handler was opened
755
 
         * and the underlying table was also opened.
756
 
         */
757
 
        XTTableHPtr tab;
758
 
 
759
 
        xt_enum_tables_init(&edx);
760
 
        while ((te_ptr = xt_enum_tables_next(self, db, &edx))) {
761
 
                xt_strcpy(PATH_MAX, pbuf, te_ptr->te_tab_path->tp_path);
762
 
                xt_add_dir_char(PATH_MAX, pbuf);
763
 
                xt_strcat(PATH_MAX, pbuf, te_ptr->te_tab_name);
764
 
                if ((tab = xt_use_table_no_lock_ns(db, (XTPathStrPtr) pbuf, FALSE, FALSE, NULL)))
765
 
                        xt_heap_release_ns(tab);
766
 
                else
767
 
                        xt_log_and_clear_warning(self);
768
 
        }
769
 
        self->st_ignore_fkeys = 0;
770
 
 
771
 
        popr_(); // Discard xt_tab_exit_db(db)
772
 
        exit_();
773
 
}
774
 
 
775
 
static void tab_save_tables(XTThreadPtr self, XTDatabaseHPtr db)
776
 
{
777
 
        XTTableEntryPtr         te_ptr;
778
 
        XTStringBufferRec       buffer;
779
 
        XTOpenFilePtr           of;
780
 
        char                            path[PATH_MAX];
781
 
 
782
 
        memset(&buffer, 0, sizeof(buffer));
783
 
 
784
 
        xt_strcpy(PATH_MAX, path, db->db_main_path);
785
 
        xt_add_tables_file(PATH_MAX, path);
786
 
 
787
 
        if (xt_sl_get_size(db->db_table_by_id)) {
788
 
                pushr_(xt_sb_free, &buffer);
789
 
                for (u_int i=0; i<xt_sl_get_size(db->db_table_by_id); i++) {
790
 
                        te_ptr = (XTTableEntryPtr) xt_sl_item_at(db->db_table_by_id, i);
791
 
                        xt_sb_concat(self, &buffer, "[table]\n");
792
 
                        xt_sb_concat(self, &buffer, "id=");
793
 
                        xt_sb_concat_int8(self, &buffer, (xtInt8) te_ptr->te_tab_id);
794
 
                        xt_sb_concat(self, &buffer, "\n");
795
 
                        xt_sb_concat(self, &buffer, "name=");
796
 
                        xt_sb_concat(self, &buffer, te_ptr->te_tab_name);
797
 
                        xt_sb_concat(self, &buffer, "\n");
798
 
                        xt_sb_concat(self, &buffer, "location=");
799
 
                        xt_sb_concat(self, &buffer, te_ptr->te_tab_path->tp_path);
800
 
                        xt_sb_concat(self, &buffer, "\n");
801
 
                        xt_sb_concat(self, &buffer, "storage=");
802
 
                        if (te_ptr->te_heap_tab)
803
 
                                xt_sb_concat(self, &buffer, "heap\n");
804
 
                        else
805
 
                                xt_sb_concat(self, &buffer, "disk\n");
806
 
                        xt_sb_concat(self, &buffer, "type=");
807
 
                        xt_sb_concat_int8(self, &buffer, (xtInt8) te_ptr->te_type);
808
 
                        xt_sb_concat(self, &buffer, "\n");
809
 
                }
810
 
 
811
 
#ifdef XT_WIN
812
 
                /* To make the location file cross-platform (at least
813
 
                 * as long as relative paths are used) we replace all '\' 
814
 
                 * with '/': */
815
 
                char *ptr;
816
 
                
817
 
                ptr = buffer.sb_cstring;
818
 
                while (*ptr) {
819
 
                        if (*ptr == '\\')
820
 
                                *ptr = '/';
821
 
                        ptr++;
822
 
                }
823
 
#endif
824
 
                of = xt_open_file(self, path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 1024);
825
 
                pushr_(xt_close_file, of);
826
 
                if (!xt_pwrite_file(of, 0, strlen(buffer.sb_cstring), buffer.sb_cstring, &self->st_statistics.st_x, self))
827
 
                        xt_throw(self);
828
 
                xt_set_eof_file(self, of, strlen(buffer.sb_cstring));
829
 
                freer_(); // xt_close_file(of)
830
 
                
831
 
                freer_(); // xt_sb_free(&buffer);
832
 
        }
833
 
        else
834
 
                xt_fs_delete(NULL, path);
835
 
}
836
 
 
837
 
static void tab_save_table_paths(XTThreadPtr self, XTDatabaseHPtr db)
838
 
{
839
 
        XTTablePathPtr          *tp_ptr;
840
 
        XTStringBufferRec       buffer;
841
 
        XTOpenFilePtr           of;
842
 
        char                            path[PATH_MAX];
843
 
 
844
 
        memset(&buffer, 0, sizeof(buffer));
845
 
 
846
 
        xt_strcpy(PATH_MAX, path, db->db_main_path);
847
 
        xt_add_location_file(PATH_MAX, path);
848
 
 
849
 
        if (xt_sl_get_size(db->db_table_paths)) {
850
 
                pushr_(xt_sb_free, &buffer);
851
 
                for (u_int i=0; i<xt_sl_get_size(db->db_table_paths); i++) {
852
 
                        tp_ptr = (XTTablePathPtr *) xt_sl_item_at(db->db_table_paths, i);
853
 
                        xt_sb_concat(self, &buffer, (*tp_ptr)->tp_path);
854
 
                        xt_sb_concat(self, &buffer, "\n");
855
 
                }
856
 
 
857
 
#ifdef XT_WIN
858
 
                /* To make the location file cross-platform (at least
859
 
                 * as long as relative paths are used) we replace all '\' 
860
 
                 * with '/': */
861
 
                char *ptr;
862
 
                
863
 
                ptr = buffer.sb_cstring;
864
 
                while (*ptr) {
865
 
                        if (*ptr == '\\')
866
 
                                *ptr = '/';
867
 
                        ptr++;
868
 
                }
869
 
#endif
870
 
 
871
 
                of = xt_open_file(self, path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 1024);
872
 
                pushr_(xt_close_file, of);
873
 
                if (!xt_pwrite_file(of, 0, strlen(buffer.sb_cstring), buffer.sb_cstring, &self->st_statistics.st_x, self))
874
 
                        xt_throw(self);
875
 
                xt_set_eof_file(self, of, strlen(buffer.sb_cstring));
876
 
                freer_(); // xt_close_file(of)
877
 
                
878
 
                freer_(); // xt_sb_free(&buffer);
879
 
        }
880
 
        else
881
 
                xt_fs_delete(NULL, path);
882
 
}
883
 
 
884
 
static XTTablePathPtr tab_get_table_path(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr tab_name, xtBool save_it)
885
 
{
886
 
        XTTablePathPtr  *tp, tab_path;
887
 
        char                    path[PATH_MAX];
888
 
 
889
 
        xt_strcpy(PATH_MAX, path, tab_name->ps_path);
890
 
        xt_remove_last_name_of_path(path);
891
 
        xt_remove_dir_char(path);
892
 
        tp = (XTTablePathPtr *) xt_sl_find(self, db->db_table_paths, path);
893
 
        if (tp)
894
 
                tab_path = *tp;
895
 
        else {
896
 
                int len = (int) strlen(path);
897
 
 
898
 
                tab_path = (XTTablePathPtr) xt_malloc(self, offsetof(XTTablePathRec, tp_path) + len + 1);
899
 
                tab_path->tp_tab_count = 0;
900
 
                memcpy(tab_path->tp_path, path, len);
901
 
                tab_path->tp_path[len] = 0;
902
 
                xt_sl_insert(self, db->db_table_paths, tab_path->tp_path, &tab_path);
903
 
                if (save_it) {
904
 
                        tab_save_table_paths(self, db);
905
 
                        if (xt_sl_get_size(db->db_table_paths) == 1) {
906
 
                                XTSystemTableShare::createSystemTables(self, db);
907
 
                        }
908
 
                }
909
 
        }
910
 
        tab_path->tp_tab_count++;
911
 
        return tab_path;
912
 
}
913
 
 
914
 
static void tab_remove_table_path(XTThreadPtr self, XTDatabaseHPtr db, XTTablePathPtr tab_path)
915
 
{
916
 
        if (tab_path->tp_tab_count > 0) {
917
 
                tab_path->tp_tab_count--;
918
 
                if (tab_path->tp_tab_count == 0) {
919
 
                        xt_sl_delete(self, db->db_table_paths, tab_path->tp_path);
920
 
                        tab_save_table_paths(self, db);
921
 
                }
922
 
        }
923
 
}
924
 
 
925
 
static void tab_free_table_path(XTThreadPtr self, XTTablePathPtr tab_path)
926
 
{
927
 
        XTDatabaseHPtr db = self->st_database;
928
 
 
929
 
        tab_remove_table_path(self, db, tab_path);
930
 
}
931
 
 
932
 
xtPublic void xt_tab_exit_db(XTThreadPtr self, XTDatabaseHPtr db)
933
 
{
934
 
        if (db->db_tables) {
935
 
                xt_free_hashtable(self, db->db_tables);
936
 
                db->db_tables = NULL;
937
 
        }
938
 
        if (db->db_table_by_id) {
939
 
                xt_free_sortedlist(self, db->db_table_by_id);
940
 
                db->db_table_by_id = NULL;
941
 
        }
942
 
        if (db->db_table_paths) {
943
 
                xt_free_sortedlist(self, db->db_table_paths);
944
 
                db->db_table_paths = NULL;
945
 
        }
946
 
        if (db->db_error_list) {
947
 
                xt_free_sortedlist(self, db->db_error_list);
948
 
                db->db_error_list = NULL;
949
 
        }
950
 
}
951
 
 
952
 
 
953
 
xtPublic xtBool xt_table_exists(XTDatabaseHPtr db)
954
 
{
955
 
        return xt_sl_get_size(db->db_table_by_id) > 0;
956
 
}
957
 
 
958
 
/*
959
 
 * Enumerate all tables in the current database.
960
 
 */
961
 
 
962
 
xtPublic void xt_enum_tables_init(u_int *edx)
963
 
{
964
 
        *edx = 0;
965
 
}
966
 
 
967
 
xtPublic XTTableEntryPtr xt_enum_tables_next(XTThreadPtr XT_UNUSED(self), XTDatabaseHPtr db, u_int *edx)
968
 
{
969
 
        XTTableEntryPtr en_ptr;
970
 
 
971
 
        if (*edx >= xt_sl_get_size(db->db_table_by_id))
972
 
                return NULL;
973
 
        en_ptr = (XTTableEntryPtr) xt_sl_item_at(db->db_table_by_id, *edx);
974
 
        (*edx)++;
975
 
        return en_ptr;
976
 
}
977
 
 
978
 
xtPublic void xt_enum_files_of_tables_init(XTPathStrPtr tab_name, xtTableID tab_id, XTFilesOfTablePtr ft)
979
 
{
980
 
        ft->ft_state = 0;
981
 
        ft->ft_tab_name = tab_name;
982
 
        ft->ft_tab_id = tab_id;
983
 
}
984
 
 
985
 
xtPublic xtBool xt_enum_files_of_tables_next(XTFilesOfTablePtr ft)
986
 
{
987
 
        char file_name[XT_MAX_TABLE_FILE_NAME_SIZE];
988
 
 
989
 
        retry:
990
 
        switch (ft->ft_state) {
991
 
                case 0:
992
 
                        tab_get_row_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
993
 
                        break;
994
 
                case 1:
995
 
                        tab_get_data_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
996
 
                        break;
997
 
                case 2:
998
 
                        tab_get_index_file_name(file_name, xt_last_name_of_path(ft->ft_tab_name->ps_path), ft->ft_tab_id);
999
 
                        break;
1000
 
                default:
1001
 
                        return FAILED;
1002
 
        }
1003
 
 
1004
 
        ft->ft_state++;
1005
 
        xt_strcpy(PATH_MAX, ft->ft_file_path, ft->ft_tab_name->ps_path);
1006
 
        xt_remove_last_name_of_path(ft->ft_file_path);
1007
 
        xt_strcat(PATH_MAX, ft->ft_file_path, file_name);
1008
 
        if (!xt_fs_exists(ft->ft_file_path))
1009
 
                goto retry;
1010
 
 
1011
 
        return TRUE;
1012
 
}
1013
 
 
1014
 
static xtBool tab_find_table(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr name, xtTableID *tab_id)
1015
 
{
1016
 
        u_int                   edx;
1017
 
        XTTableEntryPtr te_ptr;
1018
 
        char                    path[PATH_MAX];
1019
 
 
1020
 
        xt_enum_tables_init(&edx);
1021
 
        while ((te_ptr = xt_enum_tables_next(self, db, &edx))) {
1022
 
                xt_strcpy(PATH_MAX, path, te_ptr->te_tab_path->tp_path);
1023
 
                xt_add_dir_char(PATH_MAX, path);
1024
 
                xt_strcat(PATH_MAX, path, te_ptr->te_tab_name);
1025
 
                if (xt_tab_compare_names(path, name->ps_path) == 0) {
1026
 
                        *tab_id = te_ptr->te_tab_id;
1027
 
                        return TRUE;
1028
 
                }
1029
 
        }
1030
 
        return FALSE;
1031
 
}
1032
 
 
1033
 
xtPublic void xt_tab_disable_index(XTTableHPtr tab, u_int ind_error)
1034
 
{
1035
 
        tab->tab_dic.dic_disable_index = ind_error;
1036
 
        xt_tab_set_table_repair_pending(tab);
1037
 
}
1038
 
 
1039
 
xtPublic void xt_tab_set_index_error(XTTableHPtr tab)
1040
 
{
1041
 
        switch (tab->tab_dic.dic_disable_index) {
1042
 
                case XT_INDEX_OK:
1043
 
                        break;
1044
 
                case XT_INDEX_TOO_OLD:
1045
 
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_OLD_VERSION, tab->tab_name);
1046
 
                        break;
1047
 
                case XT_INDEX_TOO_NEW:
1048
 
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_NEW_VERSION, tab->tab_name);
1049
 
                        break;
1050
 
                case XT_INDEX_BAD_BLOCK:
1051
 
                        char number[40];
1052
 
 
1053
 
                        sprintf(number, "%d", (int) tab->tab_index_page_size);
1054
 
                        xt_register_i2xterr(XT_REG_CONTEXT, XT_ERR_BAD_IND_BLOCK_SIZE, xt_last_name_of_path(tab->tab_name->ps_path), number);
1055
 
                        break;
1056
 
                case XT_INDEX_CORRUPTED:
1057
 
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_CORRUPTED, tab->tab_name);
1058
 
                        break;
1059
 
                case XT_INDEX_MISSING:
1060
 
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_MISSING, tab->tab_name);
1061
 
                        break;
1062
 
                case XT_INDEX_NOT_RECOVERED:
1063
 
                        xt_register_taberr(XT_REG_CONTEXT, XT_ERR_INDEX_NOT_RECOVERED, tab->tab_name);
1064
 
                        break;
1065
 
        }
1066
 
}
1067
 
 
1068
 
static void tab_load_index_header(XTThreadPtr self, XTTableHPtr tab, XTOpenFilePtr file, XTPathStrPtr table_name)
1069
 
{
1070
 
        XT_NODE_TEMP;
1071
 
        XTIndexPtr                      *ind;
1072
 
        xtWord1                         *data;
1073
 
        XTIndexFormatDPtr       index_fmt;
1074
 
 
1075
 
        /* Load the pointers: */
1076
 
        if (tab->tab_index_head)
1077
 
                xt_free_ns(tab->tab_index_head);
1078
 
        tab->tab_index_head = (XTIndexHeadDPtr) xt_calloc(self, XT_INDEX_HEAD_SIZE);
1079
 
 
1080
 
        if (file) {
1081
 
                if (!xt_pread_file(file, 0, XT_INDEX_HEAD_SIZE, 0, tab->tab_index_head, NULL, &self->st_statistics.st_ind, self))
1082
 
                        xt_throw(self);
1083
 
 
1084
 
                tab->tab_index_format_offset = XT_GET_DISK_4(tab->tab_index_head->tp_format_offset_4);
1085
 
                index_fmt = (XTIndexFormatDPtr) (((xtWord1 *) tab->tab_index_head) + tab->tab_index_format_offset);
1086
 
 
1087
 
                /* If the table version is less than or equal to an incompatible (unsupported
1088
 
                 * version), or greater than the current version, then we cannot open this table
1089
 
                 */
1090
 
                if (XT_GET_DISK_2(index_fmt->if_tab_version_2) <= XT_TAB_INCOMPATIBLE_VERSION ||
1091
 
                        XT_GET_DISK_2(index_fmt->if_tab_version_2) > XT_TAB_CURRENT_VERSION) {
1092
 
                        switch (XT_GET_DISK_2(index_fmt->if_tab_version_2)) {
1093
 
                                case 4: 
1094
 
                                        xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.91 Beta");
1095
 
                                        break;
1096
 
                                case 3: 
1097
 
                                        xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.85 Beta");
1098
 
                                        break;
1099
 
                                default:
1100
 
                                        xt_throw_taberr(XT_CONTEXT, XT_ERR_BAD_TABLE_VERSION, table_name);
1101
 
                                        break;
1102
 
                        }
1103
 
                        return;
1104
 
                }
1105
 
 
1106
 
                tab->tab_dic.dic_index_ver = XT_GET_DISK_2(index_fmt->if_ind_version_2);
1107
 
                tab->tab_dic.dic_disable_index = XT_INDEX_OK;
1108
 
 
1109
 
                if (tab->tab_dic.dic_index_ver == 1) {
1110
 
                        tab->tab_index_header_size = 1024 * 16;
1111
 
                        tab->tab_index_page_size = 1024 * 16;
1112
 
                }
1113
 
                else {
1114
 
                        tab->tab_index_header_size = XT_GET_DISK_4(tab->tab_index_head->tp_header_size_4);
1115
 
                        tab->tab_index_page_size = XT_GET_DISK_4(index_fmt->if_page_size_4);
1116
 
                }       
1117
 
 
1118
 
#ifdef XT_USE_LAZY_DELETE
1119
 
                if (tab->tab_dic.dic_index_ver <= XT_IND_NO_LAZY_DELETE)
1120
 
                        tab->tab_dic.dic_no_lazy_delete = TRUE;
1121
 
                else
1122
 
                        tab->tab_dic.dic_no_lazy_delete = FALSE;
1123
 
#else
1124
 
                tab->tab_dic.dic_no_lazy_delete = TRUE;
1125
 
#endif
1126
 
 
1127
 
                /* Incorrect version of index is handled by allowing a sequential scan, but no index access.
1128
 
                 * Recovery with the wrong index type will not recover the indexes, a REPAIR TABLE
1129
 
                 * will be required!
1130
 
                 */
1131
 
                if (tab->tab_dic.dic_index_ver != XT_IND_CURRENT_VERSION) {
1132
 
                        switch (tab->tab_dic.dic_index_ver) {
1133
 
                                case XT_IND_NO_LAZY_DELETE:
1134
 
                                case XT_IND_LAZY_DELETE_OK:
1135
 
                                        /* I can handle this type of index. */
1136
 
                                        break;
1137
 
                                default:
1138
 
                                        if (tab->tab_dic.dic_index_ver < XT_IND_CURRENT_VERSION)
1139
 
                                                xt_tab_disable_index(tab, XT_INDEX_TOO_OLD);
1140
 
                                        else
1141
 
                                                xt_tab_disable_index(tab, XT_INDEX_TOO_NEW);
1142
 
                                        break;
1143
 
                        }
1144
 
                }
1145
 
                else if (tab->tab_index_page_size != XT_INDEX_PAGE_SIZE)
1146
 
                        xt_tab_disable_index(tab, XT_INDEX_BAD_BLOCK);
1147
 
        }
1148
 
        else {
1149
 
                memset(tab->tab_index_head, 0, XT_INDEX_HEAD_SIZE);
1150
 
                xt_tab_disable_index(tab, XT_INDEX_MISSING);
1151
 
                tab->tab_index_header_size = XT_INDEX_HEAD_SIZE;
1152
 
                tab->tab_index_page_size = XT_INDEX_PAGE_SIZE;
1153
 
                tab->tab_dic.dic_index_ver = 0;
1154
 
                tab->tab_index_format_offset = 0;
1155
 
        }
1156
 
 
1157
 
        
1158
 
        if (tab->tab_dic.dic_disable_index) {
1159
 
                xt_tab_set_index_error(tab);
1160
 
                xt_log_and_clear_exception_ns();
1161
 
        }
1162
 
 
1163
 
        if (tab->tab_dic.dic_disable_index) {
1164
 
                /* Reset, as if we have empty indexes.
1165
 
                 * Flush will wipe things out, of course.
1166
 
                 * REPAIR TABLE will be required...
1167
 
                 */
1168
 
                XT_NODE_ID(tab->tab_ind_eof) = 1;
1169
 
                XT_NODE_ID(tab->tab_ind_free) = 0;
1170
 
 
1171
 
                ind = tab->tab_dic.dic_keys;
1172
 
                for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++)
1173
 
                        XT_NODE_ID((*ind)->mi_root) = 0;
1174
 
        }
1175
 
        else {
1176
 
                XT_NODE_ID(tab->tab_ind_eof) = (xtIndexNodeID) XT_GET_DISK_6(tab->tab_index_head->tp_ind_eof_6);
1177
 
                XT_NODE_ID(tab->tab_ind_free) = (xtIndexNodeID) XT_GET_DISK_6(tab->tab_index_head->tp_ind_free_6);
1178
 
 
1179
 
                data = tab->tab_index_head->tp_data;
1180
 
                ind = tab->tab_dic.dic_keys;
1181
 
                for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++) {
1182
 
                        (*ind)->mi_root = XT_GET_NODE_REF(tab, data);
1183
 
                        data += XT_NODE_REF_SIZE;
1184
 
                }
1185
 
        }
1186
 
}
1187
 
 
1188
 
static void tab_load_table_format(XTThreadPtr self, XTOpenFilePtr file, XTPathStrPtr table_name, size_t *ret_format_offset, size_t *ret_head_size, XTDictionaryPtr dic)
1189
 
{
1190
 
        XTDiskValue4            size_buf;
1191
 
        size_t                          head_size;
1192
 
        XTTableFormatDRec       tab_fmt;
1193
 
        size_t                          fmt_size;
1194
 
 
1195
 
        if (!xt_pread_file(file, 0, 4, 4, &size_buf, NULL, &self->st_statistics.st_rec, self))
1196
 
                xt_throw(self);
1197
 
 
1198
 
        head_size = XT_GET_DISK_4(size_buf);
1199
 
        *ret_format_offset = head_size;
1200
 
 
1201
 
        /* Load the table format information: */
1202
 
        if (!xt_pread_file(file, head_size, offsetof(XTTableFormatDRec, tf_definition), offsetof(XTTableFormatDRec, tf_tab_version_2) + 2, &tab_fmt, NULL, &self->st_statistics.st_rec, self))
1203
 
                xt_throw(self);
1204
 
 
1205
 
        /* If the table version is less than or equal to an incompatible (unsupported
1206
 
         * version), or greater than the current version, then we cannot open this table
1207
 
         */
1208
 
        if (XT_GET_DISK_2(tab_fmt.tf_tab_version_2) <= XT_TAB_INCOMPATIBLE_VERSION ||
1209
 
                XT_GET_DISK_2(tab_fmt.tf_tab_version_2) > XT_TAB_CURRENT_VERSION) {
1210
 
                switch (XT_GET_DISK_2(tab_fmt.tf_tab_version_2)) {
1211
 
                        case 4: 
1212
 
                                xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.91 Beta");
1213
 
                                break;
1214
 
                        case 3: 
1215
 
                                xt_throw_tabcolerr(XT_CONTEXT, XT_ERR_UPGRADE_TABLE, table_name, "0.9.85 Beta");
1216
 
                                break;
1217
 
                        default:
1218
 
                                xt_throw_taberr(XT_CONTEXT, XT_ERR_BAD_TABLE_VERSION, table_name);
1219
 
                                break;
1220
 
                }
1221
 
                return;
1222
 
        }
1223
 
 
1224
 
        fmt_size = XT_GET_DISK_4(tab_fmt.tf_format_size_4);
1225
 
        *ret_head_size = XT_GET_DISK_4(tab_fmt.tf_tab_head_size_4);
1226
 
        dic->dic_rec_size = XT_GET_DISK_4(tab_fmt.tf_rec_size_4);
1227
 
        dic->dic_rec_fixed = XT_GET_DISK_1(tab_fmt.tf_rec_fixed_1);
1228
 
        dic->dic_min_auto_inc = XT_GET_DISK_8(tab_fmt.tf_min_auto_inc_8);
1229
 
        if (fmt_size > offsetof(XTTableFormatDRec, tf_definition)) {
1230
 
                size_t  def_size = fmt_size - offsetof(XTTableFormatDRec, tf_definition);
1231
 
                char    *def_sql;
1232
 
 
1233
 
                pushsr_(def_sql, xt_free, (char *) xt_malloc(self, def_size));
1234
 
                if (!xt_pread_file(file, head_size+offsetof(XTTableFormatDRec, tf_definition), def_size, def_size, def_sql, NULL, &self->st_statistics.st_rec, self))
1235
 
                        xt_throw(self);
1236
 
                dic->dic_table = xt_ri_create_table(self, false, table_name, def_sql, myxt_create_table_from_table(self, dic->dic_my_table), NULL);
1237
 
                freer_(); // xt_free(def_sql)
1238
 
        }
1239
 
        else
1240
 
                dic->dic_table = myxt_create_table_from_table(self, dic->dic_my_table);
1241
 
}
1242
 
 
1243
 
static void tab_load_table_header(XTThreadPtr self, XTTableHPtr tab, XTOpenFilePtr file)
1244
 
{
1245
 
        XTTableHeadDRec rec_head;
1246
 
 
1247
 
        if (!xt_pread_file(file, 0, sizeof(XTTableHeadDRec), sizeof(XTTableHeadDRec), (xtWord1 *) &rec_head, NULL, &self->st_statistics.st_rec, self))
1248
 
                xt_throw(self);
1249
 
 
1250
 
        tab->tab_head_op_seq = XT_GET_DISK_4(rec_head.th_op_seq_4);
1251
 
        tab->tab_head_row_free_id = (xtRowID) XT_GET_DISK_6(rec_head.th_row_free_6);
1252
 
        tab->tab_head_row_eof_id = (xtRowID) XT_GET_DISK_6(rec_head.th_row_eof_6);
1253
 
        tab->tab_head_row_fnum = (xtWord4) XT_GET_DISK_6(rec_head.th_row_fnum_6);
1254
 
        tab->tab_head_rec_free_id = (xtRecordID) XT_GET_DISK_6(rec_head.th_rec_free_6);
1255
 
        tab->tab_head_rec_eof_id = (xtRecordID) XT_GET_DISK_6(rec_head.th_rec_eof_6);
1256
 
        tab->tab_head_rec_fnum = (xtWord4) XT_GET_DISK_6(rec_head.th_rec_fnum_6);
1257
 
        tab->tab_wr_op_seq = tab->tab_head_op_seq;
1258
 
}
1259
 
 
1260
 
xtPublic void xt_tab_store_header(XTOpenTablePtr ot, XTTableHeadDPtr rec_head)
1261
 
{
1262
 
        XTTableHPtr tab = ot->ot_table;
1263
 
 
1264
 
        XT_SET_DISK_4(rec_head->th_op_seq_4, tab->tab_head_op_seq);
1265
 
        XT_SET_DISK_6(rec_head->th_row_free_6, tab->tab_head_row_free_id);
1266
 
        XT_SET_DISK_6(rec_head->th_row_eof_6, tab->tab_head_row_eof_id);
1267
 
        XT_SET_DISK_6(rec_head->th_row_fnum_6, tab->tab_head_row_fnum);
1268
 
        XT_SET_DISK_6(rec_head->th_rec_free_6, tab->tab_head_rec_free_id);
1269
 
        XT_SET_DISK_6(rec_head->th_rec_eof_6, tab->tab_head_rec_eof_id);
1270
 
        XT_SET_DISK_6(rec_head->th_rec_fnum_6, tab->tab_head_rec_fnum);
1271
 
}
1272
 
 
1273
 
static xtBool tab_write_header(XTOpenTablePtr ot, XTTableHeadDPtr rec_head)
1274
 
{
1275
 
        if (!xt_tab_write_rec(ot, offsetof(XTTableHeadDRec, th_op_seq_4), 40, (xtWord1 *) rec_head->th_op_seq_4))
1276
 
                return FAILED;
1277
 
        if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread))
1278
 
                return FAILED;
1279
 
        return OK;
1280
 
}
1281
 
 
1282
 
xtPublic xtBool xt_tab_write_min_auto_inc(XTOpenTablePtr ot)
1283
 
{
1284
 
        xtWord1         value[8];
1285
 
        off_t           offset;
1286
 
 
1287
 
        XT_SET_DISK_8(value, ot->ot_table->tab_dic.dic_min_auto_inc);
1288
 
        offset = ot->ot_table->tab_table_format_offset + offsetof(XTTableFormatDRec, tf_min_auto_inc_8);
1289
 
        if (!xt_tab_write_rec(ot, offset, 8, value))
1290
 
                return FAILED;
1291
 
        if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread))
1292
 
                return FAILED;
1293
 
        return OK;
1294
 
}
1295
 
 
1296
 
/* A helper that removes a table from the open tables hash table when an
 * exception occurs. It was used by tab_new_handle() below; it is only
 * compiled if NO_LONGER_REQ is defined.
 */
#ifdef NO_LONGER_REQ
static void xt_del_from_db_tables_ht(XTThreadPtr self, XTTableHPtr tab)
{
	XTDatabaseHPtr	owner = tab->tab_db;
	xtTableID		id = tab->tab_id;
	XTTableEntryPtr	entry;

	/* The hash table is keyed by the table name, not by the table itself. */
	xt_ht_del(self, owner->db_tables, tab->tab_name);

	/* Once the table is gone from the name list, the entry in the
	 * ID list must no longer reference it:
	 */
	entry = (XTTableEntryPtr) xt_sl_find(self, owner->db_table_by_id, &id);
	if (entry)
		entry->te_table = NULL;
}
#endif
1316
 
 
1317
 
xtPublic XTFileType xt_rec_file_type(xtBool heap_tab)
1318
 
{
1319
 
        if (heap_tab)
1320
 
                return XT_FT_HEAP;
1321
 
        if (XT_REC_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
1322
 
                return XT_FT_REWRITE_FLUSH;
1323
 
        return XT_REC_FILE_TYPE;
1324
 
}
1325
 
 
1326
 
xtPublic XTFileType xt_row_file_type(xtBool heap_tab)
1327
 
{
1328
 
        if (heap_tab)
1329
 
                return XT_FT_HEAP;
1330
 
        if (XT_REC_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
1331
 
                return XT_FT_REWRITE_FLUSH;
1332
 
        return XT_ROW_FILE_TYPE;
1333
 
}
1334
 
 
1335
 
xtPublic XTFileType xt_ind_file_type(xtBool heap_tab)
1336
 
{
1337
 
        if (heap_tab)
1338
 
                return XT_FT_HEAP;
1339
 
        if (XT_IND_FILE_TYPE == XT_FT_STANDARD && xt_db_rewrite_flushing)
1340
 
                return XT_FT_REWRITE_FLUSH;
1341
 
        return XT_IND_FILE_TYPE;
1342
 
}
1343
 
 
1344
 
#ifdef XT_SORT_REC_WRITES
/*
 * Comparison callback for the sorted list of delayed record writes.
 *
 * a: points to the xtRecordID being searched for.
 * b: points to a delayed-write list item.
 *
 * Returns 0 if the IDs are equal, -1 if the key is smaller than the
 * item's record ID and 1 otherwise. 'self' and 'thunk' are not used.
 */
static int tab_cmp_dw_rec_id(struct XTThread *XT_UNUSED(self), register const void *XT_UNUSED(thunk), register const void *a, register const void *b)
{
	XTDelayWritePtr	item = (XTDelayWritePtr) b;
	xtRecordID		key = *((xtRecordID *) a);

	if (key == item->dw_rec_id)
		return 0;
	return (key < item->dw_rec_id) ? -1 : 1;
}
#endif
1357
 
 
1358
 
/*
1359
 
 * Create a new table handle (i.e. open a table).
1360
 
 * Return NULL if the table is missing, and it is OK for the table
1361
 
 * to be missing.
1362
 
 */
1363
 
static int tab_new_handle(XTThreadPtr self, XTTableHPtr *r_tab, XTDatabaseHPtr db, xtTableID tab_id, XTPathStrPtr tab_path, xtBool missing_ok, XTDictionaryPtr dic)
1364
 
{
1365
 
        char                    path[PATH_MAX];
1366
 
        XTTableHPtr             tab;
1367
 
        char                    file_name[XT_MAX_TABLE_FILE_NAME_SIZE];
1368
 
        XTOpenFilePtr   of_rec, of_ind;
1369
 
        XTTableEntryPtr te_ptr;
1370
 
        size_t                  tab_format_offset;
1371
 
        size_t                  tab_head_size = 0;
1372
 
 
1373
 
        enter_();
1374
 
 
1375
 
        te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
1376
 
        ASSERT(te_ptr);
1377
 
 
1378
 
        tab = (XTTableHPtr) xt_heap_new(self, sizeof(XTTableHRec), tab_finalize);
1379
 
        pushr_(xt_heap_release, tab);
1380
 
 
1381
 
        tab->tab_name = (XTPathStrPtr) xt_dup_string(self, tab_path->ps_path);
1382
 
        tab->tab_db = db;
1383
 
        tab->tab_id = tab_id;
1384
 
        tab->tab_dic.dic_table_type = te_ptr->te_type;
1385
 
#ifdef TRACE_TABLE_IDS
1386
 
        PRINTF("%s: allocated TABLE: db=%d tab=%d %s\n", self->t_name, (int) db->db_id, (int) tab->tab_id, xt_last_2_names_of_path(tab->tab_name->ps_path));
1387
 
#endif
1388
 
 
1389
 
        if (dic) {
1390
 
                myxt_move_dictionary(&tab->tab_dic, dic);
1391
 
                myxt_setup_dictionary(self, &tab->tab_dic);
1392
 
        }
1393
 
        else {
1394
 
                if (!myxt_load_dictionary(self, &tab->tab_dic, db, tab_path)) {
1395
 
                        freer_(); // xt_heap_release(tab)
1396
 
                        return_(XT_TAB_NO_DICTIONARY);
1397
 
                }
1398
 
        }
1399
 
 
1400
 
        /* Do not use the XT_TF_DDL_TEMP_TABLE bit from the given dic
1401
 
         * This bit depends only on the
1402
 
         * name of the table, and must be set explicitly.
1403
 
         */
1404
 
        if (myxt_temp_table_name(tab_path->ps_path))
1405
 
                tab->tab_dic.dic_tab_flags |= XT_TF_DDL_TEMP_TABLE;
1406
 
        else
1407
 
                tab->tab_dic.dic_tab_flags &= ~XT_TF_DDL_TEMP_TABLE;
1408
 
 
1409
 
        tab->tab_seq.xt_op_seq_init(self);
1410
 
        xt_spinlock_init_with_autoname(self, &tab->tab_ainc_lock);
1411
 
        xt_init_mutex_with_autoname(self, &tab->tab_rec_flush_lock);
1412
 
        xt_init_mutex_with_autoname(self, &tab->tab_ind_flush_lock);
1413
 
        xt_init_mutex_with_autoname(self, &tab->tab_ind_stat_lock);
1414
 
        xt_init_mutex_with_autoname(self, &tab->tab_dic_field_lock);
1415
 
        xt_init_mutex_with_autoname(self, &tab->tab_row_lock);
1416
 
        xt_init_mutex_with_autoname(self, &tab->tab_ind_lock);
1417
 
        xt_init_mutex_with_autoname(self, &tab->tab_rec_lock);
1418
 
        xt_spinlock_init_with_autoname(self, &tab->tab_mem_lock);
1419
 
        if (!(tab->tab_rec_flush_task = new XTFlushRecRowTask()))
1420
 
                xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
1421
 
        tab->tab_rec_flush_task->tk_init(self);
1422
 
        tab->tab_rec_flush_task->frt_table = tab;
1423
 
        if (!(tab->tab_ind_flush_task = new XTFlushIndexTask()))
1424
 
                xt_throw_errno(XT_CONTEXT, XT_ENOMEM);
1425
 
        tab->tab_ind_flush_task->tk_init(self);
1426
 
        tab->tab_ind_flush_task->fit_table = tab;
1427
 
        for (u_int i=0; i<XT_ROW_RWLOCKS; i++)
1428
 
                XT_TAB_ROW_INIT_LOCK(self, &tab->tab_row_rwlock[i]);
1429
 
        tab->tab_free_locks = TRUE;
1430
 
 
1431
 
        xt_strcpy(PATH_MAX, path, tab_path->ps_path);
1432
 
        xt_remove_last_name_of_path(path);
1433
 
        tab_get_row_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
1434
 
        xt_strcat(PATH_MAX, path, file_name);
1435
 
        tab->tab_row_file = xt_fs_get_file(self, path, xt_row_file_type(te_ptr->te_heap_tab));
1436
 
 
1437
 
        xt_remove_last_name_of_path(path);
1438
 
        tab_get_data_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
1439
 
        xt_strcat(PATH_MAX, path, file_name);
1440
 
        tab->tab_rec_file = xt_fs_get_file(self, path, xt_rec_file_type(te_ptr->te_heap_tab));
1441
 
 
1442
 
        xt_remove_last_name_of_path(path);
1443
 
        tab_get_index_file_name(file_name, xt_last_name_of_path(tab_path->ps_path), tab_id);
1444
 
        xt_strcat(PATH_MAX, path, file_name);
1445
 
        tab->tab_ind_file = xt_fs_get_file(self, path, xt_ind_file_type(te_ptr->te_heap_tab));
1446
 
 
1447
 
        if (te_ptr->te_heap_tab) {
1448
 
                XTOpenFilePtr   of_row;
1449
 
 
1450
 
                tab->tab_dic.dic_tab_flags |= XT_TF_MEMORY_TABLE;
1451
 
                of_row = xt_open_file(self, tab->tab_row_file->fil_path, xt_row_file_type(TRUE), XT_FS_CREATE, xt_db_row_file_grow_size);
1452
 
                pushr_(xt_close_file, of_row);
1453
 
                if (xt_seek_eof_file(self, of_row) == 0)
1454
 
                        tab_init_row_file(self, of_row, tab, &tab->tab_dic);
1455
 
                freer_(); // xt_close_file(of_row)
1456
 
 
1457
 
                of_ind = xt_open_file(self, tab->tab_ind_file->fil_path, xt_ind_file_type(TRUE), XT_FS_CREATE, XT_INDEX_PAGE_SIZE*256);
1458
 
                if (xt_seek_eof_file(self, of_ind) == 0)
1459
 
                        tab_init_ind_file(self, of_ind, tab, &tab->tab_dic);
1460
 
                pushr_(xt_close_file, of_ind);
1461
 
                tab_load_index_header(self, tab, of_ind, tab_path);
1462
 
                freer_(); // xt_close_file(of_ind)
1463
 
 
1464
 
                of_rec = xt_open_file(self, tab->tab_rec_file->fil_path, xt_rec_file_type(te_ptr->te_heap_tab), XT_FS_CREATE, xt_db_data_file_grow_size);
1465
 
                pushr_(xt_close_file, of_rec);
1466
 
                if (xt_seek_eof_file(self, of_rec) == 0)
1467
 
                        tab_init_data_file(self, of_rec, tab, &tab->tab_dic, 0, NULL);
1468
 
        }
1469
 
        else {
1470
 
#ifdef XT_SORT_REC_WRITES
1471
 
                tab->tab_rec_dw_writes = xt_new_sortedlist(self, sizeof(XTDelayWriteRec), 20, 10, tab_cmp_dw_rec_id, NULL, NULL, TRUE, FALSE);
1472
 
#endif
1473
 
                of_ind = xt_open_file(self, tab->tab_ind_file->fil_path, xt_ind_file_type(FALSE), XT_FS_MISSING_OK, XT_INDEX_PAGE_SIZE*256);
1474
 
                if (of_ind) {
1475
 
                        pushr_(xt_close_file, of_ind);
1476
 
                        tab_load_index_header(self, tab, of_ind, tab_path);
1477
 
                        freer_(); // xt_close_file(of_ind)
1478
 
                }
1479
 
                else
1480
 
                        tab_load_index_header(self, tab, of_ind, tab_path);
1481
 
 
1482
 
                of_rec = xt_open_file(self, tab->tab_rec_file->fil_path, xt_rec_file_type(FALSE), missing_ok ? XT_FS_MISSING_OK : XT_FS_DEFAULT, xt_db_data_file_grow_size);
1483
 
                if (!of_rec) {
1484
 
                        freer_(); // xt_heap_release(tab)
1485
 
                        return_(XT_TAB_NOT_FOUND);
1486
 
                }
1487
 
                pushr_(xt_close_file, of_rec);
1488
 
        }
1489
 
 
1490
 
        tab_load_table_format(self, of_rec, tab_path, &tab_format_offset, &tab_head_size, &tab->tab_dic);
1491
 
        tab->tab_table_format_offset = tab_format_offset;
1492
 
        tab->tab_table_head_size = tab_head_size;
1493
 
        tab->tab_dic.dic_table->dt_table = tab;
1494
 
        tab_load_table_header(self, tab, of_rec);
1495
 
        freer_(); // xt_close_file(of_rec)
1496
 
 
1497
 
        tab->tab_seq.xt_op_seq_set(self, tab->tab_wr_op_seq+1);
1498
 
        tab->tab_row_eof_id = tab->tab_head_row_eof_id;
1499
 
        tab->tab_row_free_id = tab->tab_head_row_free_id;
1500
 
        tab->tab_row_fnum = tab->tab_head_row_fnum;
1501
 
        tab->tab_rec_eof_id = tab->tab_head_rec_eof_id;
1502
 
        tab->tab_rec_free_id = tab->tab_head_rec_free_id;
1503
 
        tab->tab_rec_fnum = tab->tab_head_rec_fnum;
1504
 
 
1505
 
        tab->tab_rows.xt_tc_setup(tab, FALSE, sizeof(XTTabRowHeadDRec), sizeof(XTTabRowRefDRec));
1506
 
        tab->tab_recs.xt_tc_setup(tab, TRUE, tab_head_size, tab->tab_dic.dic_rec_size);
1507
 
 
1508
 
        xt_xres_init_tab(self, tab);
1509
 
 
1510
 
        if (!xt_init_row_locks(&tab->tab_locks))
1511
 
                xt_throw(self);
1512
 
 
1513
 
        xt_heap_set_release_callback(tab, tab_onrelease);
1514
 
 
1515
 
        tab->tab_repair_pending = xt_tab_is_table_repair_pending(tab);
1516
 
 
1517
 
        popr_(); // Discard xt_heap_release(tab)
1518
 
 
1519
 
        xt_ht_put(self, db->db_tables, tab);
1520
 
 
1521
 
        /* Add a reference to the ID list, when a table is
1522
 
         * added to the table name list:
1523
 
         */
1524
 
        te_ptr->te_table = tab;
1525
 
 
1526
 
    /* Moved from after xt_init_row_locks() above, so that calling
1527
 
     * xt_use_table_no_lock() with no_load == FALSE from attachReferences()
1528
 
     * will work if we have cyclic foreign key references.
1529
 
     */ 
1530
 
        if (tab->tab_dic.dic_table) {
1531
 
                try_(a) {
1532
 
                        tab->tab_dic.dic_table->attachReferences(self, db);
1533
 
                }
1534
 
                catch_(a) {
1535
 
                        /* Errors are thrown when: set foreign_key_checks = 1 */
1536
 
                        /* Undo everything done above: */
1537
 
                        xt_ht_del(self, db->db_tables, tab->tab_name);
1538
 
                        xt_throw(self);
1539
 
                }
1540
 
                cont_(a);
1541
 
        }
1542
 
 
1543
 
        *r_tab = tab;
1544
 
        return_(XT_TAB_OK);
1545
 
}
1546
 
 
1547
 
/*
1548
 
 * Get a reference to a table in the current database. The table reference is valid,
1549
 
 * as long as the thread is using the database!!!
1550
 
 */
1551
 
xtPublic XTTableHPtr xt_use_table_no_lock(XTThreadPtr self, XTDatabaseHPtr db, XTPathStrPtr name, xtBool no_load, xtBool missing_ok, XTDictionaryPtr dic)
1552
 
{
1553
 
        XTTableHPtr tab;
1554
 
 
1555
 
        if (!db)
1556
 
                xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
1557
 
 
1558
 
        tab = (XTTableHPtr) xt_ht_get(self, db->db_tables, name);
1559
 
        if (!tab && !no_load) {
1560
 
                xtTableID       tab_id = 0;
1561
 
 
1562
 
                if (!tab_find_table(self, db, name, &tab_id)) {
1563
 
                        if (missing_ok)
1564
 
                                return NULL;
1565
 
                        xt_throw_taberr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND, name);
1566
 
                }
1567
 
 
1568
 
                switch (tab_new_handle(self, &tab, db, tab_id, name, missing_ok, dic)) {
1569
 
                        case XT_TAB_NO_DICTIONARY:
1570
 
                                xt_throw_taberr(XT_CONTEXT, XT_ERR_NO_DICTIONARY, name);
1571
 
                        case XT_TAB_POOL_CLOSED:
1572
 
                                xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TABLE_LOCKED, (u_long) tab_id);
1573
 
                        case XT_TAB_NOT_FOUND:
1574
 
                                if (missing_ok)
1575
 
                                        return NULL;
1576
 
                                xt_throw_taberr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND, name);
1577
 
                        default:
1578
 
                                break;
1579
 
                }
1580
 
        }
1581
 
        
1582
 
        if (tab)
1583
 
                xt_heap_reference(self, tab);
1584
 
 
1585
 
        return tab;
1586
 
}
1587
 
 
1588
 
xtPublic XTTableHPtr xt_use_table_no_lock_ns(struct XTDatabase *db, XTPathStrPtr name, xtBool no_load, xtBool missing_ok, XTDictionaryPtr dic)
1589
 
{
1590
 
        XTTableHPtr     tab;
1591
 
        XTThreadPtr     self = xt_get_self();
1592
 
 
1593
 
        try_(a) {
1594
 
                tab = xt_use_table_no_lock(self, db, name, no_load, missing_ok, dic);
1595
 
        }
1596
 
        catch_(a) {
1597
 
                tab = NULL;
1598
 
        }
1599
 
        cont_(a);
1600
 
        return tab;
1601
 
}
1602
 
 
1603
 
static void tab_close_table(XTOpenTablePtr ot)
1604
 
{
1605
 
        xt_ind_free_reserved(ot);
1606
 
 
1607
 
        if (ot->ot_rec_file) {
1608
 
                XT_CLOSE_RR_FILE_NS(ot->ot_rec_file);
1609
 
                ot->ot_rec_file = NULL;
1610
 
                
1611
 
        }
1612
 
        if (ot->ot_ind_file) {
1613
 
                xt_close_file_ns(ot->ot_ind_file);
1614
 
                ot->ot_ind_file = NULL;
1615
 
                
1616
 
        }
1617
 
        if (ot->ot_row_file) {
1618
 
                XT_CLOSE_RR_FILE_NS(ot->ot_row_file);
1619
 
                ot->ot_row_file = NULL;
1620
 
                
1621
 
        }
1622
 
        if (ot->ot_table) {
1623
 
                xt_heap_release(xt_get_self(), ot->ot_table);
1624
 
                ot->ot_table = NULL;
1625
 
        }
1626
 
        if (ot->ot_ind_rhandle) {
1627
 
                xt_ind_release_handle(ot->ot_ind_rhandle, FALSE, ot->ot_thread);
1628
 
                ot->ot_ind_rhandle = NULL;
1629
 
        }
1630
 
        if (ot->ot_row_rbuffer) {
1631
 
                xt_free_ns(ot->ot_row_rbuffer);
1632
 
                ot->ot_row_rbuf_size = 0;
1633
 
                ot->ot_row_rbuffer = NULL;
1634
 
        }
1635
 
        if (ot->ot_row_wbuffer) {
1636
 
                xt_free_ns(ot->ot_row_wbuffer);
1637
 
                ot->ot_row_wbuf_size = 0;
1638
 
                ot->ot_row_wbuffer = NULL;
1639
 
        }
1640
 
#ifdef XT_TRACK_RETURNED_ROWS
1641
 
        if (ot->ot_rows_returned) {
1642
 
                xt_free_ns(ot->ot_rows_returned);
1643
 
                ot->ot_rows_returned = NULL;
1644
 
        }
1645
 
        ot->ot_rows_ret_curr = 0;
1646
 
        ot->ot_rows_ret_max = 0;
1647
 
#endif
1648
 
        xt_free(NULL, ot);
1649
 
}
1650
 
 
1651
 
static void tab_delete_table_files(XTThreadPtr self, XTPathStrPtr tab_name, xtTableID tab_id)
1652
 
{
1653
 
        XTFilesOfTableRec       ft;
1654
 
 
1655
 
        xt_enum_files_of_tables_init(tab_name, tab_id, &ft);
1656
 
        while (xt_enum_files_of_tables_next(&ft)) {
1657
 
                if (!xt_fs_delete(NULL, ft.ft_file_path))
1658
 
                        xt_log_and_clear_exception(self);
1659
 
        }
1660
 
}
1661
 
 
1662
 
static void tab_init_row_file(XTThreadPtr self, XTOpenFilePtr of_row, XTTableHPtr tab, XTDictionaryPtr XT_UNUSED(dic))
1663
 
{
1664
 
        XTTabRowHeadDRec        row_head;
1665
 
 
1666
 
        tab->tab_row_eof_id = 1;
1667
 
        tab->tab_row_free_id = 0;
1668
 
        tab->tab_row_fnum = 0;
1669
 
 
1670
 
        tab->tab_head_row_eof_id = 1;
1671
 
        tab->tab_head_row_free_id = 0;
1672
 
        tab->tab_head_row_fnum  = 0;
1673
 
 
1674
 
        XT_SET_DISK_4(row_head.rh_magic_4, XT_TAB_ROW_MAGIC);
1675
 
        if (!xt_pwrite_file(of_row, 0, sizeof(row_head), &row_head, &self->st_statistics.st_rec, self))
1676
 
                xt_throw(self);
1677
 
}
1678
 
 
1679
 
static void tab_init_data_file(XTThreadPtr self, XTOpenFilePtr of_rec, XTTableHPtr tab, XTDictionaryPtr dic, size_t def_len, XTStringBufferPtr tab_def)
1680
 
{
1681
 
        off_t                           eof;
1682
 
        XTTableHeadDRec         rec_head;
1683
 
        XTTableFormatDRec       table_fmt;
1684
 
 
1685
 
        /* Calculate the offset of the first record in the data handle file. */
1686
 
        eof = sizeof(XTTableHeadDRec) + offsetof(XTTableFormatDRec, tf_definition) + def_len + XT_FORMAT_DEF_SPACE;
1687
 
        eof = (eof + 1024 - 1) / 1024 * 1024;           // Round to a value divisible by 1024
1688
 
 
1689
 
        tab->tab_table_format_offset = sizeof(XTTableHeadDRec);
1690
 
        tab->tab_table_head_size = (size_t) eof;
1691
 
 
1692
 
        tab->tab_rec_eof_id = 1;                                                // This is the first record ID!
1693
 
        tab->tab_rec_free_id = 0;
1694
 
        tab->tab_rec_fnum = 0;
1695
 
        
1696
 
        tab->tab_head_rec_eof_id = 1;                                   // The first record ID
1697
 
        tab->tab_head_rec_free_id = 0;
1698
 
        tab->tab_head_rec_fnum = 0;
1699
 
 
1700
 
        tab->tab_dic.dic_rec_size = dic->dic_rec_size;
1701
 
        tab->tab_dic.dic_rec_fixed = dic->dic_rec_fixed;
1702
 
        tab->tab_dic.dic_tab_flags = dic->dic_tab_flags;
1703
 
        tab->tab_dic.dic_min_auto_inc = dic->dic_min_auto_inc;
1704
 
        tab->tab_dic.dic_def_ave_row_size = dic->dic_def_ave_row_size;
1705
 
        tab->tab_dic.dic_table_type = dic->dic_table_type;
1706
 
 
1707
 
        XT_SET_DISK_4(rec_head.th_head_size_4, sizeof(XTTableHeadDRec));
1708
 
        XT_SET_DISK_4(rec_head.th_op_seq_4, tab->tab_head_op_seq);
1709
 
        XT_SET_DISK_6(rec_head.th_row_free_6, tab->tab_head_row_free_id);
1710
 
        XT_SET_DISK_6(rec_head.th_row_eof_6, tab->tab_head_row_eof_id);
1711
 
        XT_SET_DISK_6(rec_head.th_row_fnum_6, tab->tab_head_row_fnum);
1712
 
        XT_SET_DISK_6(rec_head.th_rec_free_6, tab->tab_head_rec_free_id);
1713
 
        XT_SET_DISK_6(rec_head.th_rec_eof_6, tab->tab_head_rec_eof_id);
1714
 
        XT_SET_DISK_6(rec_head.th_rec_fnum_6, tab->tab_head_rec_fnum);
1715
 
 
1716
 
        if (!xt_pwrite_file(of_rec, 0, sizeof(XTTableHeadDRec), &rec_head, &self->st_statistics.st_rec, self))
1717
 
                xt_throw(self);
1718
 
 
1719
 
        /* Store the table format: */
1720
 
        memset(&table_fmt, 0, offsetof(XTTableFormatDRec, tf_definition));
1721
 
        XT_SET_DISK_4(table_fmt.tf_format_size_4, offsetof(XTTableFormatDRec, tf_definition) + def_len);
1722
 
        XT_SET_DISK_4(table_fmt.tf_tab_head_size_4, eof);
1723
 
        XT_SET_DISK_2(table_fmt.tf_tab_version_2, XT_TAB_CURRENT_VERSION);
1724
 
        XT_SET_DISK_4(table_fmt.tf_rec_size_4, tab->tab_dic.dic_rec_size);
1725
 
        XT_SET_DISK_1(table_fmt.tf_rec_fixed_1, tab->tab_dic.dic_rec_fixed);
1726
 
        XT_SET_DISK_2(table_fmt.tf_tab_unused_2, 0);
1727
 
        XT_SET_DISK_8(table_fmt.tf_min_auto_inc_8, tab->tab_dic.dic_min_auto_inc);
1728
 
 
1729
 
        if (!xt_pwrite_file(of_rec, sizeof(XTTableHeadDRec), offsetof(XTTableFormatDRec, tf_definition), &table_fmt, &self->st_statistics.st_rec, self))
1730
 
                xt_throw(self);
1731
 
        if (def_len) {
1732
 
                if (!xt_pwrite_file(of_rec, sizeof(XTTableHeadDRec) + offsetof(XTTableFormatDRec, tf_definition), def_len, tab_def->sb_cstring, &self->st_statistics.st_rec, self))
1733
 
                        xt_throw(self);
1734
 
        }
1735
 
}
1736
 
 
1737
 
static void tab_init_ind_file(XTThreadPtr self, XTOpenFilePtr of_ind, XTTableHPtr tab, XTDictionaryPtr dic)
1738
 
{
1739
 
        XTIndexFormatDPtr       index_fmt;
1740
 
 
1741
 
        /* This is the size of the index header: */
1742
 
        tab->tab_index_format_offset = offsetof(XTIndexHeadDRec, tp_data) + dic->dic_key_count * XT_NODE_REF_SIZE;
1743
 
        if (!(tab->tab_index_head = (XTIndexHeadDPtr) xt_calloc_ns(XT_INDEX_HEAD_SIZE)))
1744
 
                xt_throw(self);
1745
 
 
1746
 
        XT_NODE_ID(tab->tab_ind_eof) = 1;
1747
 
        XT_NODE_ID(tab->tab_ind_free) = 0;
1748
 
 
1749
 
        XT_SET_DISK_4(tab->tab_index_head->tp_header_size_4, XT_INDEX_HEAD_SIZE);
1750
 
        XT_SET_DISK_4(tab->tab_index_head->tp_format_offset_4, tab->tab_index_format_offset);
1751
 
        XT_SET_DISK_6(tab->tab_index_head->tp_ind_eof_6, XT_NODE_ID(tab->tab_ind_eof));
1752
 
        XT_SET_DISK_6(tab->tab_index_head->tp_ind_free_6, XT_NODE_ID(tab->tab_ind_free));
1753
 
 
1754
 
        /* Store the index format: */
1755
 
        index_fmt = (XTIndexFormatDPtr) (((xtWord1 *) tab->tab_index_head) + tab->tab_index_format_offset);
1756
 
        XT_SET_DISK_4(index_fmt->if_format_size_4, sizeof(XTIndexFormatDRec));
1757
 
        XT_SET_DISK_2(index_fmt->if_tab_version_2, XT_TAB_CURRENT_VERSION);
1758
 
        XT_SET_DISK_2(index_fmt->if_ind_version_2, XT_IND_CURRENT_VERSION);
1759
 
        XT_SET_DISK_1(index_fmt->if_node_ref_size_1, XT_NODE_REF_SIZE);
1760
 
        XT_SET_DISK_1(index_fmt->if_rec_ref_size_1, XT_RECORD_REF_SIZE);
1761
 
        XT_SET_DISK_4(index_fmt->if_page_size_4, XT_INDEX_PAGE_SIZE);
1762
 
 
1763
 
        /* Save the header: */
1764
 
        if (!xt_pwrite_file(of_ind, 0, XT_INDEX_HEAD_SIZE, tab->tab_index_head, &self->st_statistics.st_ind, self))
1765
 
                xt_throw(self);
1766
 
}
1767
 
 
1768
 
xtPublic void xt_create_table(XTThreadPtr self, XTPathStrPtr name, XTDictionaryPtr dic)
1769
 
{
1770
 
        char                            table_name[XT_MAX_TABLE_FILE_NAME_SIZE];
1771
 
        char                            path[PATH_MAX];
1772
 
        XTDatabaseHPtr          db = self->st_database;
1773
 
        XTOpenTablePoolPtr      table_pool;
1774
 
        XTTableHPtr                     tab;
1775
 
        XTTableHPtr                     old_tab = NULL;
1776
 
        xtTableID                       old_tab_id = 0;
1777
 
        xtTableID                       tab_id = 0;
1778
 
        XTStringBufferRec       tab_def = { 0, 0, 0 };
1779
 
        XTTableEntryRec         te_tab;
1780
 
        XTSortedListInfoRec     li_undo;
1781
 
 
1782
 
#ifdef TRACE_CREATE_TABLES
1783
 
        printf("CREATE %s\n", name->ps_path);
1784
 
#endif
1785
 
        enter_();
1786
 
        if (strlen(xt_last_name_of_path(name->ps_path)) > XT_TABLE_NAME_SIZE-1)
1787
 
                xt_throw_taberr(XT_CONTEXT, XT_ERR_NAME_TOO_LONG, name);
1788
 
        if (!db)
1789
 
                xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
1790
 
 
1791
 
        /* Lock to prevent table list change during creation. */
1792
 
        table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, name, FALSE, TRUE, TRUE, &old_tab);
1793
 
        pushr_(xt_db_unlock_table_pool, table_pool);
1794
 
        xt_ht_lock(self, db->db_tables);
1795
 
        pushr_(xt_ht_unlock, db->db_tables);
1796
 
        pushr_(xt_heap_release, old_tab);
1797
 
 
1798
 
        /* This must be done before we remove the old table
1799
 
         * from the directory, or we will not be able
1800
 
         * to find the table, which could be required
1801
 
         * for TRUNCATE!
1802
 
         */
1803
 
        if (xt_sl_get_size(db->db_table_by_id) >= XT_MAX_TABLES)
1804
 
                xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TOO_MANY_TABLES, (u_long) XT_MAX_TABLES);
1805
 
 
1806
 
        tab_id = db->db_curr_tab_id + 1;                
1807
 
 
1808
 
        if (old_tab) {
1809
 
                old_tab_id = old_tab->tab_id;           
1810
 
                xt_dl_delete_ext_data(self, old_tab, FALSE, TRUE);
1811
 
                freer_(); // xt_heap_release(self, old_tab)
1812
 
 
1813
 
                /* For the Windows version this must be done before we
1814
 
                 * start to delete the underlying files!
1815
 
                 */
1816
 
                tab_close_files(self, old_tab);
1817
 
 
1818
 
                tab_delete_table_files(self, name, old_tab_id);
1819
 
 
1820
 
                /* Remove the PBMS table: */
1821
 
                ASSERT(xt_get_self() == self);
1822
 
 
1823
 
                /* Remove the table from the directory. It will get a new
1824
 
                 * ID so the handle in the directory will no longer be valid.
1825
 
                 */
1826
 
                xt_ht_del(self, db->db_tables, name);
1827
 
        }
1828
 
        else {
1829
 
                freer_(); // xt_heap_release(self, old_tab)
1830
 
        }
1831
 
 
1832
 
        /* Add the table to the directory, we'll remove it on error! */
1833
 
        li_undo.li_sl = db->db_table_by_id;
1834
 
        li_undo.li_key = &tab_id;
1835
 
        te_tab.te_tab_id = tab_id;
1836
 
        te_tab.te_heap_tab = dic->dic_tab_flags & XT_TF_MEMORY_TABLE;
1837
 
        te_tab.te_tab_name = xt_dup_string(self, xt_last_name_of_path(name->ps_path));
1838
 
        te_tab.te_tab_path = tab_get_table_path(self, db, name, TRUE);
1839
 
        te_tab.te_table = NULL;
1840
 
        te_tab.te_type = dic->dic_table_type;  
1841
 
        xt_sl_insert(self, db->db_table_by_id, &tab_id, &te_tab);
1842
 
 
1843
 
        *path = 0;
1844
 
        try_(a) {
1845
 
                XTOpenFilePtr   of_row, of_rec, of_ind;
1846
 
                size_t                  def_len = 0;
1847
 
 
1848
 
                tab_save_tables(self, db);
1849
 
 
1850
 
                tab = (XTTableHPtr) xt_heap_new(self, sizeof(XTTableHRec), tab_finalize);
1851
 
                pushr_(xt_heap_release, tab);
1852
 
 
1853
 
                /* The length of the foreign key definition: */
1854
 
                if (dic->dic_table) {
1855
 
                        dic->dic_table->loadString(self, &tab_def);
1856
 
                        def_len = tab_def.sb_len + 1;
1857
 
                }
1858
 
 
1859
 
                tab->tab_head_op_seq = 0;
1860
 
                tab->tab_wr_op_seq = 0;
1861
 
#ifdef DEBUG
1862
 
                /* This tests operation number overflow. */
1863
 
                //tab->tab_head_op_seq = 0xFFFFFFFF - 12;
1864
 
                //tab->tab_wr_op_seq = 0xFFFFFFFF - 12;
1865
 
#endif
1866
 
 
1867
 
                /* ------- ROW FILE: */
1868
 
                xt_strcpy(PATH_MAX, path, name->ps_path);
1869
 
                xt_remove_last_name_of_path(path);
1870
 
                tab_get_row_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
1871
 
                xt_strcat(PATH_MAX, path, table_name);
1872
 
                of_row = xt_open_file(self, path, xt_row_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, xt_db_row_file_grow_size);
1873
 
                pushr_(xt_close_file, of_row);
1874
 
                tab_init_row_file(self, of_row, tab, dic);
1875
 
                freer_(); // xt_close_file(of_row)
1876
 
 
1877
 
                (void) ASSERT(sizeof(XTTabRowHeadDRec) == sizeof(XTTabRowRefDRec));
1878
 
                (void) ASSERT(sizeof(XTTabRowRefDRec) == 1 << XT_TAB_ROW_SHIFTS);
1879
 
 
1880
 
                /* ------------ DATA FILE: */
1881
 
                xt_remove_last_name_of_path(path);
1882
 
                tab_get_data_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
1883
 
                xt_strcat(PATH_MAX, path, table_name);
1884
 
                of_rec = xt_open_file(self, path, xt_rec_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, xt_db_data_file_grow_size);
1885
 
                pushr_(xt_close_file, of_rec);
1886
 
                tab_init_data_file(self, of_rec, tab, dic, def_len, &tab_def);
1887
 
                freer_(); // xt_close_file(of_rec)
1888
 
 
1889
 
                /* ----------- INDEX FILE: */
1890
 
                xt_remove_last_name_of_path(path);
1891
 
                tab_get_index_file_name(table_name, xt_last_name_of_path(name->ps_path), tab_id);
1892
 
                xt_strcat(PATH_MAX, path, table_name);
1893
 
                of_ind = xt_open_file(self, path, xt_ind_file_type(dic->dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_CREATE | XT_FS_EXCLUSIVE, XT_INDEX_PAGE_SIZE*256);
1894
 
                pushr_(xt_close_file, of_ind);
1895
 
                tab_init_ind_file(self, of_ind, tab, dic);
1896
 
                freer_(); // xt_close_file(of_ind)
1897
 
 
1898
 
                /* ------------ */
1899
 
                /* Log the new table ID! */
1900
 
                db->db_curr_tab_id = tab_id;
1901
 
                if (!xt_xn_log_tab_id(self, tab_id)) {
1902
 
                        db->db_curr_tab_id = tab_id - 1;
1903
 
                        xt_throw(self);
1904
 
                }
1905
 
 
1906
 
                freer_(); // xt_heap_release(tab)
1907
 
 
1908
 
                /* {LOAD-FOR-FKS}
1909
 
                 * 2008-12-10: Note, there is another problem, example:
1910
 
                 * set storage_engine = pbxt;
1911
 
                 * 
1912
 
                 * CREATE TABLE t1 (s1 INT PRIMARY KEY, s2 INT);
1913
 
                 * CREATE TABLE t2 (s1 INT PRIMARY KEY, FOREIGN KEY (s1) REFERENCES t1 (s1) ON UPDATE CASCADE);
1914
 
                 * CREATE TABLE t3 (s1 INT PRIMARY KEY, FOREIGN KEY (s1) REFERENCES t2 (s1) ON UPDATE CASCADE);
1915
 
                 * 
1916
 
                 * DROP TABLE IF EXISTS t2,t1;
1917
 
                 * CREATE TABLE t1 (s1 ENUM('a','b') PRIMARY KEY);
1918
 
                 * CREATE TABLE t2 (s1 ENUM('A','B'), FOREIGN KEY (s1) REFERENCES t1 (s1));
1919
 
                 * 
1920
 
                 * DROP TABLE IF EXISTS t2,t1;
1921
 
                 * 
1922
 
                 * In the example above. The second create t2 does not fail, although t3 references it,
1923
 
                 * and the data types do not match.
1924
 
                 * 
1925
 
                 * The main problem is that this error comes on DROP TABLE IF EXISTS t2! Which prevents
1926
 
                 * the table from being dropped - not good.
1927
 
                 *
1928
 
                 * So my idea here is to open the table, and if it fails, then the create table fails
1929
 
                 * as well.
1930
 
                 */
1931
 
                /*
1932
 
                 * Drizzle-specific:
1933
 
                 * We pass table type separately and provide NULL for the dic parameter, this is because
1934
 
                 * we want to force loading table (which is triggered by dic == NULL) but we still need table type
1935
 
                 */
1936
 
                if (!old_tab_id) {
1937
 
#ifndef DRIZZLED
1938
 
                        tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
1939
 
                        xt_heap_release(self, tab);
1940
 
#endif
1941
 
                }
1942
 
        }
1943
 
        catch_(a) {
1944
 
                /* Creation failed, delete the table files: */
1945
 
                XTException e;
1946
 
 
1947
 
                xt_enter_exception_handler(self, &e);
1948
 
                if (*path)
1949
 
                        tab_delete_table_files(self, name, tab_id);
1950
 
                tab_remove_table_path(self, db, te_tab.te_tab_path);
1951
 
                xt_sl_delete(NULL, db->db_table_by_id, &tab_id);
1952
 
                tab_save_tables(self, db);
1953
 
                xt_sb_set_size(self, &tab_def, 0);
1954
 
                xt_exit_exception_handler(self, &e);
1955
 
                xt_throw(self);
1956
 
        }
1957
 
        cont_(a);
1958
 
 
1959
 
        xt_sb_set_size(self, &tab_def, 0);
1960
 
 
1961
 
        if (old_tab_id) {
1962
 
                try_(b) {
1963
 
                        XTTableEntryPtr te_ptr;
1964
 
 
1965
 
                        if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &old_tab_id))) {
1966
 
                                tab_remove_table_path(self, db, te_ptr->te_tab_path);
1967
 
                                xt_sl_delete(self, db->db_table_by_id, &old_tab_id);
1968
 
                                tab_save_tables(self, db);
1969
 
                        }
1970
 
 
1971
 
                        /* Same purpose as above {LOAD-FOR-FKS} (although this should work, 
1972
 
                         * because this is a TRUNCATE TABLE).
1973
 
                         */
1974
 
                        tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
1975
 
                        xt_heap_release(self, tab);
1976
 
                }
1977
 
                catch_(b) {
1978
 
                        /* Log this error, but do not return it, because
1979
 
                         * it just involves the cleanup of the old table,
1980
 
                         * the new table has been successfully created.
1981
 
                         */
1982
 
                        xt_log_and_clear_exception(self);
1983
 
                }
1984
 
                cont_(b);
1985
 
        }
1986
 
 
1987
 
        freer_(); // xt_ht_unlock(db->db_tables)
1988
 
        freer_(); // xt_db_unlock_table_pool(table_pool)
1989
 
 
1990
 
        /* I open the table here, because I cannot rely on MySQL to do
1991
 
         * it after a create. This is normally OK, but with foreign keys
1992
 
         * tables can be referenced and then they are not opened
1993
 
         * before use. In this example, the INSERT opens t2, but t1 is
1994
 
         * not opened after the create. As a result the foreign key
1995
 
         * reference is not resolved.
1996
 
         *
1997
 
         * drop table t1, t2;
1998
 
         * CREATE TABLE t1
1999
 
         * (
2000
 
         *  id INT PRIMARY KEY
2001
 
         * ) ENGINE=pbxt;
2002
 
         * 
2003
 
         * CREATE TABLE t2
2004
 
         * (
2005
 
         *  v INT,
2006
 
         *  CONSTRAINT c1 FOREIGN KEY (v) REFERENCES t1(id)
2007
 
         * ) ENGINE=pbxt;
2008
 
         * 
2009
 
         * --error 1452
2010
 
         * INSERT INTO t2 VALUES(2);
2011
 
         */
2012
 
        /* this code is not needed anymore as we open tables referred by FKs as necessary during checks
2013
 
        xt_ht_lock(self, db->db_tables);
2014
 
        pushr_(xt_ht_unlock, db->db_tables);
2015
 
        tab = xt_use_table_no_lock(self, db, name, FALSE, FALSE, NULL);
2016
 
        freer_(); // xt_ht_unlock(db->db_tables)
2017
 
        xt_heap_release(self, tab);
2018
 
        * CHANGED see {LOAD-FOR-FKS} above.
2019
 
        */
2020
 
 
2021
 
        exit_();
2022
 
}
2023
 
 
2024
 
xtPublic void xt_drop_table(XTThreadPtr self, XTPathStrPtr tab_name, xtBool drop_db)
2025
 
{
2026
 
        XTDatabaseHPtr          db = self->st_database;
2027
 
        XTOpenTablePoolPtr      table_pool;
2028
 
        XTTableHPtr                     tab = NULL;
2029
 
        xtTableID                       tab_id = 0;
2030
 
        xtBool                          can_drop = TRUE;
2031
 
 
2032
 
        enter_();
2033
 
 
2034
 
#ifdef TRACE_CREATE_TABLES
2035
 
        printf("DROP %s\n", tab_name->ps_path);
2036
 
#endif
2037
 
 
2038
 
        table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, tab_name, FALSE, FALSE, TRUE, &tab);
2039
 
        pushr_(xt_db_unlock_table_pool, table_pool);
2040
 
        xt_ht_lock(self, db->db_tables);
2041
 
        pushr_(xt_ht_unlock, db->db_tables);
2042
 
        pushr_(xt_heap_release, tab);
2043
 
 
2044
 
        if (table_pool) {
2045
 
                tab_id = tab->tab_id;   /* tab is not null if returned table_pool is not null */
2046
 
                /* check if other tables refer this */
2047
 
                if (!self->st_ignore_fkeys) 
2048
 
                        can_drop = tab->tab_dic.dic_table->checkCanDrop(drop_db);
2049
 
        }
2050
 
#ifdef DRIZZLED 
2051
 
        /* See the comment in ha_pbxt::delete_table regarding different implmentation of DROP TABLE
2052
 
         * in MySQL and Drizzle
2053
 
         */
2054
 
        else {
2055
 
                xt_throw_xterr(XT_CONTEXT, XT_ERR_TABLE_NOT_FOUND);
2056
 
        }
2057
 
#endif
2058
 
 
2059
 
        if (can_drop) {
2060
 
                if (tab_id) {
2061
 
                        XTTableEntryPtr te_ptr;
2062
 
 
2063
 
                        xt_dl_delete_ext_data(self, tab, FALSE, TRUE);
2064
 
                        freer_(); // xt_heap_release(self, tab)
2065
 
 
2066
 
                        /* For the Windows version this must be done before we
2067
 
                         * start to delete the underlying files!
2068
 
                         */
2069
 
                        tab_close_files(self, tab);
2070
 
 
2071
 
                        tab_delete_table_files(self, tab_name, tab_id);
2072
 
 
2073
 
                        ASSERT(xt_get_self() == self);
2074
 
                        if ((te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id))) {
2075
 
                                tab_remove_table_path(self, db, te_ptr->te_tab_path);
2076
 
                                xt_sl_delete(self, db->db_table_by_id, &tab_id);
2077
 
                                tab_save_tables(self, db);
2078
 
                        }
2079
 
                }
2080
 
                else {
2081
 
                        freer_(); // xt_heap_release(self, tab)
2082
 
                }
2083
 
 
2084
 
                xt_ht_del(self, db->db_tables, tab_name);
2085
 
        }
2086
 
        else {  /* cannot drop table because of FK dependencies */
2087
 
                xt_throw_xterr(XT_CONTEXT, XT_ERR_ROW_IS_REFERENCED);
2088
 
        }
2089
 
 
2090
 
        freer_(); // xt_ht_unlock(db->db_tables)
2091
 
        freer_(); // xt_db_unlock_table_pool(table_pool)
2092
 
        exit_();
2093
 
}
2094
 
 
2095
 
xtPublic void xt_tab_check_free_lists(XTThreadPtr self, XTOpenTablePtr ot, bool check_recs, bool correct_count)
2096
 
{
2097
 
        char                                    table_name[XT_IDENTIFIER_NAME_SIZE*3+3];
2098
 
        register XTTableHPtr    tab = ot->ot_table;
2099
 
        xtRowID                                 prev_row_id;
2100
 
        xtRowID                                 row_id;
2101
 
        xtRefID                                 next_row_id;
2102
 
        u_llong                                 free_count;
2103
 
 
2104
 
        xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
2105
 
        if (check_recs) {
2106
 
                xtRecordID              prev_rec_id;
2107
 
                xtRecordID              rec_id;
2108
 
                XTTabRecExtDRec rec_buf;
2109
 
 
2110
 
                xt_lock_mutex_ns(&tab->tab_rec_lock);
2111
 
                /* Checking the free list: */
2112
 
                prev_rec_id = 0;
2113
 
                free_count = 0;
2114
 
                rec_id = tab->tab_rec_free_id;
2115
 
                while (rec_id) {
2116
 
                        if (rec_id >= tab->tab_rec_eof_id) {
2117
 
                                xt_logf(XT_NT_ERROR, "Table %s: invalid reference on free list: %llu, ", table_name, (u_llong) rec_id);
2118
 
                                if (prev_rec_id)
2119
 
                                        xt_logf(XT_NT_ERROR, "reference by: %llu\n", (u_llong) prev_rec_id);
2120
 
                                else
2121
 
                                        xt_logf(XT_NT_ERROR, "reference by list head pointer\n");
2122
 
                                xt_tab_set_table_repair_pending(tab);
2123
 
                                break;
2124
 
                        }
2125
 
                        if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_FIX_HEADER_SIZE, (xtWord1 *) &rec_buf)) {
2126
 
                                if (self)
2127
 
                                        xt_throw(self);
2128
 
                                else
2129
 
                                        xt_log_and_clear_warning(ot->ot_thread);
2130
 
                                break;
2131
 
                        }
2132
 
                        if ((rec_buf.tr_rec_type_1 & XT_TAB_STATUS_MASK) != XT_TAB_STATUS_FREED)
2133
 
                                xt_logf(XT_NT_INFO, "Table %s: record, %llu, on free list is not free\n", table_name, (u_llong) rec_id);
2134
 
                        free_count++;
2135
 
                        prev_rec_id = rec_id;
2136
 
                        rec_id = XT_GET_DISK_4(rec_buf.tr_prev_rec_id_4);
2137
 
                }
2138
 
                if (free_count != tab->tab_rec_fnum) {
2139
 
                        if (correct_count) {
2140
 
                                tab->tab_rec_fnum = free_count;
2141
 
                                tab->tab_head_rec_fnum = free_count;
2142
 
                                tab->tab_flush_pending = TRUE;
2143
 
                                xt_logf(XT_NT_INFO, "Table %s: free record count (%llu) has been set to the number of records on the list: %llu\n", table_name, (u_llong) tab->tab_rec_fnum, (u_llong) free_count);
2144
 
                        }
2145
 
                        else
2146
 
                                xt_logf(XT_NT_INFO, "Table %s: free record count (%llu) differs from the number of records on the list: %llu\n", table_name, (u_llong) tab->tab_rec_fnum, (u_llong) free_count);
2147
 
                }
2148
 
                xt_unlock_mutex_ns(&tab->tab_rec_lock);
2149
 
        }
2150
 
 
2151
 
        /* Check the row free list: */
2152
 
        xt_lock_mutex_ns(&tab->tab_row_lock);
2153
 
 
2154
 
        prev_row_id = 0;
2155
 
        free_count = 0;
2156
 
        row_id = tab->tab_row_free_id;
2157
 
        while (row_id) {
2158
 
                if (row_id >= tab->tab_row_eof_id) {
2159
 
                        xt_logf(XT_NT_ERROR, "Table %s: invalid reference on free row: %llu, ", table_name, (u_llong) row_id);
2160
 
                        if (prev_row_id)
2161
 
                                xt_logf(XT_NT_ERROR, "reference by: %llu\n", (u_llong) prev_row_id);
2162
 
                        else
2163
 
                                xt_logf(XT_NT_ERROR, "reference by list head pointer\n");
2164
 
                        xt_tab_set_table_repair_pending(tab);
2165
 
                        break;
2166
 
                }
2167
 
                if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &next_row_id, ot->ot_thread)) {
2168
 
                        if (self)
2169
 
                                xt_throw(self);
2170
 
                        else
2171
 
                                xt_log_and_clear_warning(ot->ot_thread);
2172
 
                        break;
2173
 
                }
2174
 
                free_count++;
2175
 
                prev_row_id = row_id;
2176
 
                row_id = next_row_id;
2177
 
        }
2178
 
        if (free_count != tab->tab_row_fnum) {
2179
 
                if (correct_count) {
2180
 
                        /* tab_row_fnum is the current value, and tab_head_row_fnum is the value on
2181
 
                         * disk. tab_head_row_fnum is set by the writer as the changes are applied
2182
 
                         * to the database.
2183
 
                         *
2184
 
                         * This is the value then stored in the header of the file. This value
2185
 
                         * is in sync with other changes to the file.
2186
 
                         *
2187
 
                         * So the fact that I am setting both value means this will not work at
2188
 
                         * runtime, unless all changes have been applied by the writer.
2189
 
                         *
2190
 
                         * The correct way to do this at run time would be to add the change to the
2191
 
                         * transaction log, so that it is applied by the writer.
2192
 
                         */
2193
 
                        tab->tab_row_fnum = free_count;
2194
 
                        tab->tab_head_row_fnum = free_count;
2195
 
                        tab->tab_flush_pending = TRUE;
2196
 
                        xt_logf(XT_NT_INFO, "Table %s: free row count (%llu) has been set to the number of rows on the list: %llu\n", table_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count);
2197
 
                }
2198
 
                else
2199
 
                        xt_logf(XT_NT_INFO, "Table %s: free row count (%llu) differs from the number of rows on the list: %llu\n", table_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count);
2200
 
        }
2201
 
 
2202
 
        xt_unlock_mutex_ns(&tab->tab_row_lock);
2203
 
}
2204
 
 
2205
 
/*
2206
 
 * Record buffer size:
2207
 
 * -------------------
2208
 
 * The size of the record buffer used to hold the row
2209
 
 * in memory. This buffer size does not include the BLOB data.
2210
 
 * About 8 bytes (a pointer and a size) is reserved for each BLOB
2211
 
 * in this buffer.
2212
 
 *
2213
 
 * The buffer size includes a number of "NULL" bytes followed by
2214
 
 * the data area. The NULL bytes contain 1 bit for every column,
2215
 
 * to indicate whether the column is NULL or not.
2216
 
 *
2217
 
 * The size of the buffer is 4/8-byte aligned, so it may be padded
2218
 
 * at the end.
2219
 
 *
2220
 
 * Fixed length rec. len.:
2221
 
 * -----------------------
2222
 
 * If the record does not include any BLOBs then this is the size of the
2223
 
 * fixed length record. The size of the data in the data handle record
2224
 
 * need never be bigger than this length, if the record does not
2225
 
 * contain BLOBs. So this should be the maximum size set for
2226
 
 * AVG_ROW_LENGTH in this case.
2227
 
 *
2228
 
 * Handle data record size:
2229
 
 * ------------------------
2230
 
 * This is the size of the handle data record. It is the data size
2231
 
 * plus the "max header size".
2232
 
 *
2233
 
 * Min/max header size:
2234
 
 * The min and max header size of the header in the data handle file.
2235
 
 * The larger header is used if a record has an extended data (data log
2236
 
 * file) component.
2237
 
 *
2238
 
 * Min/avg/max record size:
2239
 
 * ------------------------
2240
 
 * These are variable length records sizes. That is, the size of records
2241
 
 * when stored in the variable length format. Variable length records
2242
 
 * do not have fixed fields sizes, instead the fields are packed one
2243
 
 * after the other, prefixed by a number of size indicator bytes.
2244
 
 *
2245
 
 * The average is an estimate of the average record size. This estimate
2246
 
 * is used if no AVG_ROW_LENGTH is specifically given.
2247
 
 *
2248
 
 * If the average estimate is within 20% of the maximum size of the record,
2249
 
 * then the record will be handled as a fixed length record.
2250
 
 *
2251
 
 * Avg row len set for tab:
2252
 
 * ------------------------
2253
 
 * This is the value set using AVG_ROW_LENGTH when the table is declared.
2254
 
 *
2255
 
 * Rows fixed length:
2256
 
 * ------------------
2257
 
 * YES if the records of this table are handled as fixed length records.
2258
 
 * In this case the table records will never have an extended record
2259
 
 * component.
2260
 
 *
2261
 
 * The size of the data area in the handle data record is set to the
2262
 
 * size of the MySQL data record ("Fixed length rec. len.").
2263
 
 *
2264
 
 * It also means that the record format used is identical to the MySQL
2265
 
 * record format.
2266
 
 *
2267
 
 * If the records are not fixed, then the variable length record format
2268
 
 * is used. Record sizes are then in the range specified by
2269
 
 * "Min/avg/max record size".
2270
 
 *
2271
 
 * Maximum fixed size:
2272
 
 * -------------------
2273
 
 * This is the maximum size of a data log record.
2274
 
 *
2275
 
 * Minimum variable size:
2276
 
 * ------------------------
2277
 
 * Records below this size are handled as a fixed length record size, unless
2278
 
 * the AVG_ROW_LENGTH is specifically set.
2279
 
 */
2280
 
xtPublic void xt_check_table(XTThreadPtr self, XTOpenTablePtr ot)
2281
 
{
2282
 
        XTTableHPtr                             tab = ot->ot_table;
2283
 
        xtRecordID                              prec_id;
2284
 
        XTTabRecExtDPtr                 rec_buf = (XTTabRecExtDPtr) ot->ot_row_rbuffer;
2285
 
#ifdef CHECK_TABLE_READ_DATA_LOG
2286
 
        XTactExtRecEntryDRec    ext_rec;
2287
 
        size_t                                  log_size;
2288
 
        xtLogID                                 log_id;
2289
 
        xtLogOffset                             log_offset;
2290
 
#endif
2291
 
        xtRecordID                              rec_id;
2292
 
        xtRecordID                              prev_rec_id;
2293
 
        xtXactID                                xn_id;
2294
 
        xtRowID                                 row_id;
2295
 
        u_llong                                 free_rec_count = 0, free_count2 = 0;
2296
 
        u_llong                                 delete_rec_count = 0;
2297
 
        u_llong                                 alloc_rec_count = 0;
2298
 
        u_llong                                 alloc_rec_bytes = 0;
2299
 
        u_llong                                 min_comp_rec_len = 0;
2300
 
        u_llong                                 max_comp_rec_len = 0;
2301
 
        size_t                                  rec_size;
2302
 
        size_t                                  row_size;
2303
 
        u_llong                                 ext_data_len = 0;
2304
 
        u_llong                                 ext_rec_count = 0;
2305
 
 
2306
 
#if defined(DUMP_CHECK_TABLE) || defined(CHECK_TABLE_STATS)
2307
 
        printf("\nCHECK TABLE: %s\n", tab->tab_name->ps_path);
2308
 
#endif
2309
 
 
2310
 
        xt_lock_mutex(self, &tab->tab_db->db_co_ext_lock);
2311
 
        pushr_(xt_unlock_mutex, &tab->tab_db->db_co_ext_lock);
2312
 
 
2313
 
        xt_lock_mutex(self, &tab->tab_rec_lock);
2314
 
        pushr_(xt_unlock_mutex, &tab->tab_rec_lock);
2315
 
 
2316
 
#ifdef CHECK_TABLE_STATS
2317
 
        printf("Record buffer size      = %lu\n", (u_long) tab->tab_dic.dic_mysql_buf_size);
2318
 
        printf("Fixed length rec. len.  = %lu\n", (u_long) tab->tab_dic.dic_mysql_rec_size);
2319
 
        printf("Handle data record size = %lu\n", (u_long) tab->tab_dic.dic_rec_size);
2320
 
        printf("Min/max header size     = %d/%d\n", (int) offsetof(XTTabRecFix, rf_data), tab->tab_dic.dic_rec_fixed ? (int) offsetof(XTTabRecFix, rf_data) : (int) offsetof(XTTabRecExtDRec, re_data));
2321
 
        printf("Min/avg/max record size = %llu/%llu/%llu\n", (u_llong) tab->tab_dic.dic_min_row_size, (u_llong) tab->tab_dic.dic_ave_row_size, (u_llong) tab->tab_dic.dic_max_row_size);
2322
 
        if (tab->tab_dic.dic_def_ave_row_size)
2323
 
                printf("Avg row len set for tab = %lu\n", (u_long) tab->tab_dic.dic_def_ave_row_size);
2324
 
        else
2325
 
                printf("Avg row len set for tab = not specified\n");
2326
 
        printf("Rows fixed length       = %s\n", tab->tab_dic.dic_rec_fixed ? "YES" : "NO");
2327
 
        if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
2328
 
                printf("Table type              = MEMORY\n");
2329
 
        else if (tab->tab_dic.dic_tab_flags & XT_TF_REAL_TEMP_TABLE)
2330
 
                printf("Table type              = TEMPORARY\n");
2331
 
        else if (tab->tab_dic.dic_tab_flags & XT_TF_DDL_TEMP_TABLE)
2332
 
                printf("Table type              = DDL-TEMPORARY\n");
2333
 
        if (tab->tab_dic.dic_def_ave_row_size)
2334
 
                printf("Maximum fixed size      = %lu\n", (u_long) XT_TAB_MAX_FIX_REC_LENGTH_SPEC);
2335
 
        else
2336
 
                printf("Maximum fixed size      = %lu\n", (u_long) XT_TAB_MAX_FIX_REC_LENGTH);
2337
 
        printf("Minimum variable size   = %lu\n", (u_long) XT_TAB_MIN_VAR_REC_LENGTH);
2338
 
        printf("Minimum auto-increment  = %llu\n", (u_llong) tab->tab_dic.dic_min_auto_inc);
2339
 
        printf("Number of columns       = %lu\n", (u_long) tab->tab_dic.dic_no_of_cols);
2340
 
        printf("Number of fixed columns = %lu\n", (u_long) tab->tab_dic.dic_fix_col_count);
2341
 
        printf("Columns req. for index  = %lu\n", (u_long) tab->tab_dic.dic_ind_cols_req);
2342
 
        if (tab->tab_dic.dic_ind_rec_len)
2343
 
                printf("Rec len req. for index  = %llu\n", (u_llong) tab->tab_dic.dic_ind_rec_len);
2344
 
        printf("Columns req. for blobs  = %lu\n", (u_long) tab->tab_dic.dic_blob_cols_req);
2345
 
        printf("Number of blob columns  = %lu\n", (u_long) tab->tab_dic.dic_blob_count);
2346
 
        printf("Number of indices       = %lu\n", (u_long) tab->tab_dic.dic_key_count);
2347
 
#endif
2348
 
 
2349
 
#ifdef DUMP_CHECK_TABLE
2350
 
        printf("Records:-\n");
2351
 
        printf("Free list: %llu (%llu)\n", (u_llong) tab->tab_rec_free_id, (u_llong) tab->tab_rec_fnum);
2352
 
        printf("EOF:       %llu\n", (u_llong) tab->tab_rec_eof_id);
2353
 
#endif
2354
 
 
2355
 
        rec_size = XT_REC_EXT_HEADER_SIZE;
2356
 
        if (rec_size > tab->tab_recs.tci_rec_size)
2357
 
                rec_size = tab->tab_recs.tci_rec_size;
2358
 
        rec_id = 1;
2359
 
        while (rec_id < tab->tab_rec_eof_id) {
2360
 
                if (!xt_tab_get_rec_data(ot, rec_id, tab->tab_dic.dic_rec_size, ot->ot_row_rbuffer))
2361
 
                        xt_throw(self);
2362
 
 
2363
 
#ifdef DUMP_CHECK_TABLE
2364
 
                printf("%-4llu ", (u_llong) rec_id);
2365
 
#endif
2366
 
                switch (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) {
2367
 
                        case XT_TAB_STATUS_FREED:
2368
 
#ifdef DUMP_CHECK_TABLE
2369
 
                                printf("======== ");
2370
 
#endif
2371
 
                                free_rec_count++;
2372
 
                                break;
2373
 
                        case XT_TAB_STATUS_DELETE:
2374
 
#ifdef DUMP_CHECK_TABLE
2375
 
                                printf("delete   ");
2376
 
#endif
2377
 
                                delete_rec_count++;
2378
 
                                break;
2379
 
                        case XT_TAB_STATUS_FIXED:
2380
 
#ifdef DUMP_CHECK_TABLE
2381
 
                                printf("record-F ");
2382
 
#endif
2383
 
                                alloc_rec_count++;
2384
 
                                row_size = myxt_store_row_length(ot, (char *) ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE);
2385
 
                                alloc_rec_bytes += row_size;
2386
 
                                if (!min_comp_rec_len || row_size < min_comp_rec_len)
2387
 
                                        min_comp_rec_len = row_size;
2388
 
                                if (row_size > max_comp_rec_len)
2389
 
                                        max_comp_rec_len = row_size;
2390
 
                                break;
2391
 
                        case XT_TAB_STATUS_VARIABLE:
2392
 
#ifdef DUMP_CHECK_TABLE
2393
 
                                printf("record-V ");
2394
 
#endif
2395
 
                                alloc_rec_count++;
2396
 
                                row_size = myxt_load_row_length(ot, tab->tab_dic.dic_rec_size, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, NULL);
2397
 
                                alloc_rec_bytes += row_size;
2398
 
                                if (!min_comp_rec_len || row_size < min_comp_rec_len)
2399
 
                                        min_comp_rec_len = row_size;
2400
 
                                if (row_size > max_comp_rec_len)
2401
 
                                        max_comp_rec_len = row_size;
2402
 
                                break;
2403
 
                        case XT_TAB_STATUS_EXT_DLOG:
2404
 
#ifdef DUMP_CHECK_TABLE
2405
 
                                printf("record-X ");
2406
 
#endif
2407
 
                                alloc_rec_count++;
2408
 
                                ext_rec_count++;
2409
 
                                ext_data_len += XT_GET_DISK_4(rec_buf->re_log_dat_siz_4);
2410
 
                                row_size = XT_GET_DISK_4(rec_buf->re_log_dat_siz_4) + ot->ot_rec_size - XT_REC_EXT_HEADER_SIZE;
2411
 
                                alloc_rec_bytes += row_size;
2412
 
                                if (!min_comp_rec_len || row_size < min_comp_rec_len)
2413
 
                                        min_comp_rec_len = row_size;
2414
 
                                if (row_size > max_comp_rec_len)
2415
 
                                        max_comp_rec_len = row_size;
2416
 
                                break;
2417
 
                }
2418
 
#ifdef DUMP_CHECK_TABLE
2419
 
                if (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_CLEANED_BIT)
2420
 
                        printf("C");
2421
 
                else
2422
 
                        printf(" ");
2423
 
#endif
2424
 
                prev_rec_id = XT_GET_DISK_4(rec_buf->tr_prev_rec_id_4);
2425
 
                xn_id = XT_GET_DISK_4(rec_buf->tr_xact_id_4);
2426
 
                row_id = XT_GET_DISK_4(rec_buf->tr_row_id_4);
2427
 
                switch (rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) {
2428
 
                        case XT_TAB_STATUS_FREED:
2429
 
#ifdef DUMP_CHECK_TABLE
2430
 
                                printf(" prev=%-3llu (xact=%-3llu row=%lu)\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id);
2431
 
#endif
2432
 
                                break;
2433
 
                        case XT_TAB_STATUS_EXT_DLOG:
2434
 
#ifdef DUMP_CHECK_TABLE
2435
 
                                printf(" prev=%-3llu  xact=%-3llu row=%lu  Xlog=%lu Xoff=%llu Xsiz=%lu\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id, (u_long) XT_GET_DISK_2(rec_buf->re_log_id_2), (u_llong) XT_GET_DISK_6(rec_buf->re_log_offs_6), (u_long) XT_GET_DISK_4(rec_buf->re_log_dat_siz_4));
2436
 
#endif
2437
 
 
2438
 
#ifdef CHECK_TABLE_READ_DATA_LOG
2439
 
                                xtBool ok;
2440
 
 
2441
 
                                log_size = XT_GET_DISK_4(rec_buf->re_log_dat_siz_4);
2442
 
                                XT_GET_LOG_REF(log_id, log_offset, rec_buf);
2443
 
                                if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
2444
 
                                        xt_tab_read_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data), (xtWord1 *) &ext_rec);
2445
 
                                        ok = TRUE;
2446
 
                                }
2447
 
                                else {
2448
 
                                        if (!(ok = self->st_dlog_buf.dlb_read_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data), (xtWord1 *) &ext_rec, self)))
2449
 
                                                xt_log_and_clear_exception(self);
2450
 
                                }
2451
 
                                if (ok) {
2452
 
                                        size_t          log_size2;
2453
 
                                        xtTableID       curr_tab_id;
2454
 
                                        xtRecordID      curr_rec_id;
2455
 
 
2456
 
                                        log_size2 = XT_GET_DISK_4(ext_rec.er_data_size_4);
2457
 
                                        curr_tab_id = XT_GET_DISK_4(ext_rec.er_tab_id_4);
2458
 
                                        curr_rec_id = XT_GET_DISK_4(ext_rec.er_rec_id_4);
2459
 
                                        if (log_size2 != log_size || curr_tab_id != tab->tab_id || curr_rec_id != rec_id) {
2460
 
                                                xt_logf(XT_INFO, "Table %s: record %llu, extended record %lu:%llu not valid\n", tab->tab_name, (u_llong) rec_id, (u_long) log_id, (u_llong) log_offset);
2461
 
                                        }
2462
 
                                }
2463
 
#endif
2464
 
                                break;
2465
 
                        default:
2466
 
#ifdef DUMP_CHECK_TABLE
2467
 
                                printf(" prev=%-3llu  xact=%-3llu row=%lu\n", (u_llong) prev_rec_id, (u_llong) xn_id, (u_long) row_id);
2468
 
#endif
2469
 
                                break;
2470
 
                }
2471
 
                rec_id++;
2472
 
        }
2473
 
        
2474
 
#ifdef CHECK_TABLE_STATS
2475
 
        u_long  rec, row, ind;
2476
 
        char    value[50];
2477
 
 
2478
 
        rec = xt_seek_eof_file(self, ot->ot_rec_file);
2479
 
        row = xt_seek_eof_file(self, ot->ot_row_file);
2480
 
        ind = xt_seek_eof_file(self, ot->ot_ind_file);
2481
 
        if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
2482
 
                if (!tab->tab_dic.dic_rec_fixed) {
2483
 
                        xt_int8_to_byte_size((xtInt8) tab->tab_mem_total, value);
2484
 
                        printf("Ext. record memory used = %s\n", value);
2485
 
                        printf("Extended record count   = %llu\n", ext_rec_count);
2486
 
                }
2487
 
                xt_int8_to_byte_size((xtInt8) ind, value);
2488
 
                printf("Index data memory used  = %s\n", value);
2489
 
                xt_int8_to_byte_size((xtInt8) rec + row, value);
2490
 
                printf("Table data memory used  = %s\n", value);
2491
 
                xt_int8_to_byte_size((xtInt8) tab->tab_mem_total + rec + row + ind, value);
2492
 
                printf("Total memory used       = %s\n", value);
2493
 
        }
2494
 
        else {
2495
 
                if (!tab->tab_dic.dic_rec_fixed) {
2496
 
                        xt_int8_to_byte_size((xtInt8) ext_data_len, value);
2497
 
                        printf("Ext. record disk used   = %s\n", value);                
2498
 
                        printf("Extended record count   = %llu\n", ext_rec_count);
2499
 
                }
2500
 
                xt_int8_to_byte_size((xtInt8) ind, value);
2501
 
                printf("Index disk space used   = %s\n", value);
2502
 
                xt_int8_to_byte_size((xtInt8) rec + row, value);
2503
 
                printf("Table disk space used   = %s\n", value);
2504
 
                xt_int8_to_byte_size((xtInt8) ext_data_len + rec + row + ind, value);
2505
 
                printf("Total disk space used   = %s\n", value);
2506
 
        }
2507
 
        
2508
 
        if (alloc_rec_count) {
2509
 
                printf("Minumum comp. rec. len. = %llu\n", (u_llong) min_comp_rec_len);
2510
 
                printf("Average comp. rec. len. = %llu\n", (u_llong) ((double) alloc_rec_bytes / (double) alloc_rec_count + (double) 0.5));
2511
 
                printf("Maximum comp. rec. len. = %llu\n", (u_llong) max_comp_rec_len);
2512
 
        }
2513
 
        printf("Free record count       = %llu\n", (u_llong) free_rec_count);
2514
 
        printf("Deleted record count    = %llu\n", (u_llong) delete_rec_count);
2515
 
        printf("Allocated record count  = %llu\n", (u_llong) alloc_rec_count);
2516
 
 
2517
 
#endif
2518
 
        if (tab->tab_rec_fnum != free_rec_count)
2519
 
                xt_logf(XT_INFO, "Table %s: incorrect number of free blocks, %llu, should be: %llu\n", tab->tab_name, (u_llong) free_rec_count, (u_llong) tab->tab_rec_fnum);
2520
 
 
2521
 
        /* Checking the free list: */
2522
 
        prec_id = 0;
2523
 
        rec_id = tab->tab_rec_free_id;
2524
 
        while (rec_id) {
2525
 
                if (rec_id >= tab->tab_rec_eof_id) {
2526
 
                        xt_logf(XT_INFO, "Table %s: invalid reference on free list: %llu, ", tab->tab_name, (u_llong) rec_id);
2527
 
                        if (prec_id)
2528
 
                                xt_logf(XT_INFO, "reference by: %llu\n", (u_llong) prec_id);
2529
 
                        else
2530
 
                                xt_logf(XT_INFO, "reference by list head pointer\n");
2531
 
                        break;
2532
 
                }
2533
 
                if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_FIX_HEADER_SIZE, (xtWord1 *) rec_buf)) {
2534
 
                        xt_log_and_clear_exception(self);
2535
 
                        break;
2536
 
                }
2537
 
                if ((rec_buf->tr_rec_type_1 & XT_TAB_STATUS_MASK) != XT_TAB_STATUS_FREED)
2538
 
                        xt_logf(XT_INFO, "Table %s: record, %llu, on free list is not free\n", tab->tab_name, (u_llong) rec_id);
2539
 
                free_count2++;
2540
 
                prec_id = rec_id;
2541
 
                rec_id = XT_GET_DISK_4(rec_buf->tr_prev_rec_id_4);
2542
 
        }
2543
 
        if (free_count2 != free_rec_count)
2544
 
                xt_logf(XT_INFO, "Table %s: not all free blocks (%llu) on free list: %llu\n", tab->tab_name, (u_llong) free_rec_count, (u_llong) free_count2);
2545
 
 
2546
 
        freer_(); // xt_unlock_mutex_ns(&tab->tab_rec_lock);
2547
 
 
2548
 
        xtRefID ref_id;
2549
 
 
2550
 
        xt_lock_mutex(self, &tab->tab_row_lock);
2551
 
        pushr_(xt_unlock_mutex, &tab->tab_row_lock);
2552
 
 
2553
 
#ifdef DUMP_CHECK_TABLE
2554
 
        printf("Rows:-\n");
2555
 
        printf("Free list: %llu (%llu)\n", (u_llong) tab->tab_row_free_id, (u_llong) tab->tab_row_fnum);
2556
 
        printf("EOF:       %llu\n", (u_llong) tab->tab_row_eof_id);
2557
 
#endif
2558
 
 
2559
 
        rec_id = 1;
2560
 
        while (rec_id < tab->tab_row_eof_id) {
2561
 
                if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, rec_id, &ref_id, self))
2562
 
                        xt_throw(self);
2563
 
#ifdef DUMP_CHECK_TABLE
2564
 
                printf("%-3llu ", (u_llong) rec_id);
2565
 
#endif
2566
 
#ifdef DUMP_CHECK_TABLE
2567
 
                if (ref_id == 0)
2568
 
                        printf("====== 0\n");
2569
 
                else
2570
 
                        printf("in use %llu\n", (u_llong) ref_id);
2571
 
#endif
2572
 
                rec_id++;
2573
 
        }
2574
 
 
2575
 
        prec_id = 0;
2576
 
        free_count2 = 0;
2577
 
        row_id = tab->tab_row_free_id;
2578
 
        while (row_id) {
2579
 
                if (row_id >= tab->tab_row_eof_id) {
2580
 
                        xt_logf(XT_INFO, "Table %s: invalid reference on free row: %llu, ", tab->tab_name, (u_llong) row_id);
2581
 
                        if (prec_id)
2582
 
                                xt_logf(XT_INFO, "reference by: %llu\n", (u_llong) prec_id);
2583
 
                        else
2584
 
                                xt_logf(XT_INFO, "reference by list head pointer\n");
2585
 
                        break;
2586
 
                }
2587
 
                if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &ref_id, self)) {
2588
 
                        xt_log_and_clear_exception(self);
2589
 
                        break;
2590
 
                }
2591
 
                free_count2++;
2592
 
                prec_id = row_id;
2593
 
                row_id = ref_id;
2594
 
        }
2595
 
        if (free_count2 != tab->tab_row_fnum)
2596
 
                xt_logf(XT_INFO, "Table %s: free row count (%llu) differs from the number of row on the list: %llu\n", tab->tab_name, (u_llong) tab->tab_row_fnum, (u_llong) free_count2);
2597
 
 
2598
 
        freer_(); // xt_unlock_mutex(&tab->tab_row_lock);
2599
 
 
2600
 
#ifdef CHECK_INDEX_ON_CHECK_TABLE
2601
 
        xt_check_indices(ot);
2602
 
#endif
2603
 
        freer_(); // xt_unlock_mutex(&tab->tab_db->db_co_ext_lock);
2604
 
}
2605
 
 
2606
 
xtPublic void xt_rename_table(XTThreadPtr self, XTPathStrPtr old_name, XTPathStrPtr new_name)
2607
 
{
2608
 
        XTDatabaseHPtr          db = self->st_database;
2609
 
        XTOpenTablePoolPtr      table_pool;
2610
 
        XTTableHPtr                     tab = NULL;
2611
 
        char                            table_name[XT_MAX_TABLE_FILE_NAME_SIZE];
2612
 
        char                            *postfix;
2613
 
        XTFilesOfTableRec       ft;
2614
 
        XTDictionaryRec         dic;
2615
 
        xtTableID                       tab_id;
2616
 
        XTTableEntryPtr         te_ptr;
2617
 
        char                            *te_new_name;
2618
 
        XTTablePathPtr          te_new_path;
2619
 
        XTTablePathPtr          te_old_path;
2620
 
        char                            to_path[PATH_MAX];
2621
 
 
2622
 
        memset(&dic, 0, sizeof(dic));
2623
 
 
2624
 
#ifdef TRACE_CREATE_TABLES
2625
 
        printf("RENAME %s --> %s\n", old_name->ps_path, new_name->ps_path);
2626
 
#endif
2627
 
        if (strlen(xt_last_name_of_path(new_name->ps_path)) > XT_TABLE_NAME_SIZE-1)
2628
 
                xt_throw_taberr(XT_CONTEXT, XT_ERR_NAME_TOO_LONG, new_name);
2629
 
 
2630
 
        /* MySQL renames the table while it is in use. Here is
2631
 
         * the sequence:
2632
 
         *
2633
 
         * OPEN tab1
2634
 
         * CREATE tmp_tab
2635
 
         * OPEN tmp_tab
2636
 
         * COPY tab1 -> tmp_tab
2637
 
         * CLOSE tmp_tab
2638
 
         * RENAME tab1 -> tmp2_tab
2639
 
         * RENAME tmp_tab -> tab1
2640
 
         * CLOSE tab1 (tmp2_tab)
2641
 
         * DELETE tmp2_tab
2642
 
         * OPEN tab1
2643
 
         *
2644
 
         * Since the table is open when it is renamed, I cannot
2645
 
         * get exclusive use of the table for this operation.
2646
 
         *
2647
 
         * So instead we just make sure that the sweeper is not
2648
 
         * using the table.
2649
 
         */
2650
 
        table_pool = xt_db_lock_table_pool_by_name(self, self->st_database, old_name, FALSE, TRUE, FALSE, &tab);
2651
 
        pushr_(xt_db_unlock_table_pool, table_pool);
2652
 
        xt_ht_lock(self, db->db_tables);
2653
 
        pushr_(xt_ht_unlock, db->db_tables);
2654
 
        tab_id = tab->tab_id;
2655
 
        myxt_move_dictionary(&dic, &tab->tab_dic);
2656
 
        pushr_(myxt_free_dictionary, &dic);
2657
 
        pushr_(xt_heap_release, tab);
2658
 
 
2659
 
        /* Unmap the memory mapped table files: 
2660
 
         * For windows this must be done before we
2661
 
         * can rename the files.
2662
 
         */
2663
 
        tab_close_files(self, tab);
2664
 
 
2665
 
        freer_(); // xt_heap_release(self, old_tab)
2666
 
 
2667
 
        /* Create the new name and path: */
2668
 
        te_new_name = xt_dup_string(self, xt_last_name_of_path(new_name->ps_path));
2669
 
        pushr_(xt_free, te_new_name);
2670
 
        te_new_path = tab_get_table_path(self, db, new_name, FALSE);
2671
 
        pushr_(tab_free_table_path, te_new_path);
2672
 
 
2673
 
        te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
2674
 
 
2675
 
        /* Remove the table from the Database directory: */
2676
 
        xt_ht_del(self, db->db_tables, old_name);
2677
 
 
2678
 
        xt_enum_files_of_tables_init(old_name, tab_id, &ft);
2679
 
        while (xt_enum_files_of_tables_next(&ft)) {
2680
 
                postfix = xt_tab_file_to_name(XT_MAX_TABLE_FILE_NAME_SIZE, table_name, ft.ft_file_path);
2681
 
 
2682
 
                xt_strcpy(PATH_MAX, to_path, new_name->ps_path);
2683
 
                xt_strcat(PATH_MAX, to_path, postfix);
2684
 
 
2685
 
                if (!xt_fs_rename(NULL, ft.ft_file_path, to_path))
2686
 
                        xt_log_and_clear_exception(self);
2687
 
        }
2688
 
 
2689
 
        /* Switch the table name and path: */
2690
 
        xt_free(self, te_ptr->te_tab_name);
2691
 
        te_ptr->te_tab_name = te_new_name;
2692
 
        te_old_path = te_ptr->te_tab_path;
2693
 
        te_ptr->te_tab_path = te_new_path;
2694
 
        tab_remove_table_path(self, db, te_old_path);
2695
 
        tab_save_tables(self, db);
2696
 
 
2697
 
        popr_(); // Discard tab_free_table_path(te_new_path);
2698
 
        popr_(); // Discard xt_free(te_new_name);
2699
 
 
2700
 
        tab = xt_use_table_no_lock(self, db, new_name, FALSE, FALSE, &dic);
2701
 
        /* All renamed tables are considered repaired! */
2702
 
        xt_tab_table_repaired(tab);
2703
 
        xt_heap_release(self, tab);
2704
 
 
2705
 
        freer_(); // myxt_free_dictionary(&dic)
2706
 
        freer_(); // xt_ht_unlock(db->db_tables)
2707
 
        freer_(); // xt_db_unlock_table_pool(table_pool)
2708
 
}
2709
 
 
2710
 
xtPublic XTTableHPtr xt_use_table(XTThreadPtr self, XTPathStrPtr name, xtBool no_load, xtBool missing_ok)
2711
 
{
2712
 
        XTTableHPtr             tab;
2713
 
        XTDatabaseHPtr  db = self->st_database;
2714
 
 
2715
 
        xt_ht_lock(self, db->db_tables);
2716
 
        pushr_(xt_ht_unlock, db->db_tables);
2717
 
        tab = xt_use_table_no_lock(self, db, name, no_load, missing_ok, NULL);
2718
 
        freer_();
2719
 
        return tab;
2720
 
}
2721
 
 
2722
 
xtPublic void xt_sync_flush_table(XTThreadPtr self, XTOpenTablePtr ot, int timeout)
2723
 
{
2724
 
        XTTableHPtr             tab = ot->ot_table;
2725
 
        XTDatabaseHPtr  db = tab->tab_db;
2726
 
 
2727
 
        /* Wakeup the sweeper:
2728
 
         * We want the sweeper to check if there is anything to do,
2729
 
         * so we must wake it up.
2730
 
         * Once it has done all it can, it will go back to sleep.
2731
 
         * This should be good enough.
2732
 
         *
2733
 
         * NOTE: I all cases, we do not wait if the sweeper is in
2734
 
         * error state.
2735
 
         */
2736
 
        if (db->db_sw_idle) {
2737
 
                u_int check_count = db->db_sw_check_count;
2738
 
 
2739
 
                for (;;) {
2740
 
                        xt_wakeup_sweeper(db);
2741
 
                        if (!db->db_sw_thread || db->db_sw_idle != XT_THREAD_IDLE || check_count != db->db_sw_check_count)
2742
 
                                break;
2743
 
                        xt_sleep_milli_second(10);
2744
 
                }
2745
 
        }
2746
 
 
2747
 
        /* Wait for the sweeper to become idle: */
2748
 
        xt_lock_mutex(self, &db->db_sw_lock);
2749
 
        pushr_(xt_unlock_mutex, &db->db_sw_lock);
2750
 
        while (db->db_sw_thread && !db->db_sw_idle) {
2751
 
                xt_timed_wait_cond(self, &db->db_sw_cond, &db->db_sw_lock, 10);
2752
 
        }
2753
 
        freer_(); // xt_unlock_mutex(&db->db_sw_lock)
2754
 
 
2755
 
        /* Wait for the writer to write out all operations on the table:
2756
 
         * We also do not wait for the writer if it is in
2757
 
         * error state.
2758
 
         */
2759
 
        time_t start_time = time(NULL);
2760
 
        while (db->db_wr_thread && 
2761
 
                db->db_wr_idle != XT_THREAD_INERR &&
2762
 
                XTTableSeq::xt_op_is_before(tab->tab_head_op_seq+1, tab->tab_seq.ts_next_seq)) {
2763
 
                if (timeout && time(NULL) > start_time + timeout) {
2764
 
                        char    name_buf[XT_TABLE_NAME_BUF_SIZE];
2765
 
 
2766
 
                        xt_tab_make_table_name(tab->tab_name, name_buf, XT_TABLE_NAME_BUF_SIZE);
2767
 
                        xt_logf(XT_WARNING, "Timeout waiting for writer while flushing %s\n", name_buf);
2768
 
                        break;
2769
 
                }
2770
 
 
2771
 
                /* Flush the log, in case this is holding up the
2772
 
                 * writer!
2773
 
                 */
2774
 
                if (!db->db_xlog.xlog_flush(self))
2775
 
                        xt_throw(self);
2776
 
 
2777
 
                xt_lock_mutex(self, &db->db_wr_lock);
2778
 
                pushr_(xt_unlock_mutex, &db->db_wr_lock);
2779
 
                db->db_wr_thread_waiting++;
2780
 
                /*
2781
 
                 * Wake the writer if it is sleeping. In order to
2782
 
                 * flush a table we must wait for the writer to complete
2783
 
                 * committing all the changes in the table to the database.
2784
 
                 */
2785
 
                if (db->db_wr_idle) {
2786
 
                        if (!xt_broadcast_cond_ns(&db->db_wr_cond))
2787
 
                                xt_log_and_clear_exception_ns();
2788
 
                }
2789
 
 
2790
 
                freer_(); // xt_unlock_mutex(&db->db_wr_lock)
2791
 
                xt_sleep_milli_second(10);
2792
 
 
2793
 
                xt_lock_mutex(self, &db->db_wr_lock);
2794
 
                pushr_(xt_unlock_mutex, &db->db_wr_lock);
2795
 
                db->db_wr_thread_waiting--;
2796
 
                freer_(); // xt_unlock_mutex(&db->db_wr_lock)
2797
 
        }
2798
 
 
2799
 
        xt_flush_table(self, ot);
2800
 
}
2801
 
 
2802
 
xtBool XTFlushRecRowTask::tk_task(XTThreadPtr thread)
2803
 
{
2804
 
        XTOpenTablePtr ot;
2805
 
 
2806
 
        /* {TASK-TABLE-GONE}
2807
 
         * If this task was scheduled before the table was deleted
2808
 
         * or renamed, then we may be caught holding an invalid
2809
 
         * table (frt_table) object.
2810
 
         *
2811
 
         * As a result we just use the ID, to get the open table
2812
 
         * pointer.
2813
 
         *
2814
 
         * If the tables are not identical, then there is no point
2815
 
         * in proceeding...
2816
 
         */
2817
 
        if (!(xt_db_open_pool_table_ns(&ot, frt_table->tab_db, frt_table->tab_id)))
2818
 
                return FAILED;
2819
 
 
2820
 
        if (!ot) {
2821
 
                /* Can happen if the table has been dropped: */
2822
 
                if (thread->t_exception.e_xt_err)
2823
 
                        xt_log_and_clear_exception(thread);
2824
 
                xt_logf(XT_NT_WARNING, "Checkpoint skipping table (ID) %lu: table was not found\n", (u_long) frt_table->tab_id);
2825
 
                xt_checkpoint_set_flush_state(frt_table->tab_db, frt_table->tab_id, XT_CPT_STATE_DONE_ALL);
2826
 
                return OK;
2827
 
        }
2828
 
 
2829
 
        if (ot->ot_table != frt_table) {
2830
 
                /* Can happen if the table has been renamed: */
2831
 
                if (thread->t_exception.e_xt_err)
2832
 
                        xt_log_and_clear_exception(thread);
2833
 
                xt_logf(XT_NT_WARNING, "Checkpoint skipping table (ID) %lu: table has been renamed\n", (u_long) frt_table->tab_id);
2834
 
                xt_checkpoint_set_flush_state(frt_table->tab_db, frt_table->tab_id, XT_CPT_STATE_DONE_ALL);
2835
 
                goto table_gone;
2836
 
        }
2837
 
 
2838
 
        if (!xt_flush_record_row(ot, NULL, FALSE)) {
2839
 
                xt_db_return_table_to_pool_ns(ot);
2840
 
                return FAILED;
2841
 
        }
2842
 
 
2843
 
        table_gone:
2844
 
        xt_db_return_table_to_pool_ns(ot);
2845
 
        return OK;
2846
 
}
2847
 
 
2848
 
void XTFlushRecRowTask::tk_reference()
2849
 
{
2850
 
        xt_heap_reference_ns(frt_table);
2851
 
}
2852
 
 
2853
 
void XTFlushRecRowTask::tk_release()
2854
 
{
2855
 
        xt_heap_release_ns(frt_table);
2856
 
}
2857
 
 
2858
 
/*
2859
 
 * Start a flush of this file in background.
2860
 
 */
2861
 
xtPublic xtBool xt_async_flush_record_row(XTTableHPtr tab, xtBool notify_complete, XTThreadPtr thread)
2862
 
{
2863
 
        if (tab->tab_rec_flush_task->tk_is_running())
2864
 
                return OK;
2865
 
 
2866
 
        /* Run the task: */
2867
 
        return xt_run_async_task(tab->tab_rec_flush_task, notify_complete, FALSE, thread, tab->tab_db);
2868
 
}
2869
 
 
2870
 
xtPublic xtBool xt_flush_record_row(XTOpenTablePtr ot, off_t *bytes_flushed, xtBool have_table_lock)
2871
 
{
2872
 
        XTTableHeadDRec                 rec_head;
2873
 
        XTTableHPtr                             tab = ot->ot_table;
2874
 
        off_t                                   to_flush;
2875
 
#ifdef TRACE_FLUSH_TABLE
2876
 
        time_t                                  tnow = 0;
2877
 
#endif
2878
 
 
2879
 
        if (!xt_begin_checkpoint(tab->tab_db, have_table_lock, ot->ot_thread))
2880
 
                return FAILED;
2881
 
 
2882
 
        xt_lock_mutex_ns(&tab->tab_rec_flush_lock);
2883
 
#ifdef XT_SORT_REC_WRITES
2884
 
        if (!xt_xres_delay_flush(ot, TRUE))
2885
 
                goto failed;
2886
 
#endif
2887
 
        xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_START_REC_ROW);
2888
 
 
2889
 
        ASSERT_NS(ot->ot_thread == xt_get_self());
2890
 
        /* Make sure that the table recovery point, in
2891
 
         * particular the operation ID is recorded
2892
 
         * before all other flush activity!
2893
 
         *
2894
 
         * This is because only operations after the
2895
 
         * recovery point in the header are applied
2896
 
         * to the table on recovery.
2897
 
         *
2898
 
         * So the operation ID is recorded before the
2899
 
         * flush activity, and written after all is done.
2900
 
         */
2901
 
        xt_tab_store_header(ot, &rec_head);
2902
 
 
2903
 
        /* Write the table header: */
2904
 
        if (tab->tab_flush_pending) {
2905
 
                tab->tab_flush_pending = FALSE;
2906
 
 
2907
 
#ifdef TRACE_FLUSH_TABLE
2908
 
                tnow = time(NULL);
2909
 
                printf("FLUSH TABLE bytes=%lu %s\n", (u_long) tab->tab_bytes_to_flush, tab->tab_name->ps_path);
2910
 
                fflush(stdout);
2911
 
#endif
2912
 
                // Want to see how much was to be flushed in the debugger:
2913
 
                to_flush = tab->tab_bytes_to_flush;
2914
 
                tab->tab_bytes_to_flush = 0;
2915
 
                if (bytes_flushed)
2916
 
                        *bytes_flushed += to_flush;
2917
 
                
2918
 
#ifdef XT_REC_FLUSH_THRESHOLD
2919
 
                XTThreadPtr writer;
2920
 
 
2921
 
                /* Reset the writer's byte level: */
2922
 
                if ((writer = ot->ot_table->tab_db->db_wr_thread))
2923
 
                        tab->tab_rec_wr_last_flush = writer->st_statistics.st_rec.ts_write;
2924
 
#endif
2925
 
 
2926
 
                /* Flush the table data: */
2927
 
                if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags)) {
2928
 
                        if (!XT_FLUSH_RR_FILE(ot->ot_rec_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread) ||
2929
 
                                !XT_FLUSH_RR_FILE(ot->ot_row_file, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread)) {
2930
 
                                tab->tab_flush_pending = TRUE;
2931
 
                                goto failed;
2932
 
                        }
2933
 
                }
2934
 
 
2935
 
                /* The header includes the operation number which
2936
 
                 * must be written AFTER all other data,
2937
 
                 * because operations will not be applied again.
2938
 
                 */
2939
 
                if (!tab_write_header(ot, &rec_head)) {
2940
 
                        tab->tab_flush_pending = TRUE;
2941
 
                        goto failed;
2942
 
                }
2943
 
        }
2944
 
 
2945
 
        /* Flush the auto-increment: */
2946
 
        if (xt_db_auto_increment_mode == 1) {
2947
 
                if (tab->tab_auto_inc != tab->tab_dic.dic_min_auto_inc) {
2948
 
                        tab->tab_dic.dic_min_auto_inc = tab->tab_auto_inc;
2949
 
                        if (!xt_tab_write_min_auto_inc(ot))
2950
 
                                goto failed;
2951
 
                }
2952
 
        }
2953
 
 
2954
 
        /* Mark this table as record/row flushed: */
2955
 
        xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_DONE_REC_ROW);
2956
 
 
2957
 
#ifdef TRACE_FLUSH_TABLE
2958
 
        if (tnow) {
2959
 
                printf("flush table (%d) %s DONE\n", (int) (time(NULL) - tnow), tab->tab_name->ps_path);
2960
 
                fflush(stdout);
2961
 
        }
2962
 
#endif
2963
 
 
2964
 
        xt_unlock_mutex_ns(&tab->tab_rec_flush_lock);
2965
 
 
2966
 
        if (!xt_end_checkpoint(tab->tab_db, ot->ot_thread, NULL))
2967
 
                return FAILED;
2968
 
        return OK;
2969
 
        
2970
 
        failed:
2971
 
        xt_checkpoint_set_flush_state(tab->tab_db, tab->tab_id, XT_CPT_STATE_STOP_REC_ROW);
2972
 
 
2973
 
#ifdef TRACE_FLUSH_TABLE
2974
 
        if (tnow) {
2975
 
                printf("flush table (%d) %s FAILED\n", (int) (time(NULL) - tnow), tab->tab_name->ps_path);
2976
 
                fflush(stdout);
2977
 
        }
2978
 
#endif
2979
 
 
2980
 
        xt_unlock_mutex_ns(&tab->tab_rec_flush_lock);
2981
 
        return FAILED;
2982
 
}
2983
 
 
2984
 
xtPublic void xt_flush_table(XTThreadPtr self, XTOpenTablePtr ot)
2985
 
{
2986
 
        /* GOTCHA {FLUSH-BUG}: This bug was difficult to find.
2987
 
         * It occured on Windows in the multi_update
2988
 
         * test, sometimes.
2989
 
         *
2990
 
         * What happens is the checkpointer starts to
2991
 
         * flush the table, and gets to the 
2992
 
         * XT_FLUSH_RR_FILE part.
2993
 
         *
2994
 
         * Then a rename occurs, and the user thread
2995
 
         * flushes the table, and goes through and
2996
 
         * writes the table header, with the most
2997
 
         * recent table operation (the last operation
2998
 
         * that occurred).
2999
 
         *
3000
 
         * The checkpointer the completes and
3001
 
         * also writes the header, but with old
3002
 
         * values (as read in xt_tab_store_header()).
3003
 
         *
3004
 
         * The then user thread continues, and
3005
 
         * reopens the table after rename.
3006
 
         * On reopen, it reads the old value from the header,
3007
 
         * and sets the current operation number.
3008
 
         *
3009
 
         * Now there is a problem in the table cache,
3010
 
         * because some cache pages have operation numbers
3011
 
         * that are greater than current operation
3012
 
         * number!
3013
 
         *
3014
 
         * This later lead to the free-er hanging while
3015
 
         * it waited for an operation to be 
3016
 
         * written to the disk that never would be.
3017
 
         * This is because a page can only be freed when
3018
 
         * the head operation number has passed the
3019
 
         * page operation number.
3020
 
         *
3021
 
         * Which indicates that the page has been written
3022
 
         * to disk.
3023
 
         *
3024
 
         * THE BUG FIX:
3025
 
         * As a result I now use mutex so that only one
3026
 
         * thread can flush at a time.
3027
 
         */
3028
 
 
3029
 
        if (!xt_flush_record_row(ot, NULL, FALSE))
3030
 
                xt_throw(self);
3031
 
 
3032
 
        /* This was before the table data flush,
3033
 
         * (after xt_tab_store_header() above,
3034
 
         * but I don't think it makes any difference.
3035
 
         * Because in the checkpointer it was at this
3036
 
         * position.
3037
 
         */
3038
 
        if (!xt_flush_indices(ot, NULL, FALSE, NULL))
3039
 
                xt_throw(self);
3040
 
 
3041
 
}
3042
 
 
3043
 
static XTOpenTablePtr tab_open_table(XTTableHPtr tab)
3044
 
{
3045
 
        volatile XTOpenTablePtr ot;
3046
 
        XTThreadPtr                             self;
3047
 
 
3048
 
        if (!(ot = (XTOpenTablePtr) xt_malloc_ns(sizeof(XTOpenTableRec))))
3049
 
                return NULL;
3050
 
        memset(ot, 0, offsetof(XTOpenTableRec, ot_ind_wbuf));
3051
 
 
3052
 
        ot->ot_seq_page = NULL;
3053
 
        ot->ot_seq_data = NULL;
3054
 
 
3055
 
        self = xt_get_self();
3056
 
        try_(a) {
3057
 
                xt_heap_reference(self, tab);
3058
 
                ot->ot_table = tab;
3059
 
                ot->ot_row_file = xt_open_file(self, ot->ot_table->tab_row_file->fil_path, xt_row_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_DEFAULT, xt_db_row_file_grow_size);
3060
 
                ot->ot_rec_file = xt_open_file(self, ot->ot_table->tab_rec_file->fil_path, xt_rec_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_DEFAULT, xt_db_data_file_grow_size);
3061
 
#ifdef XT_USE_DIRECT_IO_ON_INDEX
3062
 
                ot->ot_ind_file = xt_open_file(self, ot->ot_table->tab_ind_file->fil_path, xt_ind_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_MISSING_OK | XT_FS_DIRECT_IO, XT_INDEX_PAGE_SIZE*256);
3063
 
#else
3064
 
                ot->ot_ind_file = xt_open_file(self, ot->ot_table->tab_ind_file->fil_path, xt_ind_file_type(tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE), XT_FS_MISSING_OK, XT_INDEX_PAGE_SIZE*256);
3065
 
#endif
3066
 
        }
3067
 
        catch_(a) {
3068
 
                ;
3069
 
        }
3070
 
        cont_(a);
3071
 
 
3072
 
        if (!ot->ot_table || !ot->ot_row_file || !ot->ot_rec_file)
3073
 
                goto failed;
3074
 
 
3075
 
        if (!(ot->ot_row_rbuffer = (xtWord1 *) xt_malloc_ns(ot->ot_table->tab_dic.dic_rec_size)))
3076
 
                goto failed;
3077
 
        ot->ot_row_rbuf_size = ot->ot_table->tab_dic.dic_rec_size;
3078
 
        if (!(ot->ot_row_wbuffer = (xtWord1 *) xt_malloc_ns(ot->ot_table->tab_dic.dic_rec_size)))
3079
 
                goto failed;
3080
 
        ot->ot_row_wbuf_size = ot->ot_table->tab_dic.dic_rec_size;
3081
 
 
3082
 
        /* Cache this stuff to speed access a bit: */
3083
 
        ot->ot_rec_fixed = ot->ot_table->tab_dic.dic_rec_fixed;
3084
 
        ot->ot_rec_size = ot->ot_table->tab_dic.dic_rec_size;
3085
 
 
3086
 
        return ot;
3087
 
 
3088
 
        failed:
3089
 
        tab_close_table(ot);
3090
 
        return NULL;
3091
 
}
3092
 
 
3093
 
xtPublic XTOpenTablePtr xt_open_table(XTTableHPtr tab)
3094
 
{
3095
 
        return tab_open_table(tab);
3096
 
}
3097
 
 
3098
 
xtPublic void xt_close_table(XTOpenTablePtr ot, xtBool flush, xtBool have_table_lock)
3099
 
{
3100
 
        if (flush) {
3101
 
                if (!xt_flush_record_row(ot, NULL, have_table_lock))
3102
 
                        xt_log_and_clear_exception_ns();
3103
 
 
3104
 
                if (!xt_flush_indices(ot, NULL, have_table_lock, NULL))
3105
 
                        xt_log_and_clear_exception_ns();
3106
 
        }
3107
 
        tab_close_table(ot);
3108
 
}
3109
 
 
3110
 
static int tab_use_table_by_id(XTThreadPtr self, XTTableHPtr *r_tab, XTDatabaseHPtr db, xtTableID tab_id)
3111
 
{
3112
 
        XTTableEntryPtr te_ptr;
3113
 
        XTTableHPtr             tab = NULL;
3114
 
        int                             r = XT_TAB_OK;
3115
 
        char                    path[PATH_MAX];
3116
 
 
3117
 
        if (!db)
3118
 
                xt_throw_xterr(XT_CONTEXT, XT_ERR_NO_DATABASE_IN_USE);
3119
 
        xt_ht_lock(self, db->db_tables);
3120
 
        pushr_(xt_ht_unlock, db->db_tables);
3121
 
 
3122
 
        te_ptr = (XTTableEntryPtr) xt_sl_find(self, db->db_table_by_id, &tab_id);
3123
 
        if (te_ptr) {
3124
 
                if (!(tab = te_ptr->te_table)) {
3125
 
                        /* Open the table: */
3126
 
                        xt_strcpy(PATH_MAX, path, te_ptr->te_tab_path->tp_path);
3127
 
                        xt_add_dir_char(PATH_MAX, path);
3128
 
                        xt_strcat(PATH_MAX, path, te_ptr->te_tab_name);
3129
 
                        r = tab_new_handle(self, &tab, db, tab_id, (XTPathStrPtr) path, TRUE, NULL);
3130
 
                }
3131
 
        }
3132
 
        else
3133
 
                r = XT_TAB_NOT_FOUND;
3134
 
 
3135
 
        if (tab)
3136
 
                xt_heap_reference(self, tab);
3137
 
        *r_tab = tab;
3138
 
 
3139
 
        freer_(); // xt_ht_unlock(db->db_tables)
3140
 
        return r;
3141
 
}
3142
 
 
3143
 
xtPublic XTTableHPtr xt_use_table_by_id(XTThreadPtr self, XTDatabaseHPtr db, xtTableID tab_id, int *result)
3144
 
{
3145
 
        XTTableHPtr tab;
3146
 
        int                     r;
3147
 
 
3148
 
        r = tab_use_table_by_id(self, &tab, db, tab_id);
3149
 
        if (result) {
3150
 
                if (r != XT_TAB_OK) {
3151
 
                        *result = r;
3152
 
                        return NULL;
3153
 
                }
3154
 
        }
3155
 
        else {
3156
 
                switch (r) {
3157
 
                        case XT_TAB_NOT_FOUND:
3158
 
                                return NULL;
3159
 
                        case XT_TAB_NO_DICTIONARY:
3160
 
                                xt_throw_ulxterr(XT_CONTEXT, XT_ERR_NO_DICTIONARY, (u_long) tab_id);
3161
 
                        case XT_TAB_POOL_CLOSED:
3162
 
                                xt_throw_ulxterr(XT_CONTEXT, XT_ERR_TABLE_LOCKED, (u_long) tab_id);
3163
 
                        default:
3164
 
                                break;
3165
 
                }
3166
 
        }
3167
 
        
3168
 
        return tab;
3169
 
}
3170
 
 
3171
 
xtPublic XTTableHPtr xt_use_table_by_id_ns(XTDatabaseHPtr db, xtTableID tab_id)
3172
 
{
3173
 
        XTTableHPtr     tab;
3174
 
        XTThreadPtr     self = xt_get_self();
3175
 
 
3176
 
        try_(a) {
3177
 
                tab = xt_use_table_by_id(self, db, tab_id, NULL);
3178
 
        }
3179
 
        catch_(a) {
3180
 
                tab = NULL;
3181
 
        }
3182
 
        cont_(a);
3183
 
        return tab;
3184
 
}
3185
 
 
3186
 
/* The fixed part of the record is already in the row buffer.
3187
 
 * This function loads the extended part, expanding the row
3188
 
 * buffer if necessary.
3189
 
 */
3190
 
xtPublic xtBool xt_tab_load_ext_data(XTOpenTablePtr ot, xtRecordID load_rec_id, xtWord1 *buffer, u_int cols_req)
3191
 
{
3192
 
        size_t                                  log_size;
3193
 
        xtLogID                                 log_id;
3194
 
        xtLogOffset                             log_offset;
3195
 
        xtWord1                                 save_buffer[offsetof(XTactExtRecEntryDRec, er_data)];
3196
 
        xtBool                                  retried = FALSE;
3197
 
        XTactExtRecEntryDPtr    ext_data_ptr;
3198
 
        size_t                                  log_size2;
3199
 
        xtTableID                               curr_tab_id;
3200
 
        xtRecordID                              curr_rec_id;
3201
 
 
3202
 
        log_size = XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->re_log_dat_siz_4);
3203
 
        XT_GET_LOG_REF(log_id, log_offset, (XTTabRecExtDPtr) ot->ot_row_rbuffer);
3204
 
 
3205
 
        if (ot->ot_rec_size + log_size > ot->ot_row_rbuf_size) {
3206
 
                if (!xt_realloc_ns((void **) &ot->ot_row_rbuffer, ot->ot_rec_size + log_size))
3207
 
                        return FAILED;
3208
 
                ot->ot_row_rbuf_size = ot->ot_rec_size + log_size;
3209
 
        }
3210
 
 
3211
 
        /* Read the extended part first: */
3212
 
        ext_data_ptr = (XTactExtRecEntryDPtr) (ot->ot_row_rbuffer + ot->ot_rec_size - offsetof(XTactExtRecEntryDRec, er_data));
3213
 
 
3214
 
        /* Save the data which the header will overwrite: */
3215
 
        memcpy(save_buffer, ext_data_ptr, offsetof(XTactExtRecEntryDRec, er_data));
3216
 
        
3217
 
        reread:
3218
 
        if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
3219
 
                xt_tab_read_ext_record(ot->ot_table, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + log_size, (xtWord1 *) ext_data_ptr);
3220
 
        else {
3221
 
                if (!ot->ot_thread->st_dlog_buf.dlb_read_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + log_size, (xtWord1 *) ext_data_ptr, ot->ot_thread))
3222
 
                        goto retry_read;
3223
 
        }
3224
 
 
3225
 
        log_size2 = XT_GET_DISK_4(ext_data_ptr->er_data_size_4);
3226
 
        curr_tab_id = XT_GET_DISK_4(ext_data_ptr->er_tab_id_4);
3227
 
        curr_rec_id = XT_GET_DISK_4(ext_data_ptr->er_rec_id_4);
3228
 
 
3229
 
        if (log_size2 != log_size || curr_tab_id != ot->ot_table->tab_id || curr_rec_id != load_rec_id) {
3230
 
                /* [(3)] This can happen in the following circumstances:
3231
 
                 * - A new record is created, but the data log is not
3232
 
                 * flushed.
3233
 
                 * - The server quits.
3234
 
                 * - On restart the transaction is rolled back, but the data record
3235
 
                 *   was not written, so later a new record could be written at this
3236
 
                 *   location.
3237
 
                 * - Later the sweeper tries to cleanup this record, and finds
3238
 
                 *   that a different record has been written at this position.
3239
 
                 *
3240
 
                 * NOTE: Index entries can only be written to disk for records
3241
 
                 *       that have been committed to the disk, because uncommitted
3242
 
                 *       records may not exist in order to remove the index entry
3243
 
                 *       on cleanup.
3244
 
                 */
3245
 
                xt_register_xterr(XT_REG_CONTEXT, XT_ERR_BAD_EXT_RECORD);
3246
 
                goto retry_read;
3247
 
        }
3248
 
 
3249
 
        /* Restore the saved area: */
3250
 
        memcpy(ext_data_ptr, save_buffer, offsetof(XTactExtRecEntryDRec, er_data));
3251
 
 
3252
 
        if (retried)
3253
 
                xt_unlock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
3254
 
        return myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req);
3255
 
 
3256
 
        retry_read:
3257
 
        if (!retried) {
3258
 
                /* (1) It may be that reading the log fails because the garbage collector
3259
 
                 * has moved the record since we determined the location.
3260
 
                 * We handle this here, by re-reading the data the garbage collector
3261
 
                 * would have updated.
3262
 
                 *
3263
 
                 * (2) It may also happen that a new record is just being updated or
3264
 
                 * inserted. It is possible that the handle part of the record
3265
 
                 * has been written, but not yet the overflow.
3266
 
                 * This means that repeating the read attempt could work.
3267
 
                 *
3268
 
                 * (3) The extended data has been written by another handler and not yet
3269
 
                 * flushed. This should not happen because on committed extended
3270
 
                 * records are read, and all data should be flushed before
3271
 
                 * commit!
3272
 
                 *
3273
 
                 * NOTE: (2) above is not a problem when versioning is working
3274
 
                 * correctly. In this case, we should never try to read the extended
3275
 
                 * part of an uncommitted record (belonging to some other thread/
3276
 
                 * transaction).
3277
 
                 */
3278
 
                XTTabRecExtDRec rec_buf;
3279
 
 
3280
 
                xt_lock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
3281
 
                retried = TRUE;
3282
 
 
3283
 
                if (!xt_tab_get_rec_data(ot, load_rec_id, XT_REC_EXT_HEADER_SIZE, (xtWord1 *) &rec_buf))
3284
 
                        goto failed;
3285
 
 
3286
 
                XT_GET_LOG_REF(log_id, log_offset, &rec_buf);
3287
 
                goto reread;
3288
 
        }
3289
 
 
3290
 
        failed:
3291
 
        if (retried)
3292
 
                xt_unlock_mutex_ns(&ot->ot_table->tab_db->db_co_ext_lock);
3293
 
        return FAILED;
3294
 
}
3295
 
 
3296
 
xtPublic xtBool xt_tab_put_rec_data(XTOpenTablePtr ot, xtRecordID rec_id, size_t size, xtWord1 *buffer, xtOpSeqNo *op_seq)
3297
 
{
3298
 
        register XTTableHPtr    tab = ot->ot_table;
3299
 
 
3300
 
        ASSERT_NS(rec_id);
3301
 
 
3302
 
        return tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, op_seq, TRUE, ot->ot_thread);
3303
 
}
3304
 
 
3305
 
xtPublic xtBool xt_tab_put_log_op_rec_data(XTOpenTablePtr ot, u_int status, xtRecordID free_rec_id, xtRecordID rec_id, size_t size, xtWord1 *buffer)
3306
 
{
3307
 
        register XTTableHPtr    tab = ot->ot_table;
3308
 
        xtOpSeqNo                               op_seq;
3309
 
 
3310
 
        ASSERT_NS(rec_id);
3311
 
 
3312
 
        if (status == XT_LOG_ENT_REC_MOVED) {
3313
 
                if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, offsetof(XTTabRecExtDRec, re_log_id_2), size, buffer, &op_seq, TRUE, ot->ot_thread))
3314
 
                        return FAILED;
3315
 
        }
3316
 
#ifdef DEBUG
3317
 
        else if (status == XT_LOG_ENT_REC_CLEANED_1) {
3318
 
                ASSERT_NS(0);   // shouldn't be used anymore
3319
 
        }
3320
 
#endif
3321
 
        else {
3322
 
                if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, &op_seq, TRUE, ot->ot_thread))
3323
 
                        return FAILED;
3324
 
        }
3325
 
 
3326
 
        return xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, free_rec_id, rec_id, size, buffer, ot->ot_thread);
3327
 
}
3328
 
 
3329
 
xtPublic xtBool xt_tab_put_log_rec_data(XTOpenTablePtr ot, u_int status, xtRecordID free_rec_id, xtRecordID rec_id, size_t size, xtWord1 *buffer, xtOpSeqNo *op_seq)
3330
 
{
3331
 
        register XTTableHPtr    tab = ot->ot_table;
3332
 
 
3333
 
        ASSERT_NS(rec_id);
3334
 
 
3335
 
        if (status == XT_LOG_ENT_REC_MOVED) {
3336
 
                if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, offsetof(XTTabRecExtDRec, re_log_id_2), size, buffer, op_seq, TRUE, ot->ot_thread))
3337
 
                        return FAILED;
3338
 
        }
3339
 
        else {
3340
 
                if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, size, buffer, op_seq, TRUE, ot->ot_thread))
3341
 
                        return FAILED;
3342
 
        }
3343
 
 
3344
 
        return xt_xlog_modify_table(tab->tab_id, status, *op_seq, 0, free_rec_id, rec_id, size, buffer, ot->ot_thread);
3345
 
}
3346
 
 
3347
 
xtPublic xtBool xt_tab_get_rec_data(XTOpenTablePtr ot, xtRecordID rec_id, size_t size, xtWord1 *buffer)
3348
 
{
3349
 
        register XTTableHPtr    tab = ot->ot_table;
3350
 
 
3351
 
        ASSERT_NS(rec_id);
3352
 
 
3353
 
        return tab->tab_recs.xt_tc_read(ot->ot_rec_file, rec_id, (size_t) size, buffer, ot->ot_thread);
3354
 
}
3355
 
 
3356
 
/*
3357
 
 * Note: this function grants locks even to transactions that
3358
 
 * are not specifically waiting for this transaction.
3359
 
 * This is required, because all threads waiting for 
3360
 
 * a lock should be considered "equal". In other words,
3361
 
 * they should not have to wait for the "right" transaction
3362
 
 * before they get the lock, or it will turn into a
3363
 
 * race to wait for the correct transaction.
3364
 
 *
3365
 
 * A transaction T1 can end up waiting for the wrong transaction
3366
 
 * T2, because T2 has released the lock, and given it to T3.
3367
 
 * Of course, T1 will wake up soon and realize this, but
3368
 
 * it is a matter of timing.
3369
 
 *
3370
 
 * The main point is that T2 has released the lock because
3371
 
 * it has ended (see {RELEASING-LOCKS} for more details)
3372
 
 * and therefore, there is no danger of it claiming the
3373
 
 * lock again, which can lead to a deadlock if T1 is
3374
 
 * given the lock instead of T3 in the example above.
3375
 
 * Then, if T2 tries to regain the lock before T1
3376
 
 * realizes that it has the lock.
3377
 
 */
3378
 
//static xtBool tab_get_lock_after_wait(XTThreadPtr thread, XTLockWaitPtr lw)
3379
 
//{
3380
 
//      register XTTableHPtr    tab = lw->lw_ot->ot_table;
3381
 
 
3382
 
        /* {ROW-LIST-LOCK}
3383
 
         * I don't believe this lock is required. If it is, please explain why!!
3384
 
         * XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[gl->lw_row_id % XT_ROW_RWLOCKS], thread);
3385
 
         *
3386
 
         * With the old row lock implementation a XT_TAB_ROW_WRITE_LOCK was required because
3387
 
         * the row locking did not have its own locks.
3388
 
         * The new list locking has its own locks. I was using XT_TAB_ROW_READ_LOCK,
3389
 
         * but i don't think this is required.
3390
 
         */
3391
 
//      return tab->tab_locks.xt_set_temp_lock(lw->lw_ot, lw, &lw->lw_thread->st_lock_list);
3392
 
//}
3393
 
 
3394
 
/*
3395
 
 * NOTE: Previously this function did not gain the row lock.
3396
 
 * If this change is a problem, please document why!
3397
 
 * The previous implementation did wait until no lock was on the
3398
 
 * row.
3399
 
 *
3400
 
 * I am thinking that it is simply a good idea to grab the lock,
3401
 
 * instead of waiting for no lock, before the retry. But it could
3402
 
 * result in locking more than required!
3403
 
 */
3404
 
static xtBool tab_wait_for_update(register XTOpenTablePtr ot, xtRowID row_id, xtXactID xn_id, XTThreadPtr thread)
3405
 
{
3406
 
        XTLockWaitRec   lw;
3407
 
        XTXactWaitRec   xw;
3408
 
        xtBool                  ok;
3409
 
                                
3410
 
        xw.xw_xn_id = xn_id;
3411
 
 
3412
 
        lw.lw_thread = thread;
3413
 
        lw.lw_ot = ot;
3414
 
        lw.lw_row_id = row_id;
3415
 
        lw.lw_row_updated = FALSE;
3416
 
 
3417
 
        /* First try to get the lock: */
3418
 
        if (!ot->ot_table->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list))
3419
 
                return FAILED;
3420
 
        if (lw.lw_curr_lock != XT_NO_LOCK)
3421
 
                /* Wait for the lock, then the transaction: */
3422
 
                ok = xt_xn_wait_for_xact(thread, &xw, &lw);
3423
 
        else
3424
 
                /* Just wait for the transaction: */
3425
 
                ok = xt_xn_wait_for_xact(thread, &xw, NULL);
3426
 
        
3427
 
#ifdef DEBUG_LOCK_QUEUE
3428
 
        ot->ot_table->tab_locks.rl_check(&lw);
3429
 
#endif
3430
 
        return ok;
3431
 
}
3432
 
 
3433
 
/* {WAIT-FOR}
3434
 
 * XT_OLD - The record is old. No longer visible because there is
3435
 
 * newer committed record before it in the record list.
3436
 
 * This is a special case of FALSE (the record is not visible).
3437
 
 * (see {WAIT-FOR} for details).
3438
 
 * It is significant because if we find too many of these when
3439
 
 * searching for records, then we have reason to believe the
3440
 
 * sweeper is far behind. This can happen in a test like this:
3441
 
 * runTest(INCREMENT_TEST, 2, INCREMENT_TEST_UPDATE_COUNT);
3442
 
 * What happens is T1 detects an updated row by T2,
3443
 
 * but T2 has not committed yet.
3444
 
 * It waits for T2. T2 commits and updates again before T1
3445
 
 * can update.
3446
 
 *
3447
 
 * Of course if we got a lock on the row when T2 quits, then
3448
 
 * this would not happen!
3449
 
 */
3450
 
 
3451
 
/*
3452
 
 * Is a record visible?
3453
 
 * Returns TRUE, FALSE, XT_ERR.
3454
 
 *
3455
 
 * TRUE - The record is visible.
3456
 
 * FALSE - The record is not visible.
3457
 
 * XT_ERR - An exception (error) occurred.
3458
 
 * XT_NEW - The most recent variation of this row has been returned
3459
 
 * and is to be used instead of the input!
3460
 
 * XT_REREAD - Re-read the record, and try again.
3461
 
 *
3462
 
 * Basically, a record is visible if it was committed on or before
3463
 
 * the transactions "visible time" (st_visible_time), and there
3464
 
 * are no other visible records before this record in the
3465
 
 * variation chain for the record.
3466
 
 *
3467
 
 * This holds in general, but you don't always get to see the
3468
 
 * visible record (as defined in this sense).
3469
 
 *
3470
 
 * On any kind of update (SELECT FOR UPDATE, UPDATE or DELETE), you
3471
 
 * get to see the most recent variation of the row!
3472
 
 *
3473
 
 * So on update, this function will wait if necessary for a recent
3474
 
 * update to be committed.
3475
 
 *
3476
 
 * So an update is a kind of "committed read" with a wait for
3477
 
 * uncommitted records.
3478
 
 *
3479
 
 * The result:
3480
 
 * - INSERTS may not be seen by the update read, depending on when
3481
 
 *   they occur.
3482
 
 * - Records may be returned in non-index order.
3483
 
 * - New records returned must be checked again by an index scan
3484
 
 *   to make sure they conform to the condition!
3485
 
 * 
3486
 
 * CREATE TABLE test_tab (ID int primary key, Value int, Name varchar(20), 
3487
 
 * index(Value, Name)) ENGINE=pbxt;
3488
 
 * INSERT test_tab values(4, 2, 'D');
3489
 
 * INSERT test_tab values(5, 2, 'E');
3490
 
 * INSERT test_tab values(6, 2, 'F');
3491
 
 * INSERT test_tab values(7, 2, 'G');
3492
 
 * 
3493
 
 * -- C1
3494
 
 * begin;
3495
 
 * select * from test_tab where id = 6 for update;
3496
 
 * -- C2
3497
 
 * begin;
3498
 
 * select * from test_tab where value = 2 order by value, name for update;
3499
 
 * -- C1
3500
 
 * update test_tab set Name = 'A' where id = 7;
3501
 
 * commit;
3502
 
 * -- C2
3503
 
 * Result order D, E, F, A.
3504
 
 *
3505
 
 * But Jim does it like this, so it should be OK.
3506
 
 */
3507
 
static int tab_visible(register XTOpenTablePtr ot, XTTabRecHeadDPtr rec_head, xtRecordID *new_rec_id)
3508
 
{
3509
 
        XTThreadPtr                             thread = ot->ot_thread;
3510
 
        xtXactID                                xn_id;
3511
 
        XTTabRecHeadDRec                var_head;
3512
 
        xtRowID                                 row_id;
3513
 
        xtRecordID                              var_rec_id;
3514
 
        register XTTableHPtr    tab;
3515
 
        xtBool                                  wait = FALSE;
3516
 
        xtXactID                                wait_xn_id = 0;
3517
 
#ifdef TRACE_VARIATIONS
3518
 
        char                                    t_buf[500];
3519
 
        int                                             len;
3520
 
#endif
3521
 
        int                                             result = TRUE;
3522
 
        xtBool                                  rec_clean;
3523
 
        xtRecordID                              invalid_rec;
3524
 
 
3525
 
        retry:
3526
 
        /* It can be that between the time that I read the index,
3527
 
         * and the time that I try to access the
3528
 
         * record, that the record is removed by
3529
 
         * the sweeper!
3530
 
         */
3531
 
        if (XT_REC_NOT_VALID(rec_head->tr_rec_type_1))
3532
 
                return FALSE;
3533
 
 
3534
 
        row_id = XT_GET_DISK_4(rec_head->tr_row_id_4);
3535
 
 
3536
 
        /* This can happen if the row has been removed, and
3537
 
         * reused:
3538
 
         */
3539
 
        if (ot->ot_curr_row_id && row_id != ot->ot_curr_row_id)
3540
 
                return FALSE;
3541
 
 
3542
 
#ifdef TRACE_VARIATIONS
3543
 
        len = sprintf(t_buf, "row=%d rec=%d ", (int) row_id, (int) ot->ot_curr_rec_id);
3544
 
#endif
3545
 
        if (!(rec_clean = XT_REC_IS_CLEAN(rec_head->tr_rec_type_1))) {
3546
 
                /* The record is not clean, which means it has not been swept.
3547
 
                 * So we have to check if it is visible.
3548
 
                 */
3549
 
                xn_id = XT_GET_DISK_4(rec_head->tr_xact_id_4);
3550
 
                switch (xt_xn_status(ot, xn_id, ot->ot_curr_rec_id)) {
3551
 
                        case XT_XN_VISIBLE:
3552
 
                                break;
3553
 
                        case XT_XN_NOT_VISIBLE:
3554
 
                                if (ot->ot_for_update) {
3555
 
                                        /* It is visible, only if it is an insert,
3556
 
                                         * which means if has no previous variation.
3557
 
                                         * Note, if an insert is updated, the record
3558
 
                                         * should be overwritten (TODO - check this).
3559
 
                                         */
3560
 
                                        var_rec_id = XT_GET_DISK_4(rec_head->tr_prev_rec_id_4);
3561
 
                                        if (!var_rec_id)
3562
 
                                                break;
3563
 
#ifdef TRACE_VARIATIONS
3564
 
                                        if (len <= 450)
3565
 
                                                len += sprintf(t_buf+len, "OTHER COMMIT (OVERWRITTEN) T%d\n", (int) xn_id);
3566
 
                                        xt_ttracef(thread, "%s", t_buf);
3567
 
#endif
3568
 
                                }
3569
 
#ifdef TRACE_VARIATIONS
3570
 
                                else {
3571
 
                                        if (len <= 450)
3572
 
                                                len += sprintf(t_buf+len, "OTHER COMMIT T%d\n", (int) xn_id);
3573
 
                                        xt_ttracef(thread, "%s", t_buf);
3574
 
                                }
3575
 
#endif
3576
 
                                /* {WAKE-SW}
3577
 
                                 * The record is not visible, although it has been committed.
3578
 
                                 * Clean the transaction ASAP.
3579
 
                                 */
3580
 
                                ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
3581
 
                                return FALSE;
3582
 
                        case XT_XN_ABORTED:
3583
 
                                /* {WAKE-SW}
3584
 
                                 * Reading an aborted record, this transaction
3585
 
                                 * must be cleaned up ASAP!
3586
 
                                 */
3587
 
                                ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
3588
 
#ifdef TRACE_VARIATIONS
3589
 
                                if (len <= 450)
3590
 
                                        len += sprintf(t_buf+len, "ABORTED T%d\n", (int) xn_id);
3591
 
                                xt_ttracef(thread, "%s", t_buf);
3592
 
#endif
3593
 
                                return FALSE;
3594
 
                        case XT_XN_MY_UPDATE:
3595
 
                                /* This is a record written by this transaction. */
3596
 
                                if (thread->st_is_update) {
3597
 
                                        /* Check that it was not written by the current update statement: */
3598
 
                                        if (XT_STAT_ID_MASK(ot->ot_update_id) == rec_head->tr_stat_id_1) {
3599
 
#ifdef TRACE_VARIATIONS
3600
 
                                                if (len <= 450)
3601
 
                                                        len += sprintf(t_buf+len, "MY UPDATE IN THIS STATEMENT T%d\n", (int) xn_id);
3602
 
                                                xt_ttracef(thread, "%s", t_buf);
3603
 
#endif
3604
 
                                                return FALSE;
3605
 
                                        }
3606
 
                                }
3607
 
                                ot->ot_curr_row_id = row_id;
3608
 
                                ot->ot_curr_updated = TRUE;
3609
 
                                if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
3610
 
                                        return XT_ERR;
3611
 
                                /* It is visible if it is at the front of the list.
3612
 
                                 * An update can end up not being at the front of the list
3613
 
                                 * if it is deleted afterwards!
3614
 
                                 */
3615
 
#ifdef TRACE_VARIATIONS
3616
 
                                if (len <= 450) {
3617
 
                                        if (var_rec_id == ot->ot_curr_rec_id)
3618
 
                                                len += sprintf(t_buf+len, "MY UPDATE T%d\n", (int) xn_id);
3619
 
                                        else
3620
 
                                                len += sprintf(t_buf+len, "MY UPDATE (OVERWRITTEN) T%d\n", (int) xn_id);
3621
 
                                }
3622
 
                                xt_ttracef(thread, "%s", t_buf);
3623
 
#endif
3624
 
                                return var_rec_id == ot->ot_curr_rec_id;
3625
 
                        case XT_XN_OTHER_UPDATE:
3626
 
                                if (ot->ot_for_update) {
3627
 
                                        /* If this is an insert, we are interested!
3628
 
                                         * Updated values are handled below. This is because
3629
 
                                         * the changed (new) records returned below are always
3630
 
                                         * followed (in the version chain) by the record
3631
 
                                         * we would have returned (if nothing had changed).
3632
 
                                         *
3633
 
                                         * As a result, we only return records here which have
3634
 
                                         * no "history". 
3635
 
                                         */
3636
 
                                        var_rec_id = XT_GET_DISK_4(rec_head->tr_prev_rec_id_4);
3637
 
                                        if (!var_rec_id) {
3638
 
#ifdef TRACE_VARIATIONS
3639
 
                                                if (len <= 450)
3640
 
                                                        len += sprintf(t_buf+len, "OTHER INSERT (WAIT FOR) T%d\n", (int) xn_id);
3641
 
                                                xt_ttracef(thread, "%s", t_buf);
3642
 
#endif
3643
 
                                                if (!tab_wait_for_update(ot, row_id, xn_id, thread))
3644
 
                                                        return XT_ERR;
3645
 
                                                if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
3646
 
                                                        return XT_ERR;
3647
 
                                                rec_head = &var_head;
3648
 
                                                goto retry;
3649
 
                                        }
3650
 
                                }
3651
 
#ifdef TRACE_VARIATIONS
3652
 
                                if (len <= 450)
3653
 
                                        len += sprintf(t_buf+len, "OTHER UPDATE T%d\n", (int) xn_id);
3654
 
                                xt_ttracef(thread, "%s", t_buf);
3655
 
#endif
3656
 
                                return FALSE;
3657
 
                        case XT_XN_REREAD:
3658
 
#ifdef TRACE_VARIATIONS
3659
 
                                if (len <= 450)
3660
 
                                        len += sprintf(t_buf+len, "REREAD?! T%d\n", (int) xn_id);
3661
 
                                xt_ttracef(thread, "%s", t_buf);
3662
 
#endif
3663
 
                                return XT_REREAD;
3664
 
                }
3665
 
        }
3666
 
 
3667
 
        /* Follow the variation chain until we come to this record.
3668
 
         * If it is not the first visible variation then
3669
 
         * it is not visible at all. If it in not found on the
3670
 
         * variation chain, it is also not visible.
3671
 
         */
3672
 
        tab = ot->ot_table;
3673
 
 
3674
 
        retry_2:
3675
 
 
3676
 
#ifdef XT_USE_LIST_BASED_ROW_LOCKS
3677
 
        /* The list based row locks used there own locks, so
3678
 
         * it is not necessary to get a write lock here.
3679
 
         */
3680
 
        XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3681
 
#else
3682
 
        if (ot->ot_for_update)
3683
 
                XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3684
 
        else
3685
 
                XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3686
 
#endif
3687
 
 
3688
 
        invalid_rec = 0;
3689
 
        retry_3:
3690
 
        if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
3691
 
                goto failed;
3692
 
#ifdef TRACE_VARIATIONS
3693
 
        len += sprintf(t_buf+len, "ROW=%d", (int) row_id);
3694
 
#endif
3695
 
        while (var_rec_id != ot->ot_curr_rec_id) {
3696
 
                if (!var_rec_id) {
3697
 
#ifdef TRACE_VARIATIONS
3698
 
                        xt_ttracef(thread, "row=%d rec=%d NOT VISI not found in list\n", (int) row_id, (int) ot->ot_curr_rec_id);
3699
 
#endif
3700
 
                        goto not_found;
3701
 
                }
3702
 
                if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
3703
 
                        goto failed;
3704
 
#ifdef TRACE_VARIATIONS
3705
 
                if (len <= 450)
3706
 
                        len += sprintf(t_buf+len, " -> %d(%d)", (int) var_rec_id, (int) var_head.tr_rec_type_1);
3707
 
#endif
3708
 
                /* All clean records are visible, by all transactions: */
3709
 
                if (XT_REC_IS_CLEAN(var_head.tr_rec_type_1)) {
3710
 
#ifdef TRACE_VARIATIONS
3711
 
                        xt_ttracef(thread, "row=%d rec=%d NOT VISI clean rec found\n", (int) row_id, (int) ot->ot_curr_rec_id);
3712
 
#endif
3713
 
                        goto not_found;
3714
 
                }
3715
 
                if (XT_REC_IS_FREE(var_head.tr_rec_type_1)) {
3716
 
#ifdef TRACE_VARIATIONS
3717
 
                        xt_ttracef(thread, "row=%d rec=%d NOT VISI free rec found?!\n", (int) row_id, (int) ot->ot_curr_rec_id);
3718
 
#endif
3719
 
                        /*
3720
 
                         * After an analysis we came to conclusion that this situation is
3721
 
                         * possible and valid. It can happen if index scan and row deletion
3722
 
                         * go in parallel:
3723
 
                         *
3724
 
                         *      Client Thread                                Sweeper
3725
 
                         *      -------------                                -------
3726
 
                         *   1. start index scan, lock the index file.
3727
 
                         *                                                2. start row deletion, wait for index lock
3728
 
                         *   3. unlock the index file, start search for 
3729
 
                         *      the valid version of the record
3730
 
                         *                                                4. delete the row, mark record as freed, 
3731
 
                         *                                                   but not yet cleaned by sweeper
3732
 
                         *   5. observe the record being freed
3733
 
                         *
3734
 
                         * after these steps we can get here, if the record was marked as free after
3735
 
                         * the tab_visible was entered by the scanning thread. 
3736
 
                         *
3737
 
                         */
3738
 
                        if (invalid_rec != var_rec_id) {
3739
 
                                /* This was "var_rec_id = invalid_rec", caused an infinite loop (bug #310184!) */
3740
 
                                invalid_rec = var_rec_id;
3741
 
                                goto retry_3;
3742
 
                        }
3743
 
                        /* Assume end of list. */
3744
 
                        goto not_found;
3745
 
                }
3746
 
 
3747
 
                /* This can happen if the row has been removed, and
3748
 
                 * reused:
3749
 
                 */
3750
 
                if (row_id != XT_GET_DISK_4(var_head.tr_row_id_4))
3751
 
                        goto not_found;
3752
 
 
3753
 
                xn_id = XT_GET_DISK_4(var_head.tr_xact_id_4);
3754
 
                /* This variation is visibleif committed before this
3755
 
                 * transaction started, or updated by this transaction.
3756
 
                 *
3757
 
                 * We now know that this is the valid variation for
3758
 
                 * this record (for this table) for this transaction!
3759
 
                 * This will not change, unless the transaction
3760
 
                 * updates the record (again).
3761
 
                 *
3762
 
                 * So we can store this information as a hint, if
3763
 
                 * we see other variations belonging to this record,
3764
 
                 * then we can ignore them immediately!
3765
 
                 */
3766
 
                switch (xt_xn_status(ot, xn_id, var_rec_id)) {
3767
 
                        case XT_XN_VISIBLE:
3768
 
                                /* {WAKE-SW}
3769
 
                                 * We have encountered a record that has been overwritten, if the
3770
 
                                 * record has not been cleaned, then the sweeper is too far
3771
 
                                 * behind!
3772
 
                                 */
3773
 
                                if (!rec_clean)
3774
 
                                        ot->ot_table->tab_db->db_sw_faster |= XT_SW_DIRTY_RECORD_FOUND;
3775
 
#ifdef TRACE_VARIATIONS
3776
 
                                xt_ttracef(thread, "row=%d rec=%d NOT VISI committed rec found\n", (int) row_id, (int) ot->ot_curr_rec_id);
3777
 
#endif
3778
 
                                goto not_found;
3779
 
                        case XT_XN_NOT_VISIBLE:
3780
 
                                if (ot->ot_for_update) {
3781
 
                                        /* Substitute this record for the one we
3782
 
                                         * are reading!!
3783
 
                                         */
3784
 
                                        if (result == TRUE) {
3785
 
                                                if (XT_REC_IS_DELETE(var_head.tr_rec_type_1))
3786
 
                                                        result = FALSE;
3787
 
                                                else {
3788
 
                                                        *new_rec_id = var_rec_id;
3789
 
                                                        result = XT_NEW;
3790
 
                                                }
3791
 
                                        }
3792
 
                                }
3793
 
                                break;
3794
 
                        case XT_XN_ABORTED:
3795
 
                                /* Ignore the record, it will be removed. */
3796
 
                                break;
3797
 
                        case XT_XN_MY_UPDATE:
3798
 
#ifdef TRACE_VARIATIONS
3799
 
                                xt_ttracef(thread, "row=%d rec=%d NOT VISI my update found\n", (int) row_id, (int) ot->ot_curr_rec_id);
3800
 
#endif
3801
 
                                goto not_found;
3802
 
                        case XT_XN_OTHER_UPDATE:
3803
 
                                /* Wait for this update to commit or abort: */
3804
 
                                if (!wait) {
3805
 
                                        wait = TRUE;
3806
 
                                        wait_xn_id = xn_id;
3807
 
                                }
3808
 
#ifdef TRACE_VARIATIONS
3809
 
                                if (len <= 450)
3810
 
                                        len += sprintf(t_buf+len, "-T%d", (int) wait_xn_id);
3811
 
#endif
3812
 
                                break;
3813
 
                        case XT_XN_REREAD:
3814
 
                                /* {RETRY-READ}
3815
 
                                 * TODO: This is not as "correct" as it could be.
3816
 
                                 * Such records should be considered to be aborted,
3817
 
                                 * and removed from the list.
3818
 
                                 */
3819
 
                                if (invalid_rec != var_rec_id) {
3820
 
                                        invalid_rec = var_rec_id;
3821
 
                                        goto retry_3;
3822
 
                                }
3823
 
                                if (!tab_record_corrupt(ot, row_id, var_rec_id, true, 1))
3824
 
                                        goto failed;
3825
 
 
3826
 
                                /* Assume end of list. */
3827
 
#ifdef XT_CRASH_DEBUG
3828
 
                                /* Should not happen! */
3829
 
                                xt_crash_me();
3830
 
#endif
3831
 
                                goto not_found;
3832
 
                }
3833
 
                var_rec_id = XT_GET_DISK_4(var_head.tr_prev_rec_id_4);
3834
 
        }
3835
 
#ifdef TRACE_VARIATIONS
3836
 
        if (len <= 450)
3837
 
                sprintf(t_buf+len, " -> %d(%d)\n", (int) var_rec_id, (int) rec_head->tr_rec_type_1);
3838
 
        else
3839
 
                sprintf(t_buf+len, " ...\n");
3840
 
        //xt_ttracef(thread, "%s", t_buf);
3841
 
#endif
3842
 
 
3843
 
        if (ot->ot_for_update) {
3844
 
                xtBool                  ok;
3845
 
                XTLockWaitRec   lw;
3846
 
 
3847
 
                if (wait) {
3848
 
                        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3849
 
#ifdef TRACE_VARIATIONS
3850
 
                        xt_ttracef(thread, "T%d WAIT FOR T%d (will retry)\n", (int) thread->st_xact_data->xd_start_xn_id, (int) wait_xn_id);
3851
 
#endif
3852
 
                        if (!tab_wait_for_update(ot, row_id, wait_xn_id, thread))
3853
 
                                return XT_ERR;
3854
 
                        wait = FALSE;
3855
 
                        wait_xn_id = 0;
3856
 
                        /*
3857
 
                         * Retry in order to try to avoid missing
3858
 
                         * any records that we should see in FOR UPDATE
3859
 
                         * mode.
3860
 
                         *
3861
 
                         * We also want to take another look at the record
3862
 
                         * we just tried to read.
3863
 
                         *
3864
 
                         * If it has been updated, then a new record has
3865
 
                         * been created. This will be detected when we
3866
 
                         * try to read it again, and XT_NEW will be returned.
3867
 
                         */
3868
 
                        thread->st_statistics.st_retry_index_scan++;
3869
 
                        return XT_RETRY;
3870
 
                }
3871
 
 
3872
 
                /* {ROW-LIST-LOCK} */
3873
 
                lw.lw_thread = thread;
3874
 
                lw.lw_ot = ot;
3875
 
                lw.lw_row_id = row_id;
3876
 
                lw.lw_row_updated = FALSE;
3877
 
                ok = tab->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list);
3878
 
                XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3879
 
                if (!ok) {
3880
 
#ifdef DEBUG_LOCK_QUEUE
3881
 
                        ot->ot_table->tab_locks.rl_check(&lw);
3882
 
#endif
3883
 
                        return XT_ERR;
3884
 
                }
3885
 
                if (lw.lw_curr_lock != XT_NO_LOCK) {
3886
 
#ifdef TRACE_VARIATIONS
3887
 
                        xt_ttracef(thread, "T%d WAIT FOR LOCK(%s) T%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) lw.lw_curr_lock == XT_TEMP_LOCK ? "temp" : "perm", (int) xn_id);
3888
 
#endif
3889
 
                        if (!xt_xn_wait_for_xact(thread, NULL, &lw)) {
3890
 
#ifdef DEBUG_LOCK_QUEUE
3891
 
                                ot->ot_table->tab_locks.rl_check(&lw);
3892
 
#endif
3893
 
                                return XT_ERR;
3894
 
                        }
3895
 
#ifdef DEBUG_LOCK_QUEUE
3896
 
                        ot->ot_table->tab_locks.rl_check(&lw);
3897
 
#endif
3898
 
#ifdef TRACE_VARIATIONS
3899
 
                        len = sprintf(t_buf, "(retry): row=%d rec=%d ", (int) row_id, (int) ot->ot_curr_rec_id);
3900
 
#endif
3901
 
                        /* GOTCHA!
3902
 
                         * Reset the result before we go down the list again, to make sure we
3903
 
                         * get the latest record!!
3904
 
                         */
3905
 
                        result = TRUE;
3906
 
                        thread->st_statistics.st_reread_record_list++;
3907
 
                        goto retry_2;
3908
 
                }
3909
 
#ifdef DEBUG_LOCK_QUEUE
3910
 
                ot->ot_table->tab_locks.rl_check(&lw);
3911
 
#endif
3912
 
        }
3913
 
        else {
3914
 
                XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3915
 
        }
3916
 
 
3917
 
#ifdef TRACE_VARIATIONS
3918
 
        if (result == XT_NEW)
3919
 
                xt_ttracef(thread, "row=%d rec=%d RETURN NEW %d\n", (int) row_id, (int) ot->ot_curr_rec_id, (int) *new_rec_id);
3920
 
        else if (result)
3921
 
                xt_ttracef(thread, "row=%d rec=%d VISIBLE\n", (int) row_id, (int) ot->ot_curr_rec_id);
3922
 
        else
3923
 
                xt_ttracef(thread, "row=%d rec=%d RETURN NOT VISIBLE (NEW)\n", (int) row_id, (int) ot->ot_curr_rec_id);
3924
 
#endif
3925
 
 
3926
 
        ot->ot_curr_row_id = row_id;
3927
 
        ot->ot_curr_updated = FALSE;
3928
 
        return result;
3929
 
 
3930
 
        not_found:
3931
 
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3932
 
        return FALSE;
3933
 
 
3934
 
        failed:
3935
 
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
3936
 
        return XT_ERR;
3937
 
}
3938
 
 
3939
 
/*
3940
 
 * Return TRUE if the record has been read, and is visible.
3941
 
 * Return FALSE if the record is not visible.
3942
 
 * Return XT_ERR if an error occurs.
3943
 
 */
3944
 
xtPublic int xt_tab_visible(XTOpenTablePtr ot)
3945
 
{
3946
 
        xtRowID                         row_id;
3947
 
        XTTabRecHeadDRec        rec_head;
3948
 
        xtRecordID                      new_rec_id;
3949
 
        xtBool                          read_again = FALSE;
3950
 
        int                                     r;
3951
 
 
3952
 
        if ((row_id = ot->ot_curr_row_id)) {
3953
 
                /* Fast track, do a quick check.
3954
 
                 * Row ID is only set if this record has been committed,
3955
 
                 * (and swept).
3956
 
                 * Check if it is the first on the list!
3957
 
                 */
3958
 
                xtRecordID var_rec_id;
3959
 
 
3960
 
                retry:
3961
 
                if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
3962
 
                        return XT_ERR;
3963
 
                if (ot->ot_curr_rec_id == var_rec_id) {
3964
 
                        /* Looks good.. */
3965
 
                        if (ot->ot_for_update) {
3966
 
                                XTThreadPtr             thread = ot->ot_thread;
3967
 
                                XTTableHPtr             tab = ot->ot_table;
3968
 
                                XTLockWaitRec   lw;
3969
 
 
3970
 
                                /* {ROW-LIST-LOCK} */
3971
 
                                lw.lw_thread = thread;
3972
 
                                lw.lw_ot = ot;
3973
 
                                lw.lw_row_id = row_id;
3974
 
                                lw.lw_row_updated = FALSE;
3975
 
                                if (!tab->tab_locks.xt_set_temp_lock(ot, &lw, &thread->st_lock_list)) {
3976
 
#ifdef DEBUG_LOCK_QUEUE
3977
 
                                        ot->ot_table->tab_locks.rl_check(&lw);
3978
 
#endif
3979
 
                                        return XT_ERR;
3980
 
                                }
3981
 
                                if (lw.lw_curr_lock != XT_NO_LOCK) {
3982
 
                                        if (!xt_xn_wait_for_xact(thread, NULL, &lw)) {
3983
 
#ifdef DEBUG_LOCK_QUEUE
3984
 
                                                ot->ot_table->tab_locks.rl_check(&lw);
3985
 
#endif
3986
 
                                                return XT_ERR;
3987
 
                                        }
3988
 
#ifdef DEBUG_LOCK_QUEUE
3989
 
                                        ot->ot_table->tab_locks.rl_check(&lw);
3990
 
#endif
3991
 
                                        goto retry;
3992
 
                                }
3993
 
#ifdef DEBUG_LOCK_QUEUE
3994
 
                                ot->ot_table->tab_locks.rl_check(&lw);
3995
 
#endif
3996
 
                        }
3997
 
                        return TRUE;
3998
 
                }
3999
 
        }
4000
 
 
4001
 
        reread:
4002
 
        if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
4003
 
                return XT_ERR;
4004
 
 
4005
 
        switch ((r = tab_visible(ot, &rec_head, &new_rec_id))) {
4006
 
                case XT_NEW:
4007
 
                        ot->ot_curr_rec_id = new_rec_id;
4008
 
                        break;
4009
 
                case XT_REREAD:
4010
 
                        /* Avoid infinite loop: */
4011
 
                        if (read_again) {
4012
 
                                /* Should not happen! */
4013
 
                                if (!tab_record_corrupt(ot, row_id, ot->ot_curr_rec_id, true, 2))
4014
 
                                        return XT_ERR;
4015
 
#ifdef XT_CRASH_DEBUG
4016
 
                                /* Generate a core dump! */
4017
 
                                xt_crash_me();
4018
 
#endif
4019
 
                                return FALSE;
4020
 
                        }
4021
 
                        read_again = TRUE;
4022
 
                        goto reread;
4023
 
                default:
4024
 
                        break;
4025
 
        }
4026
 
        return r;
4027
 
}
4028
 
 
4029
 
/*
4030
 
 * Read a record, and return one of the following:
4031
 
 * TRUE - the record has been read, and is visible.
4032
 
 * FALSE - the record is not visible.
4033
 
 * XT_ERR - an error occurs.
4034
 
 * XT_NEW - Means the expected record has been changed.
4035
 
 * When doing an index scan, the conditions must be checked again!
4036
 
 */
4037
 
xtPublic int xt_tab_read_record(register XTOpenTablePtr ot, xtWord1 *buffer)
4038
 
{
4039
 
        register XTTableHPtr    tab = ot->ot_table;
4040
 
        size_t                                  rec_size = tab->tab_dic.dic_rec_size;
4041
 
        xtRecordID                              new_rec_id;
4042
 
        int                                             result;
4043
 
        xtBool                                  read_again = FALSE;
4044
 
 
4045
 
        if (!(ot->ot_thread->st_xact_data)) {
4046
 
                xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_TRANSACTION);
4047
 
                return XT_ERR;
4048
 
        }
4049
 
 
4050
 
        reread:
4051
 
        if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, rec_size, ot->ot_row_rbuffer))
4052
 
                return XT_ERR;
4053
 
 
4054
 
        switch (tab_visible(ot, (XTTabRecHeadDPtr) ot->ot_row_rbuffer, &new_rec_id)) {
4055
 
                case FALSE:
4056
 
                        return FALSE;
4057
 
                case XT_ERR:
4058
 
                        return XT_ERR;
4059
 
                case XT_NEW:
4060
 
                        if (!xt_tab_get_rec_data(ot, new_rec_id, rec_size, ot->ot_row_rbuffer))
4061
 
                                return XT_ERR;
4062
 
                        ot->ot_curr_rec_id = new_rec_id;
4063
 
                        result = XT_NEW;
4064
 
                        break;
4065
 
                case XT_RETRY:
4066
 
                        return XT_RETRY;
4067
 
                case XT_REREAD:
4068
 
                        /* Avoid infinite loop: */
4069
 
                        if (read_again) {
4070
 
                                /* Should not happen! */
4071
 
                                if (!tab_record_corrupt(ot, XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4), ot->ot_curr_rec_id, true, 3))
4072
 
                                        return XT_ERR;
4073
 
#ifdef XT_CRASH_DEBUG
4074
 
                                /* Generate a core dump! */
4075
 
                                xt_crash_me();
4076
 
#endif
4077
 
                                return FALSE;
4078
 
                        }
4079
 
                        read_again = TRUE;
4080
 
                        goto reread;
4081
 
                default:
4082
 
                        result = OK;
4083
 
                        break;
4084
 
        }
4085
 
 
4086
 
        if (ot->ot_rec_fixed)
4087
 
                memcpy(buffer, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
4088
 
        else if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
4089
 
                if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
4090
 
                        return XT_ERR;
4091
 
        }
4092
 
        else {
4093
 
                u_int cols_req = ot->ot_cols_req;
4094
 
 
4095
 
                ASSERT_NS(cols_req);
4096
 
                if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
4097
 
                        if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
4098
 
                                return XT_ERR;
4099
 
                }
4100
 
                else {
4101
 
                        if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
4102
 
                                return XT_ERR;
4103
 
                }
4104
 
        }
4105
 
 
4106
 
        return result;
4107
 
}
4108
 
 
4109
 
/*
4110
 
 * Returns:
4111
 
 *
4112
 
 * TRUE/OK - record was read.
4113
 
 * FALSE/FAILED - An error occurred.
4114
 
 */
4115
 
xtPublic int xt_tab_dirty_read_record(register XTOpenTablePtr ot, xtWord1 *buffer)
4116
 
{
4117
 
        register XTTableHPtr    tab = ot->ot_table;
4118
 
        size_t                                  rec_size = tab->tab_dic.dic_rec_size;
4119
 
 
4120
 
        if (!xt_tab_get_rec_data(ot, ot->ot_curr_rec_id, rec_size, ot->ot_row_rbuffer))
4121
 
                return FAILED;
4122
 
 
4123
 
        if (XT_REC_NOT_VALID(ot->ot_row_rbuffer[0])) {
4124
 
                /* Should not happen! */
4125
 
                xt_register_xterr(XT_REG_CONTEXT, XT_ERR_RECORD_DELETED);
4126
 
                return FAILED;
4127
 
        }
4128
 
 
4129
 
        ot->ot_curr_row_id = XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4);
4130
 
        ot->ot_curr_updated =
4131
 
                (XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_xact_id_4) == ot->ot_thread->st_xact_data->xd_start_xn_id);
4132
 
 
4133
 
        if (ot->ot_rec_fixed)
4134
 
                memcpy(buffer, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
4135
 
        else if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
4136
 
                if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
4137
 
                        return FAILED;
4138
 
        }
4139
 
        else {
4140
 
                u_int cols_req = ot->ot_cols_req;
4141
 
 
4142
 
                ASSERT_NS(cols_req);
4143
 
                if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
4144
 
                        if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
4145
 
                                return FAILED;
4146
 
                }
4147
 
                else {
4148
 
                        if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
4149
 
                                return FAILED;
4150
 
                }
4151
 
        }
4152
 
 
4153
 
        return OK;
4154
 
}
4155
 
 
4156
 
/*
 * TAB_ROW_LOAD_CACHE is the value passed to xt_tc_get_page() by
 * xt_tab_load_row_pointers().
 */
#ifndef XT_USE_ROW_REC_MMAP_FILES
#define TAB_ROW_LOAD_CACHE              TRUE
#else
/* Loading into cache is not required,
 * Instead we copy the memory map to load the
 * data.
 */
#define TAB_ROW_LOAD_CACHE              FALSE
#endif
4165
 
 
4166
 
/*
4167
 
 * Pull the entire row pointer file into memory.
4168
 
 */
4169
 
xtPublic void xt_tab_load_row_pointers(XTThreadPtr self, XTOpenTablePtr ot)
4170
 
{
4171
 
        XTTableHPtr     tab = ot->ot_table;
4172
 
        xtRecordID      eof_rec_id = tab->tab_row_eof_id;
4173
 
        xtInt8          usage;
4174
 
        xtWord1         *buffer = NULL;
4175
 
 
4176
 
        /* Check if there is enough cache: */
4177
 
        usage = xt_tc_get_usage();
4178
 
        if (xt_tc_get_high() > usage)
4179
 
                usage = xt_tc_get_high();
4180
 
        if (usage + ((xtInt8) eof_rec_id * (xtInt8) tab->tab_rows.tci_rec_size) < xt_tc_get_size()) {
4181
 
                xtRecordID                      rec_id;
4182
 
                size_t                          poffset, tfer;
4183
 
                off_t                           offset, end_offset;
4184
 
                XTTabCachePagePtr       page;
4185
 
                
4186
 
                end_offset = xt_row_id_to_row_offset(tab, eof_rec_id);
4187
 
                rec_id = 1;
4188
 
                while (rec_id < eof_rec_id) {
4189
 
                        if (!tab->tab_rows.xt_tc_get_page(ot->ot_row_file, rec_id, TAB_ROW_LOAD_CACHE, &page, &poffset, self))
4190
 
                                xt_throw(self);
4191
 
                        if (page)
4192
 
                                tab->tab_rows.xt_tc_release_page(ot->ot_row_file, page, self);
4193
 
                        else {
4194
 
                                xtWord1 *buff_ptr;
4195
 
 
4196
 
                                if (!buffer)
4197
 
                                        buffer = (xtWord1 *) xt_malloc(self, tab->tab_rows.tci_page_size);
4198
 
                                offset = xt_row_id_to_row_offset(tab, rec_id);
4199
 
                                tfer = tab->tab_rows.tci_page_size;
4200
 
                                if (offset + (off_t) tfer > end_offset)
4201
 
                                        tfer = (size_t) (end_offset - offset);
4202
 
                                XT_LOCK_MEMORY_PTR(buff_ptr, ot->ot_row_file, offset, tfer, &self->st_statistics.st_rec, self);
4203
 
                                if (buff_ptr) {
4204
 
                                        memcpy(buffer, buff_ptr, tfer);
4205
 
                                        XT_UNLOCK_MEMORY_PTR(ot->ot_row_file, buff_ptr, self);
4206
 
                                }
4207
 
                        }
4208
 
                        rec_id += tab->tab_rows.tci_rows_per_page;
4209
 
                }
4210
 
                if (buffer)
4211
 
                        xt_free(self, buffer);
4212
 
        }
4213
 
}
4214
 
 
4215
 
xtPublic void xt_tab_load_table(XTThreadPtr self, XTOpenTablePtr ot)
4216
 
{
4217
 
        xt_load_pages(self, ot);
4218
 
        xt_load_indices(self, ot);
4219
 
}
4220
 
 
4221
 
xtPublic xtBool xt_tab_load_record(register XTOpenTablePtr ot, xtRecordID rec_id, XTInfoBufferPtr rec_buf)
4222
 
{
4223
 
        register XTTableHPtr    tab = ot->ot_table;
4224
 
        size_t                                  rec_size = tab->tab_dic.dic_rec_size;
4225
 
 
4226
 
        if (!xt_tab_get_rec_data(ot, rec_id, rec_size, ot->ot_row_rbuffer))
4227
 
                return FAILED;
4228
 
 
4229
 
        if (XT_REC_NOT_VALID(ot->ot_row_rbuffer[0])) {
4230
 
                /* Should not happen! */
4231
 
                XTThreadPtr self = ot->ot_thread;
4232
 
 
4233
 
                xt_log(XT_WARNING, "Recently updated record invalid\n");
4234
 
                return OK;
4235
 
        }
4236
 
 
4237
 
        ot->ot_curr_row_id = XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_row_id_4);
4238
 
        ot->ot_curr_updated =
4239
 
                (XT_GET_DISK_4(((XTTabRecHeadDPtr) ot->ot_row_rbuffer)->tr_xact_id_4) == ot->ot_thread->st_xact_data->xd_start_xn_id);
4240
 
 
4241
 
        if (ot->ot_rec_fixed) {
4242
 
                size_t size = rec_size - XT_REC_FIX_HEADER_SIZE;
4243
 
                if (!xt_ib_alloc(NULL, rec_buf, size))
4244
 
                        return FAILED;
4245
 
                memcpy(rec_buf->ib_db.db_data, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, size);
4246
 
        }
4247
 
        else {
4248
 
                if (!xt_ib_alloc(NULL, rec_buf, tab->tab_dic.dic_mysql_buf_size))
4249
 
                        return FAILED;
4250
 
                if (ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VARIABLE || ot->ot_row_rbuffer[0] == XT_TAB_STATUS_VAR_CLEAN) {
4251
 
                        if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_buf->ib_db.db_data, ot->ot_cols_req))
4252
 
                                return FAILED;
4253
 
                }
4254
 
                else {
4255
 
                        u_int cols_req = ot->ot_cols_req;
4256
 
 
4257
 
                        ASSERT_NS(cols_req);
4258
 
                        if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
4259
 
                                if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, rec_buf->ib_db.db_data, cols_req))
4260
 
                                        return FAILED;
4261
 
                        }
4262
 
                        else {
4263
 
                                if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, rec_buf->ib_db.db_data, cols_req))
4264
 
                                        return FAILED;
4265
 
                        }
4266
 
                }
4267
 
        }
4268
 
 
4269
 
        return OK;
4270
 
}
4271
 
 
4272
 
xtPublic xtBool xt_tab_free_row(XTOpenTablePtr ot, XTTableHPtr tab, xtRowID row_id)
4273
 
{
4274
 
        XTTabRowRefDRec free_row;
4275
 
        xtRowID                 prev_row;
4276
 
        xtOpSeqNo               op_seq;
4277
 
 
4278
 
        ASSERT_NS(row_id); // Cannot free the header!
4279
 
 
4280
 
        xt_lock_mutex_ns(&tab->tab_row_lock);
4281
 
        prev_row = tab->tab_row_free_id;
4282
 
        XT_SET_DISK_4(free_row.rr_ref_id_4, prev_row);
4283
 
        if (!tab->tab_rows.xt_tc_write(ot->ot_row_file, row_id, 0, sizeof(XTTabRowRefDRec), (xtWord1 *) &free_row, &op_seq, TRUE, ot->ot_thread)) {
4284
 
                xt_unlock_mutex_ns(&tab->tab_row_lock);
4285
 
                return FAILED;
4286
 
        }
4287
 
        tab->tab_row_free_id = row_id;
4288
 
        tab->tab_row_fnum++;
4289
 
        ASSERT_NS(tab->tab_row_fnum < tab->tab_row_eof_id);
4290
 
        xt_unlock_mutex_ns(&tab->tab_row_lock);
4291
 
 
4292
 
        if (!xt_xlog_modify_table(tab->tab_id, XT_LOG_ENT_ROW_FREED, op_seq, 0, 0, row_id, sizeof(XTTabRowRefDRec), (xtWord1 *) &free_row, ot->ot_thread))
4293
 
                return FAILED;
4294
 
 
4295
 
        return OK;
4296
 
}
4297
 
 
4298
 
static void tab_free_ext_record_on_fail(XTOpenTablePtr ot, xtRecordID rec_id, XTTabRecExtDPtr ext_rec, xtBool log_err)
4299
 
{
4300
 
        xtWord4         log_over_size = XT_GET_DISK_4(ext_rec->re_log_dat_siz_4);
4301
 
        xtLogID         log_id;
4302
 
        xtLogOffset     log_offset;
4303
 
 
4304
 
        XT_GET_LOG_REF(log_id, log_offset, ext_rec);
4305
 
 
4306
 
        if (ot->ot_table->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
4307
 
                xt_tab_free_ext_slot(ot->ot_table, log_id, log_offset, log_over_size);
4308
 
        else {
4309
 
                if (!ot->ot_thread->st_dlog_buf.dlb_delete_log(log_id, log_offset, log_over_size, ot->ot_table->tab_id, rec_id, ot->ot_thread)) {
4310
 
                        if (log_err)
4311
 
                                xt_log_and_clear_exception_ns();
4312
 
                }
4313
 
        }
4314
 
}
4315
 
 
4316
 
static void tab_save_exception(XTExceptionPtr e)
4317
 
{
4318
 
        XTThreadPtr self = xt_get_self();
4319
 
 
4320
 
        *e = self->t_exception;
4321
 
}
4322
 
 
4323
 
static void tab_restore_exception(XTExceptionPtr e)
4324
 
{
4325
 
        XTThreadPtr self = xt_get_self();
4326
 
 
4327
 
        self->t_exception = *e;
4328
 
}
4329
 
 
4330
 
/*
4331
 
 * This function assumes that a record may be partially written.
4332
 
 * It removes all associated data and references to the record.
4333
 
 *
4334
 
 * This function return XT_ERR if an error occurs.
4335
 
 * TRUE if the record has been removed, and may be freed.
4336
 
 * FALSE if the record has already been freed. 
4337
 
 *
4338
 
 */
4339
 
xtPublic int xt_tab_remove_record(XTOpenTablePtr ot, xtRecordID rec_id, xtWord1 *rec_data, xtRecordID *prev_var_id, xtBool clean_delete, xtRowID row_id, xtXactID XT_UNUSED(xn_id))
4340
 
{
4341
 
        register XTTableHPtr    tab = ot->ot_table;
4342
 
        size_t                                  rec_size;
4343
 
        xtWord1                                 old_rec_type;
4344
 
        u_int                                   cols_req;
4345
 
        u_int                                   cols_in_buffer;
4346
 
 
4347
 
        *prev_var_id = 0;
4348
 
 
4349
 
        if (!rec_id)
4350
 
                return FALSE;
4351
 
 
4352
 
        /*
4353
 
         * NOTE: This function uses the read buffer. This should be OK because
4354
 
         * the function is only called by the sweeper. The read buffer
4355
 
         * is REQUIRED because of the call to xt_tab_load_ext_data()!!!
4356
 
         */
4357
 
        rec_size = tab->tab_dic.dic_rec_size;
4358
 
        if (!xt_tab_get_rec_data(ot, rec_id, rec_size, ot->ot_row_rbuffer))
4359
 
                return XT_ERR;
4360
 
        old_rec_type = ot->ot_row_rbuffer[0];
4361
 
 
4362
 
        /* Check of the record has not already been freed: */
4363
 
        if (XT_REC_IS_FREE(old_rec_type))
4364
 
                return FALSE;
4365
 
 
4366
 
        /* This record must belong to the given row: */
4367
 
        if (XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_row_id_4) != row_id)
4368
 
                return FALSE;
4369
 
 
4370
 
        /* The transaction ID of the record must be BEFORE or equal to the given
4371
 
         * transaction ID.
4372
 
         *
4373
 
         * No, this does not always hold. Because we wait for updates now,
4374
 
         * a "younger" transaction can update before an older
4375
 
         * transaction.
4376
 
         * Commit order determined the actual order in which the transactions
4377
 
         * should be replicated. This is determined by the log number of
4378
 
         * the commit record!
4379
 
        if (db->db_xn_curr_id(xn_id, XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_xact_id_4)))
4380
 
                return FALSE;
4381
 
         */
4382
 
 
4383
 
        *prev_var_id = XT_GET_DISK_4(((XTTabRecExtDPtr) ot->ot_row_rbuffer)->tr_prev_rec_id_4);
4384
 
 
4385
 
        if (tab->tab_dic.dic_key_count) {
4386
 
                XTIndexPtr      *ind;
4387
 
 
4388
 
                switch (old_rec_type) {
4389
 
                        case XT_TAB_STATUS_DELETE:
4390
 
                        case XT_TAB_STATUS_DEL_CLEAN:
4391
 
                                rec_size = sizeof(XTTabRecHeadDRec);
4392
 
                                goto set_removed;
4393
 
                        case XT_TAB_STATUS_FIXED:
4394
 
                        case XT_TAB_STATUS_FIX_CLEAN:
4395
 
                                /* We know that for a fixed length record, 
4396
 
                                 * dic_ind_rec_len <= dic_rec_size! */
4397
 
                                rec_size = (size_t) tab->tab_dic.dic_ind_rec_len + XT_REC_FIX_HEADER_SIZE;
4398
 
                                rec_data = ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE;
4399
 
                                break;
4400
 
                        case XT_TAB_STATUS_VARIABLE:
4401
 
                        case XT_TAB_STATUS_VAR_CLEAN:
4402
 
                                cols_req = tab->tab_dic.dic_ind_cols_req;
4403
 
 
4404
 
                                cols_in_buffer = cols_req;
4405
 
                                rec_size = myxt_load_row_length(ot, rec_size - XT_REC_FIX_HEADER_SIZE, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, &cols_in_buffer);
4406
 
                                if (cols_in_buffer < cols_req)
4407
 
                                        rec_size = tab->tab_dic.dic_rec_size;
4408
 
                                else 
4409
 
                                        rec_size += XT_REC_FIX_HEADER_SIZE;
4410
 
                                if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_FIX_HEADER_SIZE, rec_data, cols_req)) {
4411
 
                                        xt_log_and_clear_exception_ns();
4412
 
                                        goto set_removed;
4413
 
                                }
4414
 
                                break;
4415
 
                        case XT_TAB_STATUS_EXT_DLOG:
4416
 
                        case XT_TAB_STATUS_EXT_CLEAN:
4417
 
                                cols_req = tab->tab_dic.dic_ind_cols_req;
4418
 
 
4419
 
                                ASSERT_NS(cols_req);
4420
 
                                cols_in_buffer = cols_req;
4421
 
                                rec_size = myxt_load_row_length(ot, rec_size - XT_REC_EXT_HEADER_SIZE, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, &cols_in_buffer);
4422
 
                                if (cols_in_buffer < cols_req) {
4423
 
                                        rec_size = tab->tab_dic.dic_rec_size;
4424
 
                                        if (!xt_tab_load_ext_data(ot, rec_id, rec_data, cols_req)) {
4425
 
                                                /* This is actually quite possible after recovery, see [(3)] */
4426
 
                                                if (ot->ot_thread->t_exception.e_xt_err != XT_ERR_BAD_EXT_RECORD &&
4427
 
                                                        ot->ot_thread->t_exception.e_xt_err != XT_ERR_DATA_LOG_NOT_FOUND)
4428
 
                                                        xt_log_and_clear_exception_ns();
4429
 
                                                goto set_removed;
4430
 
                                        }
4431
 
                                }
4432
 
                                else {
4433
 
                                        /* All the records we require are in the buffer... */
4434
 
                                        rec_size += XT_REC_EXT_HEADER_SIZE;
4435
 
                                        if (!myxt_load_row(ot, ot->ot_row_rbuffer + XT_REC_EXT_HEADER_SIZE, rec_data, cols_req)) {
4436
 
                                                xt_log_and_clear_exception_ns();
4437
 
                                                goto set_removed;
4438
 
                                        }
4439
 
                                }
4440
 
                                break;
4441
 
                        default:
4442
 
                                break;
4443
 
                }
4444
 
 
4445
 
                /* Could this be the case?: This change may only be flushed after the
4446
 
                 * operation below has been flushed to the log.
4447
 
                 *
4448
 
                 * No, remove records are never "undone". The sweeper will delete
4449
 
                 * the record again if it does not land in the log.
4450
 
                 *
4451
 
                 * The fact that the index entries have already been removed is not
4452
 
                 * a problem.
4453
 
                 */
4454
 
                if (!tab->tab_dic.dic_disable_index) {
4455
 
                        ind = tab->tab_dic.dic_keys;
4456
 
                        for (u_int i=0; i<tab->tab_dic.dic_key_count; i++, ind++) {
4457
 
                                if (!xt_idx_delete(ot, *ind, rec_id, rec_data))
4458
 
                                        xt_log_and_clear_exception_ns();
4459
 
                        }
4460
 
                }
4461
 
        }
4462
 
        else {
4463
 
                /* No indices: */
4464
 
                switch (old_rec_type) {
4465
 
                        case XT_TAB_STATUS_DELETE:
4466
 
                        case XT_TAB_STATUS_DEL_CLEAN:
4467
 
                                rec_size = XT_REC_FIX_HEADER_SIZE;
4468
 
                                break;
4469
 
                        case XT_TAB_STATUS_FIXED:
4470
 
                        case XT_TAB_STATUS_FIX_CLEAN:
4471
 
                        case XT_TAB_STATUS_VARIABLE:
4472
 
                        case XT_TAB_STATUS_VAR_CLEAN:
4473
 
                                rec_size = XT_REC_FIX_HEADER_SIZE;
4474
 
                                break;
4475
 
                        case XT_TAB_STATUS_EXT_DLOG:
4476
 
                        case XT_TAB_STATUS_EXT_CLEAN:
4477
 
                                rec_size = XT_REC_EXT_HEADER_SIZE;
4478
 
                                break;
4479
 
                }
4480
 
        }
4481
 
 
4482
 
        set_removed:
4483
 
        if (XT_REC_IS_EXT_DLOG(old_rec_type)) {
4484
 
                /* {LOCK-EXT-REC} Lock, and read again to make sure that the
4485
 
                 * compactor does not change this record, while
4486
 
                 * we are removing it! */
4487
 
                xt_lock_mutex_ns(&tab->tab_db->db_co_ext_lock);
4488
 
                if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_EXT_HEADER_SIZE, ot->ot_row_rbuffer)) {
4489
 
                        xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock);
4490
 
                        return XT_ERR;
4491
 
                }
4492
 
                xt_unlock_mutex_ns(&tab->tab_db->db_co_ext_lock);
4493
 
 
4494
 
        }
4495
 
 
4496
 
 
4497
 
        /* A record is "clean" deleted if the record was
4498
 
         * XT_TAB_STATUS_DELETE which was committed.
4499
 
         * This makes sure that the record will still invalidate
4500
 
         * following records in a row.
4501
 
         *
4502
 
         * Example:
4503
 
         *
4504
 
         * 1. INSERT A ROW, then DELETE it, assume the sweeper is delayed.
4505
 
         *
4506
 
         * We now have the sequence row X --> del rec A --> valid rec B.
4507
 
         *
4508
 
         * 2. A SELECT can still find B. Assume it now goes to check
4509
 
         *    if the record is valid, it reads row X, and gets A.
4510
 
         *
4511
 
         * 3. Now the sweeper gets control and removes X, A and B.
4512
 
         *    It frees A with the clean bit.
4513
 
         *
4514
 
         * 4. Now the SELECT gets control and reads A. Normally a freed record
4515
 
         *    would be ignored, and it would go onto B, which would then
4516
 
         *    be considered valid (note, even after the free, the next
4517
 
         *    pointer is not affected).
4518
 
         *
4519
 
         * However, because the clean bit has been set, it will stop at A
4520
 
         * and consider B invalid (which is the desired result).
4521
 
         *
4522
 
         * NOTE: We assume it is not possible for A to be allocated and refer
4523
 
         * to B, because B is freed before A. This means that B may refer to
4524
 
         * A after the next allocation.
4525
 
         */
4526
 
 
4527
 
        xtOpSeqNo                       op_seq;
4528
 
        XTTabRecFreeDPtr        free_rec = (XTTabRecFreeDPtr) ot->ot_row_rbuffer;
4529
 
        xtRecordID                      prev_rec_id;
4530
 
        xtWord1                         new_rec_type = XT_TAB_STATUS_FREED | (clean_delete ? XT_TAB_STATUS_CLEANED_BIT : 0);
4531
 
        u_int                           status = XT_LOG_ENT_REC_REMOVED_BI;
4532
 
 
4533
 
        xt_lock_mutex_ns(&tab->tab_rec_lock);
4534
 
        free_rec->rf_rec_type_1 = new_rec_type;
4535
 
#ifdef XT_CLUSTER_FREE_RECORDS
4536
 
        XTTabCachePagePtr       page;
4537
 
        size_t                          offset;
4538
 
 
4539
 
        if (!tab->tab_recs.xt_tc_lock_page(ot->ot_rec_file, &page, rec_id, &offset, ot->ot_thread)) {
4540
 
                xt_unlock_mutex_ns(&tab->tab_rec_lock);
4541
 
                return FAILED;
4542
 
        }
4543
 
 
4544
 
        if (page->tcp_free_rec == 0xFFFF) {
4545
 
                /* There is no free record on this page. */
4546
 
                prev_rec_id = tab->tab_rec_free_id;
4547
 
                XT_SET_DISK_4(free_rec->rf_next_rec_id_4, prev_rec_id);
4548
 
                memcpy(page->tcp_data + offset, ot->ot_row_rbuffer, sizeof(XTTabRecFreeDRec));
4549
 
                tab->tab_rec_free_id = rec_id;
4550
 
        }
4551
 
        else {
4552
 
                XTTabRecFreeDPtr        prev_free_rec = (XTTabRecFreeDPtr) (page->tcp_data + page->tcp_free_rec);
4553
 
 
4554
 
                status = XT_LOG_ENT_REC_REMOVED_BI_L;
4555
 
                XT_COPY_DISK_4(free_rec->rf_next_rec_id_4, prev_free_rec->rf_next_rec_id_4);
4556
 
                memcpy(page->tcp_data + offset, ot->ot_row_rbuffer, sizeof(XTTabRecFreeDRec));
4557
 
 
4558
 
                /* The previous now references the next: */
4559
 
                XT_SET_DISK_4(prev_free_rec->rf_next_rec_id_4, rec_id);
4560
 
 
4561
 
                /* This is the record ID of the previous record: */
4562
 
                ASSERT_NS((page->tcp_free_rec % tab->tab_recs.tci_rec_size) == 0);
4563
 
                prev_rec_id = (page->tcp_page_idx * tab->tab_recs.tci_rows_per_page) + (page->tcp_free_rec / tab->tab_recs.tci_rec_size) + 1;
4564
 
                ASSERT_NS(prev_rec_id != rec_id);
4565
 
        }
4566
 
 
4567
 
        /* Link after this page in future! */
4568
 
        ASSERT_NS((offset % tab->tab_recs.tci_rec_size) == 0);
4569
 
        page->tcp_free_rec = offset;
4570
 
        tab->tab_recs.xt_tc_unlock_page(ot->ot_rec_file, page, &op_seq, ot->ot_thread);
4571
 
#else
4572
 
        prev_rec_id = tab->tab_rec_free_id;
4573
 
        XT_SET_DISK_4(free_rec->rf_next_rec_id_4, prev_rec_id);
4574
 
        if (!xt_tab_put_rec_data(ot, rec_id, sizeof(XTTabRecFreeDRec), ot->ot_row_rbuffer, &op_seq)) {
4575
 
                xt_unlock_mutex_ns(&tab->tab_rec_lock);
4576
 
                return XT_ERR;
4577
 
        }
4578
 
        tab->tab_rec_free_id = rec_id;
4579
 
        ASSERT_NS(tab->tab_rec_free_id < tab->tab_rec_eof_id);
4580
 
#endif
4581
 
        tab->tab_rec_fnum++;
4582
 
        xt_unlock_mutex_ns(&tab->tab_rec_lock);
4583
 
 
4584
 
        free_rec->rf_rec_type_1 = old_rec_type;
4585
 
        if (!xt_xlog_modify_table(tab->tab_id, status, op_seq, new_rec_type, prev_rec_id, rec_id, rec_size, ot->ot_row_rbuffer, ot->ot_thread))
4586
 
                return XT_ERR;
4587
 
        return OK;
4588
 
}
4589
 
 
4590
 
static xtRowID tab_new_row(XTOpenTablePtr ot, XTTableHPtr tab)
4591
 
{
4592
 
        xtRowID                 row_id;
4593
 
        xtOpSeqNo               op_seq;
4594
 
        xtRowID                 next_row_id = 0;
4595
 
        u_int                   status;
4596
 
 
4597
 
        xt_lock_mutex_ns(&tab->tab_row_lock);
4598
 
        if ((row_id = tab->tab_row_free_id)) {
4599
 
                status = XT_LOG_ENT_ROW_NEW_FL;
4600
 
 
4601
 
                if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, &next_row_id, ot->ot_thread)) {
4602
 
                        xt_unlock_mutex_ns(&tab->tab_row_lock);
4603
 
                        return 0;
4604
 
                }
4605
 
                tab->tab_row_free_id = next_row_id;
4606
 
                ASSERT_NS(tab->tab_row_fnum > 0);
4607
 
                tab->tab_row_fnum--;
4608
 
        }
4609
 
        else {
4610
 
                status = XT_LOG_ENT_ROW_NEW;
4611
 
                row_id = tab->tab_row_eof_id;
4612
 
                if (row_id == 0xFFFFFFFF) {
4613
 
                        xt_unlock_mutex_ns(&tab->tab_row_lock);
4614
 
                        xt_register_xterr(XT_REG_CONTEXT, XT_ERR_MAX_ROW_COUNT);
4615
 
                        return 0;
4616
 
                }
4617
 
                if (((row_id - 1) % tab->tab_rows.tci_rows_per_page) == 0) {
4618
 
                        /* By fetching the page now, we avoid reading it later... */
4619
 
                        XTTabCachePagePtr       page;
4620
 
                        XTTabCacheSegPtr        seg;
4621
 
                        size_t                          poffset;
4622
 
 
4623
 
                        if (!tab->tab_rows.tc_fetch(ot->ot_row_file, row_id, &seg, &page, &poffset, FALSE, ot->ot_thread)) {
4624
 
                                xt_unlock_mutex_ns(&tab->tab_row_lock);
4625
 
                                return 0;
4626
 
                        }
4627
 
                        TAB_CAC_UNLOCK(&seg->tcs_lock, ot->ot_thread->t_id);
4628
 
                }
4629
 
                tab->tab_row_eof_id++;
4630
 
        }
4631
 
        op_seq = tab->tab_seq.ts_get_op_seq();
4632
 
        xt_unlock_mutex_ns(&tab->tab_row_lock);
4633
 
 
4634
 
        if (!xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, next_row_id, row_id, 0, NULL, ot->ot_thread))
4635
 
                return 0;
4636
 
 
4637
 
        XT_DISABLED_TRACE(("new row tx=%d row=%d\n", (int) ot->ot_thread->st_xact_data->xd_start_xn_id, (int) row_id));
4638
 
        ASSERT_NS(row_id);
4639
 
        return row_id;
4640
 
}
4641
 
 
4642
 
xtPublic xtBool xt_tab_get_row(register XTOpenTablePtr ot, xtRowID row_id, xtRecordID *var_rec_id)
4643
 
{
4644
 
        register XTTableHPtr    tab = ot->ot_table;
4645
 
 
4646
 
        (void) ASSERT_NS(sizeof(XTTabRowRefDRec) == 4);
4647
 
 
4648
 
        if (!tab->tab_rows.xt_tc_read_4(ot->ot_row_file, row_id, var_rec_id, ot->ot_thread))
4649
 
                return FAILED;
4650
 
        return OK;
4651
 
}
4652
 
 
4653
 
xtPublic xtBool xt_tab_set_row(XTOpenTablePtr ot, u_int status, xtRowID row_id, xtRecordID var_rec_id)
4654
 
{
4655
 
        register XTTableHPtr    tab = ot->ot_table;
4656
 
        XTTabRowRefDRec                 row_buf;
4657
 
        xtOpSeqNo                               op_seq;
4658
 
 
4659
 
        ASSERT_NS(var_rec_id < tab->tab_rec_eof_id);
4660
 
        XT_SET_DISK_4(row_buf.rr_ref_id_4, var_rec_id);
4661
 
 
4662
 
        if (!tab->tab_rows.xt_tc_write(ot->ot_row_file, row_id, 0, sizeof(XTTabRowRefDRec), (xtWord1 *) &row_buf, &op_seq, TRUE, ot->ot_thread))
4663
 
                return FAILED;
4664
 
 
4665
 
        return xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, 0, row_id, sizeof(XTTabRowRefDRec), (xtWord1 *) &row_buf, ot->ot_thread);
4666
 
}
4667
 
 
4668
 
static void tab_free_row_on_fail(XTOpenTablePtr ot, XTTableHPtr tab, xtRowID row_id)
4669
 
{
4670
 
        XTExceptionRec e;
4671
 
 
4672
 
        tab_save_exception(&e);
4673
 
        xt_tab_free_row(ot, tab, row_id);
4674
 
        tab_restore_exception(&e);
4675
 
}
4676
 
 
4677
 
#ifdef CHECK_CLUSTER_EFFICIENCY
4678
 
/* Debug counters, incremented in tab_add_record() each time a free
 * record is reused: next_on_page counts the cases where the next free
 * record lies on the same cache page as the record being reused,
 * next_off_page counts all other cases.
 */
u_int           next_on_page = 0;
4679
 
u_int           next_off_page = 0;
4680
 
#endif
4681
 
 
4682
 
static xtBool tab_write_ext_record(XTOpenTablePtr XT_UNUSED(ot), XTTableHPtr tab, XTTabRecInfoPtr rec_info, xtRecordID rec_id, xtLogID log_id, xtLogOffset log_offset, XTThreadPtr thread)
4683
 
{
4684
 
        xtWord1 tmp_buffer[offsetof(XTactExtRecEntryDRec, er_data)];
4685
 
        xtBool  ok;
4686
 
 
4687
 
        memcpy(tmp_buffer, rec_info->ri_log_buf, sizeof(tmp_buffer));
4688
 
        rec_info->ri_log_buf->er_status_1 = XT_LOG_ENT_EXT_REC_OK;
4689
 
        XT_SET_DISK_4(rec_info->ri_log_buf->er_data_size_4, rec_info->ri_log_data_size);
4690
 
        XT_SET_DISK_4(rec_info->ri_log_buf->er_tab_id_4, tab->tab_id);
4691
 
        XT_SET_DISK_4(rec_info->ri_log_buf->er_rec_id_4, rec_id);
4692
 
        if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE)
4693
 
                ok = xt_tab_save_ext_record(tab, log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf);
4694
 
        else
4695
 
                ok = thread->st_dlog_buf.dlb_append_log(log_id, log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info->ri_log_data_size, (xtWord1 *) rec_info->ri_log_buf, thread);
4696
 
        memcpy(rec_info->ri_log_buf, tmp_buffer, sizeof(tmp_buffer));
4697
 
        return ok;
4698
 
}
4699
 
 
4700
 
static xtBool tab_add_record(XTOpenTablePtr ot, XTTabRecInfoPtr rec_info, u_int status)
4701
 
{
4702
 
        register XTTableHPtr    tab = ot->ot_table;
4703
 
        XTThreadPtr                             thread = ot->ot_thread;
4704
 
        xtRecordID                              rec_id;
4705
 
        xtLogID                                 log_id;
4706
 
        xtLogOffset                             log_offset;
4707
 
        xtOpSeqNo                               op_seq;
4708
 
        xtRecordID                              next_rec_id = 0;
4709
 
 
4710
 
        if (rec_info->ri_ext_rec) {
4711
 
                /* Determine where the overflow will go... */
4712
 
                if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
4713
 
                        if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data)))
4714
 
                                return FAILED;
4715
 
                }
4716
 
                else {
4717
 
                        if (!thread->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data), ot->ot_thread))
4718
 
                                return FAILED;
4719
 
                }
4720
 
                XT_SET_LOG_REF(rec_info->ri_ext_rec, log_id, log_offset);
4721
 
        }
4722
 
 
4723
 
        /* Write the record to disk: */
4724
 
        xt_lock_mutex_ns(&tab->tab_rec_lock);
4725
 
        if ((rec_id = tab->tab_rec_free_id)) {
4726
 
                ASSERT_NS(rec_id < tab->tab_rec_eof_id);
4727
 
#ifdef XT_CLUSTER_FREE_RECORDS
4728
 
                XTTabCachePagePtr       page;
4729
 
                size_t                          offset;
4730
 
                XTTabRecFreeDPtr        free_block;
4731
 
 
4732
 
                if (!tab->tab_recs.xt_tc_lock_page(ot->ot_rec_file, &page, rec_id, &offset, ot->ot_thread)) {
4733
 
                        xt_unlock_mutex_ns(&tab->tab_rec_lock);
4734
 
                        return FAILED;
4735
 
                }
4736
 
 
4737
 
                /* Read the data from the old record: */
4738
 
                free_block = (XTTabRecFreeDPtr) (page->tcp_data + offset);
4739
 
                next_rec_id = XT_GET_DISK_4(free_block->rf_next_rec_id_4);
4740
 
 
4741
 
#ifdef CHECK_CLUSTER_EFFICIENCY
4742
 
                xtRecordID      dbg_rec_id;
4743
 
 
4744
 
                dbg_rec_id = next_rec_id-1;
4745
 
                if (page->tcp_page_idx == dbg_rec_id / tab->tab_recs.tci_rows_per_page)
4746
 
                        next_on_page++;
4747
 
                else
4748
 
                        next_off_page++;
4749
 
                if ((next_on_page % 1000) == 0)
4750
 
                        printf("Next on: %d off: %d\n", next_on_page, next_off_page);
4751
 
#endif
4752
 
 
4753
 
                if (page->tcp_free_rec == offset) {
4754
 
                        /* Adjust the free record: */
4755
 
                        xtRecordID      tmp_rec_id;
4756
 
 
4757
 
                        /* Check if the next record is on the same page: */
4758
 
                        tmp_rec_id = next_rec_id-1;
4759
 
                        if (page->tcp_page_idx == tmp_rec_id / tab->tab_recs.tci_rows_per_page)
4760
 
                                /* This is the next free record on this page: */
4761
 
                                page->tcp_free_rec = (xtWord2) ((tmp_rec_id % tab->tab_recs.tci_rows_per_page) * tab->tab_recs.tci_rec_size);
4762
 
                        else
4763
 
                                /* Not on the same page, so there are no more free records on this page: */
4764
 
                                page->tcp_free_rec = 0xFFFF;
4765
 
                }
4766
 
 
4767
 
                /* Write the new record: */
4768
 
                memcpy(free_block, rec_info->ri_fix_rec_buf, rec_info->ri_rec_buf_size);
4769
 
                tab->tab_recs.xt_tc_unlock_page(ot->ot_rec_file, page, &op_seq, ot->ot_thread);
4770
 
#else
4771
 
                XTTabRecFreeDRec free_block;
4772
 
 
4773
 
                if (!xt_tab_get_rec_data(ot, rec_id, sizeof(XTTabRecFreeDRec), (xtWord1 *) &free_block)) {
4774
 
                        xt_unlock_mutex_ns(&tab->tab_rec_lock);
4775
 
                        return FAILED;
4776
 
                }
4777
 
                next_rec_id = XT_GET_DISK_4(free_block.rf_next_rec_id_4);
4778
 
                if (!xt_tab_put_rec_data(ot, rec_id, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, &op_seq)) {
4779
 
                        xt_unlock_mutex_ns(&tab->tab_rec_lock);
4780
 
                        return FAILED;
4781
 
                }
4782
 
#endif
4783
 
                tab->tab_rec_free_id = next_rec_id;                     
4784
 
                tab->tab_rec_fnum--;
4785
 
                
4786
 
                /* XT_LOG_ENT_UPDATE --> XT_LOG_ENT_UPDATE_FL */
4787
 
                /* XT_LOG_ENT_INSERT --> XT_LOG_ENT_INSERT_FL */
4788
 
                /* XT_LOG_ENT_DELETE --> XT_LOG_ENT_DELETE_FL */
4789
 
                status += 2;
4790
 
        }
4791
 
        else {
4792
 
                xtBool read;
4793
 
 
4794
 
                rec_id = tab->tab_rec_eof_id;
4795
 
                tab->tab_rec_eof_id++;
4796
 
 
4797
 
                /* If we are writing to a new page (at the EOF)
4798
 
                 * then we do not need to read the page from the
4799
 
                 * file because it is new.
4800
 
                 *
4801
 
                 * Note that this only works because we are holding
4802
 
                 * a lock on the record file.
4803
 
                 */
4804
 
                read = ((rec_id - 1) % tab->tab_recs.tci_rows_per_page) != 0;
4805
 
 
4806
 
                if (!tab->tab_recs.xt_tc_write(ot->ot_rec_file, rec_id, 0, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, &op_seq, read, thread)) {
4807
 
                        xt_unlock_mutex_ns(&tab->tab_rec_lock);
4808
 
                        return FAILED;
4809
 
                }
4810
 
        }
4811
 
        xt_unlock_mutex_ns(&tab->tab_rec_lock);
4812
 
 
4813
 
        if (!xt_xlog_modify_table(tab->tab_id, status, op_seq, 0, next_rec_id, rec_id,  rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf, thread))
4814
 
                return FAILED;
4815
 
 
4816
 
        if (rec_info->ri_ext_rec) {
4817
 
                /* Write the log buffer overflow: */            
4818
 
                if (!tab_write_ext_record(ot, tab, rec_info, rec_id, log_id, log_offset, thread))
4819
 
                        return FAILED;
4820
 
        }
4821
 
 
4822
 
        XT_DISABLED_TRACE(("new rec tx=%d val=%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) rec_id));
4823
 
        rec_info->ri_rec_id = rec_id;
4824
 
        return OK;
4825
 
}
4826
 
 
4827
 
static void tab_delete_record_on_fail(XTOpenTablePtr ot, xtRowID row_id, xtRecordID rec_id, XTTabRecHeadDPtr XT_UNUSED(row_ptr), xtWord1 *rec_data, u_int key_count)
4828
 
{
4829
 
        XTExceptionRec  e;
4830
 
        xtBool                  log_err = TRUE;
4831
 
        XTTabRecInfoRec rec_info;
4832
 
 
4833
 
        tab_save_exception(&e);
4834
 
        
4835
 
        if (e.e_xt_err == XT_ERR_DUPLICATE_KEY || 
4836
 
                e.e_xt_err == XT_ERR_DUPLICATE_FKEY) {
4837
 
                /* If the error does not cause rollback, then we will ignore the
4838
 
                 * error if an error occurs in the UNDO!
4839
 
                 */
4840
 
                log_err = FALSE;
4841
 
                tab_restore_exception(&e);
4842
 
        }
4843
 
        if (key_count) {
4844
 
                XTIndexPtr      *ind;
4845
 
 
4846
 
                ind = ot->ot_table->tab_dic.dic_keys;
4847
 
                for (u_int i=0; i<key_count; i++, ind++) {
4848
 
                        if (!xt_idx_delete(ot, *ind, rec_id, rec_data)) {
4849
 
                                if (log_err)
4850
 
                                        xt_log_and_clear_exception_ns();
4851
 
                        }
4852
 
                }
4853
 
        }
4854
 
 
4855
 
        /* This is not required because the extended record will be free
4856
 
         * later when the record is freed!
4857
 
        if (row_ptr->tr_rec_type_1 == XT_TAB_STATUS_EXT_DLOG || row_ptr->tr_rec_type_1 == XT_TAB_STATUS_EXT_CLEAN)
4858
 
                tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) row_ptr, log_err);
4859
 
         */
4860
 
 
4861
 
        rec_info.ri_fix_rec_buf = (XTTabRecFixDPtr) ot->ot_row_wbuffer;
4862
 
        rec_info.ri_rec_buf_size = offsetof(XTTabRecFixDRec, rf_data);
4863
 
        rec_info.ri_ext_rec = NULL;
4864
 
        rec_info.ri_fix_rec_buf->tr_rec_type_1 = XT_TAB_STATUS_DELETE;
4865
 
        rec_info.ri_fix_rec_buf->tr_stat_id_1 = 0;
4866
 
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
4867
 
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, rec_id);
4868
 
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, ot->ot_thread->st_xact_data->xd_start_xn_id);
4869
 
 
4870
 
        if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_DELETE))
4871
 
                goto failed;
4872
 
 
4873
 
        if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
4874
 
                goto failed;
4875
 
 
4876
 
        if (log_err)
4877
 
                tab_restore_exception(&e);
4878
 
        return;
4879
 
 
4880
 
        failed:
4881
 
        if (log_err)
4882
 
                xt_log_and_clear_exception_ns();
4883
 
        else
4884
 
                tab_restore_exception(&e);
4885
 
}
4886
 
 
4887
 
/*
4888
 
 * Wait until all the variations between the start of the chain, and
4889
 
 * the given record have been rolled-back.
4890
 
 * If any is committed, register a locked error, and return FAILED.
4891
 
 */
4892
 
static xtBool tab_wait_for_rollback(XTOpenTablePtr ot, xtRowID row_id, xtRecordID commit_rec_id)
4893
 
{
4894
 
        register XTTableHPtr    tab = ot->ot_table;
4895
 
        xtRecordID                              var_rec_id;
4896
 
        XTTabRecHeadDRec                var_head;
4897
 
        xtXactID                                xn_id;
4898
 
        xtRecordID                              invalid_rec = 0;
4899
 
        XTXactWaitRec                   xw;
4900
 
 
4901
 
        retry:
4902
 
        if (!xt_tab_get_row(ot, row_id, &var_rec_id))
4903
 
                return FAILED;
4904
 
 
4905
 
        while (var_rec_id != commit_rec_id) {
4906
 
                if (!var_rec_id)
4907
 
                        goto locked;
4908
 
                if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &var_head))
4909
 
                        return FAILED;
4910
 
                if (XT_REC_IS_CLEAN(var_head.tr_rec_type_1))
4911
 
                        goto locked;
4912
 
                if (XT_REC_IS_FREE(var_head.tr_rec_type_1)) {
4913
 
                        /* Should not happen: */
4914
 
                        if (!tab_record_corrupt(ot, row_id, var_rec_id, false, 4))
4915
 
                                return FAILED;
4916
 
                        goto record_invalid;
4917
 
                }
4918
 
                xn_id = XT_GET_DISK_4(var_head.tr_xact_id_4);
4919
 
                switch (xt_xn_status(ot, xn_id, var_rec_id)) {
4920
 
                        case XT_XN_VISIBLE:
4921
 
                        case XT_XN_NOT_VISIBLE:
4922
 
                                goto locked;
4923
 
                        case XT_XN_ABORTED:
4924
 
                                /* Ingore the record, it will be removed. */
4925
 
                                break;
4926
 
                        case XT_XN_MY_UPDATE:
4927
 
                                /* Should not happen: */
4928
 
                                goto locked;
4929
 
                        case XT_XN_OTHER_UPDATE:
4930
 
                                /* Wait for the transaction to commit or rollback: */
4931
 
                                XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4932
 
                                xw.xw_xn_id = xn_id;
4933
 
                                if (!xt_xn_wait_for_xact(ot->ot_thread, &xw, NULL)) {
4934
 
                                        XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4935
 
                                        return FAILED;
4936
 
                                }
4937
 
                                XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
4938
 
                                goto retry;
4939
 
                        case XT_XN_REREAD:
4940
 
                                if (!tab_record_corrupt(ot, row_id, var_rec_id, true, 5))
4941
 
                                        return FAILED;
4942
 
                                goto record_invalid;
4943
 
                }
4944
 
                var_rec_id = XT_GET_DISK_4(var_head.tr_prev_rec_id_4);
4945
 
        }
4946
 
        return OK;
4947
 
 
4948
 
        locked:
4949
 
        xt_register_xterr(XT_REG_CONTEXT, XT_ERR_RECORD_CHANGED);
4950
 
        return FAILED;
4951
 
        
4952
 
        record_invalid:
4953
 
        /* {RETRY-READ} */
4954
 
        /* Prevent an infinite loop due to a bad record: */
4955
 
        if (invalid_rec != var_rec_id) {
4956
 
                invalid_rec = var_rec_id;
4957
 
                goto retry;
4958
 
        }
4959
 
        /* The record is invalid, it will be "overwritten"... */
4960
 
#ifdef XT_CRASH_DEBUG
4961
 
        /* Should not happen! */
4962
 
        xt_crash_me();
4963
 
#endif
4964
 
        return OK;
4965
 
}
4966
 
 
4967
 
/* Check if a record may be visible:
4968
 
 * Return TRUE if the record may be visible now.
4969
 
 * Return XT_MAYBE if the record may be visible in the future (set out_xn_id).
4970
 
 * Return FALSE if the record is not valid (freed or is a delete record).
4971
 
 * Return XT_ERR if an error occurred.
4972
 
 */
4973
 
xtPublic int xt_tab_maybe_committed(XTOpenTablePtr ot, xtRecordID rec_id, xtXactID *out_xn_id, xtRowID *out_rowid, xtBool *out_updated)
4974
 
{
4975
 
        XTTabRecHeadDRec                rec_head;
4976
 
        xtXactID                                rec_xn_id = 0;
4977
 
        xtBool                                  wait = FALSE;
4978
 
        xtXactID                                wait_xn_id = 0;
4979
 
        xtRowID                                 row_id = 0;  // Initialized unnecessarily to satisfy (Drizzle) compile [-Wuninitialized]
4980
 
        xtRecordID                              var_rec_id;
4981
 
        xtXactID                                xn_id;
4982
 
        register XTTableHPtr    tab = NULL; // Initialized unnecessarily to satisfy (Drizzle) compile [-Wuninitialized]
4983
 
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
4984
 
        char                                    t_buf[500];
4985
 
        int                                             len;
4986
 
        char                                    *t_type = "C";
4987
 
#endif
4988
 
        xtRecordID                              invalid_rec = 0;
4989
 
 
4990
 
        reread:
4991
 
        if (!xt_tab_get_rec_data(ot, rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
4992
 
                return XT_ERR;
4993
 
 
4994
 
        if (XT_REC_NOT_VALID(rec_head.tr_rec_type_1))
4995
 
                return FALSE;
4996
 
 
4997
 
        if (!XT_REC_IS_CLEAN(rec_head.tr_rec_type_1)) {
4998
 
                rec_xn_id = XT_GET_DISK_4(rec_head.tr_xact_id_4);
4999
 
                switch (xt_xn_status(ot, rec_xn_id, rec_id)) {
5000
 
                        case XT_XN_VISIBLE:
5001
 
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5002
 
                                t_type="V";
5003
 
#endif
5004
 
                                break;
5005
 
                        case XT_XN_NOT_VISIBLE:
5006
 
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5007
 
                                t_type="NV";
5008
 
#endif
5009
 
                                break;
5010
 
                        case XT_XN_ABORTED:
5011
 
                                return FALSE;
5012
 
                        case XT_XN_MY_UPDATE:
5013
 
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5014
 
                                t_type="My-Upd";
5015
 
#endif
5016
 
                                break;
5017
 
                        case XT_XN_OTHER_UPDATE:
5018
 
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5019
 
                                t_type="Wait";
5020
 
#endif
5021
 
                                wait = TRUE;
5022
 
                                wait_xn_id = rec_xn_id;
5023
 
                                break;
5024
 
                        case XT_XN_REREAD:
5025
 
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5026
 
                                t_type="Re-read";
5027
 
#endif
5028
 
                                /* {RETRY-READ} */
5029
 
                                /* Avoid infinite loop: */
5030
 
                                if (invalid_rec == rec_id) {
5031
 
                                        /* Should not happen! */
5032
 
                                        if (!tab_record_corrupt(ot, XT_GET_DISK_4(rec_head.tr_row_id_4), rec_id, true, 6))
5033
 
                                                goto failed;
5034
 
#ifdef XT_CRASH_DEBUG
5035
 
                                        /* Generate a core dump! */
5036
 
                                        xt_crash_me();
5037
 
#endif
5038
 
                                        return FALSE;
5039
 
                                }
5040
 
                                invalid_rec = rec_id;
5041
 
                                goto reread;
5042
 
                }
5043
 
        }
5044
 
 
5045
 
        /* Follow the variation chain until we come to this record.
5046
 
         * If it is not the first visible variation then
5047
 
         * it is not visible at all. If it is not found on the
5048
 
         * variation chain, it is also not visible.
5049
 
         */
5050
 
        row_id = XT_GET_DISK_4(rec_head.tr_row_id_4);
5051
 
 
5052
 
        tab = ot->ot_table;
5053
 
        XT_TAB_ROW_READ_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
5054
 
 
5055
 
        invalid_rec = 0;
5056
 
        retry:
5057
 
        if (!(xt_tab_get_row(ot, row_id, &var_rec_id)))
5058
 
                goto failed;
5059
 
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5060
 
        len = sprintf(t_buf, "dup row=%d", (int) row_id);
5061
 
#endif
5062
 
        while (var_rec_id != rec_id) {
5063
 
                if (!var_rec_id)
5064
 
                        goto not_found;
5065
 
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5066
 
                if (len <= 450)
5067
 
                        len += sprintf(t_buf+len, " -> %d", (int) var_rec_id);
5068
 
#endif
5069
 
                if (!xt_tab_get_rec_data(ot, var_rec_id, sizeof(XTTabRecHeadDRec), (xtWord1 *) &rec_head))
5070
 
                        goto failed;
5071
 
                /* All clean records are visible, by all transactions: */
5072
 
                if (XT_REC_IS_CLEAN(rec_head.tr_rec_type_1))
5073
 
                        goto not_found;
5074
 
 
5075
 
                if (XT_REC_IS_FREE(rec_head.tr_rec_type_1)) {
5076
 
                        /* Should not happen: */
5077
 
                        if (invalid_rec != var_rec_id) {
5078
 
                                invalid_rec = var_rec_id;
5079
 
                                goto retry;
5080
 
                        }
5081
 
                        /* Assume end of list. */
5082
 
#ifdef XT_CRASH_DEBUG
5083
 
                        /* Should not happen! */
5084
 
                        xt_crash_me();
5085
 
#endif
5086
 
                        goto not_found;
5087
 
                }
5088
 
 
5089
 
                xn_id = XT_GET_DISK_4(rec_head.tr_xact_id_4);
5090
 
                switch (xt_xn_status(ot, xn_id, var_rec_id)) {
5091
 
                        case XT_XN_VISIBLE:
5092
 
                        case XT_XN_NOT_VISIBLE:
5093
 
                                goto not_found;
5094
 
                        case XT_XN_ABORTED:
5095
 
                                /* Ignore the record, it will be removed. */
5096
 
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5097
 
                                if (len <= 450)
5098
 
                                        len += sprintf(t_buf+len, "(T%d-A)", (int) xn_id);
5099
 
#endif
5100
 
                                break;
5101
 
                        case XT_XN_MY_UPDATE:
5102
 
                                goto not_found;
5103
 
                        case XT_XN_OTHER_UPDATE:
5104
 
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5105
 
                                if (len <= 450)
5106
 
                                        len += sprintf(t_buf+len, "(T%d-wait)", (int) xn_id);
5107
 
#endif
5108
 
                                /* Wait for this update to commit or abort: */
5109
 
                                if (!wait) {
5110
 
                                        wait = TRUE;
5111
 
                                        wait_xn_id = xn_id;
5112
 
                                }
5113
 
                                break;
5114
 
                        case XT_XN_REREAD:
5115
 
                                /* {RETRY-READ} */
5116
 
                                if (invalid_rec != var_rec_id) {
5117
 
                                        invalid_rec = var_rec_id;
5118
 
                                        goto retry;
5119
 
                                }
5120
 
                                /* Assume end of list. */
5121
 
                                if (!tab_record_corrupt(ot, row_id, invalid_rec, true, 7))
5122
 
                                        goto failed;
5123
 
#ifdef XT_CRASH_DEBUG
5124
 
                                /* Should not happen! */
5125
 
                                xt_crash_me();
5126
 
#endif
5127
 
                                goto not_found;
5128
 
                }
5129
 
                var_rec_id = XT_GET_DISK_4(rec_head.tr_prev_rec_id_4);
5130
 
        }
5131
 
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5132
 
        if (len <= 450)
5133
 
                sprintf(t_buf+len, " -> %d(T%d-%s)\n", (int) var_rec_id, (int) rec_xn_id, t_type);
5134
 
        else
5135
 
                sprintf(t_buf+len, " ...(T%d-%s)\n", (int) rec_xn_id, t_type);
5136
 
#endif
5137
 
 
5138
 
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
5139
 
        if (wait) {
5140
 
                *out_xn_id = wait_xn_id;
5141
 
                return XT_MAYBE;
5142
 
        }
5143
 
#ifdef TRACE_VARIATIONS_IN_DUP_CHECK
5144
 
        xt_ttracef(thread, "%s", t_buf);
5145
 
#endif
5146
 
        if (out_rowid) {
5147
 
                *out_rowid = row_id;
5148
 
                *out_updated = (rec_xn_id == ot->ot_thread->st_xact_data->xd_start_xn_id);
5149
 
        }
5150
 
        return TRUE;
5151
 
 
5152
 
        not_found:
5153
 
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
5154
 
        return FALSE;
5155
 
 
5156
 
        failed:
5157
 
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], ot->ot_thread);
5158
 
        return XT_ERR;
5159
 
}
5160
 
 
5161
 
xtPublic xtBool xt_tab_new_record(XTOpenTablePtr ot, xtWord1 *rec_buf)
5162
 
{
5163
 
        register XTTableHPtr    tab = ot->ot_table;
5164
 
        register XTThreadPtr    self = ot->ot_thread;
5165
 
        XTTabRecInfoRec                 rec_info;
5166
 
        xtRowID                                 row_id;
5167
 
        u_int                                   idx_cnt = 0;
5168
 
        XTIndexPtr                              *ind;
5169
 
 
5170
 
        /* A non-temporary table has been updated: */
5171
 
        if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
5172
 
                self->st_non_temp_updated = TRUE;
5173
 
 
5174
 
        if (!myxt_store_row(ot, &rec_info, (char *) rec_buf))
5175
 
                goto failed_0;
5176
 
 
5177
 
        /* Get a new row ID: */
5178
 
        if (!(row_id = tab_new_row(ot, tab)))
5179
 
                goto failed_0;
5180
 
 
5181
 
        rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
5182
 
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
5183
 
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, 0);
5184
 
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
5185
 
 
5186
 
        /* Note, it is important that this record is written BEFORE the row
5187
 
         * due to the problem distributed here [(5)]
5188
 
         */
5189
 
        if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_INSERT))
5190
 
                goto failed_1;
5191
 
 
5192
 
#ifdef TRACE_VARIATIONS
5193
 
        xt_ttracef(self, "insert: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) self->st_xact_data->xd_start_xn_id);
5194
 
#endif
5195
 
        if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
5196
 
                goto failed_1;
5197
 
        XT_DISABLED_TRACE(("set new tx=%d row=%d rec=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
5198
 
 
5199
 
        /* Add the index references: */
5200
 
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5201
 
                if (!xt_idx_insert(ot, *ind, 0, rec_info.ri_rec_id, rec_buf, NULL, FALSE)) {
5202
 
                        ot->ot_err_index_no = (*ind)->mi_index_no;
5203
 
                        goto failed_2;
5204
 
                }
5205
 
        }
5206
 
 
5207
 
        /* Do the foreign key stuff: */
5208
 
        if (ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
5209
 
                if (!ot->ot_table->tab_dic.dic_table->insertRow(ot, rec_buf))
5210
 
                        goto failed_2;
5211
 
        }
5212
 
 
5213
 
        self->st_statistics.st_row_insert++;
5214
 
        return OK;      
5215
 
 
5216
 
        failed_2:
5217
 
        /* Once the row has been inserted, it is to late to remove it!
5218
 
         * Now all we can do is delete it!
5219
 
         */
5220
 
        tab_delete_record_on_fail(ot, row_id, rec_info.ri_rec_id, (XTTabRecHeadDPtr) rec_info.ri_fix_rec_buf, rec_buf, idx_cnt);
5221
 
        goto failed_0;
5222
 
 
5223
 
        failed_1:
5224
 
        tab_free_row_on_fail(ot, tab, row_id);
5225
 
 
5226
 
        failed_0:
5227
 
        return FAILED;
5228
 
}
5229
 
 
5230
 
/* We cannot remove a change we have made to a row while a transaction
5231
 
 * is running, so we have to undo what we have done by
5232
 
 * overwriting the record we just created with
5233
 
 * the before image!
5234
 
 */
5235
 
static xtBool tab_overwrite_record_on_fail(XTOpenTablePtr ot, XTTabRecInfoPtr rec_info, xtWord1 *before_buf, xtWord1 *after_buf, u_int idx_cnt)
5236
 
{
5237
 
        register XTTableHPtr    tab = ot->ot_table;
5238
 
        XTTabRecHeadDRec                prev_rec_head;
5239
 
        u_int                                   i;
5240
 
        XTIndexPtr                              *ind;
5241
 
        XTThreadPtr                             thread = ot->ot_thread;
5242
 
        xtLogID                                 log_id;
5243
 
        xtLogOffset                             log_offset;
5244
 
        xtRecordID                              rec_id = rec_info->ri_rec_id;
5245
 
 
5246
 
        /* Remove the new extended record: */
5247
 
        if (rec_info->ri_ext_rec)
5248
 
                tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) rec_info->ri_fix_rec_buf, TRUE);
5249
 
 
5250
 
        /* Undo index entries of the new record: */
5251
 
        if (after_buf) {
5252
 
                for (i=0, ind=tab->tab_dic.dic_keys; i<idx_cnt; i++, ind++) {
5253
 
                        if (!xt_idx_delete(ot, *ind, rec_id, after_buf))
5254
 
                                return FAILED;
5255
 
                }
5256
 
        }
5257
 
 
5258
 
        memcpy(&prev_rec_head, rec_info->ri_fix_rec_buf, sizeof(XTTabRecHeadDRec));
5259
 
 
5260
 
        if (!before_buf) {
5261
 
                /* Can happen if the delete was called from some cascaded action.
5262
 
                 * And this is better than a crash...
5263
 
                 *
5264
 
                 * TODO: to make sure the change will not be applied in case the 
5265
 
                 * transaction will be commited, we'd need to add a log entry to 
5266
 
                 * restore the record like it's done for top-level operation. In 
5267
 
                 * order to do this we'd need to read the before-image of the 
5268
 
                 * record before modifying it.
5269
 
                 */
5270
 
                if (!thread->t_exception.e_xt_err)
5271
 
                        xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_BEFORE_IMAGE);
5272
 
                return FAILED;
5273
 
        }
5274
 
 
5275
 
        /* Restore the previous record! */
5276
 
        if (!myxt_store_row(ot, rec_info, (char *) before_buf))
5277
 
                return FAILED;
5278
 
 
5279
 
        memcpy(rec_info->ri_fix_rec_buf, &prev_rec_head, sizeof(XTTabRecHeadDRec));
5280
 
 
5281
 
        if (rec_info->ri_ext_rec) {
5282
 
                /* Determine where the overflow will go... */
5283
 
                if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
5284
 
                        if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data)))
5285
 
                                return FAILED;
5286
 
                }
5287
 
                else {
5288
 
                        if (!thread->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, rec_info->ri_log_data_size + offsetof(XTactExtRecEntryDRec, er_data), thread))
5289
 
                                return FAILED;
5290
 
                }
5291
 
                XT_SET_LOG_REF(rec_info->ri_ext_rec, log_id, log_offset);
5292
 
        }
5293
 
 
5294
 
        if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info->ri_rec_buf_size, (xtWord1 *) rec_info->ri_fix_rec_buf))
5295
 
                return FAILED;
5296
 
 
5297
 
        if (rec_info->ri_ext_rec) {
5298
 
                /* Write the log buffer overflow: */            
5299
 
                if (!tab_write_ext_record(ot, tab, rec_info, rec_id, log_id, log_offset, thread))
5300
 
                        return FAILED;
5301
 
        }
5302
 
 
5303
 
        /* Put the index entries back: */
5304
 
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5305
 
                if (!xt_idx_insert(ot, *ind, 0, rec_id, before_buf, after_buf, TRUE))
5306
 
                        /* Incomplete restore, there will be a rollback... */
5307
 
                        return FAILED;
5308
 
        }
5309
 
 
5310
 
        return OK;
5311
 
}
5312
 
 
5313
 
/*
5314
 
 * GOTCHA:
5315
 
 * If a transaction updates the same record over again, we should update
5316
 
 * in place. This prevents producing unnecessary variations!
5317
 
 */
5318
 
static xtBool tab_overwrite_record(XTOpenTablePtr ot, xtWord1 *before_buf, xtWord1 *after_buf)
5319
 
{
5320
 
        register XTTableHPtr    tab = ot->ot_table;
5321
 
        xtRowID                                 row_id = ot->ot_curr_row_id;
5322
 
        register XTThreadPtr    self = ot->ot_thread;
5323
 
        xtRecordID                              rec_id = ot->ot_curr_rec_id;
5324
 
        XTTabRecExtDRec                 prev_rec_head;
5325
 
        XTTabRecInfoRec                 rec_info;
5326
 
        u_int                                   idx_cnt = 0, i;
5327
 
        XTIndexPtr                              *ind;
5328
 
        xtLogID                                 log_id;
5329
 
        xtLogOffset                             log_offset;
5330
 
        xtBool                                  prev_ext_rec;
5331
 
 
5332
 
        /* A non-temporary table has been updated: */
5333
 
        if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
5334
 
                self->st_non_temp_updated = TRUE;
5335
 
 
5336
 
        if (!myxt_store_row(ot, &rec_info, (char *) after_buf))
5337
 
                goto failed_0;
5338
 
 
5339
 
        /* Read before we overwrite! */
5340
 
        if (!xt_tab_get_rec_data(ot, rec_id, XT_REC_EXT_HEADER_SIZE, (xtWord1 *) &prev_rec_head))
5341
 
                goto failed_0;
5342
 
 
5343
 
        prev_ext_rec = prev_rec_head.tr_rec_type_1 & XT_TAB_STATUS_EXT_DLOG;
5344
 
 
5345
 
        if (rec_info.ri_ext_rec) {
5346
 
                /* Determine where the overflow will go... */
5347
 
                if (tab->tab_dic.dic_tab_flags & XT_TF_MEMORY_TABLE) {
5348
 
                        if (!xt_tab_get_ext_slot(tab, &log_id, &log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size))
5349
 
                                goto failed_0;
5350
 
                }
5351
 
                else {
5352
 
                        if (!self->st_dlog_buf.dlb_get_log_offset(&log_id, &log_offset, offsetof(XTactExtRecEntryDRec, er_data) + rec_info.ri_log_data_size, self))
5353
 
                                goto failed_0;
5354
 
                }
5355
 
                XT_SET_LOG_REF(rec_info.ri_ext_rec, log_id, log_offset);
5356
 
        }
5357
 
 
5358
 
        rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
5359
 
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
5360
 
        XT_COPY_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, prev_rec_head.tr_prev_rec_id_4);
5361
 
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
5362
 
 
5363
 
        /* Remove the index references, that have changed: */
5364
 
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5365
 
                if (!xt_idx_delete(ot, *ind, rec_id, before_buf)) {
5366
 
                        goto failed_0;
5367
 
                }
5368
 
        }
5369
 
 
5370
 
#ifdef TRACE_VARIATIONS
5371
 
        xt_ttracef(self, "overwrite: row=%d rec=%d T%d\n", (int) row_id, (int) rec_id, (int) self->st_xact_data->xd_start_xn_id);
5372
 
#endif
5373
 
        /* Overwrite the record: */
5374
 
        if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info.ri_rec_buf_size, (xtWord1 *) rec_info.ri_fix_rec_buf))
5375
 
                goto failed_0;
5376
 
 
5377
 
        if (rec_info.ri_ext_rec) {
5378
 
                /* Write the log buffer overflow: */            
5379
 
                if (!tab_write_ext_record(ot, tab, &rec_info, rec_id, log_id, log_offset, self))
5380
 
                        goto failed_1;
5381
 
        }
5382
 
 
5383
 
        /* Add the index references that have changed: */
5384
 
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5385
 
                if (!xt_idx_insert(ot, *ind, 0, rec_id, after_buf, before_buf, FALSE)) {
5386
 
                        ot->ot_err_index_no = (*ind)->mi_index_no;
5387
 
                        goto failed_2;
5388
 
                }
5389
 
        }
5390
 
 
5391
 
        /* Do the foreign key stuff: */
5392
 
        if (ot->ot_table->tab_dic.dic_table->dt_trefs || ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
5393
 
                if (!ot->ot_table->tab_dic.dic_table->updateRow(ot, before_buf, after_buf))
5394
 
                        goto failed_2;
5395
 
        }
5396
 
        
5397
 
        /* Delete the previous overflow area: */
5398
 
        if (prev_ext_rec)
5399
 
                tab_free_ext_record_on_fail(ot, rec_id, &prev_rec_head, TRUE);
5400
 
 
5401
 
        return OK;
5402
 
 
5403
 
        failed_2:
5404
 
        /* Remove the new extended record: */
5405
 
        if (rec_info.ri_ext_rec)
5406
 
                tab_free_ext_record_on_fail(ot, rec_id, (XTTabRecExtDPtr) rec_info.ri_fix_rec_buf, TRUE);
5407
 
 
5408
 
        /* Restore the previous record! */
5409
 
        /* Undo index entries: */
5410
 
        for (i=0, ind=tab->tab_dic.dic_keys; i<idx_cnt; i++, ind++) {
5411
 
                if (!xt_idx_delete(ot, *ind, rec_id, after_buf))
5412
 
                        goto failed_1;
5413
 
        }
5414
 
 
5415
 
        /* Restore the record: */
5416
 
        if (!myxt_store_row(ot, &rec_info, (char *) before_buf))
5417
 
                goto failed_1;
5418
 
 
5419
 
        if (rec_info.ri_ext_rec)
5420
 
                memcpy(rec_info.ri_fix_rec_buf, &prev_rec_head, XT_REC_EXT_HEADER_SIZE);
5421
 
        else
5422
 
                memcpy(rec_info.ri_fix_rec_buf, &prev_rec_head, sizeof(XTTabRecHeadDRec));
5423
 
 
5424
 
        if (!xt_tab_put_log_op_rec_data(ot, XT_LOG_ENT_REC_MODIFIED, 0, rec_id, rec_info.ri_rec_buf_size, (xtWord1 *) rec_info.ri_fix_rec_buf))
5425
 
                goto failed_1;
5426
 
 
5427
 
        /* Put the index entries back: */
5428
 
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5429
 
                if (!xt_idx_insert(ot, *ind, 0, rec_id, before_buf, after_buf, TRUE))
5430
 
                        /* Incomplete restore, there will be a rollback... */
5431
 
                        goto failed_0;
5432
 
        }
5433
 
 
5434
 
        /* The previous record has now been restored. */
5435
 
        goto failed_0;
5436
 
 
5437
 
        failed_1:
5438
 
        /* The old record is overwritten, I must free the previous extended record: */
5439
 
        if (prev_ext_rec)
5440
 
                tab_free_ext_record_on_fail(ot, rec_id, &prev_rec_head, TRUE);
5441
 
 
5442
 
        failed_0:
5443
 
        return FAILED;
5444
 
}
5445
 
 
5446
 
xtPublic xtBool xt_tab_update_record(XTOpenTablePtr ot, xtWord1 *before_buf, xtWord1 *after_buf)
5447
 
{
5448
 
        register XTTableHPtr    tab;
5449
 
        xtRowID                                 row_id;
5450
 
        register XTThreadPtr    self;
5451
 
        xtRecordID                              curr_var_rec_id;
5452
 
        XTTabRecInfoRec                 rec_info;
5453
 
        u_int                                   idx_cnt = 0;
5454
 
        XTIndexPtr                              *ind;
5455
 
 
5456
 
        /*
5457
 
         * Originally only the flag ot->ot_curr_updated was checked, and if it was on, then
5458
 
         * tab_overwrite_record() was called, but this caused crashes in some cases like:
5459
 
         *
5460
 
         * set @@autocommit = 0;
5461
 
         * create table t1 (s1 int primary key); 
5462
 
         * create table t2 (s1 int primary key, foreign key (s1) references t1 (s1) on update cascade);
5463
 
     * insert into t1 values (1);
5464
 
         * insert into t2 values (1);
5465
 
         * update t1 set s1 = 1;
5466
 
         *
5467
 
         * the last update lead to a crash on t2 cascade update because before_buf argument is NULL 
5468
 
         * in the call below. It is NULL only during cascade update of child table. In that case we 
5469
 
         * cannot pass before_buf value from XTDDTableRef::modifyRow as the before_buf is the original 
5470
 
         * row for the parent (t1) table and it would be used to update any existing indexes
5471
 
         * in the child table which would be wrong of course.
5472
 
         *
5473
 
         * Alternative solution would be to copy the after_info in the XTDDTableRef::modifyRow():
5474
 
         * 
5475
 
         * ...
5476
 
         * if (!xt_tab_load_record(ot, ot->ot_curr_rec_id, &after_info))
5477
 
         *     goto failed_2;
5478
 
         * ...
5479
 
         *
5480
 
         * here the xt_tab_load_record() loads the original row, so we can copy it from there, but in 
5481
 
         * that case we'd need to allocate a new (possibly up to 65536 bytes long) buffer, which makes 
5482
 
         * the optimization questionable
5483
 
         *
5484
 
         */
5485
 
        if (ot->ot_curr_updated && before_buf)
5486
 
                /* This record has already been updated by this transaction.
5487
 
                 * Do the update in place!
5488
 
                 */
5489
 
                return tab_overwrite_record(ot, before_buf, after_buf);
5490
 
 
5491
 
        tab = ot->ot_table;
5492
 
        row_id = ot->ot_curr_row_id;
5493
 
        self = ot->ot_thread;
5494
 
 
5495
 
        /* A non-temporary table has been updated: */
5496
 
        if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
5497
 
                self->st_non_temp_updated = TRUE;
5498
 
 
5499
 
        if (!myxt_store_row(ot, &rec_info, (char *) after_buf))
5500
 
                goto failed_0;
5501
 
 
5502
 
        rec_info.ri_fix_rec_buf->tr_stat_id_1 = ot->ot_update_id;
5503
 
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
5504
 
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, ot->ot_curr_rec_id);
5505
 
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, self->st_xact_data->xd_start_xn_id);
5506
 
 
5507
 
        /* Create the new record: */
5508
 
        if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_UPDATE))
5509
 
                goto failed_0;
5510
 
 
5511
 
        /* Link the new variation into the list: */
5512
 
        XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], self);
5513
 
 
5514
 
        if (!xt_tab_get_row(ot, row_id, &curr_var_rec_id))
5515
 
                goto failed_1;
5516
 
 
5517
 
        if (curr_var_rec_id != ot->ot_curr_rec_id) {
5518
 
                /* If the transaction does not rollback, I will get an
5519
 
                 * exception here:
5520
 
                 */
5521
 
                if (!tab_wait_for_rollback(ot, row_id, ot->ot_curr_rec_id))
5522
 
                        goto failed_1;
5523
 
                /* [(4)] This is the situation when we overwrite the
5524
 
                 * reference to curr_var_rec_id!
5525
 
                 * When curr_var_rec_id is cleaned up by the sweeper, the
5526
 
                 * sweeper will notice that the record is no longer in
5527
 
                 * the row list.
5528
 
                 */
5529
 
        }
5530
 
 
5531
 
#ifdef TRACE_VARIATIONS
5532
 
        xt_ttracef(self, "update: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) self->st_xact_data->xd_start_xn_id);
5533
 
#endif
5534
 
        if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
5535
 
                goto failed_1;
5536
 
        XT_DISABLED_TRACE(("set upd tx=%d row=%d rec=%d\n", (int) self->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
5537
 
 
5538
 
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], self);
5539
 
 
5540
 
        /* Add the index references: */
5541
 
        for (idx_cnt=0, ind=tab->tab_dic.dic_keys; idx_cnt<tab->tab_dic.dic_key_count; idx_cnt++, ind++) {
5542
 
                if (!xt_idx_insert(ot, *ind, 0, rec_info.ri_rec_id, after_buf, before_buf, FALSE)) {
5543
 
                        ot->ot_err_index_no = (*ind)->mi_index_no;
5544
 
                        goto failed_2;
5545
 
                }
5546
 
        }
5547
 
 
5548
 
        if (ot->ot_table->tab_dic.dic_table->dt_trefs || ot->ot_table->tab_dic.dic_table->dt_fkeys.size() > 0) {
5549
 
                if (!ot->ot_table->tab_dic.dic_table->updateRow(ot, before_buf, after_buf))
5550
 
                        goto failed_2;
5551
 
        }
5552
 
 
5553
 
        self->st_statistics.st_row_update++;
5554
 
        return OK;
5555
 
 
5556
 
        failed_2:
5557
 
        tab_overwrite_record_on_fail(ot, &rec_info, before_buf, after_buf, idx_cnt);
5558
 
        goto failed_0;
5559
 
 
5560
 
        failed_1:
5561
 
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], self);
5562
 
 
5563
 
        failed_0:
5564
 
        return FAILED;
5565
 
}
5566
 
 
5567
 
xtPublic xtBool xt_tab_delete_record(XTOpenTablePtr ot, xtWord1 *rec_buf)
5568
 
{
5569
 
        register XTTableHPtr    tab = ot->ot_table;
5570
 
        register XTThreadPtr    thread = ot->ot_thread;
5571
 
        xtRowID                                 row_id = ot->ot_curr_row_id;
5572
 
        xtRecordID                              curr_var_rec_id;
5573
 
        XTTabRecInfoRec                 rec_info;
5574
 
 
5575
 
        /* A non-temporary table has been updated: */
5576
 
        if (!XT_IS_TEMP_TABLE(tab->tab_dic.dic_tab_flags))
5577
 
                thread->st_non_temp_updated = TRUE;
5578
 
 
5579
 
        /* Setup a delete record: */
5580
 
        rec_info.ri_fix_rec_buf = (XTTabRecFixDPtr) ot->ot_row_wbuffer;
5581
 
        rec_info.ri_rec_buf_size = offsetof(XTTabRecFixDRec, rf_data);
5582
 
        rec_info.ri_ext_rec = NULL;
5583
 
        rec_info.ri_fix_rec_buf->tr_rec_type_1 = XT_TAB_STATUS_DELETE;
5584
 
        rec_info.ri_fix_rec_buf->tr_stat_id_1 = 0;
5585
 
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_row_id_4, row_id);
5586
 
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_prev_rec_id_4, ot->ot_curr_rec_id);
5587
 
        XT_SET_DISK_4(rec_info.ri_fix_rec_buf->tr_xact_id_4, thread->st_xact_data->xd_start_xn_id);
5588
 
 
5589
 
        if (!tab_add_record(ot, &rec_info, XT_LOG_ENT_DELETE))
5590
 
                return FAILED;
5591
 
 
5592
 
        XT_TAB_ROW_WRITE_LOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
5593
 
 
5594
 
        if (!xt_tab_get_row(ot, row_id, &curr_var_rec_id))
5595
 
                goto failed_1;
5596
 
 
5597
 
        if (curr_var_rec_id != ot->ot_curr_rec_id) {
5598
 
                if (!tab_wait_for_rollback(ot, row_id, ot->ot_curr_rec_id))
5599
 
                        goto failed_1;          
5600
 
        }
5601
 
 
5602
 
#ifdef TRACE_VARIATIONS
5603
 
        xt_ttracef(thread, "update: row=%d rec=%d T%d\n", (int) row_id, (int) rec_info.ri_rec_id, (int) thread->st_xact_data->xd_start_xn_id);
5604
 
#endif
5605
 
        if (!xt_tab_set_row(ot, XT_LOG_ENT_ROW_ADD_REC, row_id, rec_info.ri_rec_id))
5606
 
                goto failed_1;
5607
 
        XT_DISABLED_TRACE(("del row tx=%d row=%d rec=%d\n", (int) thread->st_xact_data->xd_start_xn_id, (int) row_id, (int) rec_info.ri_rec_id));
5608
 
 
5609
 
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
5610
 
 
5611
 
        if (ot->ot_table->tab_dic.dic_table->dt_trefs) {
5612
 
                if (!ot->ot_table->tab_dic.dic_table->deleteRow(ot, rec_buf))
5613
 
                        goto failed_2;
5614
 
        }
5615
 
 
5616
 
        thread->st_statistics.st_row_delete++;
5617
 
        return OK;
5618
 
 
5619
 
        failed_2:
5620
 
        tab_overwrite_record_on_fail(ot, &rec_info, rec_buf, NULL, 0);
5621
 
        return FAILED;
5622
 
 
5623
 
        failed_1:
5624
 
        XT_TAB_ROW_UNLOCK(&tab->tab_row_rwlock[row_id % XT_ROW_RWLOCKS], thread);
5625
 
        return FAILED;
5626
 
}
5627
 
 
5628
 
xtPublic xtBool xt_tab_restrict_rows(XTBasicListPtr list, XTThreadPtr thread)
5629
 
{
5630
 
        u_int                           i;
5631
 
        XTRestrictItemPtr       item;
5632
 
        XTOpenTablePtr          pot = NULL;
5633
 
        XTDatabaseHPtr          db = thread->st_database;
5634
 
        xtBool                          ok = TRUE;
5635
 
 
5636
 
        for (i=0; i<list->bl_count; i++) {
5637
 
                item = (XTRestrictItemPtr) xt_bl_item_at(list, i);
5638
 
                if (item)
5639
 
                        if (pot) {
5640
 
                                if (pot->ot_table->tab_id == item->ri_tab_id)
5641
 
                                        goto check_action;
5642
 
                                xt_db_return_table_to_pool_ns(pot);
5643
 
                                pot = NULL;
5644
 
                        }
5645
 
 
5646
 
                        if (!xt_db_open_pool_table_ns(&pot, db, item->ri_tab_id)) {
5647
 
                                /* Should not happen, but just in case, we just don't
5648
 
                                 * remove the lock. We will probably end up with a deadlock
5649
 
                                 * somewhere.
5650
 
                                 */
5651
 
                                xt_log_and_clear_exception_ns();
5652
 
                                goto skip_check_action;
5653
 
                        }
5654
 
                        if (!pot)
5655
 
                                /* Can happen of the table has been dropped: */
5656
 
                                goto skip_check_action;
5657
 
 
5658
 
                        check_action:
5659
 
                        if (!pot->ot_table->tab_dic.dic_table->checkNoAction(pot, item->ri_rec_id)) {
5660
 
                                ok = FALSE;
5661
 
                                break;
5662
 
                        }
5663
 
                        skip_check_action:;
5664
 
        }
5665
 
 
5666
 
        if (pot)
5667
 
                xt_db_return_table_to_pool_ns(pot);
5668
 
        xt_bl_free(NULL, list);
5669
 
        return ok;
5670
 
}
5671
 
 
5672
 
 
5673
 
xtPublic xtBool xt_tab_seq_init(XTOpenTablePtr ot)
5674
 
{
5675
 
        register XTTableHPtr tab = ot->ot_table;
5676
 
        
5677
 
        ASSERT_NS(!ot->ot_seq_page);
5678
 
        ot->ot_seq_page = NULL;
5679
 
        ot->ot_seq_data = NULL;
5680
 
        ot->ot_on_page = FALSE;
5681
 
        ot->ot_seq_offset = 0;
5682
 
 
5683
 
        ot->ot_curr_rec_id = 0;                 // 0 is an invalid position!
5684
 
        ot->ot_curr_row_id = 0;                 // 0 is an invalid row ID!
5685
 
        ot->ot_curr_updated = FALSE;
5686
 
 
5687
 
        /* We note the current EOF before we start a sequential scan.
5688
 
         * It is basically possible to update the same record more than
5689
 
         * once because an updated record creates a new record which
5690
 
         * has a new position which may be in the area that is
5691
 
         * still to be scanned.
5692
 
         *
5693
 
         * By noting the EOF before we start a sequential scan we
5694
 
         * reduce the possibility of this.
5695
 
         *
5696
 
         * However, the possibility still remains, but it should
5697
 
         * not be a problem because a record is not modified
5698
 
         * if there is nothing to change, which is the case
5699
 
         * if the record has already been changed!
5700
 
         *
5701
 
         * NOTE (2008-01-29) There is no longer a problem with updating a
5702
 
         * record twice because records are marked by an update.
5703
 
         *
5704
 
         * [(10)] I have changed this (see below). I now check the
5705
 
         * current EOF of the table.
5706
 
         *
5707
 
         * The reason is that committed read must be able to see the
5708
 
         * changes that occur during table table scan.   * 
5709
 
         */
5710
 
        ot->ot_seq_eof_id = tab->tab_rec_eof_id;
5711
 
 
5712
 
        if (!ot->ot_thread->st_xact_data) {
5713
 
                /* MySQL ignores this error, so we
5714
 
                 * setup the sequential scan so that it will
5715
 
                 * deliver nothing!
5716
 
                 */
5717
 
                ot->ot_seq_rec_id = ot->ot_seq_eof_id;
5718
 
                xt_register_xterr(XT_REG_CONTEXT, XT_ERR_NO_TRANSACTION);
5719
 
                return FAILED;
5720
 
        }
5721
 
 
5722
 
        ot->ot_seq_rec_id = 1;
5723
 
        ot->ot_thread->st_statistics.st_scan_table++;
5724
 
        return OK;
5725
 
}
5726
 
 
5727
 
xtPublic void xt_tab_seq_reset(XTOpenTablePtr ot)
5728
 
{
5729
 
        ot->ot_seq_rec_id = 0;
5730
 
        ot->ot_seq_eof_id = 0;
5731
 
        ASSERT_NS(!ot->ot_seq_page);
5732
 
        ot->ot_seq_page = NULL;
5733
 
        ot->ot_seq_data = NULL;
5734
 
        ot->ot_on_page = FALSE;
5735
 
        ot->ot_seq_offset = 0;
5736
 
}
5737
 
 
5738
 
xtPublic void xt_tab_seq_exit(XTOpenTablePtr ot)
5739
 
{
5740
 
        register XTTableHPtr    tab = ot->ot_table;
5741
 
 
5742
 
        if (ot->ot_seq_page) {
5743
 
                tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
5744
 
                ot->ot_seq_page = NULL;
5745
 
        }
5746
 
        if (ot->ot_seq_data) {
5747
 
                XT_UNLOCK_MEMORY_PTR(ot->ot_rec_file, ot->ot_seq_data, ot->ot_thread);
5748
 
                ot->ot_seq_data = NULL;
5749
 
        }
5750
 
        ot->ot_on_page = FALSE;
5751
 
}
5752
 
 
5753
 
/* Passed to xt_tc_get_page() by the sequential scan. It is TRUE only
 * when XT_SEQ_SCAN_LOADS_CACHE is defined and memory mapped row/record
 * files (XT_USE_ROW_REC_MMAP_FILES) are not in use.
 */
#if !defined(XT_USE_ROW_REC_MMAP_FILES) && defined(XT_SEQ_SCAN_LOADS_CACHE)
#define TAB_SEQ_LOAD_CACHE		TRUE
#else
#define TAB_SEQ_LOAD_CACHE		FALSE
#endif
5762
 
 
5763
 
xtPublic void xt_tab_seq_repeat(XTOpenTablePtr ot)
5764
 
{
5765
 
        ot->ot_seq_rec_id--;
5766
 
        ot->ot_seq_offset -= ot->ot_table->tab_dic.dic_rec_size;
5767
 
}
5768
 
 
5769
 
xtPublic xtBool xt_tab_seq_next(XTOpenTablePtr ot, xtWord1 *buffer, xtBool *eof)
5770
 
{
5771
 
        register XTTableHPtr    tab = ot->ot_table;
5772
 
        register size_t                 rec_size = tab->tab_dic.dic_rec_size;
5773
 
        xtWord1                                 *buff_ptr;
5774
 
        xtRecordID                              new_rec_id;
5775
 
        xtRecordID                              invalid_rec = 0;
5776
 
 
5777
 
        next_page:
5778
 
        if (!ot->ot_on_page) {
5779
 
                ASSERT_NS(!ot->ot_seq_page);
5780
 
                if (!(ot->ot_on_page = tab->tab_recs.xt_tc_get_page(ot->ot_rec_file, ot->ot_seq_rec_id, TAB_SEQ_LOAD_CACHE, &ot->ot_seq_page, &ot->ot_seq_offset, ot->ot_thread)))
5781
 
                        return FAILED;
5782
 
                if (!ot->ot_seq_page) {
5783
 
                        XT_LOCK_MEMORY_PTR(ot->ot_seq_data, ot->ot_rec_file, xt_rec_id_to_rec_offset(tab, ot->ot_seq_rec_id), tab->tab_rows.tci_page_size, &ot->ot_thread->st_statistics.st_rec, ot->ot_thread);
5784
 
                        if (!ot->ot_seq_data)
5785
 
                                return FAILED;
5786
 
                        ot->ot_on_page = TRUE;
5787
 
                        ot->ot_seq_offset = 0;
5788
 
                }
5789
 
        }
5790
 
 
5791
 
        next_record:
5792
 
        /* [(10)] The current EOF is used: */
5793
 
        if (ot->ot_seq_rec_id >= ot->ot_seq_eof_id) {
5794
 
                *eof = TRUE;
5795
 
                return OK;
5796
 
        }
5797
 
 
5798
 
        if (ot->ot_seq_offset >= tab->tab_recs.tci_page_size) {
5799
 
                if (ot->ot_seq_page) {
5800
 
                        tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
5801
 
                        ot->ot_seq_page = NULL;
5802
 
                }
5803
 
                if (ot->ot_seq_data) {
5804
 
                        /* NULL here means that in the case of non-memory mapped
5805
 
                         * files we "keep" the lock.
5806
 
                         */
5807
 
                        XT_UNLOCK_MEMORY_PTR(ot->ot_rec_file, ot->ot_seq_data, ot->ot_thread);
5808
 
                        ot->ot_seq_data = NULL;
5809
 
                }
5810
 
                ot->ot_on_page = FALSE;
5811
 
                goto next_page;
5812
 
        }
5813
 
 
5814
 
        if (ot->ot_seq_page)
5815
 
                buff_ptr = ot->ot_seq_page->tcp_data + ot->ot_seq_offset;
5816
 
        else
5817
 
                buff_ptr = ot->ot_seq_data + ot->ot_seq_offset;
5818
 
 
5819
 
        /* This is the current record: */
5820
 
        ot->ot_curr_rec_id = ot->ot_seq_rec_id;
5821
 
        ot->ot_curr_row_id = 0;
5822
 
 
5823
 
        /* Move to the next record: */
5824
 
        ot->ot_seq_rec_id++;
5825
 
        ot->ot_seq_offset += rec_size;
5826
 
 
5827
 
        retry:
5828
 
        switch (tab_visible(ot, (XTTabRecHeadDPtr) buff_ptr, &new_rec_id)) {
5829
 
                case FALSE:
5830
 
                        goto next_record;
5831
 
                case XT_ERR:
5832
 
                        goto failed;
5833
 
                case XT_NEW:
5834
 
                        buff_ptr = ot->ot_row_rbuffer;
5835
 
                        if (!xt_tab_get_rec_data(ot, new_rec_id, rec_size, ot->ot_row_rbuffer))
5836
 
                                return XT_ERR;
5837
 
                        ot->ot_curr_rec_id = new_rec_id;
5838
 
                        break;
5839
 
                case XT_RETRY:
5840
 
                        goto retry;
5841
 
                case XT_REREAD:
5842
 
                        if (invalid_rec != ot->ot_curr_rec_id) {
5843
 
                                /* Don't re-read for the same record twice: */
5844
 
                                invalid_rec = ot->ot_curr_rec_id;
5845
 
 
5846
 
                                /* Undo move to next: */
5847
 
                                ot->ot_seq_rec_id--;
5848
 
                                ot->ot_seq_offset -= rec_size;
5849
 
                                
5850
 
                                /* Prepare to reread the page: */
5851
 
                                if (ot->ot_seq_page) {
5852
 
                                        tab->tab_recs.xt_tc_release_page(ot->ot_rec_file, ot->ot_seq_page, ot->ot_thread);
5853
 
                                        ot->ot_seq_page = NULL;
5854
 
                                }
5855
 
                                ot->ot_on_page = FALSE;
5856
 
                                goto next_page;
5857
 
                        }
5858
 
                        if (!tab_record_corrupt(ot, XT_GET_DISK_4(((XTTabRecHeadDPtr) buff_ptr)->tr_row_id_4), invalid_rec, true, 8))
5859
 
                                return XT_ERR;
5860
 
#ifdef XT_CRASH_DEBUG
5861
 
                        /* Should not happen! */
5862
 
                        xt_crash_me();
5863
 
#endif
5864
 
                        /* Continue, and skip the record... */
5865
 
                        invalid_rec = 0;
5866
 
                        goto next_record;
5867
 
                default:
5868
 
                        break;
5869
 
        }
5870
 
 
5871
 
        switch (*buff_ptr) {
5872
 
                case XT_TAB_STATUS_FIXED:
5873
 
                case XT_TAB_STATUS_FIX_CLEAN:
5874
 
                        memcpy(buffer, buff_ptr + XT_REC_FIX_HEADER_SIZE, rec_size - XT_REC_FIX_HEADER_SIZE);
5875
 
                        break;
5876
 
                case XT_TAB_STATUS_VARIABLE:
5877
 
                case XT_TAB_STATUS_VAR_CLEAN:
5878
 
                        if (!myxt_load_row(ot, buff_ptr + XT_REC_FIX_HEADER_SIZE, buffer, ot->ot_cols_req))
5879
 
                                goto failed_1;
5880
 
                        break;
5881
 
                case XT_TAB_STATUS_EXT_DLOG:
5882
 
                case XT_TAB_STATUS_EXT_CLEAN: {
5883
 
                        u_int cols_req = ot->ot_cols_req;
5884
 
 
5885
 
                        ASSERT_NS(cols_req);
5886
 
                        if (cols_req && cols_req <= tab->tab_dic.dic_fix_col_count) {
5887
 
                                if (!myxt_load_row(ot, buff_ptr + XT_REC_EXT_HEADER_SIZE, buffer, cols_req))
5888
 
                                        goto failed_1;
5889
 
                        }
5890
 
                        else {
5891
 
                                if (buff_ptr != ot->ot_row_rbuffer)
5892
 
                                        memcpy(ot->ot_row_rbuffer, buff_ptr, rec_size);
5893
 
                                if (!xt_tab_load_ext_data(ot, ot->ot_curr_rec_id, buffer, cols_req))
5894
 
                                        goto failed_1;
5895
 
                        }
5896
 
                        break;
5897
 
                }
5898
 
        }
5899
 
 
5900
 
        *eof = FALSE;
5901
 
        return OK;
5902
 
 
5903
 
        failed_1:
5904
 
 
5905
 
        failed:
5906
 
        return FAILED;
5907
 
}
5908
 
 
5909
 
/*
5910
 
 * -----------------------------------------------------------------------
5911
 
 * REPAIR TABLE
5912
 
 */
5913
 
 
5914
 
/* Operations accepted by tab_exec_repair_pending() in its 'what' argument: */
enum {
	REP_FIND = 0,
	REP_ADD = 1,
	REP_DEL = 2
};
5917
 
 
5918
 
static xtBool tab_exec_repair_pending(XTDatabaseHPtr db, int what, char *table_name)
5919
 
{
5920
 
        XTThreadPtr                     thread = xt_get_self();
5921
 
        char                            file_path[PATH_MAX];
5922
 
        XTOpenFilePtr           of = NULL;
5923
 
        int                                     len;
5924
 
        char                            *buffer = NULL, *ptr, *name;
5925
 
        char                            ch;
5926
 
        xtBool                          found = FALSE;
5927
 
 
5928
 
        xt_strcpy(PATH_MAX, file_path, db->db_main_path);
5929
 
        xt_add_pbxt_file(PATH_MAX, file_path, "repair-pending");
5930
 
        
5931
 
        if (what == REP_ADD) {
5932
 
                if (!xt_open_file_ns(&of, file_path, XT_FT_STANDARD, XT_FS_CREATE | XT_FS_MAKE_PATH, 0))
5933
 
                        return FALSE;
5934
 
        }
5935
 
        else {
5936
 
                if (!xt_open_file_ns(&of, file_path, XT_FT_STANDARD, XT_FS_DEFAULT | XT_FS_MISSING_OK, 0))
5937
 
                        return FALSE;
5938
 
        }
5939
 
        if (!of)
5940
 
                return FALSE;
5941
 
 
5942
 
        len = (int) xt_seek_eof_file(NULL, of);
5943
 
        
5944
 
        if (!(buffer = (char *) xt_malloc_ns(len + 1)))
5945
 
                goto failed;
5946
 
 
5947
 
        if (!xt_pread_file(of, 0, len, len, buffer, NULL, &thread->st_statistics.st_x, thread))
5948
 
                goto failed;
5949
 
 
5950
 
        buffer[len] = 0;
5951
 
        ptr = buffer;
5952
 
        for(;;) {
5953
 
                name = ptr;
5954
 
                while (*ptr && *ptr != '\n' && *ptr != '\r')
5955
 
                        ptr++;
5956
 
                if (ptr > name) {
5957
 
                        ch = *ptr;
5958
 
                        *ptr = 0;
5959
 
                        if (xt_tab_compare_names(name, table_name) == 0) {
5960
 
                                *ptr = ch;
5961
 
                                found = TRUE;
5962
 
                                break;
5963
 
                        }       
5964
 
                        *ptr = ch;
5965
 
                }
5966
 
                if (!*ptr)
5967
 
                        break;
5968
 
                ptr++;
5969
 
        }
5970
 
 
5971
 
        switch (what) {
5972
 
                case REP_ADD:
5973
 
                        if (!found) {
5974
 
                                /* Remove any trailing empty lines: */
5975
 
                                while (len > 0) {
5976
 
                                        if (buffer[len-1] != '\n' && buffer[len-1] != '\r')
5977
 
                                                break;
5978
 
                                        len--;
5979
 
                                }
5980
 
                                if (len > 0) {
5981
 
                                        if (!xt_pwrite_file(of, len, 1, (void *) "\n", &thread->st_statistics.st_x, thread))
5982
 
                                                goto failed;
5983
 
                                        len++;
5984
 
                                }
5985
 
                                if (!xt_pwrite_file(of, len, strlen(table_name), table_name, &thread->st_statistics.st_x, thread))
5986
 
                                        goto failed;
5987
 
                                len += strlen(table_name);
5988
 
                                if (!xt_set_eof_file(NULL, of, len))
5989
 
                                        goto failed;
5990
 
                        }
5991
 
                        break;
5992
 
                case REP_DEL:
5993
 
                        if (found) {
5994
 
                                if (*ptr != '\0')
5995
 
                                        ptr++;
5996
 
                                memmove(name, ptr, len - (ptr - buffer));
5997
 
                                len = len - (ptr - name);
5998
 
 
5999
 
                                /* Remove trailing empty lines: */
6000
 
                                while (len > 0) {
6001
 
                                        if (buffer[len-1] != '\n' && buffer[len-1] != '\r')
6002
 
                                                break;
6003
 
                                        len--;
6004
 
                                }
6005
 
 
6006
 
                                if (len > 0) {
6007
 
                                        if (!xt_pwrite_file(of, 0, len, buffer, &thread->st_statistics.st_x, thread))
6008
 
                                                goto failed;
6009
 
                                        if (!xt_set_eof_file(NULL, of, len))
6010
 
                                                goto failed;
6011
 
                                }
6012
 
                        }
6013
 
                        break;
6014
 
        }
6015
 
 
6016
 
        xt_close_file_ns(of);
6017
 
        xt_free_ns(buffer);
6018
 
 
6019
 
        if (len == 0)
6020
 
                xt_fs_delete(NULL, file_path);
6021
 
        return found;
6022
 
 
6023
 
        failed:
6024
 
        if (of)
6025
 
                xt_close_file_ns(of);
6026
 
        if (buffer)
6027
 
                xt_free_ns(buffer);
6028
 
        xt_log_and_clear_exception(thread);
6029
 
        return FALSE;
6030
 
}
6031
 
 
6032
 
xtPublic void xt_tab_make_table_name(XTPathStrPtr tab_path, char *table_name, size_t size)
6033
 
{
6034
 
        char    *nptr;
6035
 
 
6036
 
        nptr = xt_last_name_of_path(tab_path->ps_path);
6037
 
        if (xt_starts_with(nptr, "#sql")) {
6038
 
                /* {INVALID-OLD-TABLE-FIX}
6039
 
                 * Temporary files can have strange paths, for example
6040
 
                 * ..../var/tmp/mysqld.1/#sqldaec_1_6
6041
 
                 * This occurs, for example, occurs when the temp_table.test is
6042
 
                 * run using the PBXT suite in MariaDB:
6043
 
                 * ./mtr --suite=pbxt --do-test=temp_table
6044
 
                 *
6045
 
                 * Calling myxt_static_convert_file_name, with a '.', in the name
6046
 
                 * causes the error:
6047
 
                 * [ERROR] Invalid (old?) table or database name 'mysqld.1'
6048
 
                 * To prevent this, we do not convert the temporary
6049
 
                 * table names using the mysql functions.
6050
 
                 *
6051
 
                 * Note, this bug was found by Monty, and fixed by modifying
6052
 
                 * xt_2nd_last_name_of_path(), see {INVALID-OLD-TABLE-FIX}.
6053
 
                 *
6054
 
                 */
6055
 
                xt_2nd_last_name_of_path(size, table_name, tab_path->ps_path);
6056
 
                xt_strcat(size, table_name, ".");
6057
 
                xt_strcat(size, table_name, nptr);
6058
 
        }
6059
 
        else {
6060
 
                char    name_buf[XT_TABLE_NAME_SIZE*3+3];
6061
 
                char    *part_ptr;
6062
 
                size_t  len;
6063
 
 
6064
 
                xt_2nd_last_name_of_path(sizeof(name_buf), name_buf, tab_path->ps_path);
6065
 
                myxt_static_convert_file_name(name_buf, table_name, size);
6066
 
                xt_strcat(size, table_name, ".");
6067
 
                
6068
 
                /* Handle partition extensions to table names: */
6069
 
                if ((part_ptr = strstr(nptr, "#P#")))
6070
 
                        xt_strncpy(sizeof(name_buf), name_buf, nptr, part_ptr - nptr);
6071
 
                else
6072
 
                        xt_strcpy(sizeof(name_buf), name_buf, nptr);
6073
 
 
6074
 
                len = strlen(table_name);
6075
 
                myxt_static_convert_file_name(name_buf, table_name + len, size - len);
6076
 
 
6077
 
                if (part_ptr) {
6078
 
                        /* Add the partition extension (which is relevant to the engine). */
6079
 
                        char    *sub_part_ptr;
6080
 
 
6081
 
                        part_ptr += 3;
6082
 
                        if ((sub_part_ptr = strstr(part_ptr, "#SP#")))
6083
 
                                xt_strncpy(sizeof(name_buf), name_buf, part_ptr, sub_part_ptr - part_ptr);
6084
 
                        else
6085
 
                                xt_strcpy(sizeof(name_buf), name_buf, part_ptr);
6086
 
                        
6087
 
                        xt_strcat(size, table_name, " (");
6088
 
                        len = strlen(table_name);
6089
 
                        myxt_static_convert_file_name(name_buf, table_name + len, size - len);
6090
 
                        
6091
 
                        if (sub_part_ptr) {
6092
 
                        
6093
 
                                sub_part_ptr += 4;
6094
 
                                xt_strcat(size, table_name, " - ");
6095
 
                                len = strlen(table_name);
6096
 
                                myxt_static_convert_file_name(sub_part_ptr, table_name + len, size - len);
6097
 
                        }
6098
 
 
6099
 
                        xt_strcat(size, table_name, ")");
6100
 
                }
6101
 
        }
6102
 
}
6103
 
 
6104
 
xtPublic xtBool xt_tab_is_table_repair_pending(XTTableHPtr tab)
6105
 
{
6106
 
        char table_name[XT_TABLE_NAME_BUF_SIZE];
6107
 
 
6108
 
        xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
6109
 
        return tab_exec_repair_pending(tab->tab_db, REP_FIND, table_name);
6110
 
}
6111
 
 
6112
 
xtPublic void xt_tab_table_repaired(XTTableHPtr tab)
6113
 
{
6114
 
        if (tab->tab_repair_pending) {
6115
 
                char table_name[XT_TABLE_NAME_BUF_SIZE];
6116
 
 
6117
 
                tab->tab_repair_pending = FALSE;
6118
 
                xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
6119
 
                tab_exec_repair_pending(tab->tab_db, REP_DEL, table_name);
6120
 
        }
6121
 
}
6122
 
 
6123
 
xtPublic void xt_tab_set_table_repair_pending(XTTableHPtr tab)
6124
 
{
6125
 
        if (!tab->tab_repair_pending) {
6126
 
                char table_name[XT_TABLE_NAME_BUF_SIZE];
6127
 
 
6128
 
                tab->tab_repair_pending = TRUE;
6129
 
                xt_tab_make_table_name(tab->tab_name, table_name, sizeof(table_name));
6130
 
                tab_exec_repair_pending(tab->tab_db, REP_ADD, table_name);
6131
 
        }
6132
 
}
6133
 
 
6134
 
/*
6135
 
 * -----------------------------------------------------------------------
6136
 
 * EXTENDED DATA FOR RAM TABLES
6137
 
 */
6138
 
 
6139
 
xtPublic xtBool xt_tab_get_ext_slot(XTTableHPtr tab, xtLogID *log_id, xtLogOffset *log_offset, size_t XT_UNUSED(req_size))
6140
 
{
6141
 
        size_t new_slot;
6142
 
 
6143
 
        xt_spinlock_lock(&tab->tab_mem_lock);
6144
 
        if (tab->tab_mem_ind_free) {
6145
 
                new_slot = tab->tab_mem_ind_free - 1;
6146
 
                tab->tab_mem_ind_free = (size_t) tab->tab_mem_index[new_slot];
6147
 
        }
6148
 
        else {
6149
 
                if (tab->tab_mem_ind_usage == tab->tab_mem_ind_size) {
6150
 
                        /* Grow the index: */
6151
 
                        if (!xt_realloc_ns((void **) &tab->tab_mem_index, (tab->tab_mem_ind_size + 100) * sizeof(xtWord1 *)))
6152
 
                                return FAILED;
6153
 
                        tab->tab_mem_ind_size += 100;
6154
 
                }
6155
 
                new_slot = tab->tab_mem_ind_usage;
6156
 
                tab->tab_mem_ind_usage++;
6157
 
        }
6158
 
        xt_spinlock_unlock(&tab->tab_mem_lock);
6159
 
        tab->tab_mem_index[new_slot] = NULL;
6160
 
        *log_id = 1;
6161
 
        *log_offset = new_slot + 1;
6162
 
        return OK;
6163
 
}
6164
 
 
6165
 
xtPublic xtBool xt_tab_save_ext_record(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size, xtWord1 *data)
6166
 
{
6167
 
        size_t  slot = ((size_t) log_offset) - 1;
6168
 
        xtWord1 *rec_data;
6169
 
 
6170
 
        if (!(rec_data = (xtWord1 *) xt_malloc_ns(size)))
6171
 
                return FAILED;
6172
 
        memcpy(rec_data, data, size);
6173
 
        xt_spinlock_lock(&tab->tab_mem_lock);
6174
 
        tab->tab_mem_total += size;
6175
 
        tab->tab_mem_index[slot] = rec_data;
6176
 
        xt_spinlock_unlock(&tab->tab_mem_lock);
6177
 
        return OK;
6178
 
}
6179
 
 
6180
 
xtPublic void xt_tab_read_ext_record(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size, xtWord1 *data)
6181
 
{
6182
 
        size_t  slot = ((size_t) log_offset) - 1;
6183
 
 
6184
 
        if (slot < tab->tab_mem_ind_usage && tab->tab_mem_index[slot])
6185
 
                memcpy(data, tab->tab_mem_index[slot], size);
6186
 
        else
6187
 
                memset(data, 0, size);
6188
 
}
6189
 
 
6190
 
xtPublic void xt_tab_free_ext_slot(XTTableHPtr tab, xtLogID XT_UNUSED(log_id), xtLogOffset log_offset, size_t size)
6191
 
{
6192
 
        size_t  slot = ((size_t) log_offset) - 1;
6193
 
 
6194
 
        xt_spinlock_lock(&tab->tab_mem_lock);
6195
 
        if (tab->tab_mem_index[slot]) {
6196
 
                xt_free_ns(tab->tab_mem_index[slot]);
6197
 
                tab->tab_mem_total -= size;
6198
 
        }
6199
 
        tab->tab_mem_index[slot] = (xtWord1 *) tab->tab_mem_ind_free;
6200
 
        tab->tab_mem_ind_free = slot + 1;
6201
 
        xt_spinlock_unlock(&tab->tab_mem_lock);
6202
 
}
6203
 
 
6204
 
static void tab_free_ext_records(XTTableHPtr tab)
6205
 
{
6206
 
        size_t i, next;
6207
 
        
6208
 
        if (!tab->tab_mem_index)
6209
 
                return;
6210
 
 
6211
 
        i = tab->tab_mem_ind_free;
6212
 
        while (i) {
6213
 
                next = (size_t) tab->tab_mem_index[i-1];
6214
 
                tab->tab_mem_index[i-1] = NULL;
6215
 
                i = next;
6216
 
        }
6217
 
 
6218
 
        for (i=0; i<tab->tab_mem_ind_usage; i++) {
6219
 
                if (tab->tab_mem_index[i])
6220
 
                        xt_free_ns(tab->tab_mem_index[i]);
6221
 
        }
6222
 
        
6223
 
        xt_free_ns(tab->tab_mem_index);
6224
 
}
6225
 
 
6226
 
 
6227