/******************************************************
New index creation routines using a merge sort

(c) 2005,2007 Innobase Oy

Created 12/4/2005 Jan Lindstrom
Completed by Sunny Bains and Marko Makela
*******************************************************/
#include "row0merge.h"
#include "dict0dict.h"
#include "dict0boot.h"
#include "dict0crea.h"
#include "dict0load.h"
#include "mach0data.h"
#include "trx0purge.h"
#include "read0read.h"
#include "lock0lock.h"
#include "data0data.h"
#include "data0type.h"
#include "pars0pars.h"
#include "handler0alter.h"
#ifdef UNIV_DEBUG
/* Set these in order to enable debug printout. */
static ibool	row_merge_print_cmp;
static ibool	row_merge_print_read;
static ibool	row_merge_print_write;
#endif /* UNIV_DEBUG */
/* Block size for I/O operations in merge sort. The minimum is
UNIV_PAGE_SIZE, or page_get_free_space_of_empty() rounded to a power of 2.

When not creating a PRIMARY KEY that contains column prefixes, this
can be set as small as UNIV_PAGE_SIZE / 2. See the comment above
ut_ad(data_size < sizeof(row_merge_block_t)). */

typedef byte	row_merge_block_t[1048576];

/* Secondary buffer for I/O operations of merge records. This buffer
is used for writing or reading a record that spans two row_merge_block_t.
Thus, it must be able to hold one merge record, whose maximum size is
the same as the minimum size of row_merge_block_t. */

typedef byte	mrec_buf_t[UNIV_PAGE_SIZE];
/* Merge record in row_merge_block_t. The format is the same as a
record in ROW_FORMAT=COMPACT with the exception that the
REC_N_NEW_EXTRA_BYTES are omitted. */
typedef byte	mrec_t;
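/* Worked example (illustrative, assuming REC_N_NEW_EXTRA_BYTES == 5):
a COMPACT record that rec_get_converted_size_comp() reports as
size == 25 with extra == 7 occupies 25 - 5 = 20 bytes as a merge
record: extra_size = 7 - 5 = 2 extra bytes followed by 18 data bytes,
preceded by the encoding of extra_size + 1 == 3 (one byte, as 3 < 0x80;
see row_merge_buf_write()). */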
/* Buffer for sorting in main memory. */
struct row_merge_buf_struct {
	mem_heap_t*	heap;		/* memory heap where allocated */
	dict_index_t*	index;		/* the index the tuples belong to */
	ulint		total_size;	/* total amount of data bytes */
	ulint		n_tuples;	/* number of data tuples */
	ulint		max_tuples;	/* maximum number of data tuples */
	const dfield_t**tuples;		/* array of pointers to
					arrays of fields that form
					the data tuples */
	const dfield_t**tmp_tuples;	/* temporary copy of tuples,
					for sorting */
};

typedef struct row_merge_buf_struct row_merge_buf_t;
/* Information about temporary files used in merge sort is stored
in this structure. */
struct merge_file_struct {
	int	fd;	/* File descriptor */
	ulint	offset;	/* File offset */
};

typedef struct merge_file_struct merge_file_t;
#ifdef UNIV_DEBUG
/**********************************************************
Display a merge tuple. */
static
void
row_merge_tuple_print(
/*==================*/
	FILE*		f,	/* in: output stream */
	const dfield_t*	entry,	/* in: tuple to print */
	ulint		n_fields)/* in: number of fields in the tuple */
{
	ulint	j;

	for (j = 0; j < n_fields; j++) {
		const dfield_t*	field = &entry[j];

		if (dfield_is_null(field)) {
			fputs("\n NULL;", f);
		} else {
			ulint	field_len	= dfield_get_len(field);
			ulint	len		= ut_min(field_len, 20);
			if (dfield_is_ext(field)) {
				fputs("\nE", f);
			} else {
				fputs("\n ", f);
			}
			ut_print_buf(f, dfield_get_data(field), len);
			if (len != field_len) {
				fprintf(f, " (total %lu bytes)", field_len);
			}
		}
	}
	putc('\n', f);
}
#endif /* UNIV_DEBUG */
/**********************************************************
Allocate a sort buffer. */
static
row_merge_buf_t*
row_merge_buf_create_low(
/*=====================*/
					/* out,own: sort buffer */
	mem_heap_t*	heap,		/* in: heap where allocated */
	dict_index_t*	index,		/* in: secondary index */
	ulint		max_tuples,	/* in: maximum number of data tuples */
	ulint		buf_size)	/* in: size of the buffer, in bytes */
{
	row_merge_buf_t*	buf;

	ut_ad(max_tuples > 0);
	ut_ad(max_tuples <= sizeof(row_merge_block_t));
	ut_ad(max_tuples < buf_size);

	buf = mem_heap_zalloc(heap, buf_size);
	buf->heap = heap;
	buf->index = index;
	buf->max_tuples = max_tuples;
	buf->tuples = mem_heap_alloc(heap,
				     2 * max_tuples * sizeof *buf->tuples);
	buf->tmp_tuples = buf->tuples + max_tuples;

	return(buf);
}
/**********************************************************
Allocate a sort buffer. */
static
row_merge_buf_t*
row_merge_buf_create(
/*=================*/
				/* out,own: sort buffer */
	dict_index_t*	index)	/* in: secondary index */
{
	row_merge_buf_t*	buf;
	ulint			max_tuples;
	ulint			buf_size;
	mem_heap_t*		heap;

	max_tuples = sizeof(row_merge_block_t)
		/ ut_max(1, dict_index_get_min_size(index));

	buf_size = (sizeof *buf) + (max_tuples - 1) * sizeof *buf->tuples;

	heap = mem_heap_create(buf_size + sizeof(row_merge_block_t));

	buf = row_merge_buf_create_low(heap, index, max_tuples, buf_size);

	return(buf);
}
/**********************************************************
Empty a sort buffer. */
static
row_merge_buf_t*
row_merge_buf_empty(
/*================*/
					/* out: sort buffer */
	row_merge_buf_t*	buf)	/* in,own: sort buffer */
{
	ulint		buf_size;
	ulint		max_tuples	= buf->max_tuples;
	mem_heap_t*	heap		= buf->heap;
	dict_index_t*	index		= buf->index;

	buf_size = (sizeof *buf) + (max_tuples - 1) * sizeof *buf->tuples;

	mem_heap_empty(heap);

	return(row_merge_buf_create_low(heap, index, max_tuples, buf_size));
}
/**********************************************************
Deallocate a sort buffer. */
static
void
row_merge_buf_free(
/*===============*/
	row_merge_buf_t*	buf)	/* in,own: sort buffer, to be freed */
{
	mem_heap_free(buf->heap);
}
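/* Typical lifecycle of a sort buffer (illustrative sketch only;
error handling and the surrounding scan loop are omitted):

	row_merge_buf_t*	buf = row_merge_buf_create(index);

	while (row_merge_buf_add(buf, row, ext)) {
		... fetch the next row ...
	}
	row_merge_buf_sort(buf, NULL);
	row_merge_buf_write(buf, file, block);
	buf = row_merge_buf_empty(buf);	... continue adding, or ...
	row_merge_buf_free(buf);
*/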
/**********************************************************
Insert a data tuple into a sort buffer. */
static
ibool
row_merge_buf_add(
/*==============*/
					/* out: TRUE if added,
					FALSE if out of space */
	row_merge_buf_t*	buf,	/* in/out: sort buffer */
	const dtuple_t*		row,	/* in: row in clustered index */
	const row_ext_t*	ext)	/* in: cache of externally stored
					column prefixes, or NULL */
{
	ulint			i;
	ulint			n_fields;
	ulint			data_size;
	ulint			extra_size;
	const dict_index_t*	index;
	dfield_t*		entry;
	dfield_t*		field;

	if (buf->n_tuples >= buf->max_tuples) {
		return(FALSE);
	}

	UNIV_PREFETCH_R(row->fields);

	index = buf->index;

	n_fields = dict_index_get_n_fields(index);

	entry = mem_heap_alloc(buf->heap, n_fields * sizeof *entry);
	buf->tuples[buf->n_tuples] = entry;
	field = entry;

	data_size = 0;
	extra_size = UT_BITS_IN_BYTES(index->n_nullable);
	for (i = 0; i < n_fields; i++, field++) {
		const dict_field_t*	ifield;
		const dict_col_t*	col;
		ulint			col_no;
		const dfield_t*		row_field;
		ulint			len;

		ifield = dict_index_get_nth_field(index, i);
		col = ifield->col;
		col_no = dict_col_get_no(col);
		row_field = dtuple_get_nth_field(row, col_no);
		dfield_copy(field, row_field);
		len = dfield_get_len(field);

		if (dfield_is_null(field)) {
			ut_ad(!(col->prtype & DATA_NOT_NULL));
			continue;
		} else if (UNIV_LIKELY(!ext)) {
		} else if (dict_index_is_clust(index)) {
			/* Flag externally stored fields. */
			const byte*	buf = row_ext_lookup(ext, col_no,
							     &len);
			if (UNIV_LIKELY_NULL(buf)) {
				ut_a(buf != field_ref_zero);
				if (i < dict_index_get_n_unique(index)) {
					dfield_set_data(field, buf, len);
				} else {
					dfield_set_ext(field);
					len = dfield_get_len(field);
				}
			}
		} else {
			const byte*	buf = row_ext_lookup(ext, col_no,
							     &len);
			if (UNIV_LIKELY_NULL(buf)) {
				ut_a(buf != field_ref_zero);
				dfield_set_data(field, buf, len);
			}
		}

		/* If a column prefix index, take only the prefix */

		if (ifield->prefix_len) {
			len = dtype_get_at_most_n_mbchars(
				col->prtype,
				col->mbminlen, col->mbmaxlen,
				ifield->prefix_len,
				len, dfield_get_data(field));
			dfield_set_len(field, len);
		}

		ut_ad(len <= col->len || col->mtype == DATA_BLOB);

		if (ifield->fixed_len) {
			ut_ad(len == ifield->fixed_len);
			ut_ad(!dfield_is_ext(field));
		} else if (dfield_is_ext(field)) {
			extra_size += 2;
		} else if (len < 128
			   || (col->len < 256 && col->mtype != DATA_BLOB)) {
			extra_size++;
		} else {
			/* For variable-length columns, we look up the
			maximum length from the column itself. If this
			is a prefix index column shorter than 256 bytes,
			this will waste one byte. */
			extra_size += 2;
		}
		data_size += len;
	}
#ifdef UNIV_DEBUG
	{
		ulint	size;
		ulint	extra;

		size = rec_get_converted_size_comp(index,
						   REC_STATUS_ORDINARY,
						   entry, n_fields, &extra);

		ut_ad(data_size + extra_size + REC_N_NEW_EXTRA_BYTES == size);
		ut_ad(extra_size + REC_N_NEW_EXTRA_BYTES == extra);
	}
#endif /* UNIV_DEBUG */

	/* Add to the total size of the record in row_merge_block_t
	the encoded length of extra_size and the extra bytes (extra_size).
	See row_merge_buf_write() for the variable-length encoding
	of extra_size. */
	data_size += (extra_size + 1) + ((extra_size + 1) >= 0x80);
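	/* Worked example (illustrative): for extra_size == 4, the value
	extra_size + 1 == 5 fits in one length byte (5 < 0x80), so the
	line above adds 5 bytes: 4 extra bytes plus 1 length byte. For
	extra_size == 0x90, the encoded value 0x91 needs two length
	bytes, so 0x92 bytes are added in total. */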
	/* The following assertion may fail if row_merge_block_t is
	declared very small and a PRIMARY KEY is being created with
	many prefix columns. In that case, the record may exceed the
	page_zip_rec_needs_ext() limit. However, no further columns
	will be moved to external storage until the record is inserted
	to the clustered index B-tree. */
	ut_ad(data_size < sizeof(row_merge_block_t));

	/* Reserve one byte for the end marker of row_merge_block_t. */
	if (buf->total_size + data_size >= sizeof(row_merge_block_t) - 1) {
		return(FALSE);
	}

	buf->total_size += data_size;
	buf->n_tuples++;

	field = entry;

	/* Copy the data fields. */

	do {
		dfield_dup(field++, buf->heap);
	} while (--n_fields);

	return(TRUE);
}
/* Structure for reporting duplicate records. */
struct row_merge_dup_struct {
	const dict_index_t*	index;	/* index being sorted */
	TABLE*			table;	/* MySQL table object */
	ulint			n_dup;	/* number of duplicates */
};

typedef struct row_merge_dup_struct row_merge_dup_t;
/*****************************************************************
Report a duplicate key. */
static
void
row_merge_dup_report(
/*=================*/
	row_merge_dup_t*	dup,	/* in/out: for reporting duplicates */
	const dfield_t*		entry)	/* in: duplicate index entry */
{
	mrec_buf_t		buf;
	const dtuple_t*		tuple;
	dtuple_t		tuple_store;
	const rec_t*		rec;
	const dict_index_t*	index	= dup->index;
	ulint			n_fields = dict_index_get_n_fields(index);
	mem_heap_t*		heap	= NULL;
	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*			offsets;
	ulint			n_ext;

	if (dup->n_dup++) {
		/* Only report the first duplicate record,
		but count all duplicate records. */
		return;
	}

	rec_offs_init(offsets_);

	/* Convert the tuple to a record and then to MySQL format. */

	tuple = dtuple_from_fields(&tuple_store, entry, n_fields);
	n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0;

	rec = rec_convert_dtuple_to_rec(buf, index, tuple, n_ext);
	offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED,
				  &heap);

	innobase_rec_to_mysql(dup->table, rec, index, offsets);

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
}
/*****************************************************************
Compare two tuples. */
static
int
row_merge_tuple_cmp(
/*================*/
					/* out: 1, 0, -1 if a is greater,
					equal, less, respectively, than b */
	ulint			n_field,/* in: number of fields */
	const dfield_t*		a,	/* in: first tuple to be compared */
	const dfield_t*		b,	/* in: second tuple to be compared */
	row_merge_dup_t*	dup)	/* in/out: for reporting duplicates */
{
	int		cmp;
	const dfield_t*	field	= a;

	do {
		cmp = cmp_dfield_dfield(a++, b++);
	} while (!cmp && --n_field);

	if (UNIV_UNLIKELY(!cmp) && UNIV_LIKELY_NULL(dup)) {
		row_merge_dup_report(dup, field);
	}

	return(cmp);
}
/**************************************************************************
Merge sort the tuple buffer in main memory. */
static
void
row_merge_tuple_sort(
/*=================*/
	ulint			n_field,/* in: number of fields */
	row_merge_dup_t*	dup,	/* in/out: for reporting duplicates */
	const dfield_t**	tuples,	/* in/out: tuples */
	const dfield_t**	aux,	/* in/out: work area */
	ulint			low,	/* in: lower bound of the
					sorting area, inclusive */
	ulint			high)	/* in: upper bound of the
					sorting area, exclusive */
{
#define row_merge_tuple_sort_ctx(a,b,c,d) \
	row_merge_tuple_sort(n_field, dup, a, b, c, d)
#define row_merge_tuple_cmp_ctx(a,b) row_merge_tuple_cmp(n_field, a, b, dup)

	UT_SORT_FUNCTION_BODY(row_merge_tuple_sort_ctx,
			      tuples, aux, low, high, row_merge_tuple_cmp_ctx);
}
/**********************************************************
Sort a sort buffer. */
static
void
row_merge_buf_sort(
/*===============*/
	row_merge_buf_t*	buf,	/* in/out: sort buffer */
	row_merge_dup_t*	dup)	/* in/out: for reporting duplicates */
{
	row_merge_tuple_sort(dict_index_get_n_unique(buf->index), dup,
			     buf->tuples, buf->tmp_tuples, 0, buf->n_tuples);
}
/**********************************************************
Write a buffer to a block. */
static
void
row_merge_buf_write(
/*================*/
	const row_merge_buf_t*	buf,	/* in: sorted buffer */
#ifdef UNIV_DEBUG
	const merge_file_t*	of,	/* in: output file */
#endif /* UNIV_DEBUG */
	row_merge_block_t*	block)	/* out: buffer for writing to file */
#ifndef UNIV_DEBUG
# define row_merge_buf_write(buf, of, block) row_merge_buf_write(buf, block)
#endif /* !UNIV_DEBUG */
{
	const dict_index_t*	index	= buf->index;
	ulint			n_fields = dict_index_get_n_fields(index);
	byte*			b	= &(*block)[0];
	ulint			i;

	for (i = 0; i < buf->n_tuples; i++) {
		ulint		size;
		ulint		extra_size;
		const dfield_t*	entry	= buf->tuples[i];

		size = rec_get_converted_size_comp(index,
						   REC_STATUS_ORDINARY,
						   entry, n_fields,
						   &extra_size);
		ut_ad(size > extra_size);
		ut_ad(extra_size >= REC_N_NEW_EXTRA_BYTES);
		extra_size -= REC_N_NEW_EXTRA_BYTES;
		size -= REC_N_NEW_EXTRA_BYTES;

		/* Encode extra_size + 1 */
		if (extra_size + 1 < 0x80) {
			*b++ = (byte) (extra_size + 1);
		} else {
			ut_ad((extra_size + 1) < 0x8000);
			*b++ = (byte) (0x80 | ((extra_size + 1) >> 8));
			*b++ = (byte) (extra_size + 1);
		}

		ut_ad(b + size < block[1]);

		rec_convert_dtuple_to_rec_comp(b + extra_size, 0, index,
					       REC_STATUS_ORDINARY,
					       entry, n_fields);

		b += size;

#ifdef UNIV_DEBUG
		if (row_merge_print_write) {
			fprintf(stderr, "row_merge_buf_write %p,%d,%lu %lu",
				(void*) b, of->fd, (ulong) of->offset,
				(ulong) i);
			row_merge_tuple_print(stderr, entry, n_fields);
		}
#endif /* UNIV_DEBUG */
	}

	/* Write an "end-of-chunk" marker. */
	ut_a(b < block[1]);
	ut_a(b == block[0] + buf->total_size);
	*b++ = 0;
#ifdef UNIV_DEBUG_VALGRIND
	/* The rest of the block is uninitialized. Initialize it
	to avoid bogus warnings. */
	memset(b, 0xff, block[1] - b);
#endif /* UNIV_DEBUG_VALGRIND */
#ifdef UNIV_DEBUG
	if (row_merge_print_write) {
		fprintf(stderr, "row_merge_buf_write %p,%d,%lu EOF\n",
			(void*) b, of->fd, (ulong) of->offset);
	}
#endif /* UNIV_DEBUG */
}
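/* Illustrative decoding sketch for the extra_size prefix written above
(it mirrors the logic in row_merge_read_rec(); b is assumed to point
into the block):

	ulint	extra_size = *b++;
	if (extra_size >= 0x80) {
		extra_size = ((extra_size & 0x7f) << 8) | *b++;
	}
	if (extra_size == 0) {
		... end-of-chunk marker ...
	} else {
		extra_size--;	... number of extra bytes of the record ...
	}
*/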
/**********************************************************
Create a memory heap and allocate space for row_merge_rec_offsets(). */
static
mem_heap_t*
row_merge_heap_create(
/*==================*/
						/* out: memory heap */
	const dict_index_t*	index,		/* in: record descriptor */
	ulint**			offsets1,	/* out: offsets */
	ulint**			offsets2)	/* out: offsets */
{
	ulint		i	= 1 + REC_OFFS_HEADER_SIZE
		+ dict_index_get_n_fields(index);
	mem_heap_t*	heap	= mem_heap_create(2 * i * sizeof *offsets1);

	*offsets1 = mem_heap_alloc(heap, i * sizeof *offsets1);
	*offsets2 = mem_heap_alloc(heap, i * sizeof *offsets2);

	(*offsets1)[0] = (*offsets2)[0] = i;
	(*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index);

	return(heap);
}
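/* Layout sketch of the offsets arrays (illustrative): slot [0] holds
the allocated size i and slot [1] the number of fields; the remaining
header slots and the per-field offsets are filled in later by
rec_init_offsets_comp_ordinary() or rec_get_offsets(). For a 3-field
index, i = 1 + REC_OFFS_HEADER_SIZE + 3. */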
/**************************************************************************
Search an index object by name and column names. If several indexes match,
return the index with the max id. */
static
dict_index_t*
row_merge_dict_table_get_index(
/*===========================*/
						/* out: matching index,
						NULL if not found */
	dict_table_t*		table,		/* in: table */
	const merge_index_def_t*index_def)	/* in: index definition */
{
	ulint		i;
	dict_index_t*	index;
	const char**	column_names;

	column_names = mem_alloc(index_def->n_fields * sizeof *column_names);

	for (i = 0; i < index_def->n_fields; ++i) {
		column_names[i] = index_def->fields[i].field_name;
	}

	index = dict_table_get_index_by_max_id(
		table, index_def->name, column_names, index_def->n_fields);

	mem_free((void*) column_names);

	return(index);
}
/************************************************************************
Read a merge block from the file system. */
static
ibool
row_merge_read(
/*===========*/
					/* out: TRUE if request was
					successful, FALSE if fail */
	int			fd,	/* in: file descriptor */
	ulint			offset,	/* in: offset where to read */
	row_merge_block_t*	buf)	/* out: data */
{
	ib_uint64_t	ofs = ((ib_uint64_t) offset) * sizeof *buf;
	ibool		success;

	success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf,
						 (ulint) (ofs & 0xFFFFFFFF),
						 (ulint) (ofs >> 32),
						 sizeof *buf);
	if (UNIV_UNLIKELY(!success)) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
			" InnoDB: failed to read merge block at %"PRIu64"\n",
			ofs);
	}

	return(UNIV_LIKELY(success));
}
/************************************************************************
Write a merge block to the file system. */
static
ibool
row_merge_write(
/*============*/
				/* out: TRUE if request was
				successful, FALSE if fail */
	int		fd,	/* in: file descriptor */
	ulint		offset,	/* in: offset where to write */
	const void*	buf)	/* in: data */
{
	ib_uint64_t	ofs = ((ib_uint64_t) offset)
		* sizeof(row_merge_block_t);

	return(UNIV_LIKELY(os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
					 (ulint) (ofs & 0xFFFFFFFF),
					 (ulint) (ofs >> 32),
					 sizeof(row_merge_block_t))));
}
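/* Worked example (illustrative): with 1 MiB blocks, block offset 5
maps to byte offset ofs = 5 * 1048576 = 0x500000. The calls above pass
the low half (ofs & 0xFFFFFFFF) and the high half (ofs >> 32)
separately, because the file I/O primitives take the 64-bit file
position as two 32-bit words. */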
/************************************************************************
Read a merge record. */
static
const byte*
row_merge_read_rec(
/*===============*/
					/* out: pointer to next record,
					or NULL on I/O error
					or end of list */
	row_merge_block_t*	block,	/* in/out: file buffer */
	mrec_buf_t*		buf,	/* in/out: secondary buffer */
	const byte*		b,	/* in: pointer to record */
	const dict_index_t*	index,	/* in: index of the record */
	int			fd,	/* in: file descriptor */
	ulint*			foffs,	/* in/out: file offset */
	const mrec_t**		mrec,	/* out: pointer to merge record,
					or NULL on end of list
					(non-NULL on I/O error) */
	ulint*			offsets)/* out: offsets of mrec */
{
	ulint	extra_size;
	ulint	data_size;
	ulint	avail_size;

	ut_ad(b >= block[0]);
	ut_ad(b < block[1]);

	ut_ad(*offsets == 1 + REC_OFFS_HEADER_SIZE
	      + dict_index_get_n_fields(index));

	extra_size = *b++;

	if (UNIV_UNLIKELY(!extra_size)) {
		/* End of list */
		*mrec = NULL;
#ifdef UNIV_DEBUG
		if (row_merge_print_read) {
			fprintf(stderr, "row_merge_read %p,%p,%d,%lu EOF\n",
				(const void*) b, (const void*) block,
				fd, (ulong) *foffs);
		}
#endif /* UNIV_DEBUG */
		return(NULL);
	}

	if (extra_size >= 0x80) {
		/* Read another byte of extra_size. */

		if (UNIV_UNLIKELY(b >= block[1])) {
			if (!row_merge_read(fd, ++(*foffs), block)) {
err_exit:
				/* Signal I/O error. */
				*mrec = b;
				return(NULL);
			}

			/* Wrap around to the beginning of the buffer. */
			b = block[0];
		}

		extra_size = (extra_size & 0x7f) << 8;
		extra_size |= *b++;
	}

	/* Normalize extra_size. Above, value 0 signals "end of list". */
	extra_size--;

	/* Read the extra bytes. */

	if (UNIV_UNLIKELY(b + extra_size >= block[1])) {
		/* The record spans two blocks. Copy the entire record
		to the auxiliary buffer and handle this as a special
		case. */

		avail_size = block[1] - b;

		memcpy(*buf, b, avail_size);

		if (!row_merge_read(fd, ++(*foffs), block)) {

			goto err_exit;
		}

		/* Wrap around to the beginning of the buffer. */
		b = block[0];

		/* Copy the record. */
		memcpy(*buf + avail_size, b, extra_size - avail_size);
		b += extra_size - avail_size;

		*mrec = *buf + extra_size;

		rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);

		data_size = rec_offs_data_size(offsets);

		/* These overflows should be impossible given that
		records are much smaller than either buffer, and
		the record starts near the beginning of each buffer. */
		ut_a(extra_size + data_size < sizeof *buf);
		ut_a(b + data_size < block[1]);

		/* Copy the data bytes. */
		memcpy(*buf + extra_size, b, data_size);
		b += data_size;

		goto func_exit;
	}

	*mrec = b + extra_size;

	rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);

	data_size = rec_offs_data_size(offsets);
	ut_ad(extra_size + data_size < sizeof *buf);

	b += extra_size + data_size;

	if (UNIV_LIKELY(b < block[1])) {
		/* The record fits entirely in the block.
		This is the normal case. */
		goto func_exit;
	}

	/* The record spans two blocks. Copy it to buf. */

	b -= extra_size + data_size;
	avail_size = block[1] - b;
	memcpy(*buf, b, avail_size);
	*mrec = *buf + extra_size;
	rec_offs_make_valid(*mrec, index, offsets);

	if (!row_merge_read(fd, ++(*foffs), block)) {

		goto err_exit;
	}

	/* Wrap around to the beginning of the buffer. */
	b = block[0];

	/* Copy the rest of the record. */
	memcpy(*buf + avail_size, b, extra_size + data_size - avail_size);
	b += extra_size + data_size - avail_size;

func_exit:
#ifdef UNIV_DEBUG
	if (row_merge_print_read) {
		fprintf(stderr, "row_merge_read %p,%p,%d,%lu ",
			(const void*) b, (const void*) block,
			fd, (ulong) *foffs);
		rec_print_comp(stderr, *mrec, offsets);
		putc('\n', stderr);
	}
#endif /* UNIV_DEBUG */

	return(b);
}
/************************************************************************
Write a merge record. */
static
void
row_merge_write_rec_low(
/*====================*/
	byte*		b,	/* out: buffer */
	ulint		e,	/* in: encoded extra_size */
#ifdef UNIV_DEBUG
	ulint		size,	/* in: total size to write */
	int		fd,	/* in: file descriptor */
	ulint		foffs,	/* in: file offset */
#endif /* UNIV_DEBUG */
	const mrec_t*	mrec,	/* in: record to write */
	const ulint*	offsets)/* in: offsets of mrec */
#ifndef UNIV_DEBUG
# define row_merge_write_rec_low(b, e, size, fd, foffs, mrec, offsets)	\
	row_merge_write_rec_low(b, e, mrec, offsets)
#endif /* !UNIV_DEBUG */
{
#ifdef UNIV_DEBUG
	const byte* const end = b + size;
	ut_ad(e == rec_offs_extra_size(offsets) + 1);

	if (row_merge_print_write) {
		fprintf(stderr, "row_merge_write %p,%d,%lu ",
			(void*) b, fd, (ulong) foffs);
		rec_print_comp(stderr, mrec, offsets);
		putc('\n', stderr);
	}
#endif /* UNIV_DEBUG */

	if (e < 0x80) {
		*b++ = (byte) e;
	} else {
		*b++ = (byte) (0x80 | (e >> 8));
		*b++ = (byte) e;
	}

	memcpy(b, mrec - rec_offs_extra_size(offsets), rec_offs_size(offsets));
	ut_ad(b + rec_offs_size(offsets) == end);
}
/************************************************************************
Write a merge record. */
static
byte*
row_merge_write_rec(
/*================*/
					/* out: pointer to end of block,
					or NULL on error */
	row_merge_block_t*	block,	/* in/out: file buffer */
	mrec_buf_t*		buf,	/* in/out: secondary buffer */
	byte*			b,	/* in: pointer to end of block */
	int			fd,	/* in: file descriptor */
	ulint*			foffs,	/* in/out: file offset */
	const mrec_t*		mrec,	/* in: record to write */
	const ulint*		offsets)/* in: offsets of mrec */
{
	ulint	extra_size;
	ulint	size;
	ulint	avail_size;

	ut_ad(b >= block[0]);
	ut_ad(b < block[1]);
	ut_ad(mrec < block[0] || mrec > block[1]);
	ut_ad(mrec < buf[0] || mrec > buf[1]);

	/* Normalize extra_size. Value 0 signals "end of list". */
	extra_size = rec_offs_extra_size(offsets) + 1;

	size = extra_size + (extra_size >= 0x80)
		+ rec_offs_data_size(offsets);

	if (UNIV_UNLIKELY(b + size >= block[1])) {
		/* The record spans two blocks.
		Copy it to the temporary buffer first. */
		avail_size = block[1] - b;

		row_merge_write_rec_low(buf[0],
					extra_size, size, fd, *foffs,
					mrec, offsets);

		/* Copy the head of the temporary buffer, write
		the completed block, and copy the tail of the
		record to the head of the new block. */
		memcpy(b, buf[0], avail_size);

		if (!row_merge_write(fd, (*foffs)++, block)) {
			return(NULL);
		}

		UNIV_MEM_INVALID(block[0], sizeof block[0]);

		/* Copy the rest. */
		b = block[0];
		memcpy(b, buf[0] + avail_size, size - avail_size);
		b += size - avail_size;
	} else {
		row_merge_write_rec_low(b, extra_size, size, fd, *foffs,
					mrec, offsets);
		b += size;
	}

	return(b);
}
/************************************************************************
Write an end-of-list marker. */
static
byte*
row_merge_write_eof(
/*================*/
					/* out: pointer to end of block,
					or NULL on error */
	row_merge_block_t*	block,	/* in/out: file buffer */
	byte*			b,	/* in: pointer to end of block */
	int			fd,	/* in: file descriptor */
	ulint*			foffs)	/* in/out: file offset */
{
	ut_ad(b >= block[0]);
	ut_ad(b < block[1]);
#ifdef UNIV_DEBUG
	if (row_merge_print_write) {
		fprintf(stderr, "row_merge_write %p,%p,%d,%lu EOF\n",
			(void*) b, (void*) block, fd, (ulong) *foffs);
	}
#endif /* UNIV_DEBUG */

	*b++ = 0;
	UNIV_MEM_ASSERT_RW(block[0], b - block[0]);
	UNIV_MEM_ASSERT_W(block[0], sizeof block[0]);
#ifdef UNIV_DEBUG_VALGRIND
	/* The rest of the block is uninitialized. Initialize it
	to avoid bogus warnings. */
	memset(b, 0xff, block[1] - b);
#endif /* UNIV_DEBUG_VALGRIND */

	if (!row_merge_write(fd, (*foffs)++, block)) {
		return(NULL);
	}

	UNIV_MEM_INVALID(block[0], sizeof block[0]);
	return(block[0]);
}
/*****************************************************************
Compare two merge records. */
static
int
row_merge_cmp(
/*==========*/
						/* out: 1, 0, -1 if
						mrec1 is greater, equal, less,
						respectively, than mrec2 */
	const mrec_t*		mrec1,		/* in: first merge
						record to be compared */
	const mrec_t*		mrec2,		/* in: second merge
						record to be compared */
	const ulint*		offsets1,	/* in: first record offsets */
	const ulint*		offsets2,	/* in: second record offsets */
	const dict_index_t*	index)		/* in: index */
{
	int	cmp;

	cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index);

#ifdef UNIV_DEBUG
	if (row_merge_print_cmp) {
		fputs("row_merge_cmp1 ", stderr);
		rec_print_comp(stderr, mrec1, offsets1);
		fputs("\nrow_merge_cmp2 ", stderr);
		rec_print_comp(stderr, mrec2, offsets2);
		fprintf(stderr, "\nrow_merge_cmp=%d\n", cmp);
	}
#endif /* UNIV_DEBUG */

	return(cmp);
}
/************************************************************************
Read the clustered index of the table and create temporary files
containing the index entries for the indexes to be built. */
static
ulint
row_merge_read_clustered_index(
/*===========================*/
					/* out: DB_SUCCESS or error */
	trx_t*			trx,	/* in: transaction */
	TABLE*			table,	/* in/out: MySQL table object,
					for reporting erroneous records */
	const dict_table_t*	old_table,/* in: table where rows are
					read from */
	const dict_table_t*	new_table,/* in: table where indexes are
					created; identical to old_table
					unless creating a PRIMARY KEY */
	dict_index_t**		index,	/* in: indexes to be created */
	merge_file_t*		files,	/* in: temporary files */
	ulint			n_index,/* in: number of indexes to create */
	row_merge_block_t*	block)	/* in/out: file buffer */
{
	dict_index_t*		clust_index;	/* Clustered index */
	mem_heap_t*		row_heap;	/* Heap memory to create
						clustered index records */
	row_merge_buf_t**	merge_buf;	/* Temporary list for records*/
	btr_pcur_t		pcur;		/* Persistent cursor on the
						clustered index */
	mtr_t			mtr;		/* Mini transaction */
	ulint			err = DB_SUCCESS;/* Return code */
	ulint			i;
	ulint			n_nonnull = 0;	/* number of columns
						changed to NOT NULL */
	ulint*			nonnull = NULL;	/* NOT NULL columns */

	trx->op_info = "reading clustered index";

	/* Create and initialize memory for record buffers */

	merge_buf = mem_alloc(n_index * sizeof *merge_buf);

	for (i = 0; i < n_index; i++) {
		merge_buf[i] = row_merge_buf_create(index[i]);
	}

	mtr_start(&mtr);

	/* Find the clustered index and create a persistent cursor
	based on that. */

	clust_index = dict_table_get_first_index(old_table);

	btr_pcur_open_at_index_side(
		TRUE, clust_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);

	if (UNIV_UNLIKELY(old_table != new_table)) {
		ulint	n_cols = dict_table_get_n_cols(old_table);

		/* A primary key will be created.  Identify the
		columns that were flagged NOT NULL in the new table,
		so that we can quickly check that the records in the
		(old) clustered index do not violate the added NOT
		NULL constraints. */

		ut_a(n_cols == dict_table_get_n_cols(new_table));

		nonnull = mem_alloc(n_cols * sizeof *nonnull);

		for (i = 0; i < n_cols; i++) {
			if (dict_table_get_nth_col(old_table, i)->prtype
			    & DATA_NOT_NULL) {

				continue;
			}

			if (dict_table_get_nth_col(new_table, i)->prtype
			    & DATA_NOT_NULL) {

				nonnull[n_nonnull++] = i;
			}
		}

		if (!n_nonnull) {
			mem_free(nonnull);
			nonnull = NULL;
		}
	}

	row_heap = mem_heap_create(sizeof(mrec_buf_t));

	/* Scan the clustered index. */
	for (;;) {
		const rec_t*	rec;
		ulint*		offsets;
		dtuple_t*	row		= NULL;
		row_ext_t*	ext;
		ibool		has_next	= TRUE;

		btr_pcur_move_to_next_on_page(&pcur);

		/* When switching pages, commit the mini-transaction
		in order to release the latch on the old page. */

		if (btr_pcur_is_after_last_on_page(&pcur)) {
			btr_pcur_store_position(&pcur, &mtr);
			mtr_commit(&mtr);
			mtr_start(&mtr);
			btr_pcur_restore_position(BTR_SEARCH_LEAF,
						  &pcur, &mtr);
			has_next = btr_pcur_move_to_next_user_rec(&pcur, &mtr);
		}

		if (UNIV_LIKELY(has_next)) {
			rec = btr_pcur_get_rec(&pcur);
			offsets = rec_get_offsets(rec, clust_index, NULL,
						  ULINT_UNDEFINED, &row_heap);

			/* Skip delete marked records. */
			if (rec_get_deleted_flag(
				    rec, dict_table_is_comp(old_table))) {
				continue;
			}

			srv_n_rows_inserted++;

			/* Build a row based on the clustered index. */

			row = row_build(ROW_COPY_POINTERS, clust_index,
					rec, offsets,
					new_table, &ext, row_heap);

			if (UNIV_LIKELY_NULL(nonnull)) {
				for (i = 0; i < n_nonnull; i++) {
					dfield_t*	field
						= &row->fields[nonnull[i]];
					dtype_t*	field_type
						= dfield_get_type(field);

					ut_a(!(field_type->prtype
					       & DATA_NOT_NULL));

					if (dfield_is_null(field)) {
						err = DB_PRIMARY_KEY_IS_NULL;
						i = 0;
						goto err_exit;
					}

					field_type->prtype |= DATA_NOT_NULL;
				}
			}
		}

		/* Build all entries for all the indexes to be created
		in a single scan of the clustered index. */

		for (i = 0; i < n_index; i++) {
			row_merge_buf_t*	buf	= merge_buf[i];
			merge_file_t*		file	= &files[i];
			const dict_index_t*	index	= buf->index;

			if (UNIV_LIKELY
			    (row && row_merge_buf_add(buf, row, ext))) {
				continue;
			}

			/* The buffer must be sufficiently large
			to hold at least one record. */
			ut_ad(buf->n_tuples || !has_next);

			/* We have enough data tuples to form a block.
			Sort them and write to disk. */

			if (buf->n_tuples) {
				if (dict_index_is_unique(index)) {
					row_merge_dup_t	dup;
					dup.index = buf->index;
					dup.table = table;
					dup.n_dup = 0;

					row_merge_buf_sort(buf, &dup);

					if (dup.n_dup) {
						err = DB_DUPLICATE_KEY;
err_exit:
						trx->error_key_num = i;
						goto func_exit;
					}
				} else {
					row_merge_buf_sort(buf, NULL);
				}
			}

			row_merge_buf_write(buf, file, block);

			if (!row_merge_write(file->fd, file->offset++,
					     block)) {
				err = DB_OUT_OF_FILE_SPACE;
				goto err_exit;
			}

			UNIV_MEM_INVALID(block[0], sizeof block[0]);
			merge_buf[i] = row_merge_buf_empty(buf);

			/* Try writing the record again, now that
			the buffer has been written out and emptied. */

			if (UNIV_UNLIKELY
			    (row && !row_merge_buf_add(buf, row, ext))) {
				/* An empty buffer should have enough
				room for at least one record. */
				ut_error;
			}
		}

		mem_heap_empty(row_heap);

		if (UNIV_UNLIKELY(!has_next)) {
			goto func_exit;
		}
	}

func_exit:
	btr_pcur_close(&pcur);
	mtr_commit(&mtr);
	mem_heap_free(row_heap);

	if (UNIV_LIKELY_NULL(nonnull)) {
		mem_free(nonnull);
	}

	for (i = 0; i < n_index; i++) {
		row_merge_buf_free(merge_buf[i]);
	}

	mem_free(merge_buf);

	trx->op_info = "";

	return(err);
}
/*****************************************************************
Merge two blocks of linked lists on disk and write a bigger block. */
static
ulint
row_merge_blocks(
/*=============*/
					/* out: DB_SUCCESS or error code */
	const dict_index_t*	index,	/* in: index being created */
	merge_file_t*		file,	/* in/out: file containing
					index entries */
	row_merge_block_t*	block,	/* in/out: 3 buffers */
	ulint*			foffs0,	/* in/out: offset of first
					source list in the file */
	ulint*			foffs1,	/* in/out: offset of second
					source list in the file */
	merge_file_t*		of,	/* in/out: output file */
	TABLE*			table)	/* in/out: MySQL table, for
					reporting erroneous key value
					if applicable */
{
	mem_heap_t*	heap;	/* memory heap for offsets0, offsets1 */

	mrec_buf_t	buf[3];	/* buffer for handling split mrec in block[] */
	const byte*	b0;	/* pointer to block[0] */
	const byte*	b1;	/* pointer to block[1] */
	byte*		b2;	/* pointer to block[2] */
	const mrec_t*	mrec0;	/* merge rec, points to block[0] or buf[0] */
	const mrec_t*	mrec1;	/* merge rec, points to block[1] or buf[1] */
	ulint*		offsets0;/* offsets of mrec0 */
	ulint*		offsets1;/* offsets of mrec1 */

	heap = row_merge_heap_create(index, &offsets0, &offsets1);

	/* Write a record and read the next record.  Split the output
	file in two halves, which can be merged on the following pass. */
#define ROW_MERGE_WRITE_GET_NEXT(N, AT_END)				\
	do {								\
		b2 = row_merge_write_rec(&block[2], &buf[2], b2,	\
					 of->fd, &of->offset,		\
					 mrec##N, offsets##N);		\
		if (UNIV_UNLIKELY(!b2)) {				\
			goto corrupt;					\
		}							\
		b##N = row_merge_read_rec(&block[N], &buf[N],		\
					  b##N, index,			\
					  file->fd, foffs##N,		\
					  &mrec##N, offsets##N);	\
		if (UNIV_UNLIKELY(!b##N)) {				\
			if (mrec##N) {					\
				goto corrupt;				\
			}						\
			AT_END;						\
		}							\
	} while (0)

	if (!row_merge_read(file->fd, *foffs0, &block[0])
	    || !row_merge_read(file->fd, *foffs1, &block[1])) {
corrupt:
		mem_heap_free(heap);
		return(DB_CORRUPTION);
	}

	b0 = block[0];
	b1 = block[1];
	b2 = block[2];

	b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, file->fd,
				foffs0, &mrec0, offsets0);
	b1 = row_merge_read_rec(&block[1], &buf[1], b1, index, file->fd,
				foffs1, &mrec1, offsets1);
	if (UNIV_UNLIKELY(!b0 && mrec0)
	    || UNIV_UNLIKELY(!b1 && mrec1)) {

		goto corrupt;
	}

	while (mrec0 && mrec1) {
		switch (row_merge_cmp(mrec0, mrec1,
				      offsets0, offsets1, index)) {
		case 0:
			if (UNIV_UNLIKELY
			    (dict_index_is_unique(index))) {
				innobase_rec_to_mysql(table, mrec0,
						      index, offsets0);
				mem_heap_free(heap);
				return(DB_DUPLICATE_KEY);
			}
			/* fall through */
		case -1:
			ROW_MERGE_WRITE_GET_NEXT(0, goto merged);
			break;
		case 1:
			ROW_MERGE_WRITE_GET_NEXT(1, goto merged);
			break;
		default:
			ut_error;
		}
	}

merged:
	if (mrec0) {
		/* append all mrec0 to output */
		for (;;) {
			ROW_MERGE_WRITE_GET_NEXT(0, goto done0);
		}
	}
done0:
	if (mrec1) {
		/* append all mrec1 to output */
		for (;;) {
			ROW_MERGE_WRITE_GET_NEXT(1, goto done1);
		}
	}
done1:

	mem_heap_free(heap);
	b2 = row_merge_write_eof(&block[2], b2, of->fd, &of->offset);
	return(b2 ? DB_SUCCESS : DB_CORRUPTION);
}
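/* Example of one merge step (illustrative): merging the sorted runs
(1, 3, 5) and (2, 3, 6) appends 1 and 2, then compares 3 with 3.
On a unique index the tie is reported as DB_DUPLICATE_KEY; otherwise
both equal records are appended (the case 0 branch falls through for
the first one) and the merge continues with 5 vs. 6, producing
(1, 2, 3, 3, 5, 6). */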
/*****************************************************************
Merge disk files. */
static
ulint
row_merge(
/*======*/
					/* out: DB_SUCCESS or error code */
	const dict_index_t*	index,	/* in: index being created */
	merge_file_t*		file,	/* in/out: file containing
					index entries */
	ulint			half,	/* in: half the file */
	row_merge_block_t*	block,	/* in/out: 3 buffers */
	int*			tmpfd,	/* in/out: temporary file handle */
	TABLE*			table)	/* in/out: MySQL table, for
					reporting erroneous key value
					if applicable */
{
	ulint		foffs0;	/* first input offset */
	ulint		foffs1;	/* second input offset */
	ulint		error;	/* error code */
	merge_file_t	of;	/* output file */

	UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]);
	ut_ad(half > 0);

	of.fd = *tmpfd;
	of.offset = 0;

	/* Merge blocks to the output file. */
	foffs0 = 0;
	foffs1 = half;

	for (; foffs0 < half && foffs1 < file->offset; foffs0++, foffs1++) {
		error = row_merge_blocks(index, file, block,
					 &foffs0, &foffs1, &of, table);

		if (error != DB_SUCCESS) {
			return(error);
		}
	}

	/* Copy the last block, if there is one. */
	while (foffs0 < half) {
		if (!row_merge_read(file->fd, foffs0++, block)
		    || !row_merge_write(of.fd, of.offset++, block)) {
			return(DB_CORRUPTION);
		}
	}
	while (foffs1 < file->offset) {
		if (!row_merge_read(file->fd, foffs1++, block)
		    || !row_merge_write(of.fd, of.offset++, block)) {
			return(DB_CORRUPTION);
		}
	}

	/* Swap file descriptors for the next pass. */
	*tmpfd = file->fd;
	*file = of;

	UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]);

	return(DB_SUCCESS);
}
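/* Worked example for the pass arithmetic in row_merge_sort() below
(illustrative): with file->offset == 5 blocks, the passes use
blksz = 1, 2, 4 and half = ut_2pow_round((5 + blksz - 1) / 2, blksz)
= 2, 2, 4 respectively; the loop stops once blksz >= file->offset. */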
/*****************************************************************
Merge disk files. */
static
ulint
row_merge_sort(
/*===========*/
					/* out: DB_SUCCESS or error code */
	const dict_index_t*	index,	/* in: index being created */
	merge_file_t*		file,	/* in/out: file containing
					index entries */
	row_merge_block_t*	block,	/* in/out: 3 buffers */
	int*			tmpfd,	/* in/out: temporary file handle */
	TABLE*			table)	/* in/out: MySQL table, for
					reporting erroneous key value
					if applicable */
{
	ulint	blksz;	/* block size */

	for (blksz = 1; blksz < file->offset; blksz *= 2) {
		ulint	half;
		ulint	error;

		ut_ad(ut_is_2pow(blksz));
		half = ut_2pow_round((file->offset + (blksz - 1)) / 2, blksz);
		error = row_merge(index, file, half, block, tmpfd, table);

		if (error != DB_SUCCESS) {
			return(error);
		}
	}

	return(DB_SUCCESS);
}
/*****************************************************************
Copy externally stored columns to the data tuple. */
static
void
row_merge_copy_blobs(
/*=================*/
	const mrec_t*	mrec,	/* in: merge record */
	const ulint*	offsets,/* in: offsets of mrec */
	ulint		zip_size,/* in: compressed page size in bytes, or 0 */
	dtuple_t*	tuple,	/* in/out: data tuple */
	mem_heap_t*	heap)	/* in/out: memory heap */
{
	ulint	i;
	ulint	n_fields = dtuple_get_n_fields(tuple);

	for (i = 0; i < n_fields; i++) {
		ulint		len;
		const void*	data;
		dfield_t*	field = dtuple_get_nth_field(tuple, i);

		if (!dfield_is_ext(field)) {
			continue;
		}

		ut_ad(!dfield_is_null(field));

		/* The table is locked during index creation.
		Therefore, externally stored columns cannot possibly
		be freed between the time the BLOB pointers are read
		(row_merge_read_clustered_index()) and dereferenced
		(below). */
		data = btr_rec_copy_externally_stored_field(
			mrec, offsets, zip_size, i, &len, heap);

		dfield_set_data(field, data, len);
	}
}
/************************************************************************
Read sorted file containing index data tuples and insert these data
tuples into the index. */
static
ulint
row_merge_insert_index_tuples(
/*==========================*/
					/* out: DB_SUCCESS or error number */
	trx_t*			trx,	/* in: transaction */
	dict_index_t*		index,	/* in: index */
	dict_table_t*		table,	/* in: new table */
	ulint			zip_size,/* in: compressed page size of
					the old table, or 0 if uncompressed */
	int			fd,	/* in: file descriptor */
	row_merge_block_t*	block)	/* in/out: file buffer */
{
	mrec_buf_t		buf;
	const byte*		b;
	que_thr_t*		thr;
	ins_node_t*		node;
	mem_heap_t*		tuple_heap;
	mem_heap_t*		graph_heap;
	ulint			error = DB_SUCCESS;
	ulint			foffs = 0;
	ulint*			offsets;

	/* We use the insert query graph as the dummy graph
	needed in the row module call */

	trx->op_info = "inserting index entries";

	graph_heap = mem_heap_create(500);
	node = ins_node_create(INS_DIRECT, table, graph_heap);

	thr = pars_complete_graph_for_exec(node, trx, graph_heap);

	que_thr_move_to_run_state_for_mysql(thr, trx);

	tuple_heap = mem_heap_create(1000);

	{
		ulint i	= 1 + REC_OFFS_HEADER_SIZE
			+ dict_index_get_n_fields(index);
		offsets = mem_heap_alloc(graph_heap, i * sizeof *offsets);
		offsets[0] = i;
		offsets[1] = dict_index_get_n_fields(index);
	}

	b = *block;

	if (!row_merge_read(fd, foffs, block)) {
		error = DB_CORRUPTION;
	} else {
		for (;;) {
			const mrec_t*	mrec;
			dtuple_t*	dtuple;
			ulint		n_ext;

			b = row_merge_read_rec(block, &buf, b, index,
					       fd, &foffs, &mrec, offsets);
			if (UNIV_UNLIKELY(!b)) {
				/* End of list, or I/O error */
				if (mrec) {
					error = DB_CORRUPTION;
				}
				break;
			}

			dtuple = row_rec_to_index_entry_low(
				mrec, index, offsets, &n_ext, tuple_heap);

			if (UNIV_UNLIKELY(n_ext)) {
				row_merge_copy_blobs(mrec, offsets, zip_size,
						     dtuple, tuple_heap);
			}

			node->row = dtuple;
			node->table = table;
			node->trx_id = trx->id;

			ut_ad(dtuple_validate(dtuple));

			do {
				thr->run_node = thr;
				thr->prev_node = thr->common.parent;

				error = row_ins_index_entry(index, dtuple,
							    0, FALSE, thr);

				if (UNIV_LIKELY(error == DB_SUCCESS)) {

					goto next_rec;
				}

				thr->lock_state = QUE_THR_LOCK_ROW;
				trx->error_state = error;
				que_thr_stop_for_mysql(thr);
				thr->lock_state = QUE_THR_LOCK_NOLOCK;
			} while (row_mysql_handle_errors(&error, trx,
							 thr, NULL));

			goto err_exit;
next_rec:
			mem_heap_empty(tuple_heap);
		}
	}

	que_thr_stop_for_mysql_no_error(thr, trx);
err_exit:
	que_graph_free(thr->graph);

	trx->op_info = "";

	mem_heap_free(tuple_heap);

	return(error);
}
/*************************************************************************
Sets an exclusive lock on a table, for the duration of creating indexes. */
ulint
row_merge_lock_table(
/*=================*/
					/* out: error code or DB_SUCCESS */
	trx_t*		trx,		/* in/out: transaction */
	dict_table_t*	table,		/* in: table to lock */
	enum lock_mode	mode)		/* in: LOCK_X or LOCK_S */
{
	mem_heap_t*	heap;
	que_thr_t*	thr;
	ulint		err;
	sel_node_t*	node;

	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
	ut_ad(mode == LOCK_X || mode == LOCK_S);

	heap = mem_heap_create(512);

	trx->op_info = "setting table lock for creating or dropping index";

	node = sel_node_create(heap);
	thr = pars_complete_graph_for_exec(node, trx, heap);
	thr->graph->state = QUE_FORK_ACTIVE;

	/* We use the select query graph as the dummy graph needed
	in the lock module call */

	thr = que_fork_get_first_thr(que_node_get_parent(thr));
	que_thr_move_to_run_state_for_mysql(thr, trx);

run_again:
	thr->run_node = thr;
	thr->prev_node = thr->common.parent;

	err = lock_table(0, table, mode, thr);

	trx->error_state = err;

	if (UNIV_LIKELY(err == DB_SUCCESS)) {
		que_thr_stop_for_mysql_no_error(thr, trx);
	} else {
		que_thr_stop_for_mysql(thr);

		if (err != DB_QUE_THR_SUSPENDED) {
			ibool	was_lock_wait;

			was_lock_wait = row_mysql_handle_errors(
				&err, trx, thr, NULL);

			if (was_lock_wait) {
				goto run_again;
			}
		} else {
			que_thr_t*	run_thr;
			que_node_t*	parent;

			parent = que_node_get_parent(thr);
			run_thr = que_fork_start_command(parent);

			ut_a(run_thr == thr);

			/* There was a lock wait but the thread was not
			in a ready to run or running state. */
			trx->error_state = DB_LOCK_WAIT;

			goto run_again;
		}
	}

	que_graph_free(thr->graph);
	trx->op_info = "";

	return(err);
}
/*************************************************************************
Drop an index from the InnoDB system tables. */
void
row_merge_drop_index(
/*=================*/
	dict_index_t*	index,	/* in: index to be removed */
	dict_table_t*	table,	/* in: table */
	trx_t*		trx)	/* in: transaction handle */
{
	ulint		err;
	ibool		dict_lock = FALSE;
	pars_info_t*	info = pars_info_create();

	/* We use the private SQL parser of Innobase to generate the
	query graphs needed in deleting the dictionary data from system
	tables in Innobase.  Deleting a row from SYS_INDEXES table also
	frees the file segments of the B-tree associated with the index. */

	static const char str1[] =
		"PROCEDURE DROP_INDEX_PROC () IS\n"
		"BEGIN\n"
		"DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n"
		"DELETE FROM SYS_INDEXES WHERE ID = :indexid\n"
		"		AND TABLE_ID = :tableid;\n"
		"END;\n";

	ut_ad(index && table && trx);

	pars_info_add_dulint_literal(info, "indexid", index->id);
	pars_info_add_dulint_literal(info, "tableid", table->id);

	trx_start_if_not_started(trx);
	trx->op_info = "dropping index";

	if (trx->dict_operation_lock_mode == 0) {
		row_mysql_lock_data_dictionary(trx);
		dict_lock = TRUE;
	}

	err = que_eval_sql(info, str1, FALSE, trx);

	ut_a(err == DB_SUCCESS);

	/* Replace this index with another equivalent index for all
	foreign key constraints on this table where this index is used */

	dict_table_replace_index_in_foreign_list(table, index);
	dict_index_remove_from_cache(table, index);

	if (dict_lock) {
		row_mysql_unlock_data_dictionary(trx);
	}

	trx->op_info = "";
}
/*************************************************************************
Drop those indexes which were created before an error occurred
when building an index. */
void
row_merge_drop_indexes(
/*===================*/
	trx_t*		trx,		/* in: transaction */
	dict_table_t*	table,		/* in: table containing the indexes */
	dict_index_t**	index,		/* in: indexes to drop */
	ulint		num_created)	/* in: number of elements in index[] */
{
	ulint	key_num;

	for (key_num = 0; key_num < num_created; key_num++) {
		row_merge_drop_index(index[key_num], table, trx);
	}
}
/*************************************************************************
Drop all partially created indexes during crash recovery. */
void
row_merge_drop_temp_indexes(void)
/*=============================*/
{
	trx_t*	trx;
	ulint	err;

	/* We use the private SQL parser of Innobase to generate the
	query graphs needed in deleting the dictionary data from system
	tables in Innobase.  Deleting a row from SYS_INDEXES table also
	frees the file segments of the B-tree associated with the index. */
#if TEMP_INDEX_PREFIX != '\377'
# error "TEMP_INDEX_PREFIX != '\377'"
#endif
	static const char drop_temp_indexes[] =
		"PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
		"indexid CHAR;\n"
		"DECLARE CURSOR c IS SELECT ID FROM SYS_INDEXES\n"
		"WHERE SUBSTR(NAME,0,1)='\377' FOR UPDATE;\n"
		"BEGIN\n"
		"\tOPEN c;\n"
		"\tWHILE 1=1 LOOP\n"
		"\t\tFETCH c INTO indexid;\n"
		"\t\tIF (SQL % NOTFOUND) THEN\n"
		"\t\t\tEXIT;\n"
		"\t\tEND IF;\n"
		"\t\tDELETE FROM SYS_FIELDS WHERE INDEX_ID = indexid;\n"
		"\t\tDELETE FROM SYS_INDEXES WHERE CURRENT OF c;\n"
		"\tEND LOOP;\n"
		"\tCLOSE c;\n"
		"\tCOMMIT WORK;\n"
		"END;\n";

	trx = trx_allocate_for_background();
	trx->op_info = "dropping partially created indexes";
	row_mysql_lock_data_dictionary(trx);

	err = que_eval_sql(NULL, drop_temp_indexes, FALSE, trx);
	ut_a(err == DB_SUCCESS);

	row_mysql_unlock_data_dictionary(trx);
	trx_free_for_background(trx);
}
/*************************************************************************
Create a merge file. */
void
row_merge_file_create(
/*==================*/
	merge_file_t*	merge_file)	/* out: merge file structure */
{
	merge_file->fd = innobase_mysql_tmpfile();
	merge_file->offset = 0;
}
/*************************************************************************
Destroy a merge file. */
void
row_merge_file_destroy(
/*===================*/
	merge_file_t*	merge_file)	/* out: merge file structure */
{
	if (merge_file->fd != -1) {
		close(merge_file->fd);
		merge_file->fd = -1;
	}
}
/*************************************************************************
Determine the precise type of a column that is added to a temporary table:
check if a column must be constrained NOT NULL. */
static
ulint
row_merge_col_prtype(
/*=================*/
						/* out: col->prtype, possibly
						ORed with DATA_NOT_NULL */
	const dict_col_t*	col,		/* in: column */
	const char*		col_name,	/* in: name of the column */
	const merge_index_def_t*index_def)	/* in: the index definition
						of the primary key */
{
	ulint	prtype = col->prtype;
	ulint	i;

	ut_ad(index_def->ind_type & DICT_CLUSTERED);

	if (prtype & DATA_NOT_NULL) {

		return(prtype);
	}

	/* All columns that are included
	in the PRIMARY KEY must be NOT NULL. */

	for (i = 0; i < index_def->n_fields; i++) {
		if (!strcmp(col_name, index_def->fields[i].field_name)) {
			return(prtype | DATA_NOT_NULL);
		}
	}

	return(prtype);
}
/*************************************************************************
Create a temporary table for creating a primary key, using the definition
of an existing table. */
dict_table_t*
row_merge_create_temporary_table(
/*=============================*/
						/* out: table,
						or NULL on error */
	const char*		table_name,	/* in: new table name */
	const merge_index_def_t*index_def,	/* in: the index definition
						of the primary key */
	const dict_table_t*	table,		/* in: old table definition */
	trx_t*			trx)		/* in/out: transaction
						(sets error_state) */
{
	ulint		i;
	dict_table_t*	new_table = NULL;
	ulint		n_cols = dict_table_get_n_user_cols(table);
	ulint		error;
	mem_heap_t*	heap = mem_heap_create(1000);

	ut_ad(mutex_own(&dict_sys->mutex));

	new_table = dict_mem_table_create(table_name, 0, n_cols, table->flags);

	for (i = 0; i < n_cols; i++) {
		const dict_col_t*	col;
		const char*		col_name;

		col = dict_table_get_nth_col(table, i);
		col_name = dict_table_get_col_name(table, i);

		dict_mem_table_add_col(new_table, heap, col_name, col->mtype,
				       row_merge_col_prtype(col, col_name,
							    index_def),
				       col->len);
	}

	error = row_create_table_for_mysql(new_table, trx);
	mem_heap_free(heap);

	if (error != DB_SUCCESS) {
		trx->error_state = error;
		dict_mem_table_free(new_table);
		new_table = NULL;
	}

	return(new_table);
}
/*************************************************************************
Rename the temporary indexes in the dictionary to permanent ones. */
ulint
row_merge_rename_indexes(
/*=====================*/
				/* out: DB_SUCCESS if all OK */
	trx_t*		trx,	/* in/out: transaction */
	dict_table_t*	table)	/* in/out: table with new indexes */
{
	ibool		dict_lock = FALSE;
	ulint		err = DB_SUCCESS;
	pars_info_t*	info = pars_info_create();

	/* We use the private SQL parser of Innobase to generate the
	query graphs needed in renaming indexes. */

#if TEMP_INDEX_PREFIX != '\377'
# error "TEMP_INDEX_PREFIX != '\377'"
#endif

	static const char rename_indexes[] =
		"PROCEDURE RENAME_INDEXES_PROC () IS\n"
		"BEGIN\n"
		"UPDATE SYS_INDEXES SET NAME=SUBSTR(NAME,1,LENGTH(NAME)-1)\n"
		"WHERE TABLE_ID = :tableid AND SUBSTR(NAME,0,1)='\377';\n"
		"END;\n";

	ut_ad(table && trx);

	trx_start_if_not_started(trx);
	trx->op_info = "renaming indexes";

	pars_info_add_dulint_literal(info, "tableid", table->id);

	if (trx->dict_operation_lock_mode == 0) {
		row_mysql_lock_data_dictionary(trx);
		dict_lock = TRUE;
	}

	err = que_eval_sql(info, rename_indexes, FALSE, trx);

	if (err == DB_SUCCESS) {
		dict_index_t*	index = dict_table_get_first_index(table);
		do {
			if (*index->name == TEMP_INDEX_PREFIX) {
				index->name++;
			}
			index = dict_table_get_next_index(index);
		} while (index);
	}

	if (dict_lock) {
		row_mysql_unlock_data_dictionary(trx);
	}

	trx->op_info = "";

	return(err);
}
/*************************************************************************
Rename the tables in the data dictionary. */
ulint
row_merge_rename_tables(
/*====================*/
					/* out: error code or DB_SUCCESS */
	dict_table_t*	old_table,	/* in/out: old table, renamed to
					tmp_name */
	dict_table_t*	new_table,	/* in/out: new table, renamed to
					old_table->name */
	const char*	tmp_name,	/* in: new name for old_table */
	trx_t*		trx)		/* in: transaction handle */
{
	ulint		err	= DB_ERROR;
	pars_info_t*	info;
	const char*	old_name = old_table->name;

	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
	ut_ad(old_table != new_table);
	ut_ad(mutex_own(&dict_sys->mutex));

	trx->op_info = "renaming tables";
	trx_start_if_not_started(trx);

	/* We use the private SQL parser of Innobase to generate the query
	graphs needed in updating the dictionary data in system tables. */

	info = pars_info_create();

	pars_info_add_str_literal(info, "new_name", new_table->name);
	pars_info_add_str_literal(info, "old_name", old_name);
	pars_info_add_str_literal(info, "tmp_name", tmp_name);

	err = que_eval_sql(info,
			   "PROCEDURE RENAME_TABLES () IS\n"
			   "BEGIN\n"
			   "UPDATE SYS_TABLES SET NAME = :tmp_name\n"
			   " WHERE NAME = :old_name;\n"
			   "UPDATE SYS_TABLES SET NAME = :old_name\n"
			   " WHERE NAME = :new_name;\n"
			   "END;\n", FALSE, trx);

	if (err != DB_SUCCESS) {

		goto err_exit;
	}

	/* The following calls will also rename the .ibd data files if
	the tables are stored in a single-table tablespace */

	if (!dict_table_rename_in_cache(old_table, tmp_name, FALSE)
	    || !dict_table_rename_in_cache(new_table, old_name, FALSE)) {

		err = DB_ERROR;
		goto err_exit;
	}

	err = dict_load_foreigns(old_name, TRUE);

	if (err != DB_SUCCESS) {
err_exit:
		trx->error_state = DB_SUCCESS;
		trx_general_rollback_for_mysql(trx, FALSE, NULL);
		trx->error_state = DB_SUCCESS;
	}

	trx->op_info = "";

	return(err);
}
/*************************************************************************
Create and execute a query graph for creating an index. */
static
ulint
row_merge_create_index_graph(
/*=========================*/
				/* out: DB_SUCCESS or error code */
	trx_t*		trx,	/* in: trx */
	dict_table_t*	table,	/* in: table */
	dict_index_t*	index)	/* in: index */
{
	ind_node_t*	node;	/* Index creation node */
	mem_heap_t*	heap;	/* Memory heap */
	que_thr_t*	thr;	/* Query thread */
	ulint		err;

	heap = mem_heap_create(512);

	index->table = table;
	node = ind_create_graph_create(index, heap);
	thr = pars_complete_graph_for_exec(node, trx, heap);

	ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));

	que_run_threads(thr);

	err = trx->error_state;

	que_graph_free((que_t*) que_node_get_parent(thr));

	return(err);
}
/*************************************************************************
Create the index and load it into the dictionary. */
dict_index_t*
row_merge_create_index(
/*===================*/
					/* out: index, or NULL on error */
	trx_t*			trx,	/* in/out: trx (sets error_state) */
	dict_table_t*		table,	/* in: the index is on this table */
	const merge_index_def_t*index_def)
					/* in: the index definition */
{
	dict_index_t*	index;
	ulint		err;
	ulint		n_fields = index_def->n_fields;
	ulint		i;

	/* Create the index prototype, using the passed in def, this is not
	a persistent operation.  We pass 0 as the space id, and determine at
	a lower level the space id where to store the table. */

	index = dict_mem_index_create(table->name, index_def->name,
				      0, index_def->ind_type, n_fields);

	ut_a(index);

	/* Create the index id, as it will be required when we build
	the index.  We assign the id here because we want to write an
	UNDO record before we insert the entry into SYS_INDEXES. */
	ut_a(ut_dulint_is_zero(index->id));

	index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID);
	index->table = table;

	for (i = 0; i < n_fields; i++) {
		merge_index_field_t*	ifield = &index_def->fields[i];

		dict_mem_index_add_field(index, ifield->field_name,
					 ifield->prefix_len);
	}

	/* Add the index to SYS_INDEXES, this will use the prototype
	to create an entry in SYS_INDEXES. */
	err = row_merge_create_index_graph(trx, table, index);

	if (err == DB_SUCCESS) {

		index = row_merge_dict_table_get_index(
			table, index_def);

		ut_a(index);

#ifdef ROW_MERGE_IS_INDEX_USABLE
		/* Note the id of the transaction that created this
		index, we use it to restrict readers from accessing
		this index, to ensure read consistency. */
		index->trx_id = trx->id;
#endif /* ROW_MERGE_IS_INDEX_USABLE */
	} else {
		index = NULL;
	}

	return(index);
}
#ifdef ROW_MERGE_IS_INDEX_USABLE
/*************************************************************************
Check if a transaction can use an index. */
ibool
row_merge_is_index_usable(
/*======================*/
					/* out: TRUE if the index
					can be used */
	const trx_t*		trx,	/* in: transaction */
	const dict_index_t*	index)	/* in: index to check */
{
	if (!trx->read_view) {
		return(TRUE);
	}

	return(ut_dulint_cmp(index->trx_id, trx->read_view->low_limit_id) < 0);
}
#endif /* ROW_MERGE_IS_INDEX_USABLE */
/*************************************************************************
Drop the old table. */
ulint
row_merge_drop_table(
/*=================*/
				/* out: DB_SUCCESS or error code */
	trx_t*		trx,	/* in: transaction */
	dict_table_t*	table)	/* in: table to drop */
{
	ulint	err = DB_SUCCESS;
	ibool	dict_locked = FALSE;

	if (trx->dict_operation_lock_mode == 0) {
		row_mysql_lock_data_dictionary(trx);
		dict_locked = TRUE;
	}

	/* There must be no open transactions on the table. */
	ut_a(table->n_mysql_handles_opened == 0);

	err = row_drop_table_for_mysql_no_commit(table->name, trx, FALSE);

	if (dict_locked) {
		row_mysql_unlock_data_dictionary(trx);
	}

	return(err);
}
/*************************************************************************
Build indexes on a table by reading a clustered index,
creating a temporary file containing index entries, merge sorting
these index entries and inserting sorted index entries to indexes. */
ulint
row_merge_build_indexes(
/*====================*/
					/* out: DB_SUCCESS or error code */
	trx_t*		trx,		/* in: transaction */
	dict_table_t*	old_table,	/* in: table where rows are
					read from */
	dict_table_t*	new_table,	/* in: table where indexes are
					created; identical to old_table
					unless creating a PRIMARY KEY */
	dict_index_t**	indexes,	/* in: indexes to be created */
	ulint		n_indexes,	/* in: size of indexes[] */
	TABLE*		table)		/* in/out: MySQL table, for
					reporting erroneous key value
					if applicable */
{
	merge_file_t*		merge_files;
	row_merge_block_t*	block;
	ulint			block_size;
	ulint			i;
	ulint			error;
	int			tmpfd;

	trx_start_if_not_started(trx);

	/* Allocate memory for merge file data structure and initialize
	fields */

	merge_files = mem_alloc(n_indexes * sizeof *merge_files);
	block_size = 3 * sizeof *block;
	block = os_mem_alloc_large(&block_size);

	for (i = 0; i < n_indexes; i++) {

		row_merge_file_create(&merge_files[i]);
	}

	tmpfd = innobase_mysql_tmpfile();

	/* Reset the MySQL row buffer that is used when reporting
	duplicate keys. */
	innobase_rec_reset(table);

	/* Read clustered index of the table and create files for
	secondary index entries for merge sort */

	error = row_merge_read_clustered_index(
		trx, table, old_table, new_table, indexes,
		merge_files, n_indexes, block);

	if (error != DB_SUCCESS) {

		goto func_exit;
	}

	/* Now we have files containing index entries ready for
	sorting and inserting. */

	for (i = 0; i < n_indexes; i++) {
		error = row_merge_sort(indexes[i], &merge_files[i],
				       block, &tmpfd, table);

		if (error == DB_SUCCESS) {
			error = row_merge_insert_index_tuples(
				trx, indexes[i], new_table,
				dict_table_zip_size(old_table),
				merge_files[i].fd, block);
		}

		/* Close the temporary file to free up space. */
		row_merge_file_destroy(&merge_files[i]);

		if (error != DB_SUCCESS) {
			trx->error_key_num = i;
			goto func_exit;
		}
	}

func_exit:
	close(tmpfd);

	for (i = 0; i < n_indexes; i++) {
		row_merge_file_destroy(&merge_files[i]);
	}

	mem_free(merge_files);
	os_mem_free_large(block, block_size);

	return(error);
}