1
/*****************************************************************************
3
Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15
Place, Suite 330, Boston, MA 02111-1307 USA
17
*****************************************************************************/
19
/******************************************************
20
New index creation routines using a merge sort
22
Created 12/4/2005 Jan Lindstrom
23
Completed by Sunny Bains and Marko Makela
24
*******************************************************/
26
#include "row0merge.h"
32
#include "dict0dict.h"
34
#include "dict0boot.h"
35
#include "dict0crea.h"
36
#include "dict0load.h"
38
#include "mach0data.h"
43
#include "trx0purge.h"
47
#include "read0read.h"
49
#include "lock0lock.h"
50
#include "data0data.h"
51
#include "data0type.h"
53
#include "pars0pars.h"
57
#include "handler0alter.h"
60
/* Set these in order ot enable debug printout. */
61
static ibool row_merge_print_cmp;
62
static ibool row_merge_print_read;
63
static ibool row_merge_print_write;
64
#endif /* UNIV_DEBUG */
66
/* Block size for I/O operations in merge sort. The minimum is
67
UNIV_PAGE_SIZE, or page_get_free_space_of_empty() rounded to a power of 2.
69
When not creating a PRIMARY KEY that contains column prefixes, this
70
can be set as small as UNIV_PAGE_SIZE / 2. See the comment above
71
ut_ad(data_size < sizeof(row_merge_block_t)). */
73
typedef byte row_merge_block_t[1048576];
75
/* Secondary buffer for I/O operations of merge records. This buffer
76
is used for writing or reading a record that spans two row_merge_block_t.
77
Thus, it must be able to hold one merge record, whose maximum size is
78
the same as the minimum size of row_merge_block_t. */
80
typedef byte mrec_buf_t[UNIV_PAGE_SIZE];
82
/* Merge record in row_merge_block_t. The format is the same as a
83
record in ROW_FORMAT=COMPACT with the exception that the
84
REC_N_NEW_EXTRA_BYTES are omitted. */
87
/* Buffer for sorting in main memory. */
88
struct row_merge_buf_struct {
89
mem_heap_t* heap; /* memory heap where allocated */
90
dict_index_t* index; /* the index the tuples belong to */
91
ulint total_size; /* total amount of data bytes */
92
ulint n_tuples; /* number of data tuples */
93
ulint max_tuples; /* maximum number of data tuples */
94
const dfield_t**tuples; /* array of pointers to
95
arrays of fields that form
97
const dfield_t**tmp_tuples; /* temporary copy of tuples,
101
typedef struct row_merge_buf_struct row_merge_buf_t;
103
/* Information about temporary files used in merge sort are stored
106
struct merge_file_struct {
107
int fd; /* File descriptor */
108
ulint offset; /* File offset */
111
typedef struct merge_file_struct merge_file_t;
114
#ifdef UNIV_DEBUG
/**********************************************************
Display a merge tuple.  Debug-only helper: prints each field of the
tuple to the given stream, truncating long values to 20 bytes. */
static
void
row_merge_tuple_print(
/*==================*/
	FILE*		f,	/* in: output stream */
	const dfield_t*	entry,	/* in: tuple to print */
	ulint		n_fields)/* in: number of fields in the tuple */
{
	ulint	j;

	for (j = 0; j < n_fields; j++) {
		const dfield_t*	field = &entry[j];

		if (dfield_is_null(field)) {
			fputs("\n NULL;", f);
		} else {
			ulint	field_len	= dfield_get_len(field);
			/* Print at most 20 bytes of the value. */
			ulint	len		= ut_min(field_len, 20);
			if (dfield_is_ext(field)) {
				/* Externally stored column prefix. */
				fputs("\nE", f);
			} else {
				fputs("\n ", f);
			}
			ut_print_buf(f, dfield_get_data(field), len);
			if (len != field_len) {
				fprintf(f, " (total %lu bytes)", field_len);
			}
		}
	}
	putc('\n', f);
}
#endif /* UNIV_DEBUG */
/**********************************************************
150
Allocate a sort buffer. */
153
row_merge_buf_create_low(
154
/*=====================*/
155
/* out,own: sort buffer */
156
mem_heap_t* heap, /* in: heap where allocated */
157
dict_index_t* index, /* in: secondary index */
158
ulint max_tuples, /* in: maximum number of data tuples */
159
ulint buf_size) /* in: size of the buffer, in bytes */
161
row_merge_buf_t* buf;
163
ut_ad(max_tuples > 0);
164
ut_ad(max_tuples <= sizeof(row_merge_block_t));
165
ut_ad(max_tuples < buf_size);
167
buf = mem_heap_zalloc(heap, buf_size);
170
buf->max_tuples = max_tuples;
171
buf->tuples = mem_heap_alloc(heap,
172
2 * max_tuples * sizeof *buf->tuples);
173
buf->tmp_tuples = buf->tuples + max_tuples;
178
/**********************************************************
179
Allocate a sort buffer. */
182
row_merge_buf_create(
183
/*=================*/
184
/* out,own: sort buffer */
185
dict_index_t* index) /* in: secondary index */
187
row_merge_buf_t* buf;
192
max_tuples = sizeof(row_merge_block_t)
193
/ ut_max(1, dict_index_get_min_size(index));
195
buf_size = (sizeof *buf) + (max_tuples - 1) * sizeof *buf->tuples;
197
heap = mem_heap_create(buf_size + sizeof(row_merge_block_t));
199
buf = row_merge_buf_create_low(heap, index, max_tuples, buf_size);
204
/**********************************************************
205
Empty a sort buffer. */
210
/* out: sort buffer */
211
row_merge_buf_t* buf) /* in,own: sort buffer */
214
ulint max_tuples = buf->max_tuples;
215
mem_heap_t* heap = buf->heap;
216
dict_index_t* index = buf->index;
218
buf_size = (sizeof *buf) + (max_tuples - 1) * sizeof *buf->tuples;
220
mem_heap_empty(heap);
222
return(row_merge_buf_create_low(heap, index, max_tuples, buf_size));
225
/**********************************************************
226
Deallocate a sort buffer. */
231
row_merge_buf_t* buf) /* in,own: sort buffer, to be freed */
233
mem_heap_free(buf->heap);
236
/**********************************************************
237
Insert a data tuple into a sort buffer. */
242
/* out: TRUE if added,
243
FALSE if out of space */
244
row_merge_buf_t* buf, /* in/out: sort buffer */
245
const dtuple_t* row, /* in: row in clustered index */
246
const row_ext_t* ext) /* in: cache of externally stored
247
column prefixes, or NULL */
253
const dict_index_t* index;
257
if (buf->n_tuples >= buf->max_tuples) {
261
UNIV_PREFETCH_R(row->fields);
265
n_fields = dict_index_get_n_fields(index);
267
entry = mem_heap_alloc(buf->heap, n_fields * sizeof *entry);
268
buf->tuples[buf->n_tuples] = entry;
272
extra_size = UT_BITS_IN_BYTES(index->n_nullable);
274
for (i = 0; i < n_fields; i++, field++) {
275
const dict_field_t* ifield;
276
const dict_col_t* col;
278
const dfield_t* row_field;
281
ifield = dict_index_get_nth_field(index, i);
283
col_no = dict_col_get_no(col);
284
row_field = dtuple_get_nth_field(row, col_no);
285
dfield_copy(field, row_field);
286
len = dfield_get_len(field);
288
if (dfield_is_null(field)) {
289
ut_ad(!(col->prtype & DATA_NOT_NULL));
291
} else if (UNIV_LIKELY(!ext)) {
292
} else if (dict_index_is_clust(index)) {
293
/* Flag externally stored fields. */
294
const byte* buf = row_ext_lookup(ext, col_no,
296
if (UNIV_LIKELY_NULL(buf)) {
297
ut_a(buf != field_ref_zero);
298
if (i < dict_index_get_n_unique(index)) {
299
dfield_set_data(field, buf, len);
301
dfield_set_ext(field);
302
len = dfield_get_len(field);
306
const byte* buf = row_ext_lookup(ext, col_no,
308
if (UNIV_LIKELY_NULL(buf)) {
309
ut_a(buf != field_ref_zero);
310
dfield_set_data(field, buf, len);
314
/* If a column prefix index, take only the prefix */
316
if (ifield->prefix_len) {
317
len = dtype_get_at_most_n_mbchars(
319
col->mbminlen, col->mbmaxlen,
321
len, dfield_get_data(field));
322
dfield_set_len(field, len);
325
ut_ad(len <= col->len || col->mtype == DATA_BLOB);
327
if (ifield->fixed_len) {
328
ut_ad(len == ifield->fixed_len);
329
ut_ad(!dfield_is_ext(field));
330
} else if (dfield_is_ext(field)) {
333
|| (col->len < 256 && col->mtype != DATA_BLOB)) {
336
/* For variable-length columns, we look up the
337
maximum length from the column itself. If this
338
is a prefix index column shorter than 256 bytes,
339
this will waste one byte. */
350
size = rec_get_converted_size_comp(index,
352
entry, n_fields, &extra);
354
ut_ad(data_size + extra_size + REC_N_NEW_EXTRA_BYTES == size);
355
ut_ad(extra_size + REC_N_NEW_EXTRA_BYTES == extra);
357
#endif /* UNIV_DEBUG */
359
/* Add to the total size of the record in row_merge_block_t
360
the encoded length of extra_size and the extra bytes (extra_size).
361
See row_merge_buf_write() for the variable-length encoding
363
data_size += (extra_size + 1) + ((extra_size + 1) >= 0x80);
365
/* The following assertion may fail if row_merge_block_t is
366
declared very small and a PRIMARY KEY is being created with
367
many prefix columns. In that case, the record may exceed the
368
page_zip_rec_needs_ext() limit. However, no further columns
369
will be moved to external storage until the record is inserted
370
to the clustered index B-tree. */
371
ut_ad(data_size < sizeof(row_merge_block_t));
373
/* Reserve one byte for the end marker of row_merge_block_t. */
374
if (buf->total_size + data_size >= sizeof(row_merge_block_t) - 1) {
378
buf->total_size += data_size;
383
/* Copy the data fields. */
386
dfield_dup(field++, buf->heap);
387
} while (--n_fields);
392
/* Structure for reporting duplicate records. */
393
struct row_merge_dup_struct {
394
const dict_index_t* index; /* index being sorted */
395
TABLE* table; /* MySQL table object */
396
ulint n_dup; /* number of duplicates */
399
typedef struct row_merge_dup_struct row_merge_dup_t;
401
/*****************************************************************
402
Report a duplicate key. */
405
row_merge_dup_report(
406
/*=================*/
407
row_merge_dup_t* dup, /* in/out: for reporting duplicates */
408
const dfield_t* entry) /* in: duplicate index entry */
411
const dtuple_t* tuple;
412
dtuple_t tuple_store;
414
const dict_index_t* index = dup->index;
415
ulint n_fields= dict_index_get_n_fields(index);
416
mem_heap_t* heap = NULL;
417
ulint offsets_[REC_OFFS_NORMAL_SIZE];
422
/* Only report the first duplicate record,
423
but count all duplicate records. */
427
rec_offs_init(offsets_);
429
/* Convert the tuple to a record and then to MySQL format. */
431
tuple = dtuple_from_fields(&tuple_store, entry, n_fields);
432
n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0;
434
rec = rec_convert_dtuple_to_rec(buf, index, tuple, n_ext);
435
offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED,
438
innobase_rec_to_mysql(dup->table, rec, index, offsets);
440
if (UNIV_LIKELY_NULL(heap)) {
445
/*****************************************************************
446
Compare two tuples. */
451
/* out: 1, 0, -1 if a is greater,
452
equal, less, respectively, than b */
453
ulint n_field,/* in: number of fields */
454
const dfield_t* a, /* in: first tuple to be compared */
455
const dfield_t* b, /* in: second tuple to be compared */
456
row_merge_dup_t* dup) /* in/out: for reporting duplicates */
459
const dfield_t* field = a;
461
/* Compare the fields of the tuples until a difference is
462
found or we run out of fields to compare. If !cmp at the
463
end, the tuples are equal. */
465
cmp = cmp_dfield_dfield(a++, b++);
466
} while (!cmp && --n_field);
468
if (UNIV_UNLIKELY(!cmp) && UNIV_LIKELY_NULL(dup)) {
469
/* Report a duplicate value error if the tuples are
470
logically equal. NULL columns are logically inequal,
471
although they are equal in the sorting order. Find
472
out if any of the fields are NULL. */
473
for (b = field; b != a; b++) {
474
if (dfield_is_null(b)) {
480
row_merge_dup_report(dup, field);
487
/**************************************************************************
488
Merge sort the tuple buffer in main memory. */
491
row_merge_tuple_sort(
492
/*=================*/
493
ulint n_field,/* in: number of fields */
494
row_merge_dup_t* dup, /* in/out: for reporting duplicates */
495
const dfield_t** tuples, /* in/out: tuples */
496
const dfield_t** aux, /* in/out: work area */
497
ulint low, /* in: lower bound of the
498
sorting area, inclusive */
499
ulint high) /* in: upper bound of the
500
sorting area, exclusive */
502
#define row_merge_tuple_sort_ctx(a,b,c,d) \
503
row_merge_tuple_sort(n_field, dup, a, b, c, d)
504
#define row_merge_tuple_cmp_ctx(a,b) row_merge_tuple_cmp(n_field, a, b, dup)
506
UT_SORT_FUNCTION_BODY(row_merge_tuple_sort_ctx,
507
tuples, aux, low, high, row_merge_tuple_cmp_ctx);
510
/**********************************************************
516
row_merge_buf_t* buf, /* in/out: sort buffer */
517
row_merge_dup_t* dup) /* in/out: for reporting duplicates */
519
row_merge_tuple_sort(dict_index_get_n_unique(buf->index), dup,
520
buf->tuples, buf->tmp_tuples, 0, buf->n_tuples);
523
/**********************************************************
524
Write a buffer to a block. */
529
const row_merge_buf_t* buf, /* in: sorted buffer */
531
const merge_file_t* of, /* in: output file */
532
#endif /* UNIV_DEBUG */
533
row_merge_block_t* block) /* out: buffer for writing to file */
535
# define row_merge_buf_write(buf, of, block) row_merge_buf_write(buf, block)
536
#endif /* !UNIV_DEBUG */
538
const dict_index_t* index = buf->index;
539
ulint n_fields= dict_index_get_n_fields(index);
540
byte* b = &(*block)[0];
544
for (i = 0; i < buf->n_tuples; i++) {
547
const dfield_t* entry = buf->tuples[i];
549
size = rec_get_converted_size_comp(index,
553
ut_ad(size > extra_size);
554
ut_ad(extra_size >= REC_N_NEW_EXTRA_BYTES);
555
extra_size -= REC_N_NEW_EXTRA_BYTES;
556
size -= REC_N_NEW_EXTRA_BYTES;
558
/* Encode extra_size + 1 */
559
if (extra_size + 1 < 0x80) {
560
*b++ = (byte) (extra_size + 1);
562
ut_ad((extra_size + 1) < 0x8000);
563
*b++ = (byte) (0x80 | ((extra_size + 1) >> 8));
564
*b++ = (byte) (extra_size + 1);
567
ut_ad(b + size < block[1]);
569
rec_convert_dtuple_to_rec_comp(b + extra_size, 0, index,
576
if (row_merge_print_write) {
577
fprintf(stderr, "row_merge_buf_write %p,%d,%lu %lu",
578
(void*) b, of->fd, (ulong) of->offset,
580
row_merge_tuple_print(stderr, entry, n_fields);
582
#endif /* UNIV_DEBUG */
585
/* Write an "end-of-chunk" marker. */
587
ut_a(b == block[0] + buf->total_size);
589
#ifdef UNIV_DEBUG_VALGRIND
590
/* The rest of the block is uninitialized. Initialize it
591
to avoid bogus warnings. */
592
memset(b, 0xff, block[1] - b);
593
#endif /* UNIV_DEBUG_VALGRIND */
595
if (row_merge_print_write) {
596
fprintf(stderr, "row_merge_buf_write %p,%d,%lu EOF\n",
597
(void*) b, of->fd, (ulong) of->offset);
599
#endif /* UNIV_DEBUG */
602
/**********************************************************
603
Create a memory heap and allocate space for row_merge_rec_offsets(). */
606
row_merge_heap_create(
607
/*==================*/
608
/* out: memory heap */
609
const dict_index_t* index, /* in: record descriptor */
610
ulint** offsets1, /* out: offsets */
611
ulint** offsets2) /* out: offsets */
613
ulint i = 1 + REC_OFFS_HEADER_SIZE
614
+ dict_index_get_n_fields(index);
615
mem_heap_t* heap = mem_heap_create(2 * i * sizeof *offsets1);
617
*offsets1 = mem_heap_alloc(heap, i * sizeof *offsets1);
618
*offsets2 = mem_heap_alloc(heap, i * sizeof *offsets2);
620
(*offsets1)[0] = (*offsets2)[0] = i;
621
(*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index);
626
/**************************************************************************
627
Search an index object by name and column names. If several indexes match,
628
return the index with the max id. */
631
row_merge_dict_table_get_index(
632
/*===========================*/
633
/* out: matching index,
635
dict_table_t* table, /* in: table */
636
const merge_index_def_t*index_def) /* in: index definition */
640
const char** column_names;
642
column_names = mem_alloc(index_def->n_fields * sizeof *column_names);
644
for (i = 0; i < index_def->n_fields; ++i) {
645
column_names[i] = index_def->fields[i].field_name;
648
index = dict_table_get_index_by_max_id(
649
table, index_def->name, column_names, index_def->n_fields);
651
mem_free((void*) column_names);
656
/************************************************************************
657
Read a merge block from the file system. */
662
/* out: TRUE if request was
663
successful, FALSE if fail */
664
int fd, /* in: file descriptor */
665
ulint offset, /* in: offset where to read */
666
row_merge_block_t* buf) /* out: data */
668
ib_uint64_t ofs = ((ib_uint64_t) offset) * sizeof *buf;
671
success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf,
672
(ulint) (ofs & 0xFFFFFFFF),
675
if (UNIV_UNLIKELY(!success)) {
676
ut_print_timestamp(stderr);
678
" InnoDB: failed to read merge block at %"PRIu64"\n", ofs);
681
return(UNIV_LIKELY(success));
684
/************************************************************************
685
Read a merge block from the file system. */
690
/* out: TRUE if request was
691
successful, FALSE if fail */
692
int fd, /* in: file descriptor */
693
ulint offset, /* in: offset where to write */
694
const void* buf) /* in: data */
696
ib_uint64_t ofs = ((ib_uint64_t) offset)
697
* sizeof(row_merge_block_t);
699
return(UNIV_LIKELY(os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
700
(ulint) (ofs & 0xFFFFFFFF),
702
sizeof(row_merge_block_t))));
705
/************************************************************************
706
Read a merge record. */
711
/* out: pointer to next record,
714
row_merge_block_t* block, /* in/out: file buffer */
715
mrec_buf_t* buf, /* in/out: secondary buffer */
716
const byte* b, /* in: pointer to record */
717
const dict_index_t* index, /* in: index of the record */
718
int fd, /* in: file descriptor */
719
ulint* foffs, /* in/out: file offset */
720
const mrec_t** mrec, /* out: pointer to merge record,
721
or NULL on end of list
722
(non-NULL on I/O error) */
723
ulint* offsets)/* out: offsets of mrec */
731
ut_ad(b >= block[0]);
738
ut_ad(*offsets == 1 + REC_OFFS_HEADER_SIZE
739
+ dict_index_get_n_fields(index));
743
if (UNIV_UNLIKELY(!extra_size)) {
747
if (row_merge_print_read) {
748
fprintf(stderr, "row_merge_read %p,%p,%d,%lu EOF\n",
749
(const void*) b, (const void*) block,
752
#endif /* UNIV_DEBUG */
756
if (extra_size >= 0x80) {
757
/* Read another byte of extra_size. */
759
if (UNIV_UNLIKELY(b >= block[1])) {
760
if (!row_merge_read(fd, ++(*foffs), block)) {
762
/* Signal I/O error. */
767
/* Wrap around to the beginning of the buffer. */
771
extra_size = (extra_size & 0x7f) << 8;
775
/* Normalize extra_size. Above, value 0 signals "end of list". */
778
/* Read the extra bytes. */
780
if (UNIV_UNLIKELY(b + extra_size >= block[1])) {
781
/* The record spans two blocks. Copy the entire record
782
to the auxiliary buffer and handle this as a special
785
avail_size = block[1] - b;
787
memcpy(*buf, b, avail_size);
789
if (!row_merge_read(fd, ++(*foffs), block)) {
794
/* Wrap around to the beginning of the buffer. */
797
/* Copy the record. */
798
memcpy(*buf + avail_size, b, extra_size - avail_size);
799
b += extra_size - avail_size;
801
*mrec = *buf + extra_size;
803
rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
805
data_size = rec_offs_data_size(offsets);
807
/* These overflows should be impossible given that
808
records are much smaller than either buffer, and
809
the record starts near the beginning of each buffer. */
810
ut_a(extra_size + data_size < sizeof *buf);
811
ut_a(b + data_size < block[1]);
813
/* Copy the data bytes. */
814
memcpy(*buf + extra_size, b, data_size);
820
*mrec = b + extra_size;
822
rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
824
data_size = rec_offs_data_size(offsets);
825
ut_ad(extra_size + data_size < sizeof *buf);
827
b += extra_size + data_size;
829
if (UNIV_LIKELY(b < block[1])) {
830
/* The record fits entirely in the block.
831
This is the normal case. */
835
/* The record spans two blocks. Copy it to buf. */
837
b -= extra_size + data_size;
838
avail_size = block[1] - b;
839
memcpy(*buf, b, avail_size);
840
*mrec = *buf + extra_size;
841
rec_offs_make_valid(*mrec, index, offsets);
843
if (!row_merge_read(fd, ++(*foffs), block)) {
848
/* Wrap around to the beginning of the buffer. */
851
/* Copy the rest of the record. */
852
memcpy(*buf + avail_size, b, extra_size + data_size - avail_size);
853
b += extra_size + data_size - avail_size;
857
if (row_merge_print_read) {
858
fprintf(stderr, "row_merge_read %p,%p,%d,%lu ",
859
(const void*) b, (const void*) block,
861
rec_print_comp(stderr, *mrec, offsets);
864
#endif /* UNIV_DEBUG */
869
/************************************************************************
870
Write a merge record. */
873
row_merge_write_rec_low(
874
/*====================*/
875
byte* b, /* out: buffer */
876
ulint e, /* in: encoded extra_size */
878
ulint size, /* in: total size to write */
879
int fd, /* in: file descriptor */
880
ulint foffs, /* in: file offset */
881
#endif /* UNIV_DEBUG */
882
const mrec_t* mrec, /* in: record to write */
883
const ulint* offsets)/* in: offsets of mrec */
885
# define row_merge_write_rec_low(b, e, size, fd, foffs, mrec, offsets) \
886
row_merge_write_rec_low(b, e, mrec, offsets)
887
#endif /* !UNIV_DEBUG */
890
const byte* const end = b + size;
891
ut_ad(e == rec_offs_extra_size(offsets) + 1);
893
if (row_merge_print_write) {
894
fprintf(stderr, "row_merge_write %p,%d,%lu ",
895
(void*) b, fd, (ulong) foffs);
896
rec_print_comp(stderr, mrec, offsets);
899
#endif /* UNIV_DEBUG */
904
*b++ = (byte) (0x80 | (e >> 8));
908
memcpy(b, mrec - rec_offs_extra_size(offsets), rec_offs_size(offsets));
909
ut_ad(b + rec_offs_size(offsets) == end);
912
/************************************************************************
913
Write a merge record. */
918
/* out: pointer to end of block,
920
row_merge_block_t* block, /* in/out: file buffer */
921
mrec_buf_t* buf, /* in/out: secondary buffer */
922
byte* b, /* in: pointer to end of block */
923
int fd, /* in: file descriptor */
924
ulint* foffs, /* in/out: file offset */
925
const mrec_t* mrec, /* in: record to write */
926
const ulint* offsets)/* in: offsets of mrec */
934
ut_ad(b >= block[0]);
938
ut_ad(mrec < block[0] || mrec > block[1]);
939
ut_ad(mrec < buf[0] || mrec > buf[1]);
941
/* Normalize extra_size. Value 0 signals "end of list". */
942
extra_size = rec_offs_extra_size(offsets) + 1;
944
size = extra_size + (extra_size >= 0x80)
945
+ rec_offs_data_size(offsets);
947
if (UNIV_UNLIKELY(b + size >= block[1])) {
948
/* The record spans two blocks.
949
Copy it to the temporary buffer first. */
950
avail_size = block[1] - b;
952
row_merge_write_rec_low(buf[0],
953
extra_size, size, fd, *foffs,
956
/* Copy the head of the temporary buffer, write
957
the completed block, and copy the tail of the
958
record to the head of the new block. */
959
memcpy(b, buf[0], avail_size);
961
if (!row_merge_write(fd, (*foffs)++, block)) {
965
UNIV_MEM_INVALID(block[0], sizeof block[0]);
969
memcpy(b, buf[0] + avail_size, size - avail_size);
970
b += size - avail_size;
972
row_merge_write_rec_low(b, extra_size, size, fd, *foffs,
980
/************************************************************************
981
Write an end-of-list marker. */
986
/* out: pointer to end of block,
988
row_merge_block_t* block, /* in/out: file buffer */
989
byte* b, /* in: pointer to end of block */
990
int fd, /* in: file descriptor */
991
ulint* foffs) /* in/out: file offset */
994
ut_ad(b >= block[0]);
998
if (row_merge_print_write) {
999
fprintf(stderr, "row_merge_write %p,%p,%d,%lu EOF\n",
1000
(void*) b, (void*) block, fd, (ulong) *foffs);
1002
#endif /* UNIV_DEBUG */
1005
UNIV_MEM_ASSERT_RW(block[0], b - block[0]);
1006
UNIV_MEM_ASSERT_W(block[0], sizeof block[0]);
1007
#ifdef UNIV_DEBUG_VALGRIND
1008
/* The rest of the block is uninitialized. Initialize it
1009
to avoid bogus warnings. */
1010
memset(b, 0xff, block[1] - b);
1011
#endif /* UNIV_DEBUG_VALGRIND */
1013
if (!row_merge_write(fd, (*foffs)++, block)) {
1017
UNIV_MEM_INVALID(block[0], sizeof block[0]);
1021
/*****************************************************************
1022
Compare two merge records. */
1028
mrec1 is greater, equal, less,
1029
respectively, than mrec2 */
1030
const mrec_t* mrec1, /* in: first merge
1031
record to be compared */
1032
const mrec_t* mrec2, /* in: second merge
1033
record to be compared */
1034
const ulint* offsets1, /* in: first record offsets */
1035
const ulint* offsets2, /* in: second record offsets */
1036
const dict_index_t* index) /* in: index */
1040
cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index);
1043
if (row_merge_print_cmp) {
1044
fputs("row_merge_cmp1 ", stderr);
1045
rec_print_comp(stderr, mrec1, offsets1);
1046
fputs("\nrow_merge_cmp2 ", stderr);
1047
rec_print_comp(stderr, mrec2, offsets2);
1048
fprintf(stderr, "\nrow_merge_cmp=%d\n", cmp);
1050
#endif /* UNIV_DEBUG */
1055
/************************************************************************
1056
Reads clustered index of the table and create temporary files
1057
containing the index entries for the indexes to be built. */
1060
row_merge_read_clustered_index(
1061
/*===========================*/
1062
/* out: DB_SUCCESS or error */
1063
trx_t* trx, /* in: transaction */
1064
TABLE* table, /* in/out: MySQL table object,
1065
for reporting erroneous records */
1066
const dict_table_t* old_table,/* in: table where rows are
1068
const dict_table_t* new_table,/* in: table where indexes are
1069
created; identical to old_table
1070
unless creating a PRIMARY KEY */
1071
dict_index_t** index, /* in: indexes to be created */
1072
merge_file_t* files, /* in: temporary files */
1073
ulint n_index,/* in: number of indexes to create */
1074
row_merge_block_t* block) /* in/out: file buffer */
1076
dict_index_t* clust_index; /* Clustered index */
1077
mem_heap_t* row_heap; /* Heap memory to create
1078
clustered index records */
1079
row_merge_buf_t** merge_buf; /* Temporary list for records*/
1080
btr_pcur_t pcur; /* Persistent cursor on the
1082
mtr_t mtr; /* Mini transaction */
1083
ulint err = DB_SUCCESS;/* Return code */
1085
ulint n_nonnull = 0; /* number of columns
1086
changed to NOT NULL */
1087
ulint* nonnull = NULL; /* NOT NULL columns */
1089
trx->op_info = "reading clustered index";
1097
/* Create and initialize memory for record buffers */
1099
merge_buf = mem_alloc(n_index * sizeof *merge_buf);
1101
for (i = 0; i < n_index; i++) {
1102
merge_buf[i] = row_merge_buf_create(index[i]);
1107
/* Find the clustered index and create a persistent cursor
1110
clust_index = dict_table_get_first_index(old_table);
1112
btr_pcur_open_at_index_side(
1113
TRUE, clust_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
1115
if (UNIV_UNLIKELY(old_table != new_table)) {
1116
ulint n_cols = dict_table_get_n_cols(old_table);
1118
/* A primary key will be created. Identify the
1119
columns that were flagged NOT NULL in the new table,
1120
so that we can quickly check that the records in the
1121
(old) clustered index do not violate the added NOT
1122
NULL constraints. */
1124
ut_a(n_cols == dict_table_get_n_cols(new_table));
1126
nonnull = mem_alloc(n_cols * sizeof *nonnull);
1128
for (i = 0; i < n_cols; i++) {
1129
if (dict_table_get_nth_col(old_table, i)->prtype
1135
if (dict_table_get_nth_col(new_table, i)->prtype
1138
nonnull[n_nonnull++] = i;
1148
row_heap = mem_heap_create(sizeof(mrec_buf_t));
1150
/* Scan the clustered index. */
1154
dtuple_t* row = NULL;
1156
ibool has_next = TRUE;
1158
btr_pcur_move_to_next_on_page(&pcur);
1160
/* When switching pages, commit the mini-transaction
1161
in order to release the latch on the old page. */
1163
if (btr_pcur_is_after_last_on_page(&pcur)) {
1164
btr_pcur_store_position(&pcur, &mtr);
1167
btr_pcur_restore_position(BTR_SEARCH_LEAF,
1169
has_next = btr_pcur_move_to_next_user_rec(&pcur, &mtr);
1172
if (UNIV_LIKELY(has_next)) {
1173
rec = btr_pcur_get_rec(&pcur);
1174
offsets = rec_get_offsets(rec, clust_index, NULL,
1175
ULINT_UNDEFINED, &row_heap);
1177
/* Skip delete marked records. */
1178
if (rec_get_deleted_flag(
1179
rec, dict_table_is_comp(old_table))) {
1183
srv_n_rows_inserted++;
1185
/* Build a row based on the clustered index. */
1187
row = row_build(ROW_COPY_POINTERS, clust_index,
1189
new_table, &ext, row_heap);
1191
if (UNIV_LIKELY_NULL(nonnull)) {
1192
for (i = 0; i < n_nonnull; i++) {
1194
= &row->fields[nonnull[i]];
1196
= dfield_get_type(field);
1198
ut_a(!(field_type->prtype
1201
if (dfield_is_null(field)) {
1202
err = DB_PRIMARY_KEY_IS_NULL;
1207
field_type->prtype |= DATA_NOT_NULL;
1212
/* Build all entries for all the indexes to be created
1213
in a single scan of the clustered index. */
1215
for (i = 0; i < n_index; i++) {
1216
row_merge_buf_t* buf = merge_buf[i];
1217
merge_file_t* file = &files[i];
1218
const dict_index_t* index = buf->index;
1221
(row && row_merge_buf_add(buf, row, ext))) {
1225
/* The buffer must be sufficiently large
1226
to hold at least one record. */
1227
ut_ad(buf->n_tuples || !has_next);
1229
/* We have enough data tuples to form a block.
1230
Sort them and write to disk. */
1232
if (buf->n_tuples) {
1233
if (dict_index_is_unique(index)) {
1234
row_merge_dup_t dup;
1235
dup.index = buf->index;
1239
row_merge_buf_sort(buf, &dup);
1242
err = DB_DUPLICATE_KEY;
1244
trx->error_key_num = i;
1248
row_merge_buf_sort(buf, NULL);
1252
row_merge_buf_write(buf, file, block);
1254
if (!row_merge_write(file->fd, file->offset++,
1256
err = DB_OUT_OF_FILE_SPACE;
1260
UNIV_MEM_INVALID(block[0], sizeof block[0]);
1261
merge_buf[i] = row_merge_buf_empty(buf);
1263
/* Try writing the record again, now that
1264
the buffer has been written out and emptied. */
1267
(row && !row_merge_buf_add(buf, row, ext))) {
1268
/* An empty buffer should have enough
1269
room for at least one record. */
1274
mem_heap_empty(row_heap);
1276
if (UNIV_UNLIKELY(!has_next)) {
1282
btr_pcur_close(&pcur);
1284
mem_heap_free(row_heap);
1286
if (UNIV_LIKELY_NULL(nonnull)) {
1290
for (i = 0; i < n_index; i++) {
1291
row_merge_buf_free(merge_buf[i]);
1294
mem_free(merge_buf);
1301
/*****************************************************************
1302
Merge two blocks of linked lists on disk and write a bigger block. */
1307
/* out: DB_SUCCESS or error code */
1308
const dict_index_t* index, /* in: index being created */
1309
merge_file_t* file, /* in/out: file containing
1311
row_merge_block_t* block, /* in/out: 3 buffers */
1312
ulint* foffs0, /* in/out: offset of first
1313
source list in the file */
1314
ulint* foffs1, /* in/out: offset of second
1315
source list in the file */
1316
merge_file_t* of, /* in/out: output file */
1317
TABLE* table) /* in/out: MySQL table, for
1318
reporting erroneous key value
1321
mem_heap_t* heap; /* memory heap for offsets0, offsets1 */
1323
mrec_buf_t buf[3]; /* buffer for handling split mrec in block[] */
1324
const byte* b0; /* pointer to block[0] */
1325
const byte* b1; /* pointer to block[1] */
1326
byte* b2; /* pointer to block[2] */
1327
const mrec_t* mrec0; /* merge rec, points to block[0] or buf[0] */
1328
const mrec_t* mrec1; /* merge rec, points to block[1] or buf[1] */
1329
ulint* offsets0;/* offsets of mrec0 */
1330
ulint* offsets1;/* offsets of mrec1 */
1332
heap = row_merge_heap_create(index, &offsets0, &offsets1);
1334
/* Write a record and read the next record. Split the output
1335
file in two halves, which can be merged on the following pass. */
1336
#define ROW_MERGE_WRITE_GET_NEXT(N, AT_END) \
1338
b2 = row_merge_write_rec(&block[2], &buf[2], b2, \
1339
of->fd, &of->offset, \
1340
mrec##N, offsets##N); \
1341
if (UNIV_UNLIKELY(!b2)) { \
1344
b##N = row_merge_read_rec(&block[N], &buf[N], \
1346
file->fd, foffs##N, \
1347
&mrec##N, offsets##N); \
1348
if (UNIV_UNLIKELY(!b##N)) { \
1356
if (!row_merge_read(file->fd, *foffs0, &block[0])
1357
|| !row_merge_read(file->fd, *foffs1, &block[1])) {
1359
mem_heap_free(heap);
1360
return(DB_CORRUPTION);
1367
b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, file->fd,
1368
foffs0, &mrec0, offsets0);
1369
b1 = row_merge_read_rec(&block[1], &buf[1], b1, index, file->fd,
1370
foffs1, &mrec1, offsets1);
1371
if (UNIV_UNLIKELY(!b0 && mrec0)
1372
|| UNIV_UNLIKELY(!b1 && mrec1)) {
1377
while (mrec0 && mrec1) {
1378
switch (row_merge_cmp(mrec0, mrec1,
1379
offsets0, offsets1, index)) {
1382
(dict_index_is_unique(index))) {
1383
innobase_rec_to_mysql(table, mrec0,
1385
mem_heap_free(heap);
1386
return(DB_DUPLICATE_KEY);
1390
ROW_MERGE_WRITE_GET_NEXT(0, goto merged);
1393
ROW_MERGE_WRITE_GET_NEXT(1, goto merged);
1403
/* append all mrec0 to output */
1405
ROW_MERGE_WRITE_GET_NEXT(0, goto done0);
1410
/* append all mrec1 to output */
1412
ROW_MERGE_WRITE_GET_NEXT(1, goto done1);
1417
mem_heap_free(heap);
1418
b2 = row_merge_write_eof(&block[2], b2, of->fd, &of->offset);
1419
return(b2 ? DB_SUCCESS : DB_CORRUPTION);
1422
/*****************************************************************
1423
Merge disk files. */
1428
/* out: DB_SUCCESS or error code */
1429
const dict_index_t* index, /* in: index being created */
1430
merge_file_t* file, /* in/out: file containing
1432
ulint half, /* in: half the file */
1433
row_merge_block_t* block, /* in/out: 3 buffers */
1434
int* tmpfd, /* in/out: temporary file handle */
1435
TABLE* table) /* in/out: MySQL table, for
1436
reporting erroneous key value
1439
ulint foffs0; /* first input offset */
1440
ulint foffs1; /* second input offset */
1441
ulint error; /* error code */
1442
merge_file_t of; /* output file */
1444
UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]);
1450
/* Merge blocks to the output file. */
1454
for (; foffs0 < half && foffs1 < file->offset; foffs0++, foffs1++) {
1455
error = row_merge_blocks(index, file, block,
1456
&foffs0, &foffs1, &of, table);
1458
if (error != DB_SUCCESS) {
1463
/* Copy the last block, if there is one. */
1464
while (foffs0 < half) {
1465
if (!row_merge_read(file->fd, foffs0++, block)
1466
|| !row_merge_write(of.fd, of.offset++, block)) {
1467
return(DB_CORRUPTION);
1470
while (foffs1 < file->offset) {
1471
if (!row_merge_read(file->fd, foffs1++, block)
1472
|| !row_merge_write(of.fd, of.offset++, block)) {
1473
return(DB_CORRUPTION);
1477
/* Swap file descriptors for the next pass. */
1481
UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]);
1486
/*****************************************************************
1487
Merge disk files. */
1492
/* out: DB_SUCCESS or error code */
1493
const dict_index_t* index, /* in: index being created */
1494
merge_file_t* file, /* in/out: file containing
1496
row_merge_block_t* block, /* in/out: 3 buffers */
1497
int* tmpfd, /* in/out: temporary file handle */
1498
TABLE* table) /* in/out: MySQL table, for
1499
reporting erroneous key value
1502
ulint blksz; /* block size */
1504
for (blksz = 1; blksz < file->offset; blksz *= 2) {
1508
ut_ad(ut_is_2pow(blksz));
1509
half = ut_2pow_round((file->offset + (blksz - 1)) / 2, blksz);
1510
error = row_merge(index, file, half, block, tmpfd, table);
1512
if (error != DB_SUCCESS) {
1520
/*****************************************************************
1521
Copy externally stored columns to the data tuple. */
1524
row_merge_copy_blobs(
1525
/*=================*/
1526
const mrec_t* mrec, /* in: merge record */
1527
const ulint* offsets,/* in: offsets of mrec */
1528
ulint zip_size,/* in: compressed page size in bytes, or 0 */
1529
dtuple_t* tuple, /* in/out: data tuple */
1530
mem_heap_t* heap) /* in/out: memory heap */
1533
ulint n_fields = dtuple_get_n_fields(tuple);
1535
for (i = 0; i < n_fields; i++) {
1538
dfield_t* field = dtuple_get_nth_field(tuple, i);
1540
if (!dfield_is_ext(field)) {
1544
ut_ad(!dfield_is_null(field));
1546
/* The table is locked during index creation.
1547
Therefore, externally stored columns cannot possibly
1548
be freed between the time the BLOB pointers are read
1549
(row_merge_read_clustered_index()) and dereferenced
1551
data = btr_rec_copy_externally_stored_field(
1552
mrec, offsets, zip_size, i, &len, heap);
1554
dfield_set_data(field, data, len);
1558
/************************************************************************
1559
Read sorted file containing index data tuples and insert these data
1560
tuples to the index */
1563
row_merge_insert_index_tuples(
1564
/*==========================*/
1565
/* out: DB_SUCCESS or error number */
1566
trx_t* trx, /* in: transaction */
1567
dict_index_t* index, /* in: index */
1568
dict_table_t* table, /* in: new table */
1569
ulint zip_size,/* in: compressed page size of
1570
the old table, or 0 if uncompressed */
1571
int fd, /* in: file descriptor */
1572
row_merge_block_t* block) /* in/out: file buffer */
1578
mem_heap_t* tuple_heap;
1579
mem_heap_t* graph_heap;
1580
ulint error = DB_SUCCESS;
1588
/* We use the insert query graph as the dummy graph
1589
needed in the row module call */
1591
trx->op_info = "inserting index entries";
1593
graph_heap = mem_heap_create(500);
1594
node = ins_node_create(INS_DIRECT, table, graph_heap);
1596
thr = pars_complete_graph_for_exec(node, trx, graph_heap);
1598
que_thr_move_to_run_state_for_mysql(thr, trx);
1600
tuple_heap = mem_heap_create(1000);
1603
ulint i = 1 + REC_OFFS_HEADER_SIZE
1604
+ dict_index_get_n_fields(index);
1605
offsets = mem_heap_alloc(graph_heap, i * sizeof *offsets);
1607
offsets[1] = dict_index_get_n_fields(index);
1612
if (!row_merge_read(fd, foffs, block)) {
1613
error = DB_CORRUPTION;
1620
b = row_merge_read_rec(block, &buf, b, index,
1621
fd, &foffs, &mrec, offsets);
1622
if (UNIV_UNLIKELY(!b)) {
1623
/* End of list, or I/O error */
1625
error = DB_CORRUPTION;
1630
dtuple = row_rec_to_index_entry_low(
1631
mrec, index, offsets, &n_ext, tuple_heap);
1633
if (UNIV_UNLIKELY(n_ext)) {
1634
row_merge_copy_blobs(mrec, offsets, zip_size,
1635
dtuple, tuple_heap);
1639
node->table = table;
1640
node->trx_id = trx->id;
1642
ut_ad(dtuple_validate(dtuple));
1645
thr->run_node = thr;
1646
thr->prev_node = thr->common.parent;
1648
error = row_ins_index_entry(index, dtuple,
1651
if (UNIV_LIKELY(error == DB_SUCCESS)) {
1656
thr->lock_state = QUE_THR_LOCK_ROW;
1657
trx->error_state = error;
1658
que_thr_stop_for_mysql(thr);
1659
thr->lock_state = QUE_THR_LOCK_NOLOCK;
1660
} while (row_mysql_handle_errors(&error, trx,
1665
mem_heap_empty(tuple_heap);
1669
que_thr_stop_for_mysql_no_error(thr, trx);
1671
que_graph_free(thr->graph);
1675
mem_heap_free(tuple_heap);
1680
/*************************************************************************
1681
Sets an exclusive lock on a table, for the duration of creating indexes. */
1684
row_merge_lock_table(
1685
/*=================*/
1686
/* out: error code or DB_SUCCESS */
1687
trx_t* trx, /* in/out: transaction */
1688
dict_table_t* table, /* in: table to lock */
1689
enum lock_mode mode) /* in: LOCK_X or LOCK_S */
1697
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
1698
ut_ad(mode == LOCK_X || mode == LOCK_S);
1700
heap = mem_heap_create(512);
1702
trx->op_info = "setting table lock for creating or dropping index";
1704
node = sel_node_create(heap);
1705
thr = pars_complete_graph_for_exec(node, trx, heap);
1706
thr->graph->state = QUE_FORK_ACTIVE;
1708
/* We use the select query graph as the dummy graph needed
1709
in the lock module call */
1711
thr = que_fork_get_first_thr(que_node_get_parent(thr));
1712
que_thr_move_to_run_state_for_mysql(thr, trx);
1715
thr->run_node = thr;
1716
thr->prev_node = thr->common.parent;
1718
err = lock_table(0, table, mode, thr);
1720
trx->error_state = err;
1722
if (UNIV_LIKELY(err == DB_SUCCESS)) {
1723
que_thr_stop_for_mysql_no_error(thr, trx);
1725
que_thr_stop_for_mysql(thr);
1727
if (err != DB_QUE_THR_SUSPENDED) {
1728
ibool was_lock_wait;
1730
was_lock_wait = row_mysql_handle_errors(
1731
&err, trx, thr, NULL);
1733
if (was_lock_wait) {
1740
parent = que_node_get_parent(thr);
1741
run_thr = que_fork_start_command(parent);
1743
ut_a(run_thr == thr);
1745
/* There was a lock wait but the thread was not
1746
in a ready to run or running state. */
1747
trx->error_state = DB_LOCK_WAIT;
1753
que_graph_free(thr->graph);
1759
/*************************************************************************
1760
Drop an index from the InnoDB system tables. The data dictionary must
1761
have been locked exclusively by the caller, because the transaction
1762
will not be committed. */
1765
row_merge_drop_index(
1766
/*=================*/
1767
dict_index_t* index, /* in: index to be removed */
1768
dict_table_t* table, /* in: table */
1769
trx_t* trx) /* in: transaction handle */
1772
pars_info_t* info = pars_info_create();
1774
/* We use the private SQL parser of Innobase to generate the
1775
query graphs needed in deleting the dictionary data from system
1776
tables in Innobase. Deleting a row from SYS_INDEXES table also
1777
frees the file segments of the B-tree associated with the index. */
1779
static const char str1[] =
1780
"PROCEDURE DROP_INDEX_PROC () IS\n"
1782
"DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n"
1783
"DELETE FROM SYS_INDEXES WHERE ID = :indexid\n"
1784
" AND TABLE_ID = :tableid;\n"
1787
ut_ad(index && table && trx);
1789
pars_info_add_dulint_literal(info, "indexid", index->id);
1790
pars_info_add_dulint_literal(info, "tableid", table->id);
1792
trx_start_if_not_started(trx);
1793
trx->op_info = "dropping index";
1795
ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
1797
err = que_eval_sql(info, str1, FALSE, trx);
1799
ut_a(err == DB_SUCCESS);
1801
/* Replace this index with another equivalent index for all
1802
foreign key constraints on this table where this index is used */
1804
dict_table_replace_index_in_foreign_list(table, index);
1805
dict_index_remove_from_cache(table, index);
1810
/*************************************************************************
1811
Drop those indexes which were created before an error occurred when
1812
building an index. The data dictionary must have been locked
1813
exclusively by the caller, because the transaction will not be
1817
row_merge_drop_indexes(
1818
/*===================*/
1819
trx_t* trx, /* in: transaction */
1820
dict_table_t* table, /* in: table containing the indexes */
1821
dict_index_t** index, /* in: indexes to drop */
1822
ulint num_created) /* in: number of elements in index[] */
1826
for (key_num = 0; key_num < num_created; key_num++) {
1827
row_merge_drop_index(index[key_num], table, trx);
1831
/*************************************************************************
1832
Drop all partially created indexes during crash recovery. */
1835
row_merge_drop_temp_indexes(void)
1836
/*=============================*/
1841
/* We use the private SQL parser of Innobase to generate the
1842
query graphs needed in deleting the dictionary data from system
1843
tables in Innobase. Deleting a row from SYS_INDEXES table also
1844
frees the file segments of the B-tree associated with the index. */
1845
#if TEMP_INDEX_PREFIX != '\377'
1846
# error "TEMP_INDEX_PREFIX != '\377'"
1848
static const char drop_temp_indexes[] =
1849
"PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
1851
"DECLARE CURSOR c IS SELECT ID FROM SYS_INDEXES\n"
1852
"WHERE SUBSTR(NAME,0,1)='\377';\n"
1855
"\tWHILE 1=1 LOOP\n"
1856
"\t\tFETCH c INTO indexid;\n"
1857
"\t\tIF (SQL % NOTFOUND) THEN\n"
1860
"\t\tDELETE FROM SYS_FIELDS WHERE INDEX_ID = indexid;\n"
1861
"\t\tDELETE FROM SYS_INDEXES WHERE ID = indexid;\n"
1867
trx = trx_allocate_for_background();
1868
trx->op_info = "dropping partially created indexes";
1869
row_mysql_lock_data_dictionary(trx);
1871
/* Incomplete transactions may be holding some locks on the
1872
data dictionary tables. However, they should never have been
1873
able to lock the records corresponding to the partially
1874
created indexes that we are attempting to delete, because the
1875
table was locked when the indexes were being created. We will
1876
drop the partially created indexes before the rollback of
1877
incomplete transactions is initiated. Thus, this should not
1878
interfere with the incomplete transactions. */
1879
trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
1880
err = que_eval_sql(NULL, drop_temp_indexes, FALSE, trx);
1881
ut_a(err == DB_SUCCESS);
1883
row_mysql_unlock_data_dictionary(trx);
1884
trx_free_for_background(trx);
1887
/*************************************************************************
1888
Create a merge file. */
1891
row_merge_file_create(
1892
/*==================*/
1893
merge_file_t* merge_file) /* out: merge file structure */
1895
merge_file->fd = innobase_mysql_tmpfile();
1896
merge_file->offset = 0;
1899
/*************************************************************************
1900
Destroy a merge file. */
1903
row_merge_file_destroy(
1904
/*===================*/
1905
merge_file_t* merge_file) /* out: merge file structure */
1907
if (merge_file->fd != -1) {
1908
close(merge_file->fd);
1909
merge_file->fd = -1;
1913
/*************************************************************************
1914
Determine the precise type of a column that is added to a tem
1915
if a column must be constrained NOT NULL. */
1918
row_merge_col_prtype(
1919
/*=================*/
1920
/* out: col->prtype, possibly
1921
ORed with DATA_NOT_NULL */
1922
const dict_col_t* col, /* in: column */
1923
const char* col_name, /* in: name of the column */
1924
const merge_index_def_t*index_def) /* in: the index definition
1925
of the primary key */
1927
ulint prtype = col->prtype;
1930
ut_ad(index_def->ind_type & DICT_CLUSTERED);
1932
if (prtype & DATA_NOT_NULL) {
1937
/* All columns that are included
1938
in the PRIMARY KEY must be NOT NULL. */
1940
for (i = 0; i < index_def->n_fields; i++) {
1941
if (!strcmp(col_name, index_def->fields[i].field_name)) {
1942
return(prtype | DATA_NOT_NULL);
1949
/*************************************************************************
1950
Create a temporary table for creating a primary key, using the definition
1951
of an existing table. */
1954
row_merge_create_temporary_table(
1955
/*=============================*/
1958
const char* table_name, /* in: new table name */
1959
const merge_index_def_t*index_def, /* in: the index definition
1960
of the primary key */
1961
const dict_table_t* table, /* in: old table definition */
1962
trx_t* trx) /* in/out: transaction
1963
(sets error_state) */
1966
dict_table_t* new_table = NULL;
1967
ulint n_cols = dict_table_get_n_user_cols(table);
1969
mem_heap_t* heap = mem_heap_create(1000);
1974
ut_ad(mutex_own(&dict_sys->mutex));
1976
new_table = dict_mem_table_create(table_name, 0, n_cols, table->flags);
1978
for (i = 0; i < n_cols; i++) {
1979
const dict_col_t* col;
1980
const char* col_name;
1982
col = dict_table_get_nth_col(table, i);
1983
col_name = dict_table_get_col_name(table, i);
1985
dict_mem_table_add_col(new_table, heap, col_name, col->mtype,
1986
row_merge_col_prtype(col, col_name,
1991
error = row_create_table_for_mysql(new_table, trx);
1992
mem_heap_free(heap);
1994
if (error != DB_SUCCESS) {
1995
trx->error_state = error;
2002
/*************************************************************************
2003
Rename the temporary indexes in the dictionary to permanent ones. The
2004
data dictionary must have been locked exclusively by the caller,
2005
because the transaction will not be committed. */
2008
row_merge_rename_indexes(
2009
/*=====================*/
2010
/* out: DB_SUCCESS if all OK */
2011
trx_t* trx, /* in/out: transaction */
2012
dict_table_t* table) /* in/out: table with new indexes */
2014
ulint err = DB_SUCCESS;
2015
pars_info_t* info = pars_info_create();
2017
/* We use the private SQL parser of Innobase to generate the
2018
query graphs needed in renaming indexes. */
2020
#if TEMP_INDEX_PREFIX != '\377'
2021
# error "TEMP_INDEX_PREFIX != '\377'"
2024
static const char rename_indexes[] =
2025
"PROCEDURE RENAME_INDEXES_PROC () IS\n"
2027
"UPDATE SYS_INDEXES SET NAME=SUBSTR(NAME,1,LENGTH(NAME)-1)\n"
2028
"WHERE TABLE_ID = :tableid AND SUBSTR(NAME,0,1)='\377';\n"
2033
ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
2035
trx->op_info = "renaming indexes";
2037
pars_info_add_dulint_literal(info, "tableid", table->id);
2039
err = que_eval_sql(info, rename_indexes, FALSE, trx);
2041
if (err == DB_SUCCESS) {
2042
dict_index_t* index = dict_table_get_first_index(table);
2044
if (*index->name == TEMP_INDEX_PREFIX) {
2047
index = dict_table_get_next_index(index);
2056
/*************************************************************************
2057
Rename the tables in the data dictionary. The data dictionary must
2058
have been locked exclusively by the caller, because the transaction
2059
will not be committed. */
2062
row_merge_rename_tables(
2063
/*====================*/
2064
/* out: error code or DB_SUCCESS */
2065
dict_table_t* old_table, /* in/out: old table, renamed to
2067
dict_table_t* new_table, /* in/out: new table, renamed to
2069
const char* tmp_name, /* in: new name for old_table */
2070
trx_t* trx) /* in: transaction handle */
2072
ulint err = DB_ERROR;
2074
const char* old_name= old_table->name;
2076
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
2077
ut_ad(old_table != new_table);
2078
ut_ad(mutex_own(&dict_sys->mutex));
2080
ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
2082
trx->op_info = "renaming tables";
2084
/* We use the private SQL parser of Innobase to generate the query
2085
graphs needed in updating the dictionary data in system tables. */
2087
info = pars_info_create();
2089
pars_info_add_str_literal(info, "new_name", new_table->name);
2090
pars_info_add_str_literal(info, "old_name", old_name);
2091
pars_info_add_str_literal(info, "tmp_name", tmp_name);
2093
err = que_eval_sql(info,
2094
"PROCEDURE RENAME_TABLES () IS\n"
2096
"UPDATE SYS_TABLES SET NAME = :tmp_name\n"
2097
" WHERE NAME = :old_name;\n"
2098
"UPDATE SYS_TABLES SET NAME = :old_name\n"
2099
" WHERE NAME = :new_name;\n"
2100
"END;\n", FALSE, trx);
2102
if (err != DB_SUCCESS) {
2107
/* The following calls will also rename the .ibd data files if
2108
the tables are stored in a single-table tablespace */
2110
if (!dict_table_rename_in_cache(old_table, tmp_name, FALSE)
2111
|| !dict_table_rename_in_cache(new_table, old_name, FALSE)) {
2117
err = dict_load_foreigns(old_name, TRUE);
2119
if (err != DB_SUCCESS) {
2121
trx->error_state = DB_SUCCESS;
2122
trx_general_rollback_for_mysql(trx, FALSE, NULL);
2123
trx->error_state = DB_SUCCESS;
2131
/*************************************************************************
2132
Create and execute a query graph for creating an index. */
2135
row_merge_create_index_graph(
2136
/*=========================*/
2137
/* out: DB_SUCCESS or error code */
2138
trx_t* trx, /* in: trx */
2139
dict_table_t* table, /* in: table */
2140
dict_index_t* index) /* in: index */
2142
ind_node_t* node; /* Index creation node */
2143
mem_heap_t* heap; /* Memory heap */
2144
que_thr_t* thr; /* Query thread */
2151
heap = mem_heap_create(512);
2153
index->table = table;
2154
node = ind_create_graph_create(index, heap);
2155
thr = pars_complete_graph_for_exec(node, trx, heap);
2157
ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
2159
que_run_threads(thr);
2161
err = trx->error_state;
2163
que_graph_free((que_t*) que_node_get_parent(thr));
2168
/*************************************************************************
2169
Create the index and load in to the dictionary. */
2172
row_merge_create_index(
2173
/*===================*/
2174
/* out: index, or NULL on error */
2175
trx_t* trx, /* in/out: trx (sets error_state) */
2176
dict_table_t* table, /* in: the index is on this table */
2177
const merge_index_def_t* /* in: the index definition */
2180
dict_index_t* index;
2182
ulint n_fields = index_def->n_fields;
2185
/* Create the index prototype, using the passed in def, this is not
2186
a persistent operation. We pass 0 as the space id, and determine at
2187
a lower level the space id where to store the table. */
2189
index = dict_mem_index_create(table->name, index_def->name,
2190
0, index_def->ind_type, n_fields);
2194
for (i = 0; i < n_fields; i++) {
2195
merge_index_field_t* ifield = &index_def->fields[i];
2197
dict_mem_index_add_field(index, ifield->field_name,
2198
ifield->prefix_len);
2201
/* Add the index to SYS_INDEXES, using the index prototype. */
2202
err = row_merge_create_index_graph(trx, table, index);
2204
if (err == DB_SUCCESS) {
2206
index = row_merge_dict_table_get_index(
2211
#ifdef ROW_MERGE_IS_INDEX_USABLE
2212
/* Note the id of the transaction that created this
2213
index, we use it to restrict readers from accessing
2214
this index, to ensure read consistency. */
2215
index->trx_id = trx->id;
2216
#endif /* ROW_MERGE_IS_INDEX_USABLE */
2224
#ifdef ROW_MERGE_IS_INDEX_USABLE
/*************************************************************************
Check if a transaction can use an index. */
UNIV_INTERN
ibool
row_merge_is_index_usable(
/*======================*/
	const trx_t*		trx,	/* in: transaction */
	const dict_index_t*	index)	/* in: index to check */
{
	if (!trx->read_view) {

		return(TRUE);
	}

	/* The index is usable only if it was created before the
	oldest transaction visible in the read view started. */
	return(ut_dulint_cmp(index->trx_id, trx->read_view->low_limit_id) < 0);
}
#endif /* ROW_MERGE_IS_INDEX_USABLE */
/*************************************************************************
2243
Drop the old table. */
2246
row_merge_drop_table(
2247
/*=================*/
2248
/* out: DB_SUCCESS or error code */
2249
trx_t* trx, /* in: transaction */
2250
dict_table_t* table) /* in: table to drop */
2252
/* There must be no open transactions on the table. */
2253
ut_a(table->n_mysql_handles_opened == 0);
2255
return(row_drop_table_for_mysql(table->name, trx, FALSE));
2258
/*************************************************************************
2259
Build indexes on a table by reading a clustered index,
2260
creating a temporary file containing index entries, merge sorting
2261
these index entries and inserting sorted index entries to indexes. */
2264
row_merge_build_indexes(
2265
/*====================*/
2266
/* out: DB_SUCCESS or error code */
2267
trx_t* trx, /* in: transaction */
2268
dict_table_t* old_table, /* in: table where rows are
2270
dict_table_t* new_table, /* in: table where indexes are
2271
created; identical to old_table
2272
unless creating a PRIMARY KEY */
2273
dict_index_t** indexes, /* in: indexes to be created */
2274
ulint n_indexes, /* in: size of indexes[] */
2275
TABLE* table) /* in/out: MySQL table, for
2276
reporting erroneous key value
2279
merge_file_t* merge_files;
2280
row_merge_block_t* block;
2292
trx_start_if_not_started(trx);
2294
/* Allocate memory for merge file data structure and initialize
2297
merge_files = mem_alloc(n_indexes * sizeof *merge_files);
2298
block_size = 3 * sizeof *block;
2299
block = os_mem_alloc_large(&block_size);
2301
for (i = 0; i < n_indexes; i++) {
2303
row_merge_file_create(&merge_files[i]);
2306
tmpfd = innobase_mysql_tmpfile();
2308
/* Reset the MySQL row buffer that is used when reporting
2310
innobase_rec_reset(table);
2312
/* Read clustered index of the table and create files for
2313
secondary index entries for merge sort */
2315
error = row_merge_read_clustered_index(
2316
trx, table, old_table, new_table, indexes,
2317
merge_files, n_indexes, block);
2319
if (error != DB_SUCCESS) {
2324
/* Now we have files containing index entries ready for
2325
sorting and inserting. */
2327
for (i = 0; i < n_indexes; i++) {
2328
error = row_merge_sort(indexes[i], &merge_files[i],
2329
block, &tmpfd, table);
2331
if (error == DB_SUCCESS) {
2332
error = row_merge_insert_index_tuples(
2333
trx, indexes[i], new_table,
2334
dict_table_zip_size(old_table),
2335
merge_files[i].fd, block);
2338
/* Close the temporary file to free up space. */
2339
row_merge_file_destroy(&merge_files[i]);
2341
if (error != DB_SUCCESS) {
2342
trx->error_key_num = i;
2350
for (i = 0; i < n_indexes; i++) {
2351
row_merge_file_destroy(&merge_files[i]);
2354
mem_free(merge_files);
2355
os_mem_free_large(block, block_size);