~drizzle-trunk/drizzle/development


Viewing changes to storage/innobase/row/row0merge.c

  • Committer: Brian Aker
  • Date: 2009-04-27 14:36:40 UTC
  • Revision ID: brian@gaz-20090427143640-f6zjmtt9vm55qgm2
Patch on show processlist from  davi@apache.org

/*****************************************************************************

Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA

*****************************************************************************/

/******************************************************
New index creation routines using a merge sort

Created 12/4/2005 Jan Lindstrom
Completed by Sunny Bains and Marko Makela
*******************************************************/

#include "row0merge.h"
#include "row0ext.h"
#include "row0row.h"
#include "row0upd.h"
#include "row0ins.h"
#include "row0sel.h"
#include "dict0dict.h"
#include "dict0mem.h"
#include "dict0boot.h"
#include "dict0crea.h"
#include "dict0load.h"
#include "btr0btr.h"
#include "mach0data.h"
#include "trx0rseg.h"
#include "trx0trx.h"
#include "trx0roll.h"
#include "trx0undo.h"
#include "trx0purge.h"
#include "trx0rec.h"
#include "que0que.h"
#include "rem0cmp.h"
#include "read0read.h"
#include "os0file.h"
#include "lock0lock.h"
#include "data0data.h"
#include "data0type.h"
#include "que0que.h"
#include "pars0pars.h"
#include "mem0mem.h"
#include "log0log.h"
#include "ut0sort.h"
#include "handler0alter.h"

#ifdef UNIV_DEBUG
/* Set these in order to enable debug printout. */
static ibool    row_merge_print_cmp;
static ibool    row_merge_print_read;
static ibool    row_merge_print_write;
#endif /* UNIV_DEBUG */

/* Block size for I/O operations in merge sort.  The minimum is
UNIV_PAGE_SIZE, or page_get_free_space_of_empty() rounded to a power of 2.

When not creating a PRIMARY KEY that contains column prefixes, this
can be set as small as UNIV_PAGE_SIZE / 2.  See the comment above
ut_ad(data_size < sizeof(row_merge_block_t)). */

typedef byte    row_merge_block_t[1048576];
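/* A merge file is accessed in units of this block size: in
row_merge_read() and row_merge_write() below, a block number
"offset" maps to the byte offset offset * sizeof(row_merge_block_t)
in the temporary file. */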
 
/* Secondary buffer for I/O operations of merge records.  This buffer
is used for writing or reading a record that spans two row_merge_block_t.
Thus, it must be able to hold one merge record, whose maximum size is
the same as the minimum size of row_merge_block_t. */

typedef byte    mrec_buf_t[UNIV_PAGE_SIZE];

/* Merge record in row_merge_block_t.  The format is the same as a
record in ROW_FORMAT=COMPACT with the exception that the
REC_N_NEW_EXTRA_BYTES are omitted. */
typedef byte    mrec_t;
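/* Within a row_merge_block_t, each merge record is prefixed by a
1- or 2-byte length of its "extra" (header) bytes, encoded as
extra_size + 1 so that a single 0 byte can serve as the
end-of-list marker; see row_merge_buf_write() and
row_merge_read_rec() below. */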
 
/* Buffer for sorting in main memory. */
struct row_merge_buf_struct {
        mem_heap_t*     heap;           /* memory heap where allocated */
        dict_index_t*   index;          /* the index the tuples belong to */
        ulint           total_size;     /* total amount of data bytes */
        ulint           n_tuples;       /* number of data tuples */
        ulint           max_tuples;     /* maximum number of data tuples */
        const dfield_t**tuples;         /* array of pointers to
                                        arrays of fields that form
                                        the data tuples */
        const dfield_t**tmp_tuples;     /* temporary copy of tuples,
                                        for sorting */
};

typedef struct row_merge_buf_struct row_merge_buf_t;

/* Information about the temporary files used in the merge sort is
stored in this structure */

struct merge_file_struct {
        int     fd;             /* File descriptor */
        ulint   offset;         /* File offset */
};

typedef struct merge_file_struct merge_file_t;

#ifdef UNIV_DEBUG
/**********************************************************
Display a merge tuple. */
static
void
row_merge_tuple_print(
/*==================*/
        FILE*           f,      /* in: output stream */
        const dfield_t* entry,  /* in: tuple to print */
        ulint           n_fields)/* in: number of fields in the tuple */
{
        ulint   j;

        for (j = 0; j < n_fields; j++) {
                const dfield_t* field = &entry[j];

                if (dfield_is_null(field)) {
                        fputs("\n NULL;", f);
                } else {
                        ulint   field_len       = dfield_get_len(field);
                        ulint   len             = ut_min(field_len, 20);
                        if (dfield_is_ext(field)) {
                                fputs("\nE", f);
                        } else {
                                fputs("\n ", f);
                        }
                        ut_print_buf(f, dfield_get_data(field), len);
                        if (len != field_len) {
                                fprintf(f, " (total %lu bytes)", field_len);
                        }
                }
        }
        putc('\n', f);
}
#endif /* UNIV_DEBUG */

/**********************************************************
Allocate a sort buffer. */
static
row_merge_buf_t*
row_merge_buf_create_low(
/*=====================*/
                                        /* out,own: sort buffer */
        mem_heap_t*     heap,           /* in: heap where allocated */
        dict_index_t*   index,          /* in: secondary index */
        ulint           max_tuples,     /* in: maximum number of data tuples */
        ulint           buf_size)       /* in: size of the buffer, in bytes */
{
        row_merge_buf_t*        buf;

        ut_ad(max_tuples > 0);
        ut_ad(max_tuples <= sizeof(row_merge_block_t));
        ut_ad(max_tuples < buf_size);

        buf = mem_heap_zalloc(heap, buf_size);
        buf->heap = heap;
        buf->index = index;
        buf->max_tuples = max_tuples;
        buf->tuples = mem_heap_alloc(heap,
                                     2 * max_tuples * sizeof *buf->tuples);
        buf->tmp_tuples = buf->tuples + max_tuples;

        return(buf);
}

/**********************************************************
Allocate a sort buffer. */
static
row_merge_buf_t*
row_merge_buf_create(
/*=================*/
                                /* out,own: sort buffer */
        dict_index_t*   index)  /* in: secondary index */
{
        row_merge_buf_t*        buf;
        ulint                   max_tuples;
        ulint                   buf_size;
        mem_heap_t*             heap;

        max_tuples = sizeof(row_merge_block_t)
                / ut_max(1, dict_index_get_min_size(index));

        buf_size = (sizeof *buf) + (max_tuples - 1) * sizeof *buf->tuples;

        heap = mem_heap_create(buf_size + sizeof(row_merge_block_t));

        buf = row_merge_buf_create_low(heap, index, max_tuples, buf_size);

        return(buf);
}

/**********************************************************
Empty a sort buffer. */
static
row_merge_buf_t*
row_merge_buf_empty(
/*================*/
                                        /* out: sort buffer */
        row_merge_buf_t*        buf)    /* in,own: sort buffer */
{
        ulint           buf_size;
        ulint           max_tuples      = buf->max_tuples;
        mem_heap_t*     heap            = buf->heap;
        dict_index_t*   index           = buf->index;

        buf_size = (sizeof *buf) + (max_tuples - 1) * sizeof *buf->tuples;

        mem_heap_empty(heap);

        return(row_merge_buf_create_low(heap, index, max_tuples, buf_size));
}

/**********************************************************
Deallocate a sort buffer. */
static
void
row_merge_buf_free(
/*===============*/
        row_merge_buf_t*        buf)    /* in,own: sort buffer, to be freed */
{
        mem_heap_free(buf->heap);
}

/**********************************************************
Insert a data tuple into a sort buffer. */
static
ibool
row_merge_buf_add(
/*==============*/
                                        /* out: TRUE if added,
                                        FALSE if out of space */
        row_merge_buf_t*        buf,    /* in/out: sort buffer */
        const dtuple_t*         row,    /* in: row in clustered index */
        const row_ext_t*        ext)    /* in: cache of externally stored
                                        column prefixes, or NULL */
{
        ulint                   i;
        ulint                   n_fields;
        ulint                   data_size;
        ulint                   extra_size;
        const dict_index_t*     index;
        dfield_t*               entry;
        dfield_t*               field;

        if (buf->n_tuples >= buf->max_tuples) {
                return(FALSE);
        }

        UNIV_PREFETCH_R(row->fields);

        index = buf->index;

        n_fields = dict_index_get_n_fields(index);

        entry = mem_heap_alloc(buf->heap, n_fields * sizeof *entry);
        buf->tuples[buf->n_tuples] = entry;
        field = entry;

        data_size = 0;
        extra_size = UT_BITS_IN_BYTES(index->n_nullable);

        for (i = 0; i < n_fields; i++, field++) {
                const dict_field_t*     ifield;
                const dict_col_t*       col;
                ulint                   col_no;
                const dfield_t*         row_field;
                ulint                   len;

                ifield = dict_index_get_nth_field(index, i);
                col = ifield->col;
                col_no = dict_col_get_no(col);
                row_field = dtuple_get_nth_field(row, col_no);
                dfield_copy(field, row_field);
                len = dfield_get_len(field);

                if (dfield_is_null(field)) {
                        ut_ad(!(col->prtype & DATA_NOT_NULL));
                        continue;
                } else if (UNIV_LIKELY(!ext)) {
                } else if (dict_index_is_clust(index)) {
                        /* Flag externally stored fields. */
                        const byte*     buf = row_ext_lookup(ext, col_no,
                                                             &len);
                        if (UNIV_LIKELY_NULL(buf)) {
                                ut_a(buf != field_ref_zero);
                                if (i < dict_index_get_n_unique(index)) {
                                        dfield_set_data(field, buf, len);
                                } else {
                                        dfield_set_ext(field);
                                        len = dfield_get_len(field);
                                }
                        }
                } else {
                        const byte*     buf = row_ext_lookup(ext, col_no,
                                                             &len);
                        if (UNIV_LIKELY_NULL(buf)) {
                                ut_a(buf != field_ref_zero);
                                dfield_set_data(field, buf, len);
                        }
                }

                /* If a column prefix index, take only the prefix */

                if (ifield->prefix_len) {
                        len = dtype_get_at_most_n_mbchars(
                                col->prtype,
                                col->mbminlen, col->mbmaxlen,
                                ifield->prefix_len,
                                len, dfield_get_data(field));
                        dfield_set_len(field, len);
                }

                ut_ad(len <= col->len || col->mtype == DATA_BLOB);

                if (ifield->fixed_len) {
                        ut_ad(len == ifield->fixed_len);
                        ut_ad(!dfield_is_ext(field));
                } else if (dfield_is_ext(field)) {
                        extra_size += 2;
                } else if (len < 128
                           || (col->len < 256 && col->mtype != DATA_BLOB)) {
                        extra_size++;
                } else {
                        /* For variable-length columns, we look up the
                        maximum length from the column itself.  If this
                        is a prefix index column shorter than 256 bytes,
                        this will waste one byte. */
                        extra_size += 2;
                }
                data_size += len;
        }

#ifdef UNIV_DEBUG
        {
                ulint   size;
                ulint   extra;

                size = rec_get_converted_size_comp(index,
                                                   REC_STATUS_ORDINARY,
                                                   entry, n_fields, &extra);

                ut_ad(data_size + extra_size + REC_N_NEW_EXTRA_BYTES == size);
                ut_ad(extra_size + REC_N_NEW_EXTRA_BYTES == extra);
        }
#endif /* UNIV_DEBUG */

        /* Add to the total size of the record in row_merge_block_t
        the encoded length of extra_size and the extra bytes (extra_size).
        See row_merge_buf_write() for the variable-length encoding
        of extra_size. */
        data_size += (extra_size + 1) + ((extra_size + 1) >= 0x80);
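        /* For example, extra_size 0x7e is encoded in one byte (0x7f),
        while extra_size 0x7f already needs the two-byte form
        (0x80, 0x80): the high byte carries the 0x80 flag.  Either way,
        the line above accounts for extra_size bytes of record header
        plus one or two bytes of length prefix. */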
 

        /* The following assertion may fail if row_merge_block_t is
        declared very small and a PRIMARY KEY is being created with
        many prefix columns.  In that case, the record may exceed the
        page_zip_rec_needs_ext() limit.  However, no further columns
        will be moved to external storage until the record is inserted
        to the clustered index B-tree. */
        ut_ad(data_size < sizeof(row_merge_block_t));

        /* Reserve one byte for the end marker of row_merge_block_t. */
        if (buf->total_size + data_size >= sizeof(row_merge_block_t) - 1) {
                return(FALSE);
        }

        buf->total_size += data_size;
        buf->n_tuples++;

        field = entry;

        /* Copy the data fields. */

        do {
                dfield_dup(field++, buf->heap);
        } while (--n_fields);

        return(TRUE);
}

/* Structure for reporting duplicate records. */
struct row_merge_dup_struct {
        const dict_index_t*     index;          /* index being sorted */
        TABLE*                  table;          /* MySQL table object */
        ulint                   n_dup;          /* number of duplicates */
};

typedef struct row_merge_dup_struct row_merge_dup_t;

/*****************************************************************
Report a duplicate key. */
static
void
row_merge_dup_report(
/*=================*/
        row_merge_dup_t*        dup,    /* in/out: for reporting duplicates */
        const dfield_t*         entry)  /* in: duplicate index entry */
{
        mrec_buf_t              buf;
        const dtuple_t*         tuple;
        dtuple_t                tuple_store;
        const rec_t*            rec;
        const dict_index_t*     index   = dup->index;
        ulint                   n_fields= dict_index_get_n_fields(index);
        mem_heap_t*             heap    = NULL;
        ulint                   offsets_[REC_OFFS_NORMAL_SIZE];
        ulint*                  offsets;
        ulint                   n_ext;

        if (dup->n_dup++) {
                /* Only report the first duplicate record,
                but count all duplicate records. */
                return;
        }

        rec_offs_init(offsets_);

        /* Convert the tuple to a record and then to MySQL format. */

        tuple = dtuple_from_fields(&tuple_store, entry, n_fields);
        n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0;

        rec = rec_convert_dtuple_to_rec(buf, index, tuple, n_ext);
        offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED,
                                  &heap);

        innobase_rec_to_mysql(dup->table, rec, index, offsets);

        if (UNIV_LIKELY_NULL(heap)) {
                mem_heap_free(heap);
        }
}

/*****************************************************************
Compare two tuples. */
static
int
row_merge_tuple_cmp(
/*================*/
                                        /* out: 1, 0, -1 if a is greater,
                                        equal, less, respectively, than b */
        ulint                   n_field,/* in: number of fields */
        const dfield_t*         a,      /* in: first tuple to be compared */
        const dfield_t*         b,      /* in: second tuple to be compared */
        row_merge_dup_t*        dup)    /* in/out: for reporting duplicates */
{
        int             cmp;
        const dfield_t* field   = a;

        /* Compare the fields of the tuples until a difference is
        found or we run out of fields to compare.  If !cmp at the
        end, the tuples are equal. */
        do {
                cmp = cmp_dfield_dfield(a++, b++);
        } while (!cmp && --n_field);

        if (UNIV_UNLIKELY(!cmp) && UNIV_LIKELY_NULL(dup)) {
                /* Report a duplicate value error if the tuples are
                logically equal.  NULL columns are logically unequal,
                although they are equal in the sorting order.  Find
                out if any of the fields are NULL. */
                for (b = field; b != a; b++) {
                        if (dfield_is_null(b)) {

                                goto func_exit;
                        }
                }

                row_merge_dup_report(dup, field);
        }

func_exit:
        return(cmp);
}

/**************************************************************************
Merge sort the tuple buffer in main memory. */
static
void
row_merge_tuple_sort(
/*=================*/
        ulint                   n_field,/* in: number of fields */
        row_merge_dup_t*        dup,    /* in/out: for reporting duplicates */
        const dfield_t**        tuples, /* in/out: tuples */
        const dfield_t**        aux,    /* in/out: work area */
        ulint                   low,    /* in: lower bound of the
                                        sorting area, inclusive */
        ulint                   high)   /* in: upper bound of the
                                        sorting area, exclusive */
{
#define row_merge_tuple_sort_ctx(a,b,c,d) \
        row_merge_tuple_sort(n_field, dup, a, b, c, d)
#define row_merge_tuple_cmp_ctx(a,b) row_merge_tuple_cmp(n_field, a, b, dup)
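        /* UT_SORT_FUNCTION_BODY (see ut0sort.h) expands to a recursive
        merge sort of tuples[low..high) that uses aux[] as scratch
        space; the two macros above bind the field count and the
        duplicate-reporting context into its sort and compare
        callbacks. */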
 

        UT_SORT_FUNCTION_BODY(row_merge_tuple_sort_ctx,
                              tuples, aux, low, high, row_merge_tuple_cmp_ctx);
}

/**********************************************************
Sort a buffer. */
static
void
row_merge_buf_sort(
/*===============*/
        row_merge_buf_t*        buf,    /* in/out: sort buffer */
        row_merge_dup_t*        dup)    /* in/out: for reporting duplicates */
{
        row_merge_tuple_sort(dict_index_get_n_unique(buf->index), dup,
                             buf->tuples, buf->tmp_tuples, 0, buf->n_tuples);
}

/**********************************************************
Write a buffer to a block. */
static
void
row_merge_buf_write(
/*================*/
        const row_merge_buf_t*  buf,    /* in: sorted buffer */
#ifdef UNIV_DEBUG
        const merge_file_t*     of,     /* in: output file */
#endif /* UNIV_DEBUG */
        row_merge_block_t*      block)  /* out: buffer for writing to file */
#ifndef UNIV_DEBUG
# define row_merge_buf_write(buf, of, block) row_merge_buf_write(buf, block)
#endif /* !UNIV_DEBUG */
{
        const dict_index_t*     index   = buf->index;
        ulint                   n_fields= dict_index_get_n_fields(index);
        byte*                   b       = &(*block)[0];

        ulint           i;

        for (i = 0; i < buf->n_tuples; i++) {
                ulint           size;
                ulint           extra_size;
                const dfield_t* entry           = buf->tuples[i];

                size = rec_get_converted_size_comp(index,
                                                   REC_STATUS_ORDINARY,
                                                   entry, n_fields,
                                                   &extra_size);
                ut_ad(size > extra_size);
                ut_ad(extra_size >= REC_N_NEW_EXTRA_BYTES);
                extra_size -= REC_N_NEW_EXTRA_BYTES;
                size -= REC_N_NEW_EXTRA_BYTES;

                /* Encode extra_size + 1 */
                if (extra_size + 1 < 0x80) {
                        *b++ = (byte) (extra_size + 1);
                } else {
                        ut_ad((extra_size + 1) < 0x8000);
                        *b++ = (byte) (0x80 | ((extra_size + 1) >> 8));
                        *b++ = (byte) (extra_size + 1);
                }

                ut_ad(b + size < block[1]);

                rec_convert_dtuple_to_rec_comp(b + extra_size, 0, index,
                                               REC_STATUS_ORDINARY,
                                               entry, n_fields);

                b += size;

#ifdef UNIV_DEBUG
                if (row_merge_print_write) {
                        fprintf(stderr, "row_merge_buf_write %p,%d,%lu %lu",
                                (void*) b, of->fd, (ulong) of->offset,
                                (ulong) i);
                        row_merge_tuple_print(stderr, entry, n_fields);
                }
#endif /* UNIV_DEBUG */
        }

        /* Write an "end-of-chunk" marker. */
        ut_a(b < block[1]);
        ut_a(b == block[0] + buf->total_size);
        *b++ = 0;
#ifdef UNIV_DEBUG_VALGRIND
        /* The rest of the block is uninitialized.  Initialize it
        to avoid bogus warnings. */
        memset(b, 0xff, block[1] - b);
#endif /* UNIV_DEBUG_VALGRIND */
#ifdef UNIV_DEBUG
        if (row_merge_print_write) {
                fprintf(stderr, "row_merge_buf_write %p,%d,%lu EOF\n",
                        (void*) b, of->fd, (ulong) of->offset);
        }
#endif /* UNIV_DEBUG */
}

/**********************************************************
Create a memory heap and allocate space for row_merge_rec_offsets(). */
static
mem_heap_t*
row_merge_heap_create(
/*==================*/
                                                /* out: memory heap */
        const dict_index_t*     index,          /* in: record descriptor */
        ulint**                 offsets1,       /* out: offsets */
        ulint**                 offsets2)       /* out: offsets */
{
        ulint           i       = 1 + REC_OFFS_HEADER_SIZE
                + dict_index_get_n_fields(index);
        mem_heap_t*     heap    = mem_heap_create(2 * i * sizeof *offsets1);

        *offsets1 = mem_heap_alloc(heap, i * sizeof *offsets1);
        *offsets2 = mem_heap_alloc(heap, i * sizeof *offsets2);

        (*offsets1)[0] = (*offsets2)[0] = i;
        (*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index);

        return(heap);
}

/**************************************************************************
Search an index object by name and column names.  If several indexes match,
return the index with the max id. */
static
dict_index_t*
row_merge_dict_table_get_index(
/*===========================*/
                                                /* out: matching index,
                                                NULL if not found */
        dict_table_t*           table,          /* in: table */
        const merge_index_def_t*index_def)      /* in: index definition */
{
        ulint           i;
        dict_index_t*   index;
        const char**    column_names;

        column_names = mem_alloc(index_def->n_fields * sizeof *column_names);

        for (i = 0; i < index_def->n_fields; ++i) {
                column_names[i] = index_def->fields[i].field_name;
        }

        index = dict_table_get_index_by_max_id(
                table, index_def->name, column_names, index_def->n_fields);

        mem_free((void*) column_names);

        return(index);
}

/************************************************************************
Read a merge block from the file system. */
static
ibool
row_merge_read(
/*===========*/
                                        /* out: TRUE if request was
                                        successful, FALSE if fail */
        int                     fd,     /* in: file descriptor */
        ulint                   offset, /* in: offset where to read */
        row_merge_block_t*      buf)    /* out: data */
{
        ib_uint64_t     ofs = ((ib_uint64_t) offset) * sizeof *buf;
        ibool           success;

        success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf,
                                                 (ulint) (ofs & 0xFFFFFFFF),
                                                 (ulint) (ofs >> 32),
                                                 sizeof *buf);
        if (UNIV_UNLIKELY(!success)) {
                ut_print_timestamp(stderr);
                fprintf(stderr,
                        "  InnoDB: failed to read merge block at %"PRIu64"\n", ofs);
        }

        return(UNIV_LIKELY(success));
}

/************************************************************************
Write a merge block to the file system. */
static
ibool
row_merge_write(
/*============*/
                                /* out: TRUE if request was
                                successful, FALSE if fail */
        int             fd,     /* in: file descriptor */
        ulint           offset, /* in: offset where to write */
        const void*     buf)    /* in: data */
{
        ib_uint64_t     ofs = ((ib_uint64_t) offset)
                * sizeof(row_merge_block_t);

        return(UNIV_LIKELY(os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
                                         (ulint) (ofs & 0xFFFFFFFF),
                                         (ulint) (ofs >> 32),
                                         sizeof(row_merge_block_t))));
}

/************************************************************************
Read a merge record. */
static
const byte*
row_merge_read_rec(
/*===============*/
                                        /* out: pointer to next record,
                                        or NULL on I/O error
                                        or end of list */
        row_merge_block_t*      block,  /* in/out: file buffer */
        mrec_buf_t*             buf,    /* in/out: secondary buffer */
        const byte*             b,      /* in: pointer to record */
        const dict_index_t*     index,  /* in: index of the record */
        int                     fd,     /* in: file descriptor */
        ulint*                  foffs,  /* in/out: file offset */
        const mrec_t**          mrec,   /* out: pointer to merge record,
                                        or NULL on end of list
                                        (non-NULL on I/O error) */
        ulint*                  offsets)/* out: offsets of mrec */
{
        ulint   extra_size;
        ulint   data_size;
        ulint   avail_size;

        ut_ad(block);
        ut_ad(buf);
        ut_ad(b >= block[0]);
        ut_ad(b < block[1]);
        ut_ad(index);
        ut_ad(foffs);
        ut_ad(mrec);
        ut_ad(offsets);

        ut_ad(*offsets == 1 + REC_OFFS_HEADER_SIZE
              + dict_index_get_n_fields(index));

        extra_size = *b++;

        if (UNIV_UNLIKELY(!extra_size)) {
                /* End of list */
                *mrec = NULL;
#ifdef UNIV_DEBUG
                if (row_merge_print_read) {
                        fprintf(stderr, "row_merge_read %p,%p,%d,%lu EOF\n",
                                (const void*) b, (const void*) block,
                                fd, (ulong) *foffs);
                }
#endif /* UNIV_DEBUG */
                return(NULL);
        }

        if (extra_size >= 0x80) {
                /* Read another byte of extra_size. */

                if (UNIV_UNLIKELY(b >= block[1])) {
                        if (!row_merge_read(fd, ++(*foffs), block)) {
err_exit:
                                /* Signal I/O error. */
                                *mrec = b;
                                return(NULL);
                        }

                        /* Wrap around to the beginning of the buffer. */
                        b = block[0];
                }

                extra_size = (extra_size & 0x7f) << 8;
                extra_size |= *b++;
        }

        /* Normalize extra_size.  Above, value 0 signals "end of list". */
        extra_size--;

        /* Read the extra bytes. */

        if (UNIV_UNLIKELY(b + extra_size >= block[1])) {
                /* The record spans two blocks.  Copy the entire record
                to the auxiliary buffer and handle this as a special
                case. */

                avail_size = block[1] - b;

                memcpy(*buf, b, avail_size);

                if (!row_merge_read(fd, ++(*foffs), block)) {

                        goto err_exit;
                }

                /* Wrap around to the beginning of the buffer. */
                b = block[0];

                /* Copy the record. */
                memcpy(*buf + avail_size, b, extra_size - avail_size);
                b += extra_size - avail_size;

                *mrec = *buf + extra_size;

                rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);

                data_size = rec_offs_data_size(offsets);

                /* These overflows should be impossible given that
                records are much smaller than either buffer, and
                the record starts near the beginning of each buffer. */
                ut_a(extra_size + data_size < sizeof *buf);
                ut_a(b + data_size < block[1]);

                /* Copy the data bytes. */
                memcpy(*buf + extra_size, b, data_size);
                b += data_size;

                goto func_exit;
        }

        *mrec = b + extra_size;

        rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);

        data_size = rec_offs_data_size(offsets);
        ut_ad(extra_size + data_size < sizeof *buf);

        b += extra_size + data_size;

        if (UNIV_LIKELY(b < block[1])) {
                /* The record fits entirely in the block.
                This is the normal case. */
                goto func_exit;
        }

        /* The record spans two blocks.  Copy it to buf. */

        b -= extra_size + data_size;
        avail_size = block[1] - b;
        memcpy(*buf, b, avail_size);
        *mrec = *buf + extra_size;
        rec_offs_make_valid(*mrec, index, offsets);

        if (!row_merge_read(fd, ++(*foffs), block)) {

                goto err_exit;
        }

        /* Wrap around to the beginning of the buffer. */
        b = block[0];

        /* Copy the rest of the record. */
        memcpy(*buf + avail_size, b, extra_size + data_size - avail_size);
        b += extra_size + data_size - avail_size;

func_exit:
#ifdef UNIV_DEBUG
        if (row_merge_print_read) {
                fprintf(stderr, "row_merge_read %p,%p,%d,%lu ",
                        (const void*) b, (const void*) block,
                        fd, (ulong) *foffs);
                rec_print_comp(stderr, *mrec, offsets);
                putc('\n', stderr);
        }
#endif /* UNIV_DEBUG */

        return(b);
}

/************************************************************************
Write a merge record. */
static
void
row_merge_write_rec_low(
/*====================*/
        byte*           b,      /* out: buffer */
        ulint           e,      /* in: encoded extra_size */
#ifdef UNIV_DEBUG
        ulint           size,   /* in: total size to write */
        int             fd,     /* in: file descriptor */
        ulint           foffs,  /* in: file offset */
#endif /* UNIV_DEBUG */
        const mrec_t*   mrec,   /* in: record to write */
        const ulint*    offsets)/* in: offsets of mrec */
#ifndef UNIV_DEBUG
# define row_merge_write_rec_low(b, e, size, fd, foffs, mrec, offsets)  \
        row_merge_write_rec_low(b, e, mrec, offsets)
#endif /* !UNIV_DEBUG */
{
#ifdef UNIV_DEBUG
        const byte* const end = b + size;
        ut_ad(e == rec_offs_extra_size(offsets) + 1);

        if (row_merge_print_write) {
                fprintf(stderr, "row_merge_write %p,%d,%lu ",
                        (void*) b, fd, (ulong) foffs);
                rec_print_comp(stderr, mrec, offsets);
                putc('\n', stderr);
        }
#endif /* UNIV_DEBUG */

        if (e < 0x80) {
                *b++ = (byte) e;
        } else {
                *b++ = (byte) (0x80 | (e >> 8));
                *b++ = (byte) e;
        }

        memcpy(b, mrec - rec_offs_extra_size(offsets), rec_offs_size(offsets));
        ut_ad(b + rec_offs_size(offsets) == end);
}

/************************************************************************
Write a merge record. */
static
byte*
row_merge_write_rec(
/*================*/
                                        /* out: pointer to end of block,
                                        or NULL on error */
        row_merge_block_t*      block,  /* in/out: file buffer */
        mrec_buf_t*             buf,    /* in/out: secondary buffer */
        byte*                   b,      /* in: pointer to end of block */
        int                     fd,     /* in: file descriptor */
        ulint*                  foffs,  /* in/out: file offset */
        const mrec_t*           mrec,   /* in: record to write */
        const ulint*            offsets)/* in: offsets of mrec */
{
        ulint   extra_size;
        ulint   size;
        ulint   avail_size;

        ut_ad(block);
        ut_ad(buf);
        ut_ad(b >= block[0]);
        ut_ad(b < block[1]);
        ut_ad(mrec);
        ut_ad(foffs);
        ut_ad(mrec < block[0] || mrec > block[1]);
        ut_ad(mrec < buf[0] || mrec > buf[1]);

        /* Normalize extra_size.  Value 0 signals "end of list". */
        extra_size = rec_offs_extra_size(offsets) + 1;

        size = extra_size + (extra_size >= 0x80)
                + rec_offs_data_size(offsets);

        if (UNIV_UNLIKELY(b + size >= block[1])) {
                /* The record spans two blocks.
                Copy it to the temporary buffer first. */
                avail_size = block[1] - b;

                row_merge_write_rec_low(buf[0],
                                        extra_size, size, fd, *foffs,
                                        mrec, offsets);

                /* Copy the head of the temporary buffer, write
                the completed block, and copy the tail of the
                record to the head of the new block. */
                memcpy(b, buf[0], avail_size);

                if (!row_merge_write(fd, (*foffs)++, block)) {
                        return(NULL);
                }

                UNIV_MEM_INVALID(block[0], sizeof block[0]);

                /* Copy the rest. */
                b = block[0];
                memcpy(b, buf[0] + avail_size, size - avail_size);
                b += size - avail_size;
        } else {
                row_merge_write_rec_low(b, extra_size, size, fd, *foffs,
                                        mrec, offsets);
                b += size;
        }

        return(b);
}

/************************************************************************
Write an end-of-list marker. */
static
byte*
row_merge_write_eof(
/*================*/
                                        /* out: pointer to end of block,
                                        or NULL on error */
        row_merge_block_t*      block,  /* in/out: file buffer */
        byte*                   b,      /* in: pointer to end of block */
        int                     fd,     /* in: file descriptor */
        ulint*                  foffs)  /* in/out: file offset */
{
        ut_ad(block);
        ut_ad(b >= block[0]);
        ut_ad(b < block[1]);
        ut_ad(foffs);
#ifdef UNIV_DEBUG
        if (row_merge_print_write) {
                fprintf(stderr, "row_merge_write %p,%p,%d,%lu EOF\n",
                        (void*) b, (void*) block, fd, (ulong) *foffs);
        }
#endif /* UNIV_DEBUG */

        *b++ = 0;
        UNIV_MEM_ASSERT_RW(block[0], b - block[0]);
        UNIV_MEM_ASSERT_W(block[0], sizeof block[0]);
#ifdef UNIV_DEBUG_VALGRIND
        /* The rest of the block is uninitialized.  Initialize it
        to avoid bogus warnings. */
        memset(b, 0xff, block[1] - b);
#endif /* UNIV_DEBUG_VALGRIND */

        if (!row_merge_write(fd, (*foffs)++, block)) {
                return(NULL);
        }

        UNIV_MEM_INVALID(block[0], sizeof block[0]);
        return(block[0]);
}

/*****************************************************************
Compare two merge records. */
static
int
row_merge_cmp(
/*==========*/
                                                /* out: 1, 0, -1 if
                                                mrec1 is greater, equal, less,
                                                respectively, than mrec2 */
        const mrec_t*           mrec1,          /* in: first merge
                                                record to be compared */
        const mrec_t*           mrec2,          /* in: second merge
                                                record to be compared */
        const ulint*            offsets1,       /* in: first record offsets */
        const ulint*            offsets2,       /* in: second record offsets */
        const dict_index_t*     index)          /* in: index */
{
        int     cmp;

        cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index);

#ifdef UNIV_DEBUG
        if (row_merge_print_cmp) {
                fputs("row_merge_cmp1 ", stderr);
                rec_print_comp(stderr, mrec1, offsets1);
                fputs("\nrow_merge_cmp2 ", stderr);
                rec_print_comp(stderr, mrec2, offsets2);
                fprintf(stderr, "\nrow_merge_cmp=%d\n", cmp);
        }
#endif /* UNIV_DEBUG */

        return(cmp);
}

/************************************************************************
Reads the clustered index of the table and creates temporary files
containing the index entries for the indexes to be built. */
static
ulint
row_merge_read_clustered_index(
/*===========================*/
                                        /* out: DB_SUCCESS or error */
        trx_t*                  trx,    /* in: transaction */
        TABLE*                  table,  /* in/out: MySQL table object,
                                        for reporting erroneous records */
        const dict_table_t*     old_table,/* in: table where rows are
                                        read from */
        const dict_table_t*     new_table,/* in: table where indexes are
                                        created; identical to old_table
                                        unless creating a PRIMARY KEY */
        dict_index_t**          index,  /* in: indexes to be created */
        merge_file_t*           files,  /* in: temporary files */
        ulint                   n_index,/* in: number of indexes to create */
        row_merge_block_t*      block)  /* in/out: file buffer */
{
        dict_index_t*           clust_index;    /* Clustered index */
        mem_heap_t*             row_heap;       /* Heap memory to create
                                                clustered index records */
        row_merge_buf_t**       merge_buf;      /* Temporary list for records*/
        btr_pcur_t              pcur;           /* Persistent cursor on the
                                                clustered index */
        mtr_t                   mtr;            /* Mini transaction */
        ulint                   err = DB_SUCCESS;/* Return code */
        ulint                   i;
        ulint                   n_nonnull = 0;  /* number of columns
                                                changed to NOT NULL */
        ulint*                  nonnull = NULL; /* NOT NULL columns */

        trx->op_info = "reading clustered index";

        ut_ad(trx);
        ut_ad(old_table);
        ut_ad(new_table);
        ut_ad(index);
        ut_ad(files);

        /* Create and initialize memory for record buffers */

        merge_buf = mem_alloc(n_index * sizeof *merge_buf);

        for (i = 0; i < n_index; i++) {
                merge_buf[i] = row_merge_buf_create(index[i]);
        }

        mtr_start(&mtr);

        /* Find the clustered index and create a persistent cursor
        based on that. */

        clust_index = dict_table_get_first_index(old_table);

        btr_pcur_open_at_index_side(
                TRUE, clust_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);

        if (UNIV_UNLIKELY(old_table != new_table)) {
                ulint   n_cols = dict_table_get_n_cols(old_table);

                /* A primary key will be created.  Identify the
                columns that were flagged NOT NULL in the new table,
                so that we can quickly check that the records in the
                (old) clustered index do not violate the added NOT
                NULL constraints. */

                ut_a(n_cols == dict_table_get_n_cols(new_table));

                nonnull = mem_alloc(n_cols * sizeof *nonnull);

                for (i = 0; i < n_cols; i++) {
                        if (dict_table_get_nth_col(old_table, i)->prtype
                            & DATA_NOT_NULL) {

                                continue;
                        }

                        if (dict_table_get_nth_col(new_table, i)->prtype
                            & DATA_NOT_NULL) {

                                nonnull[n_nonnull++] = i;
                        }
                }

                if (!n_nonnull) {
                        mem_free(nonnull);
                        nonnull = NULL;
                }
        }

        row_heap = mem_heap_create(sizeof(mrec_buf_t));

        /* Scan the clustered index. */
        for (;;) {
                const rec_t*    rec;
                ulint*          offsets;
                dtuple_t*       row             = NULL;
                row_ext_t*      ext;
                ibool           has_next        = TRUE;

                btr_pcur_move_to_next_on_page(&pcur);

                /* When switching pages, commit the mini-transaction
                in order to release the latch on the old page. */

                if (btr_pcur_is_after_last_on_page(&pcur)) {
                        btr_pcur_store_position(&pcur, &mtr);
                        mtr_commit(&mtr);
                        mtr_start(&mtr);
                        btr_pcur_restore_position(BTR_SEARCH_LEAF,
                                                  &pcur, &mtr);
                        has_next = btr_pcur_move_to_next_user_rec(&pcur, &mtr);
                }

                if (UNIV_LIKELY(has_next)) {
                        rec = btr_pcur_get_rec(&pcur);
                        offsets = rec_get_offsets(rec, clust_index, NULL,
                                                  ULINT_UNDEFINED, &row_heap);

                        /* Skip delete marked records. */
                        if (rec_get_deleted_flag(
                                    rec, dict_table_is_comp(old_table))) {
                                continue;
                        }

                        srv_n_rows_inserted++;

                        /* Build a row based on the clustered index. */

                        row = row_build(ROW_COPY_POINTERS, clust_index,
                                        rec, offsets,
                                        new_table, &ext, row_heap);

                        if (UNIV_LIKELY_NULL(nonnull)) {
                                for (i = 0; i < n_nonnull; i++) {
                                        dfield_t*       field
                                                = &row->fields[nonnull[i]];
                                        dtype_t*        field_type
                                                = dfield_get_type(field);

                                        ut_a(!(field_type->prtype
                                               & DATA_NOT_NULL));

                                        if (dfield_is_null(field)) {
                                                err = DB_PRIMARY_KEY_IS_NULL;
                                                i = 0;
                                                goto err_exit;
                                        }

                                        field_type->prtype |= DATA_NOT_NULL;
                                }
                        }
                }

                /* Build all entries for all the indexes to be created
                in a single scan of the clustered index. */

                for (i = 0; i < n_index; i++) {
                        row_merge_buf_t*        buf     = merge_buf[i];
                        merge_file_t*           file    = &files[i];
                        const dict_index_t*     index   = buf->index;

                        if (UNIV_LIKELY
                            (row && row_merge_buf_add(buf, row, ext))) {
                                continue;
                        }

                        /* The buffer must be sufficiently large
                        to hold at least one record. */
                        ut_ad(buf->n_tuples || !has_next);

                        /* We have enough data tuples to form a block.
                        Sort them and write to disk. */

                        if (buf->n_tuples) {
                                if (dict_index_is_unique(index)) {
                                        row_merge_dup_t dup;
                                        dup.index = buf->index;
                                        dup.table = table;
                                        dup.n_dup = 0;

                                        row_merge_buf_sort(buf, &dup);

                                        if (dup.n_dup) {
                                                err = DB_DUPLICATE_KEY;
err_exit:
                                                trx->error_key_num = i;
                                                goto func_exit;
                                        }
                                } else {
                                        row_merge_buf_sort(buf, NULL);
                                }
                        }

                        row_merge_buf_write(buf, file, block);

                        if (!row_merge_write(file->fd, file->offset++,
                                             block)) {
                                err = DB_OUT_OF_FILE_SPACE;
                                goto err_exit;
                        }

                        UNIV_MEM_INVALID(block[0], sizeof block[0]);
                        merge_buf[i] = row_merge_buf_empty(buf);

                        /* Try writing the record again, now that
                        the buffer has been written out and emptied. */

                        if (UNIV_UNLIKELY
                            (row && !row_merge_buf_add(buf, row, ext))) {
                                /* An empty buffer should have enough
                                room for at least one record. */
                                ut_error;
                        }
                }

                mem_heap_empty(row_heap);

                if (UNIV_UNLIKELY(!has_next)) {
                        goto func_exit;
                }
        }

func_exit:
        btr_pcur_close(&pcur);
        mtr_commit(&mtr);
        mem_heap_free(row_heap);

        if (UNIV_LIKELY_NULL(nonnull)) {
                mem_free(nonnull);
        }

        for (i = 0; i < n_index; i++) {
                row_merge_buf_free(merge_buf[i]);
        }

        mem_free(merge_buf);

        trx->op_info = "";

        return(err);
}
1300
 
 
1301
/*****************************************************************
 
1302
Merge two blocks of linked lists on disk and write a bigger block. */
 
1303
static
 
1304
ulint
 
1305
row_merge_blocks(
 
1306
/*=============*/
 
1307
                                        /* out: DB_SUCCESS or error code */
 
1308
        const dict_index_t*     index,  /* in: index being created */
 
1309
        merge_file_t*           file,   /* in/out: file containing
 
1310
                                        index entries */
 
1311
        row_merge_block_t*      block,  /* in/out: 3 buffers */
 
1312
        ulint*                  foffs0, /* in/out: offset of first
 
1313
                                        source list in the file */
 
1314
        ulint*                  foffs1, /* in/out: offset of second
 
1315
                                        source list in the file */
 
1316
        merge_file_t*           of,     /* in/out: output file */
 
1317
        TABLE*                  table)  /* in/out: MySQL table, for
 
1318
                                        reporting erroneous key value
 
1319
                                        if applicable */
 
1320
{
 
1321
        mem_heap_t*     heap;   /* memory heap for offsets0, offsets1 */
 
1322
 
 
1323
        mrec_buf_t      buf[3]; /* buffer for handling split mrec in block[] */
 
1324
        const byte*     b0;     /* pointer to block[0] */
 
1325
        const byte*     b1;     /* pointer to block[1] */
 
1326
        byte*           b2;     /* pointer to block[2] */
 
1327
        const mrec_t*   mrec0;  /* merge rec, points to block[0] or buf[0] */
 
1328
        const mrec_t*   mrec1;  /* merge rec, points to block[1] or buf[1] */
 
1329
        ulint*          offsets0;/* offsets of mrec0 */
 
1330
        ulint*          offsets1;/* offsets of mrec1 */
 
1331
 
 
1332
        heap = row_merge_heap_create(index, &offsets0, &offsets1);
 
1333
 
 
1334
        /* Write a record and read the next record.  Split the output
 
1335
        file in two halves, which can be merged on the following pass. */
 
1336
#define ROW_MERGE_WRITE_GET_NEXT(N, AT_END)                             \
 
1337
        do {                                                            \
 
1338
                b2 = row_merge_write_rec(&block[2], &buf[2], b2,        \
 
1339
                                         of->fd, &of->offset,           \
 
1340
                                         mrec##N, offsets##N);          \
 
1341
                if (UNIV_UNLIKELY(!b2)) {                               \
 
1342
                        goto corrupt;                                   \
 
1343
                }                                                       \
 
1344
                b##N = row_merge_read_rec(&block[N], &buf[N],           \
 
1345
                                          b##N, index,                  \
 
1346
                                          file->fd, foffs##N,           \
 
1347
                                          &mrec##N, offsets##N);        \
 
1348
                if (UNIV_UNLIKELY(!b##N)) {                             \
 
1349
                        if (mrec##N) {                                  \
 
1350
                                goto corrupt;                           \
 
1351
                        }                                               \
 
1352
                        AT_END;                                         \
 
1353
                }                                                       \
 
1354
        } while (0)
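        /* In ROW_MERGE_WRITE_GET_NEXT(), a failed write of mrec##N, or a
        failed read while a record was still pending, is treated as
        corruption; a clean end of input list N executes AT_END instead. */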
 
1355
 
 
1356
        if (!row_merge_read(file->fd, *foffs0, &block[0])
 
1357
            || !row_merge_read(file->fd, *foffs1, &block[1])) {
 
1358
corrupt:
 
1359
                mem_heap_free(heap);
 
1360
                return(DB_CORRUPTION);
 
1361
        }
 
1362
 
 
1363
        b0 = block[0];
 
1364
        b1 = block[1];
 
1365
        b2 = block[2];
 
1366
 
 
1367
        b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, file->fd,
 
1368
                                foffs0, &mrec0, offsets0);
 
1369
        b1 = row_merge_read_rec(&block[1], &buf[1], b1, index, file->fd,
 
1370
                                foffs1, &mrec1, offsets1);
 
1371
        if (UNIV_UNLIKELY(!b0 && mrec0)
 
1372
            || UNIV_UNLIKELY(!b1 && mrec1)) {
 
1373
 
 
1374
                goto corrupt;
 
1375
        }
 
1376
 
 
1377
        while (mrec0 && mrec1) {
 
1378
                switch (row_merge_cmp(mrec0, mrec1,
 
1379
                                      offsets0, offsets1, index)) {
 
1380
                case 0:
 
1381
                        if (UNIV_UNLIKELY
 
1382
                            (dict_index_is_unique(index))) {
 
1383
                                innobase_rec_to_mysql(table, mrec0,
 
1384
                                                      index, offsets0);
 
1385
                                mem_heap_free(heap);
 
1386
                                return(DB_DUPLICATE_KEY);
 
1387
                        }
 
1388
                        /* fall through */
 
1389
                case -1:
 
1390
                        ROW_MERGE_WRITE_GET_NEXT(0, goto merged);
 
1391
                        break;
 
1392
                case 1:
 
1393
                        ROW_MERGE_WRITE_GET_NEXT(1, goto merged);
 
1394
                        break;
 
1395
                default:
 
1396
                        ut_error;
 
1397
                }
 
1398
 
 
1399
        }
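        /* At this point at least one input list is exhausted.  The
        remainder of the other list is already sorted and is appended
        to the output unchanged below. */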
 
1400
 
 
1401
merged:
 
1402
        if (mrec0) {
 
1403
                /* append all mrec0 to output */
 
1404
                for (;;) {
 
1405
                        ROW_MERGE_WRITE_GET_NEXT(0, goto done0);
 
1406
                }
 
1407
        }
 
1408
done0:
 
1409
        if (mrec1) {
 
1410
                /* append all mrec1 to output */
 
1411
                for (;;) {
 
1412
                        ROW_MERGE_WRITE_GET_NEXT(1, goto done1);
 
1413
                }
 
1414
        }
 
1415
done1:
 
1416
 
 
1417
        mem_heap_free(heap);
 
1418
        b2 = row_merge_write_eof(&block[2], b2, of->fd, &of->offset);
 
1419
        return(b2 ? DB_SUCCESS : DB_CORRUPTION);
 
1420
}
 
1421
 
 
1422
/*****************************************************************
 
1423
Merge the two halves of a disk file in a single pass. */
 
1424
static
 
1425
ulint
 
1426
row_merge(
 
1427
/*======*/
 
1428
                                        /* out: DB_SUCCESS or error code */
 
1429
        const dict_index_t*     index,  /* in: index being created */
 
1430
        merge_file_t*           file,   /* in/out: file containing
 
1431
                                        index entries */
 
1432
        ulint                   half,   /* in: half the file */
 
1433
        row_merge_block_t*      block,  /* in/out: 3 buffers */
 
1434
        int*                    tmpfd,  /* in/out: temporary file handle */
 
1435
        TABLE*                  table)  /* in/out: MySQL table, for
 
1436
                                        reporting erroneous key value
 
1437
                                        if applicable */
 
1438
{
 
1439
        ulint           foffs0; /* first input offset */
 
1440
        ulint           foffs1; /* second input offset */
 
1441
        ulint           error;  /* error code */
 
1442
        merge_file_t    of;     /* output file */
 
1443
 
 
1444
        UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]);
 
1445
        ut_ad(half > 0);
 
1446
 
 
1447
        of.fd = *tmpfd;
 
1448
        of.offset = 0;
 
1449
 
 
1450
        /* Merge blocks to the output file. */
 
1451
        foffs0 = 0;
 
1452
        foffs1 = half;
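        /* Each iteration merges one sorted run starting at foffs0 (in the
        lower half of the file) with one starting at foffs1 (in the upper
        half); row_merge_blocks() advances both offsets as it consumes
        blocks, and the loop then steps to the next pair of runs. */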
 
1453
 
 
1454
        for (; foffs0 < half && foffs1 < file->offset; foffs0++, foffs1++) {
 
1455
                error = row_merge_blocks(index, file, block,
 
1456
                                         &foffs0, &foffs1, &of, table);
 
1457
 
 
1458
                if (error != DB_SUCCESS) {
 
1459
                        return(error);
 
1460
                }
 
1461
        }
 
1462
 
 
1463
        /* Copy the last block, if there is one. */
 
1464
        while (foffs0 < half) {
 
1465
                if (!row_merge_read(file->fd, foffs0++, block)
 
1466
                    || !row_merge_write(of.fd, of.offset++, block)) {
 
1467
                        return(DB_CORRUPTION);
 
1468
                }
 
1469
        }
 
1470
        while (foffs1 < file->offset) {
 
1471
                if (!row_merge_read(file->fd, foffs1++, block)
 
1472
                    || !row_merge_write(of.fd, of.offset++, block)) {
 
1473
                        return(DB_CORRUPTION);
 
1474
                }
 
1475
        }
 
1476
 
 
1477
        /* Swap file descriptors for the next pass. */
 
1478
        *tmpfd = file->fd;
 
1479
        *file = of;
 
1480
 
 
1481
        UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]);
 
1482
 
 
1483
        return(DB_SUCCESS);
 
1484
}
 
1485
 
 
1486
/*****************************************************************
 
1487
Merge a disk file until it is sorted, doubling the run length on each pass. */
 
1488
static
 
1489
ulint
 
1490
row_merge_sort(
 
1491
/*===========*/
 
1492
                                        /* out: DB_SUCCESS or error code */
 
1493
        const dict_index_t*     index,  /* in: index being created */
 
1494
        merge_file_t*           file,   /* in/out: file containing
 
1495
                                        index entries */
 
1496
        row_merge_block_t*      block,  /* in/out: 3 buffers */
 
1497
        int*                    tmpfd,  /* in/out: temporary file handle */
 
1498
        TABLE*                  table)  /* in/out: MySQL table, for
 
1499
                                        reporting erroneous key value
 
1500
                                        if applicable */
 
1501
{
 
1502
        ulint   blksz;  /* block size */
 
1503
 
 
1504
        for (blksz = 1; blksz < file->offset; blksz *= 2) {
 
1505
                ulint   half;
 
1506
                ulint   error;
 
1507
 
 
1508
                ut_ad(ut_is_2pow(blksz));
 
1509
                half = ut_2pow_round((file->offset + (blksz - 1)) / 2, blksz);
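                /* half is (roughly) the midpoint of the file, rounded down
                to a multiple of blksz so that it falls on a run boundary.
                For example, with file->offset == 7 and blksz == 2 this is
                ut_2pow_round(4, 2) == 4: runs beginning in blocks 0..3 are
                merged with runs beginning in blocks 4..6. */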
 
1510
                error = row_merge(index, file, half, block, tmpfd, table);
 
1511
 
 
1512
                if (error != DB_SUCCESS) {
 
1513
                        return(error);
 
1514
                }
 
1515
        }
 
1516
 
 
1517
        return(DB_SUCCESS);
 
1518
}
 
1519
 
 
1520
/*****************************************************************
 
1521
Copy externally stored columns to the data tuple. */
 
1522
static
 
1523
void
 
1524
row_merge_copy_blobs(
 
1525
/*=================*/
 
1526
        const mrec_t*   mrec,   /* in: merge record */
 
1527
        const ulint*    offsets,/* in: offsets of mrec */
 
1528
        ulint           zip_size,/* in: compressed page size in bytes, or 0 */
 
1529
        dtuple_t*       tuple,  /* in/out: data tuple */
 
1530
        mem_heap_t*     heap)   /* in/out: memory heap */
 
1531
{
 
1532
        ulint   i;
 
1533
        ulint   n_fields = dtuple_get_n_fields(tuple);
 
1534
 
 
1535
        for (i = 0; i < n_fields; i++) {
 
1536
                ulint           len;
 
1537
                const void*     data;
 
1538
                dfield_t*       field = dtuple_get_nth_field(tuple, i);
 
1539
 
 
1540
                if (!dfield_is_ext(field)) {
 
1541
                        continue;
 
1542
                }
 
1543
 
 
1544
                ut_ad(!dfield_is_null(field));
 
1545
 
 
1546
                /* The table is locked during index creation.
 
1547
                Therefore, externally stored columns cannot possibly
 
1548
                be freed between the time the BLOB pointers are read
 
1549
                (row_merge_read_clustered_index()) and dereferenced
 
1550
                (below). */
 
1551
                data = btr_rec_copy_externally_stored_field(
 
1552
                        mrec, offsets, zip_size, i, &len, heap);
 
1553
 
 
1554
                dfield_set_data(field, data, len);
 
1555
        }
 
1556
}
 
1557
 
 
1558
/************************************************************************
 
1559
Read the sorted file containing index data tuples and insert these data
 
1560
tuples into the index. */
 
1561
static
 
1562
ulint
 
1563
row_merge_insert_index_tuples(
 
1564
/*==========================*/
 
1565
                                        /* out: DB_SUCCESS or error number */
 
1566
        trx_t*                  trx,    /* in: transaction */
 
1567
        dict_index_t*           index,  /* in: index */
 
1568
        dict_table_t*           table,  /* in: new table */
 
1569
        ulint                   zip_size,/* in: compressed page size of
 
1570
                                         the old table, or 0 if uncompressed */
 
1571
        int                     fd,     /* in: file descriptor */
 
1572
        row_merge_block_t*      block)  /* in/out: file buffer */
 
1573
{
 
1574
        mrec_buf_t              buf;
 
1575
        const byte*             b;
 
1576
        que_thr_t*              thr;
 
1577
        ins_node_t*             node;
 
1578
        mem_heap_t*             tuple_heap;
 
1579
        mem_heap_t*             graph_heap;
 
1580
        ulint                   error = DB_SUCCESS;
 
1581
        ulint                   foffs = 0;
 
1582
        ulint*                  offsets;
 
1583
 
 
1584
        ut_ad(trx);
 
1585
        ut_ad(index);
 
1586
        ut_ad(table);
 
1587
 
 
1588
        /* We use the insert query graph as the dummy graph
 
1589
        needed in the row module call */
 
1590
 
 
1591
        trx->op_info = "inserting index entries";
 
1592
 
 
1593
        graph_heap = mem_heap_create(500);
 
1594
        node = ins_node_create(INS_DIRECT, table, graph_heap);
 
1595
 
 
1596
        thr = pars_complete_graph_for_exec(node, trx, graph_heap);
 
1597
 
 
1598
        que_thr_move_to_run_state_for_mysql(thr, trx);
 
1599
 
 
1600
        tuple_heap = mem_heap_create(1000);
 
1601
 
 
1602
        {
 
1603
                ulint i = 1 + REC_OFFS_HEADER_SIZE
 
1604
                        + dict_index_get_n_fields(index);
 
1605
                offsets = mem_heap_alloc(graph_heap, i * sizeof *offsets);
 
1606
                offsets[0] = i;
 
1607
                offsets[1] = dict_index_get_n_fields(index);
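                /* By the rec_get_offsets() convention, offsets[0] holds
                the allocated size of the array and offsets[1] the number
                of fields; the per-field offsets are filled in later for
                each record that is read. */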
 
1608
        }
 
1609
 
 
1610
        b = *block;
 
1611
 
 
1612
        if (!row_merge_read(fd, foffs, block)) {
 
1613
                error = DB_CORRUPTION;
 
1614
        } else {
 
1615
                for (;;) {
 
1616
                        const mrec_t*   mrec;
 
1617
                        dtuple_t*       dtuple;
 
1618
                        ulint           n_ext;
 
1619
 
 
1620
                        b = row_merge_read_rec(block, &buf, b, index,
 
1621
                                               fd, &foffs, &mrec, offsets);
 
1622
                        if (UNIV_UNLIKELY(!b)) {
 
1623
                                /* End of list, or I/O error */
 
1624
                                if (mrec) {
 
1625
                                        error = DB_CORRUPTION;
 
1626
                                }
 
1627
                                break;
 
1628
                        }
 
1629
 
 
1630
                        dtuple = row_rec_to_index_entry_low(
 
1631
                                mrec, index, offsets, &n_ext, tuple_heap);
 
1632
 
 
1633
                        if (UNIV_UNLIKELY(n_ext)) {
 
1634
                                row_merge_copy_blobs(mrec, offsets, zip_size,
 
1635
                                                     dtuple, tuple_heap);
 
1636
                        }
 
1637
 
 
1638
                        node->row = dtuple;
 
1639
                        node->table = table;
 
1640
                        node->trx_id = trx->id;
 
1641
 
 
1642
                        ut_ad(dtuple_validate(dtuple));
 
1643
 
 
1644
                        do {
 
1645
                                thr->run_node = thr;
 
1646
                                thr->prev_node = thr->common.parent;
 
1647
 
 
1648
                                error = row_ins_index_entry(index, dtuple,
 
1649
                                                            0, FALSE, thr);
 
1650
 
 
1651
                                if (UNIV_LIKELY(error == DB_SUCCESS)) {
 
1652
 
 
1653
                                        goto next_rec;
 
1654
                                }
 
1655
 
 
1656
                                thr->lock_state = QUE_THR_LOCK_ROW;
 
1657
                                trx->error_state = error;
 
1658
                                que_thr_stop_for_mysql(thr);
 
1659
                                thr->lock_state = QUE_THR_LOCK_NOLOCK;
 
1660
                        } while (row_mysql_handle_errors(&error, trx,
 
1661
                                                         thr, NULL));
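                        /* row_mysql_handle_errors() returns TRUE only if
                        the error was a lock wait that has since been
                        resolved, in which case the insert is retried;
                        otherwise we fall through and report the error. */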
 
1662
 
 
1663
                        goto err_exit;
 
1664
next_rec:
 
1665
                        mem_heap_empty(tuple_heap);
 
1666
                }
 
1667
        }
 
1668
 
 
1669
        que_thr_stop_for_mysql_no_error(thr, trx);
 
1670
err_exit:
 
1671
        que_graph_free(thr->graph);
 
1672
 
 
1673
        trx->op_info = "";
 
1674
 
 
1675
        mem_heap_free(tuple_heap);
 
1676
 
 
1677
        return(error);
 
1678
}
 
1679
 
 
1680
/*************************************************************************
 
1681
Sets an exclusive or shared lock on a table, for the duration of creating indexes. */
 
1682
UNIV_INTERN
 
1683
ulint
 
1684
row_merge_lock_table(
 
1685
/*=================*/
 
1686
                                        /* out: error code or DB_SUCCESS */
 
1687
        trx_t*          trx,            /* in/out: transaction */
 
1688
        dict_table_t*   table,          /* in: table to lock */
 
1689
        enum lock_mode  mode)           /* in: LOCK_X or LOCK_S */
 
1690
{
 
1691
        mem_heap_t*     heap;
 
1692
        que_thr_t*      thr;
 
1693
        ulint           err;
 
1694
        sel_node_t*     node;
 
1695
 
 
1696
        ut_ad(trx);
 
1697
        ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
 
1698
        ut_ad(mode == LOCK_X || mode == LOCK_S);
 
1699
 
 
1700
        heap = mem_heap_create(512);
 
1701
 
 
1702
        trx->op_info = "setting table lock for creating or dropping index";
 
1703
 
 
1704
        node = sel_node_create(heap);
 
1705
        thr = pars_complete_graph_for_exec(node, trx, heap);
 
1706
        thr->graph->state = QUE_FORK_ACTIVE;
 
1707
 
 
1708
        /* We use the select query graph as the dummy graph needed
 
1709
        in the lock module call */
 
1710
 
 
1711
        thr = que_fork_get_first_thr(que_node_get_parent(thr));
 
1712
        que_thr_move_to_run_state_for_mysql(thr, trx);
 
1713
 
 
1714
run_again:
 
1715
        thr->run_node = thr;
 
1716
        thr->prev_node = thr->common.parent;
 
1717
 
 
1718
        err = lock_table(0, table, mode, thr);
 
1719
 
 
1720
        trx->error_state = err;
 
1721
 
 
1722
        if (UNIV_LIKELY(err == DB_SUCCESS)) {
 
1723
                que_thr_stop_for_mysql_no_error(thr, trx);
 
1724
        } else {
 
1725
                que_thr_stop_for_mysql(thr);
 
1726
 
 
1727
                if (err != DB_QUE_THR_SUSPENDED) {
 
1728
                        ibool   was_lock_wait;
 
1729
 
 
1730
                        was_lock_wait = row_mysql_handle_errors(
 
1731
                                &err, trx, thr, NULL);
 
1732
 
 
1733
                        if (was_lock_wait) {
 
1734
                                goto run_again;
 
1735
                        }
 
1736
                } else {
 
1737
                        que_thr_t*      run_thr;
 
1738
                        que_node_t*     parent;
 
1739
 
 
1740
                        parent = que_node_get_parent(thr);
 
1741
                        run_thr = que_fork_start_command(parent);
 
1742
 
 
1743
                        ut_a(run_thr == thr);
 
1744
 
 
1745
                        /* There was a lock wait but the thread was not
 
1746
                        in a ready to run or running state. */
 
1747
                        trx->error_state = DB_LOCK_WAIT;
 
1748
 
 
1749
                        goto run_again;
 
1750
                }
 
1751
        }
 
1752
 
 
1753
        que_graph_free(thr->graph);
 
1754
        trx->op_info = "";
 
1755
 
 
1756
        return(err);
 
1757
}
 
1758
 
 
1759
/*************************************************************************
 
1760
Drop an index from the InnoDB system tables.  The data dictionary must
 
1761
have been locked exclusively by the caller, because the transaction
 
1762
will not be committed. */
 
1763
UNIV_INTERN
 
1764
void
 
1765
row_merge_drop_index(
 
1766
/*=================*/
 
1767
        dict_index_t*   index,  /* in: index to be removed */
 
1768
        dict_table_t*   table,  /* in: table */
 
1769
        trx_t*          trx)    /* in: transaction handle */
 
1770
{
 
1771
        ulint           err;
 
1772
        pars_info_t*    info = pars_info_create();
 
1773
 
 
1774
        /* We use the private SQL parser of Innobase to generate the
 
1775
        query graphs needed in deleting the dictionary data from system
 
1776
tables in Innobase. Deleting a row from the SYS_INDEXES table also
 
1777
        frees the file segments of the B-tree associated with the index. */
 
1778
 
 
1779
        static const char str1[] =
 
1780
                "PROCEDURE DROP_INDEX_PROC () IS\n"
 
1781
                "BEGIN\n"
 
1782
                "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n"
 
1783
                "DELETE FROM SYS_INDEXES WHERE ID = :indexid\n"
 
1784
                "               AND TABLE_ID = :tableid;\n"
 
1785
                "END;\n";
 
1786
 
 
1787
        ut_ad(index && table && trx);
 
1788
 
 
1789
        pars_info_add_dulint_literal(info, "indexid", index->id);
 
1790
        pars_info_add_dulint_literal(info, "tableid", table->id);
 
1791
 
 
1792
        trx_start_if_not_started(trx);
 
1793
        trx->op_info = "dropping index";
 
1794
 
 
1795
        ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
 
1796
 
 
1797
        err = que_eval_sql(info, str1, FALSE, trx);
 
1798
 
 
1799
        ut_a(err == DB_SUCCESS);
 
1800
 
 
1801
        /* Replace this index with another equivalent index for all
 
1802
        foreign key constraints on this table where this index is used */
 
1803
 
 
1804
        dict_table_replace_index_in_foreign_list(table, index);
 
1805
        dict_index_remove_from_cache(table, index);
 
1806
 
 
1807
        trx->op_info = "";
 
1808
}
 
1809
 
 
1810
/*************************************************************************
 
1811
Drop those indexes which were created before an error occurred when
 
1812
building an index.  The data dictionary must have been locked
 
1813
exclusively by the caller, because the transaction will not be
 
1814
committed. */
 
1815
UNIV_INTERN
 
1816
void
 
1817
row_merge_drop_indexes(
 
1818
/*===================*/
 
1819
        trx_t*          trx,            /* in: transaction */
 
1820
        dict_table_t*   table,          /* in: table containing the indexes */
 
1821
        dict_index_t**  index,          /* in: indexes to drop */
 
1822
        ulint           num_created)    /* in: number of elements in index[] */
 
1823
{
 
1824
        ulint   key_num;
 
1825
 
 
1826
        for (key_num = 0; key_num < num_created; key_num++) {
 
1827
                row_merge_drop_index(index[key_num], table, trx);
 
1828
        }
 
1829
}
 
1830
 
 
1831
/*************************************************************************
 
1832
Drop all partially created indexes during crash recovery. */
 
1833
UNIV_INTERN
 
1834
void
 
1835
row_merge_drop_temp_indexes(void)
 
1836
/*=============================*/
 
1837
{
 
1838
        trx_t*          trx;
 
1839
        ulint           err;
 
1840
 
 
1841
        /* We use the private SQL parser of Innobase to generate the
 
1842
        query graphs needed in deleting the dictionary data from system
 
1843
tables in Innobase. Deleting a row from the SYS_INDEXES table also
 
1844
        frees the file segments of the B-tree associated with the index. */
 
1845
#if TEMP_INDEX_PREFIX != '\377'
 
1846
# error "TEMP_INDEX_PREFIX != '\377'"
 
1847
#endif
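        /* The '\377' literal in the SQL below must be the same byte as
        TEMP_INDEX_PREFIX; the preprocessor check above enforces this at
        compile time. */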
 
1848
        static const char drop_temp_indexes[] =
 
1849
                "PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
 
1850
                "indexid CHAR;\n"
 
1851
                "DECLARE CURSOR c IS SELECT ID FROM SYS_INDEXES\n"
 
1852
                "WHERE SUBSTR(NAME,0,1)='\377';\n"
 
1853
                "BEGIN\n"
 
1854
                "\tOPEN c;\n"
 
1855
                "\tWHILE 1=1 LOOP\n"
 
1856
                "\t\tFETCH c INTO indexid;\n"
 
1857
                "\t\tIF (SQL % NOTFOUND) THEN\n"
 
1858
                "\t\t\tEXIT;\n"
 
1859
                "\t\tEND IF;\n"
 
1860
                "\t\tDELETE FROM SYS_FIELDS WHERE INDEX_ID = indexid;\n"
 
1861
                "\t\tDELETE FROM SYS_INDEXES WHERE ID = indexid;\n"
 
1862
                "\tEND LOOP;\n"
 
1863
                "\tCLOSE c;\n"
 
1864
                "\tCOMMIT WORK;\n"
 
1865
                "END;\n";
 
1866
 
 
1867
        trx = trx_allocate_for_background();
 
1868
        trx->op_info = "dropping partially created indexes";
 
1869
        row_mysql_lock_data_dictionary(trx);
 
1870
 
 
1871
        /* Incomplete transactions may be holding some locks on the
 
1872
        data dictionary tables.  However, they should never have been
 
1873
        able to lock the records corresponding to the partially
 
1874
        created indexes that we are attempting to delete, because the
 
1875
        table was locked when the indexes were being created.  We will
 
1876
        drop the partially created indexes before the rollback of
 
1877
        incomplete transactions is initiated.  Thus, this should not
 
1878
        interfere with the incomplete transactions. */
 
1879
        trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
 
1880
        err = que_eval_sql(NULL, drop_temp_indexes, FALSE, trx);
 
1881
        ut_a(err == DB_SUCCESS);
 
1882
 
 
1883
        row_mysql_unlock_data_dictionary(trx);
 
1884
        trx_free_for_background(trx);
 
1885
}
 
1886
 
 
1887
/*************************************************************************
 
1888
Create a merge file. */
 
1889
static
 
1890
void
 
1891
row_merge_file_create(
 
1892
/*==================*/
 
1893
        merge_file_t*   merge_file)     /* out: merge file structure */
 
1894
{
 
1895
        merge_file->fd = innobase_mysql_tmpfile();
 
1896
        merge_file->offset = 0;
 
1897
}
 
1898
 
 
1899
/*************************************************************************
 
1900
Destroy a merge file. */
 
1901
static
 
1902
void
 
1903
row_merge_file_destroy(
 
1904
/*===================*/
 
1905
        merge_file_t*   merge_file)     /* in/out: merge file structure */
 
1906
{
 
1907
        if (merge_file->fd != -1) {
 
1908
                close(merge_file->fd);
 
1909
                merge_file->fd = -1;
 
1910
        }
 
1911
}
 
1912
 
 
1913
/*************************************************************************
 
1914
Determine the precise type of a column that is added to the temporary
 
1915
table, in particular whether it must be constrained NOT NULL. */
 
1916
UNIV_INLINE
 
1917
ulint
 
1918
row_merge_col_prtype(
 
1919
/*=================*/
 
1920
                                                /* out: col->prtype, possibly
 
1921
                                                ORed with DATA_NOT_NULL */
 
1922
        const dict_col_t*       col,            /* in: column */
 
1923
        const char*             col_name,       /* in: name of the column */
 
1924
        const merge_index_def_t*index_def)      /* in: the index definition
 
1925
                                                of the primary key */
 
1926
{
 
1927
        ulint   prtype = col->prtype;
 
1928
        ulint   i;
 
1929
 
 
1930
        ut_ad(index_def->ind_type & DICT_CLUSTERED);
 
1931
 
 
1932
        if (prtype & DATA_NOT_NULL) {
 
1933
 
 
1934
                return(prtype);
 
1935
        }
 
1936
 
 
1937
        /* All columns that are included
 
1938
        in the PRIMARY KEY must be NOT NULL. */
 
1939
 
 
1940
        for (i = 0; i < index_def->n_fields; i++) {
 
1941
                if (!strcmp(col_name, index_def->fields[i].field_name)) {
 
1942
                        return(prtype | DATA_NOT_NULL);
 
1943
                }
 
1944
        }
 
1945
 
 
1946
        return(prtype);
 
1947
}
 
1948
 
 
1949
/*************************************************************************
 
1950
Create a temporary table for creating a primary key, using the definition
 
1951
of an existing table. */
 
1952
UNIV_INTERN
 
1953
dict_table_t*
 
1954
row_merge_create_temporary_table(
 
1955
/*=============================*/
 
1956
                                                /* out: table,
 
1957
                                                or NULL on error */
 
1958
        const char*             table_name,     /* in: new table name */
 
1959
        const merge_index_def_t*index_def,      /* in: the index definition
 
1960
                                                of the primary key */
 
1961
        const dict_table_t*     table,          /* in: old table definition */
 
1962
        trx_t*                  trx)            /* in/out: transaction
 
1963
                                                (sets error_state) */
 
1964
{
 
1965
        ulint           i;
 
1966
        dict_table_t*   new_table = NULL;
 
1967
        ulint           n_cols = dict_table_get_n_user_cols(table);
 
1968
        ulint           error;
 
1969
        mem_heap_t*     heap = mem_heap_create(1000);
 
1970
 
 
1971
        ut_ad(table_name);
 
1972
        ut_ad(index_def);
 
1973
        ut_ad(table);
 
1974
        ut_ad(mutex_own(&dict_sys->mutex));
 
1975
 
 
1976
        new_table = dict_mem_table_create(table_name, 0, n_cols, table->flags);
 
1977
 
 
1978
        for (i = 0; i < n_cols; i++) {
 
1979
                const dict_col_t*       col;
 
1980
                const char*             col_name;
 
1981
 
 
1982
                col = dict_table_get_nth_col(table, i);
 
1983
                col_name = dict_table_get_col_name(table, i);
 
1984
 
 
1985
                dict_mem_table_add_col(new_table, heap, col_name, col->mtype,
 
1986
                                       row_merge_col_prtype(col, col_name,
 
1987
                                                            index_def),
 
1988
                                       col->len);
 
1989
        }
 
1990
 
 
1991
        error = row_create_table_for_mysql(new_table, trx);
 
1992
        mem_heap_free(heap);
 
1993
 
 
1994
        if (error != DB_SUCCESS) {
 
1995
                trx->error_state = error;
 
1996
                new_table = NULL;
 
1997
        }
 
1998
 
 
1999
        return(new_table);
 
2000
}
 
2001
 
 
2002
/*************************************************************************
 
2003
Rename the temporary indexes in the dictionary to permanent ones.  The
 
2004
data dictionary must have been locked exclusively by the caller,
 
2005
because the transaction will not be committed. */
 
2006
UNIV_INTERN
 
2007
ulint
 
2008
row_merge_rename_indexes(
 
2009
/*=====================*/
 
2010
                                        /* out: DB_SUCCESS if all OK */
 
2011
        trx_t*          trx,            /* in/out: transaction */
 
2012
        dict_table_t*   table)          /* in/out: table with new indexes */
 
2013
{
 
2014
        ulint           err = DB_SUCCESS;
 
2015
        pars_info_t*    info = pars_info_create();
 
2016
 
 
2017
        /* We use the private SQL parser of Innobase to generate the
 
2018
        query graphs needed in renaming indexes. */
 
2019
 
 
2020
#if TEMP_INDEX_PREFIX != '\377'
 
2021
# error "TEMP_INDEX_PREFIX != '\377'"
 
2022
#endif
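        /* Here too, '\377' must equal TEMP_INDEX_PREFIX.  The UPDATE below
        strips that prefix byte from the names stored in SYS_INDEXES, and
        the loop following que_eval_sql() advances index->name past the
        same byte in the in-memory dictionary cache. */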
 
2023
 
 
2024
        static const char rename_indexes[] =
 
2025
                "PROCEDURE RENAME_INDEXES_PROC () IS\n"
 
2026
                "BEGIN\n"
 
2027
                "UPDATE SYS_INDEXES SET NAME=SUBSTR(NAME,1,LENGTH(NAME)-1)\n"
 
2028
                "WHERE TABLE_ID = :tableid AND SUBSTR(NAME,0,1)='\377';\n"
 
2029
                "END;\n";
 
2030
 
 
2031
        ut_ad(table);
 
2032
        ut_ad(trx);
 
2033
        ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
 
2034
 
 
2035
        trx->op_info = "renaming indexes";
 
2036
 
 
2037
        pars_info_add_dulint_literal(info, "tableid", table->id);
 
2038
 
 
2039
        err = que_eval_sql(info, rename_indexes, FALSE, trx);
 
2040
 
 
2041
        if (err == DB_SUCCESS) {
 
2042
                dict_index_t*   index = dict_table_get_first_index(table);
 
2043
                do {
 
2044
                        if (*index->name == TEMP_INDEX_PREFIX) {
 
2045
                                index->name++;
 
2046
                        }
 
2047
                        index = dict_table_get_next_index(index);
 
2048
                } while (index);
 
2049
        }
 
2050
 
 
2051
        trx->op_info = "";
 
2052
 
 
2053
        return(err);
 
2054
}
 
2055
 
 
2056
/*************************************************************************
 
2057
Rename the tables in the data dictionary.  The data dictionary must
 
2058
have been locked exclusively by the caller, because the transaction
 
2059
will not be committed. */
 
2060
UNIV_INTERN
 
2061
ulint
 
2062
row_merge_rename_tables(
 
2063
/*====================*/
 
2064
                                        /* out: error code or DB_SUCCESS */
 
2065
        dict_table_t*   old_table,      /* in/out: old table, renamed to
 
2066
                                        tmp_name */
 
2067
        dict_table_t*   new_table,      /* in/out: new table, renamed to
 
2068
                                        old_table->name */
 
2069
        const char*     tmp_name,       /* in: new name for old_table */
 
2070
        trx_t*          trx)            /* in: transaction handle */
 
2071
{
 
2072
        ulint           err     = DB_ERROR;
 
2073
        pars_info_t*    info;
 
2074
        const char*     old_name= old_table->name;
 
2075
 
 
2076
        ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
 
2077
        ut_ad(old_table != new_table);
 
2078
        ut_ad(mutex_own(&dict_sys->mutex));
 
2079
 
 
2080
        ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
 
2081
 
 
2082
        trx->op_info = "renaming tables";
 
2083
 
 
2084
        /* We use the private SQL parser of Innobase to generate the query
 
2085
        graphs needed in updating the dictionary data in system tables. */
 
2086
 
 
2087
        info = pars_info_create();
 
2088
 
 
2089
        pars_info_add_str_literal(info, "new_name", new_table->name);
 
2090
        pars_info_add_str_literal(info, "old_name", old_name);
 
2091
        pars_info_add_str_literal(info, "tmp_name", tmp_name);
 
2092
 
 
2093
        err = que_eval_sql(info,
 
2094
                           "PROCEDURE RENAME_TABLES () IS\n"
 
2095
                           "BEGIN\n"
 
2096
                           "UPDATE SYS_TABLES SET NAME = :tmp_name\n"
 
2097
                           " WHERE NAME = :old_name;\n"
 
2098
                           "UPDATE SYS_TABLES SET NAME = :old_name\n"
 
2099
                           " WHERE NAME = :new_name;\n"
 
2100
                           "END;\n", FALSE, trx);
 
2101
 
 
2102
        if (err != DB_SUCCESS) {
 
2103
 
 
2104
                goto err_exit;
 
2105
        }
 
2106
 
 
2107
        /* The following calls will also rename the .ibd data files if
 
2108
        the tables are stored in a single-table tablespace */
 
2109
 
 
2110
        if (!dict_table_rename_in_cache(old_table, tmp_name, FALSE)
 
2111
            || !dict_table_rename_in_cache(new_table, old_name, FALSE)) {
 
2112
 
 
2113
                err = DB_ERROR;
 
2114
                goto err_exit;
 
2115
        }
 
2116
 
 
2117
        err = dict_load_foreigns(old_name, TRUE);
 
2118
 
 
2119
        if (err != DB_SUCCESS) {
 
2120
err_exit:
 
2121
                trx->error_state = DB_SUCCESS;
 
2122
                trx_general_rollback_for_mysql(trx, FALSE, NULL);
 
2123
                trx->error_state = DB_SUCCESS;
 
2124
        }
 
2125
 
 
2126
        trx->op_info = "";
 
2127
 
 
2128
        return(err);
 
2129
}
 
2130
 
 
2131
/*************************************************************************
 
2132
Create and execute a query graph for creating an index. */
 
2133
static
 
2134
ulint
 
2135
row_merge_create_index_graph(
 
2136
/*=========================*/
 
2137
                                        /* out: DB_SUCCESS or error code */
 
2138
        trx_t*          trx,            /* in: trx */
 
2139
        dict_table_t*   table,          /* in: table */
 
2140
        dict_index_t*   index)          /* in: index */
 
2141
{
 
2142
        ind_node_t*     node;           /* Index creation node */
 
2143
        mem_heap_t*     heap;           /* Memory heap */
 
2144
        que_thr_t*      thr;            /* Query thread */
 
2145
        ulint           err;
 
2146
 
 
2147
        ut_ad(trx);
 
2148
        ut_ad(table);
 
2149
        ut_ad(index);
 
2150
 
 
2151
        heap = mem_heap_create(512);
 
2152
 
 
2153
        index->table = table;
 
2154
        node = ind_create_graph_create(index, heap);
 
2155
        thr = pars_complete_graph_for_exec(node, trx, heap);
 
2156
 
 
2157
        ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
 
2158
 
 
2159
        que_run_threads(thr);
 
2160
 
 
2161
        err = trx->error_state;
 
2162
 
 
2163
        que_graph_free((que_t*) que_node_get_parent(thr));
 
2164
 
 
2165
        return(err);
 
2166
}
 
2167
 
 
2168
/*************************************************************************
 
2169
Create the index and load it into the dictionary. */
 
2170
UNIV_INTERN
 
2171
dict_index_t*
 
2172
row_merge_create_index(
 
2173
/*===================*/
 
2174
                                        /* out: index, or NULL on error */
 
2175
        trx_t*          trx,            /* in/out: trx (sets error_state) */
 
2176
        dict_table_t*   table,          /* in: the index is on this table */
 
2177
        const merge_index_def_t*        /* in: the index definition */
 
2178
                        index_def)
 
2179
{
 
2180
        dict_index_t*   index;
 
2181
        ulint           err;
 
2182
        ulint           n_fields = index_def->n_fields;
 
2183
        ulint           i;
 
2184
 
 
2185
        /* Create the index prototype, using the passed-in def; this is not
 
2186
        a persistent operation. We pass 0 as the space id, and determine at
 
2187
        a lower level the space id in which to store the table. */
 
2188
 
 
2189
        index = dict_mem_index_create(table->name, index_def->name,
 
2190
                                      0, index_def->ind_type, n_fields);
 
2191
 
 
2192
        ut_a(index);
 
2193
 
 
2194
        for (i = 0; i < n_fields; i++) {
 
2195
                merge_index_field_t*    ifield = &index_def->fields[i];
 
2196
 
 
2197
                dict_mem_index_add_field(index, ifield->field_name,
 
2198
                                         ifield->prefix_len);
 
2199
        }
 
2200
 
 
2201
        /* Add the index to SYS_INDEXES, using the index prototype. */
 
2202
        err = row_merge_create_index_graph(trx, table, index);
 
2203
 
 
2204
        if (err == DB_SUCCESS) {
 
2205
 
 
2206
                index = row_merge_dict_table_get_index(
 
2207
                        table, index_def);
 
2208
 
 
2209
                ut_a(index);
 
2210
 
 
2211
#ifdef ROW_MERGE_IS_INDEX_USABLE
 
2212
                /* Note the id of the transaction that created this
 
2213
                index; we use it to restrict readers from accessing
 
2214
                this index, to ensure read consistency. */
 
2215
                index->trx_id = trx->id;
 
2216
#endif /* ROW_MERGE_IS_INDEX_USABLE */
 
2217
        } else {
 
2218
                index = NULL;
 
2219
        }
 
2220
 
 
2221
        return(index);
 
2222
}
 
2223
 
 
2224
#ifdef ROW_MERGE_IS_INDEX_USABLE
 
2225
/*************************************************************************
 
2226
Check if a transaction can use an index. */
 
2227
UNIV_INTERN
 
2228
ibool
 
2229
row_merge_is_index_usable(
 
2230
/*======================*/
 
2231
        const trx_t*            trx,    /* in: transaction */
 
2232
        const dict_index_t*     index)  /* in: index to check */
 
2233
{
 
2234
        if (!trx->read_view) {
 
2235
                return(TRUE);
 
2236
        }
 
2237
 
 
2238
        return(ut_dulint_cmp(index->trx_id, trx->read_view->low_limit_id) < 0);
 
2239
}
 
2240
#endif /* ROW_MERGE_IS_INDEX_USABLE */
 
2241
 
 
2242
/*************************************************************************
 
2243
Drop the old table. */
 
2244
UNIV_INTERN
 
2245
ulint
 
2246
row_merge_drop_table(
 
2247
/*=================*/
 
2248
                                        /* out: DB_SUCCESS or error code */
 
2249
        trx_t*          trx,            /* in: transaction */
 
2250
        dict_table_t*   table)          /* in: table to drop */
 
2251
{
 
2252
        /* There must be no open transactions on the table. */
 
2253
        ut_a(table->n_mysql_handles_opened == 0);
 
2254
 
 
2255
        return(row_drop_table_for_mysql(table->name, trx, FALSE));
 
2256
}
 
2257
 
 
2258
/*************************************************************************
 
2259
Build indexes on a table by reading its clustered index,
 
2260
creating a temporary file containing index entries, merge sorting
 
2261
these index entries, and inserting the sorted index entries into the indexes. */
 
2262
UNIV_INTERN
 
2263
ulint
 
2264
row_merge_build_indexes(
 
2265
/*====================*/
 
2266
                                        /* out: DB_SUCCESS or error code */
 
2267
        trx_t*          trx,            /* in: transaction */
 
2268
        dict_table_t*   old_table,      /* in: table where rows are
 
2269
                                        read from */
 
2270
        dict_table_t*   new_table,      /* in: table where indexes are
 
2271
                                        created; identical to old_table
 
2272
                                        unless creating a PRIMARY KEY */
 
2273
        dict_index_t**  indexes,        /* in: indexes to be created */
 
2274
        ulint           n_indexes,      /* in: size of indexes[] */
 
2275
        TABLE*          table)          /* in/out: MySQL table, for
 
2276
                                        reporting erroneous key value
 
2277
                                        if applicable */
 
2278
{
 
2279
        merge_file_t*           merge_files;
 
2280
        row_merge_block_t*      block;
 
2281
        ulint                   block_size;
 
2282
        ulint                   i;
 
2283
        ulint                   error;
 
2284
        int                     tmpfd;
 
2285
 
 
2286
        ut_ad(trx);
 
2287
        ut_ad(old_table);
 
2288
        ut_ad(new_table);
 
2289
        ut_ad(indexes);
 
2290
        ut_ad(n_indexes);
 
2291
 
 
2292
        trx_start_if_not_started(trx);
 
2293
 
 
2294
        /* Allocate memory for merge file data structure and initialize
 
2295
        fields */
 
2296
 
 
2297
        merge_files = mem_alloc(n_indexes * sizeof *merge_files);
 
2298
        block_size = 3 * sizeof *block;
 
2299
        block = os_mem_alloc_large(&block_size);
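        /* block[] holds three row_merge_block_t buffers, matching the
        "in/out: 3 buffers" parameters above: two hold the input runs
        being merged and the third collects the merged output. */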
 
2300
 
 
2301
        for (i = 0; i < n_indexes; i++) {
 
2302
 
 
2303
                row_merge_file_create(&merge_files[i]);
 
2304
        }
 
2305
 
 
2306
        tmpfd = innobase_mysql_tmpfile();
 
2307
 
 
2308
        /* Reset the MySQL row buffer that is used when reporting
 
2309
        duplicate keys. */
 
2310
        innobase_rec_reset(table);
 
2311
 
 
2312
        /* Read the clustered index of the table and create files for
 
2313
        secondary index entries for merge sort */
 
2314
 
 
2315
        error = row_merge_read_clustered_index(
 
2316
                trx, table, old_table, new_table, indexes,
 
2317
                merge_files, n_indexes, block);
 
2318
 
 
2319
        if (error != DB_SUCCESS) {
 
2320
 
 
2321
                goto func_exit;
 
2322
        }
 
2323
 
 
2324
        /* Now we have files containing index entries ready for
 
2325
        sorting and inserting. */
 
2326
 
 
2327
        for (i = 0; i < n_indexes; i++) {
 
2328
                error = row_merge_sort(indexes[i], &merge_files[i],
 
2329
                                       block, &tmpfd, table);
 
2330
 
 
2331
                if (error == DB_SUCCESS) {
 
2332
                        error = row_merge_insert_index_tuples(
 
2333
                                trx, indexes[i], new_table,
 
2334
                                dict_table_zip_size(old_table),
 
2335
                                merge_files[i].fd, block);
 
2336
                }
 
2337
 
 
2338
                /* Close the temporary file to free up space. */
 
2339
                row_merge_file_destroy(&merge_files[i]);
 
2340
 
 
2341
                if (error != DB_SUCCESS) {
 
2342
                        trx->error_key_num = i;
 
2343
                        goto func_exit;
 
2344
                }
 
2345
        }
 
2346
 
 
2347
func_exit:
 
2348
        close(tmpfd);
 
2349
 
 
2350
        for (i = 0; i < n_indexes; i++) {
 
2351
                row_merge_file_destroy(&merge_files[i]);
 
2352
        }
 
2353
 
 
2354
        mem_free(merge_files);
 
2355
        os_mem_free_large(block, block_size);
 
2356
 
 
2357
        return(error);
 
2358
}