~drizzle-trunk/drizzle/development

Viewing changes to plugin/innobase/row/row0merge.c

  • Committer: Brian Aker
  • Date: 2010-10-28 17:12:01 UTC
  • mfrom: (1887.1.3 merge)
  • Revision ID: brian@tangent.org-20101028171201-baj6l1bnntn1s4ad
Merge in POTFILES changes.

@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (C) 2005, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -58,11 +58,6 @@
 #include "handler0alter.h"
 #include <unistd.h>
 
-/* Ignore posix_fadvise() on those platforms where it does not exist */
-#if defined __WIN__
-# define posix_fadvise(fd, offset, len, advice) /* nothing */
-#endif /* __WIN__ */
-
 #ifdef UNIV_DEBUG
 /** Set these in order ot enable debug printout. */
 /* @{ */
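
Note: the removed lines above stub posix_fadvise() out on Windows so that the cache-hint calls further down in this file need no platform guards of their own. A minimal self-contained sketch of the same pattern follows; it deviates from the tree by expanding to ((void) 0) rather than to nothing, so the macro stays usable inside expressions.

#define _XOPEN_SOURCE 600               /* expose posix_fadvise() on POSIX */
#include <stdio.h>

#ifdef _WIN32
/* No posix_fadvise() here; let each call site compile to a no-op. */
# define posix_fadvise(fd, offset, len, advice) ((void) 0)
# define POSIX_FADV_NORMAL 0
#else
# include <fcntl.h>                     /* posix_fadvise(), POSIX_FADV_* */
#endif

int main(void)
{
        /* The same call compiles on both kinds of platform. */
        posix_fadvise(0, 0, 0, POSIX_FADV_NORMAL);
        puts("done");
        return 0;
}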
@@ -187,12 +182,12 @@
         ut_ad(max_tuples <= sizeof(row_merge_block_t));
         ut_ad(max_tuples < buf_size);
 
-        buf = static_cast<row_merge_buf_t *>(mem_heap_zalloc(heap, buf_size));
+        buf = mem_heap_zalloc(heap, buf_size);
         buf->heap = heap;
         buf->index = index;
         buf->max_tuples = max_tuples;
-        buf->tuples = static_cast<const dfield_t **>(mem_heap_alloc(heap,
-                                     2 * max_tuples * sizeof *buf->tuples));
+        buf->tuples = mem_heap_alloc(heap,
+                                     2 * max_tuples * sizeof *buf->tuples);
         buf->tmp_tuples = buf->tuples + max_tuples;
 
         return(buf);
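
Note: many hunks in this file follow the pattern above: the left-hand side wraps allocator calls in static_cast<>, apparently because that side of the tree is compiled as C++, while the right-hand side relies on C's implicit conversion from void *. A standalone illustration of the difference (the structure name is invented for the example):

#include <stdlib.h>
#include <stdio.h>

struct merge_buf { int n_tuples; };

int main(void)
{
        /* Legal C: the void * returned by malloc() converts implicitly. */
        struct merge_buf *buf = malloc(sizeof *buf);

        /* A C++ compiler rejects the line above; it needs a cast:
           buf = static_cast<struct merge_buf *>(malloc(sizeof *buf)); */

        if (buf == NULL) {
                return 1;
        }
        buf->n_tuples = 0;
        printf("n_tuples = %d\n", buf->n_tuples);
        free(buf);
        return 0;
}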
@@ -275,7 +270,6 @@
         const dict_index_t*     index;
         dfield_t*               entry;
         dfield_t*               field;
-        const dict_field_t*     ifield;
 
         if (buf->n_tuples >= buf->max_tuples) {
                 return(FALSE);
@@ -287,21 +281,21 @@
 
         n_fields = dict_index_get_n_fields(index);
 
-        entry = static_cast<dfield_t *>(mem_heap_alloc(buf->heap, n_fields * sizeof *entry));
+        entry = mem_heap_alloc(buf->heap, n_fields * sizeof *entry);
         buf->tuples[buf->n_tuples] = entry;
         field = entry;
 
         data_size = 0;
         extra_size = UT_BITS_IN_BYTES(index->n_nullable);
 
-        ifield = dict_index_get_nth_field(index, 0);
-
-        for (i = 0; i < n_fields; i++, field++, ifield++) {
+        for (i = 0; i < n_fields; i++, field++) {
+                const dict_field_t*     ifield;
                 const dict_col_t*       col;
                 ulint                   col_no;
                 const dfield_t*         row_field;
                 ulint                   len;
 
+                ifield = dict_index_get_nth_field(index, i);
                 col = ifield->col;
                 col_no = dict_col_get_no(col);
                 row_field = dtuple_get_nth_field(row, col_no);
@@ -314,23 +308,23 @@
                 } else if (UNIV_LIKELY(!ext)) {
                 } else if (dict_index_is_clust(index)) {
                         /* Flag externally stored fields. */
-                        const byte*     row_buf = row_ext_lookup(ext, col_no,
+                        const byte*     buf = row_ext_lookup(ext, col_no,
                                                              &len);
-                        if (UNIV_LIKELY_NULL(row_buf)) {
-                                ut_a(row_buf != field_ref_zero);
+                        if (UNIV_LIKELY_NULL(buf)) {
+                                ut_a(buf != field_ref_zero);
                                 if (i < dict_index_get_n_unique(index)) {
-                                        dfield_set_data(field, row_buf, len);
+                                        dfield_set_data(field, buf, len);
                                 } else {
                                         dfield_set_ext(field);
                                         len = dfield_get_len(field);
                                 }
                         }
                 } else {
-                        const byte*     row_buf = row_ext_lookup(ext, col_no,
+                        const byte*     buf = row_ext_lookup(ext, col_no,
                                                              &len);
-                        if (UNIV_LIKELY_NULL(row_buf)) {
-                                ut_a(row_buf != field_ref_zero);
-                                dfield_set_data(field, row_buf, len);
+                        if (UNIV_LIKELY_NULL(buf)) {
+                                ut_a(buf != field_ref_zero);
+                                dfield_set_data(field, buf, len);
                         }
                 }
 
@@ -339,9 +333,9 @@
                 if (ifield->prefix_len) {
                         len = dtype_get_at_most_n_mbchars(
                                 col->prtype,
-                                col->mbminmaxlen,
+                                col->mbminlen, col->mbmaxlen,
                                 ifield->prefix_len,
-                                len, static_cast<const char *>(dfield_get_data(field)));
+                                len, dfield_get_data(field));
                         dfield_set_len(field, len);
                 }
 
@@ -415,7 +409,7 @@
 /** Structure for reporting duplicate records. */
 struct row_merge_dup_struct {
         const dict_index_t*     index;          /*!< index being sorted */
-        TABLE*          table;          /*!< MySQL table object */
+        TABLE*                  table;          /*!< MySQL table object */
         ulint                   n_dup;          /*!< number of duplicates */
 };
 
@@ -431,13 +425,14 @@
         row_merge_dup_t*        dup,    /*!< in/out: for reporting duplicates */
         const dfield_t*         entry)  /*!< in: duplicate index entry */
 {
-        mrec_buf_t*             buf;
+        mrec_buf_t              buf;
         const dtuple_t*         tuple;
         dtuple_t                tuple_store;
         const rec_t*            rec;
         const dict_index_t*     index   = dup->index;
         ulint                   n_fields= dict_index_get_n_fields(index);
-        mem_heap_t*             heap;
+        mem_heap_t*             heap    = NULL;
+        ulint                   offsets_[REC_OFFS_NORMAL_SIZE];
         ulint*                  offsets;
         ulint                   n_ext;
 
@@ -447,22 +442,22 @@
                 return;
         }
 
+        rec_offs_init(offsets_);
+
         /* Convert the tuple to a record and then to MySQL format. */
-        heap = mem_heap_create((1 + REC_OFFS_HEADER_SIZE + n_fields)
-                               * sizeof *offsets
-                               + sizeof *buf);
-
-        buf = static_cast<mrec_buf_t *>(mem_heap_alloc(heap, sizeof *buf));
 
         tuple = dtuple_from_fields(&tuple_store, entry, n_fields);
         n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0;
 
-        rec = rec_convert_dtuple_to_rec(*buf, index, tuple, n_ext);
-        offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+        rec = rec_convert_dtuple_to_rec(buf, index, tuple, n_ext);
+        offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED,
+                                  &heap);
 
         innobase_rec_to_mysql(dup->table, rec, index, offsets);
 
-        mem_heap_free(heap);
+        if (UNIV_LIKELY_NULL(heap)) {
+                mem_heap_free(heap);
+        }
 }
 
 /*************************************************************//**
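
Note: in the duplicate-report hunk above, the right-hand side keeps the record buffer on the stack and only creates a heap when rec_get_offsets() asks for one, while the left-hand side always builds a heap and places the buffer in it. If mrec_buf_t is a page-sized byte array, as its uses in this file suggest, the choice is between a large stack frame and an allocation per call. A toy sketch of the trade-off (the 16 KiB size is illustrative):

#include <stdlib.h>

typedef unsigned char mrec_buf_t[16384];   /* page-sized, as an example */

static void report_with_stack_buffer(void)
{
        mrec_buf_t buf;         /* cheap, but ~16 KiB of stack per call */
        buf[0] = 0;
}

static void report_with_heap_buffer(void)
{
        mrec_buf_t *buf = malloc(sizeof *buf);  /* costs an allocation,
                                                   spares the stack */
        if (buf != NULL) {
                (*buf)[0] = 0;
                free(buf);
        }
}

int main(void)
{
        report_with_stack_buffer();
        report_with_heap_buffer();
        return 0;
}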
@@ -633,26 +628,22 @@
 }
 
 /******************************************************//**
-Create a memory heap and allocate space for row_merge_rec_offsets()
-and mrec_buf_t[3].
+Create a memory heap and allocate space for row_merge_rec_offsets().
 @return memory heap */
 static
 mem_heap_t*
 row_merge_heap_create(
 /*==================*/
         const dict_index_t*     index,          /*!< in: record descriptor */
-        mrec_buf_t**            buf,            /*!< out: 3 buffers */
         ulint**                 offsets1,       /*!< out: offsets */
         ulint**                 offsets2)       /*!< out: offsets */
 {
         ulint           i       = 1 + REC_OFFS_HEADER_SIZE
                 + dict_index_get_n_fields(index);
-        mem_heap_t*     heap    = mem_heap_create(2 * i * sizeof **offsets1
-                                                  + 3 * sizeof **buf);
+        mem_heap_t*     heap    = mem_heap_create(2 * i * sizeof *offsets1);
 
-        *buf = static_cast<mrec_buf_t*>(mem_heap_alloc(heap, 3 * sizeof **buf));
-        *offsets1 = static_cast<ulint*>(mem_heap_alloc(heap, i * sizeof **offsets1));
-        *offsets2 = static_cast<ulint*>(mem_heap_alloc(heap, i * sizeof **offsets2));
+        *offsets1 = mem_heap_alloc(heap, i * sizeof *offsets1);
+        *offsets2 = mem_heap_alloc(heap, i * sizeof *offsets2);
 
         (*offsets1)[0] = (*offsets2)[0] = i;
         (*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index);
@@ -675,7 +666,7 @@
         dict_index_t*   index;
         const char**    column_names;
 
-        column_names = static_cast<const char **>(mem_alloc(index_def->n_fields * sizeof *column_names));
+        column_names = mem_alloc(index_def->n_fields * sizeof *column_names);
 
         for (i = 0; i < index_def->n_fields; ++i) {
                 column_names[i] = index_def->fields[i].field_name;
@@ -697,9 +688,7 @@
 row_merge_read(
 /*===========*/
         int                     fd,     /*!< in: file descriptor */
-        ulint                   offset, /*!< in: offset where to read
-                                        in number of row_merge_block_t
-                                        elements */
+        ulint                   offset, /*!< in: offset where to read */
         row_merge_block_t*      buf)    /*!< out: data */
 {
         ib_uint64_t     ofs = ((ib_uint64_t) offset) * sizeof *buf;
@@ -716,11 +705,6 @@
                                                  (ulint) (ofs & 0xFFFFFFFF),
                                                  (ulint) (ofs >> 32),
                                                  sizeof *buf);
-#ifdef POSIX_FADV_DONTNEED
-        /* Each block is read exactly once.  Free up the file cache. */
-        posix_fadvise(fd, ofs, sizeof *buf, POSIX_FADV_DONTNEED);
-#endif /* POSIX_FADV_DONTNEED */
-
         if (UNIV_UNLIKELY(!success)) {
                 ut_print_timestamp(stderr);
                 fprintf(stderr,
@@ -731,25 +715,18 @@
 }
 
 /********************************************************************//**
-Write a merge block to the file system.
+Read a merge block from the file system.
 @return TRUE if request was successful, FALSE if fail */
 static
 ibool
 row_merge_write(
 /*============*/
         int             fd,     /*!< in: file descriptor */
-        ulint           offset, /*!< in: offset where to write,
-                                in number of row_merge_block_t elements */
+        ulint           offset, /*!< in: offset where to write */
         const void*     buf)    /*!< in: data */
 {
-        size_t          buf_len = sizeof(row_merge_block_t);
-        ib_uint64_t     ofs = buf_len * (ib_uint64_t) offset;
-        ibool           ret;
-
-        ret = os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
-                            (ulint) (ofs & 0xFFFFFFFF),
-                            (ulint) (ofs >> 32),
-                            buf_len);
+        ib_uint64_t     ofs = ((ib_uint64_t) offset)
+                * sizeof(row_merge_block_t);
 
 #ifdef UNIV_DEBUG
         if (row_merge_print_block_write) {
@@ -758,13 +735,10 @@
         }
 #endif /* UNIV_DEBUG */
 
-#ifdef POSIX_FADV_DONTNEED
-        /* The block will be needed on the next merge pass,
-        but it can be evicted from the file cache meanwhile. */
-        posix_fadvise(fd, ofs, buf_len, POSIX_FADV_DONTNEED);
-#endif /* POSIX_FADV_DONTNEED */
-
-        return(UNIV_LIKELY(ret));
+        return(UNIV_LIKELY(os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
+                                         (ulint) (ofs & 0xFFFFFFFF),
+                                         (ulint) (ofs >> 32),
+                                         sizeof(row_merge_block_t))));
 }
 
 /********************************************************************//**
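
Note: the POSIX_FADV_DONTNEED lines removed above hint to the kernel that a merge block just read or written will not be touched again soon, so its page-cache memory can be reclaimed. A POSIX-only sketch of the idea (file name and block size are illustrative):

#define _XOPEN_SOURCE 600       /* for posix_fadvise() */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        unsigned char block[4096];
        int fd = open("merge.tmp", O_CREAT | O_RDWR | O_TRUNC, 0600);

        if (fd < 0) {
                return 1;
        }
        memset(block, 0xAA, sizeof block);
        if (write(fd, block, sizeof block) == (ssize_t) sizeof block) {
#ifdef POSIX_FADV_DONTNEED
                /* The data is in the file; evict it from the cache. */
                posix_fadvise(fd, 0, sizeof block, POSIX_FADV_DONTNEED);
#endif
        }
        close(fd);
        unlink("merge.tmp");
        return 0;
}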
@@ -1099,14 +1073,11 @@
                                                 record to be compared */
         const ulint*            offsets1,       /*!< in: first record offsets */
         const ulint*            offsets2,       /*!< in: second record offsets */
-        const dict_index_t*     index,          /*!< in: index */
-        ibool*                  null_eq)        /*!< out: set to TRUE if
-                                                found matching null values */
+        const dict_index_t*     index)          /*!< in: index */
 {
         int     cmp;
 
-        cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index,
-                                 null_eq);
+        cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index);
 
 #ifdef UNIV_DEBUG
         if (row_merge_print_cmp) {
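
Note: the null_eq flag removed in this hunk exists because SQL NULLs compare equal for sorting but are never duplicates of one another, so a unique index may legitimately hold many rows whose keys match only on NULLs. A toy comparison showing why the extra out-parameter is needed (the types are invented for the example):

#include <stdbool.h>
#include <stdio.h>

struct key { bool is_null; int value; };

static int key_cmp(struct key a, struct key b, bool *null_eq)
{
        if (a.is_null && b.is_null) {
                *null_eq = true;        /* equal for sorting purposes... */
                return 0;
        }
        if (a.is_null) return -1;
        if (b.is_null) return 1;
        return (a.value > b.value) - (a.value < b.value);
}

int main(void)
{
        bool null_eq = false;
        struct key a = {true, 0}, b = {true, 0};

        if (key_cmp(a, b, &null_eq) == 0 && !null_eq)
                printf("duplicate key error\n");
        else
                printf("no duplicate reported\n");  /* ...but not a dup */
        return 0;
}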
@@ -1130,7 +1101,7 @@
 row_merge_read_clustered_index(
 /*===========================*/
         trx_t*                  trx,    /*!< in: transaction */
-        TABLE*          table,  /*!< in/out: MySQL table object,
+        TABLE*                  table,  /*!< in/out: MySQL table object,
                                         for reporting erroneous records */
         const dict_table_t*     old_table,/*!< in: table where rows are
                                         read from */
@@ -1165,7 +1136,7 @@
 
         /* Create and initialize memory for record buffers */
 
-        merge_buf = static_cast<row_merge_buf_t **>(mem_alloc(n_index * sizeof *merge_buf));
+        merge_buf = mem_alloc(n_index * sizeof *merge_buf);
 
         for (i = 0; i < n_index; i++) {
                 merge_buf[i] = row_merge_buf_create(index[i]);
@@ -1192,7 +1163,7 @@
 
         ut_a(n_cols == dict_table_get_n_cols(new_table));
 
-        nonnull = static_cast<ulint*>(mem_alloc(n_cols * sizeof *nonnull));
+        nonnull = mem_alloc(n_cols * sizeof *nonnull);
 
         for (i = 0; i < n_cols; i++) {
                 if (dict_table_get_nth_col(old_table, i)->prtype
@@ -1290,7 +1261,7 @@
                 for (i = 0; i < n_index; i++) {
                         row_merge_buf_t*        buf     = merge_buf[i];
                         merge_file_t*           file    = &files[i];
-                        const dict_index_t*     buf_index       = buf->index;
+                        const dict_index_t*     index   = buf->index;
 
                         if (UNIV_LIKELY
                             (row && row_merge_buf_add(buf, row, ext))) {
@@ -1306,7 +1277,7 @@
                         Sort them and write to disk. */
 
                         if (buf->n_tuples) {
-                                if (dict_index_is_unique(buf_index)) {
+                                if (dict_index_is_unique(index)) {
                                         row_merge_dup_t dup;
                                         dup.index = buf->index;
                                         dup.table = table;
@@ -1418,14 +1389,13 @@
         ulint*                  foffs1, /*!< in/out: offset of second
                                         source list in the file */
         merge_file_t*           of,     /*!< in/out: output file */
-        TABLE*          table)  /*!< in/out: MySQL table, for
+        TABLE*                  table)  /*!< in/out: MySQL table, for
                                         reporting erroneous key value
                                         if applicable */
 {
         mem_heap_t*     heap;   /*!< memory heap for offsets0, offsets1 */
 
-        mrec_buf_t*     buf;    /*!< buffer for handling
-                                split mrec in block[] */
+        mrec_buf_t      buf[3]; /*!< buffer for handling split mrec in block[] */
         const byte*     b0;     /*!< pointer to block[0] */
         const byte*     b1;     /*!< pointer to block[1] */
         byte*           b2;     /*!< pointer to block[2] */
@@ -1445,9 +1415,7 @@
         }
 #endif /* UNIV_DEBUG */
 
-        heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);
-
-        buf = static_cast<mrec_buf_t *>(mem_heap_alloc(heap, sizeof(mrec_buf_t) * 3));
+        heap = row_merge_heap_create(index, &offsets0, &offsets1);
 
         /* Write a record and read the next record.  Split the output
         file in two halves, which can be merged on the following pass. */
@@ -1474,13 +1442,11 @@
         }
 
         while (mrec0 && mrec1) {
-                ibool   null_eq = FALSE;
                 switch (row_merge_cmp(mrec0, mrec1,
-                                      offsets0, offsets1, index,
-                                      &null_eq)) {
+                                      offsets0, offsets1, index)) {
                 case 0:
                         if (UNIV_UNLIKELY
-                            (dict_index_is_unique(index) && !null_eq)) {
+                            (dict_index_is_unique(index))) {
                                 innobase_rec_to_mysql(table, mrec0,
                                                       index, offsets0);
                                 mem_heap_free(heap);
1501
{
1536
1502
        mem_heap_t*     heap;   /*!< memory heap for offsets0, offsets1 */
1537
1503
 
1538
 
        mrec_buf_t*     buf;    /*!< buffer for handling
 
1504
        mrec_buf_t      buf[3]; /*!< buffer for handling
1539
1505
                                split mrec in block[] */
1540
1506
        const byte*     b0;     /*!< pointer to block[0] */
1541
1507
        byte*           b2;     /*!< pointer to block[2] */
1553
1519
        }
1554
1520
#endif /* UNIV_DEBUG */
1555
1521
 
1556
 
        heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);
1557
 
        buf = static_cast<mrec_buf_t *>(mem_heap_alloc(heap, sizeof(mrec_buf_t) * 3));
 
1522
        heap = row_merge_heap_create(index, &offsets0, &offsets1);
1558
1523
 
1559
1524
        /* Write a record and read the next record.  Split the output
1560
1525
        file in two halves, which can be merged on the following pass. */
@@ -1603,26 +1568,20 @@
         const dict_index_t*     index,  /*!< in: index being created */
         merge_file_t*           file,   /*!< in/out: file containing
                                         index entries */
+        ulint*                  half,   /*!< in/out: half the file */
         row_merge_block_t*      block,  /*!< in/out: 3 buffers */
         int*                    tmpfd,  /*!< in/out: temporary file handle */
-        TABLE*          table,  /*!< in/out: MySQL table, for
-                                        reporting erroneous key value
-                                        if applicable */
-        ulint*                  num_run,/*!< in/out: Number of runs remain
-                                        to be merged */
-        ulint*                  run_offset) /*!< in/out: Array contains the
-                                        first offset number for each merge
-                                        run */
+        TABLE*                  table)  /*!< in/out: MySQL table, for
+                                        reporting erroneous key value
+                                        if applicable */
 {
         ulint           foffs0; /*!< first input offset */
         ulint           foffs1; /*!< second input offset */
         ulint           error;  /*!< error code */
         merge_file_t    of;     /*!< output file */
-        const ulint     ihalf   = run_offset[*num_run / 2];
+        const ulint     ihalf   = *half;
                                 /*!< half the input file */
-        ulint           n_run   = 0;
-                                /*!< num of runs generated from this merge */
-
+        ulint           ohalf;  /*!< half the output file */
 
         UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]);
         ut_ad(ihalf < file->offset);
@@ -1631,29 +1590,18 @@
         of.offset = 0;
         of.n_rec = 0;
 
-#ifdef POSIX_FADV_SEQUENTIAL
-        /* The input file will be read sequentially, starting from the
-        beginning and the middle.  In Linux, the POSIX_FADV_SEQUENTIAL
-        affects the entire file.  Each block will be read exactly once. */
-        posix_fadvise(file->fd, 0, 0,
-                      POSIX_FADV_SEQUENTIAL | POSIX_FADV_NOREUSE);
-#endif /* POSIX_FADV_SEQUENTIAL */
-
         /* Merge blocks to the output file. */
+        ohalf = 0;
         foffs0 = 0;
         foffs1 = ihalf;
 
-        UNIV_MEM_INVALID(run_offset, *num_run * sizeof *run_offset);
-
         for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) {
+                ulint   ahalf;  /*!< arithmetic half the input file */
 
                 if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
                         return(DB_INTERRUPTED);
                 }
 
-                /* Remember the offset number for this run */
-                run_offset[n_run++] = of.offset;
-
                 error = row_merge_blocks(index, file, block,
                                          &foffs0, &foffs1, &of, table);
 
@@ -1661,6 +1609,21 @@
                         return(error);
                 }
 
+                /* Record the offset of the output file when
+                approximately half the output has been generated.  In
+                this way, the next invocation of row_merge() will
+                spend most of the time in this loop.  The initial
+                estimate is ohalf==0. */
+                ahalf = file->offset / 2;
+                ut_ad(ohalf <= of.offset);
+
+                /* Improve the estimate until reaching half the input
+                file size, or we can not get any closer to it.  All
+                comparands should be non-negative when !(ohalf < ahalf)
+                because ohalf <= of.offset. */
+                if (ohalf < ahalf || of.offset - ahalf < ohalf - ahalf) {
+                        ohalf = of.offset;
+                }
         }
 
         /* Copy the last blocks, if there are any. */
@@ -1670,9 +1633,6 @@
                         return(DB_INTERRUPTED);
                 }
 
-                /* Remember the offset number for this run */
-                run_offset[n_run++] = of.offset;
-
                 if (!row_merge_blocks_copy(index, file, block, &foffs0, &of)) {
                         return(DB_CORRUPTION);
                 }
@@ -1685,9 +1645,6 @@
                         return(DB_INTERRUPTED);
                 }
 
-                /* Remember the offset number for this run */
-                run_offset[n_run++] = of.offset;
-
                 if (!row_merge_blocks_copy(index, file, block, &foffs1, &of)) {
                         return(DB_CORRUPTION);
                 }
@@ -1699,23 +1656,10 @@
                 return(DB_CORRUPTION);
         }
 
-        ut_ad(n_run <= *num_run);
-
-        *num_run = n_run;
-
-        /* Each run can contain one or more offsets. As merge goes on,
-        the number of runs (to merge) will reduce until we have one
-        single run. So the number of runs will always be smaller than
-        the number of offsets in file */
-        ut_ad((*num_run) <= file->offset);
-
-        /* The number of offsets in output file is always equal or
-        smaller than input file */
-        ut_ad(of.offset <= file->offset);
-
         /* Swap file descriptors for the next pass. */
         *tmpfd = file->fd;
         *file = of;
+        *half = ohalf;
 
         UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]);
 
@@ -1735,49 +1679,32 @@
                                         index entries */
         row_merge_block_t*      block,  /*!< in/out: 3 buffers */
         int*                    tmpfd,  /*!< in/out: temporary file handle */
-        TABLE*          table)  /*!< in/out: MySQL table, for
+        TABLE*                  table)  /*!< in/out: MySQL table, for
                                         reporting erroneous key value
                                         if applicable */
 {
         ulint   half = file->offset / 2;
-        ulint   num_runs;
-        ulint*  run_offset;
-        ulint   error = DB_SUCCESS;
-
-        /* Record the number of merge runs we need to perform */
-        num_runs = file->offset;
-
-        /* If num_runs are less than 1, nothing to merge */
-        if (num_runs <= 1) {
-                return(error);
-        }
-
-        /* "run_offset" records each run's first offset number */
-        run_offset = (ulint*) mem_alloc(file->offset * sizeof(ulint));
-
-        /* This tells row_merge() where to start for the first round
-        of merge. */
-        run_offset[half] = half;
 
         /* The file should always contain at least one byte (the end
         of file marker).  Thus, it must be at least one block. */
         ut_ad(file->offset > 0);
 
-        /* Merge the runs until we have one big run */
         do {
-                error = row_merge(trx, index, file, block, tmpfd,
-                                  table, &num_runs, run_offset);
+                ulint   error;
 
-                UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset);
+                error = row_merge(trx, index, file, &half,
+                                  block, tmpfd, table);
 
                 if (error != DB_SUCCESS) {
-                        break;
+                        return(error);
                 }
-        } while (num_runs > 1);
-
-        mem_free(run_offset);
-
-        return(error);
+
+                /* half > 0 should hold except when the file consists
+                of one block.  No need to merge further then. */
+                ut_ad(half > 0 || file->offset == 1);
+        } while (half < file->offset && half > 0);
+
+        return(DB_SUCCESS);
 }
 
 /*************************************************************//**
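
Note: the two sides above implement the same external merge sort with different bookkeeping: the right-hand side tracks a single half point and re-estimates it every pass, while the left-hand side keeps an explicit run_offset[] array and a shrinking run count. A self-contained toy model of the run-based scheme, merging an in-memory array's sorted runs pairwise until one run remains (the data and run boundaries are invented):

#include <stdio.h>

static void merge(int *a, int lo, int mid, int hi, int *tmp)
{
        int i = lo, j = mid, k = lo;

        while (i < mid && j < hi)
                tmp[k++] = (a[i] <= a[j]) ? a[i++] : a[j++];
        while (i < mid)
                tmp[k++] = a[i++];
        while (j < hi)
                tmp[k++] = a[j++];
        for (k = lo; k < hi; k++)
                a[k] = tmp[k];
}

int main(void)
{
        int a[8] = {3, 9, 1, 7, 4, 8, 2, 6};
        int tmp[8];
        int run_offset[5] = {0, 2, 4, 6, 8};    /* four sorted runs of two */
        int num_run = 4;

        while (num_run > 1) {
                int n_run = 0;

                for (int r = 0; r + 1 < num_run; r += 2) {
                        /* Merge run r with run r+1 into one longer run. */
                        merge(a, run_offset[r], run_offset[r + 1],
                              run_offset[r + 2], tmp);
                        run_offset[n_run + 1] = run_offset[r + 2];
                        n_run++;
                }
                if (num_run % 2) {              /* odd run carried over */
                        run_offset[n_run + 1] = run_offset[num_run];
                        n_run++;
                }
                num_run = n_run;                /* runs halve every pass */
        }

        for (int i = 0; i < 8; i++)
                printf("%d ", a[i]);
        printf("\n");
        return 0;
}

The real code streams page-sized blocks through three file buffers instead of an in-memory array, but the run bookkeeping collapses in the same way.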
@@ -1813,11 +1740,6 @@
                 (below). */
                 data = btr_rec_copy_externally_stored_field(
                         mrec, offsets, zip_size, i, &len, heap);
-                /* Because we have locked the table, any records
-                written by incomplete transactions must have been
-                rolled back already. There must not be any incomplete
-                BLOB columns. */
-                ut_a(data);
 
                 dfield_set_data(field, data, len);
         }
@@ -1839,6 +1761,7 @@
         int                     fd,     /*!< in: file descriptor */
         row_merge_block_t*      block)  /*!< in/out: file buffer */
 {
+        mrec_buf_t              buf;
         const byte*             b;
         que_thr_t*              thr;
         ins_node_t*             node;
@@ -1857,7 +1780,7 @@
 
         trx->op_info = "inserting index entries";
 
-        graph_heap = mem_heap_create(500 + sizeof(mrec_buf_t));
+        graph_heap = mem_heap_create(500);
         node = ins_node_create(INS_DIRECT, table, graph_heap);
 
         thr = pars_complete_graph_for_exec(node, trx, graph_heap);
@@ -1869,7 +1792,7 @@
         {
                 ulint i = 1 + REC_OFFS_HEADER_SIZE
                         + dict_index_get_n_fields(index);
-                offsets = static_cast<ulint *>(mem_heap_alloc(graph_heap, i * sizeof *offsets));
+                offsets = mem_heap_alloc(graph_heap, i * sizeof *offsets);
                 offsets[0] = i;
                 offsets[1] = dict_index_get_n_fields(index);
         }
@@ -1879,14 +1802,12 @@
         if (!row_merge_read(fd, foffs, block)) {
                 error = DB_CORRUPTION;
         } else {
-                mrec_buf_t*     buf = static_cast<mrec_buf_t *>(mem_heap_alloc(graph_heap, sizeof *buf));
-
                 for (;;) {
                         const mrec_t*   mrec;
                         dtuple_t*       dtuple;
                         ulint           n_ext;
 
-                        b = row_merge_read_rec(block, buf, b, index,
+                        b = row_merge_read_rec(block, &buf, b, index,
                                                fd, &foffs, &mrec, offsets);
                         if (UNIV_UNLIKELY(!b)) {
                                 /* End of list, or I/O error */
1898
        /* We use the select query graph as the dummy graph needed
1978
1899
        in the lock module call */
1979
1900
 
1980
 
        thr = que_fork_get_first_thr(static_cast<que_fork_t *>(que_node_get_parent(thr)));
 
1901
        thr = que_fork_get_first_thr(que_node_get_parent(thr));
1981
1902
        que_thr_move_to_run_state_for_mysql(thr, trx);
1982
1903
 
1983
1904
run_again:
2007
1928
                        que_node_t*     parent;
2008
1929
 
2009
1930
                        parent = que_node_get_parent(thr);
2010
 
                        run_thr = que_fork_start_command(static_cast<que_fork_t *>(parent));
 
1931
                        run_thr = que_fork_start_command(parent);
2011
1932
 
2012
1933
                        ut_a(run_thr == thr);
2013
1934
 
@@ -2057,12 +1978,14 @@
                 /* Drop the field definitions of the index. */
                 "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n"
                 /* Drop the index definition and the B-tree. */
-                "DELETE FROM SYS_INDEXES WHERE ID = :indexid;\n"
+                "DELETE FROM SYS_INDEXES WHERE ID = :indexid\n"
+                "               AND TABLE_ID = :tableid;\n"
                 "END;\n";
 
         ut_ad(index && table && trx);
 
-        pars_info_add_ull_literal(info, "indexid", index->id);
+        pars_info_add_dulint_literal(info, "indexid", index->id);
+        pars_info_add_dulint_literal(info, "tableid", table->id);
 
         trx_start_if_not_started(trx);
         trx->op_info = "dropping index";
@@ -2076,7 +1999,7 @@
         /* Replace this index with another equivalent index for all
         foreign key constraints on this table where this index is used */
 
-        dict_table_replace_index_in_foreign_list(table, index, trx);
+        dict_table_replace_index_in_foreign_list(table, index);
         dict_index_remove_from_cache(table, index);
 
         trx->op_info = "";
@@ -2111,83 +2034,48 @@
 /*=============================*/
 {
         trx_t*          trx;
-        btr_pcur_t      pcur;
-        mtr_t           mtr;
+        ulint           err;
 
-        /* Load the table definitions that contain partially defined
-        indexes, so that the data dictionary information can be checked
-        when accessing the tablename.ibd files. */
+        /* We use the private SQL parser of Innobase to generate the
+        query graphs needed in deleting the dictionary data from system
+        tables in Innobase. Deleting a row from SYS_INDEXES table also
+        frees the file segments of the B-tree associated with the index. */
+        static const char drop_temp_indexes[] =
+                "PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
+                "indexid CHAR;\n"
+                "DECLARE CURSOR c IS SELECT ID FROM SYS_INDEXES\n"
+                "WHERE SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "';\n"
+                "BEGIN\n"
+                "\tOPEN c;\n"
+                "\tWHILE 1=1 LOOP\n"
+                "\t\tFETCH c INTO indexid;\n"
+                "\t\tIF (SQL % NOTFOUND) THEN\n"
+                "\t\t\tEXIT;\n"
+                "\t\tEND IF;\n"
+                "\t\tDELETE FROM SYS_FIELDS WHERE INDEX_ID = indexid;\n"
+                "\t\tDELETE FROM SYS_INDEXES WHERE ID = indexid;\n"
+                "\tEND LOOP;\n"
+                "\tCLOSE c;\n"
+                "\tCOMMIT WORK;\n"
+                "END;\n";
 
         trx = trx_allocate_for_background();
         trx->op_info = "dropping partially created indexes";
         row_mysql_lock_data_dictionary(trx);
 
-        mtr_start(&mtr);
-
-        btr_pcur_open_at_index_side(
-                TRUE,
-                dict_table_get_first_index(dict_sys->sys_indexes),
-                BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
-
-        for (;;) {
-                const rec_t*    rec;
-                const byte*     field;
-                ulint           len;
-                table_id_t      table_id;
-                dict_table_t*   table;
-
-                btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
-                if (!btr_pcur_is_on_user_rec(&pcur)) {
-                        break;
-                }
-
-                rec = btr_pcur_get_rec(&pcur);
-                field = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_NAME_FIELD,
-                                              &len);
-                if (len == UNIV_SQL_NULL || len == 0
-                    || (char) *field != TEMP_INDEX_PREFIX) {
-                        continue;
-                }
-
-                /* This is a temporary index. */
-
-                field = rec_get_nth_field_old(rec, 0/*TABLE_ID*/, &len);
-                if (len != 8) {
-                        /* Corrupted TABLE_ID */
-                        continue;
-                }
-
-                table_id = mach_read_from_8(field);
-
-                btr_pcur_store_position(&pcur, &mtr);
-                btr_pcur_commit_specify_mtr(&pcur, &mtr);
-
-                table = dict_table_get_on_id_low(table_id);
-
-                if (table) {
-                        dict_index_t*   index;
-                        dict_index_t*   next_index;
-
-                        for (index = dict_table_get_first_index(table);
-                             index; index = next_index) {
-
-                                next_index = dict_table_get_next_index(index);
-
-                                if (*index->name == TEMP_INDEX_PREFIX) {
-                                        row_merge_drop_index(index, table, trx);
-                                        trx_commit_for_mysql(trx);
-                                }
-                        }
-                }
-
-                mtr_start(&mtr);
-                btr_pcur_restore_position(BTR_SEARCH_LEAF,
-                                          &pcur, &mtr);
-        }
-
-        btr_pcur_close(&pcur);
-        mtr_commit(&mtr);
+        /* Incomplete transactions may be holding some locks on the
+        data dictionary tables.  However, they should never have been
+        able to lock the records corresponding to the partially
+        created indexes that we are attempting to delete, because the
+        table was locked when the indexes were being created.  We will
+        drop the partially created indexes before the rollback of
+        incomplete transactions is initiated.  Thus, this should not
+        interfere with the incomplete transactions. */
+        trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
+        pars_info_t *info = pars_info_create();
+        err = que_eval_sql(info, drop_temp_indexes, FALSE, trx);
+        ut_a(err == DB_SUCCESS);
+
         row_mysql_unlock_data_dictionary(trx);
         trx_free_for_background(trx);
 }
@@ -2200,23 +2088,9 @@
 /*==================*/
         merge_file_t*   merge_file)     /*!< out: merge file structure */
 {
-#ifdef UNIV_PFS_IO
-        /* This temp file open does not go through normal
-        file APIs, add instrumentation to register with
-        performance schema */
-        struct PSI_file_locker* locker = NULL;
-        PSI_file_locker_state   state;
-        register_pfs_file_open_begin(&state, locker, innodb_file_temp_key,
-                                     PSI_FILE_OPEN,
-                                     "Innodb Merge Temp File",
-                                     __FILE__, __LINE__);
-#endif
         merge_file->fd = innobase_mysql_tmpfile();
         merge_file->offset = 0;
         merge_file->n_rec = 0;
-#ifdef UNIV_PFS_IO
-        register_pfs_file_open_end(locker, merge_file->fd);
-#endif
 }
 
 /*********************************************************************//**
@@ -2227,20 +2101,10 @@
 /*===================*/
         merge_file_t*   merge_file)     /*!< out: merge file structure */
 {
-#ifdef UNIV_PFS_IO
-        struct PSI_file_locker* locker = NULL;
-        PSI_file_locker_state   state;
-        register_pfs_file_io_begin(&state, locker, merge_file->fd, 0, PSI_FILE_CLOSE,
-                                   __FILE__, __LINE__);
-#endif
         if (merge_file->fd != -1) {
                 close(merge_file->fd);
                 merge_file->fd = -1;
         }
-
-#ifdef UNIV_PFS_IO
-        register_pfs_file_io_end(locker, 0);
-#endif
 }
 
 /*********************************************************************//**
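
Note: the UNIV_PFS_IO blocks removed above bracket a file operation that bypasses the usual instrumented I/O layer, so the performance schema still gets told about the open and close. A generic sketch of that bracketing pattern, with stand-in hooks rather than the real PSI interface:

#include <stdio.h>

static void profile_io_begin(const char *op)  { printf("begin: %s\n", op); }
static void profile_io_end(const char *op)    { printf("end:   %s\n", op); }

/* Open a temp file through an API the profiler cannot see, and
   report it by hand, much as the removed hunk does with
   register_pfs_file_open_begin()/_end(). */
static FILE *traced_tmpfile(void)
{
        FILE *f;

        profile_io_begin("merge temp file open");
        f = tmpfile();
        profile_io_end("merge temp file open");
        return f;
}

int main(void)
{
        FILE *f = traced_tmpfile();

        if (f != NULL) {
                fclose(f);
        }
        return 0;
}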
@@ -2362,7 +2226,7 @@
 
         trx->op_info = "renaming indexes";
 
-        pars_info_add_ull_literal(info, "tableid", table->id);
+        pars_info_add_dulint_literal(info, "tableid", table->id);
 
         err = que_eval_sql(info, rename_indexes, FALSE, trx);
 
@@ -2399,7 +2263,7 @@
 {
         ulint           err     = DB_ERROR;
         pars_info_t*    info;
-        char            old_name[MAX_TABLE_NAME_LEN + 1];
+        const char*     old_name= old_table->name;
 
         ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
         ut_ad(old_table != new_table);
@@ -2407,28 +2271,6 @@
 
         ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
 
-        /* store the old/current name to an automatic variable */
-        if (strlen(old_table->name) + 1 <= sizeof(old_name)) {
-                memcpy(old_name, old_table->name, strlen(old_table->name) + 1);
-        } else {
-                ut_print_timestamp(stderr);
-                fprintf(stderr, "InnoDB: too long table name: '%s', "
-                        "max length is %d\n", old_table->name,
-                        MAX_TABLE_NAME_LEN);
-                ut_error;
-        }
-
-        /* store the old/current name to an automatic variable */
-        if (strlen(old_table->name) + 1 <= sizeof(old_name)) {
-                memcpy(old_name, old_table->name, strlen(old_table->name) + 1);
-        } else {
-                ut_print_timestamp(stderr);
-                fprintf(stderr, "InnoDB: too long table name: '%s', "
-                        "max length is %d\n", old_table->name,
-                        MAX_TABLE_NAME_LEN);
-                ut_error;
-        }
-
         trx->op_info = "renaming tables";
 
         /* We use the private SQL parser of Innobase to generate the query
@@ -2464,7 +2306,7 @@
                 goto err_exit;
         }
 
-        err = dict_load_foreigns(old_name, FALSE, TRUE);
+        err = dict_load_foreigns(old_name, TRUE);
 
         if (err != DB_SUCCESS) {
 err_exit:
@@ -2504,7 +2346,7 @@
         node = ind_create_graph_create(index, heap);
         thr = pars_complete_graph_for_exec(node, trx, heap);
 
-        ut_a(thr == que_fork_start_command(static_cast<que_fork_t *>(que_node_get_parent(thr))));
+        ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
 
         que_run_threads(thr);
 
@@ -2561,7 +2403,8 @@
                 /* Note the id of the transaction that created this
                 index, we use it to restrict readers from accessing
                 this index, to ensure read consistency. */
-                index->trx_id = trx->id;
+                index->trx_id = (ib_uint64_t)
+                        ut_conv_dulint_to_longlong(trx->id);
         } else {
                 index = NULL;
         }
@@ -2578,8 +2421,10 @@
         const trx_t*            trx,    /*!< in: transaction */
         const dict_index_t*     index)  /*!< in: index to check */
 {
-        return(!trx->read_view
-               || read_view_sees_trx_id(trx->read_view, index->trx_id));
+        return(!trx->read_view || read_view_sees_trx_id(
+                       trx->read_view,
+                       ut_dulint_create((ulint) (index->trx_id >> 32),
+                                        (ulint) index->trx_id & 0xFFFFFFFF)));
 }
 
 /*********************************************************************//**
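
Note: the right-hand side of the two hunks above still models a transaction id as a "dulint", a pair of 32-bit words, which is why the 64-bit trx_id has to be split with a shift and a mask before read_view_sees_trx_id() can consume it. A standalone sketch of that split (the struct layout and helper name, marked with a trailing underscore, are stand-ins):

#include <inttypes.h>
#include <stdio.h>

typedef struct { uint32_t high; uint32_t low; } dulint;

static dulint ut_dulint_create_(uint32_t high, uint32_t low)
{
        dulint d;

        d.high = high;
        d.low = low;
        return d;
}

int main(void)
{
        uint64_t trx_id = 0x0000000200000001ULL;        /* example id */
        dulint d = ut_dulint_create_((uint32_t) (trx_id >> 32),
                                     (uint32_t) (trx_id & 0xFFFFFFFF));

        printf("high=%" PRIu32 " low=%" PRIu32 "\n", d.high, d.low);
        return 0;
}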
@@ -2615,7 +2460,7 @@
                                         unless creating a PRIMARY KEY */
         dict_index_t**  indexes,        /*!< in: indexes to be created */
         ulint           n_indexes,      /*!< in: size of indexes[] */
-        TABLE*  table)          /*!< in/out: MySQL table, for
+        TABLE*          table)          /*!< in/out: MySQL table, for
                                         reporting erroneous key value
                                         if applicable */
 {
@@ -2637,8 +2482,8 @@
         /* Allocate memory for merge file data structure and initialize
         fields */
 
-        merge_files = static_cast<merge_file_t *>(mem_alloc(n_indexes * sizeof *merge_files));
+        merge_files = mem_alloc(n_indexes * sizeof *merge_files);
         block_size = 3 * sizeof *block;
-        block = static_cast<row_merge_block_t *>(os_mem_alloc_large(&block_size));
+        block = os_mem_alloc_large(&block_size);
 
         for (i = 0; i < n_indexes; i++) {