~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to plugin/innobase/row/row0merge.c

  • Committer: Joe Daly
  • Date: 2010-03-08 04:23:54 UTC
  • mto: This revision was merged to the branch mainline in revision 1380.
  • Revision ID: skinny.moey@gmail.com-20100308042354-7k0jibdqaxkhac7o
scoreboard implementation for statistics

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/*****************************************************************************
2
2
 
3
 
Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved.
 
3
Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
4
4
 
5
5
This program is free software; you can redistribute it and/or modify it under
6
6
the terms of the GNU General Public License as published by the Free Software
11
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12
12
 
13
13
You should have received a copy of the GNU General Public License along with
14
 
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15
 
St, Fifth Floor, Boston, MA 02110-1301 USA
 
14
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 
15
Place, Suite 330, Boston, MA 02111-1307 USA
16
16
 
17
17
*****************************************************************************/
18
18
 
61
61
#ifdef UNIV_DEBUG
62
62
/** Set these in order to enable debug printout. */
63
63
/* @{ */
64
 
/** Log the outcome of each row_merge_cmp() call, comparing records. */
65
64
static ibool    row_merge_print_cmp;
66
 
/** Log each record read from temporary file. */
67
65
static ibool    row_merge_print_read;
68
 
/** Log each record written to temporary file. */
69
66
static ibool    row_merge_print_write;
70
 
/** Log each row_merge_blocks() call, merging two blocks of records to
71
 
a bigger one. */
72
 
static ibool    row_merge_print_block;
73
 
/** Log each block read from temporary file. */
74
 
static ibool    row_merge_print_block_read;
75
 
/** Log each block written to temporary file. */
76
 
static ibool    row_merge_print_block_write;
77
67
/* @} */
78
68
#endif /* UNIV_DEBUG */
79
69
 
120
110
 
121
111
/** Information about temporary files used in merge sort */
122
112
struct merge_file_struct {
123
 
        int             fd;             /*!< file descriptor */
124
 
        ulint           offset;         /*!< file offset (end of file) */
125
 
        ib_uint64_t     n_rec;          /*!< number of records in the file */
 
113
        int     fd;             /*!< file descriptor */
 
114
        ulint   offset;         /*!< file offset */
126
115
};
127
116
 
128
117
/** Information about temporary files used in merge sort */
425
414
        row_merge_dup_t*        dup,    /*!< in/out: for reporting duplicates */
426
415
        const dfield_t*         entry)  /*!< in: duplicate index entry */
427
416
{
428
 
        mrec_buf_t*             buf;
 
417
        mrec_buf_t              buf;
429
418
        const dtuple_t*         tuple;
430
419
        dtuple_t                tuple_store;
431
420
        const rec_t*            rec;
432
421
        const dict_index_t*     index   = dup->index;
433
422
        ulint                   n_fields= dict_index_get_n_fields(index);
434
 
        mem_heap_t*             heap;
 
423
        mem_heap_t*             heap    = NULL;
 
424
        ulint                   offsets_[REC_OFFS_NORMAL_SIZE];
435
425
        ulint*                  offsets;
436
426
        ulint                   n_ext;
437
427
 
441
431
                return;
442
432
        }
443
433
 
 
434
        rec_offs_init(offsets_);
 
435
 
444
436
        /* Convert the tuple to a record and then to MySQL format. */
445
 
        heap = mem_heap_create((1 + REC_OFFS_HEADER_SIZE + n_fields)
446
 
                               * sizeof *offsets
447
 
                               + sizeof *buf);
448
 
 
449
 
        buf = mem_heap_alloc(heap, sizeof *buf);
450
437
 
451
438
        tuple = dtuple_from_fields(&tuple_store, entry, n_fields);
452
439
        n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0;
453
440
 
454
 
        rec = rec_convert_dtuple_to_rec(*buf, index, tuple, n_ext);
455
 
        offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
 
441
        rec = rec_convert_dtuple_to_rec(buf, index, tuple, n_ext);
 
442
        offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED,
 
443
                                  &heap);
456
444
 
457
445
        innobase_rec_to_mysql(dup->table, rec, index, offsets);
458
446
 
459
 
        mem_heap_free(heap);
 
447
        if (UNIV_LIKELY_NULL(heap)) {
 
448
                mem_heap_free(heap);
 
449
        }
460
450
}
461
451
 
462
452
/*************************************************************//**
627
617
}
628
618
 
629
619
/******************************************************//**
630
 
Create a memory heap and allocate space for row_merge_rec_offsets()
631
 
and mrec_buf_t[3].
 
620
Create a memory heap and allocate space for row_merge_rec_offsets().
632
621
@return memory heap */
633
622
static
634
623
mem_heap_t*
635
624
row_merge_heap_create(
636
625
/*==================*/
637
626
        const dict_index_t*     index,          /*!< in: record descriptor */
638
 
        mrec_buf_t**            buf,            /*!< out: 3 buffers */
639
627
        ulint**                 offsets1,       /*!< out: offsets */
640
628
        ulint**                 offsets2)       /*!< out: offsets */
641
629
{
642
630
        ulint           i       = 1 + REC_OFFS_HEADER_SIZE
643
631
                + dict_index_get_n_fields(index);
644
 
        mem_heap_t*     heap    = mem_heap_create(2 * i * sizeof **offsets1
645
 
                                                  + 3 * sizeof **buf);
 
632
        mem_heap_t*     heap    = mem_heap_create(2 * i * sizeof *offsets1);
646
633
 
647
 
        *buf = mem_heap_alloc(heap, 3 * sizeof **buf);
648
 
        *offsets1 = mem_heap_alloc(heap, i * sizeof **offsets1);
649
 
        *offsets2 = mem_heap_alloc(heap, i * sizeof **offsets2);
 
634
        *offsets1 = mem_heap_alloc(heap, i * sizeof *offsets1);
 
635
        *offsets2 = mem_heap_alloc(heap, i * sizeof *offsets2);
650
636
 
651
637
        (*offsets1)[0] = (*offsets2)[0] = i;
652
638
        (*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index);
697
683
        ib_uint64_t     ofs = ((ib_uint64_t) offset) * sizeof *buf;
698
684
        ibool           success;
699
685
 
700
 
#ifdef UNIV_DEBUG
701
 
        if (row_merge_print_block_read) {
702
 
                fprintf(stderr, "row_merge_read fd=%d ofs=%lu\n",
703
 
                        fd, (ulong) offset);
704
 
        }
705
 
#endif /* UNIV_DEBUG */
706
 
 
707
686
        success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf,
708
687
                                                 (ulint) (ofs & 0xFFFFFFFF),
709
688
                                                 (ulint) (ofs >> 32),
718
697
}
719
698
 
720
699
/********************************************************************//**
721
 
Write a merge block to the file system.
 
700
Read a merge block from the file system.
722
701
@return TRUE if request was successful, FALSE if fail */
723
702
static
724
703
ibool
725
704
row_merge_write(
726
705
/*============*/
727
706
        int             fd,     /*!< in: file descriptor */
728
 
        ulint           offset, /*!< in: offset where to read
729
 
                                in number of row_merge_block_t
730
 
                                elements */
 
707
        ulint           offset, /*!< in: offset where to write */
731
708
        const void*     buf)    /*!< in: data */
732
709
{
733
710
        ib_uint64_t     ofs = ((ib_uint64_t) offset)
734
711
                * sizeof(row_merge_block_t);
735
712
 
736
 
#ifdef UNIV_DEBUG
737
 
        if (row_merge_print_block_write) {
738
 
                fprintf(stderr, "row_merge_write fd=%d ofs=%lu\n",
739
 
                        fd, (ulong) offset);
740
 
        }
741
 
#endif /* UNIV_DEBUG */
742
 
 
743
713
        return(UNIV_LIKELY(os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
744
714
                                         (ulint) (ofs & 0xFFFFFFFF),
745
715
                                         (ulint) (ofs >> 32),
749
719
/********************************************************************//**
750
720
Read a merge record.
751
721
@return pointer to next record, or NULL on I/O error or end of list */
752
 
static __attribute__((nonnull))
 
722
static
753
723
const byte*
754
724
row_merge_read_rec(
755
725
/*===============*/
1078
1048
                                                record to be compared */
1079
1049
        const ulint*            offsets1,       /*!< in: first record offsets */
1080
1050
        const ulint*            offsets2,       /*!< in: second record offsets */
1081
 
        const dict_index_t*     index,          /*!< in: index */
1082
 
        ibool*                  null_eq)        /*!< out: set to TRUE if
1083
 
                                                found matching null values */
 
1051
        const dict_index_t*     index)          /*!< in: index */
1084
1052
{
1085
1053
        int     cmp;
1086
1054
 
1087
 
        cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index,
1088
 
                                 null_eq);
 
1055
        cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index);
1089
1056
 
1090
1057
#ifdef UNIV_DEBUG
1091
1058
        if (row_merge_print_cmp) {
1104
1071
Reads clustered index of the table and create temporary files
1105
1072
containing the index entries for the indexes to be built.
1106
1073
@return DB_SUCCESS or error */
1107
 
static __attribute__((nonnull))
 
1074
static
1108
1075
ulint
1109
1076
row_merge_read_clustered_index(
1110
1077
/*===========================*/
1209
1176
                in order to release the latch on the old page. */
1210
1177
 
1211
1178
                if (btr_pcur_is_after_last_on_page(&pcur)) {
1212
 
                        if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
1213
 
                                err = DB_INTERRUPTED;
1214
 
                                trx->error_key_num = 0;
1215
 
                                goto func_exit;
1216
 
                        }
1217
 
 
1218
1179
                        btr_pcur_store_position(&pcur, &mtr);
1219
1180
                        mtr_commit(&mtr);
1220
1181
                        mtr_start(&mtr);
1254
1215
 
1255
1216
                                        if (dfield_is_null(field)) {
1256
1217
                                                err = DB_PRIMARY_KEY_IS_NULL;
1257
 
                                                trx->error_key_num = 0;
1258
 
                                                goto func_exit;
 
1218
                                                i = 0;
 
1219
                                                goto err_exit;
1259
1220
                                        }
1260
1221
 
1261
1222
                                        field_type->prtype |= DATA_NOT_NULL;
1273
1234
 
1274
1235
                        if (UNIV_LIKELY
1275
1236
                            (row && row_merge_buf_add(buf, row, ext))) {
1276
 
                                file->n_rec++;
1277
1237
                                continue;
1278
1238
                        }
1279
1239
 
1295
1255
 
1296
1256
                                        if (dup.n_dup) {
1297
1257
                                                err = DB_DUPLICATE_KEY;
 
1258
err_exit:
1298
1259
                                                trx->error_key_num = i;
1299
1260
                                                goto func_exit;
1300
1261
                                        }
1308
1269
                        if (!row_merge_write(file->fd, file->offset++,
1309
1270
                                             block)) {
1310
1271
                                err = DB_OUT_OF_FILE_SPACE;
1311
 
                                trx->error_key_num = i;
1312
 
                                goto func_exit;
 
1272
                                goto err_exit;
1313
1273
                        }
1314
1274
 
1315
1275
                        UNIV_MEM_INVALID(block[0], sizeof block[0]);
1316
1276
                        merge_buf[i] = row_merge_buf_empty(buf);
1317
1277
 
1318
 
                        if (UNIV_LIKELY(row != NULL)) {
1319
 
                                /* Try writing the record again, now
1320
 
                                that the buffer has been written out
1321
 
                                and emptied. */
1322
 
 
1323
 
                                if (UNIV_UNLIKELY
1324
 
                                    (!row_merge_buf_add(buf, row, ext))) {
1325
 
                                        /* An empty buffer should have enough
1326
 
                                        room for at least one record. */
1327
 
                                        ut_error;
1328
 
                                }
1329
 
 
1330
 
                                file->n_rec++;
 
1278
                        /* Try writing the record again, now that
 
1279
                        the buffer has been written out and emptied. */
 
1280
 
 
1281
                        if (UNIV_UNLIKELY
 
1282
                            (row && !row_merge_buf_add(buf, row, ext))) {
 
1283
                                /* An empty buffer should have enough
 
1284
                                room for at least one record. */
 
1285
                                ut_error;
1331
1286
                        }
1332
1287
                }
1333
1288
 
1366
1321
                b2 = row_merge_write_rec(&block[2], &buf[2], b2,        \
1367
1322
                                         of->fd, &of->offset,           \
1368
1323
                                         mrec##N, offsets##N);          \
1369
 
                if (UNIV_UNLIKELY(!b2 || ++of->n_rec > file->n_rec)) {  \
 
1324
                if (UNIV_UNLIKELY(!b2)) {                               \
1370
1325
                        goto corrupt;                                   \
1371
1326
                }                                                       \
1372
1327
                b##N = row_merge_read_rec(&block[N], &buf[N],           \
1382
1337
        } while (0)
1383
1338
 
1384
1339
/*************************************************************//**
1385
 
Merge two blocks of records on disk and write a bigger block.
 
1340
Merge two blocks of linked lists on disk and write a bigger block.
1386
1341
@return DB_SUCCESS or error code */
1387
1342
static
1388
1343
ulint
1389
1344
row_merge_blocks(
1390
1345
/*=============*/
1391
1346
        const dict_index_t*     index,  /*!< in: index being created */
1392
 
        const merge_file_t*     file,   /*!< in: file containing
 
1347
        merge_file_t*           file,   /*!< in/out: file containing
1393
1348
                                        index entries */
1394
1349
        row_merge_block_t*      block,  /*!< in/out: 3 buffers */
1395
1350
        ulint*                  foffs0, /*!< in/out: offset of first
1403
1358
{
1404
1359
        mem_heap_t*     heap;   /*!< memory heap for offsets0, offsets1 */
1405
1360
 
1406
 
        mrec_buf_t*     buf;    /*!< buffer for handling
1407
 
                                split mrec in block[] */
 
1361
        mrec_buf_t      buf[3]; /*!< buffer for handling split mrec in block[] */
1408
1362
        const byte*     b0;     /*!< pointer to block[0] */
1409
1363
        const byte*     b1;     /*!< pointer to block[1] */
1410
1364
        byte*           b2;     /*!< pointer to block[2] */
1413
1367
        ulint*          offsets0;/* offsets of mrec0 */
1414
1368
        ulint*          offsets1;/* offsets of mrec1 */
1415
1369
 
1416
 
#ifdef UNIV_DEBUG
1417
 
        if (row_merge_print_block) {
1418
 
                fprintf(stderr,
1419
 
                        "row_merge_blocks fd=%d ofs=%lu + fd=%d ofs=%lu"
1420
 
                        " = fd=%d ofs=%lu\n",
1421
 
                        file->fd, (ulong) *foffs0,
1422
 
                        file->fd, (ulong) *foffs1,
1423
 
                        of->fd, (ulong) of->offset);
1424
 
        }
1425
 
#endif /* UNIV_DEBUG */
1426
 
 
1427
 
        heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);
1428
 
 
1429
 
        buf = mem_heap_alloc(heap, sizeof(mrec_buf_t) * 3);
 
1370
        heap = row_merge_heap_create(index, &offsets0, &offsets1);
1430
1371
 
1431
1372
        /* Write a record and read the next record.  Split the output
1432
1373
        file in two halves, which can be merged on the following pass. */
1453
1394
        }
1454
1395
 
1455
1396
        while (mrec0 && mrec1) {
1456
 
                ibool   null_eq = FALSE;
1457
1397
                switch (row_merge_cmp(mrec0, mrec1,
1458
 
                                      offsets0, offsets1, index,
1459
 
                                      &null_eq)) {
 
1398
                                      offsets0, offsets1, index)) {
1460
1399
                case 0:
1461
1400
                        if (UNIV_UNLIKELY
1462
 
                            (dict_index_is_unique(index) && !null_eq)) {
 
1401
                            (dict_index_is_unique(index))) {
1463
1402
                                innobase_rec_to_mysql(table, mrec0,
1464
1403
                                                      index, offsets0);
1465
1404
                                mem_heap_free(heap);
1500
1439
}
1501
1440
 
1502
1441
/*************************************************************//**
1503
 
Copy a block of index entries.
1504
 
@return TRUE on success, FALSE on failure */
1505
 
static __attribute__((nonnull))
1506
 
ibool
1507
 
row_merge_blocks_copy(
1508
 
/*==================*/
1509
 
        const dict_index_t*     index,  /*!< in: index being created */
1510
 
        const merge_file_t*     file,   /*!< in: input file */
1511
 
        row_merge_block_t*      block,  /*!< in/out: 3 buffers */
1512
 
        ulint*                  foffs0, /*!< in/out: input file offset */
1513
 
        merge_file_t*           of)     /*!< in/out: output file */
1514
 
{
1515
 
        mem_heap_t*     heap;   /*!< memory heap for offsets0, offsets1 */
1516
 
 
1517
 
        mrec_buf_t*     buf;    /*!< buffer for handling
1518
 
                                split mrec in block[] */
1519
 
        const byte*     b0;     /*!< pointer to block[0] */
1520
 
        byte*           b2;     /*!< pointer to block[2] */
1521
 
        const mrec_t*   mrec0;  /*!< merge rec, points to block[0] */
1522
 
        ulint*          offsets0;/* offsets of mrec0 */
1523
 
        ulint*          offsets1;/* dummy offsets */
1524
 
 
1525
 
#ifdef UNIV_DEBUG
1526
 
        if (row_merge_print_block) {
1527
 
                fprintf(stderr,
1528
 
                        "row_merge_blocks_copy fd=%d ofs=%lu"
1529
 
                        " = fd=%d ofs=%lu\n",
1530
 
                        file->fd, (ulong) foffs0,
1531
 
                        of->fd, (ulong) of->offset);
1532
 
        }
1533
 
#endif /* UNIV_DEBUG */
1534
 
 
1535
 
        heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);
1536
 
 
1537
 
        buf = mem_heap_alloc(heap, sizeof(mrec_buf_t) * 3);
1538
 
 
1539
 
        /* Write a record and read the next record.  Split the output
1540
 
        file in two halves, which can be merged on the following pass. */
1541
 
 
1542
 
        if (!row_merge_read(file->fd, *foffs0, &block[0])) {
1543
 
corrupt:
1544
 
                mem_heap_free(heap);
1545
 
                return(FALSE);
1546
 
        }
1547
 
 
1548
 
        b0 = block[0];
1549
 
        b2 = block[2];
1550
 
 
1551
 
        b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, file->fd,
1552
 
                                foffs0, &mrec0, offsets0);
1553
 
        if (UNIV_UNLIKELY(!b0 && mrec0)) {
1554
 
 
1555
 
                goto corrupt;
1556
 
        }
1557
 
 
1558
 
        if (mrec0) {
1559
 
                /* append all mrec0 to output */
1560
 
                for (;;) {
1561
 
                        ROW_MERGE_WRITE_GET_NEXT(0, goto done0);
1562
 
                }
1563
 
        }
1564
 
done0:
1565
 
 
1566
 
        /* The file offset points to the beginning of the last page
1567
 
        that has been read.  Update it to point to the next block. */
1568
 
        (*foffs0)++;
1569
 
 
1570
 
        mem_heap_free(heap);
1571
 
        return(row_merge_write_eof(&block[2], b2, of->fd, &of->offset)
1572
 
               != NULL);
1573
 
}
1574
 
 
1575
 
/*************************************************************//**
1576
1442
Merge disk files.
1577
1443
@return DB_SUCCESS or error code */
1578
 
static __attribute__((nonnull))
 
1444
static
1579
1445
ulint
1580
1446
row_merge(
1581
1447
/*======*/
1582
 
        trx_t*                  trx,    /*!< in: transaction */
1583
1448
        const dict_index_t*     index,  /*!< in: index being created */
1584
1449
        merge_file_t*           file,   /*!< in/out: file containing
1585
1450
                                        index entries */
1586
 
        ulint*                  half,   /*!< in/out: half the file */
 
1451
        ulint                   half,   /*!< in: half the file */
1587
1452
        row_merge_block_t*      block,  /*!< in/out: 3 buffers */
1588
1453
        int*                    tmpfd,  /*!< in/out: temporary file handle */
1589
1454
        TABLE*                  table)  /*!< in/out: MySQL table, for
1594
1459
        ulint           foffs1; /*!< second input offset */
1595
1460
        ulint           error;  /*!< error code */
1596
1461
        merge_file_t    of;     /*!< output file */
1597
 
        const ulint     ihalf   = *half;
1598
 
                                /*!< half the input file */
1599
 
        ulint           ohalf;  /*!< half the output file */
1600
1462
 
1601
1463
        UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]);
1602
 
        ut_ad(ihalf < file->offset);
 
1464
        ut_ad(half > 0);
1603
1465
 
1604
1466
        of.fd = *tmpfd;
1605
1467
        of.offset = 0;
1606
 
        of.n_rec = 0;
1607
1468
 
1608
1469
        /* Merge blocks to the output file. */
1609
 
        ohalf = 0;
1610
1470
        foffs0 = 0;
1611
 
        foffs1 = ihalf;
1612
 
 
1613
 
        for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) {
1614
 
                ulint   ahalf;  /*!< arithmetic half the input file */
1615
 
 
1616
 
                if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
1617
 
                        return(DB_INTERRUPTED);
1618
 
                }
1619
 
 
 
1471
        foffs1 = half;
 
1472
 
 
1473
        for (; foffs0 < half && foffs1 < file->offset; foffs0++, foffs1++) {
1620
1474
                error = row_merge_blocks(index, file, block,
1621
1475
                                         &foffs0, &foffs1, &of, table);
1622
1476
 
1623
1477
                if (error != DB_SUCCESS) {
1624
1478
                        return(error);
1625
1479
                }
1626
 
 
1627
 
                /* Record the offset of the output file when
1628
 
                approximately half the output has been generated.  In
1629
 
                this way, the next invocation of row_merge() will
1630
 
                spend most of the time in this loop.  The initial
1631
 
                estimate is ohalf==0. */
1632
 
                ahalf = file->offset / 2;
1633
 
                ut_ad(ohalf <= of.offset);
1634
 
 
1635
 
                /* Improve the estimate until reaching half the input
1636
 
                file size, or we can not get any closer to it.  All
1637
 
                comparands should be non-negative when !(ohalf < ahalf)
1638
 
                because ohalf <= of.offset. */
1639
 
                if (ohalf < ahalf || of.offset - ahalf < ohalf - ahalf) {
1640
 
                        ohalf = of.offset;
1641
 
                }
1642
1480
        }
1643
1481
 
1644
 
        /* Copy the last blocks, if there are any. */
1645
 
 
1646
 
        while (foffs0 < ihalf) {
1647
 
                if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
1648
 
                        return(DB_INTERRUPTED);
1649
 
                }
1650
 
 
1651
 
                if (!row_merge_blocks_copy(index, file, block, &foffs0, &of)) {
 
1482
        /* Copy the last block, if there is one. */
 
1483
        while (foffs0 < half) {
 
1484
                if (!row_merge_read(file->fd, foffs0++, block)
 
1485
                    || !row_merge_write(of.fd, of.offset++, block)) {
1652
1486
                        return(DB_CORRUPTION);
1653
1487
                }
1654
1488
        }
1655
 
 
1656
 
        ut_ad(foffs0 == ihalf);
1657
 
 
1658
1489
        while (foffs1 < file->offset) {
1659
 
                if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
1660
 
                        return(DB_INTERRUPTED);
1661
 
                }
1662
 
 
1663
 
                if (!row_merge_blocks_copy(index, file, block, &foffs1, &of)) {
 
1490
                if (!row_merge_read(file->fd, foffs1++, block)
 
1491
                    || !row_merge_write(of.fd, of.offset++, block)) {
1664
1492
                        return(DB_CORRUPTION);
1665
1493
                }
1666
1494
        }
1667
1495
 
1668
 
        ut_ad(foffs1 == file->offset);
1669
 
 
1670
 
        if (UNIV_UNLIKELY(of.n_rec != file->n_rec)) {
1671
 
                return(DB_CORRUPTION);
1672
 
        }
1673
 
 
1674
1496
        /* Swap file descriptors for the next pass. */
1675
1497
        *tmpfd = file->fd;
1676
1498
        *file = of;
1677
 
        *half = ohalf;
1678
1499
 
1679
1500
        UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]);
1680
1501
 
1688
1509
ulint
1689
1510
row_merge_sort(
1690
1511
/*===========*/
1691
 
        trx_t*                  trx,    /*!< in: transaction */
1692
1512
        const dict_index_t*     index,  /*!< in: index being created */
1693
1513
        merge_file_t*           file,   /*!< in/out: file containing
1694
1514
                                        index entries */
1698
1518
                                        reporting erroneous key value
1699
1519
                                        if applicable */
1700
1520
{
1701
 
        ulint   half = file->offset / 2;
1702
 
 
1703
 
        /* The file should always contain at least one byte (the end
1704
 
        of file marker).  Thus, it must be at least one block. */
1705
 
        ut_ad(file->offset > 0);
1706
 
 
1707
 
        do {
 
1521
        ulint   blksz;  /*!< block size */
 
1522
 
 
1523
        for (blksz = 1; blksz < file->offset; blksz *= 2) {
 
1524
                ulint   half;
1708
1525
                ulint   error;
1709
1526
 
1710
 
                error = row_merge(trx, index, file, &half,
1711
 
                                  block, tmpfd, table);
 
1527
                ut_ad(ut_is_2pow(blksz));
 
1528
                half = ut_2pow_round((file->offset + (blksz - 1)) / 2, blksz);
 
1529
                error = row_merge(index, file, half, block, tmpfd, table);
1712
1530
 
1713
1531
                if (error != DB_SUCCESS) {
1714
1532
                        return(error);
1715
1533
                }
1716
 
 
1717
 
                /* half > 0 should hold except when the file consists
1718
 
                of one block.  No need to merge further then. */
1719
 
                ut_ad(half > 0 || file->offset == 1);
1720
 
        } while (half < file->offset && half > 0);
 
1534
        }
1721
1535
 
1722
1536
        return(DB_SUCCESS);
1723
1537
}
1776
1590
        int                     fd,     /*!< in: file descriptor */
1777
1591
        row_merge_block_t*      block)  /*!< in/out: file buffer */
1778
1592
{
 
1593
        mrec_buf_t              buf;
1779
1594
        const byte*             b;
1780
1595
        que_thr_t*              thr;
1781
1596
        ins_node_t*             node;
1794
1609
 
1795
1610
        trx->op_info = "inserting index entries";
1796
1611
 
1797
 
        graph_heap = mem_heap_create(500 + sizeof(mrec_buf_t));
 
1612
        graph_heap = mem_heap_create(500);
1798
1613
        node = ins_node_create(INS_DIRECT, table, graph_heap);
1799
1614
 
1800
1615
        thr = pars_complete_graph_for_exec(node, trx, graph_heap);
1816
1631
        if (!row_merge_read(fd, foffs, block)) {
1817
1632
                error = DB_CORRUPTION;
1818
1633
        } else {
1819
 
                mrec_buf_t*     buf = mem_heap_alloc(graph_heap, sizeof *buf);
1820
 
 
1821
1634
                for (;;) {
1822
1635
                        const mrec_t*   mrec;
1823
1636
                        dtuple_t*       dtuple;
1824
1637
                        ulint           n_ext;
1825
1638
 
1826
 
                        b = row_merge_read_rec(block, buf, b, index,
 
1639
                        b = row_merge_read_rec(block, &buf, b, index,
1827
1640
                                               fd, &foffs, &mrec, offsets);
1828
1641
                        if (UNIV_UNLIKELY(!b)) {
1829
1642
                                /* End of list, or I/O error */
1985
1798
        static const char str1[] =
1986
1799
                "PROCEDURE DROP_INDEX_PROC () IS\n"
1987
1800
                "BEGIN\n"
1988
 
                /* Rename the index, so that it will be dropped by
1989
 
                row_merge_drop_temp_indexes() at crash recovery
1990
 
                if the server crashes before this trx is committed. */
1991
 
                "UPDATE SYS_INDEXES SET NAME=CONCAT('"
1992
 
                TEMP_INDEX_PREFIX_STR "', NAME) WHERE ID = :indexid;\n"
1993
 
                "COMMIT WORK;\n"
1994
 
                /* Drop the field definitions of the index. */
1995
1801
                "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n"
1996
 
                /* Drop the index definition and the B-tree. */
1997
 
                "DELETE FROM SYS_INDEXES WHERE ID = :indexid;\n"
 
1802
                "DELETE FROM SYS_INDEXES WHERE ID = :indexid\n"
 
1803
                "               AND TABLE_ID = :tableid;\n"
1998
1804
                "END;\n";
1999
1805
 
2000
1806
        ut_ad(index && table && trx);
2001
1807
 
2002
1808
        pars_info_add_dulint_literal(info, "indexid", index->id);
 
1809
        pars_info_add_dulint_literal(info, "tableid", table->id);
2003
1810
 
2004
1811
        trx_start_if_not_started(trx);
2005
1812
        trx->op_info = "dropping index";
2048
1855
/*=============================*/
2049
1856
{
2050
1857
        trx_t*          trx;
2051
 
        btr_pcur_t      pcur;
2052
 
        mtr_t           mtr;
 
1858
        ulint           err;
2053
1859
 
2054
 
        /* Load the table definitions that contain partially defined
2055
 
        indexes, so that the data dictionary information can be checked
2056
 
        when accessing the tablename.ibd files. */
 
1860
        /* We use the private SQL parser of Innobase to generate the
 
1861
        query graphs needed in deleting the dictionary data from system
 
1862
        tables in Innobase. Deleting a row from SYS_INDEXES table also
 
1863
        frees the file segments of the B-tree associated with the index. */
 
1864
        static const char drop_temp_indexes[] =
 
1865
                "PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
 
1866
                "indexid CHAR;\n"
 
1867
                "DECLARE CURSOR c IS SELECT ID FROM SYS_INDEXES\n"
 
1868
                "WHERE SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "';\n"
 
1869
                "BEGIN\n"
 
1870
                "\tOPEN c;\n"
 
1871
                "\tWHILE 1=1 LOOP\n"
 
1872
                "\t\tFETCH c INTO indexid;\n"
 
1873
                "\t\tIF (SQL % NOTFOUND) THEN\n"
 
1874
                "\t\t\tEXIT;\n"
 
1875
                "\t\tEND IF;\n"
 
1876
                "\t\tDELETE FROM SYS_FIELDS WHERE INDEX_ID = indexid;\n"
 
1877
                "\t\tDELETE FROM SYS_INDEXES WHERE ID = indexid;\n"
 
1878
                "\tEND LOOP;\n"
 
1879
                "\tCLOSE c;\n"
 
1880
                "\tCOMMIT WORK;\n"
 
1881
                "END;\n";
2057
1882
 
2058
1883
        trx = trx_allocate_for_background();
2059
1884
        trx->op_info = "dropping partially created indexes";
2060
1885
        row_mysql_lock_data_dictionary(trx);
2061
1886
 
2062
 
        mtr_start(&mtr);
2063
 
 
2064
 
        btr_pcur_open_at_index_side(
2065
 
                TRUE,
2066
 
                dict_table_get_first_index(dict_sys->sys_indexes),
2067
 
                BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
2068
 
 
2069
 
        for (;;) {
2070
 
                const rec_t*    rec;
2071
 
                const byte*     field;
2072
 
                ulint           len;
2073
 
                dulint          table_id;
2074
 
                dict_table_t*   table;
2075
 
 
2076
 
                btr_pcur_move_to_next_user_rec(&pcur, &mtr);
2077
 
 
2078
 
                if (!btr_pcur_is_on_user_rec(&pcur)) {
2079
 
                        break;
2080
 
                }
2081
 
 
2082
 
                rec = btr_pcur_get_rec(&pcur);
2083
 
                field = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_NAME_FIELD,
2084
 
                                              &len);
2085
 
                if (len == UNIV_SQL_NULL || len == 0
2086
 
                    || (char) *field != TEMP_INDEX_PREFIX) {
2087
 
                        continue;
2088
 
                }
2089
 
 
2090
 
                /* This is a temporary index. */
2091
 
 
2092
 
                field = rec_get_nth_field_old(rec, 0/*TABLE_ID*/, &len);
2093
 
                if (len != 8) {
2094
 
                        /* Corrupted TABLE_ID */
2095
 
                        continue;
2096
 
                }
2097
 
 
2098
 
                table_id = mach_read_from_8(field);
2099
 
 
2100
 
                btr_pcur_store_position(&pcur, &mtr);
2101
 
                btr_pcur_commit_specify_mtr(&pcur, &mtr);
2102
 
 
2103
 
                table = dict_table_get_on_id_low(table_id);
2104
 
 
2105
 
                if (table) {
2106
 
                        dict_index_t*   index;
2107
 
                        dict_index_t*   next_index;
2108
 
 
2109
 
                        for (index = dict_table_get_first_index(table);
2110
 
                             index; index = next_index) {
2111
 
 
2112
 
                                next_index = dict_table_get_next_index(index);
2113
 
 
2114
 
                                if (*index->name == TEMP_INDEX_PREFIX) {
2115
 
                                        row_merge_drop_index(index, table, trx);
2116
 
                                        trx_commit_for_mysql(trx);
2117
 
                                }
2118
 
                        }
2119
 
                }
2120
 
 
2121
 
                mtr_start(&mtr);
2122
 
                btr_pcur_restore_position(BTR_SEARCH_LEAF,
2123
 
                                          &pcur, &mtr);
2124
 
        }
2125
 
 
2126
 
        btr_pcur_close(&pcur);
2127
 
        mtr_commit(&mtr);
 
1887
        /* Incomplete transactions may be holding some locks on the
 
1888
        data dictionary tables.  However, they should never have been
 
1889
        able to lock the records corresponding to the partially
 
1890
        created indexes that we are attempting to delete, because the
 
1891
        table was locked when the indexes were being created.  We will
 
1892
        drop the partially created indexes before the rollback of
 
1893
        incomplete transactions is initiated.  Thus, this should not
 
1894
        interfere with the incomplete transactions. */
 
1895
        trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
 
1896
        pars_info_t *info = pars_info_create();
 
1897
        err = que_eval_sql(info, drop_temp_indexes, FALSE, trx);
 
1898
        ut_a(err == DB_SUCCESS);
 
1899
 
2128
1900
        row_mysql_unlock_data_dictionary(trx);
2129
1901
        trx_free_for_background(trx);
2130
1902
}
2139
1911
{
2140
1912
        merge_file->fd = innobase_mysql_tmpfile();
2141
1913
        merge_file->offset = 0;
2142
 
        merge_file->n_rec = 0;
2143
1914
}
2144
1915
 
2145
1916
/*********************************************************************//**
2360
2131
        if (err != DB_SUCCESS) {
2361
2132
err_exit:
2362
2133
                trx->error_state = DB_SUCCESS;
2363
 
                trx_general_rollback_for_mysql(trx, NULL);
 
2134
                trx_general_rollback_for_mysql(trx, FALSE, NULL);
2364
2135
                trx->error_state = DB_SUCCESS;
2365
2136
        }
2366
2137
 
2562
2333
        sorting and inserting. */
2563
2334
 
2564
2335
        for (i = 0; i < n_indexes; i++) {
2565
 
                error = row_merge_sort(trx, indexes[i], &merge_files[i],
 
2336
                error = row_merge_sort(indexes[i], &merge_files[i],
2566
2337
                                       block, &tmpfd, table);
2567
2338
 
2568
2339
                if (error == DB_SUCCESS) {