/********************************************************************//**
Write a merge block to the file system.
@return TRUE if request was successful, FALSE if fail */
static
ibool
row_merge_write(
/*============*/
	int		fd,	/*!< in: file descriptor */
	ulint		offset,	/*!< in: offset where to write,
				in number of row_merge_block_t elements */
	const void*	buf)	/*!< in: data */
{
	size_t		buf_len = sizeof(row_merge_block_t);
	ib_uint64_t	ofs = buf_len * (ib_uint64_t) offset;
	ibool		ret;

	ret = os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
			    (ulint) (ofs & 0xFFFFFFFF),
			    (ulint) (ofs >> 32),
			    buf_len);

#ifdef UNIV_DEBUG
	if (row_merge_print_block_write) {
		fprintf(stderr, "row_merge_write fd=%d ofs=%lu\n",
			fd, (ulong) offset);
	}
#endif /* UNIV_DEBUG */

#ifdef POSIX_FADV_DONTNEED
	/* The block will be needed on the next merge pass,
	but it can be evicted from the file cache meanwhile. */
	posix_fadvise(fd, ofs, buf_len, POSIX_FADV_DONTNEED);
#endif /* POSIX_FADV_DONTNEED */

	return(UNIV_LIKELY(ret));
}
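/* A worked example of the offset arithmetic above, assuming
sizeof(row_merge_block_t) is 1 MiB (1048576 bytes): writing block
offset 5 yields ofs = 5 * 1048576 = 5242880.  os_file_write() takes
that 64-bit byte position split into two 32-bit words, here
(ofs & 0xFFFFFFFF) == 5242880 and (ofs >> 32) == 0; block offsets
beyond 4 GiB simply carry into the high word, because the
multiplication is done in ib_uint64_t before the split. */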
/********************************************************************//**
Read a merge record.
@return pointer to next record, or NULL on I/O error or end of list */
static __attribute__((nonnull))
const byte*
row_merge_read_rec(
/*===============*/
/*************************************************************//**
Copy a block of index entries.
@return TRUE on success, FALSE on failure */
static __attribute__((nonnull))
ibool
row_merge_blocks_copy(
/*==================*/
	const dict_index_t*	index,	/*!< in: index being created */
	const merge_file_t*	file,	/*!< in: input file */
	row_merge_block_t*	block,	/*!< in/out: 3 buffers */
	ulint*			foffs0,	/*!< in/out: input file offset */
	merge_file_t*		of)	/*!< in/out: output file */
{
	mem_heap_t*	heap;	/*!< memory heap for offsets0, offsets1 */

	mrec_buf_t*	buf;	/*!< buffer for handling
				split mrec in block[] */
	const byte*	b0;	/*!< pointer to block[0] */
	byte*		b2;	/*!< pointer to block[2] */
	const mrec_t*	mrec0;	/*!< merge rec, points to block[0] */
	ulint*		offsets0;/* offsets of mrec0 */
	ulint*		offsets1;/* dummy offsets */

#ifdef UNIV_DEBUG
	if (row_merge_print_block) {
		fprintf(stderr,
			"row_merge_blocks_copy fd=%d ofs=%lu"
			" = fd=%d ofs=%lu\n",
			file->fd, (ulong) *foffs0,
			of->fd, (ulong) of->offset);
	}
#endif /* UNIV_DEBUG */

	heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);

	/* Write a record and read the next record.  Split the output
	file in two halves, which can be merged on the following pass. */

	if (!row_merge_read(file->fd, *foffs0, &block[0])) {
corrupt:
		mem_heap_free(heap);
		return(FALSE);
	}

	b0 = block[0];
	b2 = block[2];

	b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, file->fd,
				foffs0, &mrec0, offsets0);
	if (UNIV_UNLIKELY(!b0 && mrec0)) {

		goto corrupt;
	}

	if (mrec0) {
		/* append all mrec0 to output */
		for (;;) {
			ROW_MERGE_WRITE_GET_NEXT(0, goto done0);
		}
	}
done0:

	/* The file offset points to the beginning of the last page
	that has been read.  Update it to point to the next block. */
	(*foffs0)++;

	mem_heap_free(heap);
	return(row_merge_write_eof(&block[2], b2, of->fd, &of->offset)
	       != NULL);
}
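/* Example of when row_merge_blocks_copy() is needed, assuming each
sorted run is a single block: if a pass merges the halves {0, 1} and
{2, 3, 4}, row_merge_blocks() combines the runs starting at offsets
0+2 and 1+3, and the unpaired run at offset 4 is carried to the
output file by this function, so the pass still emits a well-formed
run whose start offset can be recorded by the caller. */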
/*************************************************************//**
Merge disk files.
@return DB_SUCCESS or error code */
static __attribute__((nonnull))
ulint
row_merge(
/*======*/
	trx_t*			trx,	/*!< in: transaction */
	const dict_index_t*	index,	/*!< in: index being created */
	merge_file_t*		file,	/*!< in/out: file containing
					index entries */
	row_merge_block_t*	block,	/*!< in/out: 3 buffers */
	int*			tmpfd,	/*!< in/out: temporary file handle */
	struct TABLE*		table,	/*!< in/out: MySQL table, for
					reporting erroneous key value
					if applicable */
	ulint*			num_run,/*!< in/out: number of runs that
					remain to be merged */
	ulint*			run_offset) /*!< in/out: array containing the
					first offset number for each merge
					run */
{
	ulint		foffs0;	/*!< first input offset */
	ulint		foffs1;	/*!< second input offset */
	ulint		error;	/*!< error code */
	merge_file_t	of;	/*!< output file */
	const ulint	ihalf	= run_offset[*num_run / 2];
				/*!< half the input file */
	ulint		n_run	= 0;
				/*!< num of runs generated from this merge */

	UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]);

	ut_ad(ihalf < file->offset);

	of.fd = *tmpfd;
	of.offset = 0;
	of.n_rec = 0;

#ifdef POSIX_FADV_SEQUENTIAL
	/* The input file will be read sequentially, starting from the
	beginning and the middle.  In Linux, the POSIX_FADV_SEQUENTIAL
	advice affects the entire file.  Each block will be read exactly
	once. */
	posix_fadvise(file->fd, 0, 0,
		      POSIX_FADV_SEQUENTIAL | POSIX_FADV_NOREUSE);
#endif /* POSIX_FADV_SEQUENTIAL */

	/* Merge blocks to the output file. */
	foffs0 = 0;
	foffs1 = ihalf;

	UNIV_MEM_INVALID(run_offset, *num_run * sizeof *run_offset);

	for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) {

		if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
			return(DB_INTERRUPTED);
		}

		/* Remember the offset number for this run */
		run_offset[n_run++] = of.offset;

		error = row_merge_blocks(index, file, block,
					 &foffs0, &foffs1, &of, table);

		if (error != DB_SUCCESS) {
			return(error);
		}
	}

	/* Copy the last blocks, if there are any. */

	while (foffs0 < ihalf) {
		if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
			return(DB_INTERRUPTED);
		}

		/* Remember the offset number for this run */
		run_offset[n_run++] = of.offset;

		if (!row_merge_blocks_copy(index, file, block, &foffs0, &of)) {
			return(DB_CORRUPTION);
		}
	}

	ut_ad(foffs0 == ihalf);

	while (foffs1 < file->offset) {
		if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
			return(DB_INTERRUPTED);
		}

		/* Remember the offset number for this run */
		run_offset[n_run++] = of.offset;

		if (!row_merge_blocks_copy(index, file, block, &foffs1, &of)) {
			return(DB_CORRUPTION);
		}
	}

	ut_ad(foffs1 == file->offset);

	if (UNIV_UNLIKELY(of.n_rec != file->n_rec)) {
		return(DB_CORRUPTION);
	}

	ut_ad(n_run <= *num_run);

	*num_run = n_run;

	/* Each run can contain one or more offsets.  As the merge goes on,
	the number of runs (still to be merged) decreases until we are left
	with one single run.  So the number of runs will always be smaller
	than the number of offsets in the file. */
	ut_ad((*num_run) <= file->offset);

	/* The number of offsets in the output file is always equal to or
	smaller than in the input file. */
	ut_ad(of.offset <= file->offset);

	/* Swap file descriptors for the next pass. */
	*tmpfd = file->fd;
	*file = of;

	return(DB_SUCCESS);
}
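/* A worked example of a single row_merge() call, for the first pass
set up by row_merge_sort(): every block is its own sorted run, so run
i starts at offset i, and only run_offset[*num_run / 2] needs to be
initialized beforehand.  With five runs (*num_run == 5):

	ihalf = run_offset[5 / 2] = 2

	merge runs starting at 0 and 2	-> output run, offset recorded
	merge runs starting at 1 and 3	-> output run, offset recorded
	copy run starting at 4		-> output run, offset recorded

On return, *num_run == 3 and run_offset[] holds the start offsets of
the three output runs; *tmpfd and *file have been swapped, so the
next pass reads what this pass wrote. */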
/*************************************************************//**
Merge disk files.
@return DB_SUCCESS or error code */
static __attribute__((nonnull))
ulint
row_merge_sort(
/*===========*/
	trx_t*			trx,	/*!< in: transaction */
	const dict_index_t*	index,	/*!< in: index being created */
	merge_file_t*		file,	/*!< in/out: file containing
					index entries */
	row_merge_block_t*	block,	/*!< in/out: 3 buffers */
	int*			tmpfd,	/*!< in/out: temporary file handle */
	struct TABLE*		table)	/*!< in/out: MySQL table, for
					reporting erroneous key value
					if applicable */
{
	ulint	half = file->offset / 2;
	ulint	num_runs;
	ulint*	run_offset;
	ulint	error = DB_SUCCESS;

	/* Record the number of merge runs we need to perform */
	num_runs = file->offset;

	/* If num_runs is at most one, there is nothing to merge. */
	if (num_runs <= 1) {
		return(error);
	}

	/* "run_offset" records each run's first offset number */
	run_offset = (ulint*) mem_alloc(file->offset * sizeof(ulint));

	/* This tells row_merge() where to start for the first round
	of merge. */
	run_offset[half] = half;

	/* The file should always contain at least one byte (the end
	of file marker).  Thus, it must be at least one block. */
	ut_ad(file->offset > 0);

	/* Merge the runs until we have one big run */
	do {
		error = row_merge(trx, index, file, block, tmpfd,
				  table, &num_runs, run_offset);

		UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset);

		if (error != DB_SUCCESS) {
			break;
		}
	} while (num_runs > 1);

	mem_free(run_offset);

	return(error);
}
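/* Sketch of how the pass loop converges: with file->offset == 7
initial runs, successive row_merge() calls leave num_runs == 4, then
2, then 1, and the do/while loop exits.  In general roughly
log2(number of initial runs) passes are needed, and each pass reads
and writes the whole file once through the three block buffers. */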
/*********************************************************************//**
Drop all partially created indexes during crash recovery. */
UNIV_INTERN
void
row_merge_drop_temp_indexes(void)
/*=============================*/
{
	trx_t*		trx;
	btr_pcur_t	pcur;
	mtr_t		mtr;

	/* Load the table definitions that contain partially defined
	indexes, so that the data dictionary information can be checked
	when accessing the tablename.ibd files. */
	trx = trx_allocate_for_background();
	trx->op_info = "dropping partially created indexes";
	row_mysql_lock_data_dictionary(trx);

	mtr_start(&mtr);

	btr_pcur_open_at_index_side(
		TRUE,
		dict_table_get_first_index(dict_sys->sys_indexes),
		BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);

	for (;;) {
		const rec_t*	rec;
		const byte*	field;
		ulint		len;
		table_id_t	table_id;
		dict_table_t*	table;

		btr_pcur_move_to_next_user_rec(&pcur, &mtr);

		if (!btr_pcur_is_on_user_rec(&pcur)) {
			break;
		}

		rec = btr_pcur_get_rec(&pcur);
		field = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_NAME_FIELD,
					      &len);
		if (len == UNIV_SQL_NULL || len == 0
		    || (char) *field != TEMP_INDEX_PREFIX) {
			continue;
		}

		/* This is a temporary index. */

		field = rec_get_nth_field_old(rec, 0/*TABLE_ID*/, &len);
		if (len != 8) {
			/* Corrupted TABLE_ID */
			continue;
		}

		table_id = mach_read_from_8(field);

		btr_pcur_store_position(&pcur, &mtr);
		btr_pcur_commit_specify_mtr(&pcur, &mtr);

		table = dict_table_get_on_id_low(table_id);

		if (table) {
			dict_index_t*	index;
			dict_index_t*	next_index;

			for (index = dict_table_get_first_index(table);
			     index; index = next_index) {

				next_index = dict_table_get_next_index(index);

				if (*index->name == TEMP_INDEX_PREFIX) {
					row_merge_drop_index(index, table, trx);
					trx_commit_for_mysql(trx);
				}
			}
		}

		mtr_start(&mtr);
		btr_pcur_restore_position(BTR_SEARCH_LEAF,
					  &pcur, &mtr);
	}

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);
	row_mysql_unlock_data_dictionary(trx);
	trx_free_for_background(trx);
}
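/* Example of what the scan above matches: while an index is being
created, its SYS_INDEXES.NAME entry begins with the reserved
TEMP_INDEX_PREFIX byte; a hypothetical in-progress index "c1_idx"
would be recorded as "\377c1_idx".  After a crash, any name still
carrying that prefix belongs to an index that was never completed,
so it is dropped here before normal processing resumes. */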