187
182
ut_ad(max_tuples <= sizeof(row_merge_block_t));
188
183
ut_ad(max_tuples < buf_size);
190
buf = static_cast<row_merge_buf_t *>(mem_heap_zalloc(heap, buf_size));
185
buf = mem_heap_zalloc(heap, buf_size);
191
186
buf->heap = heap;
192
187
buf->index = index;
193
188
buf->max_tuples = max_tuples;
194
buf->tuples = static_cast<const dfield_t **>(mem_heap_alloc(heap,
195
2 * max_tuples * sizeof *buf->tuples));
189
buf->tuples = mem_heap_alloc(heap,
190
2 * max_tuples * sizeof *buf->tuples);
196
191
buf->tmp_tuples = buf->tuples + max_tuples;
288
282
n_fields = dict_index_get_n_fields(index);
290
entry = static_cast<dfield_t *>(mem_heap_alloc(buf->heap, n_fields * sizeof *entry));
284
entry = mem_heap_alloc(buf->heap, n_fields * sizeof *entry);
291
285
buf->tuples[buf->n_tuples] = entry;
295
289
extra_size = UT_BITS_IN_BYTES(index->n_nullable);
297
ifield = dict_index_get_nth_field(index, 0);
299
for (i = 0; i < n_fields; i++, field++, ifield++) {
291
for (i = 0; i < n_fields; i++, field++) {
292
const dict_field_t* ifield;
300
293
const dict_col_t* col;
302
295
const dfield_t* row_field;
298
ifield = dict_index_get_nth_field(index, i);
305
299
col = ifield->col;
306
300
col_no = dict_col_get_no(col);
307
301
row_field = dtuple_get_nth_field(row, col_no);
314
308
} else if (UNIV_LIKELY(!ext)) {
315
309
} else if (dict_index_is_clust(index)) {
316
310
/* Flag externally stored fields. */
317
const byte* row_buf = row_ext_lookup(ext, col_no,
311
const byte* buf = row_ext_lookup(ext, col_no,
319
if (UNIV_LIKELY_NULL(row_buf)) {
320
ut_a(row_buf != field_ref_zero);
313
if (UNIV_LIKELY_NULL(buf)) {
314
ut_a(buf != field_ref_zero);
321
315
if (i < dict_index_get_n_unique(index)) {
322
dfield_set_data(field, row_buf, len);
316
dfield_set_data(field, buf, len);
324
318
dfield_set_ext(field);
325
319
len = dfield_get_len(field);
329
const byte* row_buf = row_ext_lookup(ext, col_no,
323
const byte* buf = row_ext_lookup(ext, col_no,
331
if (UNIV_LIKELY_NULL(row_buf)) {
332
ut_a(row_buf != field_ref_zero);
333
dfield_set_data(field, row_buf, len);
325
if (UNIV_LIKELY_NULL(buf)) {
326
ut_a(buf != field_ref_zero);
327
dfield_set_data(field, buf, len);
445
rec_offs_init(offsets_);
450
447
/* Convert the tuple to a record and then to MySQL format. */
451
heap = mem_heap_create((1 + REC_OFFS_HEADER_SIZE + n_fields)
455
buf = static_cast<mrec_buf_t *>(mem_heap_alloc(heap, sizeof *buf));
457
449
tuple = dtuple_from_fields(&tuple_store, entry, n_fields);
458
450
n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0;
460
rec = rec_convert_dtuple_to_rec(*buf, index, tuple, n_ext);
461
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
452
rec = rec_convert_dtuple_to_rec(buf, index, tuple, n_ext);
453
offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED,
463
456
innobase_rec_to_mysql(dup->table, rec, index, offsets);
458
if (UNIV_LIKELY_NULL(heap)) {
468
463
/*************************************************************//**
635
630
/******************************************************//**
636
Create a memory heap and allocate space for row_merge_rec_offsets()
631
Create a memory heap and allocate space for row_merge_rec_offsets().
638
632
@return memory heap */
641
635
row_merge_heap_create(
642
636
/*==================*/
643
637
const dict_index_t* index, /*!< in: record descriptor */
644
mrec_buf_t** buf, /*!< out: 3 buffers */
645
638
ulint** offsets1, /*!< out: offsets */
646
639
ulint** offsets2) /*!< out: offsets */
648
641
ulint i = 1 + REC_OFFS_HEADER_SIZE
649
642
+ dict_index_get_n_fields(index);
650
mem_heap_t* heap = mem_heap_create(2 * i * sizeof **offsets1
643
mem_heap_t* heap = mem_heap_create(2 * i * sizeof *offsets1);
653
*buf = static_cast<mrec_buf_t*>(mem_heap_alloc(heap, 3 * sizeof **buf));
654
*offsets1 = static_cast<ulint*>(mem_heap_alloc(heap, i * sizeof **offsets1));
655
*offsets2 = static_cast<ulint*>(mem_heap_alloc(heap, i * sizeof **offsets2));
645
*offsets1 = mem_heap_alloc(heap, i * sizeof *offsets1);
646
*offsets2 = mem_heap_alloc(heap, i * sizeof *offsets2);
657
648
(*offsets1)[0] = (*offsets2)[0] = i;
658
649
(*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index);
733
717
/********************************************************************//**
734
Write a merge block to the file system.
718
Write a merge block to the file system.
735
719
@return TRUE if request was successful, FALSE if fail */
740
724
int fd, /*!< in: file descriptor */
741
ulint offset, /*!< in: offset where to write,
742
in number of row_merge_block_t elements */
725
ulint offset, /*!< in: offset where to write */
743
726
const void* buf) /*!< in: data */
745
size_t buf_len = sizeof(row_merge_block_t);
746
ib_uint64_t ofs = buf_len * (ib_uint64_t) offset;
749
ret = os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
750
(ulint) (ofs & 0xFFFFFFFF),
728
ib_uint64_t ofs = ((ib_uint64_t) offset)
729
* sizeof(row_merge_block_t);
754
731
#ifdef UNIV_DEBUG
755
732
if (row_merge_print_block_write) {
759
736
#endif /* UNIV_DEBUG */
761
#ifdef POSIX_FADV_DONTNEED
762
/* The block will be needed on the next merge pass,
763
but it can be evicted from the file cache meanwhile. */
764
posix_fadvise(fd, ofs, buf_len, POSIX_FADV_DONTNEED);
765
#endif /* POSIX_FADV_DONTNEED */
767
return(UNIV_LIKELY(ret));
738
return(UNIV_LIKELY(os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
739
(ulint) (ofs & 0xFFFFFFFF),
741
sizeof(row_merge_block_t))));
770
744
/********************************************************************//**
1099
1073
record to be compared */
1100
1074
const ulint* offsets1, /*!< in: first record offsets */
1101
1075
const ulint* offsets2, /*!< in: second record offsets */
1102
const dict_index_t* index, /*!< in: index */
1103
ibool* null_eq) /*!< out: set to TRUE if
1104
found matching null values */
1076
const dict_index_t* index) /*!< in: index */
1108
cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index,
1080
cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index);
1111
1082
#ifdef UNIV_DEBUG
1112
1083
if (row_merge_print_cmp) {
1418
1389
ulint* foffs1, /*!< in/out: offset of second
1419
1390
source list in the file */
1420
1391
merge_file_t* of, /*!< in/out: output file */
1421
TABLE* table) /*!< in/out: MySQL table, for
1392
TABLE* table) /*!< in/out: MySQL table, for
1422
1393
reporting erroneous key value
1423
1394
if applicable */
1425
1396
mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */
1427
mrec_buf_t* buf; /*!< buffer for handling
1428
split mrec in block[] */
1398
mrec_buf_t buf[3]; /*!< buffer for handling split mrec in block[] */
1429
1399
const byte* b0; /*!< pointer to block[0] */
1430
1400
const byte* b1; /*!< pointer to block[1] */
1431
1401
byte* b2; /*!< pointer to block[2] */
1603
1568
const dict_index_t* index, /*!< in: index being created */
1604
1569
merge_file_t* file, /*!< in/out: file containing
1605
1570
index entries */
1571
ulint* half, /*!< in/out: half the file */
1606
1572
row_merge_block_t* block, /*!< in/out: 3 buffers */
1607
1573
int* tmpfd, /*!< in/out: temporary file handle */
1608
TABLE* table, /*!< in/out: MySQL table, for
1609
reporting erroneous key value
1611
ulint* num_run,/*!< in/out: Number of runs remain
1613
ulint* run_offset) /*!< in/out: Array contains the
1614
first offset number for each merge
1574
TABLE* table) /*!< in/out: MySQL table, for
1575
reporting erroneous key value
1617
1578
ulint foffs0; /*!< first input offset */
1618
1579
ulint foffs1; /*!< second input offset */
1619
1580
ulint error; /*!< error code */
1620
1581
merge_file_t of; /*!< output file */
1621
const ulint ihalf = run_offset[*num_run / 2];
1582
const ulint ihalf = *half;
1622
1583
/*!< half the input file */
1624
/*!< num of runs generated from this merge */
1584
ulint ohalf; /*!< half the output file */
1627
1586
UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]);
1628
1587
ut_ad(ihalf < file->offset);
1634
#ifdef POSIX_FADV_SEQUENTIAL
1635
/* The input file will be read sequentially, starting from the
1636
beginning and the middle. In Linux, the POSIX_FADV_SEQUENTIAL
1637
affects the entire file. Each block will be read exactly once. */
1638
posix_fadvise(file->fd, 0, 0,
1639
POSIX_FADV_SEQUENTIAL | POSIX_FADV_NOREUSE);
1640
#endif /* POSIX_FADV_SEQUENTIAL */
1642
1593
/* Merge blocks to the output file. */
1644
1596
foffs1 = ihalf;
1646
UNIV_MEM_INVALID(run_offset, *num_run * sizeof *run_offset);
1648
1598
for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) {
1599
ulint ahalf; /*!< arithmetic half the input file */
1650
1601
if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
1651
1602
return(DB_INTERRUPTED);
1654
/* Remember the offset number for this run */
1655
run_offset[n_run++] = of.offset;
1657
1605
error = row_merge_blocks(index, file, block,
1658
1606
&foffs0, &foffs1, &of, table);
1612
/* Record the offset of the output file when
1613
approximately half the output has been generated. In
1614
this way, the next invocation of row_merge() will
1615
spend most of the time in this loop. The initial
1616
estimate is ohalf==0. */
1617
ahalf = file->offset / 2;
1618
ut_ad(ohalf <= of.offset);
1620
/* Improve the estimate until reaching half the input
1621
file size, or we can not get any closer to it. All
1622
comparands should be non-negative when !(ohalf < ahalf)
1623
because ohalf <= of.offset. */
1624
if (ohalf < ahalf || of.offset - ahalf < ohalf - ahalf) {
1666
1629
/* Copy the last blocks, if there are any. */
1699
1656
return(DB_CORRUPTION);
1702
ut_ad(n_run <= *num_run);
1706
/* Each run can contain one or more offsets. As merge goes on,
1707
the number of runs (to merge) will reduce until we have one
1708
single run. So the number of runs will never be greater than
1709
the number of offsets in file */
1710
ut_ad((*num_run) <= file->offset);
1712
/* The number of offsets in the output file is always less than or
1713
equal to the number of offsets in the input file */
1714
ut_ad(of.offset <= file->offset);
1716
1659
/* Swap file descriptors for the next pass. */
1717
1660
*tmpfd = file->fd;
1720
1664
UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]);
1735
1679
index entries */
1736
1680
row_merge_block_t* block, /*!< in/out: 3 buffers */
1737
1681
int* tmpfd, /*!< in/out: temporary file handle */
1738
TABLE* table) /*!< in/out: MySQL table, for
1682
TABLE* table) /*!< in/out: MySQL table, for
1739
1683
reporting erroneous key value
1740
1684
if applicable */
1742
1686
ulint half = file->offset / 2;
1745
ulint error = DB_SUCCESS;
1747
/* Record the number of merge runs we need to perform */
1748
num_runs = file->offset;
1750
/* If there is at most one run, there is nothing to merge */
1751
if (num_runs <= 1) {
1755
/* "run_offset" records each run's first offset number */
1756
run_offset = (ulint*) mem_alloc(file->offset * sizeof(ulint));
1758
/* This tells row_merge() where to start for the first round
1760
run_offset[half] = half;
1762
1688
/* The file should always contain at least one byte (the end
1763
1689
of file marker). Thus, it must be at least one block. */
1764
1690
ut_ad(file->offset > 0);
1766
/* Merge the runs until we have one big run */
1768
error = row_merge(trx, index, file, block, tmpfd,
1769
table, &num_runs, run_offset);
1771
UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset);
1695
error = row_merge(trx, index, file, &half,
1696
block, tmpfd, table);
1773
1698
if (error != DB_SUCCESS) {
1776
} while (num_runs > 1);
1778
mem_free(run_offset);
1702
/* half > 0 should hold except when the file consists
1703
of one block. No need to merge further then. */
1704
ut_ad(half > 0 || file->offset == 1);
1705
} while (half < file->offset && half > 0);
1783
1710
/*************************************************************//**
1879
1802
if (!row_merge_read(fd, foffs, block)) {
1880
1803
error = DB_CORRUPTION;
1882
mrec_buf_t* buf = static_cast<mrec_buf_t *>(mem_heap_alloc(graph_heap, sizeof *buf));
1885
1806
const mrec_t* mrec;
1886
1807
dtuple_t* dtuple;
1889
b = row_merge_read_rec(block, buf, b, index,
1810
b = row_merge_read_rec(block, &buf, b, index,
1890
1811
fd, &foffs, &mrec, offsets);
1891
1812
if (UNIV_UNLIKELY(!b)) {
1892
1813
/* End of list, or I/O error */
2057
1978
/* Drop the field definitions of the index. */
2058
1979
"DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n"
2059
1980
/* Drop the index definition and the B-tree. */
2060
"DELETE FROM SYS_INDEXES WHERE ID = :indexid;\n"
1981
"DELETE FROM SYS_INDEXES WHERE ID = :indexid\n"
1982
" AND TABLE_ID = :tableid;\n"
2063
1985
ut_ad(index && table && trx);
2065
pars_info_add_ull_literal(info, "indexid", index->id);
1987
pars_info_add_dulint_literal(info, "indexid", index->id);
1988
pars_info_add_dulint_literal(info, "tableid", table->id);
2067
1990
trx_start_if_not_started(trx);
2068
1991
trx->op_info = "dropping index";
2111
2034
/*=============================*/
2117
/* Load the table definitions that contain partially defined
2118
indexes, so that the data dictionary information can be checked
2119
when accessing the tablename.ibd files. */
2039
/* We use the private SQL parser of Innobase to generate the
2040
query graphs needed in deleting the dictionary data from system
2041
tables in Innobase. Deleting a row from SYS_INDEXES table also
2042
frees the file segments of the B-tree associated with the index. */
2043
static const char drop_temp_indexes[] =
2044
"PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
2046
"DECLARE CURSOR c IS SELECT ID FROM SYS_INDEXES\n"
2047
"WHERE SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "';\n"
2050
"\tWHILE 1=1 LOOP\n"
2051
"\t\tFETCH c INTO indexid;\n"
2052
"\t\tIF (SQL % NOTFOUND) THEN\n"
2055
"\t\tDELETE FROM SYS_FIELDS WHERE INDEX_ID = indexid;\n"
2056
"\t\tDELETE FROM SYS_INDEXES WHERE ID = indexid;\n"
2121
2062
trx = trx_allocate_for_background();
2122
2063
trx->op_info = "dropping partially created indexes";
2123
2064
row_mysql_lock_data_dictionary(trx);
2127
btr_pcur_open_at_index_side(
2129
dict_table_get_first_index(dict_sys->sys_indexes),
2130
BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
2136
table_id_t table_id;
2137
dict_table_t* table;
2139
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
2141
if (!btr_pcur_is_on_user_rec(&pcur)) {
2145
rec = btr_pcur_get_rec(&pcur);
2146
field = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_NAME_FIELD,
2148
if (len == UNIV_SQL_NULL || len == 0
2149
|| (char) *field != TEMP_INDEX_PREFIX) {
2153
/* This is a temporary index. */
2155
field = rec_get_nth_field_old(rec, 0/*TABLE_ID*/, &len);
2157
/* Corrupted TABLE_ID */
2161
table_id = mach_read_from_8(field);
2163
btr_pcur_store_position(&pcur, &mtr);
2164
btr_pcur_commit_specify_mtr(&pcur, &mtr);
2166
table = dict_table_get_on_id_low(table_id);
2169
dict_index_t* index;
2170
dict_index_t* next_index;
2172
for (index = dict_table_get_first_index(table);
2173
index; index = next_index) {
2175
next_index = dict_table_get_next_index(index);
2177
if (*index->name == TEMP_INDEX_PREFIX) {
2178
row_merge_drop_index(index, table, trx);
2179
trx_commit_for_mysql(trx);
2185
btr_pcur_restore_position(BTR_SEARCH_LEAF,
2189
btr_pcur_close(&pcur);
2066
/* Incomplete transactions may be holding some locks on the
2067
data dictionary tables. However, they should never have been
2068
able to lock the records corresponding to the partially
2069
created indexes that we are attempting to delete, because the
2070
table was locked when the indexes were being created. We will
2071
drop the partially created indexes before the rollback of
2072
incomplete transactions is initiated. Thus, this should not
2073
interfere with the incomplete transactions. */
2074
trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
2075
pars_info_t *info = pars_info_create();
2076
err = que_eval_sql(info, drop_temp_indexes, FALSE, trx);
2077
ut_a(err == DB_SUCCESS);
2191
2079
row_mysql_unlock_data_dictionary(trx);
2192
2080
trx_free_for_background(trx);
2200
2088
/*==================*/
2201
2089
merge_file_t* merge_file) /*!< out: merge file structure */
2204
/* This temp file open does not go through normal
2205
file APIs, add instrumentation to register with
2206
performance schema */
2207
struct PSI_file_locker* locker = NULL;
2208
PSI_file_locker_state state;
2209
register_pfs_file_open_begin(&state, locker, innodb_file_temp_key,
2211
"Innodb Merge Temp File",
2212
__FILE__, __LINE__);
2214
2091
merge_file->fd = innobase_mysql_tmpfile();
2215
2092
merge_file->offset = 0;
2216
2093
merge_file->n_rec = 0;
2218
register_pfs_file_open_end(locker, merge_file->fd);
2222
2096
/*********************************************************************//**
2408
2272
ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
2410
/* store the old/current name to an automatic variable */
2411
if (strlen(old_table->name) + 1 <= sizeof(old_name)) {
2412
memcpy(old_name, old_table->name, strlen(old_table->name) + 1);
2414
ut_print_timestamp(stderr);
2415
fprintf(stderr, "InnoDB: too long table name: '%s', "
2416
"max length is %d\n", old_table->name,
2417
MAX_TABLE_NAME_LEN);
2421
/* store the old/current name to an automatic variable */
2422
if (strlen(old_table->name) + 1 <= sizeof(old_name)) {
2423
memcpy(old_name, old_table->name, strlen(old_table->name) + 1);
2425
ut_print_timestamp(stderr);
2426
fprintf(stderr, "InnoDB: too long table name: '%s', "
2427
"max length is %d\n", old_table->name,
2428
MAX_TABLE_NAME_LEN);
2432
2274
trx->op_info = "renaming tables";
2434
2276
/* We use the private SQL parser of Innobase to generate the query