 /*****************************************************************************

-Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved.
+Copyright (C) 2005, 2010, Innobase Oy. All Rights Reserved.

 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@
 #include "handler0alter.h"
 #include <unistd.h>
@@
+/* Ignore posix_fadvise() on those platforms where it does not exist */
+#if defined __WIN__
+# define posix_fadvise(fd, offset, len, advice)	/* nothing */
+#endif /* __WIN__ */
@@
 /** Set these in order to enable debug printout. */
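A standalone sketch of the guard pattern added above: a no-op fallback define plus an advice-macro check at the call site, so the rest of the file can call posix_fadvise() unconditionally. Everything here is illustrative; the helper name is not from the patch.

#include <fcntl.h>	/* declares posix_fadvise() and POSIX_FADV_* on POSIX */

#ifndef POSIX_FADV_DONTNEED
# define posix_fadvise(fd, offset, len, advice)	/* nothing */
#endif

/* Hint that a block we have fully consumed may leave the file cache. */
static void block_consumed(int fd, long long ofs, long long len)
{
#ifdef POSIX_FADV_DONTNEED
	posix_fadvise(fd, ofs, len, POSIX_FADV_DONTNEED);
#else
	(void) fd; (void) ofs; (void) len;	/* no advice available */
#endif
}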
@@
 	ut_ad(max_tuples <= sizeof(row_merge_block_t));
 	ut_ad(max_tuples < buf_size);

-	buf = mem_heap_zalloc(heap, buf_size);
+	buf = static_cast<row_merge_buf_t *>(mem_heap_zalloc(heap, buf_size));
 	buf->heap = heap;
 	buf->index = index;
 	buf->max_tuples = max_tuples;
-	buf->tuples = mem_heap_alloc(heap,
-				     2 * max_tuples * sizeof *buf->tuples);
+	buf->tuples = static_cast<const dfield_t **>(mem_heap_alloc(heap,
+			2 * max_tuples * sizeof *buf->tuples));
 	buf->tmp_tuples = buf->tuples + max_tuples;
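The 2 * max_tuples allocation above packs the live tuple array and its merge scratch area into a single block; buf->tmp_tuples simply points at the upper half. A minimal standalone sketch of that layout (stand-in types, not InnoDB code):

#include <cstddef>
#include <cstdio>

int main()
{
	const size_t	max_tuples = 4;
	const char*	slots[2 * max_tuples];	/* one allocation, two views */
	const char**	tuples = slots;
	const char**	tmp_tuples = slots + max_tuples;

	tuples[0] = "rec0";
	tmp_tuples[0] = tuples[0];	/* a merge step writes into the scratch half */
	printf("%s\n", tmp_tuples[0]);
	return 0;
}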
@@
 	n_fields = dict_index_get_n_fields(index);

-	entry = mem_heap_alloc(buf->heap, n_fields * sizeof *entry);
+	entry = static_cast<dfield_t *>(mem_heap_alloc(buf->heap, n_fields * sizeof *entry));
 	buf->tuples[buf->n_tuples] = entry;
@@
 	extra_size = UT_BITS_IN_BYTES(index->n_nullable);

-	for (i = 0; i < n_fields; i++, field++) {
-		const dict_field_t*	ifield;
+	ifield = dict_index_get_nth_field(index, 0);
+
+	for (i = 0; i < n_fields; i++, field++, ifield++) {
 		const dict_col_t*	col;
@@
 		const dfield_t*		row_field;
@@
-		ifield = dict_index_get_nth_field(index, i);
 		col = ifield->col;
 		col_no = dict_col_get_no(col);
 		row_field = dtuple_get_nth_field(row, col_no);
@@
 		} else if (UNIV_LIKELY(!ext)) {
 		} else if (dict_index_is_clust(index)) {
 			/* Flag externally stored fields. */
-			const byte*	buf = row_ext_lookup(ext, col_no,
-							     &len);
-			if (UNIV_LIKELY_NULL(buf)) {
-				ut_a(buf != field_ref_zero);
+			const byte*	row_buf = row_ext_lookup(ext, col_no,
+								 &len);
+			if (UNIV_LIKELY_NULL(row_buf)) {
+				ut_a(row_buf != field_ref_zero);
 				if (i < dict_index_get_n_unique(index)) {
-					dfield_set_data(field, buf, len);
+					dfield_set_data(field, row_buf, len);
 				} else {
 					dfield_set_ext(field);
 					len = dfield_get_len(field);
@@
-			const byte*	buf = row_ext_lookup(ext, col_no,
-							     &len);
-			if (UNIV_LIKELY_NULL(buf)) {
-				ut_a(buf != field_ref_zero);
-				dfield_set_data(field, buf, len);
+			const byte*	row_buf = row_ext_lookup(ext, col_no,
+								 &len);
+			if (UNIV_LIKELY_NULL(row_buf)) {
+				ut_a(row_buf != field_ref_zero);
+				dfield_set_data(field, row_buf, len);
@@
 		if (ifield->prefix_len) {
 			len = dtype_get_at_most_n_mbchars(
 				col->prtype,
 				col->mbminlen, col->mbmaxlen,
 				ifield->prefix_len,
-				len, dfield_get_data(field));
+				len, static_cast<const char *>(dfield_get_data(field)));
 			dfield_set_len(field, len);
@@
 /** Structure for reporting duplicate records. */
 struct row_merge_dup_struct {
 	const dict_index_t*	index;	/*!< index being sorted */
 	TABLE*			table;	/*!< MySQL table object */
 	ulint			n_dup;	/*!< number of duplicates */
@@
 			* sizeof *offsets
@@
-	buf = mem_heap_alloc(heap, sizeof *buf);
+	buf = static_cast<mrec_buf_t *>(mem_heap_alloc(heap, sizeof *buf));

 	tuple = dtuple_from_fields(&tuple_store, entry, n_fields);
 	n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0;
@@
 	mem_heap_t*	heap = mem_heap_create(2 * i * sizeof **offsets1
 					       + 3 * sizeof **buf);

-	*buf = mem_heap_alloc(heap, 3 * sizeof **buf);
-	*offsets1 = mem_heap_alloc(heap, i * sizeof **offsets1);
-	*offsets2 = mem_heap_alloc(heap, i * sizeof **offsets2);
+	*buf = static_cast<mrec_buf_t*>(mem_heap_alloc(heap, 3 * sizeof **buf));
+	*offsets1 = static_cast<ulint*>(mem_heap_alloc(heap, i * sizeof **offsets1));
+	*offsets2 = static_cast<ulint*>(mem_heap_alloc(heap, i * sizeof **offsets2));

 	(*offsets1)[0] = (*offsets2)[0] = i;
 	(*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index);
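Here a single mem_heap_create() call is sized for both offset arrays plus the record buffers, and the pieces are carved out by successive allocations. A hedged sketch of the same arena idiom with plain malloc() stand-ins:

#include <cstdint>
#include <cstdio>
#include <cstdlib>

int main()
{
	const size_t	n = 8;	/* capacity of each offsets array */

	/* One allocation sized for both arrays, like the heap above. */
	uint32_t*	arena = (uint32_t*) malloc(2 * n * sizeof *arena);
	uint32_t*	offsets1 = arena;	/* first carve */
	uint32_t*	offsets2 = arena + n;	/* second carve */

	offsets1[0] = offsets2[0] = (uint32_t) n;	/* record capacities */
	printf("%u %u\n", offsets1[0], offsets2[0]);
	free(arena);	/* one free releases everything */
	return 0;
}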
@@
 	dict_index_t*	index;
 	const char**	column_names;

-	column_names = mem_alloc(index_def->n_fields * sizeof *column_names);
+	column_names = static_cast<const char **>(mem_alloc(index_def->n_fields * sizeof *column_names));

 	for (i = 0; i < index_def->n_fields; ++i) {
 		column_names[i] = index_def->fields[i].field_name;
@@
 	int			fd,	/*!< in: file descriptor */
-	ulint			offset,	/*!< in: offset where to read */
+	ulint			offset,	/*!< in: offset where to read
+					in number of row_merge_block_t
+					elements */
 	row_merge_block_t*	buf)	/*!< out: data */
@@
 	ib_uint64_t	ofs = ((ib_uint64_t) offset) * sizeof *buf;
@@
 				 (ulint) (ofs & 0xFFFFFFFF),
 				 (ulint) (ofs >> 32),
@@
+#ifdef POSIX_FADV_DONTNEED
+	/* Each block is read exactly once.  Free up the file cache. */
+	posix_fadvise(fd, ofs, sizeof *buf, POSIX_FADV_DONTNEED);
+#endif /* POSIX_FADV_DONTNEED */

 	if (UNIV_UNLIKELY(!success)) {
 		ut_print_timestamp(stderr);
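The 64-bit byte offset above is derived from a block number and then split into 32-bit halves for the file-I/O call. A self-contained sketch of that arithmetic; the 1 MiB block size is an assumption used only for illustration.

#include <cstdint>
#include <cstdio>

int main()
{
	const uint64_t	block_size = 1048576;	/* assumed sizeof(row_merge_block_t) */
	const uint64_t	offset = 5;		/* block number, not bytes */
	uint64_t	ofs = offset * block_size;

	/* Split into the low/high words the I/O layer expects. */
	uint32_t	low  = (uint32_t) (ofs & 0xFFFFFFFF);
	uint32_t	high = (uint32_t) (ofs >> 32);

	printf("byte offset %llu -> low=0x%08x high=0x%08x\n",
	       (unsigned long long) ofs, (unsigned) low, (unsigned) high);
	return 0;
}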
@@
 	int			fd,	/*!< in: file descriptor */
-	ulint			offset,	/*!< in: offset where to read
-					in number of row_merge_block_t
+	ulint			offset,	/*!< in: offset where to write,
+					in number of row_merge_block_t elements */
 	const void*		buf)	/*!< in: data */
@@
-	ib_uint64_t	ofs = ((ib_uint64_t) offset)
-		* sizeof(row_merge_block_t);
+	size_t		buf_len = sizeof(row_merge_block_t);
+	ib_uint64_t	ofs = buf_len * (ib_uint64_t) offset;
@@
+	ret = os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
+			    (ulint) (ofs & 0xFFFFFFFF),
+			    (ulint) (ofs >> 32),
+			    buf_len);

 #ifdef UNIV_DEBUG
 	if (row_merge_print_block_write) {
@@
 #endif /* UNIV_DEBUG */

+#ifdef POSIX_FADV_DONTNEED
+	/* The block will be needed on the next merge pass,
+	but it can be evicted from the file cache meanwhile. */
+	posix_fadvise(fd, ofs, buf_len, POSIX_FADV_DONTNEED);
+#endif /* POSIX_FADV_DONTNEED */

-	return(UNIV_LIKELY(os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
-					 (ulint) (ofs & 0xFFFFFFFF),
-					 (ulint) (ofs >> 32),
-					 sizeof(row_merge_block_t))));
+	return(UNIV_LIKELY(ret));
@@
 /********************************************************************//**
@@
 row_merge_read_clustered_index(
 /*===========================*/
 	trx_t*			trx,	/*!< in: transaction */
 	TABLE*			table,	/*!< in/out: MySQL table object,
 					for reporting erroneous records */
 	const dict_table_t*	old_table,/*!< in: table where rows are
@@
 	/* Create and initialize memory for record buffers */

-	merge_buf = mem_alloc(n_index * sizeof *merge_buf);
+	merge_buf = static_cast<row_merge_buf_t **>(mem_alloc(n_index * sizeof *merge_buf));

 	for (i = 0; i < n_index; i++) {
 		merge_buf[i] = row_merge_buf_create(index[i]);
@@
 	ut_a(n_cols == dict_table_get_n_cols(new_table));

-	nonnull = mem_alloc(n_cols * sizeof *nonnull);
+	nonnull = static_cast<ulint*>(mem_alloc(n_cols * sizeof *nonnull));

 	for (i = 0; i < n_cols; i++) {
 		if (dict_table_get_nth_col(old_table, i)->prtype
@@
 	for (i = 0; i < n_index; i++) {
 		row_merge_buf_t*	buf	= merge_buf[i];
 		merge_file_t*		file	= &files[i];
-		const dict_index_t*	index	= buf->index;
+		const dict_index_t*	buf_index = buf->index;

 		if (UNIV_LIKELY
 		    (row && row_merge_buf_add(buf, row, ext))) {
@@
 	ulint*			foffs1,	/*!< in/out: offset of second
 					source list in the file */
 	merge_file_t*		of,	/*!< in/out: output file */
 	TABLE*			table)	/*!< in/out: MySQL table, for
 					reporting erroneous key value
 					if applicable */
@@
 	heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);

-	buf = mem_heap_alloc(heap, sizeof(mrec_buf_t) * 3);
+	buf = static_cast<mrec_buf_t *>(mem_heap_alloc(heap, sizeof(mrec_buf_t) * 3));

 	/* Write a record and read the next record.  Split the output
 	file in two halves, which can be merged on the following pass. */
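A compact sketch of the two-way merge this comment describes, with int arrays standing in for the record blocks consumed from the two input offsets. Illustrative only; this is not the patch's code.

#include <cstddef>
#include <cstdio>
#include <vector>

/* Merge two sorted runs into one, the way row_merge_blocks() consumes
   blocks from foffs0 and foffs1 and appends to the output file. */
static std::vector<int> merge_runs(const std::vector<int>& a,
				   const std::vector<int>& b)
{
	std::vector<int>	out;
	size_t			i = 0;
	size_t			j = 0;

	while (i < a.size() && j < b.size()) {
		/* Write the smaller head record, then fetch the next
		record from the run it came from. */
		out.push_back(a[i] <= b[j] ? a[i++] : b[j++]);
	}
	while (i < a.size()) out.push_back(a[i++]);	/* copy the rest */
	while (j < b.size()) out.push_back(b[j++]);
	return out;
}

int main()
{
	std::vector<int> merged = merge_runs({1, 4, 7}, {2, 3, 9});
	for (size_t k = 0; k < merged.size(); k++) printf("%d ", merged[k]);
	printf("\n");
	return 0;
}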
@@
 #endif /* UNIV_DEBUG */

 	heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);
-	buf = mem_heap_alloc(heap, sizeof(mrec_buf_t) * 3);
+	buf = static_cast<mrec_buf_t *>(mem_heap_alloc(heap, sizeof(mrec_buf_t) * 3));

 	/* Write a record and read the next record.  Split the output
 	file in two halves, which can be merged on the following pass. */
@@
 	const dict_index_t*	index,	/*!< in: index being created */
 	merge_file_t*		file,	/*!< in/out: file containing
 					index entries */
-	ulint*			half,	/*!< in/out: half the file */
 	row_merge_block_t*	block,	/*!< in/out: 3 buffers */
 	int*			tmpfd,	/*!< in/out: temporary file handle */
-	TABLE*			table)	/*!< in/out: MySQL table, for
-					reporting erroneous key value
+	TABLE*			table,	/*!< in/out: MySQL table, for
+					reporting erroneous key value
+					if applicable */
+	ulint*			num_run,/*!< in/out: Number of runs that
+					remain to be merged */
+	ulint*			run_offset) /*!< in/out: Array containing the
+					first offset number for each merge
+					run */
@@
 	ulint		foffs0;	/*!< first input offset */
 	ulint		foffs1;	/*!< second input offset */
 	ulint		error;	/*!< error code */
 	merge_file_t	of;	/*!< output file */
-	const ulint	ihalf	= *half;
+	const ulint	ihalf	= run_offset[*num_run / 2];
 			/*!< half the input file */
-	ulint		ohalf;	/*!< half the output file */
+	ulint		n_run	= 0;
+			/*!< num of runs generated from this merge */
@@
 	UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]);
 	ut_ad(ihalf < file->offset);
@@
+#ifdef POSIX_FADV_SEQUENTIAL
+	/* The input file will be read sequentially, starting from the
+	beginning and the middle.  In Linux, POSIX_FADV_SEQUENTIAL
+	affects the entire file.  Each block will be read exactly once. */
+	posix_fadvise(file->fd, 0, 0,
+		      POSIX_FADV_SEQUENTIAL | POSIX_FADV_NOREUSE);
+#endif /* POSIX_FADV_SEQUENTIAL */

 	/* Merge blocks to the output file. */
 	foffs0 = 0;
 	foffs1 = ihalf;

+	UNIV_MEM_INVALID(run_offset, *num_run * sizeof *run_offset);

 	for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) {
-		ulint	ahalf;	/*!< arithmetic half the input file */

 		if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
 			return(DB_INTERRUPTED);
 		}

+		/* Remember the offset number for this run */
+		run_offset[n_run++] = of.offset;

 		error = row_merge_blocks(index, file, block,
 					 &foffs0, &foffs1, &of, table);
@@
-		/* Record the offset of the output file when
-		approximately half the output has been generated.  In
-		this way, the next invocation of row_merge() will
-		spend most of the time in this loop.  The initial
-		estimate is ohalf==0. */
-		ahalf = file->offset / 2;
-		ut_ad(ohalf <= of.offset);
-
-		/* Improve the estimate until reaching half the input
-		file size, or we can not get any closer to it.  All
-		comparands should be non-negative when !(ohalf < ahalf)
-		because ohalf <= of.offset. */
-		if (ohalf < ahalf || of.offset - ahalf < ohalf - ahalf) {
@@
 	/* Copy the last blocks, if there are any. */
@@
 			return(DB_INTERRUPTED);
 		}

+		/* Remember the offset number for this run */
+		run_offset[n_run++] = of.offset;

 		if (!row_merge_blocks_copy(index, file, block, &foffs0, &of)) {
 			return(DB_CORRUPTION);
@@
 			return(DB_INTERRUPTED);
 		}

+		/* Remember the offset number for this run */
+		run_offset[n_run++] = of.offset;

 		if (!row_merge_blocks_copy(index, file, block, &foffs1, &of)) {
 			return(DB_CORRUPTION);
@@
 		return(DB_CORRUPTION);
 	}

+	ut_ad(n_run <= *num_run);
+
+	/* Each run can contain one or more offsets.  As the merge goes
+	on, the number of runs to merge decreases until a single run
+	remains.  Thus the number of runs is never greater than the
+	number of offsets in the file. */
+	ut_ad((*num_run) <= file->offset);
+
+	/* The number of offsets in the output file is never greater
+	than that in the input file. */
+	ut_ad(of.offset <= file->offset);

 	/* Swap file descriptors for the next pass. */
 	*tmpfd = file->fd;
@@
 	UNIV_MEM_INVALID(block[0], 3 * sizeof block[0]);
@@
 					index entries */
 	row_merge_block_t*	block,	/*!< in/out: 3 buffers */
 	int*			tmpfd,	/*!< in/out: temporary file handle */
 	TABLE*			table)	/*!< in/out: MySQL table, for
 					reporting erroneous key value
 					if applicable */
@@
 	ulint	half = file->offset / 2;
+	ulint	num_runs;
+	ulint*	run_offset;
+	ulint	error = DB_SUCCESS;
+
+	/* Record the number of merge runs we need to perform */
+	num_runs = file->offset;
+
+	/* If there is at most one run, there is nothing to merge. */
+	if (num_runs <= 1) {
+		return(error);
+	}
+
+	/* "run_offset" records each run's first offset number */
+	run_offset = (ulint*) mem_alloc(file->offset * sizeof(ulint));
+
+	/* This tells row_merge() where to start for the first round
+	of merge. */
+	run_offset[half] = half;

 	/* The file should always contain at least one byte (the end
 	of file marker).  Thus, it must be at least one block. */
 	ut_ad(file->offset > 0);

+	/* Merge the runs until we have one big run */
 	do {
-		error = row_merge(trx, index, file, &half,
-				  block, tmpfd, table);
+		error = row_merge(trx, index, file, block, tmpfd,
+				  table, &num_runs, run_offset);
+
+		UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset);

 		if (error != DB_SUCCESS) {
@@
-		/* half > 0 should hold except when the file consists
-		of one block.  No need to merge further then. */
-		ut_ad(half > 0 || file->offset == 1);
-	} while (half < file->offset && half > 0);
+	} while (num_runs > 1);
+
+	mem_free(run_offset);
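The new driver keeps merging until a single run remains, instead of steering by a half-way offset estimate. A hedged standalone model of that loop, with in-memory vectors standing in for the run file and std::merge standing in for row_merge_blocks():

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <iterator>
#include <vector>

int main()
{
	/* One sorted run per initial block of the merge file;
	   run_offset[k] would record where run k starts on disk. */
	std::vector<std::vector<int>> runs = {{3, 9}, {1, 4}, {2}, {5, 8}};

	while (runs.size() > 1) {	/* } while (num_runs > 1); */
		std::vector<std::vector<int>> next;

		for (size_t k = 0; k + 1 < runs.size(); k += 2) {
			std::vector<int> merged;
			std::merge(runs[k].begin(), runs[k].end(),
				   runs[k + 1].begin(), runs[k + 1].end(),
				   std::back_inserter(merged));
			next.push_back(merged);	/* one output run per pair */
		}
		if (runs.size() % 2) {
			next.push_back(runs.back());	/* odd run copied through */
		}
		runs.swap(next);	/* swap "files" for the next pass */
	}

	for (size_t i = 0; i < runs[0].size(); i++) printf("%d ", runs[0][i]);
	printf("\n");
	return 0;
}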
@@
 /*************************************************************//**
@@
 		data = btr_rec_copy_externally_stored_field(
 			mrec, offsets, zip_size, i, &len, heap);

+		/* Because we have locked the table, any records
+		written by incomplete transactions must have been
+		rolled back already.  There must not be any incomplete
+		BLOB columns. */
+		ut_a(data);

 		dfield_set_data(field, data, len);
@@
 		ulint	i	= 1 + REC_OFFS_HEADER_SIZE
 			+ dict_index_get_n_fields(index);

-		offsets = mem_heap_alloc(graph_heap, i * sizeof *offsets);
+		offsets = static_cast<ulint *>(mem_heap_alloc(graph_heap, i * sizeof *offsets));
 		offsets[0] = i;
 		offsets[1] = dict_index_get_n_fields(index);
@@
 	if (!row_merge_read(fd, foffs, block)) {
 		error = DB_CORRUPTION;
 	} else {
-		mrec_buf_t*	buf = mem_heap_alloc(graph_heap, sizeof *buf);
+		mrec_buf_t*	buf = static_cast<mrec_buf_t *>(mem_heap_alloc(graph_heap, sizeof *buf));
@@
 		const mrec_t*	mrec;
@@
 	/* We use the select query graph as the dummy graph needed
 	in the lock module call */

-	thr = que_fork_get_first_thr(que_node_get_parent(thr));
+	thr = que_fork_get_first_thr(static_cast<que_fork_t *>(que_node_get_parent(thr)));
 	que_thr_move_to_run_state_for_mysql(thr, trx);
@@
 	ut_ad(index && table && trx);

-	pars_info_add_dulint_literal(info, "indexid", index->id);
+	pars_info_add_ull_literal(info, "indexid", index->id);

 	trx_start_if_not_started(trx);
 	trx->op_info = "dropping index";
@@
 	/* Replace this index with another equivalent index for all
 	foreign key constraints on this table where this index is used */

-	dict_table_replace_index_in_foreign_list(table, index);
+	dict_table_replace_index_in_foreign_list(table, index, trx);
 	dict_index_remove_from_cache(table, index);

 	trx->op_info = "";
@@
 /*==================*/
 	merge_file_t*	merge_file)	/*!< out: merge file structure */
+#ifdef UNIV_PFS_IO
+	/* This temp file open does not go through normal
+	file APIs, add instrumentation to register with
+	performance schema */
+	struct PSI_file_locker*	locker = NULL;
+	PSI_file_locker_state	state;
+	register_pfs_file_open_begin(&state, locker, innodb_file_temp_key,
+				     PSI_FILE_OPEN,
+				     "Innodb Merge Temp File",
+				     __FILE__, __LINE__);
+#endif
 	merge_file->fd = innobase_mysql_tmpfile();
 	merge_file->offset = 0;
 	merge_file->n_rec = 0;
+#ifdef UNIV_PFS_IO
+	register_pfs_file_open_end(locker, merge_file->fd);
+#endif
@@
 /*********************************************************************//**
@@
 /*===================*/
 	merge_file_t*	merge_file)	/*!< out: merge file structure */
+#ifdef UNIV_PFS_IO
+	struct PSI_file_locker*	locker = NULL;
+	PSI_file_locker_state	state;
+	register_pfs_file_io_begin(&state, locker, merge_file->fd, 0, PSI_FILE_CLOSE,
+				   __FILE__, __LINE__);
+#endif
 	if (merge_file->fd != -1) {
 		close(merge_file->fd);
 		merge_file->fd = -1;
 	}
+#ifdef UNIV_PFS_IO
+	register_pfs_file_io_end(locker, 0);
+#endif
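Both functions bracket the raw file operation between a begin probe and an end probe. A standalone sketch of that pattern follows; probe_begin()/probe_end() are invented stand-ins for illustration, not the PSI API.

#include <cstdio>

struct probe_state { const char* op; };

static probe_state probe_begin(const char* op, const char* file, int line)
{
	printf("begin %s at %s:%d\n", op, file, line);
	probe_state s = { op };
	return s;
}

static void probe_end(const probe_state& s, int fd)
{
	printf("end %s, fd=%d\n", s.op, fd);
}

int main()
{
	probe_state s = probe_begin("Innodb Merge Temp File",
				    __FILE__, __LINE__);
	int fd = -1;	/* innobase_mysql_tmpfile() in the real code */
	probe_end(s, fd);	/* report the result to the monitor */
	return 0;
}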
@@
 /*********************************************************************//**
@@
 	trx->op_info = "renaming indexes";

-	pars_info_add_dulint_literal(info, "tableid", table->id);
+	pars_info_add_ull_literal(info, "tableid", table->id);

 	err = que_eval_sql(info, rename_indexes, FALSE, trx);
@@
 	ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);

+	/* store the old/current name to an automatic variable */
+	if (strlen(old_table->name) + 1 <= sizeof(old_name)) {
+		memcpy(old_name, old_table->name, strlen(old_table->name) + 1);
+	} else {
+		ut_print_timestamp(stderr);
+		fprintf(stderr, "InnoDB: too long table name: '%s', "
+			"max length is %d\n", old_table->name,
+			MAX_TABLE_NAME_LEN);
+		ut_error;
+	}

 	trx->op_info = "renaming tables";

 	/* We use the private SQL parser of Innobase to generate the query
@@
 	node = ind_create_graph_create(index, heap);
 	thr = pars_complete_graph_for_exec(node, trx, heap);

-	ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
+	ut_a(thr == que_fork_start_command(static_cast<que_fork_t *>(que_node_get_parent(thr))));

 	que_run_threads(thr);
@@
 	/* Note the id of the transaction that created this
 	index, we use it to restrict readers from accessing
 	this index, to ensure read consistency. */
-	index->trx_id = (ib_uint64_t)
-		ut_conv_dulint_to_longlong(trx->id);
+	index->trx_id = trx->id;
@@
 	const trx_t*		trx,	/*!< in: transaction */
 	const dict_index_t*	index)	/*!< in: index to check */
@@
-	return(!trx->read_view || read_view_sees_trx_id(
-		       trx->read_view,
-		       ut_dulint_create((ulint) (index->trx_id >> 32),
-					(ulint) index->trx_id & 0xFFFFFFFF)));
+	return(!trx->read_view
+	       || read_view_sees_trx_id(trx->read_view, index->trx_id));
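The dulint code being removed stored one 64-bit id as two 32-bit words, which is why the old call sites shift and mask. A hedged round-trip sketch of that representation (the struct and helpers are illustrative):

#include <cassert>
#include <cstdint>

struct dulint_sketch { uint32_t high; uint32_t low; };

static dulint_sketch from_u64(uint64_t id)
{
	dulint_sketch d;
	d.high = (uint32_t) (id >> 32);		/* upper word */
	d.low  = (uint32_t) (id & 0xFFFFFFFF);	/* lower word */
	return d;
}

static uint64_t to_u64(dulint_sketch d)
{
	return ((uint64_t) d.high << 32) | d.low;
}

int main()
{
	uint64_t trx_id = 0x123456789ABCDEFULL;
	assert(to_u64(from_u64(trx_id)) == trx_id);	/* round-trips exactly */
	return 0;
}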
@@
 /*********************************************************************//**
@@
 						unless creating a PRIMARY KEY */
 	dict_index_t**		indexes,	/*!< in: indexes to be created */
 	ulint			n_indexes,	/*!< in: size of indexes[] */
 	TABLE*			table)		/*!< in/out: MySQL table, for
 						reporting erroneous key value
 						if applicable */
@@
 	/* Allocate memory for merge file data structure and initialize
@@
-	merge_files = mem_alloc(n_indexes * sizeof *merge_files);
+	merge_files = static_cast<merge_file_t *>(mem_alloc(n_indexes * sizeof *merge_files));
 	block_size = 3 * sizeof *block;
-	block = os_mem_alloc_large(&block_size);
+	block = static_cast<row_merge_block_t *>(os_mem_alloc_large(&block_size));

 	for (i = 0; i < n_indexes; i++) {
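os_mem_alloc_large() takes the requested size by pointer and may round it up, reporting back what was actually reserved. A minimal sketch of that out-parameter idiom with a malloc() stand-in; the huge-page rounding is an assumption for illustration.

#include <cstdio>
#include <cstdlib>

static void* alloc_large_sketch(size_t* size)
{
	const size_t align = 2 * 1024 * 1024;		/* pretend huge-page size */
	*size = (*size + align - 1) & ~(align - 1);	/* round the request up */
	return malloc(*size);				/* real code maps large pages */
}

int main()
{
	size_t	block_size = 3 * 1048576;	/* 3 merge blocks, as above */
	void*	block = alloc_large_sketch(&block_size);

	printf("reserved %zu bytes\n", block_size);	/* caller sees actual size */
	free(block);
	return 0;
}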