56
39
#include "log0log.h"
57
40
#include "ut0sort.h"
58
41
#include "handler0alter.h"
61
/* Ignore posix_fadvise() on those platforms where it does not exist */
63
# define posix_fadvise(fd, offset, len, advice) /* nothing */
67
/** Set these in order to enable debug printout. */
69
/** Log the outcome of each row_merge_cmp() call, comparing records. */
44
/* Set these in order to enable debug printout. */
70
45
static ibool row_merge_print_cmp;
71
/** Log each record read from temporary file. */
72
46
static ibool row_merge_print_read;
73
/** Log each record write to temporary file. */
74
47
static ibool row_merge_print_write;
75
/** Log each row_merge_blocks() call, merging two blocks of records to
77
static ibool row_merge_print_block;
78
/** Log each block read from temporary file. */
79
static ibool row_merge_print_block_read;
80
/** Log each block write to temporary file. */
81
static ibool row_merge_print_block_write;
83
48
#endif /* UNIV_DEBUG */
85
/** @brief Block size for I/O operations in merge sort.
87
The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty()
88
rounded to a power of 2.
50
/* Block size for I/O operations in merge sort. The minimum is
51
UNIV_PAGE_SIZE, or page_get_free_space_of_empty() rounded to a power of 2.
90
53
When not creating a PRIMARY KEY that contains column prefixes, this
91
54
can be set as small as UNIV_PAGE_SIZE / 2. See the comment above
92
55
ut_ad(data_size < sizeof(row_merge_block_t)). */
93
57
typedef byte row_merge_block_t[1048576];
95
/** @brief Secondary buffer for I/O operations of merge records.
59
/* Secondary buffer for I/O operations of merge records. This buffer
60
is used for writing or reading a record that spans two row_merge_block_t.
61
Thus, it must be able to hold one merge record, whose maximum size is
62
the same as the minimum size of row_merge_block_t. */
97
This buffer is used for writing or reading a record that spans two
98
row_merge_block_t. Thus, it must be able to hold one merge record,
99
whose maximum size is the same as the minimum size of
100
row_merge_block_t. */
101
64
typedef byte mrec_buf_t[UNIV_PAGE_SIZE];
103
/** @brief Merge record in row_merge_block_t.
105
The format is the same as a record in ROW_FORMAT=COMPACT with the
106
exception that the REC_N_NEW_EXTRA_BYTES are omitted. */
66
/* Merge record in row_merge_block_t. The format is the same as a
67
record in ROW_FORMAT=COMPACT with the exception that the
68
REC_N_NEW_EXTRA_BYTES are omitted. */
107
69
typedef byte mrec_t;
109
/** Buffer for sorting in main memory. */
71
/* Buffer for sorting in main memory. */
110
72
struct row_merge_buf_struct {
111
mem_heap_t* heap; /*!< memory heap where allocated */
112
dict_index_t* index; /*!< the index the tuples belong to */
113
ulint total_size; /*!< total amount of data bytes */
114
ulint n_tuples; /*!< number of data tuples */
115
ulint max_tuples; /*!< maximum number of data tuples */
116
const dfield_t**tuples; /*!< array of pointers to
73
mem_heap_t* heap; /* memory heap where allocated */
74
dict_index_t* index; /* the index the tuples belong to */
75
ulint total_size; /* total amount of data bytes */
76
ulint n_tuples; /* number of data tuples */
77
ulint max_tuples; /* maximum number of data tuples */
78
const dfield_t**tuples; /* array of pointers to
117
79
arrays of fields that form
118
80
the data tuples */
119
const dfield_t**tmp_tuples; /*!< temporary copy of tuples,
81
const dfield_t**tmp_tuples; /* temporary copy of tuples,
123
/** Buffer for sorting in main memory. */
124
85
typedef struct row_merge_buf_struct row_merge_buf_t;
126
/** Information about temporary files used in merge sort */
87
/* Information about temporary files used in merge sort are stored
127
90
struct merge_file_struct {
128
int fd; /*!< file descriptor */
129
ulint offset; /*!< file offset (end of file) */
130
ib_uint64_t n_rec; /*!< number of records in the file */
91
int fd; /* File descriptor */
92
ulint offset; /* File offset */
133
/** Information about temporary files used in merge sort */
134
95
typedef struct merge_file_struct merge_file_t;
137
/******************************************************//**
98
/**********************************************************
138
99
Display a merge tuple. */
141
102
row_merge_tuple_print(
142
103
/*==================*/
143
FILE* f, /*!< in: output stream */
144
const dfield_t* entry, /*!< in: tuple to print */
145
ulint n_fields)/*!< in: number of fields in the tuple */
104
FILE* f, /* in: output stream */
105
const dfield_t* entry, /* in: tuple to print */
106
ulint n_fields)/* in: number of fields in the tuple */
314
275
} else if (UNIV_LIKELY(!ext)) {
315
276
} else if (dict_index_is_clust(index)) {
316
277
/* Flag externally stored fields. */
317
const byte* row_buf = row_ext_lookup(ext, col_no,
278
const byte* buf = row_ext_lookup(ext, col_no,
319
if (UNIV_LIKELY_NULL(row_buf)) {
320
ut_a(row_buf != field_ref_zero);
280
if (UNIV_LIKELY_NULL(buf)) {
281
ut_a(buf != field_ref_zero);
321
282
if (i < dict_index_get_n_unique(index)) {
322
dfield_set_data(field, row_buf, len);
283
dfield_set_data(field, buf, len);
324
285
dfield_set_ext(field);
325
286
len = dfield_get_len(field);
329
const byte* row_buf = row_ext_lookup(ext, col_no,
290
const byte* buf = row_ext_lookup(ext, col_no,
331
if (UNIV_LIKELY_NULL(row_buf)) {
332
ut_a(row_buf != field_ref_zero);
333
dfield_set_data(field, row_buf, len);
292
if (UNIV_LIKELY_NULL(buf)) {
293
ut_a(buf != field_ref_zero);
294
dfield_set_data(field, buf, len);
411
rec_offs_init(offsets_);
450
413
/* Convert the tuple to a record and then to MySQL format. */
451
heap = mem_heap_create((1 + REC_OFFS_HEADER_SIZE + n_fields)
455
buf = static_cast<mrec_buf_t *>(mem_heap_alloc(heap, sizeof *buf));
457
415
tuple = dtuple_from_fields(&tuple_store, entry, n_fields);
458
416
n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0;
460
rec = rec_convert_dtuple_to_rec(*buf, index, tuple, n_ext);
461
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
418
rec = rec_convert_dtuple_to_rec(buf, index, tuple, n_ext);
419
offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED,
463
422
innobase_rec_to_mysql(dup->table, rec, index, offsets);
424
if (UNIV_LIKELY_NULL(heap)) {
468
/*************************************************************//**
470
@return 1, 0, -1 if a is greater, equal, less, respectively, than b */
429
/*****************************************************************
430
Compare two tuples. */
473
433
row_merge_tuple_cmp(
474
434
/*================*/
475
ulint n_field,/*!< in: number of fields */
476
const dfield_t* a, /*!< in: first tuple to be compared */
477
const dfield_t* b, /*!< in: second tuple to be compared */
478
row_merge_dup_t* dup) /*!< in/out: for reporting duplicates */
435
/* out: 1, 0, -1 if a is greater,
436
equal, less, respectively, than b */
437
ulint n_field,/* in: number of fields */
438
const dfield_t* a, /* in: first tuple to be compared */
439
const dfield_t* b, /* in: second tuple to be compared */
440
row_merge_dup_t* dup) /* in/out: for reporting duplicates */
481
443
const dfield_t* field = a;
483
/* Compare the fields of the tuples until a difference is
484
found or we run out of fields to compare. If !cmp at the
485
end, the tuples are equal. */
487
446
cmp = cmp_dfield_dfield(a++, b++);
488
447
} while (!cmp && --n_field);
490
449
if (UNIV_UNLIKELY(!cmp) && UNIV_LIKELY_NULL(dup)) {
491
/* Report a duplicate value error if the tuples are
492
logically equal. NULL columns are logically inequal,
493
although they are equal in the sorting order. Find
494
out if any of the fields are NULL. */
495
for (b = field; b != a; b++) {
496
if (dfield_is_null(b)) {
502
450
row_merge_dup_report(dup, field);
509
/** Wrapper for row_merge_tuple_sort() to inject some more context to
510
UT_SORT_FUNCTION_BODY().
511
@param a array of tuples that are being sorted
512
@param b aux (work area), same size as tuples[]
513
@param c lower bound of the sorting area, inclusive
514
@param d upper bound of the sorting area, inclusive */
515
#define row_merge_tuple_sort_ctx(a,b,c,d) \
516
row_merge_tuple_sort(n_field, dup, a, b, c, d)
517
/** Wrapper for row_merge_tuple_cmp() to inject some more context to
518
UT_SORT_FUNCTION_BODY().
519
@param a first tuple to be compared
520
@param b second tuple to be compared
521
@return 1, 0, -1 if a is greater, equal, less, respectively, than b */
522
#define row_merge_tuple_cmp_ctx(a,b) row_merge_tuple_cmp(n_field, a, b, dup)
524
/**********************************************************************//**
456
/**************************************************************************
525
457
Merge sort the tuple buffer in main memory. */
528
460
row_merge_tuple_sort(
529
461
/*=================*/
530
ulint n_field,/*!< in: number of fields */
531
row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */
532
const dfield_t** tuples, /*!< in/out: tuples */
533
const dfield_t** aux, /*!< in/out: work area */
534
ulint low, /*!< in: lower bound of the
462
ulint n_field,/* in: number of fields */
463
row_merge_dup_t* dup, /* in/out: for reporting duplicates */
464
const dfield_t** tuples, /* in/out: tuples */
465
const dfield_t** aux, /* in/out: work area */
466
ulint low, /* in: lower bound of the
535
467
sorting area, inclusive */
536
ulint high) /*!< in: upper bound of the
468
ulint high) /* in: upper bound of the
537
469
sorting area, exclusive */
471
#define row_merge_tuple_sort_ctx(a,b,c,d) \
472
row_merge_tuple_sort(n_field, dup, a, b, c, d)
473
#define row_merge_tuple_cmp_ctx(a,b) row_merge_tuple_cmp(n_field, a, b, dup)
539
475
UT_SORT_FUNCTION_BODY(row_merge_tuple_sort_ctx,
540
476
tuples, aux, low, high, row_merge_tuple_cmp_ctx);
543
/******************************************************//**
479
/**********************************************************
544
480
Sort a buffer. */
547
483
row_merge_buf_sort(
548
484
/*===============*/
549
row_merge_buf_t* buf, /*!< in/out: sort buffer */
550
row_merge_dup_t* dup) /*!< in/out: for reporting duplicates */
485
row_merge_buf_t* buf, /* in/out: sort buffer */
486
row_merge_dup_t* dup) /* in/out: for reporting duplicates */
552
488
row_merge_tuple_sort(dict_index_get_n_unique(buf->index), dup,
553
489
buf->tuples, buf->tmp_tuples, 0, buf->n_tuples);
556
/******************************************************//**
492
/**********************************************************
557
493
Write a buffer to a block. */
560
496
row_merge_buf_write(
561
497
/*================*/
562
const row_merge_buf_t* buf, /*!< in: sorted buffer */
498
const row_merge_buf_t* buf, /* in: sorted buffer */
563
499
#ifdef UNIV_DEBUG
564
const merge_file_t* of, /*!< in: output file */
500
const merge_file_t* of, /* in: output file */
565
501
#endif /* UNIV_DEBUG */
566
row_merge_block_t* block) /*!< out: buffer for writing to file */
502
row_merge_block_t* block) /* out: buffer for writing to file */
567
503
#ifndef UNIV_DEBUG
568
504
# define row_merge_buf_write(buf, of, block) row_merge_buf_write(buf, block)
569
505
#endif /* !UNIV_DEBUG */
632
568
#endif /* UNIV_DEBUG */
635
/******************************************************//**
636
Create a memory heap and allocate space for row_merge_rec_offsets()
638
@return memory heap */
571
/**********************************************************
572
Create a memory heap and allocate space for row_merge_rec_offsets(). */
641
575
row_merge_heap_create(
642
576
/*==================*/
643
const dict_index_t* index, /*!< in: record descriptor */
644
mrec_buf_t** buf, /*!< out: 3 buffers */
645
ulint** offsets1, /*!< out: offsets */
646
ulint** offsets2) /*!< out: offsets */
577
/* out: memory heap */
578
const dict_index_t* index, /* in: record descriptor */
579
ulint** offsets1, /* out: offsets */
580
ulint** offsets2) /* out: offsets */
648
582
ulint i = 1 + REC_OFFS_HEADER_SIZE
649
583
+ dict_index_get_n_fields(index);
650
mem_heap_t* heap = mem_heap_create(2 * i * sizeof **offsets1
584
mem_heap_t* heap = mem_heap_create(2 * i * sizeof *offsets1);
653
*buf = static_cast<mrec_buf_t*>(mem_heap_alloc(heap, 3 * sizeof **buf));
654
*offsets1 = static_cast<ulint*>(mem_heap_alloc(heap, i * sizeof **offsets1));
655
*offsets2 = static_cast<ulint*>(mem_heap_alloc(heap, i * sizeof **offsets2));
586
*offsets1 = mem_heap_alloc(heap, i * sizeof *offsets1);
587
*offsets2 = mem_heap_alloc(heap, i * sizeof *offsets2);
657
589
(*offsets1)[0] = (*offsets2)[0] = i;
658
590
(*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index);
692
/********************************************************************//**
693
Read a merge block from the file system.
694
@return TRUE if request was successful, FALSE if fail */
625
/************************************************************************
626
Read a merge block from the file system. */
699
int fd, /*!< in: file descriptor */
700
ulint offset, /*!< in: offset where to read
701
in number of row_merge_block_t
703
row_merge_block_t* buf) /*!< out: data */
631
/* out: TRUE if request was
632
successful, FALSE if fail */
633
int fd, /* in: file descriptor */
634
ulint offset, /* in: offset where to read */
635
row_merge_block_t* buf) /* out: data */
705
637
ib_uint64_t ofs = ((ib_uint64_t) offset) * sizeof *buf;
709
if (row_merge_print_block_read) {
710
fprintf(stderr, "row_merge_read fd=%d ofs=%lu\n",
713
#endif /* UNIV_DEBUG */
715
640
success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf,
716
641
(ulint) (ofs & 0xFFFFFFFF),
717
642
(ulint) (ofs >> 32),
719
#ifdef POSIX_FADV_DONTNEED
720
/* Each block is read exactly once. Free up the file cache. */
721
posix_fadvise(fd, ofs, sizeof *buf, POSIX_FADV_DONTNEED);
722
#endif /* POSIX_FADV_DONTNEED */
724
644
if (UNIV_UNLIKELY(!success)) {
725
645
ut_print_timestamp(stderr);
730
650
return(UNIV_LIKELY(success));
733
/********************************************************************//**
734
Write a merge block to the file system.
735
@return TRUE if request was successful, FALSE if fail */
653
/************************************************************************
654
Write a merge block to the file system. */
740
int fd, /*!< in: file descriptor */
741
ulint offset, /*!< in: offset where to write,
742
in number of row_merge_block_t elements */
743
const void* buf) /*!< in: data */
659
/* out: TRUE if request was
660
successful, FALSE if fail */
661
int fd, /* in: file descriptor */
662
ulint offset, /* in: offset where to write */
663
const void* buf) /* in: data */
745
size_t buf_len = sizeof(row_merge_block_t);
746
ib_uint64_t ofs = buf_len * (ib_uint64_t) offset;
749
ret = os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
750
(ulint) (ofs & 0xFFFFFFFF),
755
if (row_merge_print_block_write) {
756
fprintf(stderr, "row_merge_write fd=%d ofs=%lu\n",
759
#endif /* UNIV_DEBUG */
761
#ifdef POSIX_FADV_DONTNEED
762
/* The block will be needed on the next merge pass,
763
but it can be evicted from the file cache meanwhile. */
764
posix_fadvise(fd, ofs, buf_len, POSIX_FADV_DONTNEED);
765
#endif /* POSIX_FADV_DONTNEED */
767
return(UNIV_LIKELY(ret));
665
ib_uint64_t ofs = ((ib_uint64_t) offset)
666
* sizeof(row_merge_block_t);
668
return(UNIV_LIKELY(os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf,
669
(ulint) (ofs & 0xFFFFFFFF),
671
sizeof(row_merge_block_t))));
770
/********************************************************************//**
772
@return pointer to next record, or NULL on I/O error or end of list */
773
static __attribute__((nonnull))
674
/************************************************************************
675
Read a merge record. */
775
678
row_merge_read_rec(
776
679
/*===============*/
777
row_merge_block_t* block, /*!< in/out: file buffer */
778
mrec_buf_t* buf, /*!< in/out: secondary buffer */
779
const byte* b, /*!< in: pointer to record */
780
const dict_index_t* index, /*!< in: index of the record */
781
int fd, /*!< in: file descriptor */
782
ulint* foffs, /*!< in/out: file offset */
783
const mrec_t** mrec, /*!< out: pointer to merge record,
680
/* out: pointer to next record,
683
row_merge_block_t* block, /* in/out: file buffer */
684
mrec_buf_t* buf, /* in/out: secondary buffer */
685
const byte* b, /* in: pointer to record */
686
const dict_index_t* index, /* in: index of the record */
687
int fd, /* in: file descriptor */
688
ulint* foffs, /* in/out: file offset */
689
const mrec_t** mrec, /* out: pointer to merge record,
784
690
or NULL on end of list
785
691
(non-NULL on I/O error) */
786
ulint* offsets)/*!< out: offsets of mrec */
692
ulint* offsets)/* out: offsets of mrec */
788
694
ulint extra_size;
939
/********************************************************************//**
838
/************************************************************************
940
839
Write a merge record. */
943
842
row_merge_write_rec_low(
944
843
/*====================*/
945
byte* b, /*!< out: buffer */
946
ulint e, /*!< in: encoded extra_size */
844
byte* b, /* out: buffer */
845
ulint e, /* in: encoded extra_size */
947
846
#ifdef UNIV_DEBUG
948
ulint size, /*!< in: total size to write */
949
int fd, /*!< in: file descriptor */
950
ulint foffs, /*!< in: file offset */
847
ulint size, /* in: total size to write */
848
int fd, /* in: file descriptor */
849
ulint foffs, /* in: file offset */
951
850
#endif /* UNIV_DEBUG */
952
const mrec_t* mrec, /*!< in: record to write */
953
const ulint* offsets)/*!< in: offsets of mrec */
851
const mrec_t* mrec, /* in: record to write */
852
const ulint* offsets)/* in: offsets of mrec */
954
853
#ifndef UNIV_DEBUG
955
854
# define row_merge_write_rec_low(b, e, size, fd, foffs, mrec, offsets) \
956
855
row_merge_write_rec_low(b, e, mrec, offsets)
1086
987
return(block[0]);
1089
/*************************************************************//**
1090
Compare two merge records.
1091
@return 1, 0, -1 if mrec1 is greater, equal, less, respectively, than mrec2 */
990
/*****************************************************************
991
Compare two merge records. */
1096
const mrec_t* mrec1, /*!< in: first merge
1097
record to be compared */
1098
const mrec_t* mrec2, /*!< in: second merge
1099
record to be compared */
1100
const ulint* offsets1, /*!< in: first record offsets */
1101
const ulint* offsets2, /*!< in: second record offsets */
1102
const dict_index_t* index, /*!< in: index */
1103
ibool* null_eq) /*!< out: set to TRUE if
1104
found matching null values */
997
mrec1 is greater, equal, less,
998
respectively, than mrec2 */
999
const mrec_t* mrec1, /* in: first merge
1000
record to be compared */
1001
const mrec_t* mrec2, /* in: second merge
1002
record to be compared */
1003
const ulint* offsets1, /* in: first record offsets */
1004
const ulint* offsets2, /* in: second record offsets */
1005
const dict_index_t* index) /* in: index */
1108
cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index,
1009
cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index);
1111
1011
#ifdef UNIV_DEBUG
1112
1012
if (row_merge_print_cmp) {
1124
/********************************************************************//**
1024
/************************************************************************
1125
1025
Reads clustered index of the table and create temporary files
1126
containing the index entries for the indexes to be built.
1127
@return DB_SUCCESS or error */
1128
static __attribute__((nonnull))
1026
containing the index entries for the indexes to be built. */
1130
1029
row_merge_read_clustered_index(
1131
1030
/*===========================*/
1132
trx_t* trx, /*!< in: transaction */
1133
TABLE* table, /*!< in/out: MySQL table object,
1031
/* out: DB_SUCCESS or error */
1032
trx_t* trx, /* in: transaction */
1033
TABLE* table, /* in/out: MySQL table object,
1134
1034
for reporting erroneous records */
1135
const dict_table_t* old_table,/*!< in: table where rows are
1035
const dict_table_t* old_table,/* in: table where rows are
1137
const dict_table_t* new_table,/*!< in: table where indexes are
1037
const dict_table_t* new_table,/* in: table where indexes are
1138
1038
created; identical to old_table
1139
1039
unless creating a PRIMARY KEY */
1140
dict_index_t** index, /*!< in: indexes to be created */
1141
merge_file_t* files, /*!< in: temporary files */
1142
ulint n_index,/*!< in: number of indexes to create */
1143
row_merge_block_t* block) /*!< in/out: file buffer */
1040
dict_index_t** index, /* in: indexes to be created */
1041
merge_file_t* files, /* in: temporary files */
1042
ulint n_index,/* in: number of indexes to create */
1043
row_merge_block_t* block) /* in/out: file buffer */
1145
1045
dict_index_t* clust_index; /* Clustered index */
1146
1046
mem_heap_t* row_heap; /* Heap memory to create
1382
/** Write a record via buffer 2 and read the next record to buffer N.
1383
@param N number of the buffer (0 or 1)
1384
@param AT_END statement to execute at end of input */
1270
/*****************************************************************
1271
Merge two blocks of linked lists on disk and write a bigger block. */
1276
/* out: DB_SUCCESS or error code */
1277
const dict_index_t* index, /* in: index being created */
1278
merge_file_t* file, /* in/out: file containing
1280
row_merge_block_t* block, /* in/out: 3 buffers */
1281
ulint* foffs0, /* in/out: offset of first
1282
source list in the file */
1283
ulint* foffs1, /* in/out: offset of second
1284
source list in the file */
1285
merge_file_t* of, /* in/out: output file */
1286
TABLE* table) /* in/out: MySQL table, for
1287
reporting erroneous key value
1290
mem_heap_t* heap; /* memory heap for offsets0, offsets1 */
1292
mrec_buf_t buf[3]; /* buffer for handling split mrec in block[] */
1293
const byte* b0; /* pointer to block[0] */
1294
const byte* b1; /* pointer to block[1] */
1295
byte* b2; /* pointer to block[2] */
1296
const mrec_t* mrec0; /* merge rec, points to block[0] or buf[0] */
1297
const mrec_t* mrec1; /* merge rec, points to block[1] or buf[1] */
1298
ulint* offsets0;/* offsets of mrec0 */
1299
ulint* offsets1;/* offsets of mrec1 */
1301
heap = row_merge_heap_create(index, &offsets0, &offsets1);
1303
/* Write a record and read the next record. Split the output
1304
file in two halves, which can be merged on the following pass. */
1385
1305
#define ROW_MERGE_WRITE_GET_NEXT(N, AT_END) \
1387
1307
b2 = row_merge_write_rec(&block[2], &buf[2], b2, \
1388
1308
of->fd, &of->offset, \
1389
1309
mrec##N, offsets##N); \
1390
if (UNIV_UNLIKELY(!b2 || ++of->n_rec > file->n_rec)) { \
1310
if (UNIV_UNLIKELY(!b2)) { \
1391
1311
goto corrupt; \
1393
1313
b##N = row_merge_read_rec(&block[N], &buf[N], \
1405
/*************************************************************//**
1406
Merge two blocks of records on disk and write a bigger block.
1407
@return DB_SUCCESS or error code */
1412
const dict_index_t* index, /*!< in: index being created */
1413
const merge_file_t* file, /*!< in: file containing
1415
row_merge_block_t* block, /*!< in/out: 3 buffers */
1416
ulint* foffs0, /*!< in/out: offset of first
1417
source list in the file */
1418
ulint* foffs1, /*!< in/out: offset of second
1419
source list in the file */
1420
merge_file_t* of, /*!< in/out: output file */
1421
TABLE* table) /*!< in/out: MySQL table, for
1422
reporting erroneous key value
1425
mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */
1427
mrec_buf_t* buf; /*!< buffer for handling
1428
split mrec in block[] */
1429
const byte* b0; /*!< pointer to block[0] */
1430
const byte* b1; /*!< pointer to block[1] */
1431
byte* b2; /*!< pointer to block[2] */
1432
const mrec_t* mrec0; /*!< merge rec, points to block[0] or buf[0] */
1433
const mrec_t* mrec1; /*!< merge rec, points to block[1] or buf[1] */
1434
ulint* offsets0;/* offsets of mrec0 */
1435
ulint* offsets1;/* offsets of mrec1 */
1438
if (row_merge_print_block) {
1440
"row_merge_blocks fd=%d ofs=%lu + fd=%d ofs=%lu"
1441
" = fd=%d ofs=%lu\n",
1442
file->fd, (ulong) *foffs0,
1443
file->fd, (ulong) *foffs1,
1444
of->fd, (ulong) of->offset);
1446
#endif /* UNIV_DEBUG */
1448
heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);
1450
buf = static_cast<mrec_buf_t *>(mem_heap_alloc(heap, sizeof(mrec_buf_t) * 3));
1452
/* Write a record and read the next record. Split the output
1453
file in two halves, which can be merged on the following pass. */
1455
1325
if (!row_merge_read(file->fd, *foffs0, &block[0])
1456
1326
|| !row_merge_read(file->fd, *foffs1, &block[1])) {
1520
1388
return(b2 ? DB_SUCCESS : DB_CORRUPTION);
1523
/*************************************************************//**
1524
Copy a block of index entries.
1525
@return TRUE on success, FALSE on failure */
1526
static __attribute__((nonnull))
1528
row_merge_blocks_copy(
1529
/*==================*/
1530
const dict_index_t* index, /*!< in: index being created */
1531
const merge_file_t* file, /*!< in: input file */
1532
row_merge_block_t* block, /*!< in/out: 3 buffers */
1533
ulint* foffs0, /*!< in/out: input file offset */
1534
merge_file_t* of) /*!< in/out: output file */
1536
mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */
1538
mrec_buf_t* buf; /*!< buffer for handling
1539
split mrec in block[] */
1540
const byte* b0; /*!< pointer to block[0] */
1541
byte* b2; /*!< pointer to block[2] */
1542
const mrec_t* mrec0; /*!< merge rec, points to block[0] */
1543
ulint* offsets0;/* offsets of mrec0 */
1544
ulint* offsets1;/* dummy offsets */
1547
if (row_merge_print_block) {
1549
"row_merge_blocks_copy fd=%d ofs=%lu"
1550
" = fd=%d ofs=%lu\n",
1551
file->fd, (ulong) foffs0,
1552
of->fd, (ulong) of->offset);
1554
#endif /* UNIV_DEBUG */
1556
heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);
1557
buf = static_cast<mrec_buf_t *>(mem_heap_alloc(heap, sizeof(mrec_buf_t) * 3));
1559
/* Write a record and read the next record. Split the output
1560
file in two halves, which can be merged on the following pass. */
1562
if (!row_merge_read(file->fd, *foffs0, &block[0])) {
1564
mem_heap_free(heap);
1571
b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, file->fd,
1572
foffs0, &mrec0, offsets0);
1573
if (UNIV_UNLIKELY(!b0 && mrec0)) {
1579
/* append all mrec0 to output */
1581
ROW_MERGE_WRITE_GET_NEXT(0, goto done0);
1586
/* The file offset points to the beginning of the last page
1587
that has been read. Update it to point to the next block. */
1590
mem_heap_free(heap);
1591
return(row_merge_write_eof(&block[2], b2, of->fd, &of->offset)
1595
/*************************************************************//**
1597
@return DB_SUCCESS or error code */
1598
static __attribute__((nonnull))
1391
/*****************************************************************
1392
Merge disk files. */
1602
trx_t* trx, /*!< in: transaction */
1603
const dict_index_t* index, /*!< in: index being created */
1604
merge_file_t* file, /*!< in/out: file containing
1397
/* out: DB_SUCCESS or error code */
1398
const dict_index_t* index, /* in: index being created */
1399
merge_file_t* file, /* in/out: file containing
1605
1400
index entries */
1606
row_merge_block_t* block, /*!< in/out: 3 buffers */
1607
int* tmpfd, /*!< in/out: temporary file handle */
1608
TABLE* table, /*!< in/out: MySQL table, for
1609
reporting erroneous key value
1611
ulint* num_run,/*!< in/out: Number of runs remain
1613
ulint* run_offset) /*!< in/out: Array contains the
1614
first offset number for each merge
1401
ulint half, /* in: half the file */
1402
row_merge_block_t* block, /* in/out: 3 buffers */
1403
int* tmpfd, /* in/out: temporary file handle */
1404
TABLE* table) /* in/out: MySQL table, for
1405
reporting erroneous key value
1617
ulint foffs0; /*!< first input offset */
1618
ulint foffs1; /*!< second input offset */
1619
ulint error; /*!< error code */
1620
merge_file_t of; /*!< output file */
1621
const ulint ihalf = run_offset[*num_run / 2];
1622
/*!< half the input file */
1624
/*!< num of runs generated from this merge */
1408
ulint foffs0; /* first input offset */
1409
ulint foffs1; /* second input offset */
1410
ulint error; /* error code */
1411
merge_file_t of; /* output file */
1627
1413
UNIV_MEM_ASSERT_W(block[0], 3 * sizeof block[0]);
1628
ut_ad(ihalf < file->offset);
1630
1416
of.fd = *tmpfd;
1634
#ifdef POSIX_FADV_SEQUENTIAL
1635
/* The input file will be read sequentially, starting from the
1636
beginning and the middle. In Linux, the POSIX_FADV_SEQUENTIAL
1637
affects the entire file. Each block will be read exactly once. */
1638
posix_fadvise(file->fd, 0, 0,
1639
POSIX_FADV_SEQUENTIAL | POSIX_FADV_NOREUSE);
1640
#endif /* POSIX_FADV_SEQUENTIAL */
1642
1419
/* Merge blocks to the output file. */
1646
UNIV_MEM_INVALID(run_offset, *num_run * sizeof *run_offset);
1648
for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) {
1650
if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
1651
return(DB_INTERRUPTED);
1654
/* Remember the offset number for this run */
1655
run_offset[n_run++] = of.offset;
1423
for (; foffs0 < half && foffs1 < file->offset; foffs0++, foffs1++) {
1657
1424
error = row_merge_blocks(index, file, block,
1658
1425
&foffs0, &foffs1, &of, table);
1660
1427
if (error != DB_SUCCESS) {
1666
/* Copy the last blocks, if there are any. */
1668
while (foffs0 < ihalf) {
1669
if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
1670
return(DB_INTERRUPTED);
1673
/* Remember the offset number for this run */
1674
run_offset[n_run++] = of.offset;
1676
if (!row_merge_blocks_copy(index, file, block, &foffs0, &of)) {
1432
/* Copy the last block, if there is one. */
1433
while (foffs0 < half) {
1434
if (!row_merge_read(file->fd, foffs0++, block)
1435
|| !row_merge_write(of.fd, of.offset++, block)) {
1677
1436
return(DB_CORRUPTION);
1681
ut_ad(foffs0 == ihalf);
1683
1439
while (foffs1 < file->offset) {
1684
if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
1685
return(DB_INTERRUPTED);
1688
/* Remember the offset number for this run */
1689
run_offset[n_run++] = of.offset;
1691
if (!row_merge_blocks_copy(index, file, block, &foffs1, &of)) {
1440
if (!row_merge_read(file->fd, foffs1++, block)
1441
|| !row_merge_write(of.fd, of.offset++, block)) {
1692
1442
return(DB_CORRUPTION);
1696
ut_ad(foffs1 == file->offset);
1698
if (UNIV_UNLIKELY(of.n_rec != file->n_rec)) {
1699
return(DB_CORRUPTION);
1702
ut_ad(n_run <= *num_run);
1706
/* Each run can contain one or more offsets. As merge goes on,
1707
the number of runs (to merge) will reduce until we have one
1708
single run. So the number of runs will always be smaller than
1709
the number of offsets in file */
1710
ut_ad((*num_run) <= file->offset);
1712
/* The number of offsets in output file is always equal or
1713
smaller than input file */
1714
ut_ad(of.offset <= file->offset);
1716
1446
/* Swap file descriptors for the next pass. */
1717
1447
*tmpfd = file->fd;
1722
1452
return(DB_SUCCESS);
1725
/*************************************************************//**
1727
@return DB_SUCCESS or error code */
1455
/*****************************************************************
1456
Merge disk files. */
1730
1459
row_merge_sort(
1731
1460
/*===========*/
1732
trx_t* trx, /*!< in: transaction */
1733
const dict_index_t* index, /*!< in: index being created */
1734
merge_file_t* file, /*!< in/out: file containing
1461
/* out: DB_SUCCESS or error code */
1462
const dict_index_t* index, /* in: index being created */
1463
merge_file_t* file, /* in/out: file containing
1735
1464
index entries */
1736
row_merge_block_t* block, /*!< in/out: 3 buffers */
1737
int* tmpfd, /*!< in/out: temporary file handle */
1738
TABLE* table) /*!< in/out: MySQL table, for
1465
row_merge_block_t* block, /* in/out: 3 buffers */
1466
int* tmpfd, /* in/out: temporary file handle */
1467
TABLE* table) /* in/out: MySQL table, for
1739
1468
reporting erroneous key value
1740
1469
if applicable */
1742
ulint half = file->offset / 2;
1745
ulint error = DB_SUCCESS;
1747
/* Record the number of merge runs we need to perform */
1748
num_runs = file->offset;
1750
/* If there is at most one run, there is nothing to merge */
1751
if (num_runs <= 1) {
1755
/* "run_offset" records each run's first offset number */
1756
run_offset = (ulint*) mem_alloc(file->offset * sizeof(ulint));
1758
/* This tells row_merge() where to start for the first round
1760
run_offset[half] = half;
1762
/* The file should always contain at least one byte (the end
1763
of file marker). Thus, it must be at least one block. */
1764
ut_ad(file->offset > 0);
1766
/* Merge the runs until we have one big run */
1768
error = row_merge(trx, index, file, block, tmpfd,
1769
table, &num_runs, run_offset);
1771
UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset);
1471
ulint blksz; /* block size */
1473
for (blksz = 1; blksz < file->offset; blksz *= 2) {
1477
ut_ad(ut_is_2pow(blksz));
1478
half = ut_2pow_round((file->offset + (blksz - 1)) / 2, blksz);
1479
error = row_merge(index, file, half, block, tmpfd, table);
1773
1481
if (error != DB_SUCCESS) {
1776
} while (num_runs > 1);
1778
mem_free(run_offset);
1783
/*************************************************************//**
1489
/*****************************************************************
1784
1490
Copy externally stored columns to the data tuple. */
1787
1493
row_merge_copy_blobs(
1788
1494
/*=================*/
1789
const mrec_t* mrec, /*!< in: merge record */
1790
const ulint* offsets,/*!< in: offsets of mrec */
1791
ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
1792
dtuple_t* tuple, /*!< in/out: data tuple */
1793
mem_heap_t* heap) /*!< in/out: memory heap */
1495
const mrec_t* mrec, /* in: merge record */
1496
const ulint* offsets,/* in: offsets of mrec */
1497
ulint zip_size,/* in: compressed page size in bytes, or 0 */
1498
dtuple_t* tuple, /* in/out: data tuple */
1499
mem_heap_t* heap) /* in/out: memory heap */
1796
1502
ulint n_fields = dtuple_get_n_fields(tuple);
1814
1520
data = btr_rec_copy_externally_stored_field(
1815
1521
mrec, offsets, zip_size, i, &len, heap);
1816
/* Because we have locked the table, any records
1817
written by incomplete transactions must have been
1818
rolled back already. There must not be any incomplete
1822
1523
dfield_set_data(field, data, len);
1826
/********************************************************************//**
1527
/************************************************************************
1827
1528
Read sorted file containing index data tuples and insert these data
1829
@return DB_SUCCESS or error number */
1529
tuples to the index */
1832
1532
row_merge_insert_index_tuples(
1833
1533
/*==========================*/
1834
trx_t* trx, /*!< in: transaction */
1835
dict_index_t* index, /*!< in: index */
1836
dict_table_t* table, /*!< in: new table */
1837
ulint zip_size,/*!< in: compressed page size of
1534
/* out: DB_SUCCESS or error number */
1535
trx_t* trx, /* in: transaction */
1536
dict_index_t* index, /* in: index */
1537
dict_table_t* table, /* in: new table */
1538
ulint zip_size,/* in: compressed page size of
1838
1539
the old table, or 0 if uncompressed */
1839
int fd, /*!< in: file descriptor */
1840
row_merge_block_t* block) /*!< in/out: file buffer */
1540
int fd, /* in: file descriptor */
1541
row_merge_block_t* block) /* in/out: file buffer */
1843
1545
que_thr_t* thr;
1844
1546
ins_node_t* node;
2111
1805
/*=============================*/
2117
/* Load the table definitions that contain partially defined
2118
indexes, so that the data dictionary information can be checked
2119
when accessing the tablename.ibd files. */
1810
/* We use the private SQL parser of Innobase to generate the
1811
query graphs needed in deleting the dictionary data from system
1812
tables in Innobase. Deleting a row from SYS_INDEXES table also
1813
frees the file segments of the B-tree associated with the index. */
1814
#if TEMP_INDEX_PREFIX != '\377'
1815
# error "TEMP_INDEX_PREFIX != '\377'"
1817
static const char drop_temp_indexes[] =
1818
"PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
1820
"DECLARE CURSOR c IS SELECT ID FROM SYS_INDEXES\n"
1821
"WHERE SUBSTR(NAME,0,1)='\377' FOR UPDATE;\n"
1825
"\t\tFETCH c INTO indexid;\n"
1826
"\t\tIF (SQL % NOTFOUND) THEN\n"
1829
"\t\tDELETE FROM SYS_FIELDS WHERE INDEX_ID = indexid;\n"
1830
"\t\tDELETE FROM SYS_INDEXES WHERE CURRENT OF c;\n"
2121
1836
trx = trx_allocate_for_background();
2122
1837
trx->op_info = "dropping partially created indexes";
2123
1838
row_mysql_lock_data_dictionary(trx);
2127
btr_pcur_open_at_index_side(
2129
dict_table_get_first_index(dict_sys->sys_indexes),
2130
BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
2136
table_id_t table_id;
2137
dict_table_t* table;
2139
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
2141
if (!btr_pcur_is_on_user_rec(&pcur)) {
2145
rec = btr_pcur_get_rec(&pcur);
2146
field = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_NAME_FIELD,
2148
if (len == UNIV_SQL_NULL || len == 0
2149
|| (char) *field != TEMP_INDEX_PREFIX) {
2153
/* This is a temporary index. */
2155
field = rec_get_nth_field_old(rec, 0/*TABLE_ID*/, &len);
2157
/* Corrupted TABLE_ID */
2161
table_id = mach_read_from_8(field);
2163
btr_pcur_store_position(&pcur, &mtr);
2164
btr_pcur_commit_specify_mtr(&pcur, &mtr);
2166
table = dict_table_get_on_id_low(table_id);
2169
dict_index_t* index;
2170
dict_index_t* next_index;
2172
for (index = dict_table_get_first_index(table);
2173
index; index = next_index) {
2175
next_index = dict_table_get_next_index(index);
2177
if (*index->name == TEMP_INDEX_PREFIX) {
2178
row_merge_drop_index(index, table, trx);
2179
trx_commit_for_mysql(trx);
2185
btr_pcur_restore_position(BTR_SEARCH_LEAF,
2189
btr_pcur_close(&pcur);
1840
err = que_eval_sql(NULL, drop_temp_indexes, FALSE, trx);
1841
ut_a(err == DB_SUCCESS);
2191
1843
row_mysql_unlock_data_dictionary(trx);
2192
1844
trx_free_for_background(trx);
2195
/*********************************************************************//**
1847
/*************************************************************************
2196
1848
Create a merge file. */
2199
1851
row_merge_file_create(
2200
1852
/*==================*/
2201
merge_file_t* merge_file) /*!< out: merge file structure */
1853
merge_file_t* merge_file) /* out: merge file structure */
2204
/* This temp file open does not go through normal
2205
file APIs, add instrumentation to register with
2206
performance schema */
2207
struct PSI_file_locker* locker = NULL;
2208
PSI_file_locker_state state;
2209
register_pfs_file_open_begin(&state, locker, innodb_file_temp_key,
2211
"Innodb Merge Temp File",
2212
__FILE__, __LINE__);
2214
1855
merge_file->fd = innobase_mysql_tmpfile();
2215
1856
merge_file->offset = 0;
2216
merge_file->n_rec = 0;
2218
register_pfs_file_open_end(locker, merge_file->fd);
2222
/*********************************************************************//**
1859
/*************************************************************************
2223
1860
Destroy a merge file. */
2226
1863
row_merge_file_destroy(
2227
1864
/*===================*/
2228
merge_file_t* merge_file) /*!< in/out: merge file structure */
1865
merge_file_t* merge_file) /* in/out: merge file structure */
2231
struct PSI_file_locker* locker = NULL;
2232
PSI_file_locker_state state;
2233
register_pfs_file_io_begin(&state, locker, merge_file->fd, 0, PSI_FILE_CLOSE,
2234
__FILE__, __LINE__);
2236
1867
if (merge_file->fd != -1) {
2237
1868
close(merge_file->fd);
2238
1869
merge_file->fd = -1;
2242
register_pfs_file_io_end(locker, 0);
2246
/*********************************************************************//**
1873
/*************************************************************************
2247
1874
Determine the precise type of a column that is added to a temporary table,
2248
if a column must be constrained NOT NULL.
2249
@return col->prtype, possibly ORed with DATA_NOT_NULL */
1875
if a column must be constrained NOT NULL. */
2252
1878
row_merge_col_prtype(
2253
1879
/*=================*/
2254
const dict_col_t* col, /*!< in: column */
2255
const char* col_name, /*!< in: name of the column */
2256
const merge_index_def_t*index_def) /*!< in: the index definition
1880
/* out: col->prtype, possibly
1881
ORed with DATA_NOT_NULL */
1882
const dict_col_t* col, /* in: column */
1883
const char* col_name, /* in: name of the column */
1884
const merge_index_def_t*index_def) /* in: the index definition
2257
1885
of the primary key */
2259
1887
ulint prtype = col->prtype;
2572
/*********************************************************************//**
2185
#ifdef ROW_MERGE_IS_INDEX_USABLE
2186
/*************************************************************************
2573
2187
Check if a transaction can use an index. */
2576
2190
row_merge_is_index_usable(
2577
2191
/*======================*/
2578
const trx_t* trx, /*!< in: transaction */
2579
const dict_index_t* index) /*!< in: index to check */
2192
const trx_t* trx, /* in: transaction */
2193
const dict_index_t* index) /* in: index to check */
2581
return(!trx->read_view
2582
|| read_view_sees_trx_id(trx->read_view, index->trx_id));
2195
if (!trx->read_view) {
2199
return(ut_dulint_cmp(index->trx_id, trx->read_view->low_limit_id) < 0);
2201
#endif /* ROW_MERGE_IS_INDEX_USABLE */
2585
/*********************************************************************//**
2587
@return DB_SUCCESS or error code */
2203
/*************************************************************************
2204
Drop the old table. */
2590
2207
row_merge_drop_table(
2591
2208
/*=================*/
2592
trx_t* trx, /*!< in: transaction */
2593
dict_table_t* table) /*!< in: table to drop */
2209
/* out: DB_SUCCESS or error code */
2210
trx_t* trx, /* in: transaction */
2211
dict_table_t* table) /* in: table to drop */
2595
2213
/* There must be no open transactions on the table. */
2596
2214
ut_a(table->n_mysql_handles_opened == 0);
2598
2216
return(row_drop_table_for_mysql(table->name, trx, FALSE));
2601
/*********************************************************************//**
2219
/*************************************************************************
2602
2220
Build indexes on a table by reading a clustered index,
2603
2221
creating a temporary file containing index entries, merge sorting
2604
these index entries and inserting sorted index entries to indexes.
2605
@return DB_SUCCESS or error code */
2222
these index entries and inserting sorted index entries to indexes. */
2608
2225
row_merge_build_indexes(
2609
2226
/*====================*/
2610
trx_t* trx, /*!< in: transaction */
2611
dict_table_t* old_table, /*!< in: table where rows are
2227
/* out: DB_SUCCESS or error code */
2228
trx_t* trx, /* in: transaction */
2229
dict_table_t* old_table, /* in: table where rows are
2613
dict_table_t* new_table, /*!< in: table where indexes are
2231
dict_table_t* new_table, /* in: table where indexes are
2614
2232
created; identical to old_table
2615
2233
unless creating a PRIMARY KEY */
2616
dict_index_t** indexes, /*!< in: indexes to be created */
2617
ulint n_indexes, /*!< in: size of indexes[] */
2618
TABLE* table) /*!< in/out: MySQL table, for
2234
dict_index_t** indexes, /* in: indexes to be created */
2235
ulint n_indexes, /* in: size of indexes[] */
2236
TABLE* table) /* in/out: MySQL table, for
2619
2237
reporting erroneous key value
2620
2238
if applicable */