803
ibuf_bitmap_get_map_page_func(
804
/*==========================*/
805
ulint space, /*!< in: space id of the file page */
806
ulint page_no,/*!< in: page number of the file page */
807
ulint zip_size,/*!< in: compressed page size in bytes;
808
0 for uncompressed pages */
809
const char* file, /*!< in: file name */
810
ulint line, /*!< in: line where called */
811
mtr_t* mtr) /*!< in: mtr */
712
ibuf_bitmap_get_map_page(
713
/*=====================*/
714
ulint space, /*!< in: space id of the file page */
715
ulint page_no,/*!< in: page number of the file page */
716
ulint zip_size,/*!< in: compressed page size in bytes;
717
0 for uncompressed pages */
718
mtr_t* mtr) /*!< in: mtr */
813
720
buf_block_t* block;
815
block = buf_page_get_gen(space, zip_size,
816
ibuf_bitmap_page_no_calc(zip_size, page_no),
817
RW_X_LATCH, NULL, BUF_GET,
722
block = buf_page_get(space, zip_size,
723
ibuf_bitmap_page_no_calc(zip_size, page_no),
819
725
buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP);
821
727
return(buf_block_get_frame(block));
824
/********************************************************************//**
825
Gets the ibuf bitmap page where the bits describing a given file page are
827
@return bitmap page where the file page is mapped, that is, the bitmap
828
page containing the descriptor bits for the file page; the bitmap page
830
@param space in: space id of the file page
831
@param page_no in: page number of the file page
832
@param zip_size in: compressed page size in bytes; 0 for uncompressed pages
833
@param mtr in: mini-transaction */
834
#define ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr) \
835
ibuf_bitmap_get_map_page_func(space, page_no, zip_size, \
836
__FILE__, __LINE__, mtr)
838
730
/************************************************************************//**
839
731
Sets the free bits of the page in the ibuf bitmap. This is done in a separate
840
732
mini-transaction, hence this operation does not restrict further work to only
1223
/****************************************************************//**
1224
Get various information about an ibuf record in >= 4.1.x format. */
1229
const rec_t* rec, /*!< in: ibuf record */
1230
ibuf_op_t* op, /*!< out: operation type, or NULL */
1231
ibool* comp, /*!< out: compact flag, or NULL */
1232
ulint* info_len, /*!< out: length of info fields at the
1233
start of the fourth field, or
1235
ulint* counter) /*!< out: counter value, or NULL */
1241
/* Local variables to shadow arguments. */
1244
ulint info_len_local;
1245
ulint counter_local;
1247
ut_ad(ibuf_inside());
1248
fields = rec_get_n_fields_old(rec);
1251
types = rec_get_nth_field_old(rec, 3, &len);
1253
info_len_local = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
1255
switch (info_len_local) {
1258
op_local = IBUF_OP_INSERT;
1259
comp_local = info_len_local;
1261
counter_local = ULINT_UNDEFINED;
1264
case IBUF_REC_INFO_SIZE:
1265
op_local = (ibuf_op_t)types[IBUF_REC_OFFSET_TYPE];
1266
comp_local = types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT;
1267
counter_local = mach_read_from_2(
1268
types + IBUF_REC_OFFSET_COUNTER);
1275
ut_a(op_local < IBUF_OP_COUNT);
1276
ut_a((len - info_len_local) ==
1277
(fields - 4) * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1288
*info_len = info_len_local;
1292
*counter = counter_local;
1296
/****************************************************************//**
1297
Returns the operation type field of an ibuf record.
1298
@return operation type */
1301
ibuf_rec_get_op_type(
1302
/*=================*/
1303
const rec_t* rec) /*!< in: ibuf record */
1307
ut_ad(ibuf_inside());
1308
ut_ad(rec_get_n_fields_old(rec) > 2);
1310
(void) rec_get_nth_field_old(rec, 1, &len);
1313
/* This is a < 4.1.x format record */
1315
return(IBUF_OP_INSERT);
1319
ibuf_rec_get_info(rec, &op, NULL, NULL, NULL);
1325
/****************************************************************//**
1326
Read the first two bytes from a record's fourth field (counter field in new
1327
records; something else in older records).
1328
@return "counter" field, or ULINT_UNDEFINED if for some reason it
1332
ibuf_rec_get_counter(
1333
/*=================*/
1334
const rec_t* rec) /*!< in: ibuf record */
1339
if (rec_get_n_fields_old(rec) < 4) {
1341
return(ULINT_UNDEFINED);
1344
ptr = rec_get_nth_field_old(rec, 3, &len);
1348
return(mach_read_from_2(ptr));
1351
return(ULINT_UNDEFINED);
1355
/****************************************************************//**
1356
Add accumulated operation counts to a permanent array. Both arrays must be
1357
of size IBUF_OP_COUNT. */
1362
ulint* arr, /*!< in/out: array to modify */
1363
const ulint* ops) /*!< in: operation counts */
1368
#ifndef HAVE_ATOMIC_BUILTINS
1369
ut_ad(mutex_own(&ibuf_mutex));
1370
#endif /* !HAVE_ATOMIC_BUILTINS */
1372
for (i = 0; i < IBUF_OP_COUNT; i++) {
1373
#ifdef HAVE_ATOMIC_BUILTINS
1374
os_atomic_increment_ulint(&arr[i], ops[i]);
1375
#else /* HAVE_ATOMIC_BUILTINS */
1377
#endif /* HAVE_ATOMIC_BUILTINS */
1381
/****************************************************************//**
1382
Print operation counts. The array must be of size IBUF_OP_COUNT. */
1387
const ulint* ops, /*!< in: operation counts */
1388
FILE* file) /*!< in: file where to print */
1390
static const char* op_names[] = {
1397
ut_a(UT_ARR_SIZE(op_names) == IBUF_OP_COUNT);
1399
for (i = 0; i < IBUF_OP_COUNT; i++) {
1400
fprintf(file, "%s %lu%s", op_names[i],
1401
(ulong) ops[i], (i < (IBUF_OP_COUNT - 1)) ? ", " : "");
1407
1115
/********************************************************************//**
1408
1116
Creates a dummy index for inserting a record to a non-clustered index.
1409
1118
@return dummy index */
1699
1351
/* >= 4.1.x format record */
1703
1353
ut_a(trx_sys_multiple_tablespace_format);
1704
1354
ut_a(*data == 0);
1706
1356
types = rec_get_nth_field_old(ibuf_rec, 3, &len);
1708
ibuf_rec_get_info(ibuf_rec, &op, &comp, &info_len, NULL);
1710
if (op == IBUF_OP_DELETE_MARK || op == IBUF_OP_DELETE) {
1711
/* Delete-marking a record doesn't take any
1712
additional space, and while deleting a record
1713
actually frees up space, we have to play it safe and
1714
pretend it takes no additional space (the record
1715
might not exist, etc.). */
1358
comp = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
1362
/* compact record format */
1721
1364
dict_index_t* dummy_index;
1722
1365
mem_heap_t* heap = mem_heap_create(500);
1724
entry = ibuf_build_entry_from_ibuf_rec(
1366
dtuple_t* entry = ibuf_build_entry_from_ibuf_rec(
1725
1367
ibuf_rec, heap, &dummy_index);
1727
1368
volume = rec_get_converted_size(dummy_index, entry, 0);
1729
1369
ibuf_dummy_index_free(dummy_index);
1730
1370
mem_heap_free(heap);
1732
1371
return(volume + page_dir_calc_reserved_space(1));
1736
1374
n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
1739
data_size = ibuf_rec_get_size(ibuf_rec, types, n_fields, pre_4_1, comp);
1379
for (i = 0; i < n_fields; i++) {
1381
data = rec_get_nth_field_old(ibuf_rec, i + 4, &len);
1383
dtype_new_read_for_order_and_null_size(
1385
* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1387
data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
1389
dtype_read_for_order_and_null_size(
1391
* DATA_ORDER_NULL_TYPE_BUF_SIZE);
1394
if (len == UNIV_SQL_NULL) {
1395
data_size += dtype_get_sql_null_size(&dtype, comp);
1741
1401
return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
1742
1402
+ page_dir_calc_reserved_space(1));
2640
2260
ulint sum_sizes;
2644
/* Perform dirty reads of ibuf->size and ibuf->max_size, to
2645
reduce ibuf_mutex contention. ibuf->max_size remains constant
2646
after ibuf_init_at_db_start(), but ibuf->size should be
2647
protected by ibuf_mutex. Given that ibuf->size fits in a
2648
machine word, this should be OK; at worst we are doing some
2649
excessive ibuf_contract() or occasionally skipping a
2652
max_size = ibuf->max_size;
2654
if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
2263
mutex_enter(&ibuf_mutex);
2265
if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
2266
mutex_exit(&ibuf_mutex);
2658
sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
2273
if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) {
2278
mutex_exit(&ibuf_mutex);
2660
2280
/* Contract at least entry_size many bytes */
2284
while ((size > 0) && (sum_sizes < entry_size)) {
2666
2286
size = ibuf_contract(sync);
2667
2287
sum_sizes += size;
2668
} while (size > 0 && sum_sizes < entry_size);
2671
/*********************************************************************//**
2672
Determine if an insert buffer record has been encountered already.
2673
@return TRUE if a new record, FALSE if possible duplicate */
2676
ibuf_get_volume_buffered_hash(
2677
/*==========================*/
2678
const rec_t* rec, /*!< in: ibuf record in post-4.1 format */
2679
const byte* types, /*!< in: fields */
2680
const byte* data, /*!< in: start of user record data */
2681
ulint comp, /*!< in: 0=ROW_FORMAT=REDUNDANT,
2682
nonzero=ROW_FORMAT=COMPACT */
2683
ulint* hash, /*!< in/out: hash array */
2684
ulint size) /*!< in: number of elements in hash array */
2690
len = ibuf_rec_get_size(rec, types, rec_get_n_fields_old(rec) - 4,
2692
fold = ut_fold_binary(data, len);
2694
hash += (fold / (8 * sizeof *hash)) % size; // 8 = bits in byte
2695
bitmask = 1 << (fold % (8 * sizeof *hash));
2697
if (*hash & bitmask) {
2702
/* We have not seen this record yet. Insert it. */
2708
/*********************************************************************//**
2709
Update the estimate of the number of records on a page, and
2710
get the space taken by merging the buffered record to the index page.
2711
@return size of index record in bytes + an upper limit of the space
2712
taken in the page directory */
2715
ibuf_get_volume_buffered_count(
2716
/*===========================*/
2717
const rec_t* rec, /*!< in: insert buffer record */
2718
ulint* hash, /*!< in/out: hash array */
2719
ulint size, /*!< in: number of elements in hash array */
2720
lint* n_recs) /*!< in/out: estimated number of records
2721
on the page that rec points to */
2726
ulint n_fields = rec_get_n_fields_old(rec);
2728
ut_ad(ibuf_inside());
2729
ut_ad(n_fields > 4);
2732
rec_get_nth_field_offs_old(rec, 1, &len);
2733
/* This function is only invoked when buffering new
2734
operations. All pre-4.1 records should have been merged
2735
when the database was started up. */
2737
ut_ad(trx_sys_multiple_tablespace_format);
2739
types = rec_get_nth_field_old(rec, 3, &len);
2741
switch (UNIV_EXPECT(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
2742
IBUF_REC_INFO_SIZE)) {
2746
/* This ROW_FORMAT=REDUNDANT record does not include an
2747
operation counter. Exclude it from the *n_recs,
2748
because deletes cannot be buffered if there are
2749
old-style inserts buffered for the page. */
2751
len = ibuf_rec_get_size(rec, types, n_fields, FALSE, 0);
2754
+ rec_get_converted_extra_size(len, n_fields, 0)
2755
+ page_dir_calc_reserved_space(1));
2757
/* This ROW_FORMAT=COMPACT record does not include an
2758
operation counter. Exclude it from the *n_recs,
2759
because deletes cannot be buffered if there are
2760
old-style inserts buffered for the page. */
2761
goto get_volume_comp;
2763
case IBUF_REC_INFO_SIZE:
2764
ibuf_op = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
2769
case IBUF_OP_INSERT:
2770
/* Inserts can be done by updating a delete-marked record.
2771
Because delete-mark and insert operations can be pointing to
2772
the same records, we must not count duplicates. */
2773
case IBUF_OP_DELETE_MARK:
2774
/* There must be a record to delete-mark.
2775
See if this record has been already buffered. */
2776
if (n_recs && ibuf_get_volume_buffered_hash(
2777
rec, types + IBUF_REC_INFO_SIZE,
2779
types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT,
2784
if (ibuf_op == IBUF_OP_DELETE_MARK) {
2785
/* Setting the delete-mark flag does not
2786
affect the available space on the page. */
2790
case IBUF_OP_DELETE:
2791
/* A record will be removed from the page. */
2795
/* While deleting a record actually frees up space,
2796
we have to play it safe and pretend that it takes no
2797
additional space (the record might not exist, etc.). */
2803
ut_ad(ibuf_op == IBUF_OP_INSERT);
2809
dict_index_t* dummy_index;
2810
mem_heap_t* heap = mem_heap_create(500);
2812
entry = ibuf_build_entry_from_ibuf_rec(
2813
rec, heap, &dummy_index);
2815
volume = rec_get_converted_size(dummy_index, entry, 0);
2817
ibuf_dummy_index_free(dummy_index);
2818
mem_heap_free(heap);
2820
return(volume + page_dir_calc_reserved_space(1));
3072
2519
fil_set_max_space_id_if_bigger(max_space_id);
3075
/****************************************************************//**
3076
Helper function for ibuf_set_entry_counter. Checks if rec is for (space,
3077
page_no), and if so, reads counter value from it and returns that + 1.
3078
Otherwise, returns 0.
3079
@return new counter value, or 0 */
3082
ibuf_get_entry_counter_low(
3083
/*=======================*/
3084
const rec_t* rec, /*!< in: insert buffer record */
3085
ulint space, /*!< in: space id */
3086
ulint page_no) /*!< in: page number */
3092
ut_ad(ibuf_inside());
3093
ut_ad(rec_get_n_fields_old(rec) > 2);
3095
field = rec_get_nth_field_old(rec, 1, &len);
3097
if (UNIV_UNLIKELY(len != 1)) {
3098
/* pre-4.1 format */
3099
ut_a(trx_doublewrite_must_reset_space_ids);
3100
ut_a(!trx_sys_multiple_tablespace_format);
3102
return(ULINT_UNDEFINED);
3105
ut_a(trx_sys_multiple_tablespace_format);
3107
/* Check the tablespace identifier. */
3108
field = rec_get_nth_field_old(rec, 0, &len);
3111
if (mach_read_from_4(field) != space) {
3116
/* Check the page offset. */
3117
field = rec_get_nth_field_old(rec, 2, &len);
3120
if (mach_read_from_4(field) != page_no) {
3125
/* Check if the record contains a counter field. */
3126
field = rec_get_nth_field_old(rec, 3, &len);
3128
switch (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
3131
case 0: /* ROW_FORMAT=REDUNDANT */
3132
case 1: /* ROW_FORMAT=COMPACT */
3133
return(ULINT_UNDEFINED);
3135
case IBUF_REC_INFO_SIZE:
3136
counter = mach_read_from_2(field + IBUF_REC_OFFSET_COUNTER);
3137
ut_a(counter < 0xFFFF);
3138
return(counter + 1);
3142
/****************************************************************//**
3143
Set the counter field in entry to the correct value based on the current
3144
last record in ibuf for (space, page_no).
3145
@return FALSE if we should abort this insertion to ibuf */
3148
ibuf_set_entry_counter(
3149
/*===================*/
3150
dtuple_t* entry, /*!< in/out: entry to patch */
3151
ulint space, /*!< in: space id of entry */
3152
ulint page_no, /*!< in: page number of entry */
3153
btr_pcur_t* pcur, /*!< in: pcur positioned on the record
3154
found by btr_pcur_open(.., entry,
3155
PAGE_CUR_LE, ..., pcur, ...) */
3156
ibool is_optimistic, /*!< in: is this an optimistic insert */
3157
mtr_t* mtr) /*!< in: mtr */
3163
/* pcur points to either a user rec or to a page's infimum record. */
3164
ut_ad(page_validate(btr_pcur_get_page(pcur), ibuf->index));
3166
if (btr_pcur_is_on_user_rec(pcur)) {
3168
counter = ibuf_get_entry_counter_low(
3169
btr_pcur_get_rec(pcur), space, page_no);
3171
if (UNIV_UNLIKELY(counter == ULINT_UNDEFINED)) {
3172
/* The record lacks a counter field.
3173
Such old records must be merged before
3174
new records can be buffered. */
3178
} else if (btr_pcur_is_before_first_in_tree(pcur, mtr)) {
3179
/* Ibuf tree is either completely empty, or the insert
3180
position is at the very first record of a non-empty tree. In
3181
either case we have no previous records for (space,
3185
} else if (btr_pcur_is_before_first_on_page(pcur)) {
3186
btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur);
3188
if (cursor->low_match < 3) {
3189
/* If low_match < 3, we know that the father node
3190
pointer did not contain the searched for (space,
3191
page_no), which means that the search ended on the
3192
right page regardless of the counter value, and
3193
since we're at the infimum record, there are no
3194
existing records. */
3204
ut_a(cursor->ibuf_cnt != ULINT_UNDEFINED);
3206
page = btr_pcur_get_page(pcur);
3207
prev_page_no = btr_page_get_prev(page, mtr);
3209
ut_a(prev_page_no != FIL_NULL);
3211
block = buf_page_get(
3212
IBUF_SPACE_ID, 0, prev_page_no,
3215
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
3217
prev_page = buf_block_get_frame(block);
3219
rec = page_rec_get_prev(
3220
page_get_supremum_rec(prev_page));
3222
ut_ad(page_rec_is_user_rec(rec));
3224
counter = ibuf_get_entry_counter_low(
3225
rec, space, page_no);
3227
if (UNIV_UNLIKELY(counter == ULINT_UNDEFINED)) {
3228
/* The record lacks a counter field.
3229
Such old records must be merged before
3230
new records can be buffered. */
3235
if (counter < cursor->ibuf_cnt) {
3236
/* Search ended on the wrong page. */
3238
if (is_optimistic) {
3239
/* In an optimistic insert, we can
3240
shift the insert position to the left
3241
page, since it only needs an X-latch
3242
on the page itself, which the
3243
original search acquired for us. */
3246
ibuf->index, rec, block,
3247
btr_pcur_get_btr_cur(pcur));
3249
/* We can't shift the insert
3250
position to the left page in a
3251
pessimistic insert since it would
3252
require an X-latch on the left
3253
page's left page, so we have to
3259
/* The counter field in the father node is
3260
the same as we would insert; we don't know
3261
whether the insert should go to this page or
3262
the left page (the later fields can differ),
3263
so refuse the insert. */
3269
/* The cursor is not positioned at or before a user record. */
3273
/* Patch counter value in already built entry. */
3274
field = dtuple_get_nth_field(entry, 3);
3275
data = static_cast<byte *>(dfield_get_data(field));
3277
mach_write_to_2(data + IBUF_REC_OFFSET_COUNTER, counter);
3282
2522
/*********************************************************************//**
3283
Buffer an operation in the insert/delete buffer, instead of doing it
3284
directly to the disk page, if this is possible.
3285
@return DB_SUCCESS, DB_STRONG_FAIL or other error */
2523
Makes an index insert to the insert buffer, instead of directly to the disk
2524
page, if this is possible.
2525
@return DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
3288
2528
ibuf_insert_low(
3289
2529
/*============*/
3290
2530
ulint mode, /*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
3291
ibuf_op_t op, /*!< in: operation type */
3293
/*!< in: TRUE=use 5.0.3 format;
3294
FALSE=allow delete buffering */
3295
2531
const dtuple_t* entry, /*!< in: index entry to insert */
3296
2532
ulint entry_size,
3297
2533
/*!< in: rec_get_converted_size(index, entry) */
3354
2583
return(DB_STRONG_FAIL);
2586
mutex_exit(&ibuf_mutex);
2588
if (mode == BTR_MODIFY_TREE) {
2589
mutex_enter(&ibuf_pessimistic_insert_mutex);
2593
mutex_enter(&ibuf_mutex);
2595
while (!ibuf_data_enough_free_for_insert()) {
2597
mutex_exit(&ibuf_mutex);
2601
mutex_exit(&ibuf_pessimistic_insert_mutex);
2603
err = ibuf_add_free_page();
2605
if (err == DB_STRONG_FAIL) {
2610
mutex_enter(&ibuf_pessimistic_insert_mutex);
2614
mutex_enter(&ibuf_mutex);
3357
2620
heap = mem_heap_create(512);
3359
/* Build the entry which contains the space id and the page number
3360
as the first fields and the type information for other fields, and
3361
which will be inserted to the insert buffer. Using a counter value
3362
of 0xFFFF we find the last record for (space, page_no), from which
3363
we can then read the counter value N and use N + 1 in the record we
3364
insert. (We patch the ibuf_entry's counter field to the correct
3365
value just before actually inserting the entry.) */
2622
/* Build the entry which contains the space id and the page number as
2623
the first fields and the type information for other fields, and which
2624
will be inserted to the insert buffer. */
3367
ibuf_entry = ibuf_entry_build(
3368
op, index, entry, space, page_no,
3369
no_counter ? ULINT_UNDEFINED : 0xFFFF, heap);
2626
ibuf_entry = ibuf_entry_build(index, entry, space, page_no, heap);
3371
2628
/* Open a cursor to the insert buffer tree to calculate if we can add
3372
2629
the new entry to it without exceeding the free space limit for the
3375
if (mode == BTR_MODIFY_TREE) {
3378
mutex_enter(&ibuf_pessimistic_insert_mutex);
3379
mutex_enter(&ibuf_mutex);
3381
if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) {
3386
mutex_exit(&ibuf_mutex);
3387
mutex_exit(&ibuf_pessimistic_insert_mutex);
3390
if (UNIV_UNLIKELY(!ibuf_add_free_page())) {
3392
mem_heap_free(heap);
3393
return(DB_STRONG_FAIL);
3400
2632
mtr_start(&mtr);
3402
2634
btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
3403
ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
3405
2636
/* Find out the volume of already buffered inserts for the same index
3408
buffered = ibuf_get_volume_buffered(&pcur, space, page_no,
3409
op == IBUF_OP_DELETE
3413
if (op == IBUF_OP_DELETE
3415
|| buf_pool_watch_occurred(space, page_no))) {
3416
/* The page could become empty after the record is
3417
deleted, or the page has been read in to the buffer
3418
pool. Refuse to buffer the operation. */
3420
/* The buffer pool watch is needed for IBUF_OP_DELETE
3421
because of latching order considerations. We can
3422
check buf_pool_watch_occurred() only after latching
3423
the insert buffer B-tree pages that contain buffered
3424
changes for the page. We never buffer IBUF_OP_DELETE,
3425
unless some IBUF_OP_INSERT or IBUF_OP_DELETE_MARK have
3426
been previously buffered for the page. Because there
3427
are buffered operations for the page, the insert
3428
buffer B-tree page latches held by mtr will guarantee
3429
that no changes for the user page will be merged
3430
before mtr_commit(&mtr). We must not mtr_commit(&mtr)
3431
until after the IBUF_OP_DELETE has been buffered. */
3434
if (mode == BTR_MODIFY_TREE) {
3435
mutex_exit(&ibuf_mutex);
3436
mutex_exit(&ibuf_pessimistic_insert_mutex);
3439
err = DB_STRONG_FAIL;
3443
/* After this point, the page could still be loaded to the
3444
buffer pool, but we do not have to care about it, since we are
3445
holding a latch on the insert buffer leaf page that contains
3446
buffered changes for (space, page_no). If the page enters the
3447
buffer pool, buf_page_io_complete() for (space, page_no) will
3448
have to acquire a latch on the same insert buffer leaf page,
3449
which it cannot do until we have buffered the IBUF_OP_DELETE
3450
and done mtr_commit(&mtr) to release the latch. */
2638
buffered = ibuf_get_volume_buffered(&pcur, space, page_no, &mtr);
3452
2640
#ifdef UNIV_IBUF_COUNT_DEBUG
3453
2641
ut_a((buffered == 0) || ibuf_count_get(space, page_no));
3629
2798
ut_a(!dict_index_is_clust(index));
3631
no_counter = use <= IBUF_USE_INSERT;
3634
case IBUF_OP_INSERT:
3637
case IBUF_USE_DELETE:
3638
case IBUF_USE_DELETE_MARK:
3640
case IBUF_USE_INSERT:
3641
case IBUF_USE_INSERT_DELETE_MARK:
3644
case IBUF_USE_COUNT:
3648
case IBUF_OP_DELETE_MARK:
3651
case IBUF_USE_INSERT:
3653
case IBUF_USE_DELETE_MARK:
3654
case IBUF_USE_DELETE:
3655
case IBUF_USE_INSERT_DELETE_MARK:
3659
case IBUF_USE_COUNT:
3663
case IBUF_OP_DELETE:
3666
case IBUF_USE_INSERT:
3667
case IBUF_USE_INSERT_DELETE_MARK:
3669
case IBUF_USE_DELETE_MARK:
3670
case IBUF_USE_DELETE:
3674
case IBUF_USE_COUNT:
3682
/* unknown op or use */
3686
/* If a thread attempts to buffer an insert on a page while a
3687
purge is in progress on the same page, the purge must not be
3688
buffered, because it could remove a record that was
3689
re-inserted later. For simplicity, we block the buffering of
3690
all operations on a page that has a purge pending.
3692
We do not check this in the IBUF_OP_DELETE case, because that
3693
would always trigger the buffer pool watch during purge and
3694
thus prevent the buffering of delete operations. We assume
3695
that the issuer of IBUF_OP_DELETE has called
3696
buf_pool_watch_set(space, page_no). */
3700
ulint fold = buf_page_address_fold(space, page_no);
3701
buf_pool_t* buf_pool = buf_pool_get(space, page_no);
3703
buf_pool_mutex_enter(buf_pool);
3704
bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3705
buf_pool_mutex_exit(buf_pool);
3707
if (UNIV_LIKELY_NULL(bpage)) {
3708
/* A buffer pool watch has been set or the
3709
page has been read into the buffer pool.
3710
Do not buffer the request. If a purge operation
3711
is being buffered, have this request executed
3712
directly on the page in the buffer pool after the
3713
buffered entries for this page have been merged. */
2800
switch (UNIV_EXPECT(ibuf_use, IBUF_USE_INSERT)) {
2803
case IBUF_USE_INSERT:
2805
case IBUF_USE_COUNT:
2809
ut_error; /* unknown value of ibuf_use */
3719
2812
entry_size = rec_get_converted_size(index, entry, 0);
3722
>= page_get_free_space_of_empty(dict_table_is_comp(index->table))
2815
>= (page_get_free_space_of_empty(dict_table_is_comp(index->table))
3728
err = ibuf_insert_low(BTR_MODIFY_PREV, op, no_counter,
2820
err = ibuf_insert_low(BTR_MODIFY_PREV, entry, entry_size,
3730
2821
index, space, zip_size, page_no, thr);
3731
2822
if (err == DB_FAIL) {
3732
err = ibuf_insert_low(BTR_MODIFY_TREE, op, no_counter,
2823
err = ibuf_insert_low(BTR_MODIFY_TREE, entry, entry_size,
3734
2824
index, space, zip_size, page_no, thr);
3753
2843
from the insert buffer. */
3756
ibuf_insert_to_index_page_low(
3757
/*==========================*/
3758
const dtuple_t* entry, /*!< in: buffered entry to insert */
3759
buf_block_t* block, /*!< in/out: index page where the buffered
3760
entry should be placed */
3761
dict_index_t* index, /*!< in: record descriptor */
3762
mtr_t* mtr, /*!< in/out: mtr */
3763
page_cur_t* page_cur)/*!< in/out: cursor positioned on the record
3764
after which to insert the buffered entry */
3770
const page_t* bitmap_page;
3774
(page_cur_tuple_insert(page_cur, entry, index, 0, mtr) != NULL)) {
3778
/* If the record did not fit, reorganize */
3780
btr_page_reorganize(block, index, mtr);
3781
page_cur_search(block, index, entry, PAGE_CUR_LE, page_cur);
3783
/* This time the record must fit */
3786
(page_cur_tuple_insert(page_cur, entry, index, 0, mtr) != NULL)) {
3790
page = buf_block_get_frame(block);
3792
ut_print_timestamp(stderr);
3795
" InnoDB: Error: Insert buffer insert fails;"
3796
" page free %lu, dtuple size %lu\n",
3797
(ulong) page_get_max_insert_size(page, 1),
3798
(ulong) rec_get_converted_size(index, entry, 0));
3799
fputs("InnoDB: Cannot insert index record ", stderr);
3800
dtuple_print(stderr, entry);
3801
fputs("\nInnoDB: The table where this index record belongs\n"
3802
"InnoDB: is now probably corrupt. Please run CHECK TABLE on\n"
3803
"InnoDB: that table.\n", stderr);
3805
space = page_get_space_id(page);
3806
zip_size = buf_block_get_zip_size(block);
3807
page_no = page_get_page_no(page);
3809
bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
3810
old_bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
3811
IBUF_BITMAP_FREE, mtr);
3814
"InnoDB: space %lu, page %lu, zip_size %lu, bitmap bits %lu\n",
3815
(ulong) space, (ulong) page_no,
3816
(ulong) zip_size, (ulong) old_bits);
3818
fputs("InnoDB: Submit a detailed bug report"
3819
" to http://bugs.mysql.com\n", stderr);
3822
/************************************************************************
3823
During merge, inserts to an index page a secondary index entry extracted
3824
from the insert buffer. */
3827
2846
ibuf_insert_to_index_page(
3828
2847
/*======================*/
3829
const dtuple_t* entry, /*!< in: buffered entry to insert */
2848
dtuple_t* entry, /*!< in: buffered entry to insert */
3830
2849
buf_block_t* block, /*!< in/out: index page where the buffered entry
3831
2850
should be placed */
3832
2851
dict_index_t* index, /*!< in: record descriptor */
3885
2897
low_match = page_cur_search(block, index, entry,
3886
2898
PAGE_CUR_LE, &page_cur);
3888
if (UNIV_UNLIKELY(low_match == dtuple_get_n_fields(entry))) {
2900
if (low_match == dtuple_get_n_fields(entry)) {
3892
2901
page_zip_des_t* page_zip;
3894
2903
rec = page_cur_get_rec(&page_cur);
3897
row_ins_sec_index_entry_by_modify(BTR_MODIFY_LEAF). */
3898
ut_ad(rec_get_deleted_flag(rec, page_is_comp(page)));
3900
heap = mem_heap_create(1024);
3902
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED,
3904
update = row_upd_build_sec_rec_difference_binary(
3905
index, entry, rec, NULL, heap);
3907
2904
page_zip = buf_block_get_page_zip(block);
3909
if (update->n_fields == 0) {
3910
/* The records only differ in the delete-mark.
3911
Clear the delete-mark, like we did before
3912
Bug #56680 was fixed. */
3913
btr_cur_set_deleted_flag_for_ibuf(
3914
rec, page_zip, FALSE, mtr);
3916
mem_heap_free(heap);
2906
btr_cur_del_unmark_for_ibuf(rec, page_zip, mtr);
2908
rec = page_cur_tuple_insert(&page_cur, entry, index, 0, mtr);
2910
if (UNIV_LIKELY(rec != NULL)) {
3920
/* Copy the info bits. Clear the delete-mark. */
3921
update->info_bits = rec_get_info_bits(rec, page_is_comp(page));
3922
update->info_bits &= ~REC_INFO_DELETED_FLAG;
3924
/* We cannot invoke btr_cur_optimistic_update() here,
3925
because we do not have a btr_cur_t or que_thr_t,
3926
as the insert buffer merge occurs at a very low level. */
3927
if (!row_upd_changes_field_size_or_external(index, offsets,
3929
&& (!page_zip || btr_cur_update_alloc_zip(
3930
page_zip, block, index,
3931
rec_offs_size(offsets), FALSE, mtr))) {
3932
/* This is the easy case. Do something similar
3933
to btr_cur_update_in_place(). */
3934
row_upd_rec_in_place(rec, index, offsets,
3936
goto updated_in_place;
3939
/* A collation may identify values that differ in
3941
Some examples (1 or 2 bytes):
3942
utf8_turkish_ci: I = U+0131 LATIN SMALL LETTER DOTLESS I
3943
utf8_general_ci: S = U+00DF LATIN SMALL LETTER SHARP S
3944
utf8_general_ci: A = U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
3946
latin1_german2_ci: SS = U+00DF LATIN SMALL LETTER SHARP S
3948
Examples of a character (3-byte UTF-8 sequence)
3949
identified with 2 or 4 characters (1-byte UTF-8 sequences):
3951
utf8_unicode_ci: 'II' = U+2171 SMALL ROMAN NUMERAL TWO
3952
utf8_unicode_ci: '(10)' = U+247D PARENTHESIZED NUMBER TEN
3955
/* Delete the different-length record, and insert the
3958
lock_rec_store_on_page_infimum(block, rec);
3959
page_cur_delete_rec(&page_cur, index, offsets, mtr);
3960
page_cur_move_to_prev(&page_cur);
3961
mem_heap_free(heap);
3963
ibuf_insert_to_index_page_low(entry, block, index, mtr,
3965
lock_rec_restore_from_page_infimum(block, rec, block);
3967
ibuf_insert_to_index_page_low(entry, block, index, mtr,
3972
/****************************************************************//**
3973
During merge, sets the delete mark on a record for a secondary index
3979
const dtuple_t* entry, /*!< in: entry */
3980
buf_block_t* block, /*!< in/out: block */
3981
const dict_index_t* index, /*!< in: record descriptor */
3982
mtr_t* mtr) /*!< in: mtr */
3984
page_cur_t page_cur;
3987
ut_ad(ibuf_inside());
3988
ut_ad(dtuple_check_typed(entry));
3990
low_match = page_cur_search(
3991
block, index, entry, PAGE_CUR_LE, &page_cur);
3993
if (low_match == dtuple_get_n_fields(entry)) {
3995
page_zip_des_t* page_zip;
3997
rec = page_cur_get_rec(&page_cur);
3998
page_zip = page_cur_get_page_zip(&page_cur);
4000
/* Delete mark the old index record. According to a
4001
comment in row_upd_sec_index_entry(), it can already
4002
have been delete marked if a lock wait occurred in
4003
row_ins_index_entry() in a previous invocation of
4004
row_upd_sec_index_entry(). */
4007
(!rec_get_deleted_flag(
4008
rec, dict_table_is_comp(index->table)))) {
4009
btr_cur_set_deleted_flag_for_ibuf(rec, page_zip,
4013
ut_print_timestamp(stderr);
4014
fputs(" InnoDB: unable to find a record to delete-mark\n",
4016
fputs("InnoDB: tuple ", stderr);
4017
dtuple_print(stderr, entry);
4019
"InnoDB: record ", stderr);
4020
rec_print(stderr, page_cur_get_rec(&page_cur), index);
4023
"InnoDB: Submit a detailed bug report"
4024
" to http://bugs.mysql.com\n", stderr);
4029
/****************************************************************//**
4030
During merge, delete a record for a secondary index entry. */
4035
const dtuple_t* entry, /*!< in: entry */
4036
buf_block_t* block, /*!< in/out: block */
4037
dict_index_t* index, /*!< in: record descriptor */
4038
mtr_t* mtr) /*!< in/out: mtr; must be committed
4039
before latching any further pages */
4041
page_cur_t page_cur;
4044
ut_ad(ibuf_inside());
4045
ut_ad(dtuple_check_typed(entry));
4047
low_match = page_cur_search(
4048
block, index, entry, PAGE_CUR_LE, &page_cur);
4050
if (low_match == dtuple_get_n_fields(entry)) {
4051
page_zip_des_t* page_zip= buf_block_get_page_zip(block);
4052
page_t* page = buf_block_get_frame(block);
4053
rec_t* rec = page_cur_get_rec(&page_cur);
4055
/* TODO: the below should probably be a separate function,
4056
it's a bastardized version of btr_cur_optimistic_delete. */
4058
ulint offsets_[REC_OFFS_NORMAL_SIZE];
4059
ulint* offsets = offsets_;
4060
mem_heap_t* heap = NULL;
4063
rec_offs_init(offsets_);
4065
offsets = rec_get_offsets(
4066
rec, index, offsets, ULINT_UNDEFINED, &heap);
4068
/* Refuse to delete the last record. */
4069
ut_a(page_get_n_recs(page) > 1);
4071
/* The record should have been marked for deletion. */
4072
ut_ad(REC_INFO_DELETED_FLAG
4073
& rec_get_info_bits(rec, page_is_comp(page)));
4075
lock_update_delete(block, rec);
4079
= page_get_max_insert_size_after_reorganize(
4082
#ifdef UNIV_ZIP_DEBUG
4083
ut_a(!page_zip || page_zip_validate(page_zip, page));
4084
#endif /* UNIV_ZIP_DEBUG */
4085
page_cur_delete_rec(&page_cur, index, offsets, mtr);
4086
#ifdef UNIV_ZIP_DEBUG
4087
ut_a(!page_zip || page_zip_validate(page_zip, page));
4088
#endif /* UNIV_ZIP_DEBUG */
4091
ibuf_update_free_bits_zip(block, mtr);
4093
ibuf_update_free_bits_low(block, max_ins_size, mtr);
4096
if (UNIV_LIKELY_NULL(heap)) {
4097
mem_heap_free(heap);
4100
/* The record must have been purged already. */
4104
/*********************************************************************//**
4105
Restores insert buffer tree cursor position
4106
@return TRUE if the position was restored; FALSE if not */
4107
static __attribute__((nonnull))
4111
ulint space, /*!< in: space id */
4112
ulint page_no,/*!< in: index page number where the record
4114
const dtuple_t* search_tuple,
4115
/*!< in: search tuple for entries of page_no */
4116
ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
4117
btr_pcur_t* pcur, /*!< in/out: persistent cursor whose
4118
position is to be restored */
4119
mtr_t* mtr) /*!< in/out: mini-transaction */
4121
ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE);
4123
if (btr_pcur_restore_position(mode, pcur, mtr)) {
4128
if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
4129
/* The tablespace has been dropped. It is possible
4130
that another thread has deleted the insert buffer
4131
entry. Do not complain. */
4132
btr_pcur_commit_specify_mtr(pcur, mtr);
4135
"InnoDB: ERROR: Submit the output to"
4136
" http://bugs.mysql.com\n"
4137
"InnoDB: ibuf cursor restoration fails!\n"
4138
"InnoDB: ibuf record inserted to page %lu:%lu\n",
4139
(ulong) space, (ulong) page_no);
4142
rec_print_old(stderr, btr_pcur_get_rec(pcur));
4143
rec_print_old(stderr, pcur->old_rec);
4144
dtuple_print(stderr, search_tuple);
4146
rec_print_old(stderr,
4147
page_rec_get_next(btr_pcur_get_rec(pcur)));
4150
btr_pcur_commit_specify_mtr(pcur, mtr);
4152
fputs("InnoDB: Validating insert buffer tree:\n", stderr);
4153
if (!btr_validate_index(ibuf->index, NULL)) {
4157
fprintf(stderr, "InnoDB: ibuf tree ok\n");
2914
/* If the record did not fit, reorganize */
2916
btr_page_reorganize(block, index, mtr);
2917
page_cur_search(block, index, entry, PAGE_CUR_LE, &page_cur);
2919
/* This time the record must fit */
2921
(!page_cur_tuple_insert(&page_cur, entry, index,
2927
ut_print_timestamp(stderr);
2930
" InnoDB: Error: Insert buffer insert"
2931
" fails; page free %lu,"
2932
" dtuple size %lu\n",
2933
(ulong) page_get_max_insert_size(
2935
(ulong) rec_get_converted_size(
2937
fputs("InnoDB: Cannot insert index record ",
2939
dtuple_print(stderr, entry);
2940
fputs("\nInnoDB: The table where"
2941
" this index record belongs\n"
2942
"InnoDB: is now probably corrupt."
2943
" Please run CHECK TABLE on\n"
2944
"InnoDB: that table.\n", stderr);
2946
space = page_get_space_id(page);
2947
zip_size = buf_block_get_zip_size(block);
2948
page_no = page_get_page_no(page);
2950
bitmap_page = ibuf_bitmap_get_map_page(
2951
space, page_no, zip_size, mtr);
2952
old_bits = ibuf_bitmap_page_get_bits(
2953
bitmap_page, page_no, zip_size,
2954
IBUF_BITMAP_FREE, mtr);
2957
"InnoDB: space %lu, page %lu,"
2958
" zip_size %lu, bitmap bits %lu\n",
2959
(ulong) space, (ulong) page_no,
2960
(ulong) zip_size, (ulong) old_bits);
2962
fputs("InnoDB: Submit a detailed bug report"
2963
" to http://bugs.mysql.com\n", stderr);
4164
2968
/*********************************************************************//**
4495
3314
fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
4496
3315
} else if (block) {
4497
3316
/* Now we have at pcur a record which should be
4498
applied on the index page; NOTE that the call below
3317
inserted to the index page; NOTE that the call below
4499
3318
copies pointers to fields in rec, and we must
4500
3319
keep the latch to the rec page until the
4501
3320
insertion is finished! */
4502
3321
dtuple_t* entry;
4503
3322
trx_id_t max_trx_id;
4504
3323
dict_index_t* dummy_index;
4505
ibuf_op_t op = ibuf_rec_get_op_type(rec);
4507
3325
max_trx_id = page_get_max_trx_id(page_align(rec));
4508
3326
page_update_max_trx_id(block, page_zip, max_trx_id,
4511
ut_ad(page_validate(page_align(rec), ibuf->index));
4513
3329
entry = ibuf_build_entry_from_ibuf_rec(
4514
3330
rec, heap, &dummy_index);
4516
ut_ad(page_validate(block->frame, dummy_index));
4520
case IBUF_OP_INSERT:
4521
3331
#ifdef UNIV_IBUF_DEBUG
4522
volume += rec_get_converted_size(
4523
dummy_index, entry, 0);
4525
volume += page_dir_calc_reserved_space(1);
4527
ut_a(volume <= 4 * UNIV_PAGE_SIZE
4528
/ IBUF_PAGE_SIZE_PER_FREE_SPACE);
3332
volume += rec_get_converted_size(dummy_index, entry, 0)
3333
+ page_dir_calc_reserved_space(1);
3334
ut_a(volume <= 4 * UNIV_PAGE_SIZE
3335
/ IBUF_PAGE_SIZE_PER_FREE_SPACE);
4530
ibuf_insert_to_index_page(
4531
entry, block, dummy_index, &mtr);
4534
case IBUF_OP_DELETE_MARK:
4536
entry, block, dummy_index, &mtr);
4539
case IBUF_OP_DELETE:
4540
ibuf_delete(entry, block, dummy_index, &mtr);
4541
/* Because ibuf_delete() will latch an
4542
insert buffer bitmap page, commit mtr
4543
before latching any further pages.
4544
Store and restore the cursor position. */
4545
ut_ad(rec == btr_pcur_get_rec(&pcur));
4546
ut_ad(page_rec_is_user_rec(rec));
4547
ut_ad(ibuf_rec_get_page_no(rec) == page_no);
4548
ut_ad(ibuf_rec_get_space(rec) == space);
4550
btr_pcur_store_position(&pcur, &mtr);
4551
btr_pcur_commit_specify_mtr(&pcur, &mtr);
4555
success = buf_page_get_known_nowait(
4558
__FILE__, __LINE__, &mtr);
4561
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
4563
if (!ibuf_restore_pos(space, page_no,
4570
ibuf_dummy_index_free(dummy_index);
3337
ibuf_insert_to_index_page(entry, block,
4581
3339
ibuf_dummy_index_free(dummy_index);
4583
dops[ibuf_rec_get_op_type(rec)]++;
4586
3344
/* Delete the record from ibuf */
4587
3345
if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,