1209
/****************************************************************//**
1210
Get various information about an ibuf record in >= 4.1.x format. */
1215
const rec_t* rec, /*!< in: ibuf record */
1216
ibuf_op_t* op, /*!< out: operation type, or NULL */
1217
ibool* comp, /*!< out: compact flag, or NULL */
1218
ulint* info_len, /*!< out: length of info fields at the
1219
start of the fourth field, or
1221
ulint* counter) /*!< out: counter value, or NULL */
1227
/* Local variables to shadow arguments. */
1230
ulint info_len_local;
1231
ulint counter_local;
1233
ut_ad(ibuf_inside());
1234
fields = rec_get_n_fields_old(rec);
1237
types = rec_get_nth_field_old(rec, 3, &len);
1239
info_len_local = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
1241
switch (info_len_local) {
1244
op_local = IBUF_OP_INSERT;
1245
comp_local = info_len_local;
1247
counter_local = ULINT_UNDEFINED;
1250
case IBUF_REC_INFO_SIZE:
1251
op_local = (ibuf_op_t)types[IBUF_REC_OFFSET_TYPE];
1252
comp_local = types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT;
1253
counter_local = mach_read_from_2(
1254
types + IBUF_REC_OFFSET_COUNTER);
1261
ut_a(op_local < IBUF_OP_COUNT);
1262
ut_a((len - info_len_local) ==
1263
(fields - 4) * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1274
*info_len = info_len_local;
1278
*counter = counter_local;
1282
/****************************************************************//**
1283
Returns the operation type field of an ibuf record.
1284
@return operation type */
1287
ibuf_rec_get_op_type(
1288
/*=================*/
1289
const rec_t* rec) /*!< in: ibuf record */
1294
ut_ad(ibuf_inside());
1295
ut_ad(rec_get_n_fields_old(rec) > 2);
1297
field = rec_get_nth_field_old(rec, 1, &len);
1300
/* This is a < 4.1.x format record */
1302
return(IBUF_OP_INSERT);
1306
ibuf_rec_get_info(rec, &op, NULL, NULL, NULL);
1312
/****************************************************************//**
1313
Read the first two bytes from a record's fourth field (counter field in new
1314
records; something else in older records).
1315
@return "counter" field, or ULINT_UNDEFINED if for some reason it
1319
ibuf_rec_get_counter(
1320
/*=================*/
1321
const rec_t* rec) /*!< in: ibuf record */
1326
if (rec_get_n_fields_old(rec) < 4) {
1328
return(ULINT_UNDEFINED);
1331
ptr = rec_get_nth_field_old(rec, 3, &len);
1335
return(mach_read_from_2(ptr));
1338
return(ULINT_UNDEFINED);
1342
/****************************************************************//**
1343
Add accumulated operation counts to a permanent array. Both arrays must be
1344
of size IBUF_OP_COUNT. */
1349
ulint* arr, /*!< in/out: array to modify */
1350
const ulint* ops) /*!< in: operation counts */
1355
for (i = 0; i < IBUF_OP_COUNT; i++) {
1360
/****************************************************************//**
1361
Print operation counts. The array must be of size IBUF_OP_COUNT. */
1366
const ulint* ops, /*!< in: operation counts */
1367
FILE* file) /*!< in: file where to print */
1369
static const char* op_names[] = {
1376
ut_a(UT_ARR_SIZE(op_names) == IBUF_OP_COUNT);
1378
for (i = 0; i < IBUF_OP_COUNT; i++) {
1379
fprintf(file, "%s %lu%s", op_names[i],
1380
(ulong) ops[i], (i < (IBUF_OP_COUNT - 1)) ? ", " : "");
1153
1386
/********************************************************************//**
1154
1387
Creates a dummy index for inserting a record to a non-clustered index.
1156
1388
@return dummy index */
1588
/******************************************************************//**
1590
@return size of fields */
1595
const rec_t* rec, /*!< in: ibuf record */
1596
const byte* types, /*!< in: fields */
1597
ulint n_fields, /*!< in: number of fields */
1598
ibool pre_4_1, /*!< in: TRUE=pre-4.1 format,
1600
ulint comp) /*!< in: 0=ROW_FORMAT=REDUNDANT,
1601
nonzero=ROW_FORMAT=COMPACT */
1610
types_offset = DATA_ORDER_NULL_TYPE_BUF_SIZE;
1613
types_offset = DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
1616
for (i = 0; i < n_fields; i++) {
1620
rec_get_nth_field_offs_old(rec, i + field_offset, &len);
1622
if (len != UNIV_SQL_NULL) {
1624
} else if (pre_4_1) {
1625
dtype_read_for_order_and_null_size(&dtype, types);
1627
size += dtype_get_sql_null_size(&dtype, comp);
1629
dtype_new_read_for_order_and_null_size(&dtype, types);
1631
size += dtype_get_sql_null_size(&dtype, comp);
1634
types += types_offset;
1350
1640
/********************************************************************//**
1351
1641
Returns the space taken by a stored non-clustered index entry if converted to
1352
1642
an index record.
1389
1678
/* >= 4.1.x format record */
1391
1682
ut_a(trx_sys_multiple_tablespace_format);
1392
1683
ut_a(*data == 0);
1394
1685
types = rec_get_nth_field_old(ibuf_rec, 3, &len);
1396
comp = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
1400
/* compact record format */
1687
ibuf_rec_get_info(ibuf_rec, &op, &comp, &info_len, NULL);
1689
if (op == IBUF_OP_DELETE_MARK || op == IBUF_OP_DELETE) {
1690
/* Delete-marking a record doesn't take any
1691
additional space, and while deleting a record
1692
actually frees up space, we have to play it safe and
1693
pretend it takes no additional space (the record
1694
might not exist, etc.). */
1402
1700
dict_index_t* dummy_index;
1403
1701
mem_heap_t* heap = mem_heap_create(500);
1404
dtuple_t* entry = ibuf_build_entry_from_ibuf_rec(
1703
entry = ibuf_build_entry_from_ibuf_rec(
1405
1704
ibuf_rec, heap, &dummy_index);
1406
1706
volume = rec_get_converted_size(dummy_index, entry, 0);
1407
1708
ibuf_dummy_index_free(dummy_index);
1408
1709
mem_heap_free(heap);
1409
1711
return(volume + page_dir_calc_reserved_space(1));
1412
1715
n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
1417
for (i = 0; i < n_fields; i++) {
1419
data = rec_get_nth_field_old(ibuf_rec, i + 4, &len);
1421
dtype_new_read_for_order_and_null_size(
1423
* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1425
data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
1427
dtype_read_for_order_and_null_size(
1429
* DATA_ORDER_NULL_TYPE_BUF_SIZE);
1432
if (len == UNIV_SQL_NULL) {
1433
data_size += dtype_get_sql_null_size(&dtype, comp);
1718
data_size = ibuf_rec_get_size(ibuf_rec, types, n_fields, pre_4_1, comp);
1439
1720
return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
1440
1721
+ page_dir_calc_reserved_space(1));
1464
1748
const dfield_t* entry_field;
1465
1749
ulint n_fields;
1470
/* Starting from 4.1.x, we have to build a tuple whose
1471
(1) first field is the space id,
1472
(2) the second field a single marker byte (0) to tell that this
1473
is a new format record,
1474
(3) the third contains the page number, and
1475
(4) the fourth contains the relevant type information of each data
1476
field; the length of this field % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE is
1477
(a) 0 for b-trees in the old format, and
1478
(b) 1 for b-trees in the compact format, the first byte of the field
1479
being the marker (0);
1480
(5) and the rest of the fields are copied from entry. All fields
1481
in the tuple are ordered like the type binary in our insert buffer
1755
ut_ad(counter != ULINT_UNDEFINED || op == IBUF_OP_INSERT);
1756
ut_ad(counter == ULINT_UNDEFINED || counter <= 0xFFFF);
1757
ut_ad(op < IBUF_OP_COUNT);
1759
/* We have to build a tuple with the following fields:
1761
1-4) These are described at the top of this file.
1763
5) The rest of the fields are copied from the entry.
1765
All fields in the tuple are ordered like the type binary in our
1766
insert buffer tree. */
1484
1768
n_fields = dtuple_get_n_fields(entry);
1486
1770
tuple = dtuple_create(heap, n_fields + 4);
1488
/* Store the space id in tuple */
1490
1774
field = dtuple_get_nth_field(tuple, 0);
1518
1802
dfield_set_data(field, buf, 4);
1520
/* Store the type info in buf2, and add the fields from entry to
1522
buf2 = mem_heap_alloc(heap, n_fields
1523
* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
1524
+ dict_table_is_comp(index->table));
1525
if (dict_table_is_comp(index->table)) {
1526
*buf2++ = 0; /* write the compact format indicator */
1804
/* 4) Type info, part #1 */
1806
if (counter == ULINT_UNDEFINED) {
1807
i = dict_table_is_comp(index->table) ? 1 : 0;
1809
ut_ad(counter <= 0xFFFF);
1810
i = IBUF_REC_INFO_SIZE;
1813
ti = type_info = mem_heap_alloc(heap, i + n_fields
1814
* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1821
/* set the flag for ROW_FORMAT=COMPACT */
1825
/* the old format does not allow delete buffering */
1826
ut_ad(op == IBUF_OP_INSERT);
1828
case IBUF_REC_INFO_SIZE:
1829
mach_write_to_2(ti + IBUF_REC_OFFSET_COUNTER, counter);
1831
ti[IBUF_REC_OFFSET_TYPE] = (byte) op;
1832
ti[IBUF_REC_OFFSET_FLAGS] = dict_table_is_comp(index->table)
1833
? IBUF_REC_COMPACT : 0;
1834
ti += IBUF_REC_INFO_SIZE;
1838
/* 5+) Fields from the entry */
1528
1840
for (i = 0; i < n_fields; i++) {
1529
1841
ulint fixed_len;
1530
1842
const dict_field_t* ifield;
2329
2653
/*********************************************************************//**
2654
Determine if an insert buffer record has been encountered already.
2655
@return TRUE if a new record, FALSE if possible duplicate */
2658
ibuf_get_volume_buffered_hash(
2659
/*==========================*/
2660
const rec_t* rec, /*!< in: ibuf record in post-4.1 format */
2661
const byte* types, /*!< in: fields */
2662
const byte* data, /*!< in: start of user record data */
2663
ulint comp, /*!< in: 0=ROW_FORMAT=REDUNDANT,
2664
nonzero=ROW_FORMAT=COMPACT */
2665
ulint* hash, /*!< in/out: hash array */
2666
ulint size) /*!< in: number of elements in hash array */
2672
len = ibuf_rec_get_size(rec, types, rec_get_n_fields_old(rec) - 4,
2674
fold = ut_fold_binary(data, len);
2676
hash += (fold / (8 * sizeof *hash)) % size; // 8 = bits in byte
2677
bitmask = 1 << (fold % (8 * sizeof *hash));
2679
if (*hash & bitmask) {
2684
/* We have not seen this record yet. Insert it. */
2690
/*********************************************************************//**
2691
Update the estimate of the number of records on a page, and
2692
get the space taken by merging the buffered record to the index page.
2693
@return size of index record in bytes + an upper limit of the space
2694
taken in the page directory */
2697
ibuf_get_volume_buffered_count(
2698
/*===========================*/
2699
const rec_t* rec, /*!< in: insert buffer record */
2700
ulint* hash, /*!< in/out: hash array */
2701
ulint size, /*!< in: number of elements in hash array */
2702
lint* n_recs) /*!< in/out: estimated number of records
2703
on the page that rec points to */
2708
ulint n_fields = rec_get_n_fields_old(rec);
2710
ut_ad(ibuf_inside());
2711
ut_ad(n_fields > 4);
2714
rec_get_nth_field_offs_old(rec, 1, &len);
2715
/* This function is only invoked when buffering new
2716
operations. All pre-4.1 records should have been merged
2717
when the database was started up. */
2719
ut_ad(trx_sys_multiple_tablespace_format);
2721
types = rec_get_nth_field_old(rec, 3, &len);
2723
switch (UNIV_EXPECT(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
2724
IBUF_REC_INFO_SIZE)) {
2728
/* This ROW_FORMAT=REDUNDANT record does not include an
2729
operation counter. Exclude it from the *n_recs,
2730
because deletes cannot be buffered if there are
2731
old-style inserts buffered for the page. */
2733
len = ibuf_rec_get_size(rec, types, n_fields, FALSE, 0);
2736
+ rec_get_converted_extra_size(len, n_fields, 0)
2737
+ page_dir_calc_reserved_space(1));
2739
/* This ROW_FORMAT=COMPACT record does not include an
2740
operation counter. Exclude it from the *n_recs,
2741
because deletes cannot be buffered if there are
2742
old-style inserts buffered for the page. */
2743
goto get_volume_comp;
2745
case IBUF_REC_INFO_SIZE:
2746
ibuf_op = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
2751
case IBUF_OP_INSERT:
2752
/* Inserts can be done by
2753
btr_cur_set_deleted_flag_for_ibuf(). Because
2754
delete-mark and insert operations can be pointing to
2755
the same records, we must not count duplicates. */
2756
case IBUF_OP_DELETE_MARK:
2757
/* There must be a record to delete-mark.
2758
See if this record has been already buffered. */
2759
if (n_recs && ibuf_get_volume_buffered_hash(
2760
rec, types + IBUF_REC_INFO_SIZE,
2762
types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT,
2767
if (ibuf_op == IBUF_OP_DELETE_MARK) {
2768
/* Setting the delete-mark flag does not
2769
affect the available space on the page. */
2773
case IBUF_OP_DELETE:
2774
/* A record will be removed from the page. */
2778
/* While deleting a record actually frees up space,
2779
we have to play it safe and pretend that it takes no
2780
additional space (the record might not exist, etc.). */
2786
ut_ad(ibuf_op == IBUF_OP_INSERT);
2792
dict_index_t* dummy_index;
2793
mem_heap_t* heap = mem_heap_create(500);
2795
entry = ibuf_build_entry_from_ibuf_rec(
2796
rec, heap, &dummy_index);
2798
volume = rec_get_converted_size(dummy_index, entry, 0);
2800
ibuf_dummy_index_free(dummy_index);
2801
mem_heap_free(heap);
2803
return(volume + page_dir_calc_reserved_space(1));
2807
/*********************************************************************//**
2330
2808
Gets an upper limit for the combined size of entries buffered in the insert
2331
2809
buffer for a given page.
2332
2810
@return upper limit for the volume of buffered inserts for the index
2557
3055
fil_set_max_space_id_if_bigger(max_space_id);
3058
/****************************************************************//**
3059
Helper function for ibuf_set_entry_counter. Checks if rec is for (space,
3060
page_no), and if so, reads counter value from it and returns that + 1.
3061
Otherwise, returns 0.
3062
@return new counter value, or 0 */
3065
ibuf_get_entry_counter_low(
3066
/*=======================*/
3067
const rec_t* rec, /*!< in: insert buffer record */
3068
ulint space, /*!< in: space id */
3069
ulint page_no) /*!< in: page number */
3075
ut_ad(ibuf_inside());
3076
ut_ad(rec_get_n_fields_old(rec) > 2);
3078
field = rec_get_nth_field_old(rec, 1, &len);
3080
if (UNIV_UNLIKELY(len != 1)) {
3081
/* pre-4.1 format */
3082
ut_a(trx_doublewrite_must_reset_space_ids);
3083
ut_a(!trx_sys_multiple_tablespace_format);
3085
return(ULINT_UNDEFINED);
3088
ut_a(trx_sys_multiple_tablespace_format);
3090
/* Check the tablespace identifier. */
3091
field = rec_get_nth_field_old(rec, 0, &len);
3094
if (mach_read_from_4(field) != space) {
3099
/* Check the page offset. */
3100
field = rec_get_nth_field_old(rec, 2, &len);
3103
if (mach_read_from_4(field) != page_no) {
3108
/* Check if the record contains a counter field. */
3109
field = rec_get_nth_field_old(rec, 3, &len);
3111
switch (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
3114
case 0: /* ROW_FORMAT=REDUNDANT */
3115
case 1: /* ROW_FORMAT=COMPACT */
3116
return(ULINT_UNDEFINED);
3118
case IBUF_REC_INFO_SIZE:
3119
counter = mach_read_from_2(field + IBUF_REC_OFFSET_COUNTER);
3120
ut_a(counter < 0xFFFF);
3121
return(counter + 1);
3125
/****************************************************************//**
3126
Set the counter field in entry to the correct value based on the current
3127
last record in ibuf for (space, page_no).
3128
@return FALSE if we should abort this insertion to ibuf */
3131
ibuf_set_entry_counter(
3132
/*===================*/
3133
dtuple_t* entry, /*!< in/out: entry to patch */
3134
ulint space, /*!< in: space id of entry */
3135
ulint page_no, /*!< in: page number of entry */
3136
btr_pcur_t* pcur, /*!< in: pcur positioned on the record
3137
found by btr_pcur_open(.., entry,
3138
PAGE_CUR_LE, ..., pcur, ...) */
3139
ibool is_optimistic, /*!< in: is this an optimistic insert */
3140
mtr_t* mtr) /*!< in: mtr */
3146
/* pcur points to either a user rec or to a page's infimum record. */
3147
ut_ad(page_validate(btr_pcur_get_page(pcur), ibuf->index));
3149
if (btr_pcur_is_on_user_rec(pcur)) {
3151
counter = ibuf_get_entry_counter_low(
3152
btr_pcur_get_rec(pcur), space, page_no);
3154
if (UNIV_UNLIKELY(counter == ULINT_UNDEFINED)) {
3155
/* The record lacks a counter field.
3156
Such old records must be merged before
3157
new records can be buffered. */
3161
} else if (btr_pcur_is_before_first_in_tree(pcur, mtr)) {
3162
/* Ibuf tree is either completely empty, or the insert
3163
position is at the very first record of a non-empty tree. In
3164
either case we have no previous records for (space,
3168
} else if (btr_pcur_is_before_first_on_page(pcur)) {
3169
btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur);
3171
if (cursor->low_match < 3) {
3172
/* If low_match < 3, we know that the father node
3173
pointer did not contain the searched for (space,
3174
page_no), which means that the search ended on the
3175
right page regardless of the counter value, and
3176
since we're at the infimum record, there are no
3177
existing records. */
3187
ut_a(cursor->ibuf_cnt != ULINT_UNDEFINED);
3189
page = btr_pcur_get_page(pcur);
3190
prev_page_no = btr_page_get_prev(page, mtr);
3192
ut_a(prev_page_no != FIL_NULL);
3194
block = buf_page_get(
3195
IBUF_SPACE_ID, 0, prev_page_no,
3198
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
3200
prev_page = buf_block_get_frame(block);
3202
rec = page_rec_get_prev(
3203
page_get_supremum_rec(prev_page));
3205
ut_ad(page_rec_is_user_rec(rec));
3207
counter = ibuf_get_entry_counter_low(
3208
rec, space, page_no);
3210
if (UNIV_UNLIKELY(counter == ULINT_UNDEFINED)) {
3211
/* The record lacks a counter field.
3212
Such old records must be merged before
3213
new records can be buffered. */
3218
if (counter < cursor->ibuf_cnt) {
3219
/* Search ended on the wrong page. */
3221
if (is_optimistic) {
3222
/* In an optimistic insert, we can
3223
shift the insert position to the left
3224
page, since it only needs an X-latch
3225
on the page itself, which the
3226
original search acquired for us. */
3229
ibuf->index, rec, block,
3230
btr_pcur_get_btr_cur(pcur));
3232
/* We can't shift the insert
3233
position to the left page in a
3234
pessimistic insert since it would
3235
require an X-latch on the left
3236
page's left page, so we have to
3242
/* The counter field in the father node is
3243
the same as we would insert; we don't know
3244
whether the insert should go to this page or
3245
the left page (the later fields can differ),
3246
so refuse the insert. */
3252
/* The cursor is not positioned at or before a user record. */
3256
/* Patch counter value in already built entry. */
3257
field = dtuple_get_nth_field(entry, 3);
3258
data = dfield_get_data(field);
3260
mach_write_to_2(data + IBUF_REC_OFFSET_COUNTER, counter);
2560
3265
/*********************************************************************//**
2561
Makes an index insert to the insert buffer, instead of directly to the disk
2562
page, if this is possible.
3266
Buffer an operation in the insert/delete buffer, instead of doing it
3267
directly to the disk page, if this is possible.
2563
3268
@return DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
2566
3271
ibuf_insert_low(
2567
3272
/*============*/
2568
3273
ulint mode, /*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
3274
ibuf_op_t op, /*!< in: operation type */
3276
/*!< in: TRUE=use 5.0.3 format;
3277
FALSE=allow delete buffering */
2569
3278
const dtuple_t* entry, /*!< in: index entry to insert */
2570
3279
ulint entry_size,
2571
3280
/*!< in: rec_get_converted_size(index, entry) */
2658
3369
heap = mem_heap_create(512);
2660
/* Build the entry which contains the space id and the page number as
2661
the first fields and the type information for other fields, and which
2662
will be inserted to the insert buffer. */
3371
/* Build the entry which contains the space id and the page number
3372
as the first fields and the type information for other fields, and
3373
which will be inserted to the insert buffer. Using a counter value
3374
of 0xFFFF we find the last record for (space, page_no), from which
3375
we can then read the counter value N and use N + 1 in the record we
3376
insert. (We patch the ibuf_entry's counter field to the correct
3377
value just before actually inserting the entry.) */
2664
ibuf_entry = ibuf_entry_build(index, entry, space, page_no, heap);
3379
ibuf_entry = ibuf_entry_build(
3380
op, index, entry, space, page_no,
3381
no_counter ? ULINT_UNDEFINED : 0xFFFF, heap);
2666
3383
/* Open a cursor to the insert buffer tree to calculate if we can add
2667
3384
the new entry to it without exceeding the free space limit for the
2670
3387
mtr_start(&mtr);
2672
3389
btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
3390
ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
2674
3392
/* Find out the volume of already buffered inserts for the same index
2676
buffered = ibuf_get_volume_buffered(&pcur, space, page_no, &mtr);
3395
buffered = ibuf_get_volume_buffered(&pcur, space, page_no,
3396
op == IBUF_OP_DELETE
3400
if (op == IBUF_OP_DELETE
3402
|| buf_pool_watch_occurred(space, page_no))) {
3403
/* The page could become empty after the record is
3404
deleted, or the page has been read in to the buffer
3405
pool. Refuse to buffer the operation. */
3407
/* The buffer pool watch is needed for IBUF_OP_DELETE
3408
because of latching order considerations. We can
3409
check buf_pool_watch_occurred() only after latching
3410
the insert buffer B-tree pages that contain buffered
3411
changes for the page. We never buffer IBUF_OP_DELETE,
3412
unless some IBUF_OP_INSERT or IBUF_OP_DELETE_MARK have
3413
been previously buffered for the page. Because there
3414
are buffered operations for the page, the insert
3415
buffer B-tree page latches held by mtr will guarantee
3416
that no changes for the user page will be merged
3417
before mtr_commit(&mtr). We must not mtr_commit(&mtr)
3418
until after the IBUF_OP_DELETE has been buffered. */
3420
err = DB_STRONG_FAIL;
3425
/* After this point, the page could still be loaded to the
3426
buffer pool, but we do not have to care about it, since we are
3427
holding a latch on the insert buffer leaf page that contains
3428
buffered changes for (space, page_no). If the page enters the
3429
buffer pool, buf_page_io_complete() for (space, page_no) will
3430
have to acquire a latch on the same insert buffer leaf page,
3431
which it cannot do until we have buffered the IBUF_OP_DELETE
3432
and done mtr_commit(&mtr) to release the latch. */
2678
3434
#ifdef UNIV_IBUF_COUNT_DEBUG
2679
3435
ut_a((buffered == 0) || ibuf_count_get(space, page_no));
2688
3444
if (buf_page_peek(space, page_no)
2689
3445
|| lock_rec_expl_exist_on_page(space, page_no)) {
2690
err = DB_STRONG_FAIL;
2692
mtr_commit(&bitmap_mtr);
2697
bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
2698
IBUF_BITMAP_FREE, &bitmap_mtr);
2700
if (buffered + entry_size + page_dir_calc_reserved_space(1)
2701
> ibuf_index_page_calc_free_from_bits(zip_size, bits)) {
2702
mtr_commit(&bitmap_mtr);
2704
/* It may not fit */
2705
err = DB_STRONG_FAIL;
2709
ibuf_get_merge_page_nos(FALSE, btr_pcur_get_rec(&pcur),
2710
space_ids, space_versions,
2711
page_nos, &n_stored);
3450
if (op == IBUF_OP_INSERT) {
3451
ulint bits = ibuf_bitmap_page_get_bits(
3452
bitmap_page, page_no, zip_size, IBUF_BITMAP_FREE,
3455
if (buffered + entry_size + page_dir_calc_reserved_space(1)
3456
> ibuf_index_page_calc_free_from_bits(zip_size, bits)) {
3457
/* Release the bitmap page latch early. */
3458
mtr_commit(&bitmap_mtr);
3460
/* It may not fit */
3463
ibuf_get_merge_page_nos(
3464
FALSE, btr_pcur_get_rec(&pcur),
3465
space_ids, space_versions,
3466
page_nos, &n_stored);
3468
err = DB_STRONG_FAIL;
3474
/* Patch correct counter value to the entry to insert. This can
3475
change the insert position, which can result in the need to abort in
3478
&& !ibuf_set_entry_counter(ibuf_entry, space, page_no, &pcur,
3479
mode == BTR_MODIFY_PREV, &mtr)) {
3481
err = DB_STRONG_FAIL;
3483
mtr_commit(&bitmap_mtr);
2712
3485
goto function_exit;
2836
3613
ut_a(!dict_index_is_clust(index));
2838
switch (UNIV_EXPECT(ibuf_use, IBUF_USE_INSERT)) {
2841
case IBUF_USE_INSERT:
2843
case IBUF_USE_COUNT:
2847
ut_error; /* unknown value of ibuf_use */
3615
no_counter = use <= IBUF_USE_INSERT;
3618
case IBUF_OP_INSERT:
3621
case IBUF_USE_DELETE:
3622
case IBUF_USE_DELETE_MARK:
3624
case IBUF_USE_INSERT:
3625
case IBUF_USE_INSERT_DELETE_MARK:
3628
case IBUF_USE_COUNT:
3632
case IBUF_OP_DELETE_MARK:
3635
case IBUF_USE_INSERT:
3637
case IBUF_USE_DELETE_MARK:
3638
case IBUF_USE_DELETE:
3639
case IBUF_USE_INSERT_DELETE_MARK:
3643
case IBUF_USE_COUNT:
3647
case IBUF_OP_DELETE:
3650
case IBUF_USE_INSERT:
3651
case IBUF_USE_INSERT_DELETE_MARK:
3653
case IBUF_USE_DELETE_MARK:
3654
case IBUF_USE_DELETE:
3658
case IBUF_USE_COUNT:
3666
/* unknown op or use */
3670
/* If a thread attempts to buffer an insert on a page while a
3671
purge is in progress on the same page, the purge must not be
3672
buffered, because it could remove a record that was
3673
re-inserted later. For simplicity, we block the buffering of
3674
all operations on a page that has a purge pending.
3676
We do not check this in the IBUF_OP_DELETE case, because that
3677
would always trigger the buffer pool watch during purge and
3678
thus prevent the buffering of delete operations. We assume
3679
that the issuer of IBUF_OP_DELETE has called
3680
buf_pool_watch_set(space, page_no). */
3684
ulint fold = buf_page_address_fold(space, page_no);
3685
buf_pool_t* buf_pool = buf_pool_get(space, page_no);
3687
buf_pool_mutex_enter(buf_pool);
3688
bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3689
buf_pool_mutex_exit(buf_pool);
3691
if (UNIV_LIKELY_NULL(bpage)) {
3692
/* A buffer pool watch has been set or the
3693
page has been read into the buffer pool.
3694
Do not buffer the request. If a purge operation
3695
is being buffered, have this request executed
3696
directly on the page in the buffer pool after the
3697
buffered entries for this page have been merged. */
2850
3703
entry_size = rec_get_converted_size(index, entry, 0);
2853
>= (page_get_free_space_of_empty(dict_table_is_comp(index->table))
3706
>= page_get_free_space_of_empty(dict_table_is_comp(index->table))
2858
err = ibuf_insert_low(BTR_MODIFY_PREV, entry, entry_size,
3712
err = ibuf_insert_low(BTR_MODIFY_PREV, op, no_counter,
2859
3714
index, space, zip_size, page_no, thr);
2860
3715
if (err == DB_FAIL) {
2861
err = ibuf_insert_low(BTR_MODIFY_TREE, entry, entry_size,
3716
err = ibuf_insert_low(BTR_MODIFY_TREE, op, no_counter,
2862
3718
index, space, zip_size, page_no, thr);
3876
/****************************************************************//**
3877
During merge, sets the delete mark on a record for a secondary index
3883
const dtuple_t* entry, /*!< in: entry */
3884
buf_block_t* block, /*!< in/out: block */
3885
const dict_index_t* index, /*!< in: record descriptor */
3886
mtr_t* mtr) /*!< in: mtr */
3888
page_cur_t page_cur;
3891
ut_ad(ibuf_inside());
3892
ut_ad(dtuple_check_typed(entry));
3894
low_match = page_cur_search(
3895
block, index, entry, PAGE_CUR_LE, &page_cur);
3897
if (low_match == dtuple_get_n_fields(entry)) {
3899
page_zip_des_t* page_zip;
3901
rec = page_cur_get_rec(&page_cur);
3902
page_zip = page_cur_get_page_zip(&page_cur);
3904
btr_cur_set_deleted_flag_for_ibuf(rec, page_zip, TRUE, mtr);
3906
/* This can happen benignly in some situations. */
3910
/****************************************************************//**
3911
During merge, delete a record for a secondary index entry. */
3916
const dtuple_t* entry, /*!< in: entry */
3917
buf_block_t* block, /*!< in/out: block */
3918
dict_index_t* index, /*!< in: record descriptor */
3919
mtr_t* mtr) /*!< in/out: mtr; must be committed
3920
before latching any further pages */
3922
page_cur_t page_cur;
3925
ut_ad(ibuf_inside());
3926
ut_ad(dtuple_check_typed(entry));
3928
low_match = page_cur_search(
3929
block, index, entry, PAGE_CUR_LE, &page_cur);
3931
if (low_match == dtuple_get_n_fields(entry)) {
3932
page_zip_des_t* page_zip= buf_block_get_page_zip(block);
3933
page_t* page = buf_block_get_frame(block);
3934
rec_t* rec = page_cur_get_rec(&page_cur);
3936
/* TODO: the below should probably be a separate function,
3937
it's a bastardized version of btr_cur_optimistic_delete. */
3939
ulint offsets_[REC_OFFS_NORMAL_SIZE];
3940
ulint* offsets = offsets_;
3941
mem_heap_t* heap = NULL;
3944
rec_offs_init(offsets_);
3946
offsets = rec_get_offsets(
3947
rec, index, offsets, ULINT_UNDEFINED, &heap);
3949
/* Refuse to delete the last record. */
3950
ut_a(page_get_n_recs(page) > 1);
3952
/* The record should have been marked for deletion. */
3953
ut_ad(REC_INFO_DELETED_FLAG
3954
& rec_get_info_bits(rec, page_is_comp(page)));
3956
lock_update_delete(block, rec);
3960
= page_get_max_insert_size_after_reorganize(
3963
#ifdef UNIV_ZIP_DEBUG
3964
ut_a(!page_zip || page_zip_validate(page_zip, page));
3965
#endif /* UNIV_ZIP_DEBUG */
3966
page_cur_delete_rec(&page_cur, index, offsets, mtr);
3967
#ifdef UNIV_ZIP_DEBUG
3968
ut_a(!page_zip || page_zip_validate(page_zip, page));
3969
#endif /* UNIV_ZIP_DEBUG */
3972
ibuf_update_free_bits_zip(block, mtr);
3974
ibuf_update_free_bits_low(block, max_ins_size, mtr);
3977
if (UNIV_LIKELY_NULL(heap)) {
3978
mem_heap_free(heap);
3981
/* This can happen benignly in some situations: either when
3982
we crashed at just the right time, or on database startup
3983
when we redo some old log entries (due to worse stored
3984
position granularity on disk than in memory). */
3988
/*********************************************************************//**
3989
Restores insert buffer tree cursor position
3990
@return TRUE if the position was restored; FALSE if not */
3991
static __attribute__((nonnull))
3995
ulint space, /*!< in: space id */
3996
ulint page_no,/*!< in: index page number where the record
3998
const dtuple_t* search_tuple,
3999
/*!< in: search tuple for entries of page_no */
4000
ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
4001
btr_pcur_t* pcur, /*!< in/out: persistent cursor whose
4002
position is to be restored */
4003
mtr_t* mtr) /*!< in/out: mini-transaction */
4005
ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE);
4007
if (btr_pcur_restore_position(mode, pcur, mtr)) {
4012
if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
4013
/* The tablespace has been dropped. It is possible
4014
that another thread has deleted the insert buffer
4015
entry. Do not complain. */
4016
btr_pcur_commit_specify_mtr(pcur, mtr);
4019
"InnoDB: ERROR: Submit the output to"
4020
" http://bugs.mysql.com\n"
4021
"InnoDB: ibuf cursor restoration fails!\n"
4022
"InnoDB: ibuf record inserted to page %lu:%lu\n",
4023
(ulong) space, (ulong) page_no);
4026
rec_print_old(stderr, btr_pcur_get_rec(pcur));
4027
rec_print_old(stderr, pcur->old_rec);
4028
dtuple_print(stderr, search_tuple);
4030
rec_print_old(stderr,
4031
page_rec_get_next(btr_pcur_get_rec(pcur)));
4034
btr_pcur_commit_specify_mtr(pcur, mtr);
4036
fputs("InnoDB: Validating insert buffer tree:\n", stderr);
4037
if (!btr_validate_index(ibuf->index, NULL)) {
4041
fprintf(stderr, "InnoDB: ibuf tree ok\n");
3006
4048
/*********************************************************************//**
3007
4049
Deletes from ibuf the record on which pcur is positioned. If we have to
3008
4050
resort to a pessimistic delete, this function commits mtr and closes
3058
4100
mtr_start(mtr);
3060
success = btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr);
3063
if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
3064
/* The tablespace has been dropped. It is possible
3065
that another thread has deleted the insert buffer
3066
entry. Do not complain. */
3067
goto commit_and_exit;
3071
"InnoDB: ERROR: Submit the output to"
3072
" http://bugs.mysql.com\n"
3073
"InnoDB: ibuf cursor restoration fails!\n"
3074
"InnoDB: ibuf record inserted to page %lu\n",
3078
rec_print_old(stderr, btr_pcur_get_rec(pcur));
3079
rec_print_old(stderr, pcur->old_rec);
3080
dtuple_print(stderr, search_tuple);
3082
rec_print_old(stderr,
3083
page_rec_get_next(btr_pcur_get_rec(pcur)));
3086
btr_pcur_commit_specify_mtr(pcur, mtr);
3088
fputs("InnoDB: Validating insert buffer tree:\n", stderr);
3089
if (!btr_validate_index(ibuf->index, NULL)) {
3093
fprintf(stderr, "InnoDB: ibuf tree ok\n");
4102
if (!ibuf_restore_pos(space, page_no, search_tuple,
4103
BTR_MODIFY_TREE, pcur, mtr)) {
3096
4105
goto func_exit;
3352
4361
fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
3353
4362
} else if (block) {
3354
4363
/* Now we have at pcur a record which should be
3355
inserted to the index page; NOTE that the call below
4364
applied on the index page; NOTE that the call below
3356
4365
copies pointers to fields in rec, and we must
3357
4366
keep the latch to the rec page until the
3358
4367
insertion is finished! */
3359
4368
dtuple_t* entry;
3360
4369
trx_id_t max_trx_id;
3361
4370
dict_index_t* dummy_index;
4371
ibuf_op_t op = ibuf_rec_get_op_type(rec);
3363
4373
max_trx_id = page_get_max_trx_id(page_align(rec));
3364
4374
page_update_max_trx_id(block, page_zip, max_trx_id,
4377
ut_ad(page_validate(page_align(rec), ibuf->index));
3367
4379
entry = ibuf_build_entry_from_ibuf_rec(
3368
4380
rec, heap, &dummy_index);
4382
ut_ad(page_validate(block->frame, dummy_index));
4386
case IBUF_OP_INSERT:
3369
4387
#ifdef UNIV_IBUF_DEBUG
3370
volume += rec_get_converted_size(dummy_index, entry, 0)
3371
+ page_dir_calc_reserved_space(1);
3372
ut_a(volume <= 4 * UNIV_PAGE_SIZE
3373
/ IBUF_PAGE_SIZE_PER_FREE_SPACE);
4388
volume += rec_get_converted_size(
4389
dummy_index, entry, 0);
4391
volume += page_dir_calc_reserved_space(1);
4393
ut_a(volume <= 4 * UNIV_PAGE_SIZE
4394
/ IBUF_PAGE_SIZE_PER_FREE_SPACE);
3375
ibuf_insert_to_index_page(entry, block,
4396
ibuf_insert_to_index_page(
4397
entry, block, dummy_index, &mtr);
4400
case IBUF_OP_DELETE_MARK:
4402
entry, block, dummy_index, &mtr);
4405
case IBUF_OP_DELETE:
4406
ibuf_delete(entry, block, dummy_index, &mtr);
4407
/* Because ibuf_delete() will latch an
4408
insert buffer bitmap page, commit mtr
4409
before latching any further pages.
4410
Store and restore the cursor position. */
4411
ut_ad(rec == btr_pcur_get_rec(&pcur));
4412
ut_ad(page_rec_is_user_rec(rec));
4413
ut_ad(ibuf_rec_get_page_no(rec) == page_no);
4414
ut_ad(ibuf_rec_get_space(rec) == space);
4416
btr_pcur_store_position(&pcur, &mtr);
4417
btr_pcur_commit_specify_mtr(&pcur, &mtr);
4421
success = buf_page_get_known_nowait(
4424
__FILE__, __LINE__, &mtr);
4427
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
4429
if (!ibuf_restore_pos(space, page_no,
4436
ibuf_dummy_index_free(dummy_index);
3377
4447
ibuf_dummy_index_free(dummy_index);
4449
dops[ibuf_rec_get_op_type(rec)]++;
3382
4452
/* Delete the record from ibuf */
3383
4453
if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,