797
ibuf_bitmap_get_map_page_func(
798
/*==========================*/
799
ulint space, /*!< in: space id of the file page */
800
ulint page_no,/*!< in: page number of the file page */
801
ulint zip_size,/*!< in: compressed page size in bytes;
802
0 for uncompressed pages */
803
const char* file, /*!< in: file name */
804
ulint line, /*!< in: line where called */
805
mtr_t* mtr) /*!< in: mtr */
733
ibuf_bitmap_get_map_page(
734
/*=====================*/
735
ulint space, /*!< in: space id of the file page */
736
ulint page_no,/*!< in: page number of the file page */
737
ulint zip_size,/*!< in: compressed page size in bytes;
738
0 for uncompressed pages */
739
mtr_t* mtr) /*!< in: mtr */
807
741
buf_block_t* block;
809
block = buf_page_get_gen(space, zip_size,
810
ibuf_bitmap_page_no_calc(zip_size, page_no),
811
RW_X_LATCH, NULL, BUF_GET,
743
block = buf_page_get(space, zip_size,
744
ibuf_bitmap_page_no_calc(zip_size, page_no),
813
746
buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP);
815
748
return(buf_block_get_frame(block));
818
/********************************************************************//**
819
Gets the ibuf bitmap page where the bits describing a given file page are
821
@return bitmap page where the file page is mapped, that is, the bitmap
822
page containing the descriptor bits for the file page; the bitmap page
824
@param space in: space id of the file page
825
@param page_no in: page number of the file page
826
@param zip_size in: compressed page size in bytes; 0 for uncompressed pages
827
@param mtr in: mini-transaction */
828
#define ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr) \
829
ibuf_bitmap_get_map_page_func(space, page_no, zip_size, \
830
__FILE__, __LINE__, mtr)
832
751
/************************************************************************//**
833
752
Sets the free bits of the page in the ibuf bitmap. This is done in a separate
834
753
mini-transaction, hence this operation does not restrict further work to only
1217
/****************************************************************//**
1218
Get various information about an ibuf record in >= 4.1.x format. */
1223
const rec_t* rec, /*!< in: ibuf record */
1224
ibuf_op_t* op, /*!< out: operation type, or NULL */
1225
ibool* comp, /*!< out: compact flag, or NULL */
1226
ulint* info_len, /*!< out: length of info fields at the
1227
start of the fourth field, or
1229
ulint* counter) /*!< in: counter value, or NULL */
1235
/* Local variables to shadow arguments. */
1238
ulint info_len_local;
1239
ulint counter_local;
1241
ut_ad(ibuf_inside());
1242
fields = rec_get_n_fields_old(rec);
1245
types = rec_get_nth_field_old(rec, 3, &len);
1247
info_len_local = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
1249
switch (info_len_local) {
1252
op_local = IBUF_OP_INSERT;
1253
comp_local = info_len_local;
1255
counter_local = ULINT_UNDEFINED;
1258
case IBUF_REC_INFO_SIZE:
1259
op_local = (ibuf_op_t)types[IBUF_REC_OFFSET_TYPE];
1260
comp_local = types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT;
1261
counter_local = mach_read_from_2(
1262
types + IBUF_REC_OFFSET_COUNTER);
1269
ut_a(op_local < IBUF_OP_COUNT);
1270
ut_a((len - info_len_local) ==
1271
(fields - 4) * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1282
*info_len = info_len_local;
1286
*counter = counter_local;
1290
/****************************************************************//**
1291
Returns the operation type field of an ibuf record.
1292
@return operation type */
1295
ibuf_rec_get_op_type(
1296
/*=================*/
1297
const rec_t* rec) /*!< in: ibuf record */
1301
ut_ad(ibuf_inside());
1302
ut_ad(rec_get_n_fields_old(rec) > 2);
1304
(void) rec_get_nth_field_old(rec, 1, &len);
1307
/* This is a < 4.1.x format record */
1309
return(IBUF_OP_INSERT);
1313
ibuf_rec_get_info(rec, &op, NULL, NULL, NULL);
1319
/****************************************************************//**
1320
Read the first two bytes from a record's fourth field (counter field in new
1321
records; something else in older records).
1322
@return "counter" field, or ULINT_UNDEFINED if for some reason it
1326
ibuf_rec_get_counter(
1327
/*=================*/
1328
const rec_t* rec) /*!< in: ibuf record */
1333
if (rec_get_n_fields_old(rec) < 4) {
1335
return(ULINT_UNDEFINED);
1338
ptr = rec_get_nth_field_old(rec, 3, &len);
1342
return(mach_read_from_2(ptr));
1345
return(ULINT_UNDEFINED);
1349
/****************************************************************//**
1350
Add accumulated operation counts to a permanent array. Both arrays must be
1351
of size IBUF_OP_COUNT. */
1356
ulint* arr, /*!< in/out: array to modify */
1357
const ulint* ops) /*!< in: operation counts */
1360
#ifndef HAVE_ATOMIC_BUILTINS
1361
ut_ad(mutex_own(&ibuf_mutex));
1362
#endif /* !HAVE_ATOMIC_BUILTINS */
1366
for (i = 0; i < IBUF_OP_COUNT; i++) {
1367
#ifdef HAVE_ATOMIC_BUILTINS
1368
os_atomic_increment_ulint(&arr[i], ops[i]);
1369
#else /* HAVE_ATOMIC_BUILTINS */
1371
#endif /* HAVE_ATOMIC_BUILTINS */
1375
/****************************************************************//**
1376
Print operation counts. The array must be of size IBUF_OP_COUNT. */
1381
const ulint* ops, /*!< in: operation counts */
1382
FILE* file) /*!< in: file where to print */
1384
static const char* op_names[] = {
1391
ut_a(UT_ARR_SIZE(op_names) == IBUF_OP_COUNT);
1393
for (i = 0; i < IBUF_OP_COUNT; i++) {
1394
fprintf(file, "%s %lu%s", op_names[i],
1395
(ulong) ops[i], (i < (IBUF_OP_COUNT - 1)) ? ", " : "");
1401
1136
/********************************************************************//**
1402
1137
Creates a dummy index for inserting a record to a non-clustered index.
1403
1139
@return dummy index */
1603
/******************************************************************//**
1605
@return size of fields */
1610
const rec_t* rec, /*!< in: ibuf record */
1611
const byte* types, /*!< in: fields */
1612
ulint n_fields, /*!< in: number of fields */
1613
ibool pre_4_1, /*!< in: TRUE=pre-4.1 format,
1615
ulint comp) /*!< in: 0=ROW_FORMAT=REDUNDANT,
1616
nonzero=ROW_FORMAT=COMPACT */
1625
types_offset = DATA_ORDER_NULL_TYPE_BUF_SIZE;
1628
types_offset = DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
1631
for (i = 0; i < n_fields; i++) {
1635
rec_get_nth_field_offs_old(rec, i + field_offset, &len);
1637
if (len != UNIV_SQL_NULL) {
1639
} else if (pre_4_1) {
1640
dtype_read_for_order_and_null_size(&dtype, types);
1642
size += dtype_get_sql_null_size(&dtype, comp);
1644
dtype_new_read_for_order_and_null_size(&dtype, types);
1646
size += dtype_get_sql_null_size(&dtype, comp);
1649
types += types_offset;
1655
1333
/********************************************************************//**
1656
1334
Returns the space taken by a stored non-clustered index entry if converted to
1657
1335
an index record.
1693
1372
/* >= 4.1.x format record */
1697
1374
ut_a(trx_sys_multiple_tablespace_format);
1698
1375
ut_a(*data == 0);
1700
1377
types = rec_get_nth_field_old(ibuf_rec, 3, &len);
1702
ibuf_rec_get_info(ibuf_rec, &op, &comp, &info_len, NULL);
1704
if (op == IBUF_OP_DELETE_MARK || op == IBUF_OP_DELETE) {
1705
/* Delete-marking a record doesn't take any
1706
additional space, and while deleting a record
1707
actually frees up space, we have to play it safe and
1708
pretend it takes no additional space (the record
1709
might not exist, etc.). */
1379
comp = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
1383
/* compact record format */
1715
1385
dict_index_t* dummy_index;
1716
1386
mem_heap_t* heap = mem_heap_create(500);
1718
entry = ibuf_build_entry_from_ibuf_rec(
1387
dtuple_t* entry = ibuf_build_entry_from_ibuf_rec(
1719
1388
ibuf_rec, heap, &dummy_index);
1721
1389
volume = rec_get_converted_size(dummy_index, entry, 0);
1723
1390
ibuf_dummy_index_free(dummy_index);
1724
1391
mem_heap_free(heap);
1726
1392
return(volume + page_dir_calc_reserved_space(1));
1730
1395
n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
1733
data_size = ibuf_rec_get_size(ibuf_rec, types, n_fields, pre_4_1, comp);
1400
for (i = 0; i < n_fields; i++) {
1402
data = rec_get_nth_field_old(ibuf_rec, i + 4, &len);
1404
dtype_new_read_for_order_and_null_size(
1406
* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1408
data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
1410
dtype_read_for_order_and_null_size(
1412
* DATA_ORDER_NULL_TYPE_BUF_SIZE);
1415
if (len == UNIV_SQL_NULL) {
1416
data_size += dtype_get_sql_null_size(&dtype, comp);
1735
1422
return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
1736
1423
+ page_dir_calc_reserved_space(1));
1763
1447
const dfield_t* entry_field;
1764
1448
ulint n_fields;
1770
ut_ad(counter != ULINT_UNDEFINED || op == IBUF_OP_INSERT);
1771
ut_ad(counter == ULINT_UNDEFINED || counter <= 0xFFFF);
1772
ut_ad(op < IBUF_OP_COUNT);
1774
/* We have to build a tuple with the following fields:
1776
1-4) These are described at the top of this file.
1778
5) The rest of the fields are copied from the entry.
1780
All fields in the tuple are ordered like the type binary in our
1781
insert buffer tree. */
1453
/* Starting from 4.1.x, we have to build a tuple whose
1454
(1) first field is the space id,
1455
(2) the second field a single marker byte (0) to tell that this
1456
is a new format record,
1457
(3) the third contains the page number, and
1458
(4) the fourth contains the relevant type information of each data
1459
field; the length of this field % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE is
1460
(a) 0 for b-trees in the old format, and
1461
(b) 1 for b-trees in the compact format, the first byte of the field
1462
being the marker (0);
1463
(5) and the rest of the fields are copied from entry. All fields
1464
in the tuple are ordered like the type binary in our insert buffer
1783
1467
n_fields = dtuple_get_n_fields(entry);
1785
1469
tuple = dtuple_create(heap, n_fields + 4);
1471
/* Store the space id in tuple */
1789
1473
field = dtuple_get_nth_field(tuple, 0);
1817
1501
dfield_set_data(field, buf, 4);
1819
/* 4) Type info, part #1 */
1821
if (counter == ULINT_UNDEFINED) {
1822
i = dict_table_is_comp(index->table) ? 1 : 0;
1824
ut_ad(counter <= 0xFFFF);
1825
i = IBUF_REC_INFO_SIZE;
1828
ti = type_info = mem_heap_alloc(heap, i + n_fields
1829
* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1836
/* set the flag for ROW_FORMAT=COMPACT */
1840
/* the old format does not allow delete buffering */
1841
ut_ad(op == IBUF_OP_INSERT);
1843
case IBUF_REC_INFO_SIZE:
1844
mach_write_to_2(ti + IBUF_REC_OFFSET_COUNTER, counter);
1846
ti[IBUF_REC_OFFSET_TYPE] = (byte) op;
1847
ti[IBUF_REC_OFFSET_FLAGS] = dict_table_is_comp(index->table)
1848
? IBUF_REC_COMPACT : 0;
1849
ti += IBUF_REC_INFO_SIZE;
1853
/* 5+) Fields from the entry */
1503
/* Store the type info in buf2, and add the fields from entry to
1505
buf2 = mem_heap_alloc(heap, n_fields
1506
* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
1507
+ dict_table_is_comp(index->table));
1508
if (dict_table_is_comp(index->table)) {
1509
*buf2++ = 0; /* write the compact format indicator */
1855
1511
for (i = 0; i < n_fields; i++) {
1856
1512
ulint fixed_len;
1857
1513
const dict_field_t* ifield;
2634
2281
ulint sum_sizes;
2638
/* Perform dirty reads of ibuf->size and ibuf->max_size, to
2639
reduce ibuf_mutex contention. ibuf->max_size remains constant
2640
after ibuf_init_at_db_start(), but ibuf->size should be
2641
protected by ibuf_mutex. Given that ibuf->size fits in a
2642
machine word, this should be OK; at worst we are doing some
2643
excessive ibuf_contract() or occasionally skipping a
2646
max_size = ibuf->max_size;
2648
if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
2284
mutex_enter(&ibuf_mutex);
2286
if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
2287
mutex_exit(&ibuf_mutex);
2652
sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
2294
if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) {
2299
mutex_exit(&ibuf_mutex);
2654
2301
/* Contract at least entry_size many bytes */
2305
while ((size > 0) && (sum_sizes < entry_size)) {
2660
2307
size = ibuf_contract(sync);
2661
2308
sum_sizes += size;
2662
} while (size > 0 && sum_sizes < entry_size);
2665
/*********************************************************************//**
2666
Determine if an insert buffer record has been encountered already.
2667
@return TRUE if a new record, FALSE if possible duplicate */
2670
ibuf_get_volume_buffered_hash(
2671
/*==========================*/
2672
const rec_t* rec, /*!< in: ibuf record in post-4.1 format */
2673
const byte* types, /*!< in: fields */
2674
const byte* data, /*!< in: start of user record data */
2675
ulint comp, /*!< in: 0=ROW_FORMAT=REDUNDANT,
2676
nonzero=ROW_FORMAT=COMPACT */
2677
ulint* hash, /*!< in/out: hash array */
2678
ulint size) /*!< in: number of elements in hash array */
2684
len = ibuf_rec_get_size(rec, types, rec_get_n_fields_old(rec) - 4,
2686
fold = ut_fold_binary(data, len);
2688
hash += (fold / (8 * sizeof *hash)) % size; // 8 = bits in byte
2689
bitmask = 1 << (fold % (8 * sizeof *hash));
2691
if (*hash & bitmask) {
2696
/* We have not seen this record yet. Insert it. */
2702
/*********************************************************************//**
2703
Update the estimate of the number of records on a page, and
2704
get the space taken by merging the buffered record to the index page.
2705
@return size of index record in bytes + an upper limit of the space
2706
taken in the page directory */
2709
ibuf_get_volume_buffered_count(
2710
/*===========================*/
2711
const rec_t* rec, /*!< in: insert buffer record */
2712
ulint* hash, /*!< in/out: hash array */
2713
ulint size, /*!< in: number of elements in hash array */
2714
lint* n_recs) /*!< in/out: estimated number of records
2715
on the page that rec points to */
2720
ulint n_fields = rec_get_n_fields_old(rec);
2722
ut_ad(ibuf_inside());
2723
ut_ad(n_fields > 4);
2726
rec_get_nth_field_offs_old(rec, 1, &len);
2727
/* This function is only invoked when buffering new
2728
operations. All pre-4.1 records should have been merged
2729
when the database was started up. */
2731
ut_ad(trx_sys_multiple_tablespace_format);
2733
types = rec_get_nth_field_old(rec, 3, &len);
2735
switch (UNIV_EXPECT(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
2736
IBUF_REC_INFO_SIZE)) {
2740
/* This ROW_FORMAT=REDUNDANT record does not include an
2741
operation counter. Exclude it from the *n_recs,
2742
because deletes cannot be buffered if there are
2743
old-style inserts buffered for the page. */
2745
len = ibuf_rec_get_size(rec, types, n_fields, FALSE, 0);
2748
+ rec_get_converted_extra_size(len, n_fields, 0)
2749
+ page_dir_calc_reserved_space(1));
2751
/* This ROW_FORMAT=COMPACT record does not include an
2752
operation counter. Exclude it from the *n_recs,
2753
because deletes cannot be buffered if there are
2754
old-style inserts buffered for the page. */
2755
goto get_volume_comp;
2757
case IBUF_REC_INFO_SIZE:
2758
ibuf_op = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
2763
case IBUF_OP_INSERT:
2764
/* Inserts can be done by
2765
btr_cur_set_deleted_flag_for_ibuf(). Because
2766
delete-mark and insert operations can be pointing to
2767
the same records, we must not count duplicates. */
2768
case IBUF_OP_DELETE_MARK:
2769
/* There must be a record to delete-mark.
2770
See if this record has been already buffered. */
2771
if (n_recs && ibuf_get_volume_buffered_hash(
2772
rec, types + IBUF_REC_INFO_SIZE,
2774
types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT,
2779
if (ibuf_op == IBUF_OP_DELETE_MARK) {
2780
/* Setting the delete-mark flag does not
2781
affect the available space on the page. */
2785
case IBUF_OP_DELETE:
2786
/* A record will be removed from the page. */
2790
/* While deleting a record actually frees up space,
2791
we have to play it safe and pretend that it takes no
2792
additional space (the record might not exist, etc.). */
2798
ut_ad(ibuf_op == IBUF_OP_INSERT);
2804
dict_index_t* dummy_index;
2805
mem_heap_t* heap = mem_heap_create(500);
2807
entry = ibuf_build_entry_from_ibuf_rec(
2808
rec, heap, &dummy_index);
2810
volume = rec_get_converted_size(dummy_index, entry, 0);
2812
ibuf_dummy_index_free(dummy_index);
2813
mem_heap_free(heap);
2815
return(volume + page_dir_calc_reserved_space(1));
3067
2540
fil_set_max_space_id_if_bigger(max_space_id);
3070
/****************************************************************//**
3071
Helper function for ibuf_set_entry_counter. Checks if rec is for (space,
3072
page_no), and if so, reads counter value from it and returns that + 1.
3073
Otherwise, returns 0.
3074
@return new counter value, or 0 */
3077
ibuf_get_entry_counter_low(
3078
/*=======================*/
3079
const rec_t* rec, /*!< in: insert buffer record */
3080
ulint space, /*!< in: space id */
3081
ulint page_no) /*!< in: page number */
3087
ut_ad(ibuf_inside());
3088
ut_ad(rec_get_n_fields_old(rec) > 2);
3090
field = rec_get_nth_field_old(rec, 1, &len);
3092
if (UNIV_UNLIKELY(len != 1)) {
3093
/* pre-4.1 format */
3094
ut_a(trx_doublewrite_must_reset_space_ids);
3095
ut_a(!trx_sys_multiple_tablespace_format);
3097
return(ULINT_UNDEFINED);
3100
ut_a(trx_sys_multiple_tablespace_format);
3102
/* Check the tablespace identifier. */
3103
field = rec_get_nth_field_old(rec, 0, &len);
3106
if (mach_read_from_4(field) != space) {
3111
/* Check the page offset. */
3112
field = rec_get_nth_field_old(rec, 2, &len);
3115
if (mach_read_from_4(field) != page_no) {
3120
/* Check if the record contains a counter field. */
3121
field = rec_get_nth_field_old(rec, 3, &len);
3123
switch (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
3126
case 0: /* ROW_FORMAT=REDUNDANT */
3127
case 1: /* ROW_FORMAT=COMPACT */
3128
return(ULINT_UNDEFINED);
3130
case IBUF_REC_INFO_SIZE:
3131
counter = mach_read_from_2(field + IBUF_REC_OFFSET_COUNTER);
3132
ut_a(counter < 0xFFFF);
3133
return(counter + 1);
3137
/****************************************************************//**
3138
Set the counter field in entry to the correct value based on the current
3139
last record in ibuf for (space, page_no).
3140
@return FALSE if we should abort this insertion to ibuf */
3143
ibuf_set_entry_counter(
3144
/*===================*/
3145
dtuple_t* entry, /*!< in/out: entry to patch */
3146
ulint space, /*!< in: space id of entry */
3147
ulint page_no, /*!< in: page number of entry */
3148
btr_pcur_t* pcur, /*!< in: pcur positioned on the record
3149
found by btr_pcur_open(.., entry,
3150
PAGE_CUR_LE, ..., pcur, ...) */
3151
ibool is_optimistic, /*!< in: is this an optimistic insert */
3152
mtr_t* mtr) /*!< in: mtr */
3158
/* pcur points to either a user rec or to a page's infimum record. */
3159
ut_ad(page_validate(btr_pcur_get_page(pcur), ibuf->index));
3161
if (btr_pcur_is_on_user_rec(pcur)) {
3163
counter = ibuf_get_entry_counter_low(
3164
btr_pcur_get_rec(pcur), space, page_no);
3166
if (UNIV_UNLIKELY(counter == ULINT_UNDEFINED)) {
3167
/* The record lacks a counter field.
3168
Such old records must be merged before
3169
new records can be buffered. */
3173
} else if (btr_pcur_is_before_first_in_tree(pcur, mtr)) {
3174
/* Ibuf tree is either completely empty, or the insert
3175
position is at the very first record of a non-empty tree. In
3176
either case we have no previous records for (space,
3180
} else if (btr_pcur_is_before_first_on_page(pcur)) {
3181
btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur);
3183
if (cursor->low_match < 3) {
3184
/* If low_match < 3, we know that the father node
3185
pointer did not contain the searched for (space,
3186
page_no), which means that the search ended on the
3187
right page regardless of the counter value, and
3188
since we're at the infimum record, there are no
3189
existing records. */
3199
ut_a(cursor->ibuf_cnt != ULINT_UNDEFINED);
3201
page = btr_pcur_get_page(pcur);
3202
prev_page_no = btr_page_get_prev(page, mtr);
3204
ut_a(prev_page_no != FIL_NULL);
3206
block = buf_page_get(
3207
IBUF_SPACE_ID, 0, prev_page_no,
3210
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
3212
prev_page = buf_block_get_frame(block);
3214
rec = page_rec_get_prev(
3215
page_get_supremum_rec(prev_page));
3217
ut_ad(page_rec_is_user_rec(rec));
3219
counter = ibuf_get_entry_counter_low(
3220
rec, space, page_no);
3222
if (UNIV_UNLIKELY(counter == ULINT_UNDEFINED)) {
3223
/* The record lacks a counter field.
3224
Such old records must be merged before
3225
new records can be buffered. */
3230
if (counter < cursor->ibuf_cnt) {
3231
/* Search ended on the wrong page. */
3233
if (is_optimistic) {
3234
/* In an optimistic insert, we can
3235
shift the insert position to the left
3236
page, since it only needs an X-latch
3237
on the page itself, which the
3238
original search acquired for us. */
3241
ibuf->index, rec, block,
3242
btr_pcur_get_btr_cur(pcur));
3244
/* We can't shift the insert
3245
position to the left page in a
3246
pessimistic insert since it would
3247
require an X-latch on the left
3248
page's left page, so we have to
3254
/* The counter field in the father node is
3255
the same as we would insert; we don't know
3256
whether the insert should go to this page or
3257
the left page (the later fields can differ),
3258
so refuse the insert. */
3264
/* The cursor is not positioned at or before a user record. */
3268
/* Patch counter value in already built entry. */
3269
field = dtuple_get_nth_field(entry, 3);
3270
data = dfield_get_data(field);
3272
mach_write_to_2(data + IBUF_REC_OFFSET_COUNTER, counter);
3277
2543
/*********************************************************************//**
3278
Buffer an operation in the insert/delete buffer, instead of doing it
3279
directly to the disk page, if this is possible.
3280
@return DB_SUCCESS, DB_STRONG_FAIL or other error */
2544
Makes an index insert to the insert buffer, instead of directly to the disk
2545
page, if this is possible.
2546
@return DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
3283
2549
ibuf_insert_low(
3284
2550
/*============*/
3285
2551
ulint mode, /*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
3286
ibuf_op_t op, /*!< in: operation type */
3288
/*!< in: TRUE=use 5.0.3 format;
3289
FALSE=allow delete buffering */
3290
2552
const dtuple_t* entry, /*!< in: index entry to insert */
3291
2553
ulint entry_size,
3292
2554
/*!< in: rec_get_converted_size(index, entry) */
3349
2604
return(DB_STRONG_FAIL);
2607
mutex_exit(&ibuf_mutex);
2609
if (mode == BTR_MODIFY_TREE) {
2610
mutex_enter(&ibuf_pessimistic_insert_mutex);
2614
mutex_enter(&ibuf_mutex);
2616
while (!ibuf_data_enough_free_for_insert()) {
2618
mutex_exit(&ibuf_mutex);
2622
mutex_exit(&ibuf_pessimistic_insert_mutex);
2624
err = ibuf_add_free_page();
2626
if (err == DB_STRONG_FAIL) {
2631
mutex_enter(&ibuf_pessimistic_insert_mutex);
2635
mutex_enter(&ibuf_mutex);
3352
2641
heap = mem_heap_create(512);
3354
/* Build the entry which contains the space id and the page number
3355
as the first fields and the type information for other fields, and
3356
which will be inserted to the insert buffer. Using a counter value
3357
of 0xFFFF we find the last record for (space, page_no), from which
3358
we can then read the counter value N and use N + 1 in the record we
3359
insert. (We patch the ibuf_entry's counter field to the correct
3360
value just before actually inserting the entry.) */
2643
/* Build the entry which contains the space id and the page number as
2644
the first fields and the type information for other fields, and which
2645
will be inserted to the insert buffer. */
3362
ibuf_entry = ibuf_entry_build(
3363
op, index, entry, space, page_no,
3364
no_counter ? ULINT_UNDEFINED : 0xFFFF, heap);
2647
ibuf_entry = ibuf_entry_build(index, entry, space, page_no, heap);
3366
2649
/* Open a cursor to the insert buffer tree to calculate if we can add
3367
2650
the new entry to it without exceeding the free space limit for the
3370
if (mode == BTR_MODIFY_TREE) {
3373
mutex_enter(&ibuf_pessimistic_insert_mutex);
3374
mutex_enter(&ibuf_mutex);
3376
if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) {
3381
mutex_exit(&ibuf_mutex);
3382
mutex_exit(&ibuf_pessimistic_insert_mutex);
3385
if (UNIV_UNLIKELY(!ibuf_add_free_page())) {
3387
mem_heap_free(heap);
3388
return(DB_STRONG_FAIL);
3395
2653
mtr_start(&mtr);
3397
2655
btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
3398
ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
3400
2657
/* Find out the volume of already buffered inserts for the same index
3403
buffered = ibuf_get_volume_buffered(&pcur, space, page_no,
3404
op == IBUF_OP_DELETE
3408
if (op == IBUF_OP_DELETE
3410
|| buf_pool_watch_occurred(space, page_no))) {
3411
/* The page could become empty after the record is
3412
deleted, or the page has been read in to the buffer
3413
pool. Refuse to buffer the operation. */
3415
/* The buffer pool watch is needed for IBUF_OP_DELETE
3416
because of latching order considerations. We can
3417
check buf_pool_watch_occurred() only after latching
3418
the insert buffer B-tree pages that contain buffered
3419
changes for the page. We never buffer IBUF_OP_DELETE,
3420
unless some IBUF_OP_INSERT or IBUF_OP_DELETE_MARK have
3421
been previously buffered for the page. Because there
3422
are buffered operations for the page, the insert
3423
buffer B-tree page latches held by mtr will guarantee
3424
that no changes for the user page will be merged
3425
before mtr_commit(&mtr). We must not mtr_commit(&mtr)
3426
until after the IBUF_OP_DELETE has been buffered. */
3429
if (mode == BTR_MODIFY_TREE) {
3430
mutex_exit(&ibuf_mutex);
3431
mutex_exit(&ibuf_pessimistic_insert_mutex);
3434
err = DB_STRONG_FAIL;
3438
/* After this point, the page could still be loaded to the
3439
buffer pool, but we do not have to care about it, since we are
3440
holding a latch on the insert buffer leaf page that contains
3441
buffered changes for (space, page_no). If the page enters the
3442
buffer pool, buf_page_io_complete() for (space, page_no) will
3443
have to acquire a latch on the same insert buffer leaf page,
3444
which it cannot do until we have buffered the IBUF_OP_DELETE
3445
and done mtr_commit(&mtr) to release the latch. */
2659
buffered = ibuf_get_volume_buffered(&pcur, space, page_no, &mtr);
3447
2661
#ifdef UNIV_IBUF_COUNT_DEBUG
3448
2662
ut_a((buffered == 0) || ibuf_count_get(space, page_no));
3457
2671
if (buf_page_peek(space, page_no)
3458
2672
|| lock_rec_expl_exist_on_page(space, page_no)) {
3463
if (op == IBUF_OP_INSERT) {
3464
ulint bits = ibuf_bitmap_page_get_bits(
3465
bitmap_page, page_no, zip_size, IBUF_BITMAP_FREE,
3468
if (buffered + entry_size + page_dir_calc_reserved_space(1)
3469
> ibuf_index_page_calc_free_from_bits(zip_size, bits)) {
3470
/* Release the bitmap page latch early. */
3471
mtr_commit(&bitmap_mtr);
3473
/* It may not fit */
3476
ibuf_get_merge_page_nos(
3477
FALSE, btr_pcur_get_rec(&pcur),
3478
space_ids, space_versions,
3479
page_nos, &n_stored);
3485
/* Patch correct counter value to the entry to insert. This can
3486
change the insert position, which can result in the need to abort in
3489
&& !ibuf_set_entry_counter(ibuf_entry, space, page_no, &pcur,
3490
mode == BTR_MODIFY_PREV, &mtr)) {
3492
mtr_commit(&bitmap_mtr);
2673
err = DB_STRONG_FAIL;
2675
mtr_commit(&bitmap_mtr);
2680
bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
2681
IBUF_BITMAP_FREE, &bitmap_mtr);
2683
if (buffered + entry_size + page_dir_calc_reserved_space(1)
2684
> ibuf_index_page_calc_free_from_bits(zip_size, bits)) {
2685
mtr_commit(&bitmap_mtr);
2687
/* It may not fit */
2688
err = DB_STRONG_FAIL;
2692
ibuf_get_merge_page_nos(FALSE, btr_pcur_get_rec(&pcur),
2693
space_ids, space_versions,
2694
page_nos, &n_stored);
3497
2698
/* Set the bitmap bit denoting that the insert buffer contains
3624
2819
ut_a(!dict_index_is_clust(index));
3626
no_counter = use <= IBUF_USE_INSERT;
3629
case IBUF_OP_INSERT:
3632
case IBUF_USE_DELETE:
3633
case IBUF_USE_DELETE_MARK:
3635
case IBUF_USE_INSERT:
3636
case IBUF_USE_INSERT_DELETE_MARK:
3639
case IBUF_USE_COUNT:
3643
case IBUF_OP_DELETE_MARK:
3646
case IBUF_USE_INSERT:
3648
case IBUF_USE_DELETE_MARK:
3649
case IBUF_USE_DELETE:
3650
case IBUF_USE_INSERT_DELETE_MARK:
3654
case IBUF_USE_COUNT:
3658
case IBUF_OP_DELETE:
3661
case IBUF_USE_INSERT:
3662
case IBUF_USE_INSERT_DELETE_MARK:
3664
case IBUF_USE_DELETE_MARK:
3665
case IBUF_USE_DELETE:
3669
case IBUF_USE_COUNT:
3677
/* unknown op or use */
3681
/* If a thread attempts to buffer an insert on a page while a
3682
purge is in progress on the same page, the purge must not be
3683
buffered, because it could remove a record that was
3684
re-inserted later. For simplicity, we block the buffering of
3685
all operations on a page that has a purge pending.
3687
We do not check this in the IBUF_OP_DELETE case, because that
3688
would always trigger the buffer pool watch during purge and
3689
thus prevent the buffering of delete operations. We assume
3690
that the issuer of IBUF_OP_DELETE has called
3691
buf_pool_watch_set(space, page_no). */
3695
ulint fold = buf_page_address_fold(space, page_no);
3696
buf_pool_t* buf_pool = buf_pool_get(space, page_no);
3698
buf_pool_mutex_enter(buf_pool);
3699
bpage = buf_page_hash_get_low(buf_pool, space, page_no, fold);
3700
buf_pool_mutex_exit(buf_pool);
3702
if (UNIV_LIKELY_NULL(bpage)) {
3703
/* A buffer pool watch has been set or the
3704
page has been read into the buffer pool.
3705
Do not buffer the request. If a purge operation
3706
is being buffered, have this request executed
3707
directly on the page in the buffer pool after the
3708
buffered entries for this page have been merged. */
2821
switch (UNIV_EXPECT(ibuf_use, IBUF_USE_INSERT)) {
2824
case IBUF_USE_INSERT:
2826
case IBUF_USE_COUNT:
2830
ut_error; /* unknown value of ibuf_use */
3714
2833
entry_size = rec_get_converted_size(index, entry, 0);
3717
>= page_get_free_space_of_empty(dict_table_is_comp(index->table))
2836
>= (page_get_free_space_of_empty(dict_table_is_comp(index->table))
3723
err = ibuf_insert_low(BTR_MODIFY_PREV, op, no_counter,
2841
err = ibuf_insert_low(BTR_MODIFY_PREV, entry, entry_size,
3725
2842
index, space, zip_size, page_no, thr);
3726
2843
if (err == DB_FAIL) {
3727
err = ibuf_insert_low(BTR_MODIFY_TREE, op, no_counter,
2844
err = ibuf_insert_low(BTR_MODIFY_TREE, entry, entry_size,
3729
2845
index, space, zip_size, page_no, thr);
3881
/****************************************************************//**
3882
During merge, sets the delete mark on a record for a secondary index
3888
const dtuple_t* entry, /*!< in: entry */
3889
buf_block_t* block, /*!< in/out: block */
3890
const dict_index_t* index, /*!< in: record descriptor */
3891
mtr_t* mtr) /*!< in: mtr */
3893
page_cur_t page_cur;
3896
ut_ad(ibuf_inside());
3897
ut_ad(dtuple_check_typed(entry));
3899
low_match = page_cur_search(
3900
block, index, entry, PAGE_CUR_LE, &page_cur);
3902
if (low_match == dtuple_get_n_fields(entry)) {
3904
page_zip_des_t* page_zip;
3906
rec = page_cur_get_rec(&page_cur);
3907
page_zip = page_cur_get_page_zip(&page_cur);
3909
btr_cur_set_deleted_flag_for_ibuf(rec, page_zip, TRUE, mtr);
3911
/* This can happen benignly in some situations. */
3915
/****************************************************************//**
3916
During merge, delete a record for a secondary index entry. */
3921
const dtuple_t* entry, /*!< in: entry */
3922
buf_block_t* block, /*!< in/out: block */
3923
dict_index_t* index, /*!< in: record descriptor */
3924
mtr_t* mtr) /*!< in/out: mtr; must be committed
3925
before latching any further pages */
3927
page_cur_t page_cur;
3930
ut_ad(ibuf_inside());
3931
ut_ad(dtuple_check_typed(entry));
3933
low_match = page_cur_search(
3934
block, index, entry, PAGE_CUR_LE, &page_cur);
3936
if (low_match == dtuple_get_n_fields(entry)) {
3937
page_zip_des_t* page_zip= buf_block_get_page_zip(block);
3938
page_t* page = buf_block_get_frame(block);
3939
rec_t* rec = page_cur_get_rec(&page_cur);
3941
/* TODO: the below should probably be a separate function,
3942
it's a bastardized version of btr_cur_optimistic_delete. */
3944
ulint offsets_[REC_OFFS_NORMAL_SIZE];
3945
ulint* offsets = offsets_;
3946
mem_heap_t* heap = NULL;
3949
rec_offs_init(offsets_);
3951
offsets = rec_get_offsets(
3952
rec, index, offsets, ULINT_UNDEFINED, &heap);
3954
/* Refuse to delete the last record. */
3955
ut_a(page_get_n_recs(page) > 1);
3957
/* The record should have been marked for deletion. */
3958
ut_ad(REC_INFO_DELETED_FLAG
3959
& rec_get_info_bits(rec, page_is_comp(page)));
3961
lock_update_delete(block, rec);
3965
= page_get_max_insert_size_after_reorganize(
3968
#ifdef UNIV_ZIP_DEBUG
3969
ut_a(!page_zip || page_zip_validate(page_zip, page));
3970
#endif /* UNIV_ZIP_DEBUG */
3971
page_cur_delete_rec(&page_cur, index, offsets, mtr);
3972
#ifdef UNIV_ZIP_DEBUG
3973
ut_a(!page_zip || page_zip_validate(page_zip, page));
3974
#endif /* UNIV_ZIP_DEBUG */
3977
ibuf_update_free_bits_zip(block, mtr);
3979
ibuf_update_free_bits_low(block, max_ins_size, mtr);
3982
if (UNIV_LIKELY_NULL(heap)) {
3983
mem_heap_free(heap);
3986
/* This can happen benignly in some situations: either when
3987
we crashed at just the right time, or on database startup
3988
when we redo some old log entries (due to worse stored
3989
position granularity on disk than in memory). */
3993
/*********************************************************************//**
3994
Restores insert buffer tree cursor position
3995
@return TRUE if the position was restored; FALSE if not */
3996
static __attribute__((nonnull))
4000
ulint space, /*!< in: space id */
4001
ulint page_no,/*!< in: index page number where the record
4003
const dtuple_t* search_tuple,
4004
/*!< in: search tuple for entries of page_no */
4005
ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
4006
btr_pcur_t* pcur, /*!< in/out: persistent cursor whose
4007
position is to be restored */
4008
mtr_t* mtr) /*!< in/out: mini-transaction */
4010
ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE);
4012
if (btr_pcur_restore_position(mode, pcur, mtr)) {
4017
if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
4018
/* The tablespace has been dropped. It is possible
4019
that another thread has deleted the insert buffer
4020
entry. Do not complain. */
4021
btr_pcur_commit_specify_mtr(pcur, mtr);
4024
"InnoDB: ERROR: Submit the output to"
4025
" http://bugs.mysql.com\n"
4026
"InnoDB: ibuf cursor restoration fails!\n"
4027
"InnoDB: ibuf record inserted to page %lu:%lu\n",
4028
(ulong) space, (ulong) page_no);
4031
rec_print_old(stderr, btr_pcur_get_rec(pcur));
4032
rec_print_old(stderr, pcur->old_rec);
4033
dtuple_print(stderr, search_tuple);
4035
rec_print_old(stderr,
4036
page_rec_get_next(btr_pcur_get_rec(pcur)));
4039
btr_pcur_commit_specify_mtr(pcur, mtr);
4041
fputs("InnoDB: Validating insert buffer tree:\n", stderr);
4042
if (!btr_validate_index(ibuf->index, NULL)) {
4046
fprintf(stderr, "InnoDB: ibuf tree ok\n");
4053
2989
/*********************************************************************//**
4054
2990
Deletes from ibuf the record on which pcur is positioned. If we have to
4055
2991
resort to a pessimistic delete, this function commits mtr and closes
4121
3041
mtr_start(mtr);
4123
if (!ibuf_restore_pos(space, page_no, search_tuple,
4124
BTR_MODIFY_TREE, pcur, mtr)) {
4126
mutex_exit(&ibuf_mutex);
3043
success = btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr);
3046
if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
3047
/* The tablespace has been dropped. It is possible
3048
that another thread has deleted the insert buffer
3049
entry. Do not complain. */
3050
goto commit_and_exit;
3054
"InnoDB: ERROR: Submit the output to"
3055
" http://bugs.mysql.com\n"
3056
"InnoDB: ibuf cursor restoration fails!\n"
3057
"InnoDB: ibuf record inserted to page %lu\n",
3061
rec_print_old(stderr, btr_pcur_get_rec(pcur));
3062
rec_print_old(stderr, pcur->old_rec);
3063
dtuple_print(stderr, search_tuple);
3065
rec_print_old(stderr,
3066
page_rec_get_next(btr_pcur_get_rec(pcur)));
3069
btr_pcur_commit_specify_mtr(pcur, mtr);
3071
fputs("InnoDB: Validating insert buffer tree:\n", stderr);
3072
if (!btr_validate_index(ibuf->index, NULL)) {
3076
fprintf(stderr, "InnoDB: ibuf tree ok\n");
4127
3079
goto func_exit;
4384
3335
fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
4385
3336
} else if (block) {
4386
3337
/* Now we have at pcur a record which should be
4387
applied on the index page; NOTE that the call below
3338
inserted to the index page; NOTE that the call below
4388
3339
copies pointers to fields in rec, and we must
4389
3340
keep the latch to the rec page until the
4390
3341
insertion is finished! */
4391
3342
dtuple_t* entry;
4392
3343
trx_id_t max_trx_id;
4393
3344
dict_index_t* dummy_index;
4394
ibuf_op_t op = ibuf_rec_get_op_type(rec);
4396
3346
max_trx_id = page_get_max_trx_id(page_align(rec));
4397
3347
page_update_max_trx_id(block, page_zip, max_trx_id,
4400
ut_ad(page_validate(page_align(rec), ibuf->index));
4402
3350
entry = ibuf_build_entry_from_ibuf_rec(
4403
3351
rec, heap, &dummy_index);
4405
ut_ad(page_validate(block->frame, dummy_index));
4409
case IBUF_OP_INSERT:
4410
3352
#ifdef UNIV_IBUF_DEBUG
4411
volume += rec_get_converted_size(
4412
dummy_index, entry, 0);
4414
volume += page_dir_calc_reserved_space(1);
4416
ut_a(volume <= 4 * UNIV_PAGE_SIZE
4417
/ IBUF_PAGE_SIZE_PER_FREE_SPACE);
3353
volume += rec_get_converted_size(dummy_index, entry, 0)
3354
+ page_dir_calc_reserved_space(1);
3355
ut_a(volume <= 4 * UNIV_PAGE_SIZE
3356
/ IBUF_PAGE_SIZE_PER_FREE_SPACE);
4419
ibuf_insert_to_index_page(
4420
entry, block, dummy_index, &mtr);
4423
case IBUF_OP_DELETE_MARK:
4425
entry, block, dummy_index, &mtr);
4428
case IBUF_OP_DELETE:
4429
ibuf_delete(entry, block, dummy_index, &mtr);
4430
/* Because ibuf_delete() will latch an
4431
insert buffer bitmap page, commit mtr
4432
before latching any further pages.
4433
Store and restore the cursor position. */
4434
ut_ad(rec == btr_pcur_get_rec(&pcur));
4435
ut_ad(page_rec_is_user_rec(rec));
4436
ut_ad(ibuf_rec_get_page_no(rec) == page_no);
4437
ut_ad(ibuf_rec_get_space(rec) == space);
4439
btr_pcur_store_position(&pcur, &mtr);
4440
btr_pcur_commit_specify_mtr(&pcur, &mtr);
4444
success = buf_page_get_known_nowait(
4447
__FILE__, __LINE__, &mtr);
4450
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
4452
if (!ibuf_restore_pos(space, page_no,
4459
ibuf_dummy_index_free(dummy_index);
3358
ibuf_insert_to_index_page(entry, block,
4470
3360
ibuf_dummy_index_free(dummy_index);
4472
dops[ibuf_rec_get_op_type(rec)]++;
4475
3365
/* Delete the record from ibuf */
4476
3366
if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,