    page_t*     page,       /* in: leaf page where the search
                            converged */
    ulint       space,      /* in: space id */
    ulint       zip_size,   /* in: compressed page size in bytes
                            or 0 for uncompressed pages */
    ulint       page_no,    /* in: page number of the leaf */
    ulint       latch_mode, /* in: BTR_SEARCH_LEAF, ... */
    btr_cur_t*  cursor,     /* in: cursor */
    mtr_t*      mtr)        /* in: mtr */
{
    ulint           mode;
    ulint           left_page_no;
    ulint           right_page_no;
    buf_block_t*    get_block;

    ut_ad(page && mtr);

    switch (latch_mode) {
    case BTR_SEARCH_LEAF:
    case BTR_MODIFY_LEAF:
        mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
        get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
#ifdef UNIV_BTR_DEBUG
        ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
#endif /* UNIV_BTR_DEBUG */
        get_block->check_index_page_at_flush = TRUE;
        return;
    case BTR_MODIFY_TREE:
        /* x-latch also brothers from left to right */
        left_page_no = btr_page_get_prev(page, mtr);

        if (left_page_no != FIL_NULL) {
            get_block = btr_block_get(space, zip_size,
                                      left_page_no,
                                      RW_X_LATCH, mtr);
#ifdef UNIV_BTR_DEBUG
            ut_a(page_is_comp(get_block->frame)
                 == page_is_comp(page));
            ut_a(btr_page_get_next(get_block->frame, mtr)
                 == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
            get_block->check_index_page_at_flush = TRUE;
        }

        get_block = btr_block_get(space, zip_size, page_no,
                                  RW_X_LATCH, mtr);
#ifdef UNIV_BTR_DEBUG
        ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
#endif /* UNIV_BTR_DEBUG */
        get_block->check_index_page_at_flush = TRUE;

        right_page_no = btr_page_get_next(page, mtr);

        if (right_page_no != FIL_NULL) {
            get_block = btr_block_get(space, zip_size,
                                      right_page_no,
                                      RW_X_LATCH, mtr);
#ifdef UNIV_BTR_DEBUG
            ut_a(page_is_comp(get_block->frame)
                 == page_is_comp(page));
            ut_a(btr_page_get_prev(get_block->frame, mtr)
                 == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
            get_block->check_index_page_at_flush = TRUE;
        }

        return;
    case BTR_SEARCH_PREV:
    case BTR_MODIFY_PREV:
        mode = latch_mode == BTR_SEARCH_PREV ? RW_S_LATCH : RW_X_LATCH;
        /* latch also left brother */
        left_page_no = btr_page_get_prev(page, mtr);

        if (left_page_no != FIL_NULL) {
            get_block = btr_block_get(space, zip_size,
                                      left_page_no, mode, mtr);
            cursor->left_block = get_block;
#ifdef UNIV_BTR_DEBUG
            ut_a(page_is_comp(get_block->frame)
                 == page_is_comp(page));
            ut_a(btr_page_get_next(get_block->frame, mtr)
                 == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
            get_block->check_index_page_at_flush = TRUE;
        }

        get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
#ifdef UNIV_BTR_DEBUG
        ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
#endif /* UNIV_BTR_DEBUG */
        get_block->check_index_page_at_flush = TRUE;
        return;
    }

    ut_error;
}
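/* To summarize the latching rules above: BTR_SEARCH_LEAF takes an S-latch
and BTR_MODIFY_LEAF an X-latch on the leaf page only; BTR_MODIFY_TREE
x-latches the left brother, the leaf and the right brother, in that order;
BTR_SEARCH_PREV and BTR_MODIFY_PREV latch the left brother first (keeping it
in cursor->left_block) and then the leaf, with an S- or X-latch depending on
the mode. */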
/************************************************************************
Inserts a record if there is enough space, or if enough space can be
freed by reorganizing the page. */
static
rec_t*
btr_cur_insert_if_possible(
/*=======================*/
                            /* out: pointer to inserted record if
                            succeeded, else NULL */
    btr_cur_t*      cursor, /* in: cursor on page after which to insert;
                            cursor stays valid */
    const dtuple_t* tuple,  /* in: tuple to insert; the size info need not
                            have been stored to tuple */
    ulint           n_ext,  /* in: number of externally stored columns */
    mtr_t*          mtr)    /* in: mtr */
{
    page_cur_t*     page_cursor;
    buf_block_t*    block;
    rec_t*          rec;

    ut_ad(dtuple_check_typed(tuple));

    block = btr_cur_get_block(cursor);

    ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
    page_cursor = btr_cur_get_page_cur(cursor);

    /* Now, try the insert */
    rec = page_cur_tuple_insert(page_cursor, tuple,
                                cursor->index, n_ext, mtr);

    if (UNIV_UNLIKELY(!rec)) {
        /* If record did not fit, reorganize */

        if (btr_page_reorganize(block, cursor->index, mtr)) {

            page_cur_search(block, cursor->index, tuple,
                            PAGE_CUR_LE, page_cursor);

            rec = page_cur_tuple_insert(page_cursor, tuple,
                                        cursor->index, n_ext, mtr);
        }
    }

    return(rec);
}
#endif /* UNIV_DEBUG */

    ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
    max_size = page_get_max_insert_size_after_reorganize(page, 1);
    leaf = page_is_leaf(page);

    /* Calculate the record size when entry is converted to a record */
    rec_size = rec_get_converted_size(index, entry, n_ext);

    if (page_zip_rec_needs_ext(rec_size, page_is_comp(page), zip_size)) {

        /* The record is so big that we have to store some fields
        externally on separate database pages */
        big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);

        if (UNIV_UNLIKELY(big_rec_vec == NULL)) {

            return(DB_TOO_BIG_RECORD);
        }

        rec_size = rec_get_converted_size(index, entry, n_ext);
    }

    /* If there have been many consecutive inserts, and we are on the leaf
    level, check if we have to split the page to reserve enough free space
    for future updates of records. */

    if (dict_index_is_clust(index)
        && (page_get_n_recs(page) >= 2)
        && UNIV_LIKELY(leaf)
        && (dict_index_get_space_reserve() + rec_size > max_size)
        && (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
            || btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
fail:
        err = DB_FAIL;
fail_err:

        if (big_rec_vec) {
            dtuple_convert_back_big_rec(index, entry, big_rec_vec);
        }

        if (UNIV_LIKELY_NULL(heap)) {
            mem_heap_free(heap);
        }

        return(err);
    }

    if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT
                      || max_size < rec_size)
        && UNIV_LIKELY(page_get_n_recs(page) > 1)
        && page_get_max_insert_size(page, 1) < rec_size) {

        goto fail;
    }

    /* Check locks and write to the undo log, if specified */
    err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, &inherit);

    if (UNIV_UNLIKELY(err != DB_SUCCESS)) {

        goto fail_err;
    }

    page_cursor = btr_cur_get_page_cur(cursor);

    /* Now, try the insert */
    {
        const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);

        *rec = page_cur_tuple_insert(page_cursor, entry, index,
                                     n_ext, mtr);
        reorg = page_cursor_rec != page_cur_get_rec(page_cursor);

        if (UNIV_UNLIKELY(reorg)) {
            ut_a(zip_size);
            ut_a(*rec);
        }
    }

    if (UNIV_UNLIKELY(!*rec) && UNIV_LIKELY(!reorg)) {
        /* If the record did not fit, reorganize */
        if (UNIV_UNLIKELY(!btr_page_reorganize(block, index, mtr))) {
            ut_a(zip_size);

            goto fail;
        }

        ut_ad(zip_size
              || page_get_max_insert_size(page, 1) == max_size);

        reorg = TRUE;

        page_cur_search(block, index, entry, PAGE_CUR_LE, page_cursor);

        *rec = page_cur_tuple_insert(page_cursor, entry, index,
                                     n_ext, mtr);

        if (UNIV_UNLIKELY(!*rec)) {
            if (UNIV_LIKELY(zip_size != 0)) {

                goto fail;
            }

            fputs("InnoDB: Error: cannot insert tuple ", stderr);
            dtuple_print(stderr, entry);
            fputs(" into ", stderr);
    if (UNIV_UNLIKELY(zip_size)) {
        /* Estimate the free space of an empty compressed page. */
        ulint   free_space_zip = page_zip_empty_size(
            cursor->index->n_fields, zip_size);

        if (UNIV_UNLIKELY(rec_get_converted_size(index, entry, n_ext)
                          > free_space_zip)) {
            /* Try to insert the record by itself on a new page.
            If it fails, no amount of splitting will help. */
            buf_block_t*    temp_block
                = buf_block_alloc(zip_size);
            page_t*         temp_page
                = page_create_zip(temp_block, index, 0, NULL);
            page_cur_t      temp_cursor;
            rec_t*          temp_rec;

            page_cur_position(temp_page + PAGE_NEW_INFIMUM,
                              temp_block, &temp_cursor);

            temp_rec = page_cur_tuple_insert(&temp_cursor,
                                             entry, index,
                                             n_ext, NULL);
            buf_block_free(temp_block);

            if (UNIV_UNLIKELY(!temp_rec)) {
                if (big_rec_vec) {
                    dtuple_convert_back_big_rec(
                        index, entry, big_rec_vec);
                }

                if (UNIV_LIKELY_NULL(heap)) {
                    mem_heap_free(heap);
                }

                return(DB_TOO_BIG_RECORD);
            }
        }
    }
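    /* In other words: if even a freshly created, empty compressed page
    (page_create_zip() above) cannot hold the record by itself, no amount
    of page splitting can succeed either, so DB_TOO_BIG_RECORD is returned
    at once. */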
    if (dict_index_get_page(index)
        == buf_block_get_page_no(btr_cur_get_block(cursor))) {

        /* The page is the root page */
        *rec = btr_root_raise_and_insert(cursor, entry, n_ext, mtr);
    } else {
        *rec = btr_page_split_and_insert(cursor, entry, n_ext, mtr);
    }

    if (UNIV_LIKELY_NULL(heap)) {
        mem_heap_free(heap);
    }

    ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec);

#ifdef BTR_CUR_ADAPT
    btr_search_update_hash_on_insert(cursor);
#endif
    if (!(flags & BTR_NO_LOCKING_FLAG)) {

        lock_update_insert(btr_cur_get_block(cursor), *rec);
    }

    if (n_extents > 0) {
        fil_space_release_free_extents(index->space, n_reserved);
    }

    *big_rec = big_rec_vec;

    return(DB_SUCCESS);
}

/*==================== B-TREE UPDATE =========================*/
    delete the lock structs set on the root page even if the root
    page carries just node pointers. */

    lock_rec_store_on_page_infimum(block, rec);

    btr_search_update_hash_on_delete(cursor);

#ifdef UNIV_ZIP_DEBUG
    ut_a(!page_zip || page_zip_validate(page_zip, page));
#endif /* UNIV_ZIP_DEBUG */
    page_cursor = btr_cur_get_page_cur(cursor);

    page_cur_delete_rec(page_cursor, index, offsets, mtr);

    page_cur_move_to_prev(page_cursor);

    rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);

    if (rec) {
        lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
                                           rec, block);

        offsets = rec_get_offsets(rec, index, offsets,
                                  ULINT_UNDEFINED, heap);

        if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
            /* The new inserted record owns its possible externally
            stored fields */
            btr_cur_unmark_extern_fields(page_zip,
                                         rec, index, offsets, mtr);
        }

        btr_cur_compress_if_useful(cursor, mtr);

        if (page_zip && !dict_index_is_clust(index)
            && page_is_leaf(page)) {
            /* Update the free bits in the insert buffer. */
            ibuf_update_free_bits_zip(block, mtr);
        }

        err = DB_SUCCESS;
        goto return_after_reservations;
    } else {
        ut_a(optim_err != DB_UNDERFLOW);

        /* Out of space: reset the free bits. */
        if (!dict_index_is_clust(index)
            && page_is_leaf(page)) {
            ibuf_reset_free_bits(block);
        }
    }

    /* Was the record to be updated positioned as the first user
    record on its page? */
    was_first = page_cur_is_before_first(page_cursor);

    /* The first parameter means that no lock checking and undo logging
    is made in the insert */
    ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
    n = rec_offs_n_fields(offsets);

    if (!rec_offs_any_extern(offsets)) {

        return;
    }

    for (i = 0; i < n; i++) {
        if (rec_offs_nth_extern(offsets, i)) {

            btr_cur_set_ownership_of_extern_field(
                page_zip, rec, index, offsets, i, TRUE, mtr);
        }
    }
}

/***********************************************************************
Marks all extern fields in a dtuple as owned by the record. */
static
void
btr_cur_unmark_dtuple_extern_fields(
/*================================*/
    dtuple_t*   entry)  /* in/out: clustered index entry */
{
    ulint   i;

    for (i = 0; i < dtuple_get_n_fields(entry); i++) {
        dfield_t* dfield = dtuple_get_nth_field(entry, i);

        if (dfield_is_ext(dfield)) {
            byte*   data = dfield_get_data(dfield);
            ulint   len = dfield_get_len(dfield);

            data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
                &= ~BTR_EXTERN_OWNER_FLAG;
        }
    }
}
/***********************************************************************
Flags the data tuple fields that are marked as extern storage in the
update vector. We use this function to remember which fields we must
mark as extern storage in a record inserted for an update. */
static
ulint
btr_push_update_extern_fields(
/*==========================*/
                            /* out: number of flagged external columns */
    dtuple_t*       tuple,  /* in/out: data tuple */
    const upd_t*    update, /* in: update vector */
    mem_heap_t*     heap)   /* in: memory heap */
{
    ulint                   n_pushed = 0;
    ulint                   n;
    const upd_field_t*      uf;

    uf = update->fields;
    n = upd_get_n_fields(update);

    for (; n--; uf++) {
        if (dfield_is_ext(&uf->new_val)) {
            dfield_t*   field
                = dtuple_get_nth_field(tuple, uf->field_no);

            if (!dfield_is_ext(field)) {
                dfield_set_ext(field);
                n_pushed++;
            }

            switch (uf->orig_len) {
                byte*   data;
                ulint   len;
                byte*   buf;
            case 0:
                break;
            case BTR_EXTERN_FIELD_REF_SIZE:
                /* Restore the original locally stored
                part of the column. In the undo log,
                InnoDB writes a longer prefix of externally
                stored columns, so that column prefixes
                in secondary indexes can be reconstructed. */
                dfield_set_data(field,
                                (byte*) dfield_get_data(field)
                                + dfield_get_len(field)
                                - BTR_EXTERN_FIELD_REF_SIZE,
                                BTR_EXTERN_FIELD_REF_SIZE);
                dfield_set_ext(field);
                break;
            default:
                /* Reconstruct the original locally
                stored part of the column. The data
                will have to be copied. */
                ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);

                data = dfield_get_data(field);
                len = dfield_get_len(field);

                buf = mem_heap_alloc(heap, uf->orig_len);
                /* Copy the locally stored prefix. */
                memcpy(buf, data,
                       uf->orig_len
                       - BTR_EXTERN_FIELD_REF_SIZE);
                /* Copy the BLOB pointer. */
                memcpy(buf + uf->orig_len
                       - BTR_EXTERN_FIELD_REF_SIZE,
                       data + len - BTR_EXTERN_FIELD_REF_SIZE,
                       BTR_EXTERN_FIELD_REF_SIZE);

                dfield_set_data(field, buf, uf->orig_len);
                dfield_set_ext(field);
            }
        }
    }

    return(n_pushed);
}

static
ulint
btr_blob_get_next_page_no(
/*======================*/
                                /* out: page number or FIL_NULL if
                                no more pages */
    const byte* blob_header)    /* in: blob header */
{
    return(mach_read_from_4(blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO));
}
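/* Layout note: each uncompressed BLOB part page carries a small header at
FIL_PAGE_DATA consisting of BTR_BLOB_HDR_PART_LEN (4 bytes: the amount of
BLOB data stored on this page) followed by BTR_BLOB_HDR_NEXT_PAGE_NO
(4 bytes: the next part page, or FIL_NULL), together BTR_BLOB_HDR_SIZE
bytes; the BLOB data itself starts at FIL_PAGE_DATA + BTR_BLOB_HDR_SIZE. */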
/***********************************************************************
Deallocate a buffer block that was reserved for a BLOB part. */
static
void
btr_blob_free(
/*==========*/
    buf_block_t*    block,  /* in: buffer block */
    ibool           all,    /* in: TRUE=remove also the compressed page
                            if there is one */
    mtr_t*          mtr)    /* in: mini-transaction to commit */
{
    ulint   space   = buf_block_get_space(block);
    ulint   page_no = buf_block_get_page_no(block);

    ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));

    mtr_commit(mtr);

    buf_pool_mutex_enter();
    mutex_enter(&block->mutex);

    /* Only free the block if it is still allocated to
    the same file page. */

    if (buf_block_get_state(block)
        == BUF_BLOCK_FILE_PAGE
        && buf_block_get_space(block) == space
        && buf_block_get_page_no(block) == page_no) {

        if (buf_LRU_free_block(&block->page, all, NULL)
            != BUF_LRU_FREED
            && all && block->page.zip.data) {
            /* Attempt to deallocate the uncompressed page
            if the whole block cannot be deallocated. */

            buf_LRU_free_block(&block->page, FALSE, NULL);
        }
    }

    buf_pool_mutex_exit();
    mutex_exit(&block->mutex);
}
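/* Note that btr_blob_free() commits the caller's mini-transaction before
taking the buffer pool mutex and the block mutex, so the page latch is no
longer held when buf_LRU_free_block() runs; the state/space/page_no re-check
above guards against the block having been evicted and reused for a
different page in the meantime. */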
/***********************************************************************
Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec. The extern flags in rec will have to be set beforehand.
The fields are stored on pages allocated from leaf node
file segment of the index tree. */

ulint
btr_store_big_rec_extern_fields(
/*============================*/
                                /* out: DB_SUCCESS or error */
    dict_index_t*   index,      /* in: index of rec; the index tree
                                MUST be X-latched */
    buf_block_t*    rec_block,  /* in/out: block containing rec */
    rec_t*          rec,        /* in/out: record */
    const ulint*    offsets,    /* in: rec_get_offsets(rec, index);
                                the "external storage" flags in offsets
                                will not correspond to rec when
                                this function returns */
    big_rec_t*      big_rec_vec,/* in: vector of fields to be stored
                                externally */
    mtr_t*          local_mtr)  /* in: mtr containing the latch to rec
                                and to the tree */
{
    ulint           rec_page_no;
    byte*           field_ref;
    ulint           extern_len;
    ulint           store_len;
    ulint           page_no;
    ulint           space_id;
    ulint           zip_size;
    ulint           prev_page_no;
    ulint           hint_page_no;
    ulint           i;
    mtr_t           mtr;
    mem_heap_t*     heap = NULL;
    page_zip_des_t* page_zip;
    z_stream        c_stream;

    ut_ad(rec_offs_validate(rec, index, offsets));
    ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
                            MTR_MEMO_X_LOCK));
    ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
    ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
    ut_a(dict_index_is_clust(index));

    page_zip = buf_block_get_page_zip(rec_block);
    ut_a(dict_table_zip_size(index->table)
         == buf_block_get_zip_size(rec_block));

    space_id = buf_block_get_space(rec_block);
    zip_size = buf_block_get_zip_size(rec_block);
    rec_page_no = buf_block_get_page_no(rec_block);
    ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);

    if (UNIV_LIKELY_NULL(page_zip)) {
        int err;

        /* Zlib deflate needs 128 kilobytes for the default
        window size, plus 512 << memLevel, plus a few
        kilobytes for small objects. We use reduced memLevel
        to limit the memory consumption, and preallocate the
        heap, hoping to avoid memory fragmentation. */
        heap = mem_heap_create(250000);
        page_zip_set_alloc(&c_stream, heap);

        err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
                           Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
        ut_a(err == Z_OK);
    }
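    /* In the deflateInit2() call above, Z_DEFAULT_COMPRESSION, Z_DEFLATED
    and Z_DEFAULT_STRATEGY select the default compression level, method and
    strategy; 15 is the windowBits argument (the zlib default, which is what
    the 128 kilobyte window figure in the comment refers to) and 7 is the
    reduced memLevel (the zlib default is 8), giving the 512 << memLevel
    bytes mentioned above. */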
    /* We have to create a file segment to the tablespace
    for each field and put the pointer to the field in rec */

    for (i = 0; i < big_rec_vec->n_fields; i++) {
        ut_ad(rec_offs_nth_extern(offsets,
                                  big_rec_vec->fields[i].field_no));
        {
            ulint   local_len;

            field_ref = rec_get_nth_field(
                rec, offsets, big_rec_vec->fields[i].field_no,
                &local_len);
            ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
            local_len -= BTR_EXTERN_FIELD_REF_SIZE;
            field_ref += local_len;
        }
        extern_len = big_rec_vec->fields[i].len;

        ut_a(extern_len > 0);

        prev_page_no = FIL_NULL;

        if (UNIV_LIKELY_NULL(page_zip)) {
            int err = deflateReset(&c_stream);
            ut_a(err == Z_OK);

            c_stream.next_in = (void*) big_rec_vec->fields[i].data;
            c_stream.avail_in = extern_len;
        }

        while (extern_len > 0) {
            buf_block_t*    block;
            page_t*         page;

            mtr_start(&mtr);

            if (prev_page_no == FIL_NULL) {
                hint_page_no = 1 + rec_page_no;
            } else {
                hint_page_no = prev_page_no + 1;
            }

            block = btr_page_alloc(index, hint_page_no,
                                   FSP_NO_DIR, 0, &mtr);
            if (UNIV_UNLIKELY(block == NULL)) {

                mtr_commit(&mtr);

                if (UNIV_LIKELY_NULL(page_zip)) {
                    deflateEnd(&c_stream);
                    mem_heap_free(heap);
                }

                return(DB_OUT_OF_FILE_SPACE);
            }

            page_no = buf_block_get_page_no(block);
            page = buf_block_get_frame(block);

            if (prev_page_no != FIL_NULL) {
                buf_block_t*    prev_block;
                page_t*         prev_page;

                prev_block = buf_page_get(space_id, zip_size,
                                          prev_page_no,
                                          RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
                buf_block_dbg_add_level(prev_block,
                                        SYNC_EXTERN_STORAGE);
#endif /* UNIV_SYNC_DEBUG */
                prev_page = buf_block_get_frame(prev_block);

                if (UNIV_LIKELY_NULL(page_zip)) {
                    mlog_write_ulint(
                        prev_page + FIL_PAGE_NEXT,
                        page_no, MLOG_4BYTES, &mtr);
                    memcpy(buf_block_get_page_zip(
                               prev_block)
                           ->data + FIL_PAGE_NEXT,
                           prev_page + FIL_PAGE_NEXT, 4);
                } else {
                    mlog_write_ulint(
                        prev_page + FIL_PAGE_DATA
                        + BTR_BLOB_HDR_NEXT_PAGE_NO,
                        page_no, MLOG_4BYTES, &mtr);
                }
            }

            if (UNIV_LIKELY_NULL(page_zip)) {
                int             err;
                page_zip_des_t* blob_page_zip;

                mach_write_to_2(page + FIL_PAGE_TYPE,
                                prev_page_no == FIL_NULL
                                ? FIL_PAGE_TYPE_ZBLOB
                                : FIL_PAGE_TYPE_ZBLOB2);

                c_stream.next_out = page
                    + FIL_PAGE_DATA;
                c_stream.avail_out
                    = page_zip_get_size(page_zip)
                    - FIL_PAGE_DATA;

                err = deflate(&c_stream, Z_FINISH);
                ut_a(err == Z_OK || err == Z_STREAM_END);
                ut_a(err == Z_STREAM_END
                     || c_stream.avail_out == 0);

                /* Write the "next BLOB page" pointer */
                mlog_write_ulint(page + FIL_PAGE_NEXT,
                                 FIL_NULL, MLOG_4BYTES, &mtr);
                /* Initialize the unused "prev page" pointer */
                mlog_write_ulint(page + FIL_PAGE_PREV,
                                 FIL_NULL, MLOG_4BYTES, &mtr);
                /* Write a back pointer to the record
                into the otherwise unused area. This
                information could be useful in
                debugging. Later, we might want to
                implement the possibility to relocate
                BLOB pages. Then, we would need to be
                able to adjust the BLOB pointer in the
                record. We do not store the heap
                number of the record, because it can
                change in page_zip_reorganize() or
                btr_page_reorganize(). However, also
                the page number of the record may
                change when B-tree nodes are split or
                merged. */
                mlog_write_ulint(page
                                 + FIL_PAGE_FILE_FLUSH_LSN,
                                 space_id,
                                 MLOG_4BYTES, &mtr);
                mlog_write_ulint(page
                                 + FIL_PAGE_FILE_FLUSH_LSN + 4,
                                 rec_page_no,
                                 MLOG_4BYTES, &mtr);

                /* Zero out the unused part of the page. */
                memset(page + page_zip_get_size(page_zip)
                       - c_stream.avail_out,
                       0, c_stream.avail_out);
                mlog_log_string(page + FIL_PAGE_TYPE,
                                page_zip_get_size(page_zip)
                                - FIL_PAGE_TYPE, &mtr);
                /* Copy the page to compressed storage,
                because it will be flushed to disk
                from there. */
                blob_page_zip = buf_block_get_page_zip(block);
                ut_ad(blob_page_zip);
                ut_ad(page_zip_get_size(blob_page_zip)
                      == page_zip_get_size(page_zip));
                memcpy(blob_page_zip->data, page,
                       page_zip_get_size(page_zip));

                if (err == Z_OK && prev_page_no != FIL_NULL) {

                    goto next_zip_page;
                }

                rec_block = buf_page_get(space_id, zip_size,
                                         rec_page_no,
                                         RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
                buf_block_dbg_add_level(rec_block,
                                        SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
                if (err == Z_STREAM_END) {
                    mach_write_to_4(field_ref
                                    + BTR_EXTERN_LEN, 0);
                    mach_write_to_4(field_ref
                                    + BTR_EXTERN_LEN + 4,
                                    c_stream.total_in);
                } else {
                    memset(field_ref + BTR_EXTERN_LEN,
                           0, 8);
                }

                if (prev_page_no == FIL_NULL) {
                    mach_write_to_4(field_ref
                                    + BTR_EXTERN_SPACE_ID,
                                    space_id);
                    mach_write_to_4(field_ref
                                    + BTR_EXTERN_PAGE_NO,
                                    page_no);
                    mach_write_to_4(field_ref
                                    + BTR_EXTERN_OFFSET,
                                    FIL_PAGE_NEXT);
                }

                page_zip_write_blob_ptr(
                    page_zip, rec, index, offsets,
                    big_rec_vec->fields[i].field_no, &mtr);

next_zip_page:
                prev_page_no = page_no;

                /* Commit mtr and release the
                uncompressed page frame to save memory. */
                btr_blob_free(block, FALSE, &mtr);

                if (err == Z_STREAM_END) {
                    break;
                }
            } else {
                mlog_write_ulint(page + FIL_PAGE_TYPE,
                                 FIL_PAGE_TYPE_BLOB,
                                 MLOG_2BYTES, &mtr);

                if (extern_len > (UNIV_PAGE_SIZE - FIL_PAGE_DATA
                                  - BTR_BLOB_HDR_SIZE
                                  - FIL_PAGE_DATA_END)) {
                    store_len = UNIV_PAGE_SIZE - FIL_PAGE_DATA
                        - BTR_BLOB_HDR_SIZE
                        - FIL_PAGE_DATA_END;
                } else {
                    store_len = extern_len;
                }

                mlog_write_string(page + FIL_PAGE_DATA
                                  + BTR_BLOB_HDR_SIZE,
                                  (const byte*)
                                  big_rec_vec->fields[i].data
                                  + big_rec_vec->fields[i].len
                                  - extern_len,
                                  store_len, &mtr);
                mlog_write_ulint(page + FIL_PAGE_DATA
                                 + BTR_BLOB_HDR_PART_LEN,
                                 store_len, MLOG_4BYTES, &mtr);
                mlog_write_ulint(page + FIL_PAGE_DATA
                                 + BTR_BLOB_HDR_NEXT_PAGE_NO,
                                 FIL_NULL, MLOG_4BYTES, &mtr);

                extern_len -= store_len;

                rec_block = buf_page_get(space_id, zip_size,
                                         rec_page_no,
                                         RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
                buf_block_dbg_add_level(rec_block,
                                        SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
                mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
                                 MLOG_4BYTES, &mtr);
                mlog_write_ulint(field_ref
                                 + BTR_EXTERN_LEN + 4,
                                 big_rec_vec->fields[i].len
                                 - extern_len,
                                 MLOG_4BYTES, &mtr);

                if (prev_page_no == FIL_NULL) {
                    mlog_write_ulint(field_ref
                                     + BTR_EXTERN_SPACE_ID,
                                     space_id,
                                     MLOG_4BYTES, &mtr);
                    mlog_write_ulint(field_ref
                                     + BTR_EXTERN_PAGE_NO,
                                     page_no,
                                     MLOG_4BYTES, &mtr);
                    mlog_write_ulint(field_ref
                                     + BTR_EXTERN_OFFSET,
                                     FIL_PAGE_DATA,
                                     MLOG_4BYTES, &mtr);
                }

                prev_page_no = page_no;

                mtr_commit(&mtr);

                if (extern_len == 0) {
                    break;
                }
            }
        }
    }

    if (UNIV_LIKELY_NULL(page_zip)) {
        deflateEnd(&c_stream);
        mem_heap_free(heap);
    }

    return(DB_SUCCESS);
}
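/* The 20-byte (BTR_EXTERN_FIELD_REF_SIZE) field reference written above
through field_ref consists of BTR_EXTERN_SPACE_ID, BTR_EXTERN_PAGE_NO and
BTR_EXTERN_OFFSET (4 bytes each) locating the first BLOB page, followed by
the 8-byte BTR_EXTERN_LEN field, whose first byte carries the
BTR_EXTERN_OWNER_FLAG and BTR_EXTERN_INHERITED_FLAG bits and whose last
4 bytes store the externally stored data length. */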
/***********************************************************************
Frees the space in an externally stored field to the file space
management if the field in data is owned by the externally stored field,
in a rollback we may have the additional condition that the field must
not be inherited. */

void
btr_free_externally_stored_field(
/*=============================*/
    dict_index_t*   index,      /* in: index of the data, the index
                                tree MUST be X-latched */
    byte*           field_ref,  /* in/out: field reference */
    const rec_t*    rec,        /* in: record containing field_ref, for
                                page_zip_write_blob_ptr(), or NULL */
    const ulint*    offsets,    /* in: rec_get_offsets(rec, index),
                                or NULL */
    page_zip_des_t* page_zip,   /* in: compressed page corresponding
                                to rec, or NULL if rec == NULL */
    ulint           i,          /* in: field number of field_ref;
                                ignored if rec == NULL */
    ibool           do_not_free_inherited,/* in: TRUE if called in a
                                rollback and we do not want to free
                                inherited fields */
    mtr_t*          local_mtr)  /* in: mtr
                                containing the latch to data and an
                                X-latch to the index tree */
{
    page_t* page;
    ulint   space_id;
    ulint   rec_zip_size = dict_table_zip_size(index->table);
    ulint   ext_zip_size;
    ulint   page_no;
    ulint   next_page_no;
    mtr_t   mtr;

    ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
                            MTR_MEMO_X_LOCK));
    ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
                                 MTR_MEMO_PAGE_X_FIX));
    ut_ad(!rec || rec_offs_validate(rec, index, offsets));

#ifdef UNIV_DEBUG
    if (rec) {
        ulint       local_len;
        const byte* f = rec_get_nth_field(rec, offsets,
                                          i, &local_len);
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
        local_len -= BTR_EXTERN_FIELD_REF_SIZE;
        f += local_len;
        ut_ad(f == field_ref);
    }
#endif /* UNIV_DEBUG */

    space_id = mach_read_from_4(field_ref + BTR_EXTERN_SPACE_ID);

    if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) {
        ext_zip_size = fil_space_get_zip_size(space_id);
        /* This must be an undo log record in the system tablespace,
        that is, in row_purge_upd_exist_or_extern().
        Currently, externally stored records are stored in the
        same tablespace as the referring records. */
        ut_ad(!page_get_space_id(page_align(field_ref)));
    } else {
        ext_zip_size = rec_zip_size;
    }

    if (!rec) {
        /* This is a call from row_purge_upd_exist_or_extern(). */
        rec_zip_size = 0;
    }

    for (;;) {
        buf_block_t*    rec_block;
        buf_block_t*    ext_block;

        mtr_start(&mtr);

        rec_block = buf_page_get(page_get_space_id(
                                     page_align(field_ref)),
                                 rec_zip_size,
                                 page_get_page_no(
                                     page_align(field_ref)),
                                 RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
        buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
        page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);

        if (/* There is no external storage data */
            page_no == FIL_NULL
            /* This field does not own the externally stored field */
            || (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
                & BTR_EXTERN_OWNER_FLAG)
            /* Rollback and inherited field */
            || (do_not_free_inherited
                && (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
                    & BTR_EXTERN_INHERITED_FLAG))) {

            /* Do not free */
            mtr_commit(&mtr);

            return;
        }

        ext_block = buf_page_get(space_id, ext_zip_size, page_no,
                                 RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
        buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE);
#endif /* UNIV_SYNC_DEBUG */
        page = buf_block_get_frame(ext_block);

        if (ext_zip_size) {
            /* Note that page_zip will be NULL
            in row_purge_upd_exist_or_extern(). */
            switch (fil_page_get_type(page)) {
            case FIL_PAGE_TYPE_ZBLOB:
            case FIL_PAGE_TYPE_ZBLOB2:
                break;
            default:
                ut_error;
            }
            next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);

            btr_page_free_low(index, ext_block, 0, &mtr);

            if (UNIV_LIKELY(page_zip != NULL)) {
                mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO,
                                next_page_no);
                mach_write_to_4(field_ref + BTR_EXTERN_LEN + 4,
                                0);
                page_zip_write_blob_ptr(page_zip, rec, index,
                                        offsets, i, &mtr);
            } else {
                mlog_write_ulint(field_ref
                                 + BTR_EXTERN_PAGE_NO,
                                 next_page_no,
                                 MLOG_4BYTES, &mtr);
                mlog_write_ulint(field_ref
                                 + BTR_EXTERN_LEN + 4, 0,
                                 MLOG_4BYTES, &mtr);
            }
        } else {
            ulint   extern_len = mach_read_from_4(
                field_ref + BTR_EXTERN_LEN + 4);
            ulint   part_len = btr_blob_get_part_len(
                page + FIL_PAGE_DATA);

            ut_a(fil_page_get_type(page) == FIL_PAGE_TYPE_BLOB);

            ut_a(extern_len >= part_len);

            next_page_no = mach_read_from_4(
                page + FIL_PAGE_DATA
                + BTR_BLOB_HDR_NEXT_PAGE_NO);

            /* We must supply the page level (= 0) as an argument
            because we did not store it on the page (we save the
            space overhead from an index page header. */

            ut_a(space_id == page_get_space_id(page));
            ut_a(page_no == page_get_page_no(page));

            btr_page_free_low(index, ext_block, 0, &mtr);

            mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
                             next_page_no,
                             MLOG_4BYTES, &mtr);
            mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4,
                             extern_len - part_len,
                             MLOG_4BYTES, &mtr);

            if (next_page_no == FIL_NULL) {
                ut_a(extern_len - part_len == 0);
            }

            if (extern_len - part_len == 0) {
                ut_a(next_page_no == FIL_NULL);
            }
        }

        /* Commit mtr and release the BLOB block to save memory. */
        btr_blob_free(ext_block, TRUE, &mtr);
    }
}
/***************************************************************
Frees the externally stored fields for a record. */

void
btr_rec_free_externally_stored_fields(
/*==================================*/
    dict_index_t*   index,  /* in: index of the data, the index
                            tree MUST be X-latched */
    rec_t*          rec,    /* in/out: record */
    const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
    page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
                            part will be updated, or NULL */
    ibool           do_not_free_inherited,/* in: TRUE if called in a
                            rollback and we do not want to free
                            inherited fields */
    mtr_t*          mtr)    /* in: mini-transaction handle which contains
                            an X-latch to record page and to the tree */
{
    ulint   n_fields;
    ulint   i;

    ut_ad(rec_offs_validate(rec, index, offsets));
    ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));

    /* Free possible externally stored fields in the record */

    n_fields = rec_offs_n_fields(offsets);

    for (i = 0; i < n_fields; i++) {
        if (rec_offs_nth_extern(offsets, i)) {
            ulint   len;
            byte*   data
                = rec_get_nth_field(rec, offsets, i, &len);
            ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);

            btr_free_externally_stored_field(
                index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
                rec, offsets, page_zip, i,
                do_not_free_inherited, mtr);
        }
    }
}

/***************************************************************
Frees the externally stored fields for a record, if those fields are
updated in the update vector. */

void
btr_rec_free_updated_extern_fields(
/*===============================*/
    dict_index_t*   index,  /* in: index of rec; the index tree MUST be
                            X-latched */
    rec_t*          rec,    /* in/out: record */
    page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
                            part will be updated, or NULL */
    const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
    const upd_t*    update, /* in: update vector */
    mtr_t*          mtr)    /* in: mini-transaction handle which contains
                            an X-latch to record page and to the tree */
{
    ulint   n_fields;
    ulint   i;

    ut_ad(rec_offs_validate(rec, index, offsets));
    ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));

    /* Free possible externally stored fields in the record */

    n_fields = upd_get_n_fields(update);

    for (i = 0; i < n_fields; i++) {
        const upd_field_t* ufield = upd_get_nth_field(update, i);

        if (rec_offs_nth_extern(offsets, ufield->field_no)) {
            ulint   len;
            byte*   data = rec_get_nth_field(
                rec, offsets, ufield->field_no, &len);
            ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);

            btr_free_externally_stored_field(
                index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
                rec, offsets, page_zip,
                ufield->field_no, TRUE, mtr);
        }
    }
}
/***********************************************************************
Copies the prefix of an uncompressed BLOB. The clustered index record
that points to this BLOB must be protected by a lock or a page latch. */
static
ulint
btr_copy_blob_prefix(
/*=================*/
                        /* out: number of bytes written to buf */
    byte*   buf,        /* out: the externally stored part of
                        the field, or a prefix of it */
    ulint   len,        /* in: length of buf, in bytes */
    ulint   space_id,   /* in: space id of the BLOB pages */
    ulint   page_no,    /* in: page number of the first BLOB page */
    ulint   offset)     /* in: offset on the first BLOB page */
{
    ulint   copied_len = 0;

    for (;;) {
        mtr_t           mtr;
        buf_block_t*    block;
        const page_t*   page;
        const byte*     blob_header;
        ulint           part_len;
        ulint           copy_len;

        mtr_start(&mtr);

        block = buf_page_get(space_id, 0, page_no, RW_S_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
        buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE);
#endif /* UNIV_SYNC_DEBUG */
        page = buf_block_get_frame(block);

        /* Unfortunately, FIL_PAGE_TYPE was uninitialized for
        many pages until MySQL/InnoDB 5.1.7. */
        /* ut_ad(fil_page_get_type(page) == FIL_PAGE_TYPE_BLOB); */
        blob_header = page + offset;
        part_len = btr_blob_get_part_len(blob_header);
        copy_len = ut_min(part_len, len - copied_len);

        memcpy(buf + copied_len,
               blob_header + BTR_BLOB_HDR_SIZE, copy_len);
        copied_len += copy_len;

        page_no = btr_blob_get_next_page_no(blob_header);

        mtr_commit(&mtr);

        if (page_no == FIL_NULL || copy_len != part_len) {
            return(copied_len);
        }

        /* On other BLOB pages except the first the BLOB header
        always is at the page data start: */

        offset = FIL_PAGE_DATA;

        ut_ad(copied_len <= len);
    }
}
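/* btr_copy_blob_prefix() stops either because the output buffer is full
(copy_len != part_len: only part of the current BLOB part was needed) or
because the last BLOB part has been reached (next page number is FIL_NULL);
in both cases it returns the number of bytes actually written to buf. */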
/***********************************************************************
Copies the prefix of a compressed BLOB. The clustered index record
that points to this BLOB must be protected by a lock or a page latch. */
static
void
btr_copy_zblob_prefix(
/*==================*/
    z_stream*   d_stream,/* in/out: the decompressing stream */
    ulint       zip_size,/* in: compressed BLOB page size */
    ulint       space_id,/* in: space id of the BLOB pages */
    ulint       page_no,/* in: page number of the first BLOB page */
    ulint       offset) /* in: offset on the first BLOB page */
{
    ulint   page_type = FIL_PAGE_TYPE_ZBLOB;

    ut_ad(ut_is_2pow(zip_size));
    ut_ad(zip_size >= PAGE_ZIP_MIN_SIZE);
    ut_ad(zip_size <= UNIV_PAGE_SIZE);

    for (;;) {
        buf_page_t* bpage;
        int         err;
        ulint       next_page_no;

        /* There is no latch on bpage directly. Instead,
        bpage is protected by the B-tree page latch that
        is being held on the clustered index record, or,
        in row_merge_copy_blobs(), by an exclusive table lock. */
        bpage = buf_page_get_zip(space_id, zip_size, page_no);

        if (UNIV_UNLIKELY(!bpage)) {
            ut_print_timestamp(stderr);
            fprintf(stderr,
                    "  InnoDB: Cannot load"
                    " page %lu space %lu\n",
                    (ulong) page_no, (ulong) space_id);
            return;
        }

        if (UNIV_UNLIKELY
            (fil_page_get_type(bpage->zip.data) != page_type)) {
            ut_print_timestamp(stderr);
            fprintf(stderr,
                    "  InnoDB: Unexpected type %lu of"
                    " page %lu space %lu\n",
                    (ulong) fil_page_get_type(bpage->zip.data),
                    (ulong) page_no, (ulong) space_id);
            buf_page_release_zip(bpage);
            return;
        }

        next_page_no = mach_read_from_4(bpage->zip.data + offset);

        if (UNIV_LIKELY(offset == FIL_PAGE_NEXT)) {
            /* When the BLOB begins at page header,
            the compressed data payload does not
            immediately follow the next page pointer. */
            offset = FIL_PAGE_DATA;
        }

        d_stream->next_in = bpage->zip.data + offset;
        d_stream->avail_in = zip_size - offset;

        err = inflate(d_stream, Z_NO_FLUSH);

        if (!d_stream->avail_out) {
            /* The output buffer is full: we are done. */
            buf_page_release_zip(bpage);
            return;
        }

        if (UNIV_UNLIKELY(err != Z_OK && err != Z_STREAM_END)) {
            ut_print_timestamp(stderr);
            fprintf(stderr,
                    "  InnoDB: inflate() of"
                    " page %lu space %lu returned %d (%s)\n",
                    (ulong) page_no, (ulong) space_id,
                    err, d_stream->msg);
            buf_page_release_zip(bpage);
            return;
        }

        if (next_page_no == FIL_NULL) {
            if (!d_stream->avail_in) {
                ut_print_timestamp(stderr);
                fprintf(stderr,
                        "  InnoDB: unexpected end of"
                        " page %lu space %lu\n",
                        (ulong) page_no, (ulong) space_id);
            } else {
                err = inflate(d_stream, Z_FINISH);
            }

            buf_page_release_zip(bpage);
            return;
        }

        buf_page_release_zip(bpage);

        /* On other BLOB pages except the first
        the BLOB header always is at the page header: */

        page_no = next_page_no;
        offset = FIL_PAGE_NEXT;
        page_type = FIL_PAGE_TYPE_ZBLOB2;
    }
}
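/* In btr_copy_zblob_prefix(), the first page of a compressed BLOB is created
with type FIL_PAGE_TYPE_ZBLOB and every following page with type
FIL_PAGE_TYPE_ZBLOB2 (see btr_store_big_rec_extern_fields()); on every page
the compressed payload starts at FIL_PAGE_DATA. That is why the loop resets
offset to FIL_PAGE_NEXT (mapped back to FIL_PAGE_DATA at the top of the loop)
and switches page_type to FIL_PAGE_TYPE_ZBLOB2 before following the
next-page pointer. */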
/***********************************************************************
Copies the prefix of an externally stored field of a record. The
clustered index record that points to this BLOB must be protected by a
lock or a page latch. */
static
ulint
btr_copy_externally_stored_field_prefix_low(
/*========================================*/
                        /* out: number of bytes written to buf */
    byte*   buf,        /* out: the externally stored part of
                        the field, or a prefix of it */
    ulint   len,        /* in: length of buf, in bytes */
    ulint   zip_size,   /* in: nonzero=compressed BLOB page size,
                        zero for uncompressed BLOBs */
    ulint   space_id,   /* in: space id of the first BLOB page */
    ulint   page_no,    /* in: page number of the first BLOB page */
    ulint   offset)     /* in: offset on the first BLOB page */
{
    if (UNIV_UNLIKELY(len == 0)) {
        return(0);
    }

    if (UNIV_UNLIKELY(zip_size)) {
        int         err;
        z_stream    d_stream;
        mem_heap_t* heap;

        /* Zlib inflate needs 32 kilobytes for the default
        window size, plus a few kilobytes for small objects. */
        heap = mem_heap_create(40000);
        page_zip_set_alloc(&d_stream, heap);

        err = inflateInit(&d_stream);
        ut_a(err == Z_OK);

        d_stream.next_out = buf;
        d_stream.avail_out = len;
        d_stream.avail_in = 0;

        btr_copy_zblob_prefix(&d_stream, zip_size,
                              space_id, page_no, offset);
        inflateEnd(&d_stream);
        mem_heap_free(heap);
        return(d_stream.total_out);
    } else {
        return(btr_copy_blob_prefix(buf, len, space_id,
                                    page_no, offset));
    }
}

/***********************************************************************
Copies the prefix of an externally stored field of a record. The
clustered index record must be protected by a lock or a page latch. */

ulint
btr_copy_externally_stored_field_prefix(
/*====================================*/
                            /* out: the length of the copied field */
    byte*       buf,        /* out: the field, or a prefix of it */
    ulint       len,        /* in: length of buf, in bytes */
    ulint       zip_size,   /* in: nonzero=compressed BLOB page size,
                            zero for uncompressed BLOBs */
    const byte* data,       /* in: 'internally' stored part of the
                            field containing also the reference to
                            the external part; must be protected by
                            a lock or a page latch */
    ulint       local_len)  /* in: length of data, in bytes */
{
    ulint   space_id;
    ulint   page_no;
    ulint   offset;

    ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);

    local_len -= BTR_EXTERN_FIELD_REF_SIZE;

    if (UNIV_UNLIKELY(local_len >= len)) {
        memcpy(buf, data, len);
        return(len);
    }

    memcpy(buf, data, local_len);
    data += local_len;

    ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));

    space_id = mach_read_from_4(data + BTR_EXTERN_SPACE_ID);

    page_no = mach_read_from_4(data + BTR_EXTERN_PAGE_NO);

    offset = mach_read_from_4(data + BTR_EXTERN_OFFSET);

    return(local_len
           + btr_copy_externally_stored_field_prefix_low(buf + local_len,
                                                         len - local_len,
                                                         zip_size,
                                                         space_id,
                                                         page_no, offset));
}

/***********************************************************************
Copies an externally stored field of a record to mem heap. The
clustered index record must be protected by a lock or a page latch. */

byte*
btr_copy_externally_stored_field(
/*=============================*/
                            /* out: the whole field copied to heap */
    ulint*      len,        /* out: length of the whole field */
    const byte* data,       /* in: 'internally' stored part of the
                            field containing also the reference to
                            the external part; must be protected by
                            a lock or a page latch */
    ulint       zip_size,   /* in: nonzero=compressed BLOB page size,
                            zero for uncompressed BLOBs */
    ulint       local_len,  /* in: length of data */
    mem_heap_t* heap)       /* in: mem heap */
{
    ulint   space_id;
    ulint   page_no;
    ulint   offset;
    ulint   extern_len;
    byte*   buf;

    ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);

    local_len -= BTR_EXTERN_FIELD_REF_SIZE;

    space_id = mach_read_from_4(data + local_len + BTR_EXTERN_SPACE_ID);

    page_no = mach_read_from_4(data + local_len + BTR_EXTERN_PAGE_NO);

    offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET);

    /* Currently a BLOB cannot be bigger than 4 GB; we
    leave the 4 upper bytes in the length field unused */

    extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4);

    buf = mem_heap_alloc(heap, local_len + extern_len);

    memcpy(buf, data, local_len);
    *len = local_len
        + btr_copy_externally_stored_field_prefix_low(buf + local_len,
                                                      extern_len,
                                                      zip_size,
                                                      space_id,
                                                      page_no, offset);

    return(buf);
}

/***********************************************************************
Copies an externally stored field of a record to mem heap. */

byte*
btr_rec_copy_externally_stored_field(
/*=================================*/
                            /* out: the field copied to heap */
    const rec_t*    rec,    /* in: record in a clustered index;
                            must be protected by a lock or a page latch */
    const ulint*    offsets,/* in: array returned by rec_get_offsets() */
    ulint           zip_size,/* in: nonzero=compressed BLOB page size,
                            zero for uncompressed BLOBs */
    ulint           no,     /* in: field number */
    ulint*          len,    /* out: length of the field */
    mem_heap_t*     heap)   /* in: mem heap */
{
    ut_ad(rec_offs_validate(rec, NULL, offsets));
    ut_a(rec_offs_nth_extern(offsets, no));

    /* An externally stored field can contain some initial