126
174
page_t* page, /* in: leaf page where the search
128
176
ulint space, /* in: space id */
177
ulint zip_size, /* in: compressed page size in bytes
178
or 0 for uncompressed pages */
129
179
ulint page_no, /* in: page number of the leaf */
130
180
ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */
131
181
btr_cur_t* cursor, /* in: cursor */
132
182
mtr_t* mtr) /* in: mtr */
187
buf_block_t* get_block;
138
189
ut_ad(page && mtr);
140
if (latch_mode == BTR_SEARCH_LEAF) {
142
get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
143
ut_a(page_is_comp(get_page) == page_is_comp(page));
144
buf_block_align(get_page)->check_index_page_at_flush = TRUE;
146
} else if (latch_mode == BTR_MODIFY_LEAF) {
148
get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
149
ut_a(page_is_comp(get_page) == page_is_comp(page));
150
buf_block_align(get_page)->check_index_page_at_flush = TRUE;
152
} else if (latch_mode == BTR_MODIFY_TREE) {
191
switch (latch_mode) {
192
case BTR_SEARCH_LEAF:
193
case BTR_MODIFY_LEAF:
194
mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
195
get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
196
#ifdef UNIV_BTR_DEBUG
197
ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
198
#endif /* UNIV_BTR_DEBUG */
199
get_block->check_index_page_at_flush = TRUE;
201
case BTR_MODIFY_TREE:
154
202
/* x-latch also brothers from left to right */
155
203
left_page_no = btr_page_get_prev(page, mtr);
157
205
if (left_page_no != FIL_NULL) {
158
get_page = btr_page_get(space, left_page_no,
206
get_block = btr_block_get(space, zip_size,
160
209
#ifdef UNIV_BTR_DEBUG
161
ut_a(btr_page_get_next(get_page, mtr)
162
== buf_frame_get_page_no(page));
210
ut_a(page_is_comp(get_block->frame)
211
== page_is_comp(page));
212
ut_a(btr_page_get_next(get_block->frame, mtr)
213
== page_get_page_no(page));
163
214
#endif /* UNIV_BTR_DEBUG */
164
ut_a(page_is_comp(get_page) == page_is_comp(page));
165
buf_block_align(get_page)->check_index_page_at_flush
215
get_block->check_index_page_at_flush = TRUE;
169
get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
170
ut_a(page_is_comp(get_page) == page_is_comp(page));
171
buf_block_align(get_page)->check_index_page_at_flush = TRUE;
218
get_block = btr_block_get(space, zip_size, page_no,
220
#ifdef UNIV_BTR_DEBUG
221
ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
222
#endif /* UNIV_BTR_DEBUG */
223
get_block->check_index_page_at_flush = TRUE;
173
225
right_page_no = btr_page_get_next(page, mtr);
175
227
if (right_page_no != FIL_NULL) {
176
get_page = btr_page_get(space, right_page_no,
178
#ifdef UNIV_BTR_DEBUG
179
ut_a(btr_page_get_prev(get_page, mtr)
180
== buf_frame_get_page_no(page));
181
#endif /* UNIV_BTR_DEBUG */
182
buf_block_align(get_page)->check_index_page_at_flush
186
} else if (latch_mode == BTR_SEARCH_PREV) {
188
/* s-latch also left brother */
189
left_page_no = btr_page_get_prev(page, mtr);
191
if (left_page_no != FIL_NULL) {
192
cursor->left_page = btr_page_get(space, left_page_no,
194
#ifdef UNIV_BTR_DEBUG
195
ut_a(btr_page_get_next(cursor->left_page, mtr)
196
== buf_frame_get_page_no(page));
197
#endif /* UNIV_BTR_DEBUG */
198
ut_a(page_is_comp(cursor->left_page)
199
== page_is_comp(page));
200
buf_block_align(cursor->left_page)
201
->check_index_page_at_flush = TRUE;
204
get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
205
ut_a(page_is_comp(get_page) == page_is_comp(page));
206
buf_block_align(get_page)->check_index_page_at_flush = TRUE;
208
} else if (latch_mode == BTR_MODIFY_PREV) {
210
/* x-latch also left brother */
211
left_page_no = btr_page_get_prev(page, mtr);
213
if (left_page_no != FIL_NULL) {
214
cursor->left_page = btr_page_get(space, left_page_no,
216
#ifdef UNIV_BTR_DEBUG
217
ut_a(btr_page_get_next(cursor->left_page, mtr)
218
== buf_frame_get_page_no(page));
219
#endif /* UNIV_BTR_DEBUG */
220
ut_a(page_is_comp(cursor->left_page)
221
== page_is_comp(page));
222
buf_block_align(cursor->left_page)
223
->check_index_page_at_flush = TRUE;
226
get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
227
ut_a(page_is_comp(get_page) == page_is_comp(page));
228
buf_block_align(get_page)->check_index_page_at_flush = TRUE;
228
get_block = btr_block_get(space, zip_size,
231
#ifdef UNIV_BTR_DEBUG
232
ut_a(page_is_comp(get_block->frame)
233
== page_is_comp(page));
234
ut_a(btr_page_get_prev(get_block->frame, mtr)
235
== page_get_page_no(page));
236
#endif /* UNIV_BTR_DEBUG */
237
get_block->check_index_page_at_flush = TRUE;
242
case BTR_SEARCH_PREV:
243
case BTR_MODIFY_PREV:
244
mode = latch_mode == BTR_SEARCH_PREV ? RW_S_LATCH : RW_X_LATCH;
245
/* latch also left brother */
246
left_page_no = btr_page_get_prev(page, mtr);
248
if (left_page_no != FIL_NULL) {
249
get_block = btr_block_get(space, zip_size,
250
left_page_no, mode, mtr);
251
cursor->left_block = get_block;
252
#ifdef UNIV_BTR_DEBUG
253
ut_a(page_is_comp(get_block->frame)
254
== page_is_comp(page));
255
ut_a(btr_page_get_next(get_block->frame, mtr)
256
== page_get_page_no(page));
257
#endif /* UNIV_BTR_DEBUG */
258
get_block->check_index_page_at_flush = TRUE;
261
get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
262
#ifdef UNIV_BTR_DEBUG
263
ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
264
#endif /* UNIV_BTR_DEBUG */
265
get_block->check_index_page_at_flush = TRUE;
234
272
/************************************************************************
803
866
btr_cur_t* cursor, /* in: cursor on page after which to insert;
804
867
cursor stays valid */
805
dtuple_t* tuple, /* in: tuple to insert; the size info need not
868
const dtuple_t* tuple, /* in: tuple to insert; the size info need not
806
869
have been stored to tuple */
807
ibool* reorg, /* out: TRUE if reorganization occurred */
870
ulint n_ext, /* in: number of externally stored columns */
808
871
mtr_t* mtr) /* in: mtr */
810
873
page_cur_t* page_cursor;
814
877
ut_ad(dtuple_check_typed(tuple));
818
page = btr_cur_get_page(cursor);
820
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
821
MTR_MEMO_PAGE_X_FIX));
879
block = btr_cur_get_block(cursor);
881
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
822
882
page_cursor = btr_cur_get_page_cur(cursor);
824
884
/* Now, try the insert */
825
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
885
rec = page_cur_tuple_insert(page_cursor, tuple,
886
cursor->index, n_ext, mtr);
888
if (UNIV_UNLIKELY(!rec)) {
828
889
/* If record did not fit, reorganize */
830
btr_page_reorganize(page, cursor->index, mtr);
834
page_cur_search(page, cursor->index, tuple,
835
PAGE_CUR_LE, page_cursor);
837
rec = page_cur_tuple_insert(page_cursor, tuple,
891
if (btr_page_reorganize(block, cursor->index, mtr)) {
893
page_cur_search(block, cursor->index, tuple,
894
PAGE_CUR_LE, page_cursor);
896
rec = page_cur_tuple_insert(page_cursor, tuple,
897
cursor->index, n_ext, mtr);
976
1052
#endif /* UNIV_DEBUG */
978
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
979
MTR_MEMO_PAGE_X_FIX));
1054
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
980
1055
max_size = page_get_max_insert_size_after_reorganize(page, 1);
981
level = btr_page_get_level(page, mtr);
1056
leaf = page_is_leaf(page);
983
calculate_sizes_again:
984
1058
/* Calculate the record size when entry is converted to a record */
985
rec_size = rec_get_converted_size(index, entry);
1059
rec_size = rec_get_converted_size(index, entry, n_ext);
988
>= ut_min(page_get_free_space_of_empty(page_is_comp(page)) / 2,
989
REC_MAX_DATA_SIZE)) {
1061
if (page_zip_rec_needs_ext(rec_size, page_is_comp(page), zip_size)) {
991
1063
/* The record is so big that we have to store some fields
992
1064
externally on separate database pages */
994
big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0);
996
if (big_rec_vec == NULL) {
1065
big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
1067
if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
998
1069
return(DB_TOO_BIG_RECORD);
1001
goto calculate_sizes_again;
1072
rec_size = rec_get_converted_size(index, entry, n_ext);
1004
1075
/* If there have been many consecutive inserts, and we are on the leaf
1005
1076
level, check if we have to split the page to reserve enough free space
1006
1077
for future updates of records. */
1010
if ((type & DICT_CLUSTERED)
1079
if (dict_index_is_clust(index)
1080
&& (page_get_n_recs(page) >= 2)
1081
&& UNIV_LIKELY(leaf)
1011
1082
&& (dict_index_get_space_reserve() + rec_size > max_size)
1012
&& (page_get_n_recs(page) >= 2)
1014
1083
&& (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
1015
1084
|| btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
1017
1089
if (big_rec_vec) {
1018
1090
dtuple_convert_back_big_rec(index, entry, big_rec_vec);
1093
if (UNIV_LIKELY_NULL(heap)) {
1094
mem_heap_free(heap);
1024
if (!(((max_size >= rec_size)
1025
&& (max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT))
1026
|| (page_get_max_insert_size(page, 1) >= rec_size)
1027
|| (page_get_n_recs(page) <= 1))) {
1100
if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT
1101
|| max_size < rec_size)
1102
&& UNIV_LIKELY(page_get_n_recs(page) > 1)
1103
&& page_get_max_insert_size(page, 1) < rec_size) {
1030
dtuple_convert_back_big_rec(index, entry, big_rec_vec);
1035
1108
/* Check locks and write to the undo log, if specified */
1036
1109
err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, &inherit);
1038
if (err != DB_SUCCESS) {
1111
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
1041
dtuple_convert_back_big_rec(index, entry, big_rec_vec);
1046
1116
page_cursor = btr_cur_get_page_cur(cursor);
1050
1118
/* Now, try the insert */
1052
*rec = page_cur_insert_rec_low(page_cursor, entry, index,
1054
if (UNIV_UNLIKELY(!(*rec))) {
1121
const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);
1122
*rec = page_cur_tuple_insert(page_cursor, entry, index,
1124
reorg = page_cursor_rec != page_cur_get_rec(page_cursor);
1126
if (UNIV_UNLIKELY(reorg)) {
1132
if (UNIV_UNLIKELY(!*rec) && UNIV_LIKELY(!reorg)) {
1055
1133
/* If the record did not fit, reorganize */
1056
btr_page_reorganize(page, index, mtr);
1058
ut_ad(page_get_max_insert_size(page, 1) == max_size);
1134
if (UNIV_UNLIKELY(!btr_page_reorganize(block, index, mtr))) {
1141
|| page_get_max_insert_size(page, 1) == max_size);
1062
page_cur_search(page, index, entry, PAGE_CUR_LE, page_cursor);
1145
page_cur_search(block, index, entry, PAGE_CUR_LE, page_cursor);
1064
*rec = page_cur_tuple_insert(page_cursor, entry, index, mtr);
1147
*rec = page_cur_tuple_insert(page_cursor, entry, index,
1066
1150
if (UNIV_UNLIKELY(!*rec)) {
1151
if (UNIV_LIKELY(zip_size != 0)) {
1067
1156
fputs("InnoDB: Error: cannot insert tuple ", stderr);
1068
1157
dtuple_print(stderr, entry);
1069
1158
fputs(" into ", stderr);
1212
if (dict_index_get_page(index) == buf_frame_get_page_no(page)) {
1329
if (UNIV_UNLIKELY(zip_size)) {
1330
/* Estimate the free space of an empty compressed page. */
1331
ulint free_space_zip = page_zip_empty_size(
1332
cursor->index->n_fields, zip_size);
1334
if (UNIV_UNLIKELY(rec_get_converted_size(index, entry, n_ext)
1335
> free_space_zip)) {
1336
/* Try to insert the record by itself on a new page.
1337
If it fails, no amount of splitting will help. */
1338
buf_block_t* temp_block
1339
= buf_block_alloc(zip_size);
1341
= page_create_zip(temp_block, index, 0, NULL);
1342
page_cur_t temp_cursor;
1345
page_cur_position(temp_page + PAGE_NEW_INFIMUM,
1346
temp_block, &temp_cursor);
1348
temp_rec = page_cur_tuple_insert(&temp_cursor,
1351
buf_block_free(temp_block);
1353
if (UNIV_UNLIKELY(!temp_rec)) {
1355
dtuple_convert_back_big_rec(
1356
index, entry, big_rec_vec);
1360
mem_heap_free(heap);
1363
return(DB_TOO_BIG_RECORD);
1368
if (dict_index_get_page(index)
1369
== buf_block_get_page_no(btr_cur_get_block(cursor))) {
1214
1371
/* The page is the root page */
1215
*rec = btr_root_raise_and_insert(cursor, entry, mtr);
1372
*rec = btr_root_raise_and_insert(cursor, entry, n_ext, mtr);
1217
*rec = btr_page_split_and_insert(cursor, entry, mtr);
1220
btr_cur_position(index, page_rec_get_prev(*rec), cursor);
1374
*rec = btr_page_split_and_insert(cursor, entry, n_ext, mtr);
1377
if (UNIV_LIKELY_NULL(heap)) {
1378
mem_heap_free(heap);
1381
ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec);
1222
1383
#ifdef BTR_CUR_ADAPT
1223
1384
btr_search_update_hash_on_insert(cursor);
1225
1386
if (!(flags & BTR_NO_LOCKING_FLAG)) {
1227
lock_update_insert(*rec);
1388
lock_update_insert(btr_cur_get_block(cursor), *rec);
1232
1391
if (n_extents > 0) {
1233
1392
fil_space_release_free_extents(index->space, n_reserved);
1236
1395
*big_rec = big_rec_vec;
1241
1400
/*==================== B-TREE UPDATE =========================*/
1431
1592
/*****************************************************************
1593
See if there is enough space in the page modification log to log
1594
an update-in-place. */
1597
btr_cur_update_alloc_zip(
1598
/*=====================*/
1599
/* out: TRUE if enough space */
1600
page_zip_des_t* page_zip,/* in/out: compressed page */
1601
buf_block_t* block, /* in/out: buffer page */
1602
dict_index_t* index, /* in: the index corresponding to the block */
1603
ulint length, /* in: size needed */
1604
mtr_t* mtr) /* in: mini-transaction */
1606
ut_a(page_zip == buf_block_get_page_zip(block));
1609
if (page_zip_available(page_zip, dict_index_is_clust(index),
1614
if (!page_zip->m_nonempty) {
1615
/* The page has been freshly compressed, so
1616
recompressing it will not help. */
1620
if (!page_zip_compress(page_zip, buf_block_get_frame(block),
1622
/* Unable to compress the page */
1626
/* After recompressing a page, we must make sure that the free
1627
bits in the insert buffer bitmap will not exceed the free
1628
space on the page. Because this function will not attempt
1629
recompression unless page_zip_available() fails above, it is
1630
safe to reset the free bits if page_zip_available() fails
1631
again, below. The free bits can safely be reset in a separate
1632
mini-transaction. If page_zip_available() succeeds below, we
1633
can be sure that the page_zip_compress() above did not reduce
1634
the free space available on the page. */
1636
if (!page_zip_available(page_zip, dict_index_is_clust(index),
1638
/* Out of space: reset the free bits. */
1639
if (!dict_index_is_clust(index)
1640
&& page_is_leaf(buf_block_get_frame(block))) {
1641
ibuf_reset_free_bits(block);
1649
/*****************************************************************
1432
1650
Updates a record when the update causes no size changes in its fields.
1433
1651
We assume here that the ordering fields of the record do not change. */
1436
1654
btr_cur_update_in_place(
1437
1655
/*====================*/
1880
2160
updated the primary key to another value, and then
1881
2161
update it back again. */
1883
ut_a(big_rec_vec == NULL);
2163
ut_ad(big_rec_vec == NULL);
1885
btr_rec_free_updated_extern_fields(index, rec, offsets,
2165
btr_rec_free_updated_extern_fields(index, rec, page_zip,
2166
offsets, update, mtr);
1889
2169
/* We have to set appropriate extern storage bits in the new
1890
2170
record to be inserted: we have to remember which fields were such */
1892
ext_vect = mem_heap_alloc(heap, sizeof(ulint)
1893
* dict_index_get_n_fields(index));
1894
2172
ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
1895
offsets = rec_get_offsets(rec, index, offsets,
1896
ULINT_UNDEFINED, &heap);
1897
n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets, update);
1899
if (UNIV_UNLIKELY(rec_get_converted_size(index, new_entry)
1900
>= ut_min(page_get_free_space_of_empty(
1901
page_is_comp(page)) / 2,
1902
REC_MAX_DATA_SIZE))) {
1904
big_rec_vec = dtuple_convert_big_rec(index, new_entry,
1905
ext_vect, n_ext_vect);
1906
if (big_rec_vec == NULL) {
2173
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap);
2174
n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
2176
if (page_zip_rec_needs_ext(rec_get_converted_size(index, new_entry,
2178
page_is_comp(page), page_zip
2179
? page_zip_get_size(page_zip) : 0)) {
2180
big_rec_vec = dtuple_convert_big_rec(index, new_entry, &n_ext);
2181
if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
1908
2183
err = DB_TOO_BIG_RECORD;
1909
2184
goto return_after_reservations;
1913
page_cursor = btr_cur_get_page_cur(cursor);
1915
2188
/* Store state of explicit locks on rec on the page infimum record,
1916
2189
before deleting rec. The page infimum acts as a dummy carrier of the
1917
2190
locks, taking care also of lock releases, before we can move the locks
1921
2194
delete the lock structs set on the root page even if the root
1922
2195
page carries just node pointers. */
1924
lock_rec_store_on_page_infimum(buf_frame_align(rec), rec);
2197
lock_rec_store_on_page_infimum(block, rec);
1926
2199
btr_search_update_hash_on_delete(cursor);
2201
#ifdef UNIV_ZIP_DEBUG
2202
ut_a(!page_zip || page_zip_validate(page_zip, page));
2203
#endif /* UNIV_ZIP_DEBUG */
2204
page_cursor = btr_cur_get_page_cur(cursor);
1928
2206
page_cur_delete_rec(page_cursor, index, offsets, mtr);
1930
2208
page_cur_move_to_prev(page_cursor);
1932
rec = btr_cur_insert_if_possible(cursor, new_entry,
1933
&dummy_reorganized, mtr);
1934
ut_a(rec || optim_err != DB_UNDERFLOW);
2210
rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);
1937
lock_rec_restore_from_page_infimum(rec, page);
1938
rec_set_field_extern_bits(rec, index,
1939
ext_vect, n_ext_vect, mtr);
2213
lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
1941
2216
offsets = rec_get_offsets(rec, index, offsets,
1942
ULINT_UNDEFINED, &heap);
2217
ULINT_UNDEFINED, heap);
1944
2219
if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
1945
2220
/* The new inserted record owns its possible externally
1946
2221
stored fields */
1947
btr_cur_unmark_extern_fields(rec, mtr, offsets);
2222
btr_cur_unmark_extern_fields(page_zip,
2223
rec, index, offsets, mtr);
1950
2226
btr_cur_compress_if_useful(cursor, mtr);
2228
if (page_zip && !dict_index_is_clust(index)
2229
&& page_is_leaf(page)) {
2230
/* Update the free bits in the insert buffer. */
2231
ibuf_update_free_bits_zip(block, mtr);
1952
2234
err = DB_SUCCESS;
1953
2235
goto return_after_reservations;
1956
if (page_cur_is_before_first(page_cursor)) {
1957
/* The record to be updated was positioned as the first user
1958
record on its page */
2237
ut_a(optim_err != DB_UNDERFLOW);
2239
/* Out of space: reset the free bits. */
2240
if (!dict_index_is_clust(index)
2241
&& page_is_leaf(page)) {
2242
ibuf_reset_free_bits(block);
2246
/* Was the record to be updated positioned as the first user
2247
record on its page? */
2248
was_first = page_cur_is_before_first(page_cursor);
1965
2250
/* The first parameter means that no lock checking and undo logging
1966
2251
is made in the insert */
3202
3511
ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
3203
3512
n = rec_offs_n_fields(offsets);
3514
if (!rec_offs_any_extern(offsets)) {
3205
3519
for (i = 0; i < n; i++) {
3206
3520
if (rec_offs_nth_extern(offsets, i)) {
3208
btr_cur_set_ownership_of_extern_field(rec, offsets, i,
3522
btr_cur_set_ownership_of_extern_field(
3523
page_zip, rec, index, offsets, i, TRUE, mtr);
3214
3528
/***********************************************************************
3215
3529
Marks all extern fields in a dtuple as owned by the record. */
3218
3532
btr_cur_unmark_dtuple_extern_fields(
3219
3533
/*================================*/
3220
dtuple_t* entry, /* in: clustered index entry */
3221
ulint* ext_vec, /* in: array of numbers of fields
3222
which have been stored externally */
3223
ulint n_ext_vec) /* in: number of elements in ext_vec */
3534
dtuple_t* entry) /* in/out: clustered index entry */
3231
for (i = 0; i < n_ext_vec; i++) {
3232
dfield = dtuple_get_nth_field(entry, ext_vec[i]);
3234
data = (byte*) dfield_get_data(dfield);
3235
len = dfield_get_len(dfield);
3237
len -= BTR_EXTERN_FIELD_REF_SIZE;
3239
byte_val = mach_read_from_1(data + len + BTR_EXTERN_LEN);
3241
byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG);
3243
mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val);
3538
for (i = 0; i < dtuple_get_n_fields(entry); i++) {
3539
dfield_t* dfield = dtuple_get_nth_field(entry, i);
3541
if (dfield_is_ext(dfield)) {
3542
byte* data = dfield_get_data(dfield);
3543
ulint len = dfield_get_len(dfield);
3545
data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
3546
&= ~BTR_EXTERN_OWNER_FLAG;
3247
3551
/***********************************************************************
3248
Stores the positions of the fields marked as extern storage in the update
3249
vector, and also those fields which are marked as extern storage in rec
3250
and not mentioned in updated fields. We use this function to remember
3251
which fields we must mark as extern storage in a record inserted for an
3552
Flags the data tuple fields that are marked as extern storage in the
3553
update vector. We use this function to remember which fields we must
3554
mark as extern storage in a record inserted for an update. */
3255
3557
btr_push_update_extern_fields(
3256
3558
/*==========================*/
3257
/* out: number of values stored in ext_vect */
3258
ulint* ext_vect,/* in: array of ulints, must be preallocated
3259
to have space for all fields in rec */
3260
const ulint* offsets,/* in: array returned by rec_get_offsets() */
3261
upd_t* update) /* in: update vector or NULL */
3559
/* out: number of flagged external columns */
3560
dtuple_t* tuple, /* in/out: data tuple */
3561
const upd_t* update, /* in: update vector */
3562
mem_heap_t* heap) /* in: memory heap */
3270
n = upd_get_n_fields(update);
3272
for (i = 0; i < n; i++) {
3274
if (upd_get_nth_field(update, i)->extern_storage) {
3276
ext_vect[n_pushed] = upd_get_nth_field(
3277
update, i)->field_no;
3284
n = rec_offs_n_fields(offsets);
3286
for (i = 0; i < n; i++) {
3287
if (rec_offs_nth_extern(offsets, i)) {
3289
/* Check it is not in updated fields */
3293
for (j = 0; j < upd_get_n_fields(update);
3295
if (upd_get_nth_field(update, j)
3303
ext_vect[n_pushed] = i;
3566
const upd_field_t* uf;
3571
uf = update->fields;
3572
n = upd_get_n_fields(update);
3575
if (dfield_is_ext(&uf->new_val)) {
3577
= dtuple_get_nth_field(tuple, uf->field_no);
3579
if (!dfield_is_ext(field)) {
3580
dfield_set_ext(field);
3584
switch (uf->orig_len) {
3590
case BTR_EXTERN_FIELD_REF_SIZE:
3591
/* Restore the original locally stored
3592
part of the column. In the undo log,
3593
InnoDB writes a longer prefix of externally
3594
stored columns, so that column prefixes
3595
in secondary indexes can be reconstructed. */
3596
dfield_set_data(field, (byte*) dfield_get_data(field)
3597
+ dfield_get_len(field)
3598
- BTR_EXTERN_FIELD_REF_SIZE,
3599
BTR_EXTERN_FIELD_REF_SIZE);
3600
dfield_set_ext(field);
3603
/* Reconstruct the original locally
3604
stored part of the column. The data
3605
will have to be copied. */
3606
ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);
3608
data = dfield_get_data(field);
3609
len = dfield_get_len(field);
3611
buf = mem_heap_alloc(heap, uf->orig_len);
3612
/* Copy the locally stored prefix. */
3615
- BTR_EXTERN_FIELD_REF_SIZE);
3616
/* Copy the BLOB pointer. */
3617
memcpy(buf + uf->orig_len
3618
- BTR_EXTERN_FIELD_REF_SIZE,
3619
data + len - BTR_EXTERN_FIELD_REF_SIZE,
3620
BTR_EXTERN_FIELD_REF_SIZE);
3622
dfield_set_data(field, buf, uf->orig_len);
3623
dfield_set_ext(field);
3328
3647
btr_blob_get_next_page_no(
3329
3648
/*======================*/
3330
/* out: page number or FIL_NULL if
3332
byte* blob_header) /* in: blob header */
3649
/* out: page number or FIL_NULL if
3651
const byte* blob_header) /* in: blob header */
3334
3653
return(mach_read_from_4(blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO));
3337
3656
/***********************************************************************
3657
Deallocate a buffer block that was reserved for a BLOB part. */
3662
buf_block_t* block, /* in: buffer block */
3663
ibool all, /* in: TRUE=remove also the compressed page
3665
mtr_t* mtr) /* in: mini-transaction to commit */
3667
ulint space = buf_block_get_space(block);
3668
ulint page_no = buf_block_get_page_no(block);
3670
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3674
buf_pool_mutex_enter();
3675
mutex_enter(&block->mutex);
3677
/* Only free the block if it is still allocated to
3678
the same file page. */
3680
if (buf_block_get_state(block)
3681
== BUF_BLOCK_FILE_PAGE
3682
&& buf_block_get_space(block) == space
3683
&& buf_block_get_page_no(block) == page_no) {
3685
if (buf_LRU_free_block(&block->page, all, NULL)
3687
&& all && block->page.zip.data) {
3688
/* Attempt to deallocate the uncompressed page
3689
if the whole block cannot be deallocated. */
3691
buf_LRU_free_block(&block->page, FALSE, NULL);
3695
buf_pool_mutex_exit();
3696
mutex_exit(&block->mutex);
3699
/***********************************************************************
3338
3700
Stores the fields in big_rec_vec to the tablespace and puts pointers to
3339
them in rec. The fields are stored on pages allocated from leaf node
3701
them in rec. The extern flags in rec will have to be set beforehand.
3702
The fields are stored on pages allocated from leaf node
3340
3703
file segment of the index tree. */
3343
3706
btr_store_big_rec_extern_fields(
3344
3707
/*============================*/
3345
3708
/* out: DB_SUCCESS or error */
3346
3709
dict_index_t* index, /* in: index of rec; the index tree
3347
3710
MUST be X-latched */
3348
rec_t* rec, /* in: record */
3711
buf_block_t* rec_block, /* in/out: block containing rec */
3712
rec_t* rec, /* in/out: record */
3349
3713
const ulint* offsets, /* in: rec_get_offsets(rec, index);
3350
3714
the "external storage" flags in offsets
3351
3715
will not correspond to rec when
3356
3720
containing the latch to rec and to the
3361
3725
ulint extern_len;
3362
3726
ulint store_len;
3365
3728
ulint space_id;
3368
3730
ulint prev_page_no;
3369
3731
ulint hint_page_no;
3734
mem_heap_t* heap = NULL;
3735
page_zip_des_t* page_zip;
3373
3738
ut_ad(rec_offs_validate(rec, index, offsets));
3374
3739
ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
3375
3740
MTR_MEMO_X_LOCK));
3376
ut_ad(mtr_memo_contains(local_mtr, buf_block_align(rec),
3377
MTR_MEMO_PAGE_X_FIX));
3378
ut_a(index->type & DICT_CLUSTERED);
3380
space_id = buf_frame_get_space_id(rec);
3741
ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
3742
ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
3743
ut_a(dict_index_is_clust(index));
3745
page_zip = buf_block_get_page_zip(rec_block);
3746
ut_a(dict_table_zip_size(index->table)
3747
== buf_block_get_zip_size(rec_block));
3749
space_id = buf_block_get_space(rec_block);
3750
zip_size = buf_block_get_zip_size(rec_block);
3751
rec_page_no = buf_block_get_page_no(rec_block);
3752
ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
3754
if (UNIV_LIKELY_NULL(page_zip)) {
3757
/* Zlib deflate needs 128 kilobytes for the default
3758
window size, plus 512 << memLevel, plus a few
3759
kilobytes for small objects. We use reduced memLevel
3760
to limit the memory consumption, and preallocate the
3761
heap, hoping to avoid memory fragmentation. */
3762
heap = mem_heap_create(250000);
3763
page_zip_set_alloc(&c_stream, heap);
3765
err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
3766
Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
3382
3770
/* We have to create a file segment to the tablespace
3383
3771
for each field and put the pointer to the field in rec */
3385
3773
for (i = 0; i < big_rec_vec->n_fields; i++) {
3387
data = rec_get_nth_field(rec, offsets,
3388
big_rec_vec->fields[i].field_no,
3390
ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
3391
local_len -= BTR_EXTERN_FIELD_REF_SIZE;
3774
ut_ad(rec_offs_nth_extern(offsets,
3775
big_rec_vec->fields[i].field_no));
3778
field_ref = rec_get_nth_field(
3779
rec, offsets, big_rec_vec->fields[i].field_no,
3781
ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
3782
local_len -= BTR_EXTERN_FIELD_REF_SIZE;
3783
field_ref += local_len;
3392
3785
extern_len = big_rec_vec->fields[i].len;
3394
3787
ut_a(extern_len > 0);
3396
3789
prev_page_no = FIL_NULL;
3398
while (extern_len > 0) {
3791
if (UNIV_LIKELY_NULL(page_zip)) {
3792
int err = deflateReset(&c_stream);
3795
c_stream.next_in = (void*) big_rec_vec->fields[i].data;
3796
c_stream.avail_in = extern_len;
3399
3803
mtr_start(&mtr);
3401
3805
if (prev_page_no == FIL_NULL) {
3402
hint_page_no = buf_frame_get_page_no(rec) + 1;
3806
hint_page_no = 1 + rec_page_no;
3404
3808
hint_page_no = prev_page_no + 1;
3407
page = btr_page_alloc(index, hint_page_no,
3408
FSP_NO_DIR, 0, &mtr);
3811
block = btr_page_alloc(index, hint_page_no,
3812
FSP_NO_DIR, 0, &mtr);
3813
if (UNIV_UNLIKELY(block == NULL)) {
3411
3815
mtr_commit(&mtr);
3817
if (UNIV_LIKELY_NULL(page_zip)) {
3818
deflateEnd(&c_stream);
3819
mem_heap_free(heap);
3413
3822
return(DB_OUT_OF_FILE_SPACE);
3416
mlog_write_ulint(page + FIL_PAGE_TYPE,
3420
page_no = buf_frame_get_page_no(page);
3825
page_no = buf_block_get_page_no(block);
3826
page = buf_block_get_frame(block);
3422
3828
if (prev_page_no != FIL_NULL) {
3423
prev_page = buf_page_get(space_id,
3829
buf_block_t* prev_block;
3832
prev_block = buf_page_get(space_id, zip_size,
3835
#ifdef UNIV_SYNC_DEBUG
3836
buf_block_dbg_add_level(prev_block,
3837
SYNC_EXTERN_STORAGE);
3838
#endif /* UNIV_SYNC_DEBUG */
3839
prev_page = buf_block_get_frame(prev_block);
3841
if (UNIV_LIKELY_NULL(page_zip)) {
3843
prev_page + FIL_PAGE_NEXT,
3844
page_no, MLOG_4BYTES, &mtr);
3845
memcpy(buf_block_get_page_zip(
3847
->data + FIL_PAGE_NEXT,
3848
prev_page + FIL_PAGE_NEXT, 4);
3851
prev_page + FIL_PAGE_DATA
3852
+ BTR_BLOB_HDR_NEXT_PAGE_NO,
3853
page_no, MLOG_4BYTES, &mtr);
3858
if (UNIV_LIKELY_NULL(page_zip)) {
3860
page_zip_des_t* blob_page_zip;
3862
mach_write_to_2(page + FIL_PAGE_TYPE,
3863
prev_page_no == FIL_NULL
3864
? FIL_PAGE_TYPE_ZBLOB
3865
: FIL_PAGE_TYPE_ZBLOB2);
3867
c_stream.next_out = page
3870
= page_zip_get_size(page_zip)
3873
err = deflate(&c_stream, Z_FINISH);
3874
ut_a(err == Z_OK || err == Z_STREAM_END);
3875
ut_a(err == Z_STREAM_END
3876
|| c_stream.avail_out == 0);
3878
/* Write the "next BLOB page" pointer */
3879
mlog_write_ulint(page + FIL_PAGE_NEXT,
3880
FIL_NULL, MLOG_4BYTES, &mtr);
3881
/* Initialize the unused "prev page" pointer */
3882
mlog_write_ulint(page + FIL_PAGE_PREV,
3883
FIL_NULL, MLOG_4BYTES, &mtr);
3884
/* Write a back pointer to the record
3885
into the otherwise unused area. This
3886
information could be useful in
3887
debugging. Later, we might want to
3888
implement the possibility to relocate
3889
BLOB pages. Then, we would need to be
3890
able to adjust the BLOB pointer in the
3891
record. We do not store the heap
3892
number of the record, because it can
3893
change in page_zip_reorganize() or
3894
btr_page_reorganize(). However, also
3895
the page number of the record may
3896
change when B-tree nodes are split or
3898
mlog_write_ulint(page
3899
+ FIL_PAGE_FILE_FLUSH_LSN,
3902
mlog_write_ulint(page
3903
+ FIL_PAGE_FILE_FLUSH_LSN + 4,
3907
/* Zero out the unused part of the page. */
3908
memset(page + page_zip_get_size(page_zip)
3909
- c_stream.avail_out,
3910
0, c_stream.avail_out);
3911
mlog_log_string(page + FIL_PAGE_TYPE,
3912
page_zip_get_size(page_zip)
3915
/* Copy the page to compressed storage,
3916
because it will be flushed to disk
3918
blob_page_zip = buf_block_get_page_zip(block);
3919
ut_ad(blob_page_zip);
3920
ut_ad(page_zip_get_size(blob_page_zip)
3921
== page_zip_get_size(page_zip));
3922
memcpy(blob_page_zip->data, page,
3923
page_zip_get_size(page_zip));
3925
if (err == Z_OK && prev_page_no != FIL_NULL) {
3930
rec_block = buf_page_get(space_id, zip_size,
3425
3932
RW_X_LATCH, &mtr);
3427
3933
#ifdef UNIV_SYNC_DEBUG
3428
buf_page_dbg_add_level(prev_page,
3429
SYNC_EXTERN_STORAGE);
3934
buf_block_dbg_add_level(rec_block,
3935
SYNC_NO_ORDER_CHECK);
3430
3936
#endif /* UNIV_SYNC_DEBUG */
3432
mlog_write_ulint(prev_page + FIL_PAGE_DATA
3937
if (err == Z_STREAM_END) {
3938
mach_write_to_4(field_ref
3939
+ BTR_EXTERN_LEN, 0);
3940
mach_write_to_4(field_ref
3941
+ BTR_EXTERN_LEN + 4,
3944
memset(field_ref + BTR_EXTERN_LEN,
3948
if (prev_page_no == FIL_NULL) {
3949
mach_write_to_4(field_ref
3950
+ BTR_EXTERN_SPACE_ID,
3953
mach_write_to_4(field_ref
3954
+ BTR_EXTERN_PAGE_NO,
3957
mach_write_to_4(field_ref
3958
+ BTR_EXTERN_OFFSET,
3962
page_zip_write_blob_ptr(
3963
page_zip, rec, index, offsets,
3964
big_rec_vec->fields[i].field_no, &mtr);
3967
prev_page_no = page_no;
3969
/* Commit mtr and release the
3970
uncompressed page frame to save memory. */
3971
btr_blob_free(block, FALSE, &mtr);
3973
if (err == Z_STREAM_END) {
3977
mlog_write_ulint(page + FIL_PAGE_TYPE,
3981
if (extern_len > (UNIV_PAGE_SIZE
3984
- FIL_PAGE_DATA_END)) {
3985
store_len = UNIV_PAGE_SIZE
3988
- FIL_PAGE_DATA_END;
3990
store_len = extern_len;
3993
mlog_write_string(page + FIL_PAGE_DATA
3994
+ BTR_BLOB_HDR_SIZE,
3996
big_rec_vec->fields[i].data
3997
+ big_rec_vec->fields[i].len
4000
mlog_write_ulint(page + FIL_PAGE_DATA
4001
+ BTR_BLOB_HDR_PART_LEN,
4002
store_len, MLOG_4BYTES, &mtr);
4003
mlog_write_ulint(page + FIL_PAGE_DATA
3433
4004
+ BTR_BLOB_HDR_NEXT_PAGE_NO,
3434
page_no, MLOG_4BYTES, &mtr);
3437
if (extern_len > (UNIV_PAGE_SIZE - FIL_PAGE_DATA
3439
- FIL_PAGE_DATA_END)) {
3440
store_len = UNIV_PAGE_SIZE - FIL_PAGE_DATA
3442
- FIL_PAGE_DATA_END;
3444
store_len = extern_len;
3447
mlog_write_string(page + FIL_PAGE_DATA
3448
+ BTR_BLOB_HDR_SIZE,
3449
big_rec_vec->fields[i].data
3450
+ big_rec_vec->fields[i].len
3453
mlog_write_ulint(page + FIL_PAGE_DATA
3454
+ BTR_BLOB_HDR_PART_LEN,
3455
store_len, MLOG_4BYTES, &mtr);
3456
mlog_write_ulint(page + FIL_PAGE_DATA
3457
+ BTR_BLOB_HDR_NEXT_PAGE_NO,
3458
FIL_NULL, MLOG_4BYTES, &mtr);
3460
extern_len -= store_len;
3462
rec_page = buf_page_get(space_id,
3463
buf_frame_get_page_no(data),
4005
FIL_NULL, MLOG_4BYTES, &mtr);
4007
extern_len -= store_len;
4009
rec_block = buf_page_get(space_id, zip_size,
3465
4012
#ifdef UNIV_SYNC_DEBUG
3466
buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
4013
buf_block_dbg_add_level(rec_block,
4014
SYNC_NO_ORDER_CHECK);
3467
4015
#endif /* UNIV_SYNC_DEBUG */
3468
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, 0,
3470
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
3471
big_rec_vec->fields[i].len
3475
if (prev_page_no == FIL_NULL) {
3476
mlog_write_ulint(data + local_len
3477
+ BTR_EXTERN_SPACE_ID,
3481
mlog_write_ulint(data + local_len
3482
+ BTR_EXTERN_PAGE_NO,
3486
mlog_write_ulint(data + local_len
3487
+ BTR_EXTERN_OFFSET,
3491
/* Set the bit denoting that this field
3492
in rec is stored externally */
3494
rec_set_nth_field_extern_bit(
3496
big_rec_vec->fields[i].field_no,
4017
mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
4019
mlog_write_ulint(field_ref
4020
+ BTR_EXTERN_LEN + 4,
4021
big_rec_vec->fields[i].len
4025
if (prev_page_no == FIL_NULL) {
4026
mlog_write_ulint(field_ref
4027
+ BTR_EXTERN_SPACE_ID,
4031
mlog_write_ulint(field_ref
4032
+ BTR_EXTERN_PAGE_NO,
4036
mlog_write_ulint(field_ref
4037
+ BTR_EXTERN_OFFSET,
4042
prev_page_no = page_no;
4046
if (extern_len == 0) {
3500
prev_page_no = page_no;
4053
if (UNIV_LIKELY_NULL(page_zip)) {
4054
deflateEnd(&c_stream);
4055
mem_heap_free(heap);
3506
4058
return(DB_SUCCESS);
3509
4061
/***********************************************************************
3510
4062
Frees the space in an externally stored field to the file space
3511
management if the field in data is owned the externally stored field,
4063
management if the field in data is owned by the externally stored field,
3512
4064
in a rollback we may have the additional condition that the field must
3513
4065
not be inherited. */
3516
4068
btr_free_externally_stored_field(
3517
4069
/*=============================*/
3534
4091
containing the latch to data and an
3535
4092
X-latch to the index tree */
3547
ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
4096
ulint rec_zip_size = dict_table_zip_size(index->table);
3548
4102
ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
3549
4103
MTR_MEMO_X_LOCK));
3550
ut_ad(mtr_memo_contains(local_mtr, buf_block_align(data),
3551
MTR_MEMO_PAGE_X_FIX));
3552
ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
3553
local_len -= BTR_EXTERN_FIELD_REF_SIZE;
4104
ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
4105
MTR_MEMO_PAGE_X_FIX));
4106
ut_ad(!rec || rec_offs_validate(rec, index, offsets));
4110
const byte* f = rec_get_nth_field(rec, offsets,
4112
ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
4113
local_len -= BTR_EXTERN_FIELD_REF_SIZE;
4115
ut_ad(f == field_ref);
4117
#endif /* UNIV_DEBUG */
4119
space_id = mach_read_from_4(field_ref + BTR_EXTERN_SPACE_ID);
4121
if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) {
4122
ext_zip_size = fil_space_get_zip_size(space_id);
4123
/* This must be an undo log record in the system tablespace,
4124
that is, in row_purge_upd_exist_or_extern().
4125
Currently, externally stored records are stored in the
4126
same tablespace as the referring records. */
4127
ut_ad(!page_get_space_id(page_align(field_ref)));
4131
ext_zip_size = rec_zip_size;
4135
/* This is a call from row_purge_upd_exist_or_extern(). */
4141
buf_block_t* rec_block;
4142
buf_block_t* ext_block;
3556
4144
mtr_start(&mtr);
3558
rec_page = buf_page_get(buf_frame_get_space_id(data),
3559
buf_frame_get_page_no(data),
3561
#ifdef UNIV_SYNC_DEBUG
3562
buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
3563
#endif /* UNIV_SYNC_DEBUG */
3564
space_id = mach_read_from_4(data + local_len
3565
+ BTR_EXTERN_SPACE_ID);
3567
page_no = mach_read_from_4(data + local_len
3568
+ BTR_EXTERN_PAGE_NO);
3570
offset = mach_read_from_4(data + local_len
3571
+ BTR_EXTERN_OFFSET);
3572
extern_len = mach_read_from_4(data + local_len
3573
+ BTR_EXTERN_LEN + 4);
3575
/* If extern len is 0, then there is no external storage data
3578
if (extern_len == 0) {
3585
if (mach_read_from_1(data + local_len + BTR_EXTERN_LEN)
3586
& BTR_EXTERN_OWNER_FLAG) {
3587
/* This field does not own the externally
3588
stored field: do not free! */
3595
if (do_not_free_inherited
3596
&& mach_read_from_1(data + local_len + BTR_EXTERN_LEN)
3597
& BTR_EXTERN_INHERITED_FLAG) {
3598
/* Rollback and inherited field: do not free! */
3605
page = buf_page_get(space_id, page_no, RW_X_LATCH, &mtr);
3606
#ifdef UNIV_SYNC_DEBUG
3607
buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE);
3608
#endif /* UNIV_SYNC_DEBUG */
3609
next_page_no = mach_read_from_4(page + FIL_PAGE_DATA
3610
+ BTR_BLOB_HDR_NEXT_PAGE_NO);
3612
part_len = btr_blob_get_part_len(page + FIL_PAGE_DATA);
3614
ut_a(extern_len >= part_len);
3616
/* We must supply the page level (= 0) as an argument
3617
because we did not store it on the page (we save the space
3618
overhead from an index page header. */
3620
btr_page_free_low(index, page, 0, &mtr);
3622
mlog_write_ulint(data + local_len + BTR_EXTERN_PAGE_NO,
3625
mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
3626
extern_len - part_len,
3628
if (next_page_no == FIL_NULL) {
3629
ut_a(extern_len - part_len == 0);
3632
if (extern_len - part_len == 0) {
3633
ut_a(next_page_no == FIL_NULL);
4146
rec_block = buf_page_get(page_get_space_id(
4147
page_align(field_ref)),
4150
page_align(field_ref)),
4152
#ifdef UNIV_SYNC_DEBUG
4153
buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK);
4154
#endif /* UNIV_SYNC_DEBUG */
4155
page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
4157
if (/* There is no external storage data */
4159
/* This field does not own the externally stored field */
4160
|| (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
4161
& BTR_EXTERN_OWNER_FLAG)
4162
/* Rollback and inherited field */
4163
|| (do_not_free_inherited
4164
&& (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
4165
& BTR_EXTERN_INHERITED_FLAG))) {
4173
ext_block = buf_page_get(space_id, ext_zip_size, page_no,
4175
#ifdef UNIV_SYNC_DEBUG
4176
buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE);
4177
#endif /* UNIV_SYNC_DEBUG */
4178
page = buf_block_get_frame(ext_block);
4181
/* Note that page_zip will be NULL
4182
in row_purge_upd_exist_or_extern(). */
4183
switch (fil_page_get_type(page)) {
4184
case FIL_PAGE_TYPE_ZBLOB:
4185
case FIL_PAGE_TYPE_ZBLOB2:
4190
next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
4192
btr_page_free_low(index, ext_block, 0, &mtr);
4194
if (UNIV_LIKELY(page_zip != NULL)) {
4195
mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO,
4197
mach_write_to_4(field_ref + BTR_EXTERN_LEN + 4,
4199
page_zip_write_blob_ptr(page_zip, rec, index,
4202
mlog_write_ulint(field_ref
4203
+ BTR_EXTERN_PAGE_NO,
4206
mlog_write_ulint(field_ref
4207
+ BTR_EXTERN_LEN + 4, 0,
4211
ulint extern_len = mach_read_from_4(
4212
field_ref + BTR_EXTERN_LEN + 4);
4213
ulint part_len = btr_blob_get_part_len(
4214
page + FIL_PAGE_DATA);
4216
ut_a(fil_page_get_type(page) == FIL_PAGE_TYPE_BLOB);
4218
ut_a(extern_len >= part_len);
4220
next_page_no = mach_read_from_4(
4221
page + FIL_PAGE_DATA
4222
+ BTR_BLOB_HDR_NEXT_PAGE_NO);
4224
/* We must supply the page level (= 0) as an argument
4225
because we did not store it on the page (we save the
4226
space overhead from an index page header). */
4228
ut_a(space_id == page_get_space_id(page));
4229
ut_a(page_no == page_get_page_no(page));
4231
btr_page_free_low(index, ext_block, 0, &mtr);
4233
mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
4236
mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4,
4237
extern_len - part_len,
4239
if (next_page_no == FIL_NULL) {
4240
ut_a(extern_len - part_len == 0);
4243
if (extern_len - part_len == 0) {
4244
ut_a(next_page_no == FIL_NULL);
4248
/* Commit mtr and release the BLOB block to save memory. */
4249
btr_blob_free(ext_block, TRUE, &mtr);
3640
4253
/***************************************************************
3641
4254
Frees the externally stored fields for a record. */
3644
4257
btr_rec_free_externally_stored_fields(
3645
4258
/*==================================*/
3646
4259
dict_index_t* index, /* in: index of the data, the index
3647
4260
tree MUST be X-latched */
3648
rec_t* rec, /* in: record */
4261
rec_t* rec, /* in/out: record */
3649
4262
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
4263
page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
4264
part will be updated, or NULL */
3650
4265
ibool do_not_free_inherited,/* in: TRUE if called in a
3651
4266
rollback and we do not want to free
3652
4267
inherited fields */
3687
4303
/*===============================*/
3688
4304
dict_index_t* index, /* in: index of rec; the index tree MUST be
3690
rec_t* rec, /* in: record */
4306
rec_t* rec, /* in/out: record */
4307
page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
4308
part will be updated, or NULL */
3691
4309
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
3692
upd_t* update, /* in: update vector */
3693
ibool do_not_free_inherited,/* in: TRUE if called in a
3694
rollback and we do not want to free
4310
const upd_t* update, /* in: update vector */
3696
4311
mtr_t* mtr) /* in: mini-transaction handle which contains
3697
4312
an X-latch to record page and to the tree */
3699
upd_field_t* ufield;
3705
4317
ut_ad(rec_offs_validate(rec, index, offsets));
3706
ut_ad(mtr_memo_contains(mtr, buf_block_align(rec),
3707
MTR_MEMO_PAGE_X_FIX));
4318
ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
3709
4320
/* Free possible externally stored fields in the record */
3711
4322
n_fields = upd_get_n_fields(update);
3713
4324
for (i = 0; i < n_fields; i++) {
3714
ufield = upd_get_nth_field(update, i);
4325
const upd_field_t* ufield = upd_get_nth_field(update, i);
3716
4327
if (rec_offs_nth_extern(offsets, ufield->field_no)) {
3718
data = rec_get_nth_field(rec, offsets,
3719
ufield->field_no, &len);
3720
btr_free_externally_stored_field(index, data, len,
3721
do_not_free_inherited,
3727
/***********************************************************************
3728
Copies an externally stored field of a record to mem heap. Parameter
3729
data contains a pointer to 'internally' stored part of the field:
3730
possibly some data, and the reference to the externally stored part in
3731
the last 20 bytes of data. */
4329
byte* data = rec_get_nth_field(
4330
rec, offsets, ufield->field_no, &len);
4331
ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
4333
btr_free_externally_stored_field(
4334
index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
4335
rec, offsets, page_zip,
4336
ufield->field_no, TRUE, mtr);
4341
/***********************************************************************
4342
Copies the prefix of an uncompressed BLOB. The clustered index record
4343
that points to this BLOB must be protected by a lock or a page latch. */
4346
btr_copy_blob_prefix(
4347
/*=================*/
4348
/* out: number of bytes written to buf */
4349
byte* buf, /* out: the externally stored part of
4350
the field, or a prefix of it */
4351
ulint len, /* in: length of buf, in bytes */
4352
ulint space_id,/* in: space id of the BLOB pages */
4353
ulint page_no,/* in: page number of the first BLOB page */
4354
ulint offset) /* in: offset on the first BLOB page */
4356
ulint copied_len = 0;
4362
const byte* blob_header;
4368
block = buf_page_get(space_id, 0, page_no, RW_S_LATCH, &mtr);
4369
#ifdef UNIV_SYNC_DEBUG
4370
buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE);
4371
#endif /* UNIV_SYNC_DEBUG */
4372
page = buf_block_get_frame(block);
4374
/* Unfortunately, FIL_PAGE_TYPE was uninitialized for
4375
many pages until MySQL/InnoDB 5.1.7. */
4376
/* ut_ad(fil_page_get_type(page) == FIL_PAGE_TYPE_BLOB); */
4377
blob_header = page + offset;
4378
part_len = btr_blob_get_part_len(blob_header);
4379
copy_len = ut_min(part_len, len - copied_len);
4381
memcpy(buf + copied_len,
4382
blob_header + BTR_BLOB_HDR_SIZE, copy_len);
4383
copied_len += copy_len;
4385
page_no = btr_blob_get_next_page_no(blob_header);
4389
if (page_no == FIL_NULL || copy_len != part_len) {
4393
/* On other BLOB pages except the first the BLOB header
4394
always is at the page data start: */
4396
offset = FIL_PAGE_DATA;
4398
ut_ad(copied_len <= len);
4402
/***********************************************************************
4403
Copies the prefix of a compressed BLOB. The clustered index record
4404
that points to this BLOB must be protected by a lock or a page latch. */
4407
btr_copy_zblob_prefix(
4408
/*==================*/
4409
z_stream* d_stream,/* in/out: the decompressing stream */
4410
ulint zip_size,/* in: compressed BLOB page size */
4411
ulint space_id,/* in: space id of the BLOB pages */
4412
ulint page_no,/* in: page number of the first BLOB page */
4413
ulint offset) /* in: offset on the first BLOB page */
4415
ulint page_type = FIL_PAGE_TYPE_ZBLOB;
4417
ut_ad(ut_is_2pow(zip_size));
4418
ut_ad(zip_size >= PAGE_ZIP_MIN_SIZE);
4419
ut_ad(zip_size <= UNIV_PAGE_SIZE);
4427
/* There is no latch on bpage directly. Instead,
4428
bpage is protected by the B-tree page latch that
4429
is being held on the clustered index record, or,
4430
in row_merge_copy_blobs(), by an exclusive table lock. */
4431
bpage = buf_page_get_zip(space_id, zip_size, page_no);
4433
if (UNIV_UNLIKELY(!bpage)) {
4434
ut_print_timestamp(stderr);
4436
" InnoDB: Cannot load"
4438
" page %lu space %lu\n",
4439
(ulong) page_no, (ulong) space_id);
4444
(fil_page_get_type(bpage->zip.data) != page_type)) {
4445
ut_print_timestamp(stderr);
4447
" InnoDB: Unexpected type %lu of"
4449
" page %lu space %lu\n",
4450
(ulong) fil_page_get_type(bpage->zip.data),
4451
(ulong) page_no, (ulong) space_id);
4455
next_page_no = mach_read_from_4(bpage->zip.data + offset);
4457
if (UNIV_LIKELY(offset == FIL_PAGE_NEXT)) {
4458
/* When the BLOB begins at page header,
4459
the compressed data payload does not
4460
immediately follow the next page pointer. */
4461
offset = FIL_PAGE_DATA;
4466
d_stream->next_in = bpage->zip.data + offset;
4467
d_stream->avail_in = zip_size - offset;
4469
err = inflate(d_stream, Z_NO_FLUSH);
4472
if (!d_stream->avail_out) {
4477
if (next_page_no == FIL_NULL) {
4483
ut_print_timestamp(stderr);
4485
" InnoDB: inflate() of"
4487
" page %lu space %lu returned %d (%s)\n",
4488
(ulong) page_no, (ulong) space_id,
4489
err, d_stream->msg);
4494
if (next_page_no == FIL_NULL) {
4495
if (!d_stream->avail_in) {
4496
ut_print_timestamp(stderr);
4498
" InnoDB: unexpected end of"
4500
" page %lu space %lu\n",
4504
err = inflate(d_stream, Z_FINISH);
4515
buf_page_release_zip(bpage);
4519
buf_page_release_zip(bpage);
4521
/* On other BLOB pages except the first
4522
the BLOB header always is at the page header: */
4524
page_no = next_page_no;
4525
offset = FIL_PAGE_NEXT;
4526
page_type = FIL_PAGE_TYPE_ZBLOB2;
4530
/***********************************************************************
4531
Copies the prefix of an externally stored field of a record. The
4532
clustered index record that points to this BLOB must be protected by a
4533
lock or a page latch. */
4536
btr_copy_externally_stored_field_prefix_low(
4537
/*========================================*/
4538
/* out: number of bytes written to buf */
4539
byte* buf, /* out: the externally stored part of
4540
the field, or a prefix of it */
4541
ulint len, /* in: length of buf, in bytes */
4542
ulint zip_size,/* in: nonzero=compressed BLOB page size,
4543
zero for uncompressed BLOBs */
4544
ulint space_id,/* in: space id of the first BLOB page */
4545
ulint page_no,/* in: page number of the first BLOB page */
4546
ulint offset) /* in: offset on the first BLOB page */
4548
if (UNIV_UNLIKELY(len == 0)) {
4552
if (UNIV_UNLIKELY(zip_size)) {
4557
/* Zlib inflate needs 32 kilobytes for the default
4558
window size, plus a few kilobytes for small objects. */
4559
heap = mem_heap_create(40000);
4560
page_zip_set_alloc(&d_stream, heap);
4562
err = inflateInit(&d_stream);
4565
d_stream.next_out = buf;
4566
d_stream.avail_out = len;
4567
d_stream.avail_in = 0;
4569
btr_copy_zblob_prefix(&d_stream, zip_size,
4570
space_id, page_no, offset);
4571
inflateEnd(&d_stream);
4572
mem_heap_free(heap);
4573
return(d_stream.total_out);
4575
return(btr_copy_blob_prefix(buf, len, space_id,
4580
/***********************************************************************
4581
Copies the prefix of an externally stored field of a record. The
4582
clustered index record must be protected by a lock or a page latch. */
4585
btr_copy_externally_stored_field_prefix(
4586
/*====================================*/
4587
/* out: the length of the copied field */
4588
byte* buf, /* out: the field, or a prefix of it */
4589
ulint len, /* in: length of buf, in bytes */
4590
ulint zip_size,/* in: nonzero=compressed BLOB page size,
4591
zero for uncompressed BLOBs */
4592
const byte* data, /* in: 'internally' stored part of the
4593
field containing also the reference to
4594
the external part; must be protected by
4595
a lock or a page latch */
4596
ulint local_len)/* in: length of data, in bytes */
4602
ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
4604
local_len -= BTR_EXTERN_FIELD_REF_SIZE;
4606
if (UNIV_UNLIKELY(local_len >= len)) {
4607
memcpy(buf, data, len);
4611
memcpy(buf, data, local_len);
4614
ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));
4616
space_id = mach_read_from_4(data + BTR_EXTERN_SPACE_ID);
4618
page_no = mach_read_from_4(data + BTR_EXTERN_PAGE_NO);
4620
offset = mach_read_from_4(data + BTR_EXTERN_OFFSET);
4623
+ btr_copy_externally_stored_field_prefix_low(buf + local_len,
4630
/***********************************************************************
4631
Copies an externally stored field of a record to mem heap. The
4632
clustered index record must be protected by a lock or a page latch. */
3734
4635
btr_copy_externally_stored_field(
3735
4636
/*=============================*/
3736
4637
/* out: the whole field copied to heap */
3737
4638
ulint* len, /* out: length of the whole field */
3738
byte* data, /* in: 'internally' stored part of the
4639
const byte* data, /* in: 'internally' stored part of the
3739
4640
field containing also the reference to
3740
the external part */
4641
the external part; must be protected by
4642
a lock or a page latch */
4643
ulint zip_size,/* in: nonzero=compressed BLOB page size,
4644
zero for uncompressed BLOBs */
3741
4645
ulint local_len,/* in: length of data */
3742
4646
mem_heap_t* heap) /* in: mem heap */
3745
4648
ulint space_id;
3748
4651
ulint extern_len;
3755
4654
ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
3763
4662
offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET);
3765
/* Currently a BLOB cannot be bigger that 4 GB; we
4664
/* Currently a BLOB cannot be bigger than 4 GB; we
3766
4665
leave the 4 upper bytes in the length field unused */
3768
4667
extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4);
3770
4669
buf = mem_heap_alloc(heap, local_len + extern_len);
3772
ut_memcpy(buf, data, local_len);
3773
copied_len = local_len;
3775
if (extern_len == 0) {
3784
page = buf_page_get(space_id, page_no, RW_S_LATCH, &mtr);
3785
#ifdef UNIV_SYNC_DEBUG
3786
buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE);
3787
#endif /* UNIV_SYNC_DEBUG */
3788
blob_header = page + offset;
3790
part_len = btr_blob_get_part_len(blob_header);
3792
ut_memcpy(buf + copied_len, blob_header + BTR_BLOB_HDR_SIZE,
3794
copied_len += part_len;
3796
page_no = btr_blob_get_next_page_no(blob_header);
3800
if (page_no == FIL_NULL) {
3801
ut_a(copied_len == local_len + extern_len);
3808
/* On other BLOB pages except the first the BLOB header
3809
always is at the page data start: */
3811
offset = FIL_PAGE_DATA;
3813
ut_a(copied_len < local_len + extern_len);
4671
memcpy(buf, data, local_len);
4673
+ btr_copy_externally_stored_field_prefix_low(buf + local_len,
3817
4682
/***********************************************************************
3818
4683
Copies an externally stored field of a record to mem heap. */
3821
4686
btr_rec_copy_externally_stored_field(
3822
4687
/*=================================*/
3823
4688
/* out: the field copied to heap */
3824
rec_t* rec, /* in: record */
4689
const rec_t* rec, /* in: record in a clustered index;
4690
must be protected by a lock or a page latch */
3825
4691
const ulint* offsets,/* in: array returned by rec_get_offsets() */
4692
ulint zip_size,/* in: nonzero=compressed BLOB page size,
4693
zero for uncompressed BLOBs */
3826
4694
ulint no, /* in: field number */
3827
4695
ulint* len, /* out: length of the field */
3828
4696
mem_heap_t* heap) /* in: mem heap */
3833
ut_ad(rec_offs_validate(rec, NULL, offsets));
3834
4701
ut_a(rec_offs_nth_extern(offsets, no));
3836
4703
/* An externally stored field can contain some initial