~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to storage/innobase/btr/btr0cur.c

  • Committer: Brian Aker
  • Date: 2008-11-04 15:39:09 UTC
  • mfrom: (575.1.2 devel)
  • Revision ID: brian@tangent.org-20081104153909-c72hn65udxs1ccal
Merge of Monty's work

Show diffs side-by-side

added added

removed removed

Lines of Context:
24
24
#endif
25
25
 
26
26
#include "page0page.h"
 
27
#include "page0zip.h"
27
28
#include "rem0rec.h"
28
29
#include "rem0cmp.h"
 
30
#include "buf0lru.h"
29
31
#include "btr0btr.h"
30
32
#include "btr0sea.h"
31
33
#include "row0upd.h"
35
37
#include "srv0srv.h"
36
38
#include "ibuf0ibuf.h"
37
39
#include "lock0lock.h"
 
40
#include "zlib.h"
38
41
 
39
42
#ifdef UNIV_DEBUG
40
43
/* If the following is set to TRUE, this module prints a lot of
41
44
trace information of individual record operations */
42
 
ibool   btr_cur_print_record_ops = FALSE;
 
45
UNIV_INTERN ibool       btr_cur_print_record_ops = FALSE;
43
46
#endif /* UNIV_DEBUG */
44
47
 
45
 
ulint   btr_cur_n_non_sea       = 0;
46
 
ulint   btr_cur_n_sea           = 0;
47
 
ulint   btr_cur_n_non_sea_old   = 0;
48
 
ulint   btr_cur_n_sea_old       = 0;
 
48
UNIV_INTERN ulint       btr_cur_n_non_sea       = 0;
 
49
UNIV_INTERN ulint       btr_cur_n_sea           = 0;
 
50
UNIV_INTERN ulint       btr_cur_n_non_sea_old   = 0;
 
51
UNIV_INTERN ulint       btr_cur_n_sea_old       = 0;
49
52
 
50
53
/* In the optimistic insert, if the insert does not fit, but this much space
51
54
can be released by page reorganize, then it is reorganized */
52
55
 
53
56
#define BTR_CUR_PAGE_REORGANIZE_LIMIT   (UNIV_PAGE_SIZE / 32)
54
57
 
55
 
/* When estimating number of different kay values in an index sample
 
58
/* When estimating number of different key values in an index, sample
56
59
this many index pages */
57
60
#define BTR_KEY_VAL_ESTIMATE_N_PAGES    8
58
61
 
65
68
/*--------------------------------------*/
66
69
#define BTR_BLOB_HDR_SIZE               8
67
70
 
 
71
/* A BLOB field reference full of zero, for use in assertions and tests.
 
72
Initially, BLOB field references are set to zero, in
 
73
dtuple_convert_big_rec(). */
 
74
UNIV_INTERN const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
 
75
 
68
76
/***********************************************************************
69
77
Marks all extern fields in a record as owned by the record. This function
70
78
should be called if the delete mark of a record is removed: a not delete
73
81
void
74
82
btr_cur_unmark_extern_fields(
75
83
/*=========================*/
76
 
        rec_t*          rec,    /* in: record in a clustered index */
77
 
        mtr_t*          mtr,    /* in: mtr */
78
 
        const ulint*    offsets);/* in: array returned by rec_get_offsets() */
 
84
        page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
 
85
                                part will be updated, or NULL */
 
86
        rec_t*          rec,    /* in/out: record in a clustered index */
 
87
        dict_index_t*   index,  /* in: index of the page */
 
88
        const ulint*    offsets,/* in: array returned by rec_get_offsets() */
 
89
        mtr_t*          mtr);   /* in: mtr, or NULL if not logged */
79
90
/***********************************************************************
80
91
Adds path information to the cursor for the current page, for which
81
92
the binary search has been performed. */
97
108
        dict_index_t*   index,  /* in: index of rec; the index tree MUST be
98
109
                                X-latched */
99
110
        rec_t*          rec,    /* in: record */
100
 
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
101
 
        upd_t*          update, /* in: update vector */
 
111
        page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
 
112
                                part will be updated, or NULL */
 
113
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
 
114
        const upd_t*    update, /* in: update vector */
 
115
        mtr_t*          mtr);   /* in: mini-transaction handle which contains
 
116
                                an X-latch to record page and to the tree */
 
117
/***************************************************************
 
118
Frees the externally stored fields for a record. */
 
119
static
 
120
void
 
121
btr_rec_free_externally_stored_fields(
 
122
/*==================================*/
 
123
        dict_index_t*   index,  /* in: index of the data, the index
 
124
                                tree MUST be X-latched */
 
125
        rec_t*          rec,    /* in: record */
 
126
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
 
127
        page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
 
128
                                part will be updated, or NULL */
102
129
        ibool           do_not_free_inherited,/* in: TRUE if called in a
103
130
                                rollback and we do not want to free
104
131
                                inherited fields */
105
132
        mtr_t*          mtr);   /* in: mini-transaction handle which contains
106
 
                                an X-latch to record page and to the tree */
 
133
                                an X-latch to record page and to the index
 
134
                                tree */
107
135
/***************************************************************
108
136
Gets the externally stored size of a record, in units of a database page. */
109
137
static
115
143
        rec_t*          rec,    /* in: record */
116
144
        const ulint*    offsets);/* in: array returned by rec_get_offsets() */
117
145
 
 
146
/**********************************************************
 
147
The following function is used to set the deleted bit of a record. */
 
148
UNIV_INLINE
 
149
void
 
150
btr_rec_set_deleted_flag(
 
151
/*=====================*/
 
152
                                /* no return value: the function is void

153
                                and only sets the delete flag in rec */
 
154
        rec_t*          rec,    /* in/out: physical record */
 
155
        page_zip_des_t* page_zip,/* in/out: compressed page (or NULL) */
 
156
        ulint           flag)   /* in: nonzero if delete marked */
 
157
{
 
158
        if (page_rec_is_comp(rec)) {
 
159
                rec_set_deleted_flag_new(rec, page_zip, flag);
 
160
        } else {
 
161
                ut_ad(!page_zip);
 
162
                rec_set_deleted_flag_old(rec, flag);
 
163
        }
 
164
}
 
165
 
118
166
/*==================== B-TREE SEARCH =========================*/
119
167
 
120
168
/************************************************************************
126
174
        page_t*         page,           /* in: leaf page where the search
127
175
                                        converged */
128
176
        ulint           space,          /* in: space id */
 
177
        ulint           zip_size,       /* in: compressed page size in bytes
 
178
                                        or 0 for uncompressed pages */
129
179
        ulint           page_no,        /* in: page number of the leaf */
130
180
        ulint           latch_mode,     /* in: BTR_SEARCH_LEAF, ... */
131
181
        btr_cur_t*      cursor,         /* in: cursor */
132
182
        mtr_t*          mtr)            /* in: mtr */
133
183
{
134
 
        ulint   left_page_no;
135
 
        ulint   right_page_no;
136
 
        page_t* get_page;
 
184
        ulint           mode;
 
185
        ulint           left_page_no;
 
186
        ulint           right_page_no;
 
187
        buf_block_t*    get_block;
137
188
 
138
189
        ut_ad(page && mtr);
139
190
 
140
 
        if (latch_mode == BTR_SEARCH_LEAF) {
141
 
 
142
 
                get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
143
 
                ut_a(page_is_comp(get_page) == page_is_comp(page));
144
 
                buf_block_align(get_page)->check_index_page_at_flush = TRUE;
145
 
 
146
 
        } else if (latch_mode == BTR_MODIFY_LEAF) {
147
 
 
148
 
                get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
149
 
                ut_a(page_is_comp(get_page) == page_is_comp(page));
150
 
                buf_block_align(get_page)->check_index_page_at_flush = TRUE;
151
 
 
152
 
        } else if (latch_mode == BTR_MODIFY_TREE) {
153
 
 
 
191
        switch (latch_mode) {
 
192
        case BTR_SEARCH_LEAF:
 
193
        case BTR_MODIFY_LEAF:
 
194
                mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
 
195
                get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
 
196
#ifdef UNIV_BTR_DEBUG
 
197
                ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
 
198
#endif /* UNIV_BTR_DEBUG */
 
199
                get_block->check_index_page_at_flush = TRUE;
 
200
                return;
 
201
        case BTR_MODIFY_TREE:
154
202
                /* x-latch also brothers from left to right */
155
203
                left_page_no = btr_page_get_prev(page, mtr);
156
204
 
157
205
                if (left_page_no != FIL_NULL) {
158
 
                        get_page = btr_page_get(space, left_page_no,
159
 
                                                RW_X_LATCH, mtr);
 
206
                        get_block = btr_block_get(space, zip_size,
 
207
                                                  left_page_no,
 
208
                                                  RW_X_LATCH, mtr);
160
209
#ifdef UNIV_BTR_DEBUG
161
 
                        ut_a(btr_page_get_next(get_page, mtr)
162
 
                             == buf_frame_get_page_no(page));
 
210
                        ut_a(page_is_comp(get_block->frame)
 
211
                             == page_is_comp(page));
 
212
                        ut_a(btr_page_get_next(get_block->frame, mtr)
 
213
                             == page_get_page_no(page));
163
214
#endif /* UNIV_BTR_DEBUG */
164
 
                        ut_a(page_is_comp(get_page) == page_is_comp(page));
165
 
                        buf_block_align(get_page)->check_index_page_at_flush
166
 
                                = TRUE;
 
215
                        get_block->check_index_page_at_flush = TRUE;
167
216
                }
168
217
 
169
 
                get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
170
 
                ut_a(page_is_comp(get_page) == page_is_comp(page));
171
 
                buf_block_align(get_page)->check_index_page_at_flush = TRUE;
 
218
                get_block = btr_block_get(space, zip_size, page_no,
 
219
                                          RW_X_LATCH, mtr);
 
220
#ifdef UNIV_BTR_DEBUG
 
221
                ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
 
222
#endif /* UNIV_BTR_DEBUG */
 
223
                get_block->check_index_page_at_flush = TRUE;
172
224
 
173
225
                right_page_no = btr_page_get_next(page, mtr);
174
226
 
175
227
                if (right_page_no != FIL_NULL) {
176
 
                        get_page = btr_page_get(space, right_page_no,
177
 
                                                RW_X_LATCH, mtr);
178
 
#ifdef UNIV_BTR_DEBUG
179
 
                        ut_a(btr_page_get_prev(get_page, mtr)
180
 
                             == buf_frame_get_page_no(page));
181
 
#endif /* UNIV_BTR_DEBUG */
182
 
                        buf_block_align(get_page)->check_index_page_at_flush
183
 
                                = TRUE;
184
 
                }
185
 
 
186
 
        } else if (latch_mode == BTR_SEARCH_PREV) {
187
 
 
188
 
                /* s-latch also left brother */
189
 
                left_page_no = btr_page_get_prev(page, mtr);
190
 
 
191
 
                if (left_page_no != FIL_NULL) {
192
 
                        cursor->left_page = btr_page_get(space, left_page_no,
193
 
                                                         RW_S_LATCH, mtr);
194
 
#ifdef UNIV_BTR_DEBUG
195
 
                        ut_a(btr_page_get_next(cursor->left_page, mtr)
196
 
                             == buf_frame_get_page_no(page));
197
 
#endif /* UNIV_BTR_DEBUG */
198
 
                        ut_a(page_is_comp(cursor->left_page)
199
 
                             == page_is_comp(page));
200
 
                        buf_block_align(cursor->left_page)
201
 
                                ->check_index_page_at_flush = TRUE;
202
 
                }
203
 
 
204
 
                get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
205
 
                ut_a(page_is_comp(get_page) == page_is_comp(page));
206
 
                buf_block_align(get_page)->check_index_page_at_flush = TRUE;
207
 
 
208
 
        } else if (latch_mode == BTR_MODIFY_PREV) {
209
 
 
210
 
                /* x-latch also left brother */
211
 
                left_page_no = btr_page_get_prev(page, mtr);
212
 
 
213
 
                if (left_page_no != FIL_NULL) {
214
 
                        cursor->left_page = btr_page_get(space, left_page_no,
215
 
                                                         RW_X_LATCH, mtr);
216
 
#ifdef UNIV_BTR_DEBUG
217
 
                        ut_a(btr_page_get_next(cursor->left_page, mtr)
218
 
                             == buf_frame_get_page_no(page));
219
 
#endif /* UNIV_BTR_DEBUG */
220
 
                        ut_a(page_is_comp(cursor->left_page)
221
 
                             == page_is_comp(page));
222
 
                        buf_block_align(cursor->left_page)
223
 
                                ->check_index_page_at_flush = TRUE;
224
 
                }
225
 
 
226
 
                get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
227
 
                ut_a(page_is_comp(get_page) == page_is_comp(page));
228
 
                buf_block_align(get_page)->check_index_page_at_flush = TRUE;
229
 
        } else {
230
 
                ut_error;
 
228
                        get_block = btr_block_get(space, zip_size,
 
229
                                                  right_page_no,
 
230
                                                  RW_X_LATCH, mtr);
 
231
#ifdef UNIV_BTR_DEBUG
 
232
                        ut_a(page_is_comp(get_block->frame)
 
233
                             == page_is_comp(page));
 
234
                        ut_a(btr_page_get_prev(get_block->frame, mtr)
 
235
                             == page_get_page_no(page));
 
236
#endif /* UNIV_BTR_DEBUG */
 
237
                        get_block->check_index_page_at_flush = TRUE;
 
238
                }
 
239
 
 
240
                return;
 
241
 
 
242
        case BTR_SEARCH_PREV:
 
243
        case BTR_MODIFY_PREV:
 
244
                mode = latch_mode == BTR_SEARCH_PREV ? RW_S_LATCH : RW_X_LATCH;
 
245
                /* latch also left brother */
 
246
                left_page_no = btr_page_get_prev(page, mtr);
 
247
 
 
248
                if (left_page_no != FIL_NULL) {
 
249
                        get_block = btr_block_get(space, zip_size,
 
250
                                                  left_page_no, mode, mtr);
 
251
                        cursor->left_block = get_block;
 
252
#ifdef UNIV_BTR_DEBUG
 
253
                        ut_a(page_is_comp(get_block->frame)
 
254
                             == page_is_comp(page));
 
255
                        ut_a(btr_page_get_next(get_block->frame, mtr)
 
256
                             == page_get_page_no(page));
 
257
#endif /* UNIV_BTR_DEBUG */
 
258
                        get_block->check_index_page_at_flush = TRUE;
 
259
                }
 
260
 
 
261
                get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
 
262
#ifdef UNIV_BTR_DEBUG
 
263
                ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
 
264
#endif /* UNIV_BTR_DEBUG */
 
265
                get_block->check_index_page_at_flush = TRUE;
 
266
                return;
231
267
        }
 
268
 
 
269
        ut_error;
232
270
}
233
271
 
234
272
/************************************************************************
243
281
search tuple should be performed in the B-tree. InnoDB does an insert
244
282
immediately after the cursor. Thus, the cursor may end up on a user record,
245
283
or on a page infimum record. */
246
 
 
 
284
UNIV_INTERN
247
285
void
248
286
btr_cur_search_to_nth_level(
249
287
/*========================*/
250
288
        dict_index_t*   index,  /* in: index */
251
289
        ulint           level,  /* in: the tree level of search */
252
 
        dtuple_t*       tuple,  /* in: data tuple; NOTE: n_fields_cmp in
 
290
        const dtuple_t* tuple,  /* in: data tuple; NOTE: n_fields_cmp in
253
291
                                tuple must be set so that it cannot get
254
292
                                compared to the node ptr page number field! */
255
293
        ulint           mode,   /* in: PAGE_CUR_L, ...;
257
295
                                PAGE_CUR_LE to search the position! */
258
296
        ulint           latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with
259
297
                                BTR_INSERT and BTR_ESTIMATE;
260
 
                                cursor->left_page is used to store a pointer
 
298
                                cursor->left_block is used to store a pointer
261
299
                                to the left neighbor page, in the cases
262
300
                                BTR_SEARCH_PREV and BTR_MODIFY_PREV;
263
301
                                NOTE that if has_search_latch
274
312
{
275
313
        page_cur_t*     page_cursor;
276
314
        page_t*         page;
277
 
        page_t*         guess;
 
315
        buf_block_t*    guess;
278
316
        rec_t*          node_ptr;
279
317
        ulint           page_no;
280
318
        ulint           space;
297
335
        mem_heap_t*     heap            = NULL;
298
336
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
299
337
        ulint*          offsets         = offsets_;
300
 
        *offsets_ = (sizeof offsets_) / sizeof *offsets_;
 
338
        rec_offs_init(offsets_);
301
339
        /* Currently, PAGE_CUR_LE is the only search mode used for searches
302
340
        ending at upper levels */
303
341
 
304
342
        ut_ad(level == 0 || mode == PAGE_CUR_LE);
305
343
        ut_ad(dict_index_check_search_tuple(index, tuple));
306
 
        ut_ad(!(index->type & DICT_IBUF) || ibuf_inside());
 
344
        ut_ad(!dict_index_is_ibuf(index) || ibuf_inside());
307
345
        ut_ad(dtuple_check_typed(tuple));
308
346
 
309
347
#ifdef UNIV_DEBUG
339
377
#ifdef PAGE_CUR_LE_OR_EXTENDS
340
378
            && mode != PAGE_CUR_LE_OR_EXTENDS
341
379
#endif /* PAGE_CUR_LE_OR_EXTENDS */
342
 
            && srv_use_adaptive_hash_indexes
 
380
            && !UNIV_UNLIKELY(btr_search_disabled)
343
381
            && btr_search_guess_on_hash(index, info, tuple, mode,
344
382
                                        latch_mode, cursor,
345
383
                                        has_search_latch, mtr)) {
423
461
        /* Loop and search until we arrive at the desired level */
424
462
 
425
463
        for (;;) {
426
 
                if ((height == 0) && (latch_mode <= BTR_MODIFY_LEAF)) {
427
 
 
428
 
                        rw_latch = latch_mode;
429
 
 
430
 
                        if (insert_planned
431
 
                            && ibuf_should_try(index, ignore_sec_unique)) {
432
 
 
433
 
                                /* Try insert to the insert buffer if the
434
 
                                page is not in the buffer pool */
435
 
 
436
 
                                buf_mode = BUF_GET_IF_IN_POOL;
437
 
                        }
438
 
                }
 
464
                ulint           zip_size;
 
465
                buf_block_t*    block;
439
466
retry_page_get:
440
 
                page = buf_page_get_gen(space, page_no, rw_latch, guess,
441
 
                                        buf_mode,
442
 
                                        __FILE__, __LINE__,
443
 
                                        mtr);
444
 
                if (page == NULL) {
 
467
                zip_size = dict_table_zip_size(index->table);
 
468
 
 
469
                block = buf_page_get_gen(space, zip_size, page_no,
 
470
                                         rw_latch, guess, buf_mode,
 
471
                                         __FILE__, __LINE__,
 
472
                                         mtr);
 
473
                if (block == NULL) {
445
474
                        /* This must be a search to perform an insert;
446
475
                        try insert to the insert buffer */
447
476
 
450
479
                        ut_ad(cursor->thr);
451
480
 
452
481
                        if (ibuf_should_try(index, ignore_sec_unique)
453
 
                            && ibuf_insert(tuple, index, space, page_no,
454
 
                                           cursor->thr)) {
 
482
                            && ibuf_insert(tuple, index, space, zip_size,
 
483
                                           page_no, cursor->thr)) {
455
484
                                /* Insertion to the insert buffer succeeded */
456
485
                                cursor->flag = BTR_CUR_INSERT_TO_IBUF;
457
486
                                if (UNIV_LIKELY_NULL(heap)) {
468
497
                        goto retry_page_get;
469
498
                }
470
499
 
471
 
                buf_block_align(page)->check_index_page_at_flush = TRUE;
 
500
                page = buf_block_get_frame(block);
 
501
#ifdef UNIV_ZIP_DEBUG
 
502
                if (rw_latch != RW_NO_LATCH) {
 
503
                        const page_zip_des_t*   page_zip
 
504
                                = buf_block_get_page_zip(block);
 
505
                        ut_a(!page_zip || page_zip_validate(page_zip, page));
 
506
                }
 
507
#endif /* UNIV_ZIP_DEBUG */
 
508
 
 
509
                block->check_index_page_at_flush = TRUE;
472
510
 
473
511
#ifdef UNIV_SYNC_DEBUG
474
512
                if (rw_latch != RW_NO_LATCH) {
475
 
                        buf_page_dbg_add_level(page, SYNC_TREE_NODE);
 
513
                        buf_block_dbg_add_level(block, SYNC_TREE_NODE);
476
514
                }
477
515
#endif
478
516
                ut_ad(0 == ut_dulint_cmp(index->id,
479
517
                                         btr_page_get_index_id(page)));
480
518
 
481
 
                if (height == ULINT_UNDEFINED) {
 
519
                if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
482
520
                        /* We are in the root node */
483
521
 
484
522
                        height = btr_page_get_level(page, mtr);
485
523
                        root_height = height;
486
524
                        cursor->tree_height = root_height + 1;
487
525
#ifdef BTR_CUR_ADAPT
488
 
                        if (page != guess) {
489
 
                                info->root_guess = page;
 
526
                        if (block != guess) {
 
527
                                info->root_guess = block;
490
528
                        }
491
529
#endif
492
530
                }
494
532
                if (height == 0) {
495
533
                        if (rw_latch == RW_NO_LATCH) {
496
534
 
497
 
                                btr_cur_latch_leaves(page, space,
 
535
                                btr_cur_latch_leaves(page, space, zip_size,
498
536
                                                     page_no, latch_mode,
499
537
                                                     cursor, mtr);
500
538
                        }
512
550
                        page_mode = mode;
513
551
                }
514
552
 
515
 
                page_cur_search_with_match(page, index, tuple, page_mode,
 
553
                page_cur_search_with_match(block, index, tuple, page_mode,
516
554
                                           &up_match, &up_bytes,
517
555
                                           &low_match, &low_bytes,
518
556
                                           page_cursor);
 
557
 
519
558
                if (estimate) {
520
559
                        btr_cur_add_path_info(cursor, height, root_height);
521
560
                }
529
568
 
530
569
                        if (level > 0) {
531
570
                                /* x-latch the page */
532
 
                                page = btr_page_get(space,
 
571
                                page = btr_page_get(space, zip_size,
533
572
                                                    page_no, RW_X_LATCH, mtr);
534
573
                                ut_a((ibool)!!page_is_comp(page)
535
574
                                     == dict_table_is_comp(index->table));
541
580
                ut_ad(height > 0);
542
581
 
543
582
                height--;
 
583
 
 
584
                if ((height == 0) && (latch_mode <= BTR_MODIFY_LEAF)) {
 
585
 
 
586
                        rw_latch = latch_mode;
 
587
 
 
588
                        if (insert_planned
 
589
                            && ibuf_should_try(index, ignore_sec_unique)) {
 
590
 
 
591
                                /* Try insert to the insert buffer if the
 
592
                                page is not in the buffer pool */
 
593
 
 
594
                                buf_mode = BUF_GET_IF_IN_POOL;
 
595
                        }
 
596
                }
 
597
 
544
598
                guess = NULL;
545
599
 
546
600
                node_ptr = page_cur_get_rec(page_cursor);
561
615
                cursor->up_bytes = up_bytes;
562
616
 
563
617
#ifdef BTR_CUR_ADAPT
564
 
                if (srv_use_adaptive_hash_indexes) {
 
618
                if (!UNIV_UNLIKELY(btr_search_disabled)) {
565
619
 
566
620
                        btr_search_info_update(index, cursor);
567
621
                }
583
637
 
584
638
/*********************************************************************
585
639
Opens a cursor at either end of an index. */
586
 
 
 
640
UNIV_INTERN
587
641
void
588
642
btr_cur_open_at_index_side(
589
643
/*=======================*/
595
649
        mtr_t*          mtr)            /* in: mtr */
596
650
{
597
651
        page_cur_t*     page_cursor;
598
 
        page_t*         page;
599
652
        ulint           page_no;
600
653
        ulint           space;
 
654
        ulint           zip_size;
601
655
        ulint           height;
602
656
        ulint           root_height = 0; /* remove warning */
603
657
        rec_t*          node_ptr;
606
660
        mem_heap_t*     heap            = NULL;
607
661
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
608
662
        ulint*          offsets         = offsets_;
609
 
        *offsets_ = (sizeof offsets_) / sizeof *offsets_;
 
663
        rec_offs_init(offsets_);
610
664
 
611
665
        estimate = latch_mode & BTR_ESTIMATE;
612
666
        latch_mode = latch_mode & ~BTR_ESTIMATE;
626
680
        cursor->index = index;
627
681
 
628
682
        space = dict_index_get_space(index);
 
683
        zip_size = dict_table_zip_size(index->table);
629
684
        page_no = dict_index_get_page(index);
630
685
 
631
686
        height = ULINT_UNDEFINED;
632
687
 
633
688
        for (;;) {
634
 
                page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL,
635
 
                                        BUF_GET,
636
 
                                        __FILE__, __LINE__,
637
 
                                        mtr);
 
689
                buf_block_t*    block;
 
690
                page_t*         page;
 
691
                block = buf_page_get_gen(space, zip_size, page_no,
 
692
                                         RW_NO_LATCH, NULL, BUF_GET,
 
693
                                         __FILE__, __LINE__,
 
694
                                         mtr);
 
695
                page = buf_block_get_frame(block);
638
696
                ut_ad(0 == ut_dulint_cmp(index->id,
639
697
                                         btr_page_get_index_id(page)));
640
698
 
641
 
                buf_block_align(page)->check_index_page_at_flush = TRUE;
 
699
                block->check_index_page_at_flush = TRUE;
642
700
 
643
701
                if (height == ULINT_UNDEFINED) {
644
702
                        /* We are in the root node */
648
706
                }
649
707
 
650
708
                if (height == 0) {
651
 
                        btr_cur_latch_leaves(page, space, page_no,
 
709
                        btr_cur_latch_leaves(page, space, zip_size, page_no,
652
710
                                             latch_mode, cursor, mtr);
653
711
 
654
712
                        /* In versions <= 3.23.52 we had forgotten to
669
727
                }
670
728
 
671
729
                if (from_left) {
672
 
                        page_cur_set_before_first(page, page_cursor);
 
730
                        page_cur_set_before_first(block, page_cursor);
673
731
                } else {
674
 
                        page_cur_set_after_last(page, page_cursor);
 
732
                        page_cur_set_after_last(block, page_cursor);
675
733
                }
676
734
 
677
735
                if (height == 0) {
711
769
 
712
770
/**************************************************************************
713
771
Positions a cursor at a randomly chosen position within a B-tree. */
714
 
 
 
772
UNIV_INTERN
715
773
void
716
774
btr_cur_open_at_rnd_pos(
717
775
/*====================*/
721
779
        mtr_t*          mtr)            /* in: mtr */
722
780
{
723
781
        page_cur_t*     page_cursor;
724
 
        page_t*         page;
725
782
        ulint           page_no;
726
783
        ulint           space;
 
784
        ulint           zip_size;
727
785
        ulint           height;
728
786
        rec_t*          node_ptr;
729
787
        mem_heap_t*     heap            = NULL;
730
788
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
731
789
        ulint*          offsets         = offsets_;
732
 
        *offsets_ = (sizeof offsets_) / sizeof *offsets_;
 
790
        rec_offs_init(offsets_);
733
791
 
734
792
        if (latch_mode == BTR_MODIFY_TREE) {
735
793
                mtr_x_lock(dict_index_get_lock(index), mtr);
741
799
        cursor->index = index;
742
800
 
743
801
        space = dict_index_get_space(index);
 
802
        zip_size = dict_table_zip_size(index->table);
744
803
        page_no = dict_index_get_page(index);
745
804
 
746
805
        height = ULINT_UNDEFINED;
747
806
 
748
807
        for (;;) {
749
 
                page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL,
750
 
                                        BUF_GET,
751
 
                                        __FILE__, __LINE__,
752
 
                                        mtr);
 
808
                buf_block_t*    block;
 
809
                page_t*         page;
 
810
 
 
811
                block = buf_page_get_gen(space, zip_size, page_no,
 
812
                                         RW_NO_LATCH, NULL, BUF_GET,
 
813
                                         __FILE__, __LINE__,
 
814
                                         mtr);
 
815
                page = buf_block_get_frame(block);
753
816
                ut_ad(0 == ut_dulint_cmp(index->id,
754
817
                                         btr_page_get_index_id(page)));
755
818
 
760
823
                }
761
824
 
762
825
                if (height == 0) {
763
 
                        btr_cur_latch_leaves(page, space, page_no,
 
826
                        btr_cur_latch_leaves(page, space, zip_size, page_no,
764
827
                                             latch_mode, cursor, mtr);
765
828
                }
766
829
 
767
 
                page_cur_open_on_rnd_user_rec(page, page_cursor);
 
830
                page_cur_open_on_rnd_user_rec(block, page_cursor);
768
831
 
769
832
                if (height == 0) {
770
833
 
802
865
                                else NULL */
803
866
        btr_cur_t*      cursor, /* in: cursor on page after which to insert;
804
867
                                cursor stays valid */
805
 
        dtuple_t*       tuple,  /* in: tuple to insert; the size info need not
 
868
        const dtuple_t* tuple,  /* in: tuple to insert; the size info need not
806
869
                                have been stored to tuple */
807
 
        ibool*          reorg,  /* out: TRUE if reorganization occurred */
 
870
        ulint           n_ext,  /* in: number of externally stored columns */
808
871
        mtr_t*          mtr)    /* in: mtr */
809
872
{
810
873
        page_cur_t*     page_cursor;
811
 
        page_t*         page;
 
874
        buf_block_t*    block;
812
875
        rec_t*          rec;
813
876
 
814
877
        ut_ad(dtuple_check_typed(tuple));
815
878
 
816
 
        *reorg = FALSE;
817
 
 
818
 
        page = btr_cur_get_page(cursor);
819
 
 
820
 
        ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
821
 
                                MTR_MEMO_PAGE_X_FIX));
 
879
        block = btr_cur_get_block(cursor);
 
880
 
 
881
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
822
882
        page_cursor = btr_cur_get_page_cur(cursor);
823
883
 
824
884
        /* Now, try the insert */
825
 
        rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
 
885
        rec = page_cur_tuple_insert(page_cursor, tuple,
 
886
                                    cursor->index, n_ext, mtr);
826
887
 
827
 
        if (!rec) {
 
888
        if (UNIV_UNLIKELY(!rec)) {
828
889
                /* If record did not fit, reorganize */
829
890
 
830
 
                btr_page_reorganize(page, cursor->index, mtr);
831
 
 
832
 
                *reorg = TRUE;
833
 
 
834
 
                page_cur_search(page, cursor->index, tuple,
835
 
                                PAGE_CUR_LE, page_cursor);
836
 
 
837
 
                rec = page_cur_tuple_insert(page_cursor, tuple,
838
 
                                            cursor->index, mtr);
 
891
                if (btr_page_reorganize(block, cursor->index, mtr)) {
 
892
 
 
893
                        page_cur_search(block, cursor->index, tuple,
 
894
                                        PAGE_CUR_LE, page_cursor);
 
895
 
 
896
                        rec = page_cur_tuple_insert(page_cursor, tuple,
 
897
                                                    cursor->index, n_ext, mtr);
 
898
                }
839
899
        }
840
900
 
841
901
        return(rec);
853
913
                                not zero, the parameters index and thr
854
914
                                should be specified */
855
915
        btr_cur_t*      cursor, /* in: cursor on page after which to insert */
856
 
        dtuple_t*       entry,  /* in: entry to insert */
 
916
        const dtuple_t* entry,  /* in: entry to insert */
857
917
        que_thr_t*      thr,    /* in: query thread or NULL */
858
918
        ibool*          inherit)/* out: TRUE if the inserted new record maybe
859
919
                                should inherit LOCK_GAP type locks from the
870
930
        rec = btr_cur_get_rec(cursor);
871
931
        index = cursor->index;
872
932
 
873
 
        err = lock_rec_insert_check_and_lock(flags, rec, index, thr, inherit);
 
933
        err = lock_rec_insert_check_and_lock(flags, rec,
 
934
                                             btr_cur_get_block(cursor),
 
935
                                             index, thr, inherit);
874
936
 
875
937
        if (err != DB_SUCCESS) {
876
938
 
877
939
                return(err);
878
940
        }
879
941
 
880
 
        if ((index->type & DICT_CLUSTERED) && !(index->type & DICT_IBUF)) {
 
942
        if (dict_index_is_clust(index) && !dict_index_is_ibuf(index)) {
881
943
 
882
944
                err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP,
883
945
                                                    thr, index, entry,
911
973
        const dict_index_t*     index,  /* in: index */
912
974
        const char*             op)     /* in: operation */
913
975
{
914
 
        fprintf(stderr, "Trx with id %lu %lu going to ",
915
 
                ut_dulint_get_high(trx->id),
916
 
                ut_dulint_get_low(trx->id));
 
976
        fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ",
 
977
                TRX_ID_PREP_PRINTF(trx->id));
917
978
        fputs(op, stderr);
918
979
        dict_index_name_print(stderr, trx, index);
919
980
        putc('\n', stderr);
926
987
not succeed if there is too little space on the page. If there is just
927
988
one record on the page, the insert will always succeed; this is to
928
989
prevent trying to split a page with just one record. */
929
 
 
 
990
UNIV_INTERN
930
991
ulint
931
992
btr_cur_optimistic_insert(
932
993
/*======================*/
937
998
                                specified */
938
999
        btr_cur_t*      cursor, /* in: cursor on page after which to insert;
939
1000
                                cursor stays valid */
940
 
        dtuple_t*       entry,  /* in: entry to insert */
 
1001
        dtuple_t*       entry,  /* in/out: entry to insert */
941
1002
        rec_t**         rec,    /* out: pointer to inserted record if
942
1003
                                succeed */
943
1004
        big_rec_t**     big_rec,/* out: big rec vector whose fields have to
944
1005
                                be stored externally by the caller, or
945
1006
                                NULL */
 
1007
        ulint           n_ext,  /* in: number of externally stored columns */
946
1008
        que_thr_t*      thr,    /* in: query thread or NULL */
947
 
        mtr_t*          mtr)    /* in: mtr */
 
1009
        mtr_t*          mtr)    /* in: mtr; if this function returns
 
1010
                                DB_SUCCESS on a leaf page of a secondary
 
1011
                                index in a compressed tablespace, the
 
1012
                                mtr must be committed before latching
 
1013
                                any further pages */
948
1014
{
949
1015
        big_rec_t*      big_rec_vec     = NULL;
950
1016
        dict_index_t*   index;
951
1017
        page_cur_t*     page_cursor;
 
1018
        buf_block_t*    block;
952
1019
        page_t*         page;
953
1020
        ulint           max_size;
954
1021
        rec_t*          dummy_rec;
955
 
        ulint           level;
 
1022
        ibool           leaf;
956
1023
        ibool           reorg;
957
1024
        ibool           inherit;
 
1025
        ulint           zip_size;
958
1026
        ulint           rec_size;
959
 
        ulint           type;
 
1027
        mem_heap_t*     heap            = NULL;
960
1028
        ulint           err;
961
1029
 
962
1030
        *big_rec = NULL;
963
1031
 
964
 
        page = btr_cur_get_page(cursor);
 
1032
        block = btr_cur_get_block(cursor);
 
1033
        page = buf_block_get_frame(block);
965
1034
        index = cursor->index;
 
1035
        zip_size = buf_block_get_zip_size(block);
 
1036
#ifdef UNIV_DEBUG_VALGRIND
 
1037
        if (zip_size) {
 
1038
                UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
 
1039
                UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
 
1040
        }
 
1041
#endif /* UNIV_DEBUG_VALGRIND */
966
1042
 
967
1043
        if (!dtuple_check_typed_no_assert(entry)) {
968
1044
                fputs("InnoDB: Error in a tuple to insert into ", stderr);
975
1051
        }
976
1052
#endif /* UNIV_DEBUG */
977
1053
 
978
 
        ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
979
 
                                MTR_MEMO_PAGE_X_FIX));
 
1054
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
980
1055
        max_size = page_get_max_insert_size_after_reorganize(page, 1);
981
 
        level = btr_page_get_level(page, mtr);
 
1056
        leaf = page_is_leaf(page);
982
1057
 
983
 
calculate_sizes_again:
984
1058
        /* Calculate the record size when entry is converted to a record */
985
 
        rec_size = rec_get_converted_size(index, entry);
 
1059
        rec_size = rec_get_converted_size(index, entry, n_ext);
986
1060
 
987
 
        if (rec_size
988
 
            >= ut_min(page_get_free_space_of_empty(page_is_comp(page)) / 2,
989
 
                      REC_MAX_DATA_SIZE)) {
 
1061
        if (page_zip_rec_needs_ext(rec_size, page_is_comp(page), zip_size)) {
990
1062
 
991
1063
                /* The record is so big that we have to store some fields
992
1064
                externally on separate database pages */
993
 
 
994
 
                big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0);
995
 
 
996
 
                if (big_rec_vec == NULL) {
 
1065
                big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
 
1066
 
 
1067
                if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
997
1068
 
998
1069
                        return(DB_TOO_BIG_RECORD);
999
1070
                }
1000
1071
 
1001
 
                goto calculate_sizes_again;
 
1072
                rec_size = rec_get_converted_size(index, entry, n_ext);
1002
1073
        }
1003
1074
 
1004
1075
        /* If there have been many consecutive inserts, and we are on the leaf
1005
1076
        level, check if we have to split the page to reserve enough free space
1006
1077
        for future updates of records. */
1007
1078
 
1008
 
        type = index->type;
1009
 
 
1010
 
        if ((type & DICT_CLUSTERED)
 
1079
        if (dict_index_is_clust(index)
 
1080
            && (page_get_n_recs(page) >= 2)
 
1081
            && UNIV_LIKELY(leaf)
1011
1082
            && (dict_index_get_space_reserve() + rec_size > max_size)
1012
 
            && (page_get_n_recs(page) >= 2)
1013
 
            && (0 == level)
1014
1083
            && (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
1015
1084
                || btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
 
1085
fail:
 
1086
                err = DB_FAIL;
 
1087
fail_err:
1016
1088
 
1017
1089
                if (big_rec_vec) {
1018
1090
                        dtuple_convert_back_big_rec(index, entry, big_rec_vec);
1019
1091
                }
1020
1092
 
1021
 
                return(DB_FAIL);
 
1093
                if (UNIV_LIKELY_NULL(heap)) {
 
1094
                        mem_heap_free(heap);
 
1095
                }
 
1096
 
 
1097
                return(err);
1022
1098
        }
1023
1099
 
1024
 
        if (!(((max_size >= rec_size)
1025
 
               && (max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT))
1026
 
              || (page_get_max_insert_size(page, 1) >= rec_size)
1027
 
              || (page_get_n_recs(page) <= 1))) {
 
1100
        if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT
 
1101
             || max_size < rec_size)
 
1102
            && UNIV_LIKELY(page_get_n_recs(page) > 1)
 
1103
            && page_get_max_insert_size(page, 1) < rec_size) {
1028
1104
 
1029
 
                if (big_rec_vec) {
1030
 
                        dtuple_convert_back_big_rec(index, entry, big_rec_vec);
1031
 
                }
1032
 
                return(DB_FAIL);
 
1105
                goto fail;
1033
1106
        }
1034
1107
 
1035
1108
        /* Check locks and write to the undo log, if specified */
1036
1109
        err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, &inherit);
1037
1110
 
1038
 
        if (err != DB_SUCCESS) {
 
1111
        if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
1039
1112
 
1040
 
                if (big_rec_vec) {
1041
 
                        dtuple_convert_back_big_rec(index, entry, big_rec_vec);
1042
 
                }
1043
 
                return(err);
 
1113
                goto fail_err;
1044
1114
        }
1045
1115
 
1046
1116
        page_cursor = btr_cur_get_page_cur(cursor);
1047
1117
 
1048
 
        reorg = FALSE;
1049
 
 
1050
1118
        /* Now, try the insert */
1051
1119
 
1052
 
        *rec = page_cur_insert_rec_low(page_cursor, entry, index,
1053
 
                                       NULL, NULL, mtr);
1054
 
        if (UNIV_UNLIKELY(!(*rec))) {
 
1120
        {
 
1121
                const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);
 
1122
                *rec = page_cur_tuple_insert(page_cursor, entry, index,
 
1123
                                             n_ext, mtr);
 
1124
                reorg = page_cursor_rec != page_cur_get_rec(page_cursor);
 
1125
 
 
1126
                if (UNIV_UNLIKELY(reorg)) {
 
1127
                        ut_a(zip_size);
 
1128
                        ut_a(*rec);
 
1129
                }
 
1130
        }
 
1131
 
 
1132
        if (UNIV_UNLIKELY(!*rec) && UNIV_LIKELY(!reorg)) {
1055
1133
                /* If the record did not fit, reorganize */
1056
 
                btr_page_reorganize(page, index, mtr);
1057
 
 
1058
 
                ut_ad(page_get_max_insert_size(page, 1) == max_size);
 
1134
                if (UNIV_UNLIKELY(!btr_page_reorganize(block, index, mtr))) {
 
1135
                        ut_a(zip_size);
 
1136
 
 
1137
                        goto fail;
 
1138
                }
 
1139
 
 
1140
                ut_ad(zip_size
 
1141
                      || page_get_max_insert_size(page, 1) == max_size);
1059
1142
 
1060
1143
                reorg = TRUE;
1061
1144
 
1062
 
                page_cur_search(page, index, entry, PAGE_CUR_LE, page_cursor);
 
1145
                page_cur_search(block, index, entry, PAGE_CUR_LE, page_cursor);
1063
1146
 
1064
 
                *rec = page_cur_tuple_insert(page_cursor, entry, index, mtr);
 
1147
                *rec = page_cur_tuple_insert(page_cursor, entry, index,
 
1148
                                             n_ext, mtr);
1065
1149
 
1066
1150
                if (UNIV_UNLIKELY(!*rec)) {
 
1151
                        if (UNIV_LIKELY(zip_size != 0)) {
 
1152
 
 
1153
                                goto fail;
 
1154
                        }
 
1155
 
1067
1156
                        fputs("InnoDB: Error: cannot insert tuple ", stderr);
1068
1157
                        dtuple_print(stderr, entry);
1069
1158
                        fputs(" into ", stderr);
1074
1163
                }
1075
1164
        }
1076
1165
 
 
1166
        if (UNIV_LIKELY_NULL(heap)) {
 
1167
                mem_heap_free(heap);
 
1168
        }
 
1169
 
1077
1170
#ifdef BTR_CUR_HASH_ADAPT
1078
 
        if (!reorg && (0 == level) && (cursor->flag == BTR_CUR_HASH)) {
 
1171
        if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) {
1079
1172
                btr_search_update_hash_node_on_insert(cursor);
1080
1173
        } else {
1081
1174
                btr_search_update_hash_on_insert(cursor);
1084
1177
 
1085
1178
        if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) {
1086
1179
 
1087
 
                lock_update_insert(*rec);
 
1180
                lock_update_insert(block, *rec);
1088
1181
        }
1089
1182
 
1090
1183
#if 0
1091
1184
        fprintf(stderr, "Insert into page %lu, max ins size %lu,"
1092
1185
                " rec %lu ind type %lu\n",
1093
 
                buf_frame_get_page_no(page), max_size,
1094
 
                rec_size + PAGE_DIR_SLOT_SIZE, type);
 
1186
                buf_block_get_page_no(block), max_size,
 
1187
                rec_size + PAGE_DIR_SLOT_SIZE, index->type);
1095
1188
#endif
1096
 
        if (!(type & DICT_CLUSTERED)) {
1097
 
                /* We have added a record to page: update its free bits */
1098
 
                ibuf_update_free_bits_if_full(cursor->index, page, max_size,
1099
 
                                              rec_size + PAGE_DIR_SLOT_SIZE);
 
1189
        if (!dict_index_is_clust(index) && leaf) {
 
1190
                /* Update the free bits of the B-tree page in the
 
1191
                insert buffer bitmap. */
 
1192
 
 
1193
                /* The free bits in the insert buffer bitmap must
 
1194
                never exceed the free space on a page.  It is safe to
 
1195
                decrement or reset the bits in the bitmap in a
 
1196
                mini-transaction that is committed before the
 
1197
                mini-transaction that affects the free space. */
 
1198
 
 
1199
                /* It is unsafe to increment the bits in a separately
 
1200
                committed mini-transaction, because in crash recovery,
 
1201
                the free bits could momentarily be set too high. */
 
1202
 
 
1203
                if (zip_size) {
 
1204
                        /* Update the bits in the same mini-transaction. */
 
1205
                        ibuf_update_free_bits_zip(block, mtr);
 
1206
                } else {
 
1207
                        /* Decrement the bits in a separate
 
1208
                        mini-transaction. */
 
1209
                        ibuf_update_free_bits_if_full(
 
1210
                                block, max_size,
 
1211
                                rec_size + PAGE_DIR_SLOT_SIZE);
 
1212
                }
1100
1213
        }
1101
1214
 
1102
1215
        *big_rec = big_rec_vec;
1109
1222
holds an x-latch on the tree and on the cursor page. If the insert is
1110
1223
made on the leaf level, to avoid deadlocks, mtr must also own x-latches
1111
1224
to brothers of page, if those brothers exist. */
1112
 
 
 
1225
UNIV_INTERN
1113
1226
ulint
1114
1227
btr_cur_pessimistic_insert(
1115
1228
/*=======================*/
1122
1235
                                insertion will certainly succeed */
1123
1236
        btr_cur_t*      cursor, /* in: cursor after which to insert;
1124
1237
                                cursor stays valid */
1125
 
        dtuple_t*       entry,  /* in: entry to insert */
 
1238
        dtuple_t*       entry,  /* in/out: entry to insert */
1126
1239
        rec_t**         rec,    /* out: pointer to inserted record if
1127
1240
                                succeed */
1128
1241
        big_rec_t**     big_rec,/* out: big rec vector whose fields have to
1129
1242
                                be stored externally by the caller, or
1130
1243
                                NULL */
 
1244
        ulint           n_ext,  /* in: number of externally stored columns */
1131
1245
        que_thr_t*      thr,    /* in: query thread or NULL */
1132
1246
        mtr_t*          mtr)    /* in: mtr */
1133
1247
{
1134
1248
        dict_index_t*   index           = cursor->index;
 
1249
        ulint           zip_size        = dict_table_zip_size(index->table);
1135
1250
        big_rec_t*      big_rec_vec     = NULL;
1136
 
        page_t*         page;
 
1251
        mem_heap_t*     heap            = NULL;
1137
1252
        ulint           err;
1138
1253
        ibool           dummy_inh;
1139
1254
        ibool           success;
1144
1259
 
1145
1260
        *big_rec = NULL;
1146
1261
 
1147
 
        page = btr_cur_get_page(cursor);
1148
 
 
1149
1262
        ut_ad(mtr_memo_contains(mtr,
1150
1263
                                dict_index_get_lock(btr_cur_get_index(cursor)),
1151
1264
                                MTR_MEMO_X_LOCK));
1152
 
        ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
 
1265
        ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
1153
1266
                                MTR_MEMO_PAGE_X_FIX));
1154
1267
 
1155
1268
        /* Try first an optimistic insert; reset the cursor flag: we do not
1157
1270
 
1158
1271
        cursor->flag = BTR_CUR_BINARY;
1159
1272
 
1160
 
        err = btr_cur_optimistic_insert(flags, cursor, entry, rec, big_rec,
1161
 
                                        thr, mtr);
 
1273
        err = btr_cur_optimistic_insert(flags, cursor, entry, rec,
 
1274
                                        big_rec, n_ext, thr, mtr);
1162
1275
        if (err != DB_FAIL) {
1163
1276
 
1164
1277
                return(err);
1184
1297
                success = fsp_reserve_free_extents(&n_reserved, index->space,
1185
1298
                                                   n_extents, FSP_NORMAL, mtr);
1186
1299
                if (!success) {
1187
 
                        err = DB_OUT_OF_FILE_SPACE;
1188
 
 
1189
 
                        return(err);
 
1300
                        return(DB_OUT_OF_FILE_SPACE);
1190
1301
                }
1191
1302
        }
1192
1303
 
1193
 
        if (rec_get_converted_size(index, entry)
1194
 
            >= ut_min(page_get_free_space_of_empty(page_is_comp(page)) / 2,
1195
 
                      REC_MAX_DATA_SIZE)) {
1196
 
 
 
1304
        if (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, n_ext),
 
1305
                                   dict_table_is_comp(index->table),
 
1306
                                   zip_size)) {
1197
1307
                /* The record is so big that we have to store some fields
1198
1308
                externally on separate database pages */
1199
1309
 
1200
 
                big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0);
 
1310
                if (UNIV_LIKELY_NULL(big_rec_vec)) {
 
1311
                        /* This should never happen, but we handle
 
1312
                        the situation in a robust manner. */
 
1313
                        ut_ad(0);
 
1314
                        dtuple_convert_back_big_rec(index, entry, big_rec_vec);
 
1315
                }
 
1316
 
 
1317
                big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
1201
1318
 
1202
1319
                if (big_rec_vec == NULL) {
1203
1320
 
1209
1326
                }
1210
1327
        }
1211
1328
 
1212
 
        if (dict_index_get_page(index) == buf_frame_get_page_no(page)) {
 
1329
        if (UNIV_UNLIKELY(zip_size)) {
 
1330
                /* Estimate the free space of an empty compressed page. */
 
1331
                ulint   free_space_zip = page_zip_empty_size(
 
1332
                        cursor->index->n_fields, zip_size);
 
1333
 
 
1334
                if (UNIV_UNLIKELY(rec_get_converted_size(index, entry, n_ext)
 
1335
                                  > free_space_zip)) {
 
1336
                        /* Try to insert the record by itself on a new page.
 
1337
                        If it fails, no amount of splitting will help. */
 
1338
                        buf_block_t*    temp_block
 
1339
                                = buf_block_alloc(zip_size);
 
1340
                        page_t*         temp_page
 
1341
                                = page_create_zip(temp_block, index, 0, NULL);
 
1342
                        page_cur_t      temp_cursor;
 
1343
                        rec_t*          temp_rec;
 
1344
 
 
1345
                        page_cur_position(temp_page + PAGE_NEW_INFIMUM,
 
1346
                                          temp_block, &temp_cursor);
 
1347
 
 
1348
                        temp_rec = page_cur_tuple_insert(&temp_cursor,
 
1349
                                                         entry, index,
 
1350
                                                         n_ext, NULL);
 
1351
                        buf_block_free(temp_block);
 
1352
 
 
1353
                        if (UNIV_UNLIKELY(!temp_rec)) {
 
1354
                                if (big_rec_vec) {
 
1355
                                        dtuple_convert_back_big_rec(
 
1356
                                                index, entry, big_rec_vec);
 
1357
                                }
 
1358
 
 
1359
                                if (heap) {
 
1360
                                        mem_heap_free(heap);
 
1361
                                }
 
1362
 
 
1363
                                return(DB_TOO_BIG_RECORD);
 
1364
                        }
 
1365
                }
 
1366
        }
 
1367
 
 
1368
        if (dict_index_get_page(index)
 
1369
            == buf_block_get_page_no(btr_cur_get_block(cursor))) {
1213
1370
 
1214
1371
                /* The page is the root page */
1215
 
                *rec = btr_root_raise_and_insert(cursor, entry, mtr);
 
1372
                *rec = btr_root_raise_and_insert(cursor, entry, n_ext, mtr);
1216
1373
        } else {
1217
 
                *rec = btr_page_split_and_insert(cursor, entry, mtr);
1218
 
        }
1219
 
 
1220
 
        btr_cur_position(index, page_rec_get_prev(*rec), cursor);
 
1374
                *rec = btr_page_split_and_insert(cursor, entry, n_ext, mtr);
 
1375
        }
 
1376
 
 
1377
        if (UNIV_LIKELY_NULL(heap)) {
 
1378
                mem_heap_free(heap);
 
1379
        }
 
1380
 
 
1381
        ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec);
1221
1382
 
1222
1383
#ifdef BTR_CUR_ADAPT
1223
1384
        btr_search_update_hash_on_insert(cursor);
1224
1385
#endif
1225
1386
        if (!(flags & BTR_NO_LOCKING_FLAG)) {
1226
1387
 
1227
 
                lock_update_insert(*rec);
 
1388
                lock_update_insert(btr_cur_get_block(cursor), *rec);
1228
1389
        }
1229
1390
 
1230
 
        err = DB_SUCCESS;
1231
 
 
1232
1391
        if (n_extents > 0) {
1233
1392
                fil_space_release_free_extents(index->space, n_reserved);
1234
1393
        }
1235
1394
 
1236
1395
        *big_rec = big_rec_vec;
1237
1396
 
1238
 
        return(err);
 
1397
        return(DB_SUCCESS);
1239
1398
}
1240
1399
 
1241
1400
/*==================== B-TREE UPDATE =========================*/
1250
1409
                                number */
1251
1410
        ulint           flags,  /* in: undo logging and locking flags */
1252
1411
        btr_cur_t*      cursor, /* in: cursor on record to update */
1253
 
        upd_t*          update, /* in: update vector */
 
1412
        const upd_t*    update, /* in: update vector */
1254
1413
        ulint           cmpl_info,/* in: compiler info on secondary index
1255
1414
                                updates */
1256
1415
        que_thr_t*      thr,    /* in: query thread */
1265
1424
        rec = btr_cur_get_rec(cursor);
1266
1425
        index = cursor->index;
1267
1426
 
1268
 
        if (!(index->type & DICT_CLUSTERED)) {
 
1427
        if (!dict_index_is_clust(index)) {
1269
1428
                /* We do undo logging only when we update a clustered index
1270
1429
                record */
1271
 
                return(lock_sec_rec_modify_check_and_lock(flags, rec, index,
1272
 
                                                          thr));
 
1430
                return(lock_sec_rec_modify_check_and_lock(
 
1431
                               flags, btr_cur_get_block(cursor), rec,
 
1432
                               index, thr));
1273
1433
        }
1274
1434
 
1275
1435
        /* Check if we have to wait for a lock: enqueue an explicit lock
1280
1440
        if (!(flags & BTR_NO_LOCKING_FLAG)) {
1281
1441
                mem_heap_t*     heap            = NULL;
1282
1442
                ulint           offsets_[REC_OFFS_NORMAL_SIZE];
1283
 
                *offsets_ = (sizeof offsets_) / sizeof *offsets_;
 
1443
                rec_offs_init(offsets_);
1284
1444
 
1285
1445
                err = lock_clust_rec_modify_check_and_lock(
1286
 
                        flags, rec, index,
 
1446
                        flags, btr_cur_get_block(cursor), rec, index,
1287
1447
                        rec_get_offsets(rec, index, offsets_,
1288
1448
                                        ULINT_UNDEFINED, &heap), thr);
1289
1449
                if (UNIV_LIKELY_NULL(heap)) {
1312
1472
        ulint           flags,          /* in: flags */
1313
1473
        rec_t*          rec,            /* in: record */
1314
1474
        dict_index_t*   index,          /* in: index where cursor positioned */
1315
 
        upd_t*          update,         /* in: update vector */
 
1475
        const upd_t*    update,         /* in: update vector */
1316
1476
        trx_t*          trx,            /* in: transaction */
1317
1477
        dulint          roll_ptr,       /* in: roll ptr */
1318
1478
        mtr_t*          mtr)            /* in: mtr */
1353
1513
 
1354
1514
/***************************************************************
1355
1515
Parses a redo log record of updating a record in-place. */
1356
 
 
 
1516
UNIV_INTERN
1357
1517
byte*
1358
1518
btr_cur_parse_update_in_place(
1359
1519
/*==========================*/
1360
1520
                                /* out: end of log record or NULL */
1361
1521
        byte*           ptr,    /* in: buffer */
1362
1522
        byte*           end_ptr,/* in: buffer end */
1363
 
        page_t*         page,   /* in: page or NULL */
 
1523
        page_t*         page,   /* in/out: page or NULL */
 
1524
        page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
1364
1525
        dict_index_t*   index)  /* in: index corresponding to page */
1365
1526
{
1366
1527
        ulint   flags;
1416
1577
        offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
1417
1578
 
1418
1579
        if (!(flags & BTR_KEEP_SYS_FLAG)) {
1419
 
                row_upd_rec_sys_fields_in_recovery(rec, offsets,
 
1580
                row_upd_rec_sys_fields_in_recovery(rec, page_zip, offsets,
1420
1581
                                                   pos, trx_id, roll_ptr);
1421
1582
        }
1422
1583
 
1423
 
        row_upd_rec_in_place(rec, offsets, update);
 
1584
        row_upd_rec_in_place(rec, index, offsets, update, page_zip);
1424
1585
 
1425
1586
func_exit:
1426
1587
        mem_heap_free(heap);
1429
1590
}
1430
1591
 
1431
1592
/*****************************************************************
 
1593
See if there is enough place in the page modification log to log
 
1594
an update-in-place. */
 
1595
static
 
1596
ibool
 
1597
btr_cur_update_alloc_zip(
 
1598
/*=====================*/
 
1599
                                /* out: TRUE if enough place */
 
1600
        page_zip_des_t* page_zip,/* in/out: compressed page */
 
1601
        buf_block_t*    block,  /* in/out: buffer page */
 
1602
        dict_index_t*   index,  /* in: the index corresponding to the block */
 
1603
        ulint           length, /* in: size needed */
 
1604
        mtr_t*          mtr)    /* in: mini-transaction */
 
1605
{
 
1606
        ut_a(page_zip == buf_block_get_page_zip(block));
 
1607
        ut_ad(page_zip);
 
1608
 
 
1609
        if (page_zip_available(page_zip, dict_index_is_clust(index),
 
1610
                               length, 0)) {
 
1611
                return(TRUE);
 
1612
        }
 
1613
 
 
1614
        if (!page_zip->m_nonempty) {
 
1615
                /* The page has been freshly compressed, so
 
1616
                recompressing it will not help. */
 
1617
                return(FALSE);
 
1618
        }
 
1619
 
 
1620
        if (!page_zip_compress(page_zip, buf_block_get_frame(block),
 
1621
                               index, mtr)) {
 
1622
                /* Unable to compress the page */
 
1623
                return(FALSE);
 
1624
        }
 
1625
 
 
1626
        /* After recompressing a page, we must make sure that the free
 
1627
        bits in the insert buffer bitmap will not exceed the free
 
1628
        space on the page.  Because this function will not attempt
 
1629
        recompression unless page_zip_available() fails above, it is
 
1630
        safe to reset the free bits if page_zip_available() fails
 
1631
        again, below.  The free bits can safely be reset in a separate
 
1632
        mini-transaction.  If page_zip_available() succeeds below, we
 
1633
        can be sure that the page_zip_compress() above did not reduce
 
1634
        the free space available on the page. */
 
1635
 
 
1636
        if (!page_zip_available(page_zip, dict_index_is_clust(index),
 
1637
                                length, 0)) {
 
1638
                /* Out of space: reset the free bits. */
 
1639
                if (!dict_index_is_clust(index)
 
1640
                    && page_is_leaf(buf_block_get_frame(block))) {
 
1641
                        ibuf_reset_free_bits(block);
 
1642
                }
 
1643
                return(FALSE);
 
1644
        }
 
1645
 
 
1646
        return(TRUE);
 
1647
}
 
1648
 
 
1649
/*****************************************************************
1432
1650
Updates a record when the update causes no size changes in its fields.
1433
1651
We assume here that the ordering fields of the record do not change. */
1434
 
 
 
1652
UNIV_INTERN
1435
1653
ulint
1436
1654
btr_cur_update_in_place(
1437
1655
/*====================*/
1440
1658
        btr_cur_t*      cursor, /* in: cursor on the record to update;
1441
1659
                                cursor stays valid and positioned on the
1442
1660
                                same record */
1443
 
        upd_t*          update, /* in: update vector */
 
1661
        const upd_t*    update, /* in: update vector */
1444
1662
        ulint           cmpl_info,/* in: compiler info on secondary index
1445
1663
                                updates */
1446
1664
        que_thr_t*      thr,    /* in: query thread */
1447
 
        mtr_t*          mtr)    /* in: mtr */
 
1665
        mtr_t*          mtr)    /* in: mtr; must be committed before
 
1666
                                latching any further pages */
1448
1667
{
1449
1668
        dict_index_t*   index;
1450
1669
        buf_block_t*    block;
 
1670
        page_zip_des_t* page_zip;
1451
1671
        ulint           err;
1452
1672
        rec_t*          rec;
1453
1673
        dulint          roll_ptr        = ut_dulint_zero;
1456
1676
        mem_heap_t*     heap            = NULL;
1457
1677
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
1458
1678
        ulint*          offsets         = offsets_;
1459
 
        *offsets_ = (sizeof offsets_) / sizeof *offsets_;
 
1679
        rec_offs_init(offsets_);
1460
1680
 
1461
1681
        rec = btr_cur_get_rec(cursor);
1462
1682
        index = cursor->index;
1470
1690
        }
1471
1691
#endif /* UNIV_DEBUG */
1472
1692
 
 
1693
        block = btr_cur_get_block(cursor);
 
1694
        page_zip = buf_block_get_page_zip(block);
 
1695
 
 
1696
        /* Check that enough space is available on the compressed page. */
 
1697
        if (UNIV_LIKELY_NULL(page_zip)
 
1698
            && !btr_cur_update_alloc_zip(page_zip, block, index,
 
1699
                                         rec_offs_size(offsets), mtr)) {
 
1700
                return(DB_ZIP_OVERFLOW);
 
1701
        }
 
1702
 
1473
1703
        /* Do lock checking and undo logging */
1474
1704
        err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
1475
1705
                                        thr, &roll_ptr);
1481
1711
                return(err);
1482
1712
        }
1483
1713
 
1484
 
        block = buf_block_align(rec);
1485
 
        ut_ad(!!page_is_comp(buf_block_get_frame(block))
1486
 
              == dict_table_is_comp(index->table));
1487
 
 
1488
1714
        if (block->is_hashed) {
1489
1715
                /* The function row_upd_changes_ord_field_binary works only
1490
1716
                if the update vector was built for a clustered index; we must
1491
1717
                NOT call it if index is secondary */
1492
1718
 
1493
 
                if (!(index->type & DICT_CLUSTERED)
 
1719
                if (!dict_index_is_clust(index)
1494
1720
                    || row_upd_changes_ord_field_binary(NULL, index, update)) {
1495
1721
 
1496
1722
                        /* Remove possible hash index pointer to this record */
1501
1727
        }
1502
1728
 
1503
1729
        if (!(flags & BTR_KEEP_SYS_FLAG)) {
1504
 
                row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr);
 
1730
                row_upd_rec_sys_fields(rec, NULL,
 
1731
                                       index, offsets, trx, roll_ptr);
1505
1732
        }
1506
1733
 
1507
1734
        was_delete_marked = rec_get_deleted_flag(
1508
1735
                rec, page_is_comp(buf_block_get_frame(block)));
1509
1736
 
1510
 
        row_upd_rec_in_place(rec, offsets, update);
 
1737
        row_upd_rec_in_place(rec, index, offsets, update, page_zip);
1511
1738
 
1512
1739
        if (block->is_hashed) {
1513
1740
                rw_lock_x_unlock(&btr_search_latch);
1514
1741
        }
1515
1742
 
1516
 
        btr_cur_update_in_place_log(flags, rec, index, update, trx, roll_ptr,
1517
 
                                    mtr);
 
1743
        if (page_zip && !dict_index_is_clust(index)
 
1744
            && page_is_leaf(buf_block_get_frame(block))) {
 
1745
                /* Update the free bits in the insert buffer. */
 
1746
                ibuf_update_free_bits_zip(block, mtr);
 
1747
        }
 
1748
 
 
1749
        btr_cur_update_in_place_log(flags, rec, index, update,
 
1750
                                    trx, roll_ptr, mtr);
 
1751
 
1518
1752
        if (was_delete_marked
1519
1753
            && !rec_get_deleted_flag(rec, page_is_comp(
1520
1754
                                             buf_block_get_frame(block)))) {
1521
1755
                /* The new updated record owns its possible externally
1522
1756
                stored fields */
1523
1757
 
1524
 
                btr_cur_unmark_extern_fields(rec, mtr, offsets);
 
1758
                btr_cur_unmark_extern_fields(page_zip,
 
1759
                                             rec, index, offsets, mtr);
1525
1760
        }
1526
1761
 
1527
1762
        if (UNIV_LIKELY_NULL(heap)) {
1536
1771
little space on the page or if the update would result in too empty a page,
1537
1772
so that tree compression is recommended. We assume here that the ordering
1538
1773
fields of the record do not change. */
1539
 
 
 
1774
UNIV_INTERN
1540
1775
ulint
1541
1776
btr_cur_optimistic_update(
1542
1777
/*======================*/
1543
1778
                                /* out: DB_SUCCESS, or DB_OVERFLOW if the
1544
1779
                                updated record does not fit, DB_UNDERFLOW
1545
 
                                if the page would become too empty */
 
1780
                                if the page would become too empty, or
 
1781
                                DB_ZIP_OVERFLOW if there is not enough
 
1782
                                space left on the compressed page */
1546
1783
        ulint           flags,  /* in: undo logging and locking flags */
1547
1784
        btr_cur_t*      cursor, /* in: cursor on the record to update;
1548
1785
                                cursor stays valid and positioned on the
1549
1786
                                same record */
1550
 
        upd_t*          update, /* in: update vector; this must also
 
1787
        const upd_t*    update, /* in: update vector; this must also
1551
1788
                                contain trx id and roll ptr fields */
1552
1789
        ulint           cmpl_info,/* in: compiler info on secondary index
1553
1790
                                updates */
1554
1791
        que_thr_t*      thr,    /* in: query thread */
1555
 
        mtr_t*          mtr)    /* in: mtr */
 
1792
        mtr_t*          mtr)    /* in: mtr; must be committed before
 
1793
                                latching any further pages */
1556
1794
{
1557
1795
        dict_index_t*   index;
1558
1796
        page_cur_t*     page_cursor;
1559
1797
        ulint           err;
 
1798
        buf_block_t*    block;
1560
1799
        page_t*         page;
 
1800
        page_zip_des_t* page_zip;
1561
1801
        rec_t*          rec;
 
1802
        rec_t*          orig_rec;
1562
1803
        ulint           max_size;
1563
1804
        ulint           new_rec_size;
1564
1805
        ulint           old_rec_size;
1566
1807
        dulint          roll_ptr;
1567
1808
        trx_t*          trx;
1568
1809
        mem_heap_t*     heap;
1569
 
        ibool           reorganized     = FALSE;
1570
1810
        ulint           i;
 
1811
        ulint           n_ext;
1571
1812
        ulint*          offsets;
1572
1813
 
1573
 
        page = btr_cur_get_page(cursor);
1574
 
        rec = btr_cur_get_rec(cursor);
 
1814
        block = btr_cur_get_block(cursor);
 
1815
        page = buf_block_get_frame(block);
 
1816
        orig_rec = rec = btr_cur_get_rec(cursor);
1575
1817
        index = cursor->index;
1576
1818
        ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
 
1819
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1577
1820
 
1578
1821
        heap = mem_heap_create(1024);
1579
1822
        offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
1585
1828
        }
1586
1829
#endif /* UNIV_DEBUG */
1587
1830
 
1588
 
        ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
1589
 
                                MTR_MEMO_PAGE_X_FIX));
1590
1831
        if (!row_upd_changes_field_size_or_external(index, offsets, update)) {
1591
1832
 
1592
1833
                /* The simplest and the most common case: the update does not
1593
1834
                change the size of any field and none of the updated fields is
1594
 
                externally stored in rec or update */
 
1835
                externally stored in rec or update, and there is enough space
 
1836
                on the compressed page to log the update. */
 
1837
 
1595
1838
                mem_heap_free(heap);
1596
1839
                return(btr_cur_update_in_place(flags, cursor, update,
1597
1840
                                               cmpl_info, thr, mtr));
1598
1841
        }
1599
1842
 
1600
 
        for (i = 0; i < upd_get_n_fields(update); i++) {
1601
 
                if (upd_get_nth_field(update, i)->extern_storage) {
1602
 
 
1603
 
                        /* Externally stored fields are treated in pessimistic
1604
 
                        update */
1605
 
 
1606
 
                        mem_heap_free(heap);
1607
 
                        return(DB_OVERFLOW);
1608
 
                }
1609
 
        }
1610
 
 
1611
1843
        if (rec_offs_any_extern(offsets)) {
 
1844
any_extern:
1612
1845
                /* Externally stored fields are treated in pessimistic
1613
1846
                update */
1614
1847
 
1616
1849
                return(DB_OVERFLOW);
1617
1850
        }
1618
1851
 
 
1852
        for (i = 0; i < upd_get_n_fields(update); i++) {
 
1853
                if (dfield_is_ext(&upd_get_nth_field(update, i)->new_val)) {
 
1854
 
 
1855
                        goto any_extern;
 
1856
                }
 
1857
        }
 
1858
 
1619
1859
        page_cursor = btr_cur_get_page_cur(cursor);
1620
1860
 
1621
 
        new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
 
1861
        new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
 
1862
                                           &n_ext, heap);
 
1863
        /* We checked above that there are no externally stored fields. */
 
1864
        ut_a(!n_ext);
1622
1865
 
 
1866
        /* The page containing the clustered index record
 
1867
        corresponding to new_entry is latched in mtr.
 
1868
        Thus the following call is safe. */
1623
1869
        row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
1624
 
                                                     FALSE, NULL);
 
1870
                                                     FALSE, heap);
1625
1871
        old_rec_size = rec_offs_size(offsets);
1626
 
        new_rec_size = rec_get_converted_size(index, new_entry);
 
1872
        new_rec_size = rec_get_converted_size(index, new_entry, 0);
 
1873
 
 
1874
        page_zip = buf_block_get_page_zip(block);
 
1875
#ifdef UNIV_ZIP_DEBUG
 
1876
        ut_a(!page_zip || page_zip_validate(page_zip, page));
 
1877
#endif /* UNIV_ZIP_DEBUG */
 
1878
 
 
1879
        if (UNIV_LIKELY_NULL(page_zip)
 
1880
            && !btr_cur_update_alloc_zip(page_zip, block, index,
 
1881
                                         new_rec_size, mtr)) {
 
1882
                err = DB_ZIP_OVERFLOW;
 
1883
                goto err_exit;
 
1884
        }
1627
1885
 
1628
1886
        if (UNIV_UNLIKELY(new_rec_size
1629
1887
                          >= (page_get_free_space_of_empty(page_is_comp(page))
1630
1888
                              / 2))) {
1631
1889
 
1632
 
                mem_heap_free(heap);
1633
 
 
1634
 
                return(DB_OVERFLOW);
 
1890
                err = DB_OVERFLOW;
 
1891
                goto err_exit;
1635
1892
        }
1636
1893
 
1637
 
        max_size = old_rec_size
1638
 
                + page_get_max_insert_size_after_reorganize(page, 1);
1639
 
 
1640
1894
        if (UNIV_UNLIKELY(page_get_data_size(page)
1641
1895
                          - old_rec_size + new_rec_size
1642
1896
                          < BTR_CUR_PAGE_COMPRESS_LIMIT)) {
1643
1897
 
1644
1898
                /* The page would become too empty */
1645
1899
 
1646
 
                mem_heap_free(heap);
1647
 
 
1648
 
                return(DB_UNDERFLOW);
 
1900
                err = DB_UNDERFLOW;
 
1901
                goto err_exit;
1649
1902
        }
1650
1903
 
 
1904
        max_size = old_rec_size
 
1905
                + page_get_max_insert_size_after_reorganize(page, 1);
 
1906
 
1651
1907
        if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
1652
1908
               && (max_size >= new_rec_size))
1653
1909
              || (page_get_n_recs(page) <= 1))) {
1656
1912
                reorganize: for simplicity, we decide what to do assuming a
1657
1913
                reorganization is needed, though it might not be necessary */
1658
1914
 
1659
 
                mem_heap_free(heap);
1660
 
 
1661
 
                return(DB_OVERFLOW);
 
1915
                err = DB_OVERFLOW;
 
1916
                goto err_exit;
1662
1917
        }
1663
1918
 
1664
1919
        /* Do lock checking and undo logging */
1665
1920
        err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, thr,
1666
1921
                                        &roll_ptr);
1667
1922
        if (err != DB_SUCCESS) {
1668
 
 
 
1923
err_exit:
1669
1924
                mem_heap_free(heap);
1670
 
 
1671
1925
                return(err);
1672
1926
        }
1673
1927
 
1674
1928
        /* Ok, we may do the replacement. Store on the page infimum the
1675
1929
        explicit locks on rec, before deleting rec (see the comment in
1676
 
        .._pessimistic_update). */
 
1930
        btr_cur_pessimistic_update). */
1677
1931
 
1678
 
        lock_rec_store_on_page_infimum(page, rec);
 
1932
        lock_rec_store_on_page_infimum(block, rec);
1679
1933
 
1680
1934
        btr_search_update_hash_on_delete(cursor);
1681
1935
 
 
1936
        /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
 
1937
        invokes rec_offs_make_valid() to point to the copied record that
 
1938
        the fields of new_entry point to.  We have to undo it here. */
 
1939
        ut_ad(rec_offs_validate(NULL, index, offsets));
 
1940
        rec_offs_make_valid(page_cur_get_rec(page_cursor), index, offsets);
 
1941
 
1682
1942
        page_cur_delete_rec(page_cursor, index, offsets, mtr);
1683
1943
 
1684
1944
        page_cur_move_to_prev(page_cursor);
1692
1952
                                              trx->id);
1693
1953
        }
1694
1954
 
1695
 
        rec = btr_cur_insert_if_possible(cursor, new_entry, &reorganized, mtr);
1696
 
 
 
1955
        /* There are no externally stored columns in new_entry */
 
1956
        rec = btr_cur_insert_if_possible(cursor, new_entry, 0/*n_ext*/, mtr);
1697
1957
        ut_a(rec); /* <- We calculated above the insert would fit */
1698
1958
 
1699
 
        if (!rec_get_deleted_flag(rec, page_is_comp(page))) {
1700
 
                /* The new inserted record owns its possible externally
1701
 
                stored fields */
1702
 
 
1703
 
                offsets = rec_get_offsets(rec, index, offsets,
1704
 
                                          ULINT_UNDEFINED, &heap);
1705
 
                btr_cur_unmark_extern_fields(rec, mtr, offsets);
 
1959
        if (page_zip && !dict_index_is_clust(index)
 
1960
            && page_is_leaf(page)) {
 
1961
                /* Update the free bits in the insert buffer. */
 
1962
                ibuf_update_free_bits_zip(block, mtr);
1706
1963
        }
1707
1964
 
1708
1965
        /* Restore the old explicit lock state on the record */
1709
1966
 
1710
 
        lock_rec_restore_from_page_infimum(rec, page);
 
1967
        lock_rec_restore_from_page_infimum(block, rec, block);
1711
1968
 
1712
1969
        page_cur_move_to_next(page_cursor);
1713
1970
 
1726
1983
void
1727
1984
btr_cur_pess_upd_restore_supremum(
1728
1985
/*==============================*/
1729
 
        rec_t*  rec,    /* in: updated record */
1730
 
        mtr_t*  mtr)    /* in: mtr */
 
1986
        buf_block_t*    block,  /* in: buffer block of rec */
 
1987
        const rec_t*    rec,    /* in: updated record */
 
1988
        mtr_t*          mtr)    /* in: mtr */
1731
1989
{
1732
 
        page_t* page;
1733
 
        page_t* prev_page;
1734
 
        ulint   space;
1735
 
        ulint   prev_page_no;
 
1990
        page_t*         page;
 
1991
        buf_block_t*    prev_block;
 
1992
        ulint           space;
 
1993
        ulint           zip_size;
 
1994
        ulint           prev_page_no;
1736
1995
 
1737
 
        page = buf_frame_align(rec);
 
1996
        page = buf_block_get_frame(block);
1738
1997
 
1739
1998
        if (page_rec_get_next(page_get_infimum_rec(page)) != rec) {
1740
1999
                /* Updated record is not the first user record on its page */
1742
2001
                return;
1743
2002
        }
1744
2003
 
1745
 
        space = buf_frame_get_space_id(page);
 
2004
        space = buf_block_get_space(block);
 
2005
        zip_size = buf_block_get_zip_size(block);
1746
2006
        prev_page_no = btr_page_get_prev(page, mtr);
1747
2007
 
1748
2008
        ut_ad(prev_page_no != FIL_NULL);
1749
 
        prev_page = buf_page_get_with_no_latch(space, prev_page_no, mtr);
 
2009
        prev_block = buf_page_get_with_no_latch(space, zip_size,
 
2010
                                                prev_page_no, mtr);
1750
2011
#ifdef UNIV_BTR_DEBUG
1751
 
        ut_a(btr_page_get_next(prev_page, mtr)
1752
 
             == buf_frame_get_page_no(page));
 
2012
        ut_a(btr_page_get_next(prev_block->frame, mtr)
 
2013
             == page_get_page_no(page));
1753
2014
#endif /* UNIV_BTR_DEBUG */
1754
2015
 
1755
 
        /* We must already have an x-latch to prev_page! */
1756
 
        ut_ad(mtr_memo_contains(mtr, buf_block_align(prev_page),
1757
 
                                MTR_MEMO_PAGE_X_FIX));
 
2016
        /* We must already have an x-latch on prev_block! */
 
2017
        ut_ad(mtr_memo_contains(mtr, prev_block, MTR_MEMO_PAGE_X_FIX));
1758
2018
 
1759
 
        lock_rec_reset_and_inherit_gap_locks(page_get_supremum_rec(prev_page),
1760
 
                                             rec);
 
2019
        lock_rec_reset_and_inherit_gap_locks(prev_block, block,
 
2020
                                             PAGE_HEAP_NO_SUPREMUM,
 
2021
                                             page_rec_get_heap_no(rec));
1761
2022
}
1762
2023
 
1763
2024
/*****************************************************************
1766
2027
update is made on the leaf level, to avoid deadlocks, mtr must also
1767
2028
own x-latches to brothers of page, if those brothers exist. We assume
1768
2029
here that the ordering fields of the record do not change. */
1769
 
 
 
2030
UNIV_INTERN
1770
2031
ulint
1771
2032
btr_cur_pessimistic_update(
1772
2033
/*=======================*/
1774
2035
        ulint           flags,  /* in: undo logging, locking, and rollback
1775
2036
                                flags */
1776
2037
        btr_cur_t*      cursor, /* in: cursor on the record to update */
 
2038
        mem_heap_t**    heap,   /* in/out: pointer to memory heap, or NULL */
1777
2039
        big_rec_t**     big_rec,/* out: big rec vector whose fields have to
1778
2040
                                be stored externally by the caller, or NULL */
1779
 
        upd_t*          update, /* in: update vector; this is allowed also
 
2041
        const upd_t*    update, /* in: update vector; this may also
1780
2042
                                contain trx id and roll ptr fields, but
1781
2043
                                the values in update vector have no effect */
1782
2044
        ulint           cmpl_info,/* in: compiler info on secondary index
1783
2045
                                updates */
1784
2046
        que_thr_t*      thr,    /* in: query thread */
1785
 
        mtr_t*          mtr)    /* in: mtr */
 
2047
        mtr_t*          mtr)    /* in: mtr; must be committed before
 
2048
                                latching any further pages */
1786
2049
{
1787
2050
        big_rec_t*      big_rec_vec     = NULL;
1788
2051
        big_rec_t*      dummy_big_rec;
1789
2052
        dict_index_t*   index;
 
2053
        buf_block_t*    block;
1790
2054
        page_t*         page;
 
2055
        page_zip_des_t* page_zip;
1791
2056
        rec_t*          rec;
1792
2057
        page_cur_t*     page_cursor;
1793
2058
        dtuple_t*       new_entry;
1794
 
        mem_heap_t*     heap;
1795
2059
        ulint           err;
1796
2060
        ulint           optim_err;
1797
 
        ibool           dummy_reorganized;
1798
2061
        dulint          roll_ptr;
1799
2062
        trx_t*          trx;
1800
2063
        ibool           was_first;
1801
 
        ibool           success;
1802
2064
        ulint           n_extents       = 0;
1803
2065
        ulint           n_reserved;
1804
 
        ulint*          ext_vect;
1805
 
        ulint           n_ext_vect;
1806
 
        ulint           reserve_flag;
 
2066
        ulint           n_ext;
1807
2067
        ulint*          offsets         = NULL;
1808
2068
 
1809
2069
        *big_rec = NULL;
1810
2070
 
1811
 
        page = btr_cur_get_page(cursor);
 
2071
        block = btr_cur_get_block(cursor);
 
2072
        page = buf_block_get_frame(block);
 
2073
        page_zip = buf_block_get_page_zip(block);
1812
2074
        rec = btr_cur_get_rec(cursor);
1813
2075
        index = cursor->index;
1814
2076
 
1815
2077
        ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
1816
2078
                                MTR_MEMO_X_LOCK));
1817
 
        ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
1818
 
                                MTR_MEMO_PAGE_X_FIX));
 
2079
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 
2080
#ifdef UNIV_ZIP_DEBUG
 
2081
        ut_a(!page_zip || page_zip_validate(page_zip, page));
 
2082
#endif /* UNIV_ZIP_DEBUG */
1819
2083
 
1820
2084
        optim_err = btr_cur_optimistic_update(flags, cursor, update,
1821
2085
                                              cmpl_info, thr, mtr);
1822
2086
 
1823
 
        if (optim_err != DB_UNDERFLOW && optim_err != DB_OVERFLOW) {
1824
 
 
 
2087
        switch (optim_err) {
 
2088
        case DB_UNDERFLOW:
 
2089
        case DB_OVERFLOW:
 
2090
        case DB_ZIP_OVERFLOW:
 
2091
                break;
 
2092
        default:
1825
2093
                return(optim_err);
1826
2094
        }
1827
2095
 
1834
2102
        }
1835
2103
 
1836
2104
        if (optim_err == DB_OVERFLOW) {
 
2105
                ulint   reserve_flag;
 
2106
 
1837
2107
                /* First reserve enough free space for the file segments
1838
2108
                of the index tree, so that the update will not fail because
1839
2109
                of lack of space */
1846
2116
                        reserve_flag = FSP_NORMAL;
1847
2117
                }
1848
2118
 
1849
 
                success = fsp_reserve_free_extents(&n_reserved, index->space,
1850
 
                                                   n_extents,
1851
 
                                                   reserve_flag, mtr);
1852
 
                if (!success) {
1853
 
                        err = DB_OUT_OF_FILE_SPACE;
1854
 
 
1855
 
                        return(err);
 
2119
                if (!fsp_reserve_free_extents(&n_reserved, index->space,
 
2120
                                              n_extents, reserve_flag, mtr)) {
 
2121
                        return(DB_OUT_OF_FILE_SPACE);
1856
2122
                }
1857
2123
        }
1858
2124
 
1859
 
        heap = mem_heap_create(1024);
1860
 
        offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
 
2125
        if (!*heap) {
 
2126
                *heap = mem_heap_create(1024);
 
2127
        }
 
2128
        offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, heap);
1861
2129
 
1862
2130
        trx = thr_get_trx(thr);
1863
2131
 
1864
 
        new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
 
2132
        new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
 
2133
                                           &n_ext, *heap);
 
2134
        /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
 
2135
        invokes rec_offs_make_valid() to point to the copied record that
 
2136
        the fields of new_entry point to.  We have to undo it here. */
 
2137
        ut_ad(rec_offs_validate(NULL, index, offsets));
 
2138
        rec_offs_make_valid(rec, index, offsets);
1865
2139
 
 
2140
        /* The page containing the clustered index record
 
2141
        corresponding to new_entry is latched in mtr.  If the
 
2142
        clustered index record is delete-marked, then its externally
 
2143
        stored fields cannot have been purged yet, because then the
 
2144
        purge would also have removed the clustered index record
 
2145
        itself.  Thus the following call is safe. */
1866
2146
        row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
1867
 
                                                     FALSE, heap);
 
2147
                                                     FALSE, *heap);
1868
2148
        if (!(flags & BTR_KEEP_SYS_FLAG)) {
1869
2149
                row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
1870
2150
                                              roll_ptr);
1872
2152
                                              trx->id);
1873
2153
        }
1874
2154
 
1875
 
        if (flags & BTR_NO_UNDO_LOG_FLAG) {
 
2155
        if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(offsets)) {
1876
2156
                /* We are in a transaction rollback undoing a row
1877
2157
                update: we must free possible externally stored fields
1878
2158
                which got new values in the update, if they are not
1880
2160
                updated the primary key to another value, and then
1881
2161
                update it back again. */
1882
2162
 
1883
 
                ut_a(big_rec_vec == NULL);
 
2163
                ut_ad(big_rec_vec == NULL);
1884
2164
 
1885
 
                btr_rec_free_updated_extern_fields(index, rec, offsets,
1886
 
                                                   update, TRUE, mtr);
 
2165
                btr_rec_free_updated_extern_fields(index, rec, page_zip,
 
2166
                                                   offsets, update, mtr);
1887
2167
        }
1888
2168
 
1889
2169
        /* We have to set appropriate extern storage bits in the new
1890
2170
        record to be inserted: we have to remember which fields were such */
1891
2171
 
1892
 
        ext_vect = mem_heap_alloc(heap, sizeof(ulint)
1893
 
                                  * dict_index_get_n_fields(index));
1894
2172
        ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
1895
 
        offsets = rec_get_offsets(rec, index, offsets,
1896
 
                                  ULINT_UNDEFINED, &heap);
1897
 
        n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets, update);
1898
 
 
1899
 
        if (UNIV_UNLIKELY(rec_get_converted_size(index, new_entry)
1900
 
                          >= ut_min(page_get_free_space_of_empty(
1901
 
                                            page_is_comp(page)) / 2,
1902
 
                                    REC_MAX_DATA_SIZE))) {
1903
 
 
1904
 
                big_rec_vec = dtuple_convert_big_rec(index, new_entry,
1905
 
                                                     ext_vect, n_ext_vect);
1906
 
                if (big_rec_vec == NULL) {
 
2173
        offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap);
 
2174
        n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
 
2175
 
 
2176
        if (page_zip_rec_needs_ext(rec_get_converted_size(index, new_entry,
 
2177
                                                          n_ext),
 
2178
                                   page_is_comp(page), page_zip
 
2179
                                   ? page_zip_get_size(page_zip) : 0)) {
 
2180
                big_rec_vec = dtuple_convert_big_rec(index, new_entry, &n_ext);
 
2181
                if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
1907
2182
 
1908
2183
                        err = DB_TOO_BIG_RECORD;
1909
2184
                        goto return_after_reservations;
1910
2185
                }
1911
2186
        }
1912
2187
 
1913
 
        page_cursor = btr_cur_get_page_cur(cursor);
1914
 
 
1915
2188
        /* Store state of explicit locks on rec on the page infimum record,
1916
2189
        before deleting rec. The page infimum acts as a dummy carrier of the
1917
2190
        locks, taking care also of lock releases, before we can move the locks
1921
2194
        delete the lock structs set on the root page even if the root
1922
2195
        page carries just node pointers. */
1923
2196
 
1924
 
        lock_rec_store_on_page_infimum(buf_frame_align(rec), rec);
 
2197
        lock_rec_store_on_page_infimum(block, rec);
1925
2198
 
1926
2199
        btr_search_update_hash_on_delete(cursor);
1927
2200
 
 
2201
#ifdef UNIV_ZIP_DEBUG
 
2202
        ut_a(!page_zip || page_zip_validate(page_zip, page));
 
2203
#endif /* UNIV_ZIP_DEBUG */
 
2204
        page_cursor = btr_cur_get_page_cur(cursor);
 
2205
 
1928
2206
        page_cur_delete_rec(page_cursor, index, offsets, mtr);
1929
2207
 
1930
2208
        page_cur_move_to_prev(page_cursor);
1931
2209
 
1932
 
        rec = btr_cur_insert_if_possible(cursor, new_entry,
1933
 
                                         &dummy_reorganized, mtr);
1934
 
        ut_a(rec || optim_err != DB_UNDERFLOW);
 
2210
        rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);
1935
2211
 
1936
2212
        if (rec) {
1937
 
                lock_rec_restore_from_page_infimum(rec, page);
1938
 
                rec_set_field_extern_bits(rec, index,
1939
 
                                          ext_vect, n_ext_vect, mtr);
 
2213
                lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
 
2214
                                                   rec, block);
1940
2215
 
1941
2216
                offsets = rec_get_offsets(rec, index, offsets,
1942
 
                                          ULINT_UNDEFINED, &heap);
 
2217
                                          ULINT_UNDEFINED, heap);
1943
2218
 
1944
2219
                if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
1945
2220
                        /* The new inserted record owns its possible externally
1946
2221
                        stored fields */
1947
 
                        btr_cur_unmark_extern_fields(rec, mtr, offsets);
 
2222
                        btr_cur_unmark_extern_fields(page_zip,
 
2223
                                                     rec, index, offsets, mtr);
1948
2224
                }
1949
2225
 
1950
2226
                btr_cur_compress_if_useful(cursor, mtr);
1951
2227
 
 
2228
                if (page_zip && !dict_index_is_clust(index)
 
2229
                    && page_is_leaf(page)) {
 
2230
                        /* Update the free bits in the insert buffer. */
 
2231
                        ibuf_update_free_bits_zip(block, mtr);
 
2232
                }
 
2233
 
1952
2234
                err = DB_SUCCESS;
1953
2235
                goto return_after_reservations;
1954
 
        }
1955
 
 
1956
 
        if (page_cur_is_before_first(page_cursor)) {
1957
 
                /* The record to be updated was positioned as the first user
1958
 
                record on its page */
1959
 
 
1960
 
                was_first = TRUE;
1961
2236
        } else {
1962
 
                was_first = FALSE;
 
2237
                ut_a(optim_err != DB_UNDERFLOW);
 
2238
 
 
2239
                /* Out of space: reset the free bits. */
 
2240
                if (!dict_index_is_clust(index)
 
2241
                    && page_is_leaf(page)) {
 
2242
                        ibuf_reset_free_bits(block);
 
2243
                }
1963
2244
        }
1964
2245
 
 
2246
        /* Was the record to be updated positioned as the first user
 
2247
        record on its page? */
 
2248
        was_first = page_cur_is_before_first(page_cursor);
 
2249
 
1965
2250
        /* The first parameter means that no lock checking and undo logging
1966
2251
        is made in the insert */
1967
2252
 
1969
2254
                                         | BTR_NO_LOCKING_FLAG
1970
2255
                                         | BTR_KEEP_SYS_FLAG,
1971
2256
                                         cursor, new_entry, &rec,
1972
 
                                         &dummy_big_rec, NULL, mtr);
 
2257
                                         &dummy_big_rec, n_ext, NULL, mtr);
1973
2258
        ut_a(rec);
1974
2259
        ut_a(err == DB_SUCCESS);
1975
2260
        ut_a(dummy_big_rec == NULL);
1976
2261
 
1977
 
        rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr);
1978
 
        offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
1979
 
 
1980
2262
        if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
1981
2263
                /* The new inserted record owns its possible externally
1982
2264
                stored fields */
1983
 
 
1984
 
                btr_cur_unmark_extern_fields(rec, mtr, offsets);
 
2265
                buf_block_t*    rec_block = btr_cur_get_block(cursor);
 
2266
 
 
2267
#ifdef UNIV_ZIP_DEBUG
 
2268
                ut_a(!page_zip || page_zip_validate(page_zip, page));
 
2269
                page = buf_block_get_frame(rec_block);
 
2270
#endif /* UNIV_ZIP_DEBUG */
 
2271
                page_zip = buf_block_get_page_zip(rec_block);
 
2272
 
 
2273
                offsets = rec_get_offsets(rec, index, offsets,
 
2274
                                          ULINT_UNDEFINED, heap);
 
2275
                btr_cur_unmark_extern_fields(page_zip,
 
2276
                                             rec, index, offsets, mtr);
1985
2277
        }
1986
2278
 
1987
 
        lock_rec_restore_from_page_infimum(rec, page);
 
2279
        lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
 
2280
                                           rec, block);
1988
2281
 
1989
2282
        /* If necessary, restore also the correct lock state for a new,
1990
2283
        preceding supremum record created in a page split. While the old
1992
2285
        from a wrong record. */
1993
2286
 
1994
2287
        if (!was_first) {
1995
 
                btr_cur_pess_upd_restore_supremum(rec, mtr);
 
2288
                btr_cur_pess_upd_restore_supremum(btr_cur_get_block(cursor),
 
2289
                                                  rec, mtr);
1996
2290
        }
1997
2291
 
1998
2292
return_after_reservations:
1999
 
        mem_heap_free(heap);
 
2293
#ifdef UNIV_ZIP_DEBUG
 
2294
        ut_a(!page_zip || page_zip_validate(page_zip, page));
 
2295
#endif /* UNIV_ZIP_DEBUG */
2000
2296
 
2001
2297
        if (n_extents > 0) {
2002
2298
                fil_space_release_free_extents(index->space, n_reserved);
2058
2354
/********************************************************************
2059
2355
Parses the redo log record for delete marking or unmarking of a clustered
2060
2356
index record. */
2061
 
 
 
2357
UNIV_INTERN
2062
2358
byte*
2063
2359
btr_cur_parse_del_mark_set_clust_rec(
2064
2360
/*=================================*/
2065
2361
                                /* out: end of log record or NULL */
2066
2362
        byte*           ptr,    /* in: buffer */
2067
2363
        byte*           end_ptr,/* in: buffer end */
2068
 
        dict_index_t*   index,  /* in: index corresponding to page */
2069
 
        page_t*         page)   /* in: page or NULL */
 
2364
        page_t*         page,   /* in/out: page or NULL */
 
2365
        page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
 
2366
        dict_index_t*   index)  /* in: index corresponding to page */
2070
2367
{
2071
2368
        ulint   flags;
2072
2369
        ulint   val;
2109
2406
        if (page) {
2110
2407
                rec = page + offset;
2111
2408
 
 
2409
                /* We do not need to reserve btr_search_latch, as the page
 
2410
                is only being recovered, and there cannot be a hash index to
 
2411
                it. */
 
2412
 
 
2413
                btr_rec_set_deleted_flag(rec, page_zip, val);
 
2414
 
2112
2415
                if (!(flags & BTR_KEEP_SYS_FLAG)) {
2113
2416
                        mem_heap_t*     heap            = NULL;
2114
2417
                        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
2115
 
                        *offsets_ = (sizeof offsets_) / sizeof *offsets_;
 
2418
                        rec_offs_init(offsets_);
2116
2419
 
2117
2420
                        row_upd_rec_sys_fields_in_recovery(
2118
 
                                rec, rec_get_offsets(rec, index, offsets_,
2119
 
                                                     ULINT_UNDEFINED, &heap),
 
2421
                                rec, page_zip,
 
2422
                                rec_get_offsets(rec, index, offsets_,
 
2423
                                                ULINT_UNDEFINED, &heap),
2120
2424
                                pos, trx_id, roll_ptr);
2121
2425
                        if (UNIV_LIKELY_NULL(heap)) {
2122
2426
                                mem_heap_free(heap);
2123
2427
                        }
2124
2428
                }
2125
 
 
2126
 
                /* We do not need to reserve btr_search_latch, as the page
2127
 
                is only being recovered, and there cannot be a hash index to
2128
 
                it. */
2129
 
 
2130
 
                rec_set_deleted_flag(rec, page_is_comp(page), val);
2131
2429
        }
2132
2430
 
2133
2431
        return(ptr);
2138
2436
undo log on this delete marking. Writes in the trx id field the id
2139
2437
of the deleting transaction, and in the roll ptr field pointer to the
2140
2438
undo log record created. */
2141
 
 
 
2439
UNIV_INTERN
2142
2440
ulint
2143
2441
btr_cur_del_mark_set_clust_rec(
2144
2442
/*===========================*/
2155
2453
        dulint          roll_ptr;
2156
2454
        ulint           err;
2157
2455
        rec_t*          rec;
 
2456
        page_zip_des_t* page_zip;
2158
2457
        trx_t*          trx;
2159
2458
        mem_heap_t*     heap            = NULL;
2160
2459
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
2161
2460
        ulint*          offsets         = offsets_;
2162
 
        *offsets_ = (sizeof offsets_) / sizeof *offsets_;
 
2461
        rec_offs_init(offsets_);
2163
2462
 
2164
2463
        rec = btr_cur_get_rec(cursor);
2165
2464
        index = cursor->index;
2173
2472
        }
2174
2473
#endif /* UNIV_DEBUG */
2175
2474
 
2176
 
        ut_ad(index->type & DICT_CLUSTERED);
 
2475
        ut_ad(dict_index_is_clust(index));
2177
2476
        ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
2178
2477
 
2179
2478
        err = lock_clust_rec_modify_check_and_lock(flags,
 
2479
                                                   btr_cur_get_block(cursor),
2180
2480
                                                   rec, index, offsets, thr);
2181
2481
 
2182
2482
        if (err != DB_SUCCESS) {
2183
2483
 
2184
 
                if (UNIV_LIKELY_NULL(heap)) {
2185
 
                        mem_heap_free(heap);
2186
 
                }
2187
 
                return(err);
 
2484
                goto func_exit;
2188
2485
        }
2189
2486
 
2190
2487
        err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
2192
2489
                                            &roll_ptr);
2193
2490
        if (err != DB_SUCCESS) {
2194
2491
 
2195
 
                if (UNIV_LIKELY_NULL(heap)) {
2196
 
                        mem_heap_free(heap);
2197
 
                }
2198
 
                return(err);
 
2492
                goto func_exit;
2199
2493
        }
2200
2494
 
2201
 
        block = buf_block_align(rec);
 
2495
        block = btr_cur_get_block(cursor);
2202
2496
 
2203
2497
        if (block->is_hashed) {
2204
2498
                rw_lock_x_lock(&btr_search_latch);
2205
2499
        }
2206
2500
 
2207
 
        rec_set_deleted_flag(rec, rec_offs_comp(offsets), val);
 
2501
        page_zip = buf_block_get_page_zip(block);
 
2502
 
 
2503
        btr_rec_set_deleted_flag(rec, page_zip, val);
2208
2504
 
2209
2505
        trx = thr_get_trx(thr);
2210
2506
 
2211
2507
        if (!(flags & BTR_KEEP_SYS_FLAG)) {
2212
 
                row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr);
 
2508
                row_upd_rec_sys_fields(rec, page_zip,
 
2509
                                       index, offsets, trx, roll_ptr);
2213
2510
        }
2214
2511
 
2215
2512
        if (block->is_hashed) {
2218
2515
 
2219
2516
        btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
2220
2517
                                           roll_ptr, mtr);
 
2518
 
 
2519
func_exit:
2221
2520
        if (UNIV_LIKELY_NULL(heap)) {
2222
2521
                mem_heap_free(heap);
2223
2522
        }
2224
 
        return(DB_SUCCESS);
 
2523
        return(err);
2225
2524
}
2226
2525
 
2227
2526
/********************************************************************
2260
2559
/********************************************************************
2261
2560
Parses the redo log record for delete marking or unmarking of a secondary
2262
2561
index record. */
2263
 
 
 
2562
UNIV_INTERN
2264
2563
byte*
2265
2564
btr_cur_parse_del_mark_set_sec_rec(
2266
2565
/*===============================*/
2267
2566
                                /* out: end of log record or NULL */
2268
2567
        byte*           ptr,    /* in: buffer */
2269
2568
        byte*           end_ptr,/* in: buffer end */
2270
 
        page_t*         page)   /* in: page or NULL */
 
2569
        page_t*         page,   /* in/out: page or NULL */
 
2570
        page_zip_des_t* page_zip)/* in/out: compressed page, or NULL */
2271
2571
{
2272
2572
        ulint   val;
2273
2573
        ulint   offset;
2293
2593
                is only being recovered, and there cannot be a hash index to
2294
2594
                it. */
2295
2595
 
2296
 
                rec_set_deleted_flag(rec, page_is_comp(page), val);
 
2596
                btr_rec_set_deleted_flag(rec, page_zip, val);
2297
2597
        }
2298
2598
 
2299
2599
        return(ptr);
2301
2601
 
2302
2602
/***************************************************************
2303
2603
Sets a secondary index record delete mark to TRUE or FALSE. */
2304
 
 
 
2604
UNIV_INTERN
2305
2605
ulint
2306
2606
btr_cur_del_mark_set_sec_rec(
2307
2607
/*=========================*/
2317
2617
        rec_t*          rec;
2318
2618
        ulint           err;
2319
2619
 
 
2620
        block = btr_cur_get_block(cursor);
2320
2621
        rec = btr_cur_get_rec(cursor);
2321
2622
 
2322
2623
#ifdef UNIV_DEBUG
2327
2628
        }
2328
2629
#endif /* UNIV_DEBUG */
2329
2630
 
2330
 
        err = lock_sec_rec_modify_check_and_lock(flags, rec, cursor->index,
2331
 
                                                 thr);
 
2631
        err = lock_sec_rec_modify_check_and_lock(flags,
 
2632
                                                 btr_cur_get_block(cursor),
 
2633
                                                 rec, cursor->index, thr);
2332
2634
        if (err != DB_SUCCESS) {
2333
2635
 
2334
2636
                return(err);
2335
2637
        }
2336
2638
 
2337
 
        block = buf_block_align(rec);
2338
 
        ut_ad(!!page_is_comp(buf_block_get_frame(block))
 
2639
        ut_ad(!!page_rec_is_comp(rec)
2339
2640
              == dict_table_is_comp(cursor->index->table));
2340
2641
 
2341
2642
        if (block->is_hashed) {
2342
2643
                rw_lock_x_lock(&btr_search_latch);
2343
2644
        }
2344
2645
 
2345
 
        rec_set_deleted_flag(rec, page_is_comp(buf_block_get_frame(block)),
2346
 
                             val);
 
2646
        btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block), val);
2347
2647
 
2348
2648
        if (block->is_hashed) {
2349
2649
                rw_lock_x_unlock(&btr_search_latch);
2357
2657
/***************************************************************
2358
2658
Sets a secondary index record delete mark to FALSE. This function is only
2359
2659
used by the insert buffer insert merge mechanism. */
2360
 
 
 
2660
UNIV_INTERN
2361
2661
void
2362
2662
btr_cur_del_unmark_for_ibuf(
2363
2663
/*========================*/
2364
 
        rec_t*          rec,    /* in: record to delete unmark */
2365
 
        mtr_t*          mtr)    /* in: mtr */
 
2664
        rec_t*          rec,            /* in/out: record to delete unmark */
 
2665
        page_zip_des_t* page_zip,       /* in/out: compressed page
 
2666
                                        corresponding to rec, or NULL
 
2667
                                        when the tablespace is
 
2668
                                        uncompressed */
 
2669
        mtr_t*          mtr)            /* in: mtr */
2366
2670
{
2367
2671
        /* We do not need to reserve btr_search_latch, as the page has just
2368
2672
        been read to the buffer pool and there cannot be a hash index to it. */
2369
2673
 
2370
 
        rec_set_deleted_flag(rec, page_is_comp(buf_frame_align(rec)), FALSE);
 
2674
        btr_rec_set_deleted_flag(rec, page_zip, FALSE);
2371
2675
 
2372
2676
        btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr);
2373
2677
}
2375
2679
/*==================== B-TREE RECORD REMOVE =========================*/
2376
2680
 
2377
2681
/*****************************************************************
2378
 
Tries to compress a page of the tree on the leaf level. It is assumed
2379
 
that mtr holds an x-latch on the tree and on the cursor page. To avoid
2380
 
deadlocks, mtr must also own x-latches to brothers of page, if those
2381
 
brothers exist. NOTE: it is assumed that the caller has reserved enough
2382
 
free extents so that the compression will always succeed if done! */
2383
 
 
2384
 
void
2385
 
btr_cur_compress(
2386
 
/*=============*/
2387
 
        btr_cur_t*      cursor, /* in: cursor on the page to compress;
2388
 
                                cursor does not stay valid */
2389
 
        mtr_t*          mtr)    /* in: mtr */
2390
 
{
2391
 
        ut_ad(mtr_memo_contains(mtr,
2392
 
                                dict_index_get_lock(btr_cur_get_index(cursor)),
2393
 
                                MTR_MEMO_X_LOCK));
2394
 
        ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
2395
 
                                MTR_MEMO_PAGE_X_FIX));
2396
 
        ut_ad(btr_page_get_level(btr_cur_get_page(cursor), mtr) == 0);
2397
 
 
2398
 
        btr_compress(cursor, mtr);
2399
 
}
2400
 
 
2401
 
/*****************************************************************
2402
2682
Tries to compress a page of the tree if it seems useful. It is assumed
2403
2683
that mtr holds an x-latch on the tree and on the cursor page. To avoid
2404
2684
deadlocks, mtr must also own x-latches to brothers of page, if those
2405
2685
brothers exist. NOTE: it is assumed that the caller has reserved enough
2406
2686
free extents so that the compression will always succeed if done! */
2407
 
 
 
2687
UNIV_INTERN
2408
2688
ibool
2409
2689
btr_cur_compress_if_useful(
2410
2690
/*=======================*/
2417
2697
        ut_ad(mtr_memo_contains(mtr,
2418
2698
                                dict_index_get_lock(btr_cur_get_index(cursor)),
2419
2699
                                MTR_MEMO_X_LOCK));
2420
 
        ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
 
2700
        ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
2421
2701
                                MTR_MEMO_PAGE_X_FIX));
2422
2702
 
2423
 
        if (btr_cur_compress_recommendation(cursor, mtr)) {
2424
 
 
2425
 
                btr_compress(cursor, mtr);
2426
 
 
2427
 
                return(TRUE);
2428
 
        }
2429
 
 
2430
 
        return(FALSE);
 
2703
        return(btr_cur_compress_recommendation(cursor, mtr)
 
2704
               && btr_compress(cursor, mtr));
2431
2705
}
2432
2706
 
2433
2707
/***********************************************************
2434
2708
Removes the record on which the tree cursor is positioned on a leaf page.
2435
2709
It is assumed that the mtr has an x-latch on the page where the cursor is
2436
2710
positioned, but no latch on the whole tree. */
2437
 
 
 
2711
UNIV_INTERN
2438
2712
ibool
2439
2713
btr_cur_optimistic_delete(
2440
2714
/*======================*/
2446
2720
                                successor of the deleted record */
2447
2721
        mtr_t*          mtr)    /* in: mtr */
2448
2722
{
2449
 
        page_t*         page;
2450
 
        ulint           max_ins_size;
 
2723
        buf_block_t*    block;
2451
2724
        rec_t*          rec;
2452
2725
        mem_heap_t*     heap            = NULL;
2453
2726
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
2454
2727
        ulint*          offsets         = offsets_;
2455
2728
        ibool           no_compress_needed;
2456
 
        *offsets_ = (sizeof offsets_) / sizeof *offsets_;
 
2729
        rec_offs_init(offsets_);
2457
2730
 
2458
 
        ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
 
2731
        ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
2459
2732
                                MTR_MEMO_PAGE_X_FIX));
2460
2733
        /* This is intended only for leaf page deletions */
2461
2734
 
2462
 
        page = btr_cur_get_page(cursor);
 
2735
        block = btr_cur_get_block(cursor);
2463
2736
 
2464
 
        ut_ad(btr_page_get_level(page, mtr) == 0);
 
2737
        ut_ad(page_is_leaf(buf_block_get_frame(block)));
2465
2738
 
2466
2739
        rec = btr_cur_get_rec(cursor);
2467
2740
        offsets = rec_get_offsets(rec, cursor->index, offsets,
2473
2746
 
2474
2747
        if (no_compress_needed) {
2475
2748
 
2476
 
                lock_update_delete(rec);
 
2749
                page_t*         page    = buf_block_get_frame(block);
 
2750
                page_zip_des_t* page_zip= buf_block_get_page_zip(block);
 
2751
                ulint           max_ins = 0;
 
2752
 
 
2753
                lock_update_delete(block, rec);
2477
2754
 
2478
2755
                btr_search_update_hash_on_delete(cursor);
2479
2756
 
2480
 
                max_ins_size = page_get_max_insert_size_after_reorganize(
2481
 
                        page, 1);
 
2757
                if (!page_zip) {
 
2758
                        max_ins = page_get_max_insert_size_after_reorganize(
 
2759
                                page, 1);
 
2760
                }
 
2761
#ifdef UNIV_ZIP_DEBUG
 
2762
                ut_a(!page_zip || page_zip_validate(page_zip, page));
 
2763
#endif /* UNIV_ZIP_DEBUG */
2482
2764
                page_cur_delete_rec(btr_cur_get_page_cur(cursor),
2483
2765
                                    cursor->index, offsets, mtr);
 
2766
#ifdef UNIV_ZIP_DEBUG
 
2767
                ut_a(!page_zip || page_zip_validate(page_zip, page));
 
2768
#endif /* UNIV_ZIP_DEBUG */
2484
2769
 
2485
 
                ibuf_update_free_bits_low(cursor->index, page, max_ins_size,
2486
 
                                          mtr);
 
2770
                if (dict_index_is_clust(cursor->index)
 
2771
                    || !page_is_leaf(page)) {
 
2772
                        /* The insert buffer does not handle
 
2773
                        inserts to clustered indexes or to non-leaf
 
2774
                        pages of secondary index B-trees. */
 
2775
                } else if (page_zip) {
 
2776
                        ibuf_update_free_bits_zip(block, mtr);
 
2777
                } else {
 
2778
                        ibuf_update_free_bits_low(block, max_ins, mtr);
 
2779
                }
2487
2780
        }
2488
2781
 
2489
2782
        if (UNIV_LIKELY_NULL(heap)) {
2500
2793
an x-latch on the tree and on the cursor page. To avoid deadlocks,
2501
2794
mtr must also own x-latches to brothers of page, if those brothers
2502
2795
exist. */
2503
 
 
 
2796
UNIV_INTERN
2504
2797
ibool
2505
2798
btr_cur_pessimistic_delete(
2506
2799
/*=======================*/
2521
2814
        ibool           in_rollback,/* in: TRUE if called in rollback */
2522
2815
        mtr_t*          mtr)    /* in: mtr */
2523
2816
{
 
2817
        buf_block_t*    block;
2524
2818
        page_t*         page;
 
2819
        page_zip_des_t* page_zip;
2525
2820
        dict_index_t*   index;
2526
2821
        rec_t*          rec;
2527
2822
        dtuple_t*       node_ptr;
2533
2828
        mem_heap_t*     heap;
2534
2829
        ulint*          offsets;
2535
2830
 
2536
 
        page = btr_cur_get_page(cursor);
 
2831
        block = btr_cur_get_block(cursor);
 
2832
        page = buf_block_get_frame(block);
2537
2833
        index = btr_cur_get_index(cursor);
2538
2834
 
2539
2835
        ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
2540
2836
                                MTR_MEMO_X_LOCK));
2541
 
        ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
2542
 
                                MTR_MEMO_PAGE_X_FIX));
 
2837
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2543
2838
        if (!has_reserved_extents) {
2544
2839
                /* First reserve enough free space for the file segments
2545
2840
                of the index tree, so that the node pointer updates will
2560
2855
 
2561
2856
        heap = mem_heap_create(1024);
2562
2857
        rec = btr_cur_get_rec(cursor);
 
2858
        page_zip = buf_block_get_page_zip(block);
 
2859
#ifdef UNIV_ZIP_DEBUG
 
2860
        ut_a(!page_zip || page_zip_validate(page_zip, page));
 
2861
#endif /* UNIV_ZIP_DEBUG */
2563
2862
 
2564
2863
        offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
2565
2864
 
2566
 
        /* Free externally stored fields if the record is neither
2567
 
        a node pointer nor in two-byte format.
2568
 
        This avoids an unnecessary loop. */
2569
 
        if (page_is_comp(page)
2570
 
            ? !rec_get_node_ptr_flag(rec)
2571
 
            : !rec_get_1byte_offs_flag(rec)) {
 
2865
        if (rec_offs_any_extern(offsets)) {
2572
2866
                btr_rec_free_externally_stored_fields(index,
2573
 
                                                      rec, offsets,
 
2867
                                                      rec, offsets, page_zip,
2574
2868
                                                      in_rollback, mtr);
 
2869
#ifdef UNIV_ZIP_DEBUG
 
2870
                ut_a(!page_zip || page_zip_validate(page_zip, page));
 
2871
#endif /* UNIV_ZIP_DEBUG */
2575
2872
        }
2576
2873
 
2577
2874
        if (UNIV_UNLIKELY(page_get_n_recs(page) < 2)
2578
2875
            && UNIV_UNLIKELY(dict_index_get_page(btr_cur_get_index(cursor))
2579
 
                             != buf_frame_get_page_no(page))) {
 
2876
                             != buf_block_get_page_no(block))) {
2580
2877
 
2581
2878
                /* If there is only one record, drop the whole page in
2582
2879
                btr_discard_page, if this is not the root page */
2589
2886
                goto return_after_reservations;
2590
2887
        }
2591
2888
 
2592
 
        lock_update_delete(rec);
 
2889
        lock_update_delete(block, rec);
2593
2890
        level = btr_page_get_level(page, mtr);
2594
2891
 
2595
2892
        if (level > 0
2604
2901
                        non-leaf level, we must mark the new leftmost node
2605
2902
                        pointer as the predefined minimum record */
2606
2903
 
2607
 
                        btr_set_min_rec_mark(next_rec, page_is_comp(page),
2608
 
                                             mtr);
 
2904
                        /* This will make page_zip_validate() fail until
 
2905
                        page_cur_delete_rec() completes.  This is harmless,
 
2906
                        because everything will take place within a single
 
2907
                        mini-transaction and because writing to the redo log
 
2908
                        is an atomic operation (performed by mtr_commit()). */
 
2909
                        btr_set_min_rec_mark(next_rec, mtr);
2609
2910
                } else {
2610
2911
                        /* Otherwise, if we delete the leftmost node pointer
2611
2912
                        on a page, we have to change the father node pointer
2612
2913
                        so that it is equal to the new leftmost node pointer
2613
2914
                        on the page */
2614
2915
 
2615
 
                        btr_node_ptr_delete(index, page, mtr);
 
2916
                        btr_node_ptr_delete(index, block, mtr);
2616
2917
 
2617
2918
                        node_ptr = dict_index_build_node_ptr(
2618
 
                                index, next_rec, buf_frame_get_page_no(page),
 
2919
                                index, next_rec, buf_block_get_page_no(block),
2619
2920
                                heap, level);
2620
2921
 
2621
2922
                        btr_insert_on_non_leaf_level(index,
2626
2927
        btr_search_update_hash_on_delete(cursor);
2627
2928
 
2628
2929
        page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr);
 
2930
#ifdef UNIV_ZIP_DEBUG
 
2931
        ut_a(!page_zip || page_zip_validate(page_zip, page));
 
2932
#endif /* UNIV_ZIP_DEBUG */
2629
2933
 
2630
 
        ut_ad(btr_check_node_ptr(index, page, mtr));
 
2934
        ut_ad(btr_check_node_ptr(index, block, mtr));
2631
2935
 
2632
2936
        *err = DB_SUCCESS;
2633
2937
 
2682
2986
        slot = cursor->path_arr + (root_height - height);
2683
2987
 
2684
2988
        slot->nth_rec = page_rec_get_n_recs_before(rec);
2685
 
        slot->n_recs = page_get_n_recs(buf_frame_align(rec));
 
2989
        slot->n_recs = page_get_n_recs(page_align(rec));
2686
2990
}
2687
2991
 
2688
2992
/***********************************************************************
2689
2993
Estimates the number of rows in a given index range. */
2690
 
 
2691
 
ib_longlong
 
2994
UNIV_INTERN
 
2995
ib_int64_t
2692
2996
btr_estimate_n_rows_in_range(
2693
2997
/*=========================*/
2694
2998
                                /* out: estimated number of rows */
2695
2999
        dict_index_t*   index,  /* in: index */
2696
 
        dtuple_t*       tuple1, /* in: range start, may also be empty tuple */
 
3000
        const dtuple_t* tuple1, /* in: range start, may also be empty tuple */
2697
3001
        ulint           mode1,  /* in: search mode for range start */
2698
 
        dtuple_t*       tuple2, /* in: range end, may also be empty tuple */
 
3002
        const dtuple_t* tuple2, /* in: range end, may also be empty tuple */
2699
3003
        ulint           mode2)  /* in: search mode for range end */
2700
3004
{
2701
3005
        btr_path_t      path1[BTR_PATH_ARRAY_N_SLOTS];
2706
3010
        ibool           diverged;
2707
3011
        ibool           diverged_lot;
2708
3012
        ulint           divergence_level;
2709
 
        ib_longlong     n_rows;
 
3013
        ib_int64_t      n_rows;
2710
3014
        ulint           i;
2711
3015
        mtr_t           mtr;
2712
3016
 
2837
3141
Estimates the number of different key values in a given index, for
2838
3142
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
2839
3143
The estimates are stored in the array index->stat_n_diff_key_vals. */
2840
 
 
 
3144
UNIV_INTERN
2841
3145
void
2842
3146
btr_estimate_number_of_different_key_vals(
2843
3147
/*======================================*/
2849
3153
        ulint           n_cols;
2850
3154
        ulint           matched_fields;
2851
3155
        ulint           matched_bytes;
2852
 
        ib_longlong*    n_diff;
 
3156
        ib_int64_t*     n_diff;
2853
3157
        ulint           not_empty_flag  = 0;
2854
3158
        ulint           total_external_size = 0;
2855
3159
        ulint           i;
2861
3165
        ulint           offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
2862
3166
        ulint*          offsets_rec     = offsets_rec_;
2863
3167
        ulint*          offsets_next_rec= offsets_next_rec_;
2864
 
        *offsets_rec_ = (sizeof offsets_rec_) / sizeof *offsets_rec_;
2865
 
        *offsets_next_rec_
2866
 
                = (sizeof offsets_next_rec_) / sizeof *offsets_next_rec_;
 
3168
        rec_offs_init(offsets_rec_);
 
3169
        rec_offs_init(offsets_next_rec_);
2867
3170
 
2868
3171
        n_cols = dict_index_get_n_unique(index);
2869
3172
 
2870
 
        n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong));
2871
 
 
2872
 
        memset(n_diff, 0, (n_cols + 1) * sizeof(ib_longlong));
 
3173
        n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
2873
3174
 
2874
3175
        /* We sample some pages in the index to get an estimate */
2875
3176
 
2972
3273
        for (j = 0; j <= n_cols; j++) {
2973
3274
                index->stat_n_diff_key_vals[j]
2974
3275
                        = ((n_diff[j]
2975
 
                            * (ib_longlong)index->stat_n_leaf_pages
 
3276
                            * (ib_int64_t)index->stat_n_leaf_pages
2976
3277
                            + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1
2977
3278
                            + total_external_size
2978
3279
                            + not_empty_flag)
3051
3352
void
3052
3353
btr_cur_set_ownership_of_extern_field(
3053
3354
/*==================================*/
3054
 
        rec_t*          rec,    /* in: clustered index record */
 
3355
        page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
 
3356
                                part will be updated, or NULL */
 
3357
        rec_t*          rec,    /* in/out: clustered index record */
 
3358
        dict_index_t*   index,  /* in: index of the page */
3055
3359
        const ulint*    offsets,/* in: array returned by rec_get_offsets() */
3056
3360
        ulint           i,      /* in: field number */
3057
3361
        ibool           val,    /* in: value to set */
3058
 
        mtr_t*          mtr)    /* in: mtr */
 
3362
        mtr_t*          mtr)    /* in: mtr, or NULL if not logged */
3059
3363
{
3060
3364
        byte*   data;
3061
3365
        ulint   local_len;
3075
3379
                byte_val = byte_val | BTR_EXTERN_OWNER_FLAG;
3076
3380
        }
3077
3381
 
3078
 
        mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
3079
 
                         MLOG_1BYTE, mtr);
 
3382
        if (UNIV_LIKELY_NULL(page_zip)) {
 
3383
                mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
 
3384
                page_zip_write_blob_ptr(page_zip, rec, index, offsets, i, mtr);
 
3385
        } else if (UNIV_LIKELY(mtr != NULL)) {
 
3386
 
 
3387
                mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
 
3388
                                 MLOG_1BYTE, mtr);
 
3389
        } else {
 
3390
                mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
 
3391
        }
3080
3392
}
3081
3393
 
3082
3394
/***********************************************************************
3084
3396
is transferred to the updated record which is inserted elsewhere in the
3085
3397
index tree. In purge only the owner of externally stored field is allowed
3086
3398
to free the field. */
3087
 
 
 
3399
UNIV_INTERN
3088
3400
void
3089
3401
btr_cur_mark_extern_inherited_fields(
3090
3402
/*=================================*/
3091
 
        rec_t*          rec,    /* in: record in a clustered index */
 
3403
        page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
 
3404
                                part will be updated, or NULL */
 
3405
        rec_t*          rec,    /* in/out: record in a clustered index */
 
3406
        dict_index_t*   index,  /* in: index of the page */
3092
3407
        const ulint*    offsets,/* in: array returned by rec_get_offsets() */
3093
 
        upd_t*          update, /* in: update vector */
3094
 
        mtr_t*          mtr)    /* in: mtr */
 
3408
        const upd_t*    update, /* in: update vector */
 
3409
        mtr_t*          mtr)    /* in: mtr, or NULL if not logged */
3095
3410
{
3096
 
        ibool   is_updated;
3097
3411
        ulint   n;
3098
3412
        ulint   j;
3099
3413
        ulint   i;
3100
3414
 
3101
3415
        ut_ad(rec_offs_validate(rec, NULL, offsets));
3102
3416
        ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
 
3417
 
 
3418
        if (!rec_offs_any_extern(offsets)) {
 
3419
 
 
3420
                return;
 
3421
        }
 
3422
 
3103
3423
        n = rec_offs_n_fields(offsets);
3104
3424
 
3105
3425
        for (i = 0; i < n; i++) {
3106
3426
                if (rec_offs_nth_extern(offsets, i)) {
3107
3427
 
3108
3428
                        /* Check it is not in updated fields */
3109
 
                        is_updated = FALSE;
3110
3429
 
3111
3430
                        if (update) {
3112
3431
                                for (j = 0; j < upd_get_n_fields(update);
3113
3432
                                     j++) {
3114
3433
                                        if (upd_get_nth_field(update, j)
3115
3434
                                            ->field_no == i) {
3116
 
                                                is_updated = TRUE;
 
3435
 
 
3436
                                                goto updated;
3117
3437
                                        }
3118
3438
                                }
3119
3439
                        }
3120
3440
 
3121
 
                        if (!is_updated) {
3122
 
                                btr_cur_set_ownership_of_extern_field(
3123
 
                                        rec, offsets, i, FALSE, mtr);
3124
 
                        }
 
3441
                        btr_cur_set_ownership_of_extern_field(
 
3442
                                page_zip, rec, index, offsets, i, FALSE, mtr);
 
3443
updated:
 
3444
                        ;
3125
3445
                }
3126
3446
        }
3127
3447
}
3130
3450
The complement of the previous function: in an update, an entry may inherit
3131
3451
some externally stored fields from a record. We must mark them as inherited
3132
3452
in entry, so that they are not freed in a rollback. */
3133
 
 
 
3453
UNIV_INTERN
3134
3454
void
3135
3455
btr_cur_mark_dtuple_inherited_extern(
3136
3456
/*=================================*/
3137
 
        dtuple_t*       entry,          /* in: updated entry to be inserted to
3138
 
                                        clustered index */
3139
 
        ulint*          ext_vec,        /* in: array of extern fields in the
3140
 
                                        original record */
3141
 
        ulint           n_ext_vec,      /* in: number of elements in ext_vec */
3142
 
        upd_t*          update)         /* in: update vector */
 
3457
        dtuple_t*       entry,          /* in/out: updated entry to be
 
3458
                                        inserted to clustered index */
 
3459
        const upd_t*    update)         /* in: update vector */
3143
3460
{
3144
 
        dfield_t* dfield;
3145
 
        ulint   byte_val;
3146
 
        byte*   data;
3147
 
        ulint   len;
3148
 
        ibool   is_updated;
3149
 
        ulint   j;
3150
 
        ulint   i;
3151
 
 
3152
 
        if (ext_vec == NULL) {
3153
 
 
3154
 
                return;
3155
 
        }
3156
 
 
3157
 
        for (i = 0; i < n_ext_vec; i++) {
3158
 
 
3159
 
                /* Check ext_vec[i] is in updated fields */
3160
 
                is_updated = FALSE;
 
3461
        ulint           i;
 
3462
 
 
3463
        for (i = 0; i < dtuple_get_n_fields(entry); i++) {
 
3464
 
 
3465
                dfield_t*       dfield = dtuple_get_nth_field(entry, i);
 
3466
                byte*           data;
 
3467
                ulint           len;
 
3468
                ulint           j;
 
3469
 
 
3470
                if (!dfield_is_ext(dfield)) {
 
3471
                        continue;
 
3472
                }
 
3473
 
 
3474
                /* Check if it is in updated fields */
3161
3475
 
3162
3476
                for (j = 0; j < upd_get_n_fields(update); j++) {
3163
 
                        if (upd_get_nth_field(update, j)->field_no
3164
 
                            == ext_vec[i]) {
3165
 
                                is_updated = TRUE;
 
3477
                        if (upd_get_nth_field(update, j)->field_no == i) {
 
3478
 
 
3479
                                goto is_updated;
3166
3480
                        }
3167
3481
                }
3168
3482
 
3169
 
                if (!is_updated) {
3170
 
                        dfield = dtuple_get_nth_field(entry, ext_vec[i]);
3171
 
 
3172
 
                        data = (byte*) dfield_get_data(dfield);
3173
 
                        len = dfield_get_len(dfield);
3174
 
 
3175
 
                        len -= BTR_EXTERN_FIELD_REF_SIZE;
3176
 
 
3177
 
                        byte_val = mach_read_from_1(data + len
3178
 
                                                    + BTR_EXTERN_LEN);
3179
 
 
3180
 
                        byte_val = byte_val | BTR_EXTERN_INHERITED_FLAG;
3181
 
 
3182
 
                        mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val);
3183
 
                }
 
3483
                data = dfield_get_data(dfield);
 
3484
                len = dfield_get_len(dfield);
 
3485
                data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
 
3486
                        |= BTR_EXTERN_INHERITED_FLAG;
 
3487
 
 
3488
is_updated:
 
3489
                ;
3184
3490
        }
3185
3491
}
3186
3492
 
3192
3498
void
3193
3499
btr_cur_unmark_extern_fields(
3194
3500
/*=========================*/
3195
 
        rec_t*          rec,    /* in: record in a clustered index */
3196
 
        mtr_t*          mtr,    /* in: mtr */
3197
 
        const ulint*    offsets)/* in: array returned by rec_get_offsets() */
 
3501
        page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
 
3502
                                part will be updated, or NULL */
 
3503
        rec_t*          rec,    /* in/out: record in a clustered index */
 
3504
        dict_index_t*   index,  /* in: index of the page */
 
3505
        const ulint*    offsets,/* in: array returned by rec_get_offsets() */
 
3506
        mtr_t*          mtr)    /* in: mtr, or NULL if not logged */
3198
3507
{
3199
3508
        ulint   n;
3200
3509
        ulint   i;
3202
3511
        ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
3203
3512
        n = rec_offs_n_fields(offsets);
3204
3513
 
 
3514
        if (!rec_offs_any_extern(offsets)) {
 
3515
 
 
3516
                return;
 
3517
        }
 
3518
 
3205
3519
        for (i = 0; i < n; i++) {
3206
3520
                if (rec_offs_nth_extern(offsets, i)) {
3207
3521
 
3208
 
                        btr_cur_set_ownership_of_extern_field(rec, offsets, i,
3209
 
                                                              TRUE, mtr);
 
3522
                        btr_cur_set_ownership_of_extern_field(
 
3523
                                page_zip, rec, index, offsets, i, TRUE, mtr);
3210
3524
                }
3211
3525
        }
3212
3526
}
3213
3527
 
3214
3528
/***********************************************************************
3215
3529
Marks all extern fields in a dtuple as owned by the record. */
3216
 
 
 
3530
UNIV_INTERN
3217
3531
void
3218
3532
btr_cur_unmark_dtuple_extern_fields(
3219
3533
/*================================*/
3220
 
        dtuple_t*       entry,          /* in: clustered index entry */
3221
 
        ulint*          ext_vec,        /* in: array of numbers of fields
3222
 
                                        which have been stored externally */
3223
 
        ulint           n_ext_vec)      /* in: number of elements in ext_vec */
 
3534
        dtuple_t*       entry)          /* in/out: clustered index entry */
3224
3535
{
3225
 
        dfield_t* dfield;
3226
 
        ulint   byte_val;
3227
 
        byte*   data;
3228
 
        ulint   len;
3229
3536
        ulint   i;
3230
3537
 
3231
 
        for (i = 0; i < n_ext_vec; i++) {
3232
 
                dfield = dtuple_get_nth_field(entry, ext_vec[i]);
3233
 
 
3234
 
                data = (byte*) dfield_get_data(dfield);
3235
 
                len = dfield_get_len(dfield);
3236
 
 
3237
 
                len -= BTR_EXTERN_FIELD_REF_SIZE;
3238
 
 
3239
 
                byte_val = mach_read_from_1(data + len + BTR_EXTERN_LEN);
3240
 
 
3241
 
                byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG);
3242
 
 
3243
 
                mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val);
 
3538
        for (i = 0; i < dtuple_get_n_fields(entry); i++) {
 
3539
                dfield_t* dfield = dtuple_get_nth_field(entry, i);
 
3540
 
 
3541
                if (dfield_is_ext(dfield)) {
 
3542
                        byte*   data = dfield_get_data(dfield);
 
3543
                        ulint   len = dfield_get_len(dfield);
 
3544
 
 
3545
                        data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
 
3546
                                &= ~BTR_EXTERN_OWNER_FLAG;
 
3547
                }
3244
3548
        }
3245
3549
}
3246
3550
 
3247
3551
/***********************************************************************
3248
 
Stores the positions of the fields marked as extern storage in the update
3249
 
vector, and also those fields who are marked as extern storage in rec
3250
 
and not mentioned in updated fields. We use this function to remember
3251
 
which fields we must mark as extern storage in a record inserted for an
3252
 
update. */
3253
 
 
 
3552
Flags the data tuple fields that are marked as extern storage in the
 
3553
update vector.  We use this function to remember which fields we must
 
3554
mark as extern storage in a record inserted for an update. */
 
3555
UNIV_INTERN
3254
3556
ulint
3255
3557
btr_push_update_extern_fields(
3256
3558
/*==========================*/
3257
 
                                /* out: number of values stored in ext_vect */
3258
 
        ulint*          ext_vect,/* in: array of ulints, must be preallocated
3259
 
                                to have space for all fields in rec */
3260
 
        const ulint*    offsets,/* in: array returned by rec_get_offsets() */
3261
 
        upd_t*          update) /* in: update vector or NULL */
 
3559
                                /* out: number of flagged external columns */
 
3560
        dtuple_t*       tuple,  /* in/out: data tuple */
 
3561
        const upd_t*    update, /* in: update vector */
 
3562
        mem_heap_t*     heap)   /* in: memory heap */
3262
3563
{
3263
 
        ulint   n_pushed        = 0;
3264
 
        ibool   is_updated;
3265
 
        ulint   n;
3266
 
        ulint   j;
3267
 
        ulint   i;
3268
 
 
3269
 
        if (update) {
3270
 
                n = upd_get_n_fields(update);
3271
 
 
3272
 
                for (i = 0; i < n; i++) {
3273
 
 
3274
 
                        if (upd_get_nth_field(update, i)->extern_storage) {
3275
 
 
3276
 
                                ext_vect[n_pushed] = upd_get_nth_field(
3277
 
                                        update, i)->field_no;
3278
 
 
3279
 
                                n_pushed++;
3280
 
                        }
3281
 
                }
3282
 
        }
3283
 
 
3284
 
        n = rec_offs_n_fields(offsets);
3285
 
 
3286
 
        for (i = 0; i < n; i++) {
3287
 
                if (rec_offs_nth_extern(offsets, i)) {
3288
 
 
3289
 
                        /* Check it is not in updated fields */
3290
 
                        is_updated = FALSE;
3291
 
 
3292
 
                        if (update) {
3293
 
                                for (j = 0; j < upd_get_n_fields(update);
3294
 
                                     j++) {
3295
 
                                        if (upd_get_nth_field(update, j)
3296
 
                                            ->field_no == i) {
3297
 
                                                is_updated = TRUE;
3298
 
                                        }
3299
 
                                }
3300
 
                        }
3301
 
 
3302
 
                        if (!is_updated) {
3303
 
                                ext_vect[n_pushed] = i;
3304
 
                                n_pushed++;
 
3564
        ulint                   n_pushed        = 0;
 
3565
        ulint                   n;
 
3566
        const upd_field_t*      uf;
 
3567
 
 
3568
        ut_ad(tuple);
 
3569
        ut_ad(update);
 
3570
 
 
3571
        uf = update->fields;
 
3572
        n = upd_get_n_fields(update);
 
3573
 
 
3574
        for (; n--; uf++) {
 
3575
                if (dfield_is_ext(&uf->new_val)) {
 
3576
                        dfield_t*       field
 
3577
                                = dtuple_get_nth_field(tuple, uf->field_no);
 
3578
 
 
3579
                        if (!dfield_is_ext(field)) {
 
3580
                                dfield_set_ext(field);
 
3581
                                n_pushed++;
 
3582
                        }
 
3583
 
 
3584
                        switch (uf->orig_len) {
 
3585
                                byte*   data;
 
3586
                                ulint   len;
 
3587
                                byte*   buf;
 
3588
                        case 0:
 
3589
                                break;
 
3590
                        case BTR_EXTERN_FIELD_REF_SIZE:
 
3591
                                /* Restore the original locally stored
 
3592
                                part of the column.  In the undo log,
 
3593
                                InnoDB writes a longer prefix of externally
 
3594
                                stored columns, so that column prefixes
 
3595
                                in secondary indexes can be reconstructed. */
 
3596
                                dfield_set_data(field, (byte*) dfield_get_data(field)
 
3597
                                                + dfield_get_len(field)
 
3598
                                                - BTR_EXTERN_FIELD_REF_SIZE,
 
3599
                                                BTR_EXTERN_FIELD_REF_SIZE);
 
3600
                                dfield_set_ext(field);
 
3601
                                break;
 
3602
                        default:
 
3603
                                /* Reconstruct the original locally
 
3604
                                stored part of the column.  The data
 
3605
                                will have to be copied. */
 
3606
                                ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);
 
3607
 
 
3608
                                data = dfield_get_data(field);
 
3609
                                len = dfield_get_len(field);
 
3610
 
 
3611
                                buf = mem_heap_alloc(heap, uf->orig_len);
 
3612
                                /* Copy the locally stored prefix. */
 
3613
                                memcpy(buf, data,
 
3614
                                       uf->orig_len
 
3615
                                       - BTR_EXTERN_FIELD_REF_SIZE);
 
3616
                                /* Copy the BLOB pointer. */
 
3617
                                memcpy(buf + uf->orig_len
 
3618
                                       - BTR_EXTERN_FIELD_REF_SIZE,
 
3619
                                       data + len - BTR_EXTERN_FIELD_REF_SIZE,
 
3620
                                       BTR_EXTERN_FIELD_REF_SIZE);
 
3621
 
 
3622
                                dfield_set_data(field, buf, uf->orig_len);
 
3623
                                dfield_set_ext(field);
3305
3624
                        }
3306
3625
                }
3307
3626
        }
3315
3634
ulint
3316
3635
btr_blob_get_part_len(
3317
3636
/*==================*/
3318
 
                                /* out: part length */
3319
 
        byte*   blob_header)    /* in: blob header */
 
3637
                                        /* out: part length */
 
3638
        const byte*     blob_header)    /* in: blob header */
3320
3639
{
3321
3640
        return(mach_read_from_4(blob_header + BTR_BLOB_HDR_PART_LEN));
3322
3641
}
3327
3646
ulint
3328
3647
btr_blob_get_next_page_no(
3329
3648
/*======================*/
3330
 
                                /* out: page number or FIL_NULL if
3331
 
                                no more pages */
3332
 
        byte*   blob_header)    /* in: blob header */
 
3649
                                        /* out: page number or FIL_NULL if
 
3650
                                        no more pages */
 
3651
        const byte*     blob_header)    /* in: blob header */
3333
3652
{
3334
3653
        return(mach_read_from_4(blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO));
3335
3654
}
3336
3655
 
3337
3656
/***********************************************************************
 
3657
Deallocate a buffer block that was reserved for a BLOB part. */
 
3658
static
 
3659
void
 
3660
btr_blob_free(
 
3661
/*==========*/
 
3662
        buf_block_t*    block,  /* in: buffer block */
 
3663
        ibool           all,    /* in: TRUE=remove also the compressed page
 
3664
                                if there is one */
 
3665
        mtr_t*          mtr)    /* in: mini-transaction to commit */
 
3666
{
 
3667
        ulint   space   = buf_block_get_space(block);
 
3668
        ulint   page_no = buf_block_get_page_no(block);
 
3669
 
 
3670
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 
3671
 
 
3672
        mtr_commit(mtr);
 
3673
 
 
3674
        buf_pool_mutex_enter();
 
3675
        mutex_enter(&block->mutex);
 
3676
 
 
3677
        /* Only free the block if it is still allocated to
 
3678
        the same file page. */
 
3679
 
 
3680
        if (buf_block_get_state(block)
 
3681
            == BUF_BLOCK_FILE_PAGE
 
3682
            && buf_block_get_space(block) == space
 
3683
            && buf_block_get_page_no(block) == page_no) {
 
3684
 
 
3685
                if (buf_LRU_free_block(&block->page, all, NULL)
 
3686
                    != BUF_LRU_FREED
 
3687
                    && all && block->page.zip.data) {
 
3688
                        /* Attempt to deallocate the uncompressed page
 
3689
                        if the whole block cannot be deallocated. */
 
3690
 
 
3691
                        buf_LRU_free_block(&block->page, FALSE, NULL);
 
3692
                }
 
3693
        }
 
3694
 
 
3695
        buf_pool_mutex_exit();
 
3696
        mutex_exit(&block->mutex);
 
3697
}
 
3698
 
 
3699
/***********************************************************************
3338
3700
Stores the fields in big_rec_vec to the tablespace and puts pointers to
3339
 
them in rec. The fields are stored on pages allocated from leaf node
 
3701
them in rec.  The extern flags in rec will have to be set beforehand.
 
3702
The fields are stored on pages allocated from leaf node
3340
3703
file segment of the index tree. */
3341
 
 
 
3704
UNIV_INTERN
3342
3705
ulint
3343
3706
btr_store_big_rec_extern_fields(
3344
3707
/*============================*/
3345
3708
                                        /* out: DB_SUCCESS or error */
3346
3709
        dict_index_t*   index,          /* in: index of rec; the index tree
3347
3710
                                        MUST be X-latched */
3348
 
        rec_t*          rec,            /* in: record */
 
3711
        buf_block_t*    rec_block,      /* in/out: block containing rec */
 
3712
        rec_t*          rec,            /* in/out: record */
3349
3713
        const ulint*    offsets,        /* in: rec_get_offsets(rec, index);
3350
3714
                                        the "external storage" flags in offsets
3351
3715
                                        will not correspond to rec when
3356
3720
                                        containing the latch to rec and to the
3357
3721
                                        tree */
3358
3722
{
3359
 
        byte*   data;
3360
 
        ulint   local_len;
 
3723
        ulint   rec_page_no;
 
3724
        byte*   field_ref;
3361
3725
        ulint   extern_len;
3362
3726
        ulint   store_len;
3363
3727
        ulint   page_no;
3364
 
        page_t* page;
3365
3728
        ulint   space_id;
3366
 
        page_t* prev_page;
3367
 
        page_t* rec_page;
 
3729
        ulint   zip_size;
3368
3730
        ulint   prev_page_no;
3369
3731
        ulint   hint_page_no;
3370
3732
        ulint   i;
3371
3733
        mtr_t   mtr;
 
3734
        mem_heap_t* heap = NULL;
 
3735
        page_zip_des_t* page_zip;
 
3736
        z_stream c_stream;
3372
3737
 
3373
3738
        ut_ad(rec_offs_validate(rec, index, offsets));
3374
3739
        ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
3375
3740
                                MTR_MEMO_X_LOCK));
3376
 
        ut_ad(mtr_memo_contains(local_mtr, buf_block_align(rec),
3377
 
                                MTR_MEMO_PAGE_X_FIX));
3378
 
        ut_a(index->type & DICT_CLUSTERED);
3379
 
 
3380
 
        space_id = buf_frame_get_space_id(rec);
 
3741
        ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
 
3742
        ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
 
3743
        ut_a(dict_index_is_clust(index));
 
3744
 
 
3745
        page_zip = buf_block_get_page_zip(rec_block);
 
3746
        ut_a(dict_table_zip_size(index->table)
 
3747
             == buf_block_get_zip_size(rec_block));
 
3748
 
 
3749
        space_id = buf_block_get_space(rec_block);
 
3750
        zip_size = buf_block_get_zip_size(rec_block);
 
3751
        rec_page_no = buf_block_get_page_no(rec_block);
 
3752
        ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
 
3753
 
 
3754
        if (UNIV_LIKELY_NULL(page_zip)) {
 
3755
                int     err;
 
3756
 
 
3757
                /* Zlib deflate needs 128 kilobytes for the default
 
3758
                window size, plus 512 << memLevel, plus a few
 
3759
                kilobytes for small objects.  We use reduced memLevel
 
3760
                to limit the memory consumption, and preallocate the
 
3761
                heap, hoping to avoid memory fragmentation. */
 
3762
                heap = mem_heap_create(250000);
 
3763
                page_zip_set_alloc(&c_stream, heap);
 
3764
 
 
3765
                err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
 
3766
                                   Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
 
3767
                ut_a(err == Z_OK);
 
3768
        }
3381
3769
 
3382
3770
        /* We have to create a file segment to the tablespace
3383
3771
        for each field and put the pointer to the field in rec */
3384
3772
 
3385
3773
        for (i = 0; i < big_rec_vec->n_fields; i++) {
3386
 
 
3387
 
                data = rec_get_nth_field(rec, offsets,
3388
 
                                         big_rec_vec->fields[i].field_no,
3389
 
                                         &local_len);
3390
 
                ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
3391
 
                local_len -= BTR_EXTERN_FIELD_REF_SIZE;
 
3774
                ut_ad(rec_offs_nth_extern(offsets,
 
3775
                                          big_rec_vec->fields[i].field_no));
 
3776
                {
 
3777
                        ulint   local_len;
 
3778
                        field_ref = rec_get_nth_field(
 
3779
                                rec, offsets, big_rec_vec->fields[i].field_no,
 
3780
                                &local_len);
 
3781
                        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
 
3782
                        local_len -= BTR_EXTERN_FIELD_REF_SIZE;
 
3783
                        field_ref += local_len;
 
3784
                }
3392
3785
                extern_len = big_rec_vec->fields[i].len;
3393
3786
 
3394
3787
                ut_a(extern_len > 0);
3395
3788
 
3396
3789
                prev_page_no = FIL_NULL;
3397
3790
 
3398
 
                while (extern_len > 0) {
 
3791
                if (UNIV_LIKELY_NULL(page_zip)) {
 
3792
                        int     err = deflateReset(&c_stream);
 
3793
                        ut_a(err == Z_OK);
 
3794
 
 
3795
                        c_stream.next_in = (void*) big_rec_vec->fields[i].data;
 
3796
                        c_stream.avail_in = extern_len;
 
3797
                }
 
3798
 
 
3799
                for (;;) {
 
3800
                        buf_block_t*    block;
 
3801
                        page_t*         page;
 
3802
 
3399
3803
                        mtr_start(&mtr);
3400
3804
 
3401
3805
                        if (prev_page_no == FIL_NULL) {
3402
 
                                hint_page_no = buf_frame_get_page_no(rec) + 1;
 
3806
                                hint_page_no = 1 + rec_page_no;
3403
3807
                        } else {
3404
3808
                                hint_page_no = prev_page_no + 1;
3405
3809
                        }
3406
3810
 
3407
 
                        page = btr_page_alloc(index, hint_page_no,
3408
 
                                              FSP_NO_DIR, 0, &mtr);
3409
 
                        if (page == NULL) {
 
3811
                        block = btr_page_alloc(index, hint_page_no,
 
3812
                                               FSP_NO_DIR, 0, &mtr);
 
3813
                        if (UNIV_UNLIKELY(block == NULL)) {
3410
3814
 
3411
3815
                                mtr_commit(&mtr);
3412
3816
 
 
3817
                                if (UNIV_LIKELY_NULL(page_zip)) {
 
3818
                                        deflateEnd(&c_stream);
 
3819
                                        mem_heap_free(heap);
 
3820
                                }
 
3821
 
3413
3822
                                return(DB_OUT_OF_FILE_SPACE);
3414
3823
                        }
3415
3824
 
3416
 
                        mlog_write_ulint(page + FIL_PAGE_TYPE,
3417
 
                                         FIL_PAGE_TYPE_BLOB,
3418
 
                                         MLOG_2BYTES, &mtr);
3419
 
 
3420
 
                        page_no = buf_frame_get_page_no(page);
 
3825
                        page_no = buf_block_get_page_no(block);
 
3826
                        page = buf_block_get_frame(block);
3421
3827
 
3422
3828
                        if (prev_page_no != FIL_NULL) {
3423
 
                                prev_page = buf_page_get(space_id,
3424
 
                                                         prev_page_no,
 
3829
                                buf_block_t*    prev_block;
 
3830
                                page_t*         prev_page;
 
3831
 
 
3832
                                prev_block = buf_page_get(space_id, zip_size,
 
3833
                                                          prev_page_no,
 
3834
                                                          RW_X_LATCH, &mtr);
 
3835
#ifdef UNIV_SYNC_DEBUG
 
3836
                                buf_block_dbg_add_level(prev_block,
 
3837
                                                        SYNC_EXTERN_STORAGE);
 
3838
#endif /* UNIV_SYNC_DEBUG */
 
3839
                                prev_page = buf_block_get_frame(prev_block);
 
3840
 
 
3841
                                if (UNIV_LIKELY_NULL(page_zip)) {
 
3842
                                        mlog_write_ulint(
 
3843
                                                prev_page + FIL_PAGE_NEXT,
 
3844
                                                page_no, MLOG_4BYTES, &mtr);
 
3845
                                        memcpy(buf_block_get_page_zip(
 
3846
                                                       prev_block)
 
3847
                                               ->data + FIL_PAGE_NEXT,
 
3848
                                               prev_page + FIL_PAGE_NEXT, 4);
 
3849
                                } else {
 
3850
                                        mlog_write_ulint(
 
3851
                                                prev_page + FIL_PAGE_DATA
 
3852
                                                + BTR_BLOB_HDR_NEXT_PAGE_NO,
 
3853
                                                page_no, MLOG_4BYTES, &mtr);
 
3854
                                }
 
3855
 
 
3856
                        }
 
3857
 
 
3858
                        if (UNIV_LIKELY_NULL(page_zip)) {
 
3859
                                int             err;
 
3860
                                page_zip_des_t* blob_page_zip;
 
3861
 
 
3862
                                mach_write_to_2(page + FIL_PAGE_TYPE,
 
3863
                                                prev_page_no == FIL_NULL
 
3864
                                                ? FIL_PAGE_TYPE_ZBLOB
 
3865
                                                : FIL_PAGE_TYPE_ZBLOB2);
 
3866
 
 
3867
                                c_stream.next_out = page
 
3868
                                        + FIL_PAGE_DATA;
 
3869
                                c_stream.avail_out
 
3870
                                        = page_zip_get_size(page_zip)
 
3871
                                        - FIL_PAGE_DATA;
 
3872
 
 
3873
                                err = deflate(&c_stream, Z_FINISH);
 
3874
                                ut_a(err == Z_OK || err == Z_STREAM_END);
 
3875
                                ut_a(err == Z_STREAM_END
 
3876
                                     || c_stream.avail_out == 0);
 
3877
 
 
3878
                                /* Write the "next BLOB page" pointer */
 
3879
                                mlog_write_ulint(page + FIL_PAGE_NEXT,
 
3880
                                                 FIL_NULL, MLOG_4BYTES, &mtr);
 
3881
                                /* Initialize the unused "prev page" pointer */
 
3882
                                mlog_write_ulint(page + FIL_PAGE_PREV,
 
3883
                                                 FIL_NULL, MLOG_4BYTES, &mtr);
 
3884
                                /* Write a back pointer to the record
 
3885
                                into the otherwise unused area.  This
 
3886
                                information could be useful in
 
3887
                                debugging.  Later, we might want to
 
3888
                                implement the possibility to relocate
 
3889
                                BLOB pages.  Then, we would need to be
 
3890
                                able to adjust the BLOB pointer in the
 
3891
                                record.  We do not store the heap
 
3892
                                number of the record, because it can
 
3893
                                change in page_zip_reorganize() or
 
3894
                                btr_page_reorganize().  However, also
 
3895
                                the page number of the record may
 
3896
                                change when B-tree nodes are split or
 
3897
                                merged. */
 
3898
                                mlog_write_ulint(page
 
3899
                                                 + FIL_PAGE_FILE_FLUSH_LSN,
 
3900
                                                 space_id,
 
3901
                                                 MLOG_4BYTES, &mtr);
 
3902
                                mlog_write_ulint(page
 
3903
                                                 + FIL_PAGE_FILE_FLUSH_LSN + 4,
 
3904
                                                 rec_page_no,
 
3905
                                                 MLOG_4BYTES, &mtr);
 
3906
 
 
3907
                                /* Zero out the unused part of the page. */
 
3908
                                memset(page + page_zip_get_size(page_zip)
 
3909
                                       - c_stream.avail_out,
 
3910
                                       0, c_stream.avail_out);
 
3911
                                mlog_log_string(page + FIL_PAGE_TYPE,
 
3912
                                                page_zip_get_size(page_zip)
 
3913
                                                - FIL_PAGE_TYPE,
 
3914
                                                &mtr);
 
3915
                                /* Copy the page to compressed storage,
 
3916
                                because it will be flushed to disk
 
3917
                                from there. */
 
3918
                                blob_page_zip = buf_block_get_page_zip(block);
 
3919
                                ut_ad(blob_page_zip);
 
3920
                                ut_ad(page_zip_get_size(blob_page_zip)
 
3921
                                      == page_zip_get_size(page_zip));
 
3922
                                memcpy(blob_page_zip->data, page,
 
3923
                                       page_zip_get_size(page_zip));
 
3924
 
 
3925
                                if (err == Z_OK && prev_page_no != FIL_NULL) {
 
3926
 
 
3927
                                        goto next_zip_page;
 
3928
                                }
 
3929
 
 
3930
                                rec_block = buf_page_get(space_id, zip_size,
 
3931
                                                         rec_page_no,
3425
3932
                                                         RW_X_LATCH, &mtr);
3426
 
 
3427
3933
#ifdef UNIV_SYNC_DEBUG
3428
 
                                buf_page_dbg_add_level(prev_page,
3429
 
                                                       SYNC_EXTERN_STORAGE);
 
3934
                                buf_block_dbg_add_level(rec_block,
 
3935
                                                        SYNC_NO_ORDER_CHECK);
3430
3936
#endif /* UNIV_SYNC_DEBUG */
3431
 
 
3432
 
                                mlog_write_ulint(prev_page + FIL_PAGE_DATA
 
3937
                                if (err == Z_STREAM_END) {
 
3938
                                        mach_write_to_4(field_ref
 
3939
                                                        + BTR_EXTERN_LEN, 0);
 
3940
                                        mach_write_to_4(field_ref
 
3941
                                                        + BTR_EXTERN_LEN + 4,
 
3942
                                                        c_stream.total_in);
 
3943
                                } else {
 
3944
                                        memset(field_ref + BTR_EXTERN_LEN,
 
3945
                                               0, 8);
 
3946
                                }
 
3947
 
 
3948
                                if (prev_page_no == FIL_NULL) {
 
3949
                                        mach_write_to_4(field_ref
 
3950
                                                        + BTR_EXTERN_SPACE_ID,
 
3951
                                                        space_id);
 
3952
 
 
3953
                                        mach_write_to_4(field_ref
 
3954
                                                        + BTR_EXTERN_PAGE_NO,
 
3955
                                                        page_no);
 
3956
 
 
3957
                                        mach_write_to_4(field_ref
 
3958
                                                        + BTR_EXTERN_OFFSET,
 
3959
                                                        FIL_PAGE_NEXT);
 
3960
                                }
 
3961
 
 
3962
                                page_zip_write_blob_ptr(
 
3963
                                        page_zip, rec, index, offsets,
 
3964
                                        big_rec_vec->fields[i].field_no, &mtr);
 
3965
 
 
3966
next_zip_page:
 
3967
                                prev_page_no = page_no;
 
3968
 
 
3969
                                /* Commit mtr and release the
 
3970
                                uncompressed page frame to save memory. */
 
3971
                                btr_blob_free(block, FALSE, &mtr);
 
3972
 
 
3973
                                if (err == Z_STREAM_END) {
 
3974
                                        break;
 
3975
                                }
 
3976
                        } else {
 
3977
                                mlog_write_ulint(page + FIL_PAGE_TYPE,
 
3978
                                                 FIL_PAGE_TYPE_BLOB,
 
3979
                                                 MLOG_2BYTES, &mtr);
 
3980
 
 
3981
                                if (extern_len > (UNIV_PAGE_SIZE
 
3982
                                                  - FIL_PAGE_DATA
 
3983
                                                  - BTR_BLOB_HDR_SIZE
 
3984
                                                  - FIL_PAGE_DATA_END)) {
 
3985
                                        store_len = UNIV_PAGE_SIZE
 
3986
                                                - FIL_PAGE_DATA
 
3987
                                                - BTR_BLOB_HDR_SIZE
 
3988
                                                - FIL_PAGE_DATA_END;
 
3989
                                } else {
 
3990
                                        store_len = extern_len;
 
3991
                                }
 
3992
 
 
3993
                                mlog_write_string(page + FIL_PAGE_DATA
 
3994
                                                  + BTR_BLOB_HDR_SIZE,
 
3995
                                                  (const byte*)
 
3996
                                                  big_rec_vec->fields[i].data
 
3997
                                                  + big_rec_vec->fields[i].len
 
3998
                                                  - extern_len,
 
3999
                                                  store_len, &mtr);
 
4000
                                mlog_write_ulint(page + FIL_PAGE_DATA
 
4001
                                                 + BTR_BLOB_HDR_PART_LEN,
 
4002
                                                 store_len, MLOG_4BYTES, &mtr);
 
4003
                                mlog_write_ulint(page + FIL_PAGE_DATA
3433
4004
                                                 + BTR_BLOB_HDR_NEXT_PAGE_NO,
3434
 
                                                 page_no, MLOG_4BYTES, &mtr);
3435
 
                        }
3436
 
 
3437
 
                        if (extern_len > (UNIV_PAGE_SIZE - FIL_PAGE_DATA
3438
 
                                          - BTR_BLOB_HDR_SIZE
3439
 
                                          - FIL_PAGE_DATA_END)) {
3440
 
                                store_len = UNIV_PAGE_SIZE - FIL_PAGE_DATA
3441
 
                                        - BTR_BLOB_HDR_SIZE
3442
 
                                        - FIL_PAGE_DATA_END;
3443
 
                        } else {
3444
 
                                store_len = extern_len;
3445
 
                        }
3446
 
 
3447
 
                        mlog_write_string(page + FIL_PAGE_DATA
3448
 
                                          + BTR_BLOB_HDR_SIZE,
3449
 
                                          big_rec_vec->fields[i].data
3450
 
                                          + big_rec_vec->fields[i].len
3451
 
                                          - extern_len,
3452
 
                                          store_len, &mtr);
3453
 
                        mlog_write_ulint(page + FIL_PAGE_DATA
3454
 
                                         + BTR_BLOB_HDR_PART_LEN,
3455
 
                                         store_len, MLOG_4BYTES, &mtr);
3456
 
                        mlog_write_ulint(page + FIL_PAGE_DATA
3457
 
                                         + BTR_BLOB_HDR_NEXT_PAGE_NO,
3458
 
                                         FIL_NULL, MLOG_4BYTES, &mtr);
3459
 
 
3460
 
                        extern_len -= store_len;
3461
 
 
3462
 
                        rec_page = buf_page_get(space_id,
3463
 
                                                buf_frame_get_page_no(data),
3464
 
                                                RW_X_LATCH, &mtr);
 
4005
                                                 FIL_NULL, MLOG_4BYTES, &mtr);
 
4006
 
 
4007
                                extern_len -= store_len;
 
4008
 
 
4009
                                rec_block = buf_page_get(space_id, zip_size,
 
4010
                                                         rec_page_no,
 
4011
                                                         RW_X_LATCH, &mtr);
3465
4012
#ifdef UNIV_SYNC_DEBUG
3466
 
                        buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
 
4013
                                buf_block_dbg_add_level(rec_block,
 
4014
                                                        SYNC_NO_ORDER_CHECK);
3467
4015
#endif /* UNIV_SYNC_DEBUG */
3468
 
                        mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, 0,
3469
 
                                         MLOG_4BYTES, &mtr);
3470
 
                        mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
3471
 
                                         big_rec_vec->fields[i].len
3472
 
                                         - extern_len,
3473
 
                                         MLOG_4BYTES, &mtr);
3474
 
 
3475
 
                        if (prev_page_no == FIL_NULL) {
3476
 
                                mlog_write_ulint(data + local_len
3477
 
                                                 + BTR_EXTERN_SPACE_ID,
3478
 
                                                 space_id,
3479
 
                                                 MLOG_4BYTES, &mtr);
3480
 
 
3481
 
                                mlog_write_ulint(data + local_len
3482
 
                                                 + BTR_EXTERN_PAGE_NO,
3483
 
                                                 page_no,
3484
 
                                                 MLOG_4BYTES, &mtr);
3485
 
 
3486
 
                                mlog_write_ulint(data + local_len
3487
 
                                                 + BTR_EXTERN_OFFSET,
3488
 
                                                 FIL_PAGE_DATA,
3489
 
                                                 MLOG_4BYTES, &mtr);
3490
 
 
3491
 
                                /* Set the bit denoting that this field
3492
 
                                in rec is stored externally */
3493
 
 
3494
 
                                rec_set_nth_field_extern_bit(
3495
 
                                        rec, index,
3496
 
                                        big_rec_vec->fields[i].field_no,
3497
 
                                        TRUE, &mtr);
 
4016
 
 
4017
                                mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
 
4018
                                                 MLOG_4BYTES, &mtr);
 
4019
                                mlog_write_ulint(field_ref
 
4020
                                                 + BTR_EXTERN_LEN + 4,
 
4021
                                                 big_rec_vec->fields[i].len
 
4022
                                                 - extern_len,
 
4023
                                                 MLOG_4BYTES, &mtr);
 
4024
 
 
4025
                                if (prev_page_no == FIL_NULL) {
 
4026
                                        mlog_write_ulint(field_ref
 
4027
                                                         + BTR_EXTERN_SPACE_ID,
 
4028
                                                         space_id,
 
4029
                                                         MLOG_4BYTES, &mtr);
 
4030
 
 
4031
                                        mlog_write_ulint(field_ref
 
4032
                                                         + BTR_EXTERN_PAGE_NO,
 
4033
                                                         page_no,
 
4034
                                                         MLOG_4BYTES, &mtr);
 
4035
 
 
4036
                                        mlog_write_ulint(field_ref
 
4037
                                                         + BTR_EXTERN_OFFSET,
 
4038
                                                         FIL_PAGE_DATA,
 
4039
                                                         MLOG_4BYTES, &mtr);
 
4040
                                }
 
4041
 
 
4042
                                prev_page_no = page_no;
 
4043
 
 
4044
                                mtr_commit(&mtr);
 
4045
 
 
4046
                                if (extern_len == 0) {
 
4047
                                        break;
 
4048
                                }
3498
4049
                        }
3499
 
 
3500
 
                        prev_page_no = page_no;
3501
 
 
3502
 
                        mtr_commit(&mtr);
3503
4050
                }
3504
4051
        }
3505
4052
 
 
4053
        if (UNIV_LIKELY_NULL(page_zip)) {
 
4054
                deflateEnd(&c_stream);
 
4055
                mem_heap_free(heap);
 
4056
        }
 
4057
 
3506
4058
        return(DB_SUCCESS);
3507
4059
}
3508
4060
 
3509
4061
/***********************************************************************
3510
4062
Frees the space in an externally stored field to the file space
3511
 
management if the field in data is owned the externally stored field,
 
4063
management if the field in data is owned by the externally stored field,
3512
4064
in a rollback we may have the additional condition that the field must
3513
4065
not be inherited. */
3514
 
 
 
4066
UNIV_INTERN
3515
4067
void
3516
4068
btr_free_externally_stored_field(
3517
4069
/*=============================*/
3523
4075
                                        from purge where 'data' is located on
3524
4076
                                        an undo log page, not an index
3525
4077
                                        page) */
3526
 
        byte*           data,           /* in: internally stored data
3527
 
                                        + reference to the externally
3528
 
                                        stored part */
3529
 
        ulint           local_len,      /* in: length of data */
 
4078
        byte*           field_ref,      /* in/out: field reference */
 
4079
        const rec_t*    rec,            /* in: record containing field_ref, for
 
4080
                                        page_zip_write_blob_ptr(), or NULL */
 
4081
        const ulint*    offsets,        /* in: rec_get_offsets(rec, index),
 
4082
                                        or NULL */
 
4083
        page_zip_des_t* page_zip,       /* in: compressed page corresponding
 
4084
                                        to rec, or NULL if rec == NULL */
 
4085
        ulint           i,              /* in: field number of field_ref;
 
4086
                                        ignored if rec == NULL */
3530
4087
        ibool           do_not_free_inherited,/* in: TRUE if called in a
3531
4088
                                        rollback and we do not want to free
3532
4089
                                        inherited fields */
3534
4091
                                        containing the latch to data and an
3535
4092
                                        X-latch to the index tree */
3536
4093
{
3537
 
        page_t* page;
3538
 
        page_t* rec_page;
3539
 
        ulint   space_id;
3540
 
        ulint   page_no;
3541
 
        ulint   offset;
3542
 
        ulint   extern_len;
3543
 
        ulint   next_page_no;
3544
 
        ulint   part_len;
3545
 
        mtr_t   mtr;
3546
 
 
3547
 
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
 
4094
        page_t*         page;
 
4095
        ulint           space_id;
 
4096
        ulint           rec_zip_size = dict_table_zip_size(index->table);
 
4097
        ulint           ext_zip_size;
 
4098
        ulint           page_no;
 
4099
        ulint           next_page_no;
 
4100
        mtr_t           mtr;
 
4101
#ifdef UNIV_DEBUG
3548
4102
        ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
3549
4103
                                MTR_MEMO_X_LOCK));
3550
 
        ut_ad(mtr_memo_contains(local_mtr, buf_block_align(data),
3551
 
                                MTR_MEMO_PAGE_X_FIX));
3552
 
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
3553
 
        local_len -= BTR_EXTERN_FIELD_REF_SIZE;
 
4104
        ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
 
4105
                                     MTR_MEMO_PAGE_X_FIX));
 
4106
        ut_ad(!rec || rec_offs_validate(rec, index, offsets));
 
4107
 
 
4108
        if (rec) {
 
4109
                ulint   local_len;
 
4110
                const byte*     f = rec_get_nth_field(rec, offsets,
 
4111
                                                      i, &local_len);
 
4112
                ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
 
4113
                local_len -= BTR_EXTERN_FIELD_REF_SIZE;
 
4114
                f += local_len;
 
4115
                ut_ad(f == field_ref);
 
4116
        }
 
4117
#endif /* UNIV_DEBUG */
 
4118
 
 
4119
        space_id = mach_read_from_4(field_ref + BTR_EXTERN_SPACE_ID);
 
4120
 
 
4121
        if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) {
 
4122
                ext_zip_size = fil_space_get_zip_size(space_id);
 
4123
                /* This must be an undo log record in the system tablespace,
 
4124
                that is, in row_purge_upd_exist_or_extern().
 
4125
                Currently, externally stored records are stored in the
 
4126
                same tablespace as the referring records. */
 
4127
                ut_ad(!page_get_space_id(page_align(field_ref)));
 
4128
                ut_ad(!rec);
 
4129
                ut_ad(!page_zip);
 
4130
        } else {
 
4131
                ext_zip_size = rec_zip_size;
 
4132
        }
 
4133
 
 
4134
        if (!rec) {
 
4135
                /* This is a call from row_purge_upd_exist_or_extern(). */
 
4136
                ut_ad(!page_zip);
 
4137
                rec_zip_size = 0;
 
4138
        }
3554
4139
 
3555
4140
        for (;;) {
 
4141
                buf_block_t*    rec_block;
 
4142
                buf_block_t*    ext_block;
 
4143
 
3556
4144
                mtr_start(&mtr);
3557
4145
 
3558
 
                rec_page = buf_page_get(buf_frame_get_space_id(data),
3559
 
                                        buf_frame_get_page_no(data),
3560
 
                                        RW_X_LATCH, &mtr);
3561
 
#ifdef UNIV_SYNC_DEBUG
3562
 
                buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
3563
 
#endif /* UNIV_SYNC_DEBUG */
3564
 
                space_id = mach_read_from_4(data + local_len
3565
 
                                            + BTR_EXTERN_SPACE_ID);
3566
 
 
3567
 
                page_no = mach_read_from_4(data + local_len
3568
 
                                           + BTR_EXTERN_PAGE_NO);
3569
 
 
3570
 
                offset = mach_read_from_4(data + local_len
3571
 
                                          + BTR_EXTERN_OFFSET);
3572
 
                extern_len = mach_read_from_4(data + local_len
3573
 
                                              + BTR_EXTERN_LEN + 4);
3574
 
 
3575
 
                /* If extern len is 0, then there is no external storage data
3576
 
                at all */
3577
 
 
3578
 
                if (extern_len == 0) {
3579
 
 
3580
 
                        mtr_commit(&mtr);
3581
 
 
3582
 
                        return;
3583
 
                }
3584
 
 
3585
 
                if (mach_read_from_1(data + local_len + BTR_EXTERN_LEN)
3586
 
                    & BTR_EXTERN_OWNER_FLAG) {
3587
 
                        /* This field does not own the externally
3588
 
                        stored field: do not free! */
3589
 
 
3590
 
                        mtr_commit(&mtr);
3591
 
 
3592
 
                        return;
3593
 
                }
3594
 
 
3595
 
                if (do_not_free_inherited
3596
 
                    && mach_read_from_1(data + local_len + BTR_EXTERN_LEN)
3597
 
                    & BTR_EXTERN_INHERITED_FLAG) {
3598
 
                        /* Rollback and inherited field: do not free! */
3599
 
 
3600
 
                        mtr_commit(&mtr);
3601
 
 
3602
 
                        return;
3603
 
                }
3604
 
 
3605
 
                page = buf_page_get(space_id, page_no, RW_X_LATCH, &mtr);
3606
 
#ifdef UNIV_SYNC_DEBUG
3607
 
                buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE);
3608
 
#endif /* UNIV_SYNC_DEBUG */
3609
 
                next_page_no = mach_read_from_4(page + FIL_PAGE_DATA
3610
 
                                                + BTR_BLOB_HDR_NEXT_PAGE_NO);
3611
 
 
3612
 
                part_len = btr_blob_get_part_len(page + FIL_PAGE_DATA);
3613
 
 
3614
 
                ut_a(extern_len >= part_len);
3615
 
 
3616
 
                /* We must supply the page level (= 0) as an argument
3617
 
                because we did not store it on the page (we save the space
3618
 
                overhead from an index page header. */
3619
 
 
3620
 
                btr_page_free_low(index, page, 0, &mtr);
3621
 
 
3622
 
                mlog_write_ulint(data + local_len + BTR_EXTERN_PAGE_NO,
3623
 
                                 next_page_no,
3624
 
                                 MLOG_4BYTES, &mtr);
3625
 
                mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
3626
 
                                 extern_len - part_len,
3627
 
                                 MLOG_4BYTES, &mtr);
3628
 
                if (next_page_no == FIL_NULL) {
3629
 
                        ut_a(extern_len - part_len == 0);
3630
 
                }
3631
 
 
3632
 
                if (extern_len - part_len == 0) {
3633
 
                        ut_a(next_page_no == FIL_NULL);
3634
 
                }
3635
 
 
3636
 
                mtr_commit(&mtr);
 
4146
                rec_block = buf_page_get(page_get_space_id(
 
4147
                                                 page_align(field_ref)),
 
4148
                                         rec_zip_size,
 
4149
                                         page_get_page_no(
 
4150
                                                 page_align(field_ref)),
 
4151
                                         RW_X_LATCH, &mtr);
 
4152
#ifdef UNIV_SYNC_DEBUG
 
4153
                buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK);
 
4154
#endif /* UNIV_SYNC_DEBUG */
 
4155
                page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
 
4156
 
 
4157
                if (/* There is no external storage data */
 
4158
                    page_no == FIL_NULL
 
4159
                    /* This field does not own the externally stored field */
 
4160
                    || (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
 
4161
                        & BTR_EXTERN_OWNER_FLAG)
 
4162
                    /* Rollback and inherited field */
 
4163
                    || (do_not_free_inherited
 
4164
                        && (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
 
4165
                            & BTR_EXTERN_INHERITED_FLAG))) {
 
4166
 
 
4167
                        /* Do not free */
 
4168
                        mtr_commit(&mtr);
 
4169
 
 
4170
                        return;
 
4171
                }
 
4172
 
 
4173
                ext_block = buf_page_get(space_id, ext_zip_size, page_no,
 
4174
                                         RW_X_LATCH, &mtr);
 
4175
#ifdef UNIV_SYNC_DEBUG
 
4176
                buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE);
 
4177
#endif /* UNIV_SYNC_DEBUG */
 
4178
                page = buf_block_get_frame(ext_block);
 
4179
 
 
4180
                if (ext_zip_size) {
 
4181
                        /* Note that page_zip will be NULL
 
4182
                        in row_purge_upd_exist_or_extern(). */
 
4183
                        switch (fil_page_get_type(page)) {
 
4184
                        case FIL_PAGE_TYPE_ZBLOB:
 
4185
                        case FIL_PAGE_TYPE_ZBLOB2:
 
4186
                                break;
 
4187
                        default:
 
4188
                                ut_error;
 
4189
                        }
 
4190
                        next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
 
4191
 
 
4192
                        btr_page_free_low(index, ext_block, 0, &mtr);
 
4193
 
 
4194
                        if (UNIV_LIKELY(page_zip != NULL)) {
 
4195
                                mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO,
 
4196
                                                next_page_no);
 
4197
                                mach_write_to_4(field_ref + BTR_EXTERN_LEN + 4,
 
4198
                                                0);
 
4199
                                page_zip_write_blob_ptr(page_zip, rec, index,
 
4200
                                                        offsets, i, &mtr);
 
4201
                        } else {
 
4202
                                mlog_write_ulint(field_ref
 
4203
                                                 + BTR_EXTERN_PAGE_NO,
 
4204
                                                 next_page_no,
 
4205
                                                 MLOG_4BYTES, &mtr);
 
4206
                                mlog_write_ulint(field_ref
 
4207
                                                 + BTR_EXTERN_LEN + 4, 0,
 
4208
                                                 MLOG_4BYTES, &mtr);
 
4209
                        }
 
4210
                } else {
 
4211
                        ulint   extern_len      = mach_read_from_4(
 
4212
                                field_ref + BTR_EXTERN_LEN + 4);
 
4213
                        ulint   part_len        = btr_blob_get_part_len(
 
4214
                                page + FIL_PAGE_DATA);
 
4215
 
 
4216
                        ut_a(fil_page_get_type(page) == FIL_PAGE_TYPE_BLOB);
 
4217
                        ut_a(!page_zip);
 
4218
                        ut_a(extern_len >= part_len);
 
4219
 
 
4220
                        next_page_no = mach_read_from_4(
 
4221
                                page + FIL_PAGE_DATA
 
4222
                                + BTR_BLOB_HDR_NEXT_PAGE_NO);
 
4223
 
 
4224
                        /* We must supply the page level (= 0) as an argument
 
4225
                        because we did not store it on the page (we save the
 
4226
                        space overhead from an index page header). */
 
4227
 
 
4228
                        ut_a(space_id == page_get_space_id(page));
 
4229
                        ut_a(page_no == page_get_page_no(page));
 
4230
 
 
4231
                        btr_page_free_low(index, ext_block, 0, &mtr);
 
4232
 
 
4233
                        mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
 
4234
                                         next_page_no,
 
4235
                                         MLOG_4BYTES, &mtr);
 
4236
                        mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4,
 
4237
                                         extern_len - part_len,
 
4238
                                         MLOG_4BYTES, &mtr);
 
4239
                        if (next_page_no == FIL_NULL) {
 
4240
                                ut_a(extern_len - part_len == 0);
 
4241
                        }
 
4242
 
 
4243
                        if (extern_len - part_len == 0) {
 
4244
                                ut_a(next_page_no == FIL_NULL);
 
4245
                        }
 
4246
                }
 
4247
 
 
4248
                /* Commit mtr and release the BLOB block to save memory. */
 
4249
                btr_blob_free(ext_block, TRUE, &mtr);
3637
4250
        }
3638
4251
}
3639
4252
 
3640
4253
/***************************************************************
3641
4254
Frees the externally stored fields for a record. */
3642
 
 
 
4255
static
3643
4256
void
3644
4257
btr_rec_free_externally_stored_fields(
3645
4258
/*==================================*/
3646
4259
        dict_index_t*   index,  /* in: index of the data, the index
3647
4260
                                tree MUST be X-latched */
3648
 
        rec_t*          rec,    /* in: record */
 
4261
        rec_t*          rec,    /* in/out: record */
3649
4262
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
 
4263
        page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
 
4264
                                part will be updated, or NULL */
3650
4265
        ibool           do_not_free_inherited,/* in: TRUE if called in a
3651
4266
                                rollback and we do not want to free
3652
4267
                                inherited fields */
3655
4270
                                tree */
3656
4271
{
3657
4272
        ulint   n_fields;
3658
 
        byte*   data;
3659
 
        ulint   len;
3660
4273
        ulint   i;
3661
4274
 
3662
4275
        ut_ad(rec_offs_validate(rec, index, offsets));
3663
 
        ut_ad(mtr_memo_contains(mtr, buf_block_align(rec),
3664
 
                                MTR_MEMO_PAGE_X_FIX));
 
4276
        ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
3665
4277
        /* Free possible externally stored fields in the record */
3666
4278
 
3667
4279
        ut_ad(dict_table_is_comp(index->table) == !!rec_offs_comp(offsets));
3669
4281
 
3670
4282
        for (i = 0; i < n_fields; i++) {
3671
4283
                if (rec_offs_nth_extern(offsets, i)) {
 
4284
                        ulint   len;
 
4285
                        byte*   data
 
4286
                                = rec_get_nth_field(rec, offsets, i, &len);
 
4287
                        ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
3672
4288
 
3673
 
                        data = rec_get_nth_field(rec, offsets, i, &len);
3674
 
                        btr_free_externally_stored_field(index, data, len,
3675
 
                                                         do_not_free_inherited,
3676
 
                                                         mtr);
 
4289
                        btr_free_externally_stored_field(
 
4290
                                index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
 
4291
                                rec, offsets, page_zip, i,
 
4292
                                do_not_free_inherited, mtr);
3677
4293
                }
3678
4294
        }
3679
4295
}
3687
4303
/*===============================*/
3688
4304
        dict_index_t*   index,  /* in: index of rec; the index tree MUST be
3689
4305
                                X-latched */
3690
 
        rec_t*          rec,    /* in: record */
 
4306
        rec_t*          rec,    /* in/out: record */
 
4307
        page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
 
4308
                                part will be updated, or NULL */
3691
4309
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
3692
 
        upd_t*          update, /* in: update vector */
3693
 
        ibool           do_not_free_inherited,/* in: TRUE if called in a
3694
 
                                rollback and we do not want to free
3695
 
                                inherited fields */
 
4310
        const upd_t*    update, /* in: update vector */
3696
4311
        mtr_t*          mtr)    /* in: mini-transaction handle which contains
3697
4312
                                an X-latch to record page and to the tree */
3698
4313
{
3699
 
        upd_field_t*    ufield;
3700
 
        ulint           n_fields;
3701
 
        byte*           data;
3702
 
        ulint           len;
3703
 
        ulint           i;
 
4314
        ulint   n_fields;
 
4315
        ulint   i;
3704
4316
 
3705
4317
        ut_ad(rec_offs_validate(rec, index, offsets));
3706
 
        ut_ad(mtr_memo_contains(mtr, buf_block_align(rec),
3707
 
                                MTR_MEMO_PAGE_X_FIX));
 
4318
        ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
3708
4319
 
3709
4320
        /* Free possible externally stored fields in the record */
3710
4321
 
3711
4322
        n_fields = upd_get_n_fields(update);
3712
4323
 
3713
4324
        for (i = 0; i < n_fields; i++) {
3714
 
                ufield = upd_get_nth_field(update, i);
 
4325
                const upd_field_t* ufield = upd_get_nth_field(update, i);
3715
4326
 
3716
4327
                if (rec_offs_nth_extern(offsets, ufield->field_no)) {
3717
 
 
3718
 
                        data = rec_get_nth_field(rec, offsets,
3719
 
                                                 ufield->field_no, &len);
3720
 
                        btr_free_externally_stored_field(index, data, len,
3721
 
                                                         do_not_free_inherited,
3722
 
                                                         mtr);
3723
 
                }
3724
 
        }
3725
 
}
3726
 
 
3727
 
/***********************************************************************
3728
 
Copies an externally stored field of a record to mem heap. Parameter
3729
 
data contains a pointer to 'internally' stored part of the field:
3730
 
possibly some data, and the reference to the externally stored part in
3731
 
the last 20 bytes of data. */
3732
 
 
 
4328
                        ulint   len;
 
4329
                        byte*   data = rec_get_nth_field(
 
4330
                                rec, offsets, ufield->field_no, &len);
 
4331
                        ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
 
4332
 
 
4333
                        btr_free_externally_stored_field(
 
4334
                                index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
 
4335
                                rec, offsets, page_zip,
 
4336
                                ufield->field_no, TRUE, mtr);
 
4337
                }
 
4338
        }
 
4339
}
 
4340
 
 
4341
/***********************************************************************
 
4342
Copies the prefix of an uncompressed BLOB.  The clustered index record
 
4343
that points to this BLOB must be protected by a lock or a page latch. */
 
4344
static
 
4345
ulint
 
4346
btr_copy_blob_prefix(
 
4347
/*=================*/
 
4348
                                /* out: number of bytes written to buf */
 
4349
        byte*           buf,    /* out: the externally stored part of
 
4350
                                the field, or a prefix of it */
 
4351
        ulint           len,    /* in: length of buf, in bytes */
 
4352
        ulint           space_id,/* in: space id of the BLOB pages */
 
4353
        ulint           page_no,/* in: page number of the first BLOB page */
 
4354
        ulint           offset) /* in: offset on the first BLOB page */
 
4355
{
 
4356
        ulint   copied_len      = 0;
 
4357
 
 
4358
        for (;;) {
 
4359
                mtr_t           mtr;
 
4360
                buf_block_t*    block;
 
4361
                const page_t*   page;
 
4362
                const byte*     blob_header;
 
4363
                ulint           part_len;
 
4364
                ulint           copy_len;
 
4365
 
 
4366
                mtr_start(&mtr);
 
4367
 
 
4368
                block = buf_page_get(space_id, 0, page_no, RW_S_LATCH, &mtr);
 
4369
#ifdef UNIV_SYNC_DEBUG
 
4370
                buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE);
 
4371
#endif /* UNIV_SYNC_DEBUG */
 
4372
                page = buf_block_get_frame(block);
 
4373
 
 
4374
                /* Unfortunately, FIL_PAGE_TYPE was uninitialized for
 
4375
                many pages until MySQL/InnoDB 5.1.7. */
 
4376
                /* ut_ad(fil_page_get_type(page) == FIL_PAGE_TYPE_BLOB); */
 
4377
                blob_header = page + offset;
 
4378
                part_len = btr_blob_get_part_len(blob_header);
 
4379
                copy_len = ut_min(part_len, len - copied_len);
 
4380
 
 
4381
                memcpy(buf + copied_len,
 
4382
                       blob_header + BTR_BLOB_HDR_SIZE, copy_len);
 
4383
                copied_len += copy_len;
 
4384
 
 
4385
                page_no = btr_blob_get_next_page_no(blob_header);
 
4386
 
 
4387
                mtr_commit(&mtr);
 
4388
 
 
4389
                if (page_no == FIL_NULL || copy_len != part_len) {
 
4390
                        return(copied_len);
 
4391
                }
 
4392
 
 
4393
                /* On other BLOB pages except the first the BLOB header
 
4394
                always is at the page data start: */
 
4395
 
 
4396
                offset = FIL_PAGE_DATA;
 
4397
 
 
4398
                ut_ad(copied_len <= len);
 
4399
        }
 
4400
}
 
4401
 
 
4402
/***********************************************************************
 
4403
Copies the prefix of a compressed BLOB.  The clustered index record
 
4404
that points to this BLOB must be protected by a lock or a page latch. */
 
4405
static
 
4406
void
 
4407
btr_copy_zblob_prefix(
 
4408
/*==================*/
 
4409
        z_stream*       d_stream,/* in/out: the decompressing stream */
 
4410
        ulint           zip_size,/* in: compressed BLOB page size */
 
4411
        ulint           space_id,/* in: space id of the BLOB pages */
 
4412
        ulint           page_no,/* in: page number of the first BLOB page */
 
4413
        ulint           offset) /* in: offset on the first BLOB page */
 
4414
{
 
4415
        ulint   page_type = FIL_PAGE_TYPE_ZBLOB;
 
4416
 
 
4417
        ut_ad(ut_is_2pow(zip_size));
 
4418
        ut_ad(zip_size >= PAGE_ZIP_MIN_SIZE);
 
4419
        ut_ad(zip_size <= UNIV_PAGE_SIZE);
 
4420
        ut_ad(space_id);
 
4421
 
 
4422
        for (;;) {
 
4423
                buf_page_t*     bpage;
 
4424
                int             err;
 
4425
                ulint           next_page_no;
 
4426
 
 
4427
                /* There is no latch on bpage directly.  Instead,
 
4428
                bpage is protected by the B-tree page latch that
 
4429
                is being held on the clustered index record, or,
 
4430
                in row_merge_copy_blobs(), by an exclusive table lock. */
 
4431
                bpage = buf_page_get_zip(space_id, zip_size, page_no);
 
4432
 
 
4433
                if (UNIV_UNLIKELY(!bpage)) {
 
4434
                        ut_print_timestamp(stderr);
 
4435
                        fprintf(stderr,
 
4436
                                "  InnoDB: Cannot load"
 
4437
                                " compressed BLOB"
 
4438
                                " page %lu space %lu\n",
 
4439
                                (ulong) page_no, (ulong) space_id);
 
4440
                        return;
 
4441
                }
 
4442
 
 
4443
                if (UNIV_UNLIKELY
 
4444
                    (fil_page_get_type(bpage->zip.data) != page_type)) {
 
4445
                        ut_print_timestamp(stderr);
 
4446
                        fprintf(stderr,
 
4447
                                "  InnoDB: Unexpected type %lu of"
 
4448
                                " compressed BLOB"
 
4449
                                " page %lu space %lu\n",
 
4450
                                (ulong) fil_page_get_type(bpage->zip.data),
 
4451
                                (ulong) page_no, (ulong) space_id);
 
4452
                        goto end_of_blob;
 
4453
                }
 
4454
 
 
4455
                next_page_no = mach_read_from_4(bpage->zip.data + offset);
 
4456
 
 
4457
                if (UNIV_LIKELY(offset == FIL_PAGE_NEXT)) {
 
4458
                        /* When the BLOB begins at page header,
 
4459
                        the compressed data payload does not
 
4460
                        immediately follow the next page pointer. */
 
4461
                        offset = FIL_PAGE_DATA;
 
4462
                } else {
 
4463
                        offset += 4;
 
4464
                }
 
4465
 
 
4466
                d_stream->next_in = bpage->zip.data + offset;
 
4467
                d_stream->avail_in = zip_size - offset;
 
4468
 
 
4469
                err = inflate(d_stream, Z_NO_FLUSH);
 
4470
                switch (err) {
 
4471
                case Z_OK:
 
4472
                        if (!d_stream->avail_out) {
 
4473
                                goto end_of_blob;
 
4474
                        }
 
4475
                        break;
 
4476
                case Z_STREAM_END:
 
4477
                        if (next_page_no == FIL_NULL) {
 
4478
                                goto end_of_blob;
 
4479
                        }
 
4480
                        /* fall through */
 
4481
                default:
 
4482
inflate_error:
 
4483
                        ut_print_timestamp(stderr);
 
4484
                        fprintf(stderr,
 
4485
                                "  InnoDB: inflate() of"
 
4486
                                " compressed BLOB"
 
4487
                                " page %lu space %lu returned %d (%s)\n",
 
4488
                                (ulong) page_no, (ulong) space_id,
 
4489
                                err, d_stream->msg);
 
4490
                case Z_BUF_ERROR:
 
4491
                        goto end_of_blob;
 
4492
                }
 
4493
 
 
4494
                if (next_page_no == FIL_NULL) {
 
4495
                        if (!d_stream->avail_in) {
 
4496
                                ut_print_timestamp(stderr);
 
4497
                                fprintf(stderr,
 
4498
                                        "  InnoDB: unexpected end of"
 
4499
                                        " compressed BLOB"
 
4500
                                        " page %lu space %lu\n",
 
4501
                                        (ulong) page_no,
 
4502
                                        (ulong) space_id);
 
4503
                        } else {
 
4504
                                err = inflate(d_stream, Z_FINISH);
 
4505
                                switch (err) {
 
4506
                                case Z_STREAM_END:
 
4507
                                case Z_BUF_ERROR:
 
4508
                                        break;
 
4509
                                default:
 
4510
                                        goto inflate_error;
 
4511
                                }
 
4512
                        }
 
4513
 
 
4514
end_of_blob:
 
4515
                        buf_page_release_zip(bpage);
 
4516
                        return;
 
4517
                }
 
4518
 
 
4519
                buf_page_release_zip(bpage);
 
4520
 
 
4521
                /* On other BLOB pages except the first
 
4522
                the BLOB header always is at the page header: */
 
4523
 
 
4524
                page_no = next_page_no;
 
4525
                offset = FIL_PAGE_NEXT;
 
4526
                page_type = FIL_PAGE_TYPE_ZBLOB2;
 
4527
        }
 
4528
}
 
4529
 
 
4530
/***********************************************************************
 
4531
Copies the prefix of an externally stored field of a record.  The
 
4532
clustered index record that points to this BLOB must be protected by a
 
4533
lock or a page latch. */
 
4534
static
 
4535
ulint
 
4536
btr_copy_externally_stored_field_prefix_low(
 
4537
/*========================================*/
 
4538
                                /* out: number of bytes written to buf */
 
4539
        byte*           buf,    /* out: the externally stored part of
 
4540
                                the field, or a prefix of it */
 
4541
        ulint           len,    /* in: length of buf, in bytes */
 
4542
        ulint           zip_size,/* in: nonzero=compressed BLOB page size,
 
4543
                                zero for uncompressed BLOBs */
 
4544
        ulint           space_id,/* in: space id of the first BLOB page */
 
4545
        ulint           page_no,/* in: page number of the first BLOB page */
 
4546
        ulint           offset) /* in: offset on the first BLOB page */
 
4547
{
 
4548
        if (UNIV_UNLIKELY(len == 0)) {
 
4549
                return(0);
 
4550
        }
 
4551
 
 
4552
        if (UNIV_UNLIKELY(zip_size)) {
 
4553
                int             err;
 
4554
                z_stream        d_stream;
 
4555
                mem_heap_t*     heap;
 
4556
 
 
4557
                /* Zlib inflate needs 32 kilobytes for the default
 
4558
                window size, plus a few kilobytes for small objects. */
 
4559
                heap = mem_heap_create(40000);
 
4560
                page_zip_set_alloc(&d_stream, heap);
 
4561
 
 
4562
                err = inflateInit(&d_stream);
 
4563
                ut_a(err == Z_OK);
 
4564
 
 
4565
                d_stream.next_out = buf;
 
4566
                d_stream.avail_out = len;
 
4567
                d_stream.avail_in = 0;
 
4568
 
 
4569
                btr_copy_zblob_prefix(&d_stream, zip_size,
 
4570
                                      space_id, page_no, offset);
 
4571
                inflateEnd(&d_stream);
 
4572
                mem_heap_free(heap);
 
4573
                return(d_stream.total_out);
 
4574
        } else {
 
4575
                return(btr_copy_blob_prefix(buf, len, space_id,
 
4576
                                            page_no, offset));
 
4577
        }
 
4578
}
 
4579
 
 
4580
/***********************************************************************
 
4581
Copies the prefix of an externally stored field of a record.  The
 
4582
clustered index record must be protected by a lock or a page latch. */
 
4583
UNIV_INTERN
 
4584
ulint
 
4585
btr_copy_externally_stored_field_prefix(
 
4586
/*====================================*/
 
4587
                                /* out: the length of the copied field */
 
4588
        byte*           buf,    /* out: the field, or a prefix of it */
 
4589
        ulint           len,    /* in: length of buf, in bytes */
 
4590
        ulint           zip_size,/* in: nonzero=compressed BLOB page size,
 
4591
                                zero for uncompressed BLOBs */
 
4592
        const byte*     data,   /* in: 'internally' stored part of the
 
4593
                                field containing also the reference to
 
4594
                                the external part; must be protected by
 
4595
                                a lock or a page latch */
 
4596
        ulint           local_len)/* in: length of data, in bytes */
 
4597
{
 
4598
        ulint   space_id;
 
4599
        ulint   page_no;
 
4600
        ulint   offset;
 
4601
 
 
4602
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
 
4603
 
 
4604
        local_len -= BTR_EXTERN_FIELD_REF_SIZE;
 
4605
 
 
4606
        if (UNIV_UNLIKELY(local_len >= len)) {
 
4607
                memcpy(buf, data, len);
 
4608
                return(len);
 
4609
        }
 
4610
 
 
4611
        memcpy(buf, data, local_len);
 
4612
        data += local_len;
 
4613
 
 
4614
        ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));
 
4615
 
 
4616
        space_id = mach_read_from_4(data + BTR_EXTERN_SPACE_ID);
 
4617
 
 
4618
        page_no = mach_read_from_4(data + BTR_EXTERN_PAGE_NO);
 
4619
 
 
4620
        offset = mach_read_from_4(data + BTR_EXTERN_OFFSET);
 
4621
 
 
4622
        return(local_len
 
4623
               + btr_copy_externally_stored_field_prefix_low(buf + local_len,
 
4624
                                                             len - local_len,
 
4625
                                                             zip_size,
 
4626
                                                             space_id, page_no,
 
4627
                                                             offset));
 
4628
}
 
4629
 
 
4630
/***********************************************************************
 
4631
Copies an externally stored field of a record to mem heap.  The
 
4632
clustered index record must be protected by a lock or a page latch. */
 
4633
static
3733
4634
byte*
3734
4635
btr_copy_externally_stored_field(
3735
4636
/*=============================*/
3736
4637
                                /* out: the whole field copied to heap */
3737
4638
        ulint*          len,    /* out: length of the whole field */
3738
 
        byte*           data,   /* in: 'internally' stored part of the
 
4639
        const byte*     data,   /* in: 'internally' stored part of the
3739
4640
                                field containing also the reference to
3740
 
                                the external part */
 
4641
                                the external part; must be protected by
 
4642
                                a lock or a page latch */
 
4643
        ulint           zip_size,/* in: nonzero=compressed BLOB page size,
 
4644
                                zero for uncompressed BLOBs */
3741
4645
        ulint           local_len,/* in: length of data */
3742
4646
        mem_heap_t*     heap)   /* in: mem heap */
3743
4647
{
3744
 
        page_t* page;
3745
4648
        ulint   space_id;
3746
4649
        ulint   page_no;
3747
4650
        ulint   offset;
3748
4651
        ulint   extern_len;
3749
 
        byte*   blob_header;
3750
 
        ulint   part_len;
3751
4652
        byte*   buf;
3752
 
        ulint   copied_len;
3753
 
        mtr_t   mtr;
3754
4653
 
3755
4654
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
3756
4655
 
3762
4661
 
3763
4662
        offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET);
3764
4663
 
3765
 
        /* Currently a BLOB cannot be bigger that 4 GB; we
 
4664
        /* Currently a BLOB cannot be bigger than 4 GB; we
3766
4665
        leave the 4 upper bytes in the length field unused */
3767
4666
 
3768
4667
        extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4);
3769
4668
 
3770
4669
        buf = mem_heap_alloc(heap, local_len + extern_len);
3771
4670
 
3772
 
        ut_memcpy(buf, data, local_len);
3773
 
        copied_len = local_len;
3774
 
 
3775
 
        if (extern_len == 0) {
3776
 
                *len = copied_len;
3777
 
 
3778
 
                return(buf);
3779
 
        }
3780
 
 
3781
 
        for (;;) {
3782
 
                mtr_start(&mtr);
3783
 
 
3784
 
                page = buf_page_get(space_id, page_no, RW_S_LATCH, &mtr);
3785
 
#ifdef UNIV_SYNC_DEBUG
3786
 
                buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE);
3787
 
#endif /* UNIV_SYNC_DEBUG */
3788
 
                blob_header = page + offset;
3789
 
 
3790
 
                part_len = btr_blob_get_part_len(blob_header);
3791
 
 
3792
 
                ut_memcpy(buf + copied_len, blob_header + BTR_BLOB_HDR_SIZE,
3793
 
                          part_len);
3794
 
                copied_len += part_len;
3795
 
 
3796
 
                page_no = btr_blob_get_next_page_no(blob_header);
3797
 
 
3798
 
                mtr_commit(&mtr);
3799
 
 
3800
 
                if (page_no == FIL_NULL) {
3801
 
                        ut_a(copied_len == local_len + extern_len);
3802
 
 
3803
 
                        *len = copied_len;
3804
 
 
3805
 
                        return(buf);
3806
 
                }
3807
 
 
3808
 
                /* On other BLOB pages except the first the BLOB header
3809
 
                always is at the page data start: */
3810
 
 
3811
 
                offset = FIL_PAGE_DATA;
3812
 
 
3813
 
                ut_a(copied_len < local_len + extern_len);
3814
 
        }
 
4671
        memcpy(buf, data, local_len);
 
4672
        *len = local_len
 
4673
                + btr_copy_externally_stored_field_prefix_low(buf + local_len,
 
4674
                                                              extern_len,
 
4675
                                                              zip_size,
 
4676
                                                              space_id,
 
4677
                                                              page_no, offset);
 
4678
 
 
4679
        return(buf);
3815
4680
}
3816
4681
 
3817
4682
/***********************************************************************
3818
4683
Copies an externally stored field of a record to mem heap. */
3819
 
 
 
4684
UNIV_INTERN
3820
4685
byte*
3821
4686
btr_rec_copy_externally_stored_field(
3822
4687
/*=================================*/
3823
4688
                                /* out: the field copied to heap */
3824
 
        rec_t*          rec,    /* in: record */
 
4689
        const rec_t*    rec,    /* in: record in a clustered index;
 
4690
                                must be protected by a lock or a page latch */
3825
4691
        const ulint*    offsets,/* in: array returned by rec_get_offsets() */
 
4692
        ulint           zip_size,/* in: nonzero=compressed BLOB page size,
 
4693
                                zero for uncompressed BLOBs */
3826
4694
        ulint           no,     /* in: field number */
3827
4695
        ulint*          len,    /* out: length of the field */
3828
4696
        mem_heap_t*     heap)   /* in: mem heap */
3829
4697
{
3830
 
        ulint   local_len;
3831
 
        byte*   data;
 
4698
        ulint           local_len;
 
4699
        const byte*     data;
3832
4700
 
3833
 
        ut_ad(rec_offs_validate(rec, NULL, offsets));
3834
4701
        ut_a(rec_offs_nth_extern(offsets, no));
3835
4702
 
3836
4703
        /* An externally stored field can contain some initial
3844
4711
 
3845
4712
        data = rec_get_nth_field(rec, offsets, no, &local_len);
3846
4713
 
3847
 
        return(btr_copy_externally_stored_field(len, data, local_len, heap));
 
4714
        return(btr_copy_externally_stored_field(len, data,
 
4715
                                                zip_size, local_len, heap));
3848
4716
}