~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to storage/innobase/btr/btr0cur.c

  • Committer: Monty Taylor
  • Date: 2008-09-15 17:24:04 UTC
  • Revision ID: monty@inaugust.com-20080915172404-ygh6hiyu0q7qpa9x
Removed strndup calls.

Show diffs side-by-side

added

removed

Lines of Context:
24
24
#endif
25
25
 
26
26
#include "page0page.h"
27
 
#include "page0zip.h"
28
27
#include "rem0rec.h"
29
28
#include "rem0cmp.h"
30
 
#include "buf0lru.h"
31
29
#include "btr0btr.h"
32
30
#include "btr0sea.h"
33
31
#include "row0upd.h"
37
35
#include "srv0srv.h"
38
36
#include "ibuf0ibuf.h"
39
37
#include "lock0lock.h"
40
 
#include "zlib.h"
41
38
 
42
39
#ifdef UNIV_DEBUG
43
40
/* If the following is set to TRUE, this module prints a lot of
44
41
trace information of individual record operations */
45
 
UNIV_INTERN ibool       btr_cur_print_record_ops = FALSE;
 
42
ibool   btr_cur_print_record_ops = FALSE;
46
43
#endif /* UNIV_DEBUG */
47
44
 
48
 
UNIV_INTERN ulint       btr_cur_n_non_sea       = 0;
49
 
UNIV_INTERN ulint       btr_cur_n_sea           = 0;
50
 
UNIV_INTERN ulint       btr_cur_n_non_sea_old   = 0;
51
 
UNIV_INTERN ulint       btr_cur_n_sea_old       = 0;
 
45
ulint   btr_cur_n_non_sea       = 0;
 
46
ulint   btr_cur_n_sea           = 0;
 
47
ulint   btr_cur_n_non_sea_old   = 0;
 
48
ulint   btr_cur_n_sea_old       = 0;
52
49
 
53
50
/* In the optimistic insert, if the insert does not fit, but this much space
54
51
can be released by page reorganize, then it is reorganized */
55
52
 
56
53
#define BTR_CUR_PAGE_REORGANIZE_LIMIT   (UNIV_PAGE_SIZE / 32)
57
54
 
58
 
/* When estimating number of different key values in an index, sample
 
55
/* When estimating number of different kay values in an index sample
59
56
this many index pages */
60
57
#define BTR_KEY_VAL_ESTIMATE_N_PAGES    8
61
58
 
68
65
/*--------------------------------------*/
69
66
#define BTR_BLOB_HDR_SIZE               8
70
67
 
71
 
/* A BLOB field reference full of zero, for use in assertions and tests.
72
 
Initially, BLOB field references are set to zero, in
73
 
dtuple_convert_big_rec(). */
74
 
UNIV_INTERN const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
75
 
 
76
68
/***********************************************************************
77
69
Marks all extern fields in a record as owned by the record. This function
78
70
should be called if the delete mark of a record is removed: a not delete
81
73
void
82
74
btr_cur_unmark_extern_fields(
83
75
/*=========================*/
84
 
        page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
85
 
                                part will be updated, or NULL */
86
 
        rec_t*          rec,    /* in/out: record in a clustered index */
87
 
        dict_index_t*   index,  /* in: index of the page */
88
 
        const ulint*    offsets,/* in: array returned by rec_get_offsets() */
89
 
        mtr_t*          mtr);   /* in: mtr, or NULL if not logged */
 
76
        rec_t*          rec,    /* in: record in a clustered index */
 
77
        mtr_t*          mtr,    /* in: mtr */
 
78
        const ulint*    offsets);/* in: array returned by rec_get_offsets() */
90
79
/***********************************************************************
91
80
Adds path information to the cursor for the current page, for which
92
81
the binary search has been performed. */
108
97
        dict_index_t*   index,  /* in: index of rec; the index tree MUST be
109
98
                                X-latched */
110
99
        rec_t*          rec,    /* in: record */
111
 
        page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
112
 
                                part will be updated, or NULL */
113
 
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
114
 
        const upd_t*    update, /* in: update vector */
115
 
        mtr_t*          mtr);   /* in: mini-transaction handle which contains
116
 
                                an X-latch to record page and to the tree */
117
 
/***************************************************************
118
 
Frees the externally stored fields for a record. */
119
 
static
120
 
void
121
 
btr_rec_free_externally_stored_fields(
122
 
/*==================================*/
123
 
        dict_index_t*   index,  /* in: index of the data, the index
124
 
                                tree MUST be X-latched */
125
 
        rec_t*          rec,    /* in: record */
126
 
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
127
 
        page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
128
 
                                part will be updated, or NULL */
 
100
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
 
101
        upd_t*          update, /* in: update vector */
129
102
        ibool           do_not_free_inherited,/* in: TRUE if called in a
130
103
                                rollback and we do not want to free
131
104
                                inherited fields */
132
105
        mtr_t*          mtr);   /* in: mini-transaction handle which contains
133
 
                                an X-latch to record page and to the index
134
 
                                tree */
 
106
                                an X-latch to record page and to the tree */
135
107
/***************************************************************
136
108
Gets the externally stored size of a record, in units of a database page. */
137
109
static
143
115
        rec_t*          rec,    /* in: record */
144
116
        const ulint*    offsets);/* in: array returned by rec_get_offsets() */
145
117
 
146
 
/**********************************************************
147
 
The following function is used to set the deleted bit of a record. */
148
 
UNIV_INLINE
149
 
void
150
 
btr_rec_set_deleted_flag(
151
 
/*=====================*/
152
 
                                /* out: TRUE on success;
153
 
                                FALSE on page_zip overflow */
154
 
        rec_t*          rec,    /* in/out: physical record */
155
 
        page_zip_des_t* page_zip,/* in/out: compressed page (or NULL) */
156
 
        ulint           flag)   /* in: nonzero if delete marked */
157
 
{
158
 
        if (page_rec_is_comp(rec)) {
159
 
                rec_set_deleted_flag_new(rec, page_zip, flag);
160
 
        } else {
161
 
                ut_ad(!page_zip);
162
 
                rec_set_deleted_flag_old(rec, flag);
163
 
        }
164
 
}
165
 
 
166
118
/*==================== B-TREE SEARCH =========================*/
167
119
 
168
120
/************************************************************************
174
126
        page_t*         page,           /* in: leaf page where the search
175
127
                                        converged */
176
128
        ulint           space,          /* in: space id */
177
 
        ulint           zip_size,       /* in: compressed page size in bytes
178
 
                                        or 0 for uncompressed pages */
179
129
        ulint           page_no,        /* in: page number of the leaf */
180
130
        ulint           latch_mode,     /* in: BTR_SEARCH_LEAF, ... */
181
131
        btr_cur_t*      cursor,         /* in: cursor */
182
132
        mtr_t*          mtr)            /* in: mtr */
183
133
{
184
 
        ulint           mode;
185
 
        ulint           left_page_no;
186
 
        ulint           right_page_no;
187
 
        buf_block_t*    get_block;
 
134
        ulint   left_page_no;
 
135
        ulint   right_page_no;
 
136
        page_t* get_page;
188
137
 
189
138
        ut_ad(page && mtr);
190
139
 
191
 
        switch (latch_mode) {
192
 
        case BTR_SEARCH_LEAF:
193
 
        case BTR_MODIFY_LEAF:
194
 
                mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
195
 
                get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
196
 
#ifdef UNIV_BTR_DEBUG
197
 
                ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
198
 
#endif /* UNIV_BTR_DEBUG */
199
 
                get_block->check_index_page_at_flush = TRUE;
200
 
                return;
201
 
        case BTR_MODIFY_TREE:
 
140
        if (latch_mode == BTR_SEARCH_LEAF) {
 
141
 
 
142
                get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
 
143
                ut_a(page_is_comp(get_page) == page_is_comp(page));
 
144
                buf_block_align(get_page)->check_index_page_at_flush = TRUE;
 
145
 
 
146
        } else if (latch_mode == BTR_MODIFY_LEAF) {
 
147
 
 
148
                get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
 
149
                ut_a(page_is_comp(get_page) == page_is_comp(page));
 
150
                buf_block_align(get_page)->check_index_page_at_flush = TRUE;
 
151
 
 
152
        } else if (latch_mode == BTR_MODIFY_TREE) {
 
153
 
202
154
                /* x-latch also brothers from left to right */
203
155
                left_page_no = btr_page_get_prev(page, mtr);
204
156
 
205
157
                if (left_page_no != FIL_NULL) {
206
 
                        get_block = btr_block_get(space, zip_size,
207
 
                                                  left_page_no,
208
 
                                                  RW_X_LATCH, mtr);
 
158
                        get_page = btr_page_get(space, left_page_no,
 
159
                                                RW_X_LATCH, mtr);
209
160
#ifdef UNIV_BTR_DEBUG
210
 
                        ut_a(page_is_comp(get_block->frame)
211
 
                             == page_is_comp(page));
212
 
                        ut_a(btr_page_get_next(get_block->frame, mtr)
213
 
                             == page_get_page_no(page));
 
161
                        ut_a(btr_page_get_next(get_page, mtr)
 
162
                             == buf_frame_get_page_no(page));
214
163
#endif /* UNIV_BTR_DEBUG */
215
 
                        get_block->check_index_page_at_flush = TRUE;
 
164
                        ut_a(page_is_comp(get_page) == page_is_comp(page));
 
165
                        buf_block_align(get_page)->check_index_page_at_flush
 
166
                                = TRUE;
216
167
                }
217
168
 
218
 
                get_block = btr_block_get(space, zip_size, page_no,
219
 
                                          RW_X_LATCH, mtr);
220
 
#ifdef UNIV_BTR_DEBUG
221
 
                ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
222
 
#endif /* UNIV_BTR_DEBUG */
223
 
                get_block->check_index_page_at_flush = TRUE;
 
169
                get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
 
170
                ut_a(page_is_comp(get_page) == page_is_comp(page));
 
171
                buf_block_align(get_page)->check_index_page_at_flush = TRUE;
224
172
 
225
173
                right_page_no = btr_page_get_next(page, mtr);
226
174
 
227
175
                if (right_page_no != FIL_NULL) {
228
 
                        get_block = btr_block_get(space, zip_size,
229
 
                                                  right_page_no,
230
 
                                                  RW_X_LATCH, mtr);
231
 
#ifdef UNIV_BTR_DEBUG
232
 
                        ut_a(page_is_comp(get_block->frame)
233
 
                             == page_is_comp(page));
234
 
                        ut_a(btr_page_get_prev(get_block->frame, mtr)
235
 
                             == page_get_page_no(page));
236
 
#endif /* UNIV_BTR_DEBUG */
237
 
                        get_block->check_index_page_at_flush = TRUE;
238
 
                }
239
 
 
240
 
                return;
241
 
 
242
 
        case BTR_SEARCH_PREV:
243
 
        case BTR_MODIFY_PREV:
244
 
                mode = latch_mode == BTR_SEARCH_PREV ? RW_S_LATCH : RW_X_LATCH;
245
 
                /* latch also left brother */
246
 
                left_page_no = btr_page_get_prev(page, mtr);
247
 
 
248
 
                if (left_page_no != FIL_NULL) {
249
 
                        get_block = btr_block_get(space, zip_size,
250
 
                                                  left_page_no, mode, mtr);
251
 
                        cursor->left_block = get_block;
252
 
#ifdef UNIV_BTR_DEBUG
253
 
                        ut_a(page_is_comp(get_block->frame)
254
 
                             == page_is_comp(page));
255
 
                        ut_a(btr_page_get_next(get_block->frame, mtr)
256
 
                             == page_get_page_no(page));
257
 
#endif /* UNIV_BTR_DEBUG */
258
 
                        get_block->check_index_page_at_flush = TRUE;
259
 
                }
260
 
 
261
 
                get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
262
 
#ifdef UNIV_BTR_DEBUG
263
 
                ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
264
 
#endif /* UNIV_BTR_DEBUG */
265
 
                get_block->check_index_page_at_flush = TRUE;
266
 
                return;
 
176
                        get_page = btr_page_get(space, right_page_no,
 
177
                                                RW_X_LATCH, mtr);
 
178
#ifdef UNIV_BTR_DEBUG
 
179
                        ut_a(btr_page_get_prev(get_page, mtr)
 
180
                             == buf_frame_get_page_no(page));
 
181
#endif /* UNIV_BTR_DEBUG */
 
182
                        buf_block_align(get_page)->check_index_page_at_flush
 
183
                                = TRUE;
 
184
                }
 
185
 
 
186
        } else if (latch_mode == BTR_SEARCH_PREV) {
 
187
 
 
188
                /* s-latch also left brother */
 
189
                left_page_no = btr_page_get_prev(page, mtr);
 
190
 
 
191
                if (left_page_no != FIL_NULL) {
 
192
                        cursor->left_page = btr_page_get(space, left_page_no,
 
193
                                                         RW_S_LATCH, mtr);
 
194
#ifdef UNIV_BTR_DEBUG
 
195
                        ut_a(btr_page_get_next(cursor->left_page, mtr)
 
196
                             == buf_frame_get_page_no(page));
 
197
#endif /* UNIV_BTR_DEBUG */
 
198
                        ut_a(page_is_comp(cursor->left_page)
 
199
                             == page_is_comp(page));
 
200
                        buf_block_align(cursor->left_page)
 
201
                                ->check_index_page_at_flush = TRUE;
 
202
                }
 
203
 
 
204
                get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr);
 
205
                ut_a(page_is_comp(get_page) == page_is_comp(page));
 
206
                buf_block_align(get_page)->check_index_page_at_flush = TRUE;
 
207
 
 
208
        } else if (latch_mode == BTR_MODIFY_PREV) {
 
209
 
 
210
                /* x-latch also left brother */
 
211
                left_page_no = btr_page_get_prev(page, mtr);
 
212
 
 
213
                if (left_page_no != FIL_NULL) {
 
214
                        cursor->left_page = btr_page_get(space, left_page_no,
 
215
                                                         RW_X_LATCH, mtr);
 
216
#ifdef UNIV_BTR_DEBUG
 
217
                        ut_a(btr_page_get_next(cursor->left_page, mtr)
 
218
                             == buf_frame_get_page_no(page));
 
219
#endif /* UNIV_BTR_DEBUG */
 
220
                        ut_a(page_is_comp(cursor->left_page)
 
221
                             == page_is_comp(page));
 
222
                        buf_block_align(cursor->left_page)
 
223
                                ->check_index_page_at_flush = TRUE;
 
224
                }
 
225
 
 
226
                get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr);
 
227
                ut_a(page_is_comp(get_page) == page_is_comp(page));
 
228
                buf_block_align(get_page)->check_index_page_at_flush = TRUE;
 
229
        } else {
 
230
                ut_error;
267
231
        }
268
 
 
269
 
        ut_error;
270
232
}
271
233
 
272
234
/************************************************************************
281
243
search tuple should be performed in the B-tree. InnoDB does an insert
282
244
immediately after the cursor. Thus, the cursor may end up on a user record,
283
245
or on a page infimum record. */
284
 
UNIV_INTERN
 
246
 
285
247
void
286
248
btr_cur_search_to_nth_level(
287
249
/*========================*/
288
250
        dict_index_t*   index,  /* in: index */
289
251
        ulint           level,  /* in: the tree level of search */
290
 
        const dtuple_t* tuple,  /* in: data tuple; NOTE: n_fields_cmp in
 
252
        dtuple_t*       tuple,  /* in: data tuple; NOTE: n_fields_cmp in
291
253
                                tuple must be set so that it cannot get
292
254
                                compared to the node ptr page number field! */
293
255
        ulint           mode,   /* in: PAGE_CUR_L, ...;
295
257
                                PAGE_CUR_LE to search the position! */
296
258
        ulint           latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with
297
259
                                BTR_INSERT and BTR_ESTIMATE;
298
 
                                cursor->left_block is used to store a pointer
 
260
                                cursor->left_page is used to store a pointer
299
261
                                to the left neighbor page, in the cases
300
262
                                BTR_SEARCH_PREV and BTR_MODIFY_PREV;
301
263
                                NOTE that if has_search_latch
312
274
{
313
275
        page_cur_t*     page_cursor;
314
276
        page_t*         page;
315
 
        buf_block_t*    guess;
 
277
        page_t*         guess;
316
278
        rec_t*          node_ptr;
317
279
        ulint           page_no;
318
280
        ulint           space;
335
297
        mem_heap_t*     heap            = NULL;
336
298
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
337
299
        ulint*          offsets         = offsets_;
338
 
        rec_offs_init(offsets_);
 
300
        *offsets_ = (sizeof offsets_) / sizeof *offsets_;
339
301
        /* Currently, PAGE_CUR_LE is the only search mode used for searches
340
302
        ending to upper levels */
341
303
 
342
304
        ut_ad(level == 0 || mode == PAGE_CUR_LE);
343
305
        ut_ad(dict_index_check_search_tuple(index, tuple));
344
 
        ut_ad(!dict_index_is_ibuf(index) || ibuf_inside());
 
306
        ut_ad(!(index->type & DICT_IBUF) || ibuf_inside());
345
307
        ut_ad(dtuple_check_typed(tuple));
346
308
 
347
309
#ifdef UNIV_DEBUG
377
339
#ifdef PAGE_CUR_LE_OR_EXTENDS
378
340
            && mode != PAGE_CUR_LE_OR_EXTENDS
379
341
#endif /* PAGE_CUR_LE_OR_EXTENDS */
380
 
            && !UNIV_UNLIKELY(btr_search_disabled)
 
342
            && srv_use_adaptive_hash_indexes
381
343
            && btr_search_guess_on_hash(index, info, tuple, mode,
382
344
                                        latch_mode, cursor,
383
345
                                        has_search_latch, mtr)) {
461
423
        /* Loop and search until we arrive at the desired level */
462
424
 
463
425
        for (;;) {
464
 
                ulint           zip_size;
465
 
                buf_block_t*    block;
 
426
                if ((height == 0) && (latch_mode <= BTR_MODIFY_LEAF)) {
 
427
 
 
428
                        rw_latch = latch_mode;
 
429
 
 
430
                        if (insert_planned
 
431
                            && ibuf_should_try(index, ignore_sec_unique)) {
 
432
 
 
433
                                /* Try insert to the insert buffer if the
 
434
                                page is not in the buffer pool */
 
435
 
 
436
                                buf_mode = BUF_GET_IF_IN_POOL;
 
437
                        }
 
438
                }
466
439
retry_page_get:
467
 
                zip_size = dict_table_zip_size(index->table);
468
 
 
469
 
                block = buf_page_get_gen(space, zip_size, page_no,
470
 
                                         rw_latch, guess, buf_mode,
471
 
                                         __FILE__, __LINE__,
472
 
                                         mtr);
473
 
                if (block == NULL) {
 
440
                page = buf_page_get_gen(space, page_no, rw_latch, guess,
 
441
                                        buf_mode,
 
442
                                        __FILE__, __LINE__,
 
443
                                        mtr);
 
444
                if (page == NULL) {
474
445
                        /* This must be a search to perform an insert;
475
446
                        try insert to the insert buffer */
476
447
 
479
450
                        ut_ad(cursor->thr);
480
451
 
481
452
                        if (ibuf_should_try(index, ignore_sec_unique)
482
 
                            && ibuf_insert(tuple, index, space, zip_size,
483
 
                                           page_no, cursor->thr)) {
 
453
                            && ibuf_insert(tuple, index, space, page_no,
 
454
                                           cursor->thr)) {
484
455
                                /* Insertion to the insert buffer succeeded */
485
456
                                cursor->flag = BTR_CUR_INSERT_TO_IBUF;
486
457
                                if (UNIV_LIKELY_NULL(heap)) {
497
468
                        goto retry_page_get;
498
469
                }
499
470
 
500
 
                page = buf_block_get_frame(block);
501
 
#ifdef UNIV_ZIP_DEBUG
502
 
                if (rw_latch != RW_NO_LATCH) {
503
 
                        const page_zip_des_t*   page_zip
504
 
                                = buf_block_get_page_zip(block);
505
 
                        ut_a(!page_zip || page_zip_validate(page_zip, page));
506
 
                }
507
 
#endif /* UNIV_ZIP_DEBUG */
508
 
 
509
 
                block->check_index_page_at_flush = TRUE;
 
471
                buf_block_align(page)->check_index_page_at_flush = TRUE;
510
472
 
511
473
#ifdef UNIV_SYNC_DEBUG
512
474
                if (rw_latch != RW_NO_LATCH) {
513
 
                        buf_block_dbg_add_level(block, SYNC_TREE_NODE);
 
475
                        buf_page_dbg_add_level(page, SYNC_TREE_NODE);
514
476
                }
515
477
#endif
516
478
                ut_ad(0 == ut_dulint_cmp(index->id,
517
479
                                         btr_page_get_index_id(page)));
518
480
 
519
 
                if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
 
481
                if (height == ULINT_UNDEFINED) {
520
482
                        /* We are in the root node */
521
483
 
522
484
                        height = btr_page_get_level(page, mtr);
523
485
                        root_height = height;
524
486
                        cursor->tree_height = root_height + 1;
525
487
#ifdef BTR_CUR_ADAPT
526
 
                        if (block != guess) {
527
 
                                info->root_guess = block;
 
488
                        if (page != guess) {
 
489
                                info->root_guess = page;
528
490
                        }
529
491
#endif
530
492
                }
532
494
                if (height == 0) {
533
495
                        if (rw_latch == RW_NO_LATCH) {
534
496
 
535
 
                                btr_cur_latch_leaves(page, space, zip_size,
 
497
                                btr_cur_latch_leaves(page, space,
536
498
                                                     page_no, latch_mode,
537
499
                                                     cursor, mtr);
538
500
                        }
550
512
                        page_mode = mode;
551
513
                }
552
514
 
553
 
                page_cur_search_with_match(block, index, tuple, page_mode,
 
515
                page_cur_search_with_match(page, index, tuple, page_mode,
554
516
                                           &up_match, &up_bytes,
555
517
                                           &low_match, &low_bytes,
556
518
                                           page_cursor);
557
 
 
558
519
                if (estimate) {
559
520
                        btr_cur_add_path_info(cursor, height, root_height);
560
521
                }
568
529
 
569
530
                        if (level > 0) {
570
531
                                /* x-latch the page */
571
 
                                page = btr_page_get(space, zip_size,
 
532
                                page = btr_page_get(space,
572
533
                                                    page_no, RW_X_LATCH, mtr);
573
534
                                ut_a((ibool)!!page_is_comp(page)
574
535
                                     == dict_table_is_comp(index->table));
580
541
                ut_ad(height > 0);
581
542
 
582
543
                height--;
583
 
 
584
 
                if ((height == 0) && (latch_mode <= BTR_MODIFY_LEAF)) {
585
 
 
586
 
                        rw_latch = latch_mode;
587
 
 
588
 
                        if (insert_planned
589
 
                            && ibuf_should_try(index, ignore_sec_unique)) {
590
 
 
591
 
                                /* Try insert to the insert buffer if the
592
 
                                page is not in the buffer pool */
593
 
 
594
 
                                buf_mode = BUF_GET_IF_IN_POOL;
595
 
                        }
596
 
                }
597
 
 
598
544
                guess = NULL;
599
545
 
600
546
                node_ptr = page_cur_get_rec(page_cursor);
615
561
                cursor->up_bytes = up_bytes;
616
562
 
617
563
#ifdef BTR_CUR_ADAPT
618
 
                if (!UNIV_UNLIKELY(btr_search_disabled)) {
 
564
                if (srv_use_adaptive_hash_indexes) {
619
565
 
620
566
                        btr_search_info_update(index, cursor);
621
567
                }
637
583
 
638
584
/*********************************************************************
639
585
Opens a cursor at either end of an index. */
640
 
UNIV_INTERN
 
586
 
641
587
void
642
588
btr_cur_open_at_index_side(
643
589
/*=======================*/
649
595
        mtr_t*          mtr)            /* in: mtr */
650
596
{
651
597
        page_cur_t*     page_cursor;
 
598
        page_t*         page;
652
599
        ulint           page_no;
653
600
        ulint           space;
654
 
        ulint           zip_size;
655
601
        ulint           height;
656
602
        ulint           root_height = 0; /* remove warning */
657
603
        rec_t*          node_ptr;
660
606
        mem_heap_t*     heap            = NULL;
661
607
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
662
608
        ulint*          offsets         = offsets_;
663
 
        rec_offs_init(offsets_);
 
609
        *offsets_ = (sizeof offsets_) / sizeof *offsets_;
664
610
 
665
611
        estimate = latch_mode & BTR_ESTIMATE;
666
612
        latch_mode = latch_mode & ~BTR_ESTIMATE;
680
626
        cursor->index = index;
681
627
 
682
628
        space = dict_index_get_space(index);
683
 
        zip_size = dict_table_zip_size(index->table);
684
629
        page_no = dict_index_get_page(index);
685
630
 
686
631
        height = ULINT_UNDEFINED;
687
632
 
688
633
        for (;;) {
689
 
                buf_block_t*    block;
690
 
                page_t*         page;
691
 
                block = buf_page_get_gen(space, zip_size, page_no,
692
 
                                         RW_NO_LATCH, NULL, BUF_GET,
693
 
                                         __FILE__, __LINE__,
694
 
                                         mtr);
695
 
                page = buf_block_get_frame(block);
 
634
                page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL,
 
635
                                        BUF_GET,
 
636
                                        __FILE__, __LINE__,
 
637
                                        mtr);
696
638
                ut_ad(0 == ut_dulint_cmp(index->id,
697
639
                                         btr_page_get_index_id(page)));
698
640
 
699
 
                block->check_index_page_at_flush = TRUE;
 
641
                buf_block_align(page)->check_index_page_at_flush = TRUE;
700
642
 
701
643
                if (height == ULINT_UNDEFINED) {
702
644
                        /* We are in the root node */
706
648
                }
707
649
 
708
650
                if (height == 0) {
709
 
                        btr_cur_latch_leaves(page, space, zip_size, page_no,
 
651
                        btr_cur_latch_leaves(page, space, page_no,
710
652
                                             latch_mode, cursor, mtr);
711
653
 
712
654
                        /* In versions <= 3.23.52 we had forgotten to
727
669
                }
728
670
 
729
671
                if (from_left) {
730
 
                        page_cur_set_before_first(block, page_cursor);
 
672
                        page_cur_set_before_first(page, page_cursor);
731
673
                } else {
732
 
                        page_cur_set_after_last(block, page_cursor);
 
674
                        page_cur_set_after_last(page, page_cursor);
733
675
                }
734
676
 
735
677
                if (height == 0) {
769
711
 
770
712
/**************************************************************************
771
713
Positions a cursor at a randomly chosen position within a B-tree. */
772
 
UNIV_INTERN
 
714
 
773
715
void
774
716
btr_cur_open_at_rnd_pos(
775
717
/*====================*/
779
721
        mtr_t*          mtr)            /* in: mtr */
780
722
{
781
723
        page_cur_t*     page_cursor;
 
724
        page_t*         page;
782
725
        ulint           page_no;
783
726
        ulint           space;
784
 
        ulint           zip_size;
785
727
        ulint           height;
786
728
        rec_t*          node_ptr;
787
729
        mem_heap_t*     heap            = NULL;
788
730
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
789
731
        ulint*          offsets         = offsets_;
790
 
        rec_offs_init(offsets_);
 
732
        *offsets_ = (sizeof offsets_) / sizeof *offsets_;
791
733
 
792
734
        if (latch_mode == BTR_MODIFY_TREE) {
793
735
                mtr_x_lock(dict_index_get_lock(index), mtr);
799
741
        cursor->index = index;
800
742
 
801
743
        space = dict_index_get_space(index);
802
 
        zip_size = dict_table_zip_size(index->table);
803
744
        page_no = dict_index_get_page(index);
804
745
 
805
746
        height = ULINT_UNDEFINED;
806
747
 
807
748
        for (;;) {
808
 
                buf_block_t*    block;
809
 
                page_t*         page;
810
 
 
811
 
                block = buf_page_get_gen(space, zip_size, page_no,
812
 
                                         RW_NO_LATCH, NULL, BUF_GET,
813
 
                                         __FILE__, __LINE__,
814
 
                                         mtr);
815
 
                page = buf_block_get_frame(block);
 
749
                page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL,
 
750
                                        BUF_GET,
 
751
                                        __FILE__, __LINE__,
 
752
                                        mtr);
816
753
                ut_ad(0 == ut_dulint_cmp(index->id,
817
754
                                         btr_page_get_index_id(page)));
818
755
 
823
760
                }
824
761
 
825
762
                if (height == 0) {
826
 
                        btr_cur_latch_leaves(page, space, zip_size, page_no,
 
763
                        btr_cur_latch_leaves(page, space, page_no,
827
764
                                             latch_mode, cursor, mtr);
828
765
                }
829
766
 
830
 
                page_cur_open_on_rnd_user_rec(block, page_cursor);
 
767
                page_cur_open_on_rnd_user_rec(page, page_cursor);
831
768
 
832
769
                if (height == 0) {
833
770
 
865
802
                                else NULL */
866
803
        btr_cur_t*      cursor, /* in: cursor on page after which to insert;
867
804
                                cursor stays valid */
868
 
        const dtuple_t* tuple,  /* in: tuple to insert; the size info need not
 
805
        dtuple_t*       tuple,  /* in: tuple to insert; the size info need not
869
806
                                have been stored to tuple */
870
 
        ulint           n_ext,  /* in: number of externally stored columns */
 
807
        ibool*          reorg,  /* out: TRUE if reorganization occurred */
871
808
        mtr_t*          mtr)    /* in: mtr */
872
809
{
873
810
        page_cur_t*     page_cursor;
874
 
        buf_block_t*    block;
 
811
        page_t*         page;
875
812
        rec_t*          rec;
876
813
 
877
814
        ut_ad(dtuple_check_typed(tuple));
878
815
 
879
 
        block = btr_cur_get_block(cursor);
880
 
 
881
 
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 
816
        *reorg = FALSE;
 
817
 
 
818
        page = btr_cur_get_page(cursor);
 
819
 
 
820
        ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
 
821
                                MTR_MEMO_PAGE_X_FIX));
882
822
        page_cursor = btr_cur_get_page_cur(cursor);
883
823
 
884
824
        /* Now, try the insert */
885
 
        rec = page_cur_tuple_insert(page_cursor, tuple,
886
 
                                    cursor->index, n_ext, mtr);
 
825
        rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr);
887
826
 
888
 
        if (UNIV_UNLIKELY(!rec)) {
 
827
        if (!rec) {
889
828
                /* If record did not fit, reorganize */
890
829
 
891
 
                if (btr_page_reorganize(block, cursor->index, mtr)) {
892
 
 
893
 
                        page_cur_search(block, cursor->index, tuple,
894
 
                                        PAGE_CUR_LE, page_cursor);
895
 
 
896
 
                        rec = page_cur_tuple_insert(page_cursor, tuple,
897
 
                                                    cursor->index, n_ext, mtr);
898
 
                }
 
830
                btr_page_reorganize(page, cursor->index, mtr);
 
831
 
 
832
                *reorg = TRUE;
 
833
 
 
834
                page_cur_search(page, cursor->index, tuple,
 
835
                                PAGE_CUR_LE, page_cursor);
 
836
 
 
837
                rec = page_cur_tuple_insert(page_cursor, tuple,
 
838
                                            cursor->index, mtr);
899
839
        }
900
840
 
901
841
        return(rec);
913
853
                                not zero, the parameters index and thr
914
854
                                should be specified */
915
855
        btr_cur_t*      cursor, /* in: cursor on page after which to insert */
916
 
        const dtuple_t* entry,  /* in: entry to insert */
 
856
        dtuple_t*       entry,  /* in: entry to insert */
917
857
        que_thr_t*      thr,    /* in: query thread or NULL */
918
858
        ibool*          inherit)/* out: TRUE if the inserted new record maybe
919
859
                                should inherit LOCK_GAP type locks from the
930
870
        rec = btr_cur_get_rec(cursor);
931
871
        index = cursor->index;
932
872
 
933
 
        err = lock_rec_insert_check_and_lock(flags, rec,
934
 
                                             btr_cur_get_block(cursor),
935
 
                                             index, thr, inherit);
 
873
        err = lock_rec_insert_check_and_lock(flags, rec, index, thr, inherit);
936
874
 
937
875
        if (err != DB_SUCCESS) {
938
876
 
939
877
                return(err);
940
878
        }
941
879
 
942
 
        if (dict_index_is_clust(index) && !dict_index_is_ibuf(index)) {
 
880
        if ((index->type & DICT_CLUSTERED) && !(index->type & DICT_IBUF)) {
943
881
 
944
882
                err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP,
945
883
                                                    thr, index, entry,
973
911
        const dict_index_t*     index,  /* in: index */
974
912
        const char*             op)     /* in: operation */
975
913
{
976
 
        fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ",
977
 
                TRX_ID_PREP_PRINTF(trx->id));
 
914
        fprintf(stderr, "Trx with id %lu %lu going to ",
 
915
                ut_dulint_get_high(trx->id),
 
916
                ut_dulint_get_low(trx->id));
978
917
        fputs(op, stderr);
979
918
        dict_index_name_print(stderr, trx, index);
980
919
        putc('\n', stderr);
987
926
not succeed if there is too little space on the page. If there is just
988
927
one record on the page, the insert will always succeed; this is to
989
928
prevent trying to split a page with just one record. */
990
 
UNIV_INTERN
 
929
 
991
930
ulint
992
931
btr_cur_optimistic_insert(
993
932
/*======================*/
998
937
                                specified */
999
938
        btr_cur_t*      cursor, /* in: cursor on page after which to insert;
1000
939
                                cursor stays valid */
1001
 
        dtuple_t*       entry,  /* in/out: entry to insert */
 
940
        dtuple_t*       entry,  /* in: entry to insert */
1002
941
        rec_t**         rec,    /* out: pointer to inserted record if
1003
942
                                succeed */
1004
943
        big_rec_t**     big_rec,/* out: big rec vector whose fields have to
1005
944
                                be stored externally by the caller, or
1006
945
                                NULL */
1007
 
        ulint           n_ext,  /* in: number of externally stored columns */
1008
946
        que_thr_t*      thr,    /* in: query thread or NULL */
1009
 
        mtr_t*          mtr)    /* in: mtr; if this function returns
1010
 
                                DB_SUCCESS on a leaf page of a secondary
1011
 
                                index in a compressed tablespace, the
1012
 
                                mtr must be committed before latching
1013
 
                                any further pages */
 
947
        mtr_t*          mtr)    /* in: mtr */
1014
948
{
1015
949
        big_rec_t*      big_rec_vec     = NULL;
1016
950
        dict_index_t*   index;
1017
951
        page_cur_t*     page_cursor;
1018
 
        buf_block_t*    block;
1019
952
        page_t*         page;
1020
953
        ulint           max_size;
1021
954
        rec_t*          dummy_rec;
1022
 
        ibool           leaf;
 
955
        ulint           level;
1023
956
        ibool           reorg;
1024
957
        ibool           inherit;
1025
 
        ulint           zip_size;
1026
958
        ulint           rec_size;
1027
 
        mem_heap_t*     heap            = NULL;
 
959
        ulint           type;
1028
960
        ulint           err;
1029
961
 
1030
962
        *big_rec = NULL;
1031
963
 
1032
 
        block = btr_cur_get_block(cursor);
1033
 
        page = buf_block_get_frame(block);
 
964
        page = btr_cur_get_page(cursor);
1034
965
        index = cursor->index;
1035
 
        zip_size = buf_block_get_zip_size(block);
1036
 
#ifdef UNIV_DEBUG_VALGRIND
1037
 
        if (zip_size) {
1038
 
                UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
1039
 
                UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
1040
 
        }
1041
 
#endif /* UNIV_DEBUG_VALGRIND */
1042
966
 
1043
967
        if (!dtuple_check_typed_no_assert(entry)) {
1044
968
                fputs("InnoDB: Error in a tuple to insert into ", stderr);
1051
975
        }
1052
976
#endif /* UNIV_DEBUG */
1053
977
 
1054
 
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 
978
        ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
 
979
                                MTR_MEMO_PAGE_X_FIX));
1055
980
        max_size = page_get_max_insert_size_after_reorganize(page, 1);
1056
 
        leaf = page_is_leaf(page);
 
981
        level = btr_page_get_level(page, mtr);
1057
982
 
 
983
calculate_sizes_again:
1058
984
        /* Calculate the record size when entry is converted to a record */
1059
 
        rec_size = rec_get_converted_size(index, entry, n_ext);
 
985
        rec_size = rec_get_converted_size(index, entry);
1060
986
 
1061
 
        if (page_zip_rec_needs_ext(rec_size, page_is_comp(page), zip_size)) {
 
987
        if (rec_size
 
988
            >= ut_min(page_get_free_space_of_empty(page_is_comp(page)) / 2,
 
989
                      REC_MAX_DATA_SIZE)) {
1062
990
 
1063
991
                /* The record is so big that we have to store some fields
1064
992
                externally on separate database pages */
1065
 
                big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
1066
 
 
1067
 
                if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
 
993
 
 
994
                big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0);
 
995
 
 
996
                if (big_rec_vec == NULL) {
1068
997
 
1069
998
                        return(DB_TOO_BIG_RECORD);
1070
999
                }
1071
1000
 
1072
 
                rec_size = rec_get_converted_size(index, entry, n_ext);
 
1001
                goto calculate_sizes_again;
1073
1002
        }
1074
1003
 
1075
1004
        /* If there have been many consecutive inserts, and we are on the leaf
1076
1005
        level, check if we have to split the page to reserve enough free space
1077
1006
        for future updates of records. */
1078
1007
 
1079
 
        if (dict_index_is_clust(index)
 
1008
        type = index->type;
 
1009
 
 
1010
        if ((type & DICT_CLUSTERED)
 
1011
            && (dict_index_get_space_reserve() + rec_size > max_size)
1080
1012
            && (page_get_n_recs(page) >= 2)
1081
 
            && UNIV_LIKELY(leaf)
1082
 
            && (dict_index_get_space_reserve() + rec_size > max_size)
 
1013
            && (0 == level)
1083
1014
            && (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
1084
1015
                || btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
1085
 
fail:
1086
 
                err = DB_FAIL;
1087
 
fail_err:
1088
1016
 
1089
1017
                if (big_rec_vec) {
1090
1018
                        dtuple_convert_back_big_rec(index, entry, big_rec_vec);
1091
1019
                }
1092
1020
 
1093
 
                if (UNIV_LIKELY_NULL(heap)) {
1094
 
                        mem_heap_free(heap);
1095
 
                }
1096
 
 
1097
 
                return(err);
 
1021
                return(DB_FAIL);
1098
1022
        }
1099
1023
 
1100
 
        if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT
1101
 
             || max_size < rec_size)
1102
 
            && UNIV_LIKELY(page_get_n_recs(page) > 1)
1103
 
            && page_get_max_insert_size(page, 1) < rec_size) {
 
1024
        if (!(((max_size >= rec_size)
 
1025
               && (max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT))
 
1026
              || (page_get_max_insert_size(page, 1) >= rec_size)
 
1027
              || (page_get_n_recs(page) <= 1))) {
1104
1028
 
1105
 
                goto fail;
 
1029
                if (big_rec_vec) {
 
1030
                        dtuple_convert_back_big_rec(index, entry, big_rec_vec);
 
1031
                }
 
1032
                return(DB_FAIL);
1106
1033
        }
1107
1034
 
1108
1035
        /* Check locks and write to the undo log, if specified */
1109
1036
        err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, &inherit);
1110
1037
 
1111
 
        if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
 
1038
        if (err != DB_SUCCESS) {
1112
1039
 
1113
 
                goto fail_err;
 
1040
                if (big_rec_vec) {
 
1041
                        dtuple_convert_back_big_rec(index, entry, big_rec_vec);
 
1042
                }
 
1043
                return(err);
1114
1044
        }
1115
1045
 
1116
1046
        page_cursor = btr_cur_get_page_cur(cursor);
1117
1047
 
 
1048
        reorg = FALSE;
 
1049
 
1118
1050
        /* Now, try the insert */
1119
1051
 
1120
 
        {
1121
 
                const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);
1122
 
                *rec = page_cur_tuple_insert(page_cursor, entry, index,
1123
 
                                             n_ext, mtr);
1124
 
                reorg = page_cursor_rec != page_cur_get_rec(page_cursor);
1125
 
 
1126
 
                if (UNIV_UNLIKELY(reorg)) {
1127
 
                        ut_a(zip_size);
1128
 
                        ut_a(*rec);
1129
 
                }
1130
 
        }
1131
 
 
1132
 
        if (UNIV_UNLIKELY(!*rec) && UNIV_LIKELY(!reorg)) {
 
1052
        *rec = page_cur_insert_rec_low(page_cursor, entry, index,
 
1053
                                       NULL, NULL, mtr);
 
1054
        if (UNIV_UNLIKELY(!(*rec))) {
1133
1055
                /* If the record did not fit, reorganize */
1134
 
                if (UNIV_UNLIKELY(!btr_page_reorganize(block, index, mtr))) {
1135
 
                        ut_a(zip_size);
1136
 
 
1137
 
                        goto fail;
1138
 
                }
1139
 
 
1140
 
                ut_ad(zip_size
1141
 
                      || page_get_max_insert_size(page, 1) == max_size);
 
1056
                btr_page_reorganize(page, index, mtr);
 
1057
 
 
1058
                ut_ad(page_get_max_insert_size(page, 1) == max_size);
1142
1059
 
1143
1060
                reorg = TRUE;
1144
1061
 
1145
 
                page_cur_search(block, index, entry, PAGE_CUR_LE, page_cursor);
 
1062
                page_cur_search(page, index, entry, PAGE_CUR_LE, page_cursor);
1146
1063
 
1147
 
                *rec = page_cur_tuple_insert(page_cursor, entry, index,
1148
 
                                             n_ext, mtr);
 
1064
                *rec = page_cur_tuple_insert(page_cursor, entry, index, mtr);
1149
1065
 
1150
1066
                if (UNIV_UNLIKELY(!*rec)) {
1151
 
                        if (UNIV_LIKELY(zip_size != 0)) {
1152
 
 
1153
 
                                goto fail;
1154
 
                        }
1155
 
 
1156
1067
                        fputs("InnoDB: Error: cannot insert tuple ", stderr);
1157
1068
                        dtuple_print(stderr, entry);
1158
1069
                        fputs(" into ", stderr);
1163
1074
                }
1164
1075
        }
1165
1076
 
1166
 
        if (UNIV_LIKELY_NULL(heap)) {
1167
 
                mem_heap_free(heap);
1168
 
        }
1169
 
 
1170
1077
#ifdef BTR_CUR_HASH_ADAPT
1171
 
        if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) {
 
1078
        if (!reorg && (0 == level) && (cursor->flag == BTR_CUR_HASH)) {
1172
1079
                btr_search_update_hash_node_on_insert(cursor);
1173
1080
        } else {
1174
1081
                btr_search_update_hash_on_insert(cursor);
1177
1084
 
1178
1085
        if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) {
1179
1086
 
1180
 
                lock_update_insert(block, *rec);
 
1087
                lock_update_insert(*rec);
1181
1088
        }
1182
1089
 
1183
1090
#if 0
1184
1091
        fprintf(stderr, "Insert into page %lu, max ins size %lu,"
1185
1092
                " rec %lu ind type %lu\n",
1186
 
                buf_block_get_page_no(block), max_size,
1187
 
                rec_size + PAGE_DIR_SLOT_SIZE, index->type);
 
1093
                buf_frame_get_page_no(page), max_size,
 
1094
                rec_size + PAGE_DIR_SLOT_SIZE, type);
1188
1095
#endif
1189
 
        if (!dict_index_is_clust(index) && leaf) {
1190
 
                /* Update the free bits of the B-tree page in the
1191
 
                insert buffer bitmap. */
1192
 
 
1193
 
                /* The free bits in the insert buffer bitmap must
1194
 
                never exceed the free space on a page.  It is safe to
1195
 
                decrement or reset the bits in the bitmap in a
1196
 
                mini-transaction that is committed before the
1197
 
                mini-transaction that affects the free space. */
1198
 
 
1199
 
                /* It is unsafe to increment the bits in a separately
1200
 
                committed mini-transaction, because in crash recovery,
1201
 
                the free bits could momentarily be set too high. */
1202
 
 
1203
 
                if (zip_size) {
1204
 
                        /* Update the bits in the same mini-transaction. */
1205
 
                        ibuf_update_free_bits_zip(block, mtr);
1206
 
                } else {
1207
 
                        /* Decrement the bits in a separate
1208
 
                        mini-transaction. */
1209
 
                        ibuf_update_free_bits_if_full(
1210
 
                                block, max_size,
1211
 
                                rec_size + PAGE_DIR_SLOT_SIZE);
1212
 
                }
 
1096
        if (!(type & DICT_CLUSTERED)) {
 
1097
                /* We have added a record to page: update its free bits */
 
1098
                ibuf_update_free_bits_if_full(cursor->index, page, max_size,
 
1099
                                              rec_size + PAGE_DIR_SLOT_SIZE);
1213
1100
        }
1214
1101
 
1215
1102
        *big_rec = big_rec_vec;
1222
1109
holds an x-latch on the tree and on the cursor page. If the insert is
1223
1110
made on the leaf level, to avoid deadlocks, mtr must also own x-latches
1224
1111
to brothers of page, if those brothers exist. */
1225
 
UNIV_INTERN
 
1112
 
1226
1113
ulint
1227
1114
btr_cur_pessimistic_insert(
1228
1115
/*=======================*/
1235
1122
                                insertion will certainly succeed */
1236
1123
        btr_cur_t*      cursor, /* in: cursor after which to insert;
1237
1124
                                cursor stays valid */
1238
 
        dtuple_t*       entry,  /* in/out: entry to insert */
 
1125
        dtuple_t*       entry,  /* in: entry to insert */
1239
1126
        rec_t**         rec,    /* out: pointer to inserted record if
1240
1127
                                succeed */
1241
1128
        big_rec_t**     big_rec,/* out: big rec vector whose fields have to
1242
1129
                                be stored externally by the caller, or
1243
1130
                                NULL */
1244
 
        ulint           n_ext,  /* in: number of externally stored columns */
1245
1131
        que_thr_t*      thr,    /* in: query thread or NULL */
1246
1132
        mtr_t*          mtr)    /* in: mtr */
1247
1133
{
1248
1134
        dict_index_t*   index           = cursor->index;
1249
 
        ulint           zip_size        = dict_table_zip_size(index->table);
1250
1135
        big_rec_t*      big_rec_vec     = NULL;
1251
 
        mem_heap_t*     heap            = NULL;
 
1136
        page_t*         page;
1252
1137
        ulint           err;
1253
1138
        ibool           dummy_inh;
1254
1139
        ibool           success;
1259
1144
 
1260
1145
        *big_rec = NULL;
1261
1146
 
 
1147
        page = btr_cur_get_page(cursor);
 
1148
 
1262
1149
        ut_ad(mtr_memo_contains(mtr,
1263
1150
                                dict_index_get_lock(btr_cur_get_index(cursor)),
1264
1151
                                MTR_MEMO_X_LOCK));
1265
 
        ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
 
1152
        ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
1266
1153
                                MTR_MEMO_PAGE_X_FIX));
1267
1154
 
1268
1155
        /* Try first an optimistic insert; reset the cursor flag: we do not
1270
1157
 
1271
1158
        cursor->flag = BTR_CUR_BINARY;
1272
1159
 
1273
 
        err = btr_cur_optimistic_insert(flags, cursor, entry, rec,
1274
 
                                        big_rec, n_ext, thr, mtr);
 
1160
        err = btr_cur_optimistic_insert(flags, cursor, entry, rec, big_rec,
 
1161
                                        thr, mtr);
1275
1162
        if (err != DB_FAIL) {
1276
1163
 
1277
1164
                return(err);
1297
1184
                success = fsp_reserve_free_extents(&n_reserved, index->space,
1298
1185
                                                   n_extents, FSP_NORMAL, mtr);
1299
1186
                if (!success) {
1300
 
                        return(DB_OUT_OF_FILE_SPACE);
 
1187
                        err = DB_OUT_OF_FILE_SPACE;
 
1188
 
 
1189
                        return(err);
1301
1190
                }
1302
1191
        }
1303
1192
 
1304
 
        if (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, n_ext),
1305
 
                                   dict_table_is_comp(index->table),
1306
 
                                   zip_size)) {
 
1193
        if (rec_get_converted_size(index, entry)
 
1194
            >= ut_min(page_get_free_space_of_empty(page_is_comp(page)) / 2,
 
1195
                      REC_MAX_DATA_SIZE)) {
 
1196
 
1307
1197
                /* The record is so big that we have to store some fields
1308
1198
                externally on separate database pages */
1309
1199
 
1310
 
                if (UNIV_LIKELY_NULL(big_rec_vec)) {
1311
 
                        /* This should never happen, but we handle
1312
 
                        the situation in a robust manner. */
1313
 
                        ut_ad(0);
1314
 
                        dtuple_convert_back_big_rec(index, entry, big_rec_vec);
1315
 
                }
1316
 
 
1317
 
                big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
 
1200
                big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0);
1318
1201
 
1319
1202
                if (big_rec_vec == NULL) {
1320
1203
 
1326
1209
                }
1327
1210
        }
1328
1211
 
1329
 
        if (UNIV_UNLIKELY(zip_size)) {
1330
 
                /* Estimate the free space of an empty compressed page. */
1331
 
                ulint   free_space_zip = page_zip_empty_size(
1332
 
                        cursor->index->n_fields, zip_size);
1333
 
 
1334
 
                if (UNIV_UNLIKELY(rec_get_converted_size(index, entry, n_ext)
1335
 
                                  > free_space_zip)) {
1336
 
                        /* Try to insert the record by itself on a new page.
1337
 
                        If it fails, no amount of splitting will help. */
1338
 
                        buf_block_t*    temp_block
1339
 
                                = buf_block_alloc(zip_size);
1340
 
                        page_t*         temp_page
1341
 
                                = page_create_zip(temp_block, index, 0, NULL);
1342
 
                        page_cur_t      temp_cursor;
1343
 
                        rec_t*          temp_rec;
1344
 
 
1345
 
                        page_cur_position(temp_page + PAGE_NEW_INFIMUM,
1346
 
                                          temp_block, &temp_cursor);
1347
 
 
1348
 
                        temp_rec = page_cur_tuple_insert(&temp_cursor,
1349
 
                                                         entry, index,
1350
 
                                                         n_ext, NULL);
1351
 
                        buf_block_free(temp_block);
1352
 
 
1353
 
                        if (UNIV_UNLIKELY(!temp_rec)) {
1354
 
                                if (big_rec_vec) {
1355
 
                                        dtuple_convert_back_big_rec(
1356
 
                                                index, entry, big_rec_vec);
1357
 
                                }
1358
 
 
1359
 
                                if (heap) {
1360
 
                                        mem_heap_free(heap);
1361
 
                                }
1362
 
 
1363
 
                                return(DB_TOO_BIG_RECORD);
1364
 
                        }
1365
 
                }
1366
 
        }
1367
 
 
1368
 
        if (dict_index_get_page(index)
1369
 
            == buf_block_get_page_no(btr_cur_get_block(cursor))) {
 
1212
        if (dict_index_get_page(index) == buf_frame_get_page_no(page)) {
1370
1213
 
1371
1214
                /* The page is the root page */
1372
 
                *rec = btr_root_raise_and_insert(cursor, entry, n_ext, mtr);
 
1215
                *rec = btr_root_raise_and_insert(cursor, entry, mtr);
1373
1216
        } else {
1374
 
                *rec = btr_page_split_and_insert(cursor, entry, n_ext, mtr);
1375
 
        }
1376
 
 
1377
 
        if (UNIV_LIKELY_NULL(heap)) {
1378
 
                mem_heap_free(heap);
1379
 
        }
1380
 
 
1381
 
        ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec);
 
1217
                *rec = btr_page_split_and_insert(cursor, entry, mtr);
 
1218
        }
 
1219
 
 
1220
        btr_cur_position(index, page_rec_get_prev(*rec), cursor);
1382
1221
 
1383
1222
#ifdef BTR_CUR_ADAPT
1384
1223
        btr_search_update_hash_on_insert(cursor);
1385
1224
#endif
1386
1225
        if (!(flags & BTR_NO_LOCKING_FLAG)) {
1387
1226
 
1388
 
                lock_update_insert(btr_cur_get_block(cursor), *rec);
 
1227
                lock_update_insert(*rec);
1389
1228
        }
1390
1229
 
 
1230
        err = DB_SUCCESS;
 
1231
 
1391
1232
        if (n_extents > 0) {
1392
1233
                fil_space_release_free_extents(index->space, n_reserved);
1393
1234
        }
1394
1235
 
1395
1236
        *big_rec = big_rec_vec;
1396
1237
 
1397
 
        return(DB_SUCCESS);
 
1238
        return(err);
1398
1239
}
1399
1240
 
1400
1241
/*==================== B-TREE UPDATE =========================*/
1409
1250
                                number */
1410
1251
        ulint           flags,  /* in: undo logging and locking flags */
1411
1252
        btr_cur_t*      cursor, /* in: cursor on record to update */
1412
 
        const upd_t*    update, /* in: update vector */
 
1253
        upd_t*          update, /* in: update vector */
1413
1254
        ulint           cmpl_info,/* in: compiler info on secondary index
1414
1255
                                updates */
1415
1256
        que_thr_t*      thr,    /* in: query thread */
1424
1265
        rec = btr_cur_get_rec(cursor);
1425
1266
        index = cursor->index;
1426
1267
 
1427
 
        if (!dict_index_is_clust(index)) {
 
1268
        if (!(index->type & DICT_CLUSTERED)) {
1428
1269
                /* We do undo logging only when we update a clustered index
1429
1270
                record */
1430
 
                return(lock_sec_rec_modify_check_and_lock(
1431
 
                               flags, btr_cur_get_block(cursor), rec,
1432
 
                               index, thr));
 
1271
                return(lock_sec_rec_modify_check_and_lock(flags, rec, index,
 
1272
                                                          thr));
1433
1273
        }
1434
1274
 
1435
1275
        /* Check if we have to wait for a lock: enqueue an explicit lock
1440
1280
        if (!(flags & BTR_NO_LOCKING_FLAG)) {
1441
1281
                mem_heap_t*     heap            = NULL;
1442
1282
                ulint           offsets_[REC_OFFS_NORMAL_SIZE];
1443
 
                rec_offs_init(offsets_);
 
1283
                *offsets_ = (sizeof offsets_) / sizeof *offsets_;
1444
1284
 
1445
1285
                err = lock_clust_rec_modify_check_and_lock(
1446
 
                        flags, btr_cur_get_block(cursor), rec, index,
 
1286
                        flags, rec, index,
1447
1287
                        rec_get_offsets(rec, index, offsets_,
1448
1288
                                        ULINT_UNDEFINED, &heap), thr);
1449
1289
                if (UNIV_LIKELY_NULL(heap)) {
1472
1312
        ulint           flags,          /* in: flags */
1473
1313
        rec_t*          rec,            /* in: record */
1474
1314
        dict_index_t*   index,          /* in: index where cursor positioned */
1475
 
        const upd_t*    update,         /* in: update vector */
 
1315
        upd_t*          update,         /* in: update vector */
1476
1316
        trx_t*          trx,            /* in: transaction */
1477
1317
        dulint          roll_ptr,       /* in: roll ptr */
1478
1318
        mtr_t*          mtr)            /* in: mtr */
1513
1353
 
1514
1354
/***************************************************************
1515
1355
Parses a redo log record of updating a record in-place. */
1516
 
UNIV_INTERN
 
1356
 
1517
1357
byte*
1518
1358
btr_cur_parse_update_in_place(
1519
1359
/*==========================*/
1520
1360
                                /* out: end of log record or NULL */
1521
1361
        byte*           ptr,    /* in: buffer */
1522
1362
        byte*           end_ptr,/* in: buffer end */
1523
 
        page_t*         page,   /* in/out: page or NULL */
1524
 
        page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
 
1363
        page_t*         page,   /* in: page or NULL */
1525
1364
        dict_index_t*   index)  /* in: index corresponding to page */
1526
1365
{
1527
1366
        ulint   flags;
1577
1416
        offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
1578
1417
 
1579
1418
        if (!(flags & BTR_KEEP_SYS_FLAG)) {
1580
 
                row_upd_rec_sys_fields_in_recovery(rec, page_zip, offsets,
 
1419
                row_upd_rec_sys_fields_in_recovery(rec, offsets,
1581
1420
                                                   pos, trx_id, roll_ptr);
1582
1421
        }
1583
1422
 
1584
 
        row_upd_rec_in_place(rec, index, offsets, update, page_zip);
 
1423
        row_upd_rec_in_place(rec, offsets, update);
1585
1424
 
1586
1425
func_exit:
1587
1426
        mem_heap_free(heap);
1590
1429
}
1591
1430
 
1592
1431
/*****************************************************************
1593
 
See if there is enough place in the page modification log to log
1594
 
an update-in-place. */
1595
 
static
1596
 
ibool
1597
 
btr_cur_update_alloc_zip(
1598
 
/*=====================*/
1599
 
                                /* out: TRUE if enough place */
1600
 
        page_zip_des_t* page_zip,/* in/out: compressed page */
1601
 
        buf_block_t*    block,  /* in/out: buffer page */
1602
 
        dict_index_t*   index,  /* in: the index corresponding to the block */
1603
 
        ulint           length, /* in: size needed */
1604
 
        mtr_t*          mtr)    /* in: mini-transaction */
1605
 
{
1606
 
        ut_a(page_zip == buf_block_get_page_zip(block));
1607
 
        ut_ad(page_zip);
1608
 
 
1609
 
        if (page_zip_available(page_zip, dict_index_is_clust(index),
1610
 
                               length, 0)) {
1611
 
                return(TRUE);
1612
 
        }
1613
 
 
1614
 
        if (!page_zip->m_nonempty) {
1615
 
                /* The page has been freshly compressed, so
1616
 
                recompressing it will not help. */
1617
 
                return(FALSE);
1618
 
        }
1619
 
 
1620
 
        if (!page_zip_compress(page_zip, buf_block_get_frame(block),
1621
 
                               index, mtr)) {
1622
 
                /* Unable to compress the page */
1623
 
                return(FALSE);
1624
 
        }
1625
 
 
1626
 
        /* After recompressing a page, we must make sure that the free
1627
 
        bits in the insert buffer bitmap will not exceed the free
1628
 
        space on the page.  Because this function will not attempt
1629
 
        recompression unless page_zip_available() fails above, it is
1630
 
        safe to reset the free bits if page_zip_available() fails
1631
 
        again, below.  The free bits can safely be reset in a separate
1632
 
        mini-transaction.  If page_zip_available() succeeds below, we
1633
 
        can be sure that the page_zip_compress() above did not reduce
1634
 
        the free space available on the page. */
1635
 
 
1636
 
        if (!page_zip_available(page_zip, dict_index_is_clust(index),
1637
 
                                length, 0)) {
1638
 
                /* Out of space: reset the free bits. */
1639
 
                if (!dict_index_is_clust(index)
1640
 
                    && page_is_leaf(buf_block_get_frame(block))) {
1641
 
                        ibuf_reset_free_bits(block);
1642
 
                }
1643
 
                return(FALSE);
1644
 
        }
1645
 
 
1646
 
        return(TRUE);
1647
 
}
1648
 
 
1649
 
/*****************************************************************
1650
1432
Updates a record when the update causes no size changes in its fields.
1651
1433
We assume here that the ordering fields of the record do not change. */
1652
 
UNIV_INTERN
 
1434
 
1653
1435
ulint
1654
1436
btr_cur_update_in_place(
1655
1437
/*====================*/
1658
1440
        btr_cur_t*      cursor, /* in: cursor on the record to update;
1659
1441
                                cursor stays valid and positioned on the
1660
1442
                                same record */
1661
 
        const upd_t*    update, /* in: update vector */
 
1443
        upd_t*          update, /* in: update vector */
1662
1444
        ulint           cmpl_info,/* in: compiler info on secondary index
1663
1445
                                updates */
1664
1446
        que_thr_t*      thr,    /* in: query thread */
1665
 
        mtr_t*          mtr)    /* in: mtr; must be committed before
1666
 
                                latching any further pages */
 
1447
        mtr_t*          mtr)    /* in: mtr */
1667
1448
{
1668
1449
        dict_index_t*   index;
1669
1450
        buf_block_t*    block;
1670
 
        page_zip_des_t* page_zip;
1671
1451
        ulint           err;
1672
1452
        rec_t*          rec;
1673
1453
        dulint          roll_ptr        = ut_dulint_zero;
1676
1456
        mem_heap_t*     heap            = NULL;
1677
1457
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
1678
1458
        ulint*          offsets         = offsets_;
1679
 
        rec_offs_init(offsets_);
 
1459
        *offsets_ = (sizeof offsets_) / sizeof *offsets_;
1680
1460
 
1681
1461
        rec = btr_cur_get_rec(cursor);
1682
1462
        index = cursor->index;
1690
1470
        }
1691
1471
#endif /* UNIV_DEBUG */
1692
1472
 
1693
 
        block = btr_cur_get_block(cursor);
1694
 
        page_zip = buf_block_get_page_zip(block);
1695
 
 
1696
 
        /* Check that enough space is available on the compressed page. */
1697
 
        if (UNIV_LIKELY_NULL(page_zip)
1698
 
            && !btr_cur_update_alloc_zip(page_zip, block, index,
1699
 
                                         rec_offs_size(offsets), mtr)) {
1700
 
                return(DB_ZIP_OVERFLOW);
1701
 
        }
1702
 
 
1703
1473
        /* Do lock checking and undo logging */
1704
1474
        err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
1705
1475
                                        thr, &roll_ptr);
1711
1481
                return(err);
1712
1482
        }
1713
1483
 
 
1484
        block = buf_block_align(rec);
 
1485
        ut_ad(!!page_is_comp(buf_block_get_frame(block))
 
1486
              == dict_table_is_comp(index->table));
 
1487
 
1714
1488
        if (block->is_hashed) {
1715
1489
                /* The function row_upd_changes_ord_field_binary works only
1716
1490
                if the update vector was built for a clustered index; we must
1717
1491
                NOT call it if index is secondary */
1718
1492
 
1719
 
                if (!dict_index_is_clust(index)
 
1493
                if (!(index->type & DICT_CLUSTERED)
1720
1494
                    || row_upd_changes_ord_field_binary(NULL, index, update)) {
1721
1495
 
1722
1496
                        /* Remove possible hash index pointer to this record */
1727
1501
        }
1728
1502
 
1729
1503
        if (!(flags & BTR_KEEP_SYS_FLAG)) {
1730
 
                row_upd_rec_sys_fields(rec, NULL,
1731
 
                                       index, offsets, trx, roll_ptr);
 
1504
                row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr);
1732
1505
        }
1733
1506
 
1734
1507
        was_delete_marked = rec_get_deleted_flag(
1735
1508
                rec, page_is_comp(buf_block_get_frame(block)));
1736
1509
 
1737
 
        row_upd_rec_in_place(rec, index, offsets, update, page_zip);
 
1510
        row_upd_rec_in_place(rec, offsets, update);
1738
1511
 
1739
1512
        if (block->is_hashed) {
1740
1513
                rw_lock_x_unlock(&btr_search_latch);
1741
1514
        }
1742
1515
 
1743
 
        if (page_zip && !dict_index_is_clust(index)
1744
 
            && page_is_leaf(buf_block_get_frame(block))) {
1745
 
                /* Update the free bits in the insert buffer. */
1746
 
                ibuf_update_free_bits_zip(block, mtr);
1747
 
        }
1748
 
 
1749
 
        btr_cur_update_in_place_log(flags, rec, index, update,
1750
 
                                    trx, roll_ptr, mtr);
1751
 
 
 
1516
        btr_cur_update_in_place_log(flags, rec, index, update, trx, roll_ptr,
 
1517
                                    mtr);
1752
1518
        if (was_delete_marked
1753
1519
            && !rec_get_deleted_flag(rec, page_is_comp(
1754
1520
                                             buf_block_get_frame(block)))) {
1755
1521
                /* The new updated record owns its possible externally
1756
1522
                stored fields */
1757
1523
 
1758
 
                btr_cur_unmark_extern_fields(page_zip,
1759
 
                                             rec, index, offsets, mtr);
 
1524
                btr_cur_unmark_extern_fields(rec, mtr, offsets);
1760
1525
        }
1761
1526
 
1762
1527
        if (UNIV_LIKELY_NULL(heap)) {
1771
1536
little space on the page or if the update would result in too empty a page,
1772
1537
so that tree compression is recommended. We assume here that the ordering
1773
1538
fields of the record do not change. */
1774
 
UNIV_INTERN
 
1539
 
1775
1540
ulint
1776
1541
btr_cur_optimistic_update(
1777
1542
/*======================*/
1778
1543
                                /* out: DB_SUCCESS, or DB_OVERFLOW if the
1779
1544
                                updated record does not fit, DB_UNDERFLOW
1780
 
                                if the page would become too empty, or
1781
 
                                DB_ZIP_OVERFLOW if there is not enough
1782
 
                                space left on the compressed page */
 
1545
                                if the page would become too empty */
1783
1546
        ulint           flags,  /* in: undo logging and locking flags */
1784
1547
        btr_cur_t*      cursor, /* in: cursor on the record to update;
1785
1548
                                cursor stays valid and positioned on the
1786
1549
                                same record */
1787
 
        const upd_t*    update, /* in: update vector; this must also
 
1550
        upd_t*          update, /* in: update vector; this must also
1788
1551
                                contain trx id and roll ptr fields */
1789
1552
        ulint           cmpl_info,/* in: compiler info on secondary index
1790
1553
                                updates */
1791
1554
        que_thr_t*      thr,    /* in: query thread */
1792
 
        mtr_t*          mtr)    /* in: mtr; must be committed before
1793
 
                                latching any further pages */
 
1555
        mtr_t*          mtr)    /* in: mtr */
1794
1556
{
1795
1557
        dict_index_t*   index;
1796
1558
        page_cur_t*     page_cursor;
1797
1559
        ulint           err;
1798
 
        buf_block_t*    block;
1799
1560
        page_t*         page;
1800
 
        page_zip_des_t* page_zip;
1801
1561
        rec_t*          rec;
1802
 
        rec_t*          orig_rec;
1803
1562
        ulint           max_size;
1804
1563
        ulint           new_rec_size;
1805
1564
        ulint           old_rec_size;
1807
1566
        dulint          roll_ptr;
1808
1567
        trx_t*          trx;
1809
1568
        mem_heap_t*     heap;
 
1569
        ibool           reorganized     = FALSE;
1810
1570
        ulint           i;
1811
 
        ulint           n_ext;
1812
1571
        ulint*          offsets;
1813
1572
 
1814
 
        block = btr_cur_get_block(cursor);
1815
 
        page = buf_block_get_frame(block);
1816
 
        orig_rec = rec = btr_cur_get_rec(cursor);
 
1573
        page = btr_cur_get_page(cursor);
 
1574
        rec = btr_cur_get_rec(cursor);
1817
1575
        index = cursor->index;
1818
1576
        ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
1819
 
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1820
1577
 
1821
1578
        heap = mem_heap_create(1024);
1822
1579
        offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
1828
1585
        }
1829
1586
#endif /* UNIV_DEBUG */
1830
1587
 
 
1588
        ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
 
1589
                                MTR_MEMO_PAGE_X_FIX));
1831
1590
        if (!row_upd_changes_field_size_or_external(index, offsets, update)) {
1832
1591
 
1833
1592
                /* The simplest and the most common case: the update does not
1834
1593
                change the size of any field and none of the updated fields is
1835
 
                externally stored in rec or update, and there is enough space
1836
 
                on the compressed page to log the update. */
1837
 
 
 
1594
                externally stored in rec or update */
1838
1595
                mem_heap_free(heap);
1839
1596
                return(btr_cur_update_in_place(flags, cursor, update,
1840
1597
                                               cmpl_info, thr, mtr));
1841
1598
        }
1842
1599
 
 
1600
        for (i = 0; i < upd_get_n_fields(update); i++) {
 
1601
                if (upd_get_nth_field(update, i)->extern_storage) {
 
1602
 
 
1603
                        /* Externally stored fields are treated in pessimistic
 
1604
                        update */
 
1605
 
 
1606
                        mem_heap_free(heap);
 
1607
                        return(DB_OVERFLOW);
 
1608
                }
 
1609
        }
 
1610
 
1843
1611
        if (rec_offs_any_extern(offsets)) {
1844
 
any_extern:
1845
1612
                /* Externally stored fields are treated in pessimistic
1846
1613
                update */
1847
1614
 
1849
1616
                return(DB_OVERFLOW);
1850
1617
        }
1851
1618
 
1852
 
        for (i = 0; i < upd_get_n_fields(update); i++) {
1853
 
                if (dfield_is_ext(&upd_get_nth_field(update, i)->new_val)) {
1854
 
 
1855
 
                        goto any_extern;
1856
 
                }
1857
 
        }
1858
 
 
1859
1619
        page_cursor = btr_cur_get_page_cur(cursor);
1860
1620
 
1861
 
        new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
1862
 
                                           &n_ext, heap);
1863
 
        /* We checked above that there are no externally stored fields. */
1864
 
        ut_a(!n_ext);
 
1621
        new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
1865
1622
 
1866
 
        /* The page containing the clustered index record
1867
 
        corresponding to new_entry is latched in mtr.
1868
 
        Thus the following call is safe. */
1869
1623
        row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
1870
 
                                                     FALSE, heap);
 
1624
                                                     FALSE, NULL);
1871
1625
        old_rec_size = rec_offs_size(offsets);
1872
 
        new_rec_size = rec_get_converted_size(index, new_entry, 0);
1873
 
 
1874
 
        page_zip = buf_block_get_page_zip(block);
1875
 
#ifdef UNIV_ZIP_DEBUG
1876
 
        ut_a(!page_zip || page_zip_validate(page_zip, page));
1877
 
#endif /* UNIV_ZIP_DEBUG */
1878
 
 
1879
 
        if (UNIV_LIKELY_NULL(page_zip)
1880
 
            && !btr_cur_update_alloc_zip(page_zip, block, index,
1881
 
                                         new_rec_size, mtr)) {
1882
 
                err = DB_ZIP_OVERFLOW;
1883
 
                goto err_exit;
1884
 
        }
 
1626
        new_rec_size = rec_get_converted_size(index, new_entry);
1885
1627
 
1886
1628
        if (UNIV_UNLIKELY(new_rec_size
1887
1629
                          >= (page_get_free_space_of_empty(page_is_comp(page))
1888
1630
                              / 2))) {
1889
1631
 
1890
 
                err = DB_OVERFLOW;
1891
 
                goto err_exit;
 
1632
                mem_heap_free(heap);
 
1633
 
 
1634
                return(DB_OVERFLOW);
1892
1635
        }
1893
1636
 
 
1637
        max_size = old_rec_size
 
1638
                + page_get_max_insert_size_after_reorganize(page, 1);
 
1639
 
1894
1640
        if (UNIV_UNLIKELY(page_get_data_size(page)
1895
1641
                          - old_rec_size + new_rec_size
1896
1642
                          < BTR_CUR_PAGE_COMPRESS_LIMIT)) {
1897
1643
 
1898
1644
                /* The page would become too empty */
1899
1645
 
1900
 
                err = DB_UNDERFLOW;
1901
 
                goto err_exit;
 
1646
                mem_heap_free(heap);
 
1647
 
 
1648
                return(DB_UNDERFLOW);
1902
1649
        }
1903
1650
 
1904
 
        max_size = old_rec_size
1905
 
                + page_get_max_insert_size_after_reorganize(page, 1);
1906
 
 
1907
1651
        if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
1908
1652
               && (max_size >= new_rec_size))
1909
1653
              || (page_get_n_recs(page) <= 1))) {
1912
1656
                reorganize: for simplicity, we decide what to do assuming a
1913
1657
                reorganization is needed, though it might not be necessary */
1914
1658
 
1915
 
                err = DB_OVERFLOW;
1916
 
                goto err_exit;
 
1659
                mem_heap_free(heap);
 
1660
 
 
1661
                return(DB_OVERFLOW);
1917
1662
        }
1918
1663
 
1919
1664
        /* Do lock checking and undo logging */
1920
1665
        err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, thr,
1921
1666
                                        &roll_ptr);
1922
1667
        if (err != DB_SUCCESS) {
1923
 
err_exit:
 
1668
 
1924
1669
                mem_heap_free(heap);
 
1670
 
1925
1671
                return(err);
1926
1672
        }
1927
1673
 
1928
1674
        /* Ok, we may do the replacement. Store on the page infimum the
1929
1675
        explicit locks on rec, before deleting rec (see the comment in
1930
 
        btr_cur_pessimistic_update). */
 
1676
        .._pessimistic_update). */
1931
1677
 
1932
 
        lock_rec_store_on_page_infimum(block, rec);
 
1678
        lock_rec_store_on_page_infimum(page, rec);
1933
1679
 
1934
1680
        btr_search_update_hash_on_delete(cursor);
1935
1681
 
1936
 
        /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
1937
 
        invokes rec_offs_make_valid() to point to the copied record that
1938
 
        the fields of new_entry point to.  We have to undo it here. */
1939
 
        ut_ad(rec_offs_validate(NULL, index, offsets));
1940
 
        rec_offs_make_valid(page_cur_get_rec(page_cursor), index, offsets);
1941
 
 
1942
1682
        page_cur_delete_rec(page_cursor, index, offsets, mtr);
1943
1683
 
1944
1684
        page_cur_move_to_prev(page_cursor);
1952
1692
                                              trx->id);
1953
1693
        }
1954
1694
 
1955
 
        /* There are no externally stored columns in new_entry */
1956
 
        rec = btr_cur_insert_if_possible(cursor, new_entry, 0/*n_ext*/, mtr);
 
1695
        rec = btr_cur_insert_if_possible(cursor, new_entry, &reorganized, mtr);
 
1696
 
1957
1697
        ut_a(rec); /* <- We calculated above the insert would fit */
1958
1698
 
1959
 
        if (page_zip && !dict_index_is_clust(index)
1960
 
            && page_is_leaf(page)) {
1961
 
                /* Update the free bits in the insert buffer. */
1962
 
                ibuf_update_free_bits_zip(block, mtr);
 
1699
        if (!rec_get_deleted_flag(rec, page_is_comp(page))) {
 
1700
                /* The new inserted record owns its possible externally
 
1701
                stored fields */
 
1702
 
 
1703
                offsets = rec_get_offsets(rec, index, offsets,
 
1704
                                          ULINT_UNDEFINED, &heap);
 
1705
                btr_cur_unmark_extern_fields(rec, mtr, offsets);
1963
1706
        }
1964
1707
 
1965
1708
        /* Restore the old explicit lock state on the record */
1966
1709
 
1967
 
        lock_rec_restore_from_page_infimum(block, rec, block);
 
1710
        lock_rec_restore_from_page_infimum(rec, page);
1968
1711
 
1969
1712
        page_cur_move_to_next(page_cursor);
1970
1713
 
1983
1726
void
1984
1727
btr_cur_pess_upd_restore_supremum(
1985
1728
/*==============================*/
1986
 
        buf_block_t*    block,  /* in: buffer block of rec */
1987
 
        const rec_t*    rec,    /* in: updated record */
1988
 
        mtr_t*          mtr)    /* in: mtr */
 
1729
        rec_t*  rec,    /* in: updated record */
 
1730
        mtr_t*  mtr)    /* in: mtr */
1989
1731
{
1990
 
        page_t*         page;
1991
 
        buf_block_t*    prev_block;
1992
 
        ulint           space;
1993
 
        ulint           zip_size;
1994
 
        ulint           prev_page_no;
 
1732
        page_t* page;
 
1733
        page_t* prev_page;
 
1734
        ulint   space;
 
1735
        ulint   prev_page_no;
1995
1736
 
1996
 
        page = buf_block_get_frame(block);
 
1737
        page = buf_frame_align(rec);
1997
1738
 
1998
1739
        if (page_rec_get_next(page_get_infimum_rec(page)) != rec) {
1999
1740
                /* Updated record is not the first user record on its page */
2001
1742
                return;
2002
1743
        }
2003
1744
 
2004
 
        space = buf_block_get_space(block);
2005
 
        zip_size = buf_block_get_zip_size(block);
 
1745
        space = buf_frame_get_space_id(page);
2006
1746
        prev_page_no = btr_page_get_prev(page, mtr);
2007
1747
 
2008
1748
        ut_ad(prev_page_no != FIL_NULL);
2009
 
        prev_block = buf_page_get_with_no_latch(space, zip_size,
2010
 
                                                prev_page_no, mtr);
 
1749
        prev_page = buf_page_get_with_no_latch(space, prev_page_no, mtr);
2011
1750
#ifdef UNIV_BTR_DEBUG
2012
 
        ut_a(btr_page_get_next(prev_block->frame, mtr)
2013
 
             == page_get_page_no(page));
 
1751
        ut_a(btr_page_get_next(prev_page, mtr)
 
1752
             == buf_frame_get_page_no(page));
2014
1753
#endif /* UNIV_BTR_DEBUG */
2015
1754
 
2016
 
        /* We must already have an x-latch on prev_block! */
2017
 
        ut_ad(mtr_memo_contains(mtr, prev_block, MTR_MEMO_PAGE_X_FIX));
 
1755
        /* We must already have an x-latch to prev_page! */
 
1756
        ut_ad(mtr_memo_contains(mtr, buf_block_align(prev_page),
 
1757
                                MTR_MEMO_PAGE_X_FIX));
2018
1758
 
2019
 
        lock_rec_reset_and_inherit_gap_locks(prev_block, block,
2020
 
                                             PAGE_HEAP_NO_SUPREMUM,
2021
 
                                             page_rec_get_heap_no(rec));
 
1759
        lock_rec_reset_and_inherit_gap_locks(page_get_supremum_rec(prev_page),
 
1760
                                             rec);
2022
1761
}
2023
1762
 
2024
1763
/*****************************************************************
2027
1766
update is made on the leaf level, to avoid deadlocks, mtr must also
2028
1767
own x-latches to brothers of page, if those brothers exist. We assume
2029
1768
here that the ordering fields of the record do not change. */
2030
 
UNIV_INTERN
 
1769
 
2031
1770
ulint
2032
1771
btr_cur_pessimistic_update(
2033
1772
/*=======================*/
2035
1774
        ulint           flags,  /* in: undo logging, locking, and rollback
2036
1775
                                flags */
2037
1776
        btr_cur_t*      cursor, /* in: cursor on the record to update */
2038
 
        mem_heap_t**    heap,   /* in/out: pointer to memory heap, or NULL */
2039
1777
        big_rec_t**     big_rec,/* out: big rec vector whose fields have to
2040
1778
                                be stored externally by the caller, or NULL */
2041
 
        const upd_t*    update, /* in: update vector; this is also allowed to
 
1779
        upd_t*          update, /* in: update vector; this is also allowed to
2042
1780
                                contain trx id and roll ptr fields, but
2043
1781
                                the values in update vector have no effect */
2044
1782
        ulint           cmpl_info,/* in: compiler info on secondary index
2045
1783
                                updates */
2046
1784
        que_thr_t*      thr,    /* in: query thread */
2047
 
        mtr_t*          mtr)    /* in: mtr; must be committed before
2048
 
                                latching any further pages */
 
1785
        mtr_t*          mtr)    /* in: mtr */
2049
1786
{
2050
1787
        big_rec_t*      big_rec_vec     = NULL;
2051
1788
        big_rec_t*      dummy_big_rec;
2052
1789
        dict_index_t*   index;
2053
 
        buf_block_t*    block;
2054
1790
        page_t*         page;
2055
 
        page_zip_des_t* page_zip;
2056
1791
        rec_t*          rec;
2057
1792
        page_cur_t*     page_cursor;
2058
1793
        dtuple_t*       new_entry;
 
1794
        mem_heap_t*     heap;
2059
1795
        ulint           err;
2060
1796
        ulint           optim_err;
 
1797
        ibool           dummy_reorganized;
2061
1798
        dulint          roll_ptr;
2062
1799
        trx_t*          trx;
2063
1800
        ibool           was_first;
 
1801
        ibool           success;
2064
1802
        ulint           n_extents       = 0;
2065
1803
        ulint           n_reserved;
2066
 
        ulint           n_ext;
 
1804
        ulint*          ext_vect;
 
1805
        ulint           n_ext_vect;
 
1806
        ulint           reserve_flag;
2067
1807
        ulint*          offsets         = NULL;
2068
1808
 
2069
1809
        *big_rec = NULL;
2070
1810
 
2071
 
        block = btr_cur_get_block(cursor);
2072
 
        page = buf_block_get_frame(block);
2073
 
        page_zip = buf_block_get_page_zip(block);
 
1811
        page = btr_cur_get_page(cursor);
2074
1812
        rec = btr_cur_get_rec(cursor);
2075
1813
        index = cursor->index;
2076
1814
 
2077
1815
        ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
2078
1816
                                MTR_MEMO_X_LOCK));
2079
 
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2080
 
#ifdef UNIV_ZIP_DEBUG
2081
 
        ut_a(!page_zip || page_zip_validate(page_zip, page));
2082
 
#endif /* UNIV_ZIP_DEBUG */
 
1817
        ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
 
1818
                                MTR_MEMO_PAGE_X_FIX));
2083
1819
 
2084
1820
        optim_err = btr_cur_optimistic_update(flags, cursor, update,
2085
1821
                                              cmpl_info, thr, mtr);
2086
1822
 
2087
 
        switch (optim_err) {
2088
 
        case DB_UNDERFLOW:
2089
 
        case DB_OVERFLOW:
2090
 
        case DB_ZIP_OVERFLOW:
2091
 
                break;
2092
 
        default:
 
1823
        if (optim_err != DB_UNDERFLOW && optim_err != DB_OVERFLOW) {
 
1824
 
2093
1825
                return(optim_err);
2094
1826
        }
2095
1827
 
2102
1834
        }
2103
1835
 
2104
1836
        if (optim_err == DB_OVERFLOW) {
2105
 
                ulint   reserve_flag;
2106
 
 
2107
1837
                /* First reserve enough free space for the file segments
2108
1838
                of the index tree, so that the update will not fail because
2109
1839
                of lack of space */
2116
1846
                        reserve_flag = FSP_NORMAL;
2117
1847
                }
2118
1848
 
2119
 
                if (!fsp_reserve_free_extents(&n_reserved, index->space,
2120
 
                                              n_extents, reserve_flag, mtr)) {
2121
 
                        return(DB_OUT_OF_FILE_SPACE);
 
1849
                success = fsp_reserve_free_extents(&n_reserved, index->space,
 
1850
                                                   n_extents,
 
1851
                                                   reserve_flag, mtr);
 
1852
                if (!success) {
 
1853
                        err = DB_OUT_OF_FILE_SPACE;
 
1854
 
 
1855
                        return(err);
2122
1856
                }
2123
1857
        }
2124
1858
 
2125
 
        if (!*heap) {
2126
 
                *heap = mem_heap_create(1024);
2127
 
        }
2128
 
        offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, heap);
 
1859
        heap = mem_heap_create(1024);
 
1860
        offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
2129
1861
 
2130
1862
        trx = thr_get_trx(thr);
2131
1863
 
2132
 
        new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
2133
 
                                           &n_ext, *heap);
2134
 
        /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
2135
 
        invokes rec_offs_make_valid() to point to the copied record that
2136
 
        the fields of new_entry point to.  We have to undo it here. */
2137
 
        ut_ad(rec_offs_validate(NULL, index, offsets));
2138
 
        rec_offs_make_valid(rec, index, offsets);
 
1864
        new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);
2139
1865
 
2140
 
        /* The page containing the clustered index record
2141
 
        corresponding to new_entry is latched in mtr.  If the
2142
 
        clustered index record is delete-marked, then its externally
2143
 
        stored fields cannot have been purged yet, because then the
2144
 
        purge would also have removed the clustered index record
2145
 
        itself.  Thus the following call is safe. */
2146
1866
        row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
2147
 
                                                     FALSE, *heap);
 
1867
                                                     FALSE, heap);
2148
1868
        if (!(flags & BTR_KEEP_SYS_FLAG)) {
2149
1869
                row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
2150
1870
                                              roll_ptr);
2152
1872
                                              trx->id);
2153
1873
        }
2154
1874
 
2155
 
        if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(offsets)) {
 
1875
        if (flags & BTR_NO_UNDO_LOG_FLAG) {
2156
1876
                /* We are in a transaction rollback undoing a row
2157
1877
                update: we must free possible externally stored fields
2158
1878
                which got new values in the update, if they are not
2160
1880
                updated the primary key to another value, and then
2161
1881
                update it back again. */
2162
1882
 
2163
 
                ut_ad(big_rec_vec == NULL);
 
1883
                ut_a(big_rec_vec == NULL);
2164
1884
 
2165
 
                btr_rec_free_updated_extern_fields(index, rec, page_zip,
2166
 
                                                   offsets, update, mtr);
 
1885
                btr_rec_free_updated_extern_fields(index, rec, offsets,
 
1886
                                                   update, TRUE, mtr);
2167
1887
        }
2168
1888
 
2169
1889
        /* We have to set appropriate extern storage bits in the new
2170
1890
        record to be inserted: we have to remember which fields were such */
2171
1891
 
 
1892
        ext_vect = mem_heap_alloc(heap, sizeof(ulint)
 
1893
                                  * dict_index_get_n_fields(index));
2172
1894
        ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
2173
 
        offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap);
2174
 
        n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
2175
 
 
2176
 
        if (page_zip_rec_needs_ext(rec_get_converted_size(index, new_entry,
2177
 
                                                          n_ext),
2178
 
                                   page_is_comp(page), page_zip
2179
 
                                   ? page_zip_get_size(page_zip) : 0)) {
2180
 
                big_rec_vec = dtuple_convert_big_rec(index, new_entry, &n_ext);
2181
 
                if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
 
1895
        offsets = rec_get_offsets(rec, index, offsets,
 
1896
                                  ULINT_UNDEFINED, &heap);
 
1897
        n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets, update);
 
1898
 
 
1899
        if (UNIV_UNLIKELY(rec_get_converted_size(index, new_entry)
 
1900
                          >= ut_min(page_get_free_space_of_empty(
 
1901
                                            page_is_comp(page)) / 2,
 
1902
                                    REC_MAX_DATA_SIZE))) {
 
1903
 
 
1904
                big_rec_vec = dtuple_convert_big_rec(index, new_entry,
 
1905
                                                     ext_vect, n_ext_vect);
 
1906
                if (big_rec_vec == NULL) {
2182
1907
 
2183
1908
                        err = DB_TOO_BIG_RECORD;
2184
1909
                        goto return_after_reservations;
2185
1910
                }
2186
1911
        }
2187
1912
 
 
1913
        page_cursor = btr_cur_get_page_cur(cursor);
 
1914
 
2188
1915
        /* Store state of explicit locks on rec on the page infimum record,
2189
1916
        before deleting rec. The page infimum acts as a dummy carrier of the
2190
1917
        locks, taking care also of lock releases, before we can move the locks
2194
1921
        delete the lock structs set on the root page even if the root
2195
1922
        page carries just node pointers. */
2196
1923
 
2197
 
        lock_rec_store_on_page_infimum(block, rec);
 
1924
        lock_rec_store_on_page_infimum(buf_frame_align(rec), rec);
2198
1925
 
2199
1926
        btr_search_update_hash_on_delete(cursor);
2200
1927
 
2201
 
#ifdef UNIV_ZIP_DEBUG
2202
 
        ut_a(!page_zip || page_zip_validate(page_zip, page));
2203
 
#endif /* UNIV_ZIP_DEBUG */
2204
 
        page_cursor = btr_cur_get_page_cur(cursor);
2205
 
 
2206
1928
        page_cur_delete_rec(page_cursor, index, offsets, mtr);
2207
1929
 
2208
1930
        page_cur_move_to_prev(page_cursor);
2209
1931
 
2210
 
        rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);
 
1932
        rec = btr_cur_insert_if_possible(cursor, new_entry,
 
1933
                                         &dummy_reorganized, mtr);
 
1934
        ut_a(rec || optim_err != DB_UNDERFLOW);
2211
1935
 
2212
1936
        if (rec) {
2213
 
                lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
2214
 
                                                   rec, block);
 
1937
                lock_rec_restore_from_page_infimum(rec, page);
 
1938
                rec_set_field_extern_bits(rec, index,
 
1939
                                          ext_vect, n_ext_vect, mtr);
2215
1940
 
2216
1941
                offsets = rec_get_offsets(rec, index, offsets,
2217
 
                                          ULINT_UNDEFINED, heap);
 
1942
                                          ULINT_UNDEFINED, &heap);
2218
1943
 
2219
1944
                if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
2220
1945
                        /* The new inserted record owns its possible externally
2221
1946
                        stored fields */
2222
 
                        btr_cur_unmark_extern_fields(page_zip,
2223
 
                                                     rec, index, offsets, mtr);
 
1947
                        btr_cur_unmark_extern_fields(rec, mtr, offsets);
2224
1948
                }
2225
1949
 
2226
1950
                btr_cur_compress_if_useful(cursor, mtr);
2227
1951
 
2228
 
                if (page_zip && !dict_index_is_clust(index)
2229
 
                    && page_is_leaf(page)) {
2230
 
                        /* Update the free bits in the insert buffer. */
2231
 
                        ibuf_update_free_bits_zip(block, mtr);
2232
 
                }
2233
 
 
2234
1952
                err = DB_SUCCESS;
2235
1953
                goto return_after_reservations;
 
1954
        }
 
1955
 
 
1956
        if (page_cur_is_before_first(page_cursor)) {
 
1957
                /* The record to be updated was positioned as the first user
 
1958
                record on its page */
 
1959
 
 
1960
                was_first = TRUE;
2236
1961
        } else {
2237
 
                ut_a(optim_err != DB_UNDERFLOW);
2238
 
 
2239
 
                /* Out of space: reset the free bits. */
2240
 
                if (!dict_index_is_clust(index)
2241
 
                    && page_is_leaf(page)) {
2242
 
                        ibuf_reset_free_bits(block);
2243
 
                }
 
1962
                was_first = FALSE;
2244
1963
        }
2245
1964
 
2246
 
        /* Was the record to be updated positioned as the first user
2247
 
        record on its page? */
2248
 
        was_first = page_cur_is_before_first(page_cursor);
2249
 
 
2250
1965
        /* The first parameter means that no lock checking and undo logging
2251
1966
        is made in the insert */
2252
1967
 
2254
1969
                                         | BTR_NO_LOCKING_FLAG
2255
1970
                                         | BTR_KEEP_SYS_FLAG,
2256
1971
                                         cursor, new_entry, &rec,
2257
 
                                         &dummy_big_rec, n_ext, NULL, mtr);
 
1972
                                         &dummy_big_rec, NULL, mtr);
2258
1973
        ut_a(rec);
2259
1974
        ut_a(err == DB_SUCCESS);
2260
1975
        ut_a(dummy_big_rec == NULL);
2261
1976
 
 
1977
        rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr);
 
1978
        offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
 
1979
 
2262
1980
        if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
2263
1981
                /* The new inserted record owns its possible externally
2264
1982
                stored fields */
2265
 
                buf_block_t*    rec_block = btr_cur_get_block(cursor);
2266
 
 
2267
 
#ifdef UNIV_ZIP_DEBUG
2268
 
                ut_a(!page_zip || page_zip_validate(page_zip, page));
2269
 
                page = buf_block_get_frame(rec_block);
2270
 
#endif /* UNIV_ZIP_DEBUG */
2271
 
                page_zip = buf_block_get_page_zip(rec_block);
2272
 
 
2273
 
                offsets = rec_get_offsets(rec, index, offsets,
2274
 
                                          ULINT_UNDEFINED, heap);
2275
 
                btr_cur_unmark_extern_fields(page_zip,
2276
 
                                             rec, index, offsets, mtr);
 
1983
 
 
1984
                btr_cur_unmark_extern_fields(rec, mtr, offsets);
2277
1985
        }
2278
1986
 
2279
 
        lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
2280
 
                                           rec, block);
 
1987
        lock_rec_restore_from_page_infimum(rec, page);
2281
1988
 
2282
1989
        /* If necessary, restore also the correct lock state for a new,
2283
1990
        preceding supremum record created in a page split. While the old
2285
1992
        from a wrong record. */
2286
1993
 
2287
1994
        if (!was_first) {
2288
 
                btr_cur_pess_upd_restore_supremum(btr_cur_get_block(cursor),
2289
 
                                                  rec, mtr);
 
1995
                btr_cur_pess_upd_restore_supremum(rec, mtr);
2290
1996
        }
2291
1997
 
2292
1998
return_after_reservations:
2293
 
#ifdef UNIV_ZIP_DEBUG
2294
 
        ut_a(!page_zip || page_zip_validate(page_zip, page));
2295
 
#endif /* UNIV_ZIP_DEBUG */
 
1999
        mem_heap_free(heap);
2296
2000
 
2297
2001
        if (n_extents > 0) {
2298
2002
                fil_space_release_free_extents(index->space, n_reserved);
2354
2058
/********************************************************************
2355
2059
Parses the redo log record for delete marking or unmarking of a clustered
2356
2060
index record. */
2357
 
UNIV_INTERN
 
2061
 
2358
2062
byte*
2359
2063
btr_cur_parse_del_mark_set_clust_rec(
2360
2064
/*=================================*/
2361
2065
                                /* out: end of log record or NULL */
2362
2066
        byte*           ptr,    /* in: buffer */
2363
2067
        byte*           end_ptr,/* in: buffer end */
2364
 
        page_t*         page,   /* in/out: page or NULL */
2365
 
        page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */
2366
 
        dict_index_t*   index)  /* in: index corresponding to page */
 
2068
        dict_index_t*   index,  /* in: index corresponding to page */
 
2069
        page_t*         page)   /* in: page or NULL */
2367
2070
{
2368
2071
        ulint   flags;
2369
2072
        ulint   val;
2406
2109
        if (page) {
2407
2110
                rec = page + offset;
2408
2111
 
2409
 
                /* We do not need to reserve btr_search_latch, as the page
2410
 
                is only being recovered, and there cannot be a hash index to
2411
 
                it. */
2412
 
 
2413
 
                btr_rec_set_deleted_flag(rec, page_zip, val);
2414
 
 
2415
2112
                if (!(flags & BTR_KEEP_SYS_FLAG)) {
2416
2113
                        mem_heap_t*     heap            = NULL;
2417
2114
                        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
2418
 
                        rec_offs_init(offsets_);
 
2115
                        *offsets_ = (sizeof offsets_) / sizeof *offsets_;
2419
2116
 
2420
2117
                        row_upd_rec_sys_fields_in_recovery(
2421
 
                                rec, page_zip,
2422
 
                                rec_get_offsets(rec, index, offsets_,
2423
 
                                                ULINT_UNDEFINED, &heap),
 
2118
                                rec, rec_get_offsets(rec, index, offsets_,
 
2119
                                                     ULINT_UNDEFINED, &heap),
2424
2120
                                pos, trx_id, roll_ptr);
2425
2121
                        if (UNIV_LIKELY_NULL(heap)) {
2426
2122
                                mem_heap_free(heap);
2427
2123
                        }
2428
2124
                }
 
2125
 
 
2126
                /* We do not need to reserve btr_search_latch, as the page
 
2127
                is only being recovered, and there cannot be a hash index to
 
2128
                it. */
 
2129
 
 
2130
                rec_set_deleted_flag(rec, page_is_comp(page), val);
2429
2131
        }
2430
2132
 
2431
2133
        return(ptr);
2436
2138
undo log on this delete marking. Writes in the trx id field the id
2437
2139
of the deleting transaction, and in the roll ptr field a pointer to the
2438
2140
undo log record created. */
2439
 
UNIV_INTERN
 
2141
 
2440
2142
ulint
2441
2143
btr_cur_del_mark_set_clust_rec(
2442
2144
/*===========================*/
2453
2155
        dulint          roll_ptr;
2454
2156
        ulint           err;
2455
2157
        rec_t*          rec;
2456
 
        page_zip_des_t* page_zip;
2457
2158
        trx_t*          trx;
2458
2159
        mem_heap_t*     heap            = NULL;
2459
2160
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
2460
2161
        ulint*          offsets         = offsets_;
2461
 
        rec_offs_init(offsets_);
 
2162
        *offsets_ = (sizeof offsets_) / sizeof *offsets_;
2462
2163
 
2463
2164
        rec = btr_cur_get_rec(cursor);
2464
2165
        index = cursor->index;
2472
2173
        }
2473
2174
#endif /* UNIV_DEBUG */
2474
2175
 
2475
 
        ut_ad(dict_index_is_clust(index));
 
2176
        ut_ad(index->type & DICT_CLUSTERED);
2476
2177
        ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
2477
2178
 
2478
2179
        err = lock_clust_rec_modify_check_and_lock(flags,
2479
 
                                                   btr_cur_get_block(cursor),
2480
2180
                                                   rec, index, offsets, thr);
2481
2181
 
2482
2182
        if (err != DB_SUCCESS) {
2483
2183
 
2484
 
                goto func_exit;
 
2184
                if (UNIV_LIKELY_NULL(heap)) {
 
2185
                        mem_heap_free(heap);
 
2186
                }
 
2187
                return(err);
2485
2188
        }
2486
2189
 
2487
2190
        err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
2489
2192
                                            &roll_ptr);
2490
2193
        if (err != DB_SUCCESS) {
2491
2194
 
2492
 
                goto func_exit;
 
2195
                if (UNIV_LIKELY_NULL(heap)) {
 
2196
                        mem_heap_free(heap);
 
2197
                }
 
2198
                return(err);
2493
2199
        }
2494
2200
 
2495
 
        block = btr_cur_get_block(cursor);
 
2201
        block = buf_block_align(rec);
2496
2202
 
2497
2203
        if (block->is_hashed) {
2498
2204
                rw_lock_x_lock(&btr_search_latch);
2499
2205
        }
2500
2206
 
2501
 
        page_zip = buf_block_get_page_zip(block);
2502
 
 
2503
 
        btr_rec_set_deleted_flag(rec, page_zip, val);
 
2207
        rec_set_deleted_flag(rec, rec_offs_comp(offsets), val);
2504
2208
 
2505
2209
        trx = thr_get_trx(thr);
2506
2210
 
2507
2211
        if (!(flags & BTR_KEEP_SYS_FLAG)) {
2508
 
                row_upd_rec_sys_fields(rec, page_zip,
2509
 
                                       index, offsets, trx, roll_ptr);
 
2212
                row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr);
2510
2213
        }
2511
2214
 
2512
2215
        if (block->is_hashed) {
2515
2218
 
2516
2219
        btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
2517
2220
                                           roll_ptr, mtr);
2518
 
 
2519
 
func_exit:
2520
2221
        if (UNIV_LIKELY_NULL(heap)) {
2521
2222
                mem_heap_free(heap);
2522
2223
        }
2523
 
        return(err);
 
2224
        return(DB_SUCCESS);
2524
2225
}
2525
2226
 
2526
2227
/********************************************************************
2559
2260
/********************************************************************
2560
2261
Parses the redo log record for delete marking or unmarking of a secondary
2561
2262
index record. */
2562
 
UNIV_INTERN
 
2263
 
2563
2264
byte*
2564
2265
btr_cur_parse_del_mark_set_sec_rec(
2565
2266
/*===============================*/
2566
2267
                                /* out: end of log record or NULL */
2567
2268
        byte*           ptr,    /* in: buffer */
2568
2269
        byte*           end_ptr,/* in: buffer end */
2569
 
        page_t*         page,   /* in/out: page or NULL */
2570
 
        page_zip_des_t* page_zip)/* in/out: compressed page, or NULL */
 
2270
        page_t*         page)   /* in: page or NULL */
2571
2271
{
2572
2272
        ulint   val;
2573
2273
        ulint   offset;
2593
2293
                is only being recovered, and there cannot be a hash index to
2594
2294
                it. */
2595
2295
 
2596
 
                btr_rec_set_deleted_flag(rec, page_zip, val);
 
2296
                rec_set_deleted_flag(rec, page_is_comp(page), val);
2597
2297
        }
2598
2298
 
2599
2299
        return(ptr);
2601
2301
 
2602
2302
/***************************************************************
2603
2303
Sets a secondary index record delete mark to TRUE or FALSE. */
2604
 
UNIV_INTERN
 
2304
 
2605
2305
ulint
2606
2306
btr_cur_del_mark_set_sec_rec(
2607
2307
/*=========================*/
2617
2317
        rec_t*          rec;
2618
2318
        ulint           err;
2619
2319
 
2620
 
        block = btr_cur_get_block(cursor);
2621
2320
        rec = btr_cur_get_rec(cursor);
2622
2321
 
2623
2322
#ifdef UNIV_DEBUG
2628
2327
        }
2629
2328
#endif /* UNIV_DEBUG */
2630
2329
 
2631
 
        err = lock_sec_rec_modify_check_and_lock(flags,
2632
 
                                                 btr_cur_get_block(cursor),
2633
 
                                                 rec, cursor->index, thr);
 
2330
        err = lock_sec_rec_modify_check_and_lock(flags, rec, cursor->index,
 
2331
                                                 thr);
2634
2332
        if (err != DB_SUCCESS) {
2635
2333
 
2636
2334
                return(err);
2637
2335
        }
2638
2336
 
2639
 
        ut_ad(!!page_rec_is_comp(rec)
 
2337
        block = buf_block_align(rec);
 
2338
        ut_ad(!!page_is_comp(buf_block_get_frame(block))
2640
2339
              == dict_table_is_comp(cursor->index->table));
2641
2340
 
2642
2341
        if (block->is_hashed) {
2643
2342
                rw_lock_x_lock(&btr_search_latch);
2644
2343
        }
2645
2344
 
2646
 
        btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block), val);
 
2345
        rec_set_deleted_flag(rec, page_is_comp(buf_block_get_frame(block)),
 
2346
                             val);
2647
2347
 
2648
2348
        if (block->is_hashed) {
2649
2349
                rw_lock_x_unlock(&btr_search_latch);
2657
2357
/***************************************************************
2658
2358
Sets a secondary index record delete mark to FALSE. This function is only
2659
2359
used by the insert buffer insert merge mechanism. */
2660
 
UNIV_INTERN
 
2360
 
2661
2361
void
2662
2362
btr_cur_del_unmark_for_ibuf(
2663
2363
/*========================*/
2664
 
        rec_t*          rec,            /* in/out: record to delete unmark */
2665
 
        page_zip_des_t* page_zip,       /* in/out: compressed page
2666
 
                                        corresponding to rec, or NULL
2667
 
                                        when the tablespace is
2668
 
                                        uncompressed */
2669
 
        mtr_t*          mtr)            /* in: mtr */
 
2364
        rec_t*          rec,    /* in: record to delete unmark */
 
2365
        mtr_t*          mtr)    /* in: mtr */
2670
2366
{
2671
2367
        /* We do not need to reserve btr_search_latch, as the page has just
2672
2368
        been read to the buffer pool and there cannot be a hash index to it. */
2673
2369
 
2674
 
        btr_rec_set_deleted_flag(rec, page_zip, FALSE);
 
2370
        rec_set_deleted_flag(rec, page_is_comp(buf_frame_align(rec)), FALSE);
2675
2371
 
2676
2372
        btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr);
2677
2373
}
2679
2375
/*==================== B-TREE RECORD REMOVE =========================*/
2680
2376
 
2681
2377
/*****************************************************************
 
2378
Tries to compress a page of the tree on the leaf level. It is assumed
 
2379
that mtr holds an x-latch on the tree and on the cursor page. To avoid
 
2380
deadlocks, mtr must also own x-latches to brothers of page, if those
 
2381
brothers exist. NOTE: it is assumed that the caller has reserved enough
 
2382
free extents so that the compression will always succeed if done! */
 
2383
 
 
2384
void
 
2385
btr_cur_compress(
 
2386
/*=============*/
 
2387
        btr_cur_t*      cursor, /* in: cursor on the page to compress;
 
2388
                                cursor does not stay valid */
 
2389
        mtr_t*          mtr)    /* in: mtr */
 
2390
{
 
2391
        ut_ad(mtr_memo_contains(mtr,
 
2392
                                dict_index_get_lock(btr_cur_get_index(cursor)),
 
2393
                                MTR_MEMO_X_LOCK));
 
2394
        ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
 
2395
                                MTR_MEMO_PAGE_X_FIX));
 
2396
        ut_ad(btr_page_get_level(btr_cur_get_page(cursor), mtr) == 0);
 
2397
 
 
2398
        btr_compress(cursor, mtr);
 
2399
}
 
2400
 
 
2401
/*****************************************************************
2682
2402
Tries to compress a page of the tree if it seems useful. It is assumed
2683
2403
that mtr holds an x-latch on the tree and on the cursor page. To avoid
2684
2404
deadlocks, mtr must also own x-latches to brothers of page, if those
2685
2405
brothers exist. NOTE: it is assumed that the caller has reserved enough
2686
2406
free extents so that the compression will always succeed if done! */
2687
 
UNIV_INTERN
 
2407
 
2688
2408
ibool
2689
2409
btr_cur_compress_if_useful(
2690
2410
/*=======================*/
2697
2417
        ut_ad(mtr_memo_contains(mtr,
2698
2418
                                dict_index_get_lock(btr_cur_get_index(cursor)),
2699
2419
                                MTR_MEMO_X_LOCK));
2700
 
        ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
 
2420
        ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
2701
2421
                                MTR_MEMO_PAGE_X_FIX));
2702
2422
 
2703
 
        return(btr_cur_compress_recommendation(cursor, mtr)
2704
 
               && btr_compress(cursor, mtr));
 
2423
        if (btr_cur_compress_recommendation(cursor, mtr)) {
 
2424
 
 
2425
                btr_compress(cursor, mtr);
 
2426
 
 
2427
                return(TRUE);
 
2428
        }
 
2429
 
 
2430
        return(FALSE);
2705
2431
}
2706
2432
 
2707
2433
/***********************************************************
2708
2434
Removes the record on which the tree cursor is positioned on a leaf page.
2709
2435
It is assumed that the mtr has an x-latch on the page where the cursor is
2710
2436
positioned, but no latch on the whole tree. */
2711
 
UNIV_INTERN
 
2437
 
2712
2438
ibool
2713
2439
btr_cur_optimistic_delete(
2714
2440
/*======================*/
2720
2446
                                successor of the deleted record */
2721
2447
        mtr_t*          mtr)    /* in: mtr */
2722
2448
{
2723
 
        buf_block_t*    block;
 
2449
        page_t*         page;
 
2450
        ulint           max_ins_size;
2724
2451
        rec_t*          rec;
2725
2452
        mem_heap_t*     heap            = NULL;
2726
2453
        ulint           offsets_[REC_OFFS_NORMAL_SIZE];
2727
2454
        ulint*          offsets         = offsets_;
2728
2455
        ibool           no_compress_needed;
2729
 
        rec_offs_init(offsets_);
 
2456
        *offsets_ = (sizeof offsets_) / sizeof *offsets_;
2730
2457
 
2731
 
        ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
 
2458
        ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)),
2732
2459
                                MTR_MEMO_PAGE_X_FIX));
2733
2460
        /* This is intended only for leaf page deletions */
2734
2461
 
2735
 
        block = btr_cur_get_block(cursor);
 
2462
        page = btr_cur_get_page(cursor);
2736
2463
 
2737
 
        ut_ad(page_is_leaf(buf_block_get_frame(block)));
 
2464
        ut_ad(btr_page_get_level(page, mtr) == 0);
2738
2465
 
2739
2466
        rec = btr_cur_get_rec(cursor);
2740
2467
        offsets = rec_get_offsets(rec, cursor->index, offsets,
2746
2473
 
2747
2474
        if (no_compress_needed) {
2748
2475
 
2749
 
                page_t*         page    = buf_block_get_frame(block);
2750
 
                page_zip_des_t* page_zip= buf_block_get_page_zip(block);
2751
 
                ulint           max_ins = 0;
2752
 
 
2753
 
                lock_update_delete(block, rec);
 
2476
                lock_update_delete(rec);
2754
2477
 
2755
2478
                btr_search_update_hash_on_delete(cursor);
2756
2479
 
2757
 
                if (!page_zip) {
2758
 
                        max_ins = page_get_max_insert_size_after_reorganize(
2759
 
                                page, 1);
2760
 
                }
2761
 
#ifdef UNIV_ZIP_DEBUG
2762
 
                ut_a(!page_zip || page_zip_validate(page_zip, page));
2763
 
#endif /* UNIV_ZIP_DEBUG */
 
2480
                max_ins_size = page_get_max_insert_size_after_reorganize(
 
2481
                        page, 1);
2764
2482
                page_cur_delete_rec(btr_cur_get_page_cur(cursor),
2765
2483
                                    cursor->index, offsets, mtr);
2766
 
#ifdef UNIV_ZIP_DEBUG
2767
 
                ut_a(!page_zip || page_zip_validate(page_zip, page));
2768
 
#endif /* UNIV_ZIP_DEBUG */
2769
2484
 
2770
 
                if (dict_index_is_clust(cursor->index)
2771
 
                    || !page_is_leaf(page)) {
2772
 
                        /* The insert buffer does not handle
2773
 
                        inserts to clustered indexes or to non-leaf
2774
 
                        pages of secondary index B-trees. */
2775
 
                } else if (page_zip) {
2776
 
                        ibuf_update_free_bits_zip(block, mtr);
2777
 
                } else {
2778
 
                        ibuf_update_free_bits_low(block, max_ins, mtr);
2779
 
                }
 
2485
                ibuf_update_free_bits_low(cursor->index, page, max_ins_size,
 
2486
                                          mtr);
2780
2487
        }
2781
2488
 
2782
2489
        if (UNIV_LIKELY_NULL(heap)) {
2793
2500
an x-latch on the tree and on the cursor page. To avoid deadlocks,
2794
2501
mtr must also own x-latches to brothers of page, if those brothers
2795
2502
exist. */
2796
 
UNIV_INTERN
 
2503
 
2797
2504
ibool
2798
2505
btr_cur_pessimistic_delete(
2799
2506
/*=======================*/
2814
2521
        ibool           in_rollback,/* in: TRUE if called in rollback */
2815
2522
        mtr_t*          mtr)    /* in: mtr */
2816
2523
{
2817
 
        buf_block_t*    block;
2818
2524
        page_t*         page;
2819
 
        page_zip_des_t* page_zip;
2820
2525
        dict_index_t*   index;
2821
2526
        rec_t*          rec;
2822
2527
        dtuple_t*       node_ptr;
2828
2533
        mem_heap_t*     heap;
2829
2534
        ulint*          offsets;
2830
2535
 
2831
 
        block = btr_cur_get_block(cursor);
2832
 
        page = buf_block_get_frame(block);
 
2536
        page = btr_cur_get_page(cursor);
2833
2537
        index = btr_cur_get_index(cursor);
2834
2538
 
2835
2539
        ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
2836
2540
                                MTR_MEMO_X_LOCK));
2837
 
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 
2541
        ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
 
2542
                                MTR_MEMO_PAGE_X_FIX));
2838
2543
        if (!has_reserved_extents) {
2839
2544
                /* First reserve enough free space for the file segments
2840
2545
                of the index tree, so that the node pointer updates will
2855
2560
 
2856
2561
        heap = mem_heap_create(1024);
2857
2562
        rec = btr_cur_get_rec(cursor);
2858
 
        page_zip = buf_block_get_page_zip(block);
2859
 
#ifdef UNIV_ZIP_DEBUG
2860
 
        ut_a(!page_zip || page_zip_validate(page_zip, page));
2861
 
#endif /* UNIV_ZIP_DEBUG */
2862
2563
 
2863
2564
        offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
2864
2565
 
2865
 
        if (rec_offs_any_extern(offsets)) {
 
2566
        /* Free externally stored fields if the record is neither
 
2567
        a node pointer nor in two-byte format.
 
2568
        This avoids an unnecessary loop. */
 
2569
        if (page_is_comp(page)
 
2570
            ? !rec_get_node_ptr_flag(rec)
 
2571
            : !rec_get_1byte_offs_flag(rec)) {
2866
2572
                btr_rec_free_externally_stored_fields(index,
2867
 
                                                      rec, offsets, page_zip,
 
2573
                                                      rec, offsets,
2868
2574
                                                      in_rollback, mtr);
2869
 
#ifdef UNIV_ZIP_DEBUG
2870
 
                ut_a(!page_zip || page_zip_validate(page_zip, page));
2871
 
#endif /* UNIV_ZIP_DEBUG */
2872
2575
        }
2873
2576
 
2874
2577
        if (UNIV_UNLIKELY(page_get_n_recs(page) < 2)
2875
2578
            && UNIV_UNLIKELY(dict_index_get_page(btr_cur_get_index(cursor))
2876
 
                             != buf_block_get_page_no(block))) {
 
2579
                             != buf_frame_get_page_no(page))) {
2877
2580
 
2878
2581
                /* If there is only one record, drop the whole page in
2879
2582
                btr_discard_page, if this is not the root page */
2886
2589
                goto return_after_reservations;
2887
2590
        }
2888
2591
 
2889
 
        lock_update_delete(block, rec);
 
2592
        lock_update_delete(rec);
2890
2593
        level = btr_page_get_level(page, mtr);
2891
2594
 
2892
2595
        if (level > 0
2901
2604
                        non-leaf level, we must mark the new leftmost node
2902
2605
                        pointer as the predefined minimum record */
2903
2606
 
2904
 
                        /* This will make page_zip_validate() fail until
2905
 
                        page_cur_delete_rec() completes.  This is harmless,
2906
 
                        because everything will take place within a single
2907
 
                        mini-transaction and because writing to the redo log
2908
 
                        is an atomic operation (performed by mtr_commit()). */
2909
 
                        btr_set_min_rec_mark(next_rec, mtr);
 
2607
                        btr_set_min_rec_mark(next_rec, page_is_comp(page),
 
2608
                                             mtr);
2910
2609
                } else {
2911
2610
                        /* Otherwise, if we delete the leftmost node pointer
2912
2611
                        on a page, we have to change the father node pointer
2913
2612
                        so that it is equal to the new leftmost node pointer
2914
2613
                        on the page */
2915
2614
 
2916
 
                        btr_node_ptr_delete(index, block, mtr);
 
2615
                        btr_node_ptr_delete(index, page, mtr);
2917
2616
 
2918
2617
                        node_ptr = dict_index_build_node_ptr(
2919
 
                                index, next_rec, buf_block_get_page_no(block),
 
2618
                                index, next_rec, buf_frame_get_page_no(page),
2920
2619
                                heap, level);
2921
2620
 
2922
2621
                        btr_insert_on_non_leaf_level(index,
2927
2626
        btr_search_update_hash_on_delete(cursor);
2928
2627
 
2929
2628
        page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr);
2930
 
#ifdef UNIV_ZIP_DEBUG
2931
 
        ut_a(!page_zip || page_zip_validate(page_zip, page));
2932
 
#endif /* UNIV_ZIP_DEBUG */
2933
2629
 
2934
 
        ut_ad(btr_check_node_ptr(index, block, mtr));
 
2630
        ut_ad(btr_check_node_ptr(index, page, mtr));
2935
2631
 
2936
2632
        *err = DB_SUCCESS;
2937
2633
 
2986
2682
        slot = cursor->path_arr + (root_height - height);
2987
2683
 
2988
2684
        slot->nth_rec = page_rec_get_n_recs_before(rec);
2989
 
        slot->n_recs = page_get_n_recs(page_align(rec));
 
2685
        slot->n_recs = page_get_n_recs(buf_frame_align(rec));
2990
2686
}
2991
2687
 
2992
2688
/***********************************************************************
2993
2689
Estimates the number of rows in a given index range. */
2994
 
UNIV_INTERN
2995
 
ib_int64_t
 
2690
 
 
2691
ib_longlong
2996
2692
btr_estimate_n_rows_in_range(
2997
2693
/*=========================*/
2998
2694
                                /* out: estimated number of rows */
2999
2695
        dict_index_t*   index,  /* in: index */
3000
 
        const dtuple_t* tuple1, /* in: range start, may also be empty tuple */
 
2696
        dtuple_t*       tuple1, /* in: range start, may also be empty tuple */
3001
2697
        ulint           mode1,  /* in: search mode for range start */
3002
 
        const dtuple_t* tuple2, /* in: range end, may also be empty tuple */
 
2698
        dtuple_t*       tuple2, /* in: range end, may also be empty tuple */
3003
2699
        ulint           mode2)  /* in: search mode for range end */
3004
2700
{
3005
2701
        btr_path_t      path1[BTR_PATH_ARRAY_N_SLOTS];
3010
2706
        ibool           diverged;
3011
2707
        ibool           diverged_lot;
3012
2708
        ulint           divergence_level;
3013
 
        ib_int64_t      n_rows;
 
2709
        ib_longlong     n_rows;
3014
2710
        ulint           i;
3015
2711
        mtr_t           mtr;
3016
2712
 
3141
2837
Estimates the number of different key values in a given index, for
3142
2838
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
3143
2839
The estimates are stored in the array index->stat_n_diff_key_vals. */
3144
 
UNIV_INTERN
 
2840
 
3145
2841
void
3146
2842
btr_estimate_number_of_different_key_vals(
3147
2843
/*======================================*/
3153
2849
        ulint           n_cols;
3154
2850
        ulint           matched_fields;
3155
2851
        ulint           matched_bytes;
3156
 
        ib_int64_t*     n_diff;
 
2852
        ib_longlong*    n_diff;
3157
2853
        ulint           not_empty_flag  = 0;
3158
2854
        ulint           total_external_size = 0;
3159
2855
        ulint           i;
3165
2861
        ulint           offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
3166
2862
        ulint*          offsets_rec     = offsets_rec_;
3167
2863
        ulint*          offsets_next_rec= offsets_next_rec_;
3168
 
        rec_offs_init(offsets_rec_);
3169
 
        rec_offs_init(offsets_next_rec_);
 
2864
        *offsets_rec_ = (sizeof offsets_rec_) / sizeof *offsets_rec_;
 
2865
        *offsets_next_rec_
 
2866
                = (sizeof offsets_next_rec_) / sizeof *offsets_next_rec_;
3170
2867
 
3171
2868
        n_cols = dict_index_get_n_unique(index);
3172
2869
 
3173
 
        n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
 
2870
        n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong));
 
2871
 
 
2872
        memset(n_diff, 0, (n_cols + 1) * sizeof(ib_longlong));
3174
2873
 
3175
2874
        /* We sample some pages in the index to get an estimate */
3176
2875
 
3273
2972
        for (j = 0; j <= n_cols; j++) {
3274
2973
                index->stat_n_diff_key_vals[j]
3275
2974
                        = ((n_diff[j]
3276
 
                            * (ib_int64_t)index->stat_n_leaf_pages
 
2975
                            * (ib_longlong)index->stat_n_leaf_pages
3277
2976
                            + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1
3278
2977
                            + total_external_size
3279
2978
                            + not_empty_flag)
3352
3051
void
3353
3052
btr_cur_set_ownership_of_extern_field(
3354
3053
/*==================================*/
3355
 
        page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
3356
 
                                part will be updated, or NULL */
3357
 
        rec_t*          rec,    /* in/out: clustered index record */
3358
 
        dict_index_t*   index,  /* in: index of the page */
 
3054
        rec_t*          rec,    /* in: clustered index record */
3359
3055
        const ulint*    offsets,/* in: array returned by rec_get_offsets() */
3360
3056
        ulint           i,      /* in: field number */
3361
3057
        ibool           val,    /* in: value to set */
3362
 
        mtr_t*          mtr)    /* in: mtr, or NULL if not logged */
 
3058
        mtr_t*          mtr)    /* in: mtr */
3363
3059
{
3364
3060
        byte*   data;
3365
3061
        ulint   local_len;
3379
3075
                byte_val = byte_val | BTR_EXTERN_OWNER_FLAG;
3380
3076
        }
3381
3077
 
3382
 
        if (UNIV_LIKELY_NULL(page_zip)) {
3383
 
                mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
3384
 
                page_zip_write_blob_ptr(page_zip, rec, index, offsets, i, mtr);
3385
 
        } else if (UNIV_LIKELY(mtr != NULL)) {
3386
 
 
3387
 
                mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
3388
 
                                 MLOG_1BYTE, mtr);
3389
 
        } else {
3390
 
                mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
3391
 
        }
 
3078
        mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
 
3079
                         MLOG_1BYTE, mtr);
3392
3080
}
3393
3081
 
3394
3082
/***********************************************************************
3396
3084
is transferred to the updated record which is inserted elsewhere in the
3397
3085
index tree. In purge, only the owner of an externally stored field is allowed
3398
3086
to free the field. */
3399
 
UNIV_INTERN
 
3087
 
3400
3088
void
3401
3089
btr_cur_mark_extern_inherited_fields(
3402
3090
/*=================================*/
3403
 
        page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
3404
 
                                part will be updated, or NULL */
3405
 
        rec_t*          rec,    /* in/out: record in a clustered index */
3406
 
        dict_index_t*   index,  /* in: index of the page */
 
3091
        rec_t*          rec,    /* in: record in a clustered index */
3407
3092
        const ulint*    offsets,/* in: array returned by rec_get_offsets() */
3408
 
        const upd_t*    update, /* in: update vector */
3409
 
        mtr_t*          mtr)    /* in: mtr, or NULL if not logged */
 
3093
        upd_t*          update, /* in: update vector */
 
3094
        mtr_t*          mtr)    /* in: mtr */
3410
3095
{
 
3096
        ibool   is_updated;
3411
3097
        ulint   n;
3412
3098
        ulint   j;
3413
3099
        ulint   i;
3414
3100
 
3415
3101
        ut_ad(rec_offs_validate(rec, NULL, offsets));
3416
3102
        ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
3417
 
 
3418
 
        if (!rec_offs_any_extern(offsets)) {
3419
 
 
3420
 
                return;
3421
 
        }
3422
 
 
3423
3103
        n = rec_offs_n_fields(offsets);
3424
3104
 
3425
3105
        for (i = 0; i < n; i++) {
3426
3106
                if (rec_offs_nth_extern(offsets, i)) {
3427
3107
 
3428
3108
                        /* Check it is not in updated fields */
 
3109
                        is_updated = FALSE;
3429
3110
 
3430
3111
                        if (update) {
3431
3112
                                for (j = 0; j < upd_get_n_fields(update);
3432
3113
                                     j++) {
3433
3114
                                        if (upd_get_nth_field(update, j)
3434
3115
                                            ->field_no == i) {
3435
 
 
3436
 
                                                goto updated;
 
3116
                                                is_updated = TRUE;
3437
3117
                                        }
3438
3118
                                }
3439
3119
                        }
3440
3120
 
3441
 
                        btr_cur_set_ownership_of_extern_field(
3442
 
                                page_zip, rec, index, offsets, i, FALSE, mtr);
3443
 
updated:
3444
 
                        ;
 
3121
                        if (!is_updated) {
 
3122
                                btr_cur_set_ownership_of_extern_field(
 
3123
                                        rec, offsets, i, FALSE, mtr);
 
3124
                        }
3445
3125
                }
3446
3126
        }
3447
3127
}
3450
3130
The complement of the previous function: in an update, the entry may inherit
3451
3131
some externally stored fields from a record. We must mark them as inherited
3452
3132
in entry, so that they are not freed in a rollback. */
3453
 
UNIV_INTERN
 
3133
 
3454
3134
void
3455
3135
btr_cur_mark_dtuple_inherited_extern(
3456
3136
/*=================================*/
3457
 
        dtuple_t*       entry,          /* in/out: updated entry to be
3458
 
                                        inserted to clustered index */
3459
 
        const upd_t*    update)         /* in: update vector */
 
3137
        dtuple_t*       entry,          /* in: updated entry to be inserted to
 
3138
                                        clustered index */
 
3139
        ulint*          ext_vec,        /* in: array of extern fields in the
 
3140
                                        original record */
 
3141
        ulint           n_ext_vec,      /* in: number of elements in ext_vec */
 
3142
        upd_t*          update)         /* in: update vector */
3460
3143
{
3461
 
        ulint           i;
3462
 
 
3463
 
        for (i = 0; i < dtuple_get_n_fields(entry); i++) {
3464
 
 
3465
 
                dfield_t*       dfield = dtuple_get_nth_field(entry, i);
3466
 
                byte*           data;
3467
 
                ulint           len;
3468
 
                ulint           j;
3469
 
 
3470
 
                if (!dfield_is_ext(dfield)) {
3471
 
                        continue;
3472
 
                }
3473
 
 
3474
 
                /* Check if it is in updated fields */
 
3144
        dfield_t* dfield;
 
3145
        ulint   byte_val;
 
3146
        byte*   data;
 
3147
        ulint   len;
 
3148
        ibool   is_updated;
 
3149
        ulint   j;
 
3150
        ulint   i;
 
3151
 
 
3152
        if (ext_vec == NULL) {
 
3153
 
 
3154
                return;
 
3155
        }
 
3156
 
 
3157
        for (i = 0; i < n_ext_vec; i++) {
 
3158
 
 
3159
                /* Check ext_vec[i] is in updated fields */
 
3160
                is_updated = FALSE;
3475
3161
 
3476
3162
                for (j = 0; j < upd_get_n_fields(update); j++) {
3477
 
                        if (upd_get_nth_field(update, j)->field_no == i) {
3478
 
 
3479
 
                                goto is_updated;
 
3163
                        if (upd_get_nth_field(update, j)->field_no
 
3164
                            == ext_vec[i]) {
 
3165
                                is_updated = TRUE;
3480
3166
                        }
3481
3167
                }
3482
3168
 
3483
 
                data = dfield_get_data(dfield);
3484
 
                len = dfield_get_len(dfield);
3485
 
                data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
3486
 
                        |= BTR_EXTERN_INHERITED_FLAG;
3487
 
 
3488
 
is_updated:
3489
 
                ;
 
3169
                if (!is_updated) {
 
3170
                        dfield = dtuple_get_nth_field(entry, ext_vec[i]);
 
3171
 
 
3172
                        data = (byte*) dfield_get_data(dfield);
 
3173
                        len = dfield_get_len(dfield);
 
3174
 
 
3175
                        len -= BTR_EXTERN_FIELD_REF_SIZE;
 
3176
 
 
3177
                        byte_val = mach_read_from_1(data + len
 
3178
                                                    + BTR_EXTERN_LEN);
 
3179
 
 
3180
                        byte_val = byte_val | BTR_EXTERN_INHERITED_FLAG;
 
3181
 
 
3182
                        mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val);
 
3183
                }
3490
3184
        }
3491
3185
}
3492
3186
 
3498
3192
void
3499
3193
btr_cur_unmark_extern_fields(
3500
3194
/*=========================*/
3501
 
        page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed
3502
 
                                part will be updated, or NULL */
3503
 
        rec_t*          rec,    /* in/out: record in a clustered index */
3504
 
        dict_index_t*   index,  /* in: index of the page */
3505
 
        const ulint*    offsets,/* in: array returned by rec_get_offsets() */
3506
 
        mtr_t*          mtr)    /* in: mtr, or NULL if not logged */
 
3195
        rec_t*          rec,    /* in: record in a clustered index */
 
3196
        mtr_t*          mtr,    /* in: mtr */
 
3197
        const ulint*    offsets)/* in: array returned by rec_get_offsets() */
3507
3198
{
3508
3199
        ulint   n;
3509
3200
        ulint   i;
3511
3202
        ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
3512
3203
        n = rec_offs_n_fields(offsets);
3513
3204
 
3514
 
        if (!rec_offs_any_extern(offsets)) {
3515
 
 
3516
 
                return;
3517
 
        }
3518
 
 
3519
3205
        for (i = 0; i < n; i++) {
3520
3206
                if (rec_offs_nth_extern(offsets, i)) {
3521
3207
 
3522
 
                        btr_cur_set_ownership_of_extern_field(
3523
 
                                page_zip, rec, index, offsets, i, TRUE, mtr);
 
3208
                        btr_cur_set_ownership_of_extern_field(rec, offsets, i,
 
3209
                                                              TRUE, mtr);
3524
3210
                }
3525
3211
        }
3526
3212
}
3527
3213
 
3528
3214
/***********************************************************************
3529
3215
Marks all extern fields in a dtuple as owned by the record. */
3530
 
UNIV_INTERN
 
3216
 
3531
3217
void
3532
3218
btr_cur_unmark_dtuple_extern_fields(
3533
3219
/*================================*/
3534
 
        dtuple_t*       entry)          /* in/out: clustered index entry */
 
3220
        dtuple_t*       entry,          /* in: clustered index entry */
 
3221
        ulint*          ext_vec,        /* in: array of numbers of fields
 
3222
                                        which have been stored externally */
 
3223
        ulint           n_ext_vec)      /* in: number of elements in ext_vec */
3535
3224
{
 
3225
        dfield_t* dfield;
 
3226
        ulint   byte_val;
 
3227
        byte*   data;
 
3228
        ulint   len;
3536
3229
        ulint   i;
3537
3230
 
3538
 
        for (i = 0; i < dtuple_get_n_fields(entry); i++) {
3539
 
                dfield_t* dfield = dtuple_get_nth_field(entry, i);
3540
 
 
3541
 
                if (dfield_is_ext(dfield)) {
3542
 
                        byte*   data = dfield_get_data(dfield);
3543
 
                        ulint   len = dfield_get_len(dfield);
3544
 
 
3545
 
                        data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
3546
 
                                &= ~BTR_EXTERN_OWNER_FLAG;
3547
 
                }
 
3231
        for (i = 0; i < n_ext_vec; i++) {
 
3232
                dfield = dtuple_get_nth_field(entry, ext_vec[i]);
 
3233
 
 
3234
                data = (byte*) dfield_get_data(dfield);
 
3235
                len = dfield_get_len(dfield);
 
3236
 
 
3237
                len -= BTR_EXTERN_FIELD_REF_SIZE;
 
3238
 
 
3239
                byte_val = mach_read_from_1(data + len + BTR_EXTERN_LEN);
 
3240
 
 
3241
                byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG);
 
3242
 
 
3243
                mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val);
3548
3244
        }
3549
3245
}
3550
3246
 
3551
3247
/***********************************************************************
3552
 
Flags the data tuple fields that are marked as extern storage in the
3553
 
update vector.  We use this function to remember which fields we must
3554
 
mark as extern storage in a record inserted for an update. */
3555
 
UNIV_INTERN
 
3248
Stores the positions of the fields marked as extern storage in the update
 
3249
vector, and also those fields which are marked as extern storage in rec
 
3250
and not mentioned in updated fields. We use this function to remember
 
3251
which fields we must mark as extern storage in a record inserted for an
 
3252
update. */
 
3253
 
3556
3254
ulint
3557
3255
btr_push_update_extern_fields(
3558
3256
/*==========================*/
3559
 
                                /* out: number of flagged external columns */
3560
 
        dtuple_t*       tuple,  /* in/out: data tuple */
3561
 
        const upd_t*    update, /* in: update vector */
3562
 
        mem_heap_t*     heap)   /* in: memory heap */
 
3257
                                /* out: number of values stored in ext_vect */
 
3258
        ulint*          ext_vect,/* in: array of ulints, must be preallocated
 
3259
                                to have space for all fields in rec */
 
3260
        const ulint*    offsets,/* in: array returned by rec_get_offsets() */
 
3261
        upd_t*          update) /* in: update vector or NULL */
3563
3262
{
3564
 
        ulint                   n_pushed        = 0;
3565
 
        ulint                   n;
3566
 
        const upd_field_t*      uf;
3567
 
 
3568
 
        ut_ad(tuple);
3569
 
        ut_ad(update);
3570
 
 
3571
 
        uf = update->fields;
3572
 
        n = upd_get_n_fields(update);
3573
 
 
3574
 
        for (; n--; uf++) {
3575
 
                if (dfield_is_ext(&uf->new_val)) {
3576
 
                        dfield_t*       field
3577
 
                                = dtuple_get_nth_field(tuple, uf->field_no);
3578
 
 
3579
 
                        if (!dfield_is_ext(field)) {
3580
 
                                dfield_set_ext(field);
3581
 
                                n_pushed++;
3582
 
                        }
3583
 
 
3584
 
                        switch (uf->orig_len) {
3585
 
                                byte*   data;
3586
 
                                ulint   len;
3587
 
                                byte*   buf;
3588
 
                        case 0:
3589
 
                                break;
3590
 
                        case BTR_EXTERN_FIELD_REF_SIZE:
3591
 
                                /* Restore the original locally stored
3592
 
                                part of the column.  In the undo log,
3593
 
                                InnoDB writes a longer prefix of externally
3594
 
                                stored columns, so that column prefixes
3595
 
                                in secondary indexes can be reconstructed. */
3596
 
                                dfield_set_data(field, (byte*) dfield_get_data(field)
3597
 
                                                + dfield_get_len(field)
3598
 
                                                - BTR_EXTERN_FIELD_REF_SIZE,
3599
 
                                                BTR_EXTERN_FIELD_REF_SIZE);
3600
 
                                dfield_set_ext(field);
3601
 
                                break;
3602
 
                        default:
3603
 
                                /* Reconstruct the original locally
3604
 
                                stored part of the column.  The data
3605
 
                                will have to be copied. */
3606
 
                                ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);
3607
 
 
3608
 
                                data = dfield_get_data(field);
3609
 
                                len = dfield_get_len(field);
3610
 
 
3611
 
                                buf = mem_heap_alloc(heap, uf->orig_len);
3612
 
                                /* Copy the locally stored prefix. */
3613
 
                                memcpy(buf, data,
3614
 
                                       uf->orig_len
3615
 
                                       - BTR_EXTERN_FIELD_REF_SIZE);
3616
 
                                /* Copy the BLOB pointer. */
3617
 
                                memcpy(buf + uf->orig_len
3618
 
                                       - BTR_EXTERN_FIELD_REF_SIZE,
3619
 
                                       data + len - BTR_EXTERN_FIELD_REF_SIZE,
3620
 
                                       BTR_EXTERN_FIELD_REF_SIZE);
3621
 
 
3622
 
                                dfield_set_data(field, buf, uf->orig_len);
3623
 
                                dfield_set_ext(field);
 
3263
        ulint   n_pushed        = 0;
 
3264
        ibool   is_updated;
 
3265
        ulint   n;
 
3266
        ulint   j;
 
3267
        ulint   i;
 
3268
 
 
3269
        if (update) {
 
3270
                n = upd_get_n_fields(update);
 
3271
 
 
3272
                for (i = 0; i < n; i++) {
 
3273
 
 
3274
                        if (upd_get_nth_field(update, i)->extern_storage) {
 
3275
 
 
3276
                                ext_vect[n_pushed] = upd_get_nth_field(
 
3277
                                        update, i)->field_no;
 
3278
 
 
3279
                                n_pushed++;
 
3280
                        }
 
3281
                }
 
3282
        }
 
3283
 
 
3284
        n = rec_offs_n_fields(offsets);
 
3285
 
 
3286
        for (i = 0; i < n; i++) {
 
3287
                if (rec_offs_nth_extern(offsets, i)) {
 
3288
 
 
3289
                        /* Check it is not in updated fields */
 
3290
                        is_updated = FALSE;
 
3291
 
 
3292
                        if (update) {
 
3293
                                for (j = 0; j < upd_get_n_fields(update);
 
3294
                                     j++) {
 
3295
                                        if (upd_get_nth_field(update, j)
 
3296
                                            ->field_no == i) {
 
3297
                                                is_updated = TRUE;
 
3298
                                        }
 
3299
                                }
 
3300
                        }
 
3301
 
 
3302
                        if (!is_updated) {
 
3303
                                ext_vect[n_pushed] = i;
 
3304
                                n_pushed++;
3624
3305
                        }
3625
3306
                }
3626
3307
        }
3634
3315
ulint
3635
3316
btr_blob_get_part_len(
3636
3317
/*==================*/
3637
 
                                        /* out: part length */
3638
 
        const byte*     blob_header)    /* in: blob header */
 
3318
                                /* out: part length */
 
3319
        byte*   blob_header)    /* in: blob header */
3639
3320
{
3640
3321
        return(mach_read_from_4(blob_header + BTR_BLOB_HDR_PART_LEN));
3641
3322
}
3646
3327
ulint
3647
3328
btr_blob_get_next_page_no(
3648
3329
/*======================*/
3649
 
                                        /* out: page number or FIL_NULL if
3650
 
                                        no more pages */
3651
 
        const byte*     blob_header)    /* in: blob header */
 
3330
                                /* out: page number or FIL_NULL if
 
3331
                                no more pages */
 
3332
        byte*   blob_header)    /* in: blob header */
3652
3333
{
3653
3334
        return(mach_read_from_4(blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO));
3654
3335
}
3655
3336
 
3656
3337
/***********************************************************************
3657
 
Deallocate a buffer block that was reserved for a BLOB part. */
3658
 
static
3659
 
void
3660
 
btr_blob_free(
3661
 
/*==========*/
3662
 
        buf_block_t*    block,  /* in: buffer block */
3663
 
        ibool           all,    /* in: TRUE=remove also the compressed page
3664
 
                                if there is one */
3665
 
        mtr_t*          mtr)    /* in: mini-transaction to commit */
3666
 
{
3667
 
        ulint   space   = buf_block_get_space(block);
3668
 
        ulint   page_no = buf_block_get_page_no(block);
3669
 
 
3670
 
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3671
 
 
3672
 
        mtr_commit(mtr);
3673
 
 
3674
 
        buf_pool_mutex_enter();
3675
 
        mutex_enter(&block->mutex);
3676
 
 
3677
 
        /* Only free the block if it is still allocated to
3678
 
        the same file page. */
3679
 
 
3680
 
        if (buf_block_get_state(block)
3681
 
            == BUF_BLOCK_FILE_PAGE
3682
 
            && buf_block_get_space(block) == space
3683
 
            && buf_block_get_page_no(block) == page_no) {
3684
 
 
3685
 
                if (buf_LRU_free_block(&block->page, all, NULL)
3686
 
                    != BUF_LRU_FREED
3687
 
                    && all && block->page.zip.data) {
3688
 
                        /* Attempt to deallocate the uncompressed page
3689
 
                        if the whole block cannot be deallocated. */
3690
 
 
3691
 
                        buf_LRU_free_block(&block->page, FALSE, NULL);
3692
 
                }
3693
 
        }
3694
 
 
3695
 
        buf_pool_mutex_exit();
3696
 
        mutex_exit(&block->mutex);
3697
 
}
3698
 
 
3699
 
/***********************************************************************
3700
3338
Stores the fields in big_rec_vec to the tablespace and puts pointers to
3701
 
them in rec.  The extern flags in rec will have to be set beforehand.
3702
 
The fields are stored on pages allocated from leaf node
 
3339
them in rec. The fields are stored on pages allocated from leaf node
3703
3340
file segment of the index tree. */
3704
 
UNIV_INTERN
 
3341
 
3705
3342
ulint
3706
3343
btr_store_big_rec_extern_fields(
3707
3344
/*============================*/
3708
3345
                                        /* out: DB_SUCCESS or error */
3709
3346
        dict_index_t*   index,          /* in: index of rec; the index tree
3710
3347
                                        MUST be X-latched */
3711
 
        buf_block_t*    rec_block,      /* in/out: block containing rec */
3712
 
        rec_t*          rec,            /* in/out: record */
 
3348
        rec_t*          rec,            /* in: record */
3713
3349
        const ulint*    offsets,        /* in: rec_get_offsets(rec, index);
3714
3350
                                        the "external storage" flags in offsets
3715
3351
                                        will not correspond to rec when
3720
3356
                                        containing the latch to rec and to the
3721
3357
                                        tree */
3722
3358
{
3723
 
        ulint   rec_page_no;
3724
 
        byte*   field_ref;
 
3359
        byte*   data;
 
3360
        ulint   local_len;
3725
3361
        ulint   extern_len;
3726
3362
        ulint   store_len;
3727
3363
        ulint   page_no;
 
3364
        page_t* page;
3728
3365
        ulint   space_id;
3729
 
        ulint   zip_size;
 
3366
        page_t* prev_page;
 
3367
        page_t* rec_page;
3730
3368
        ulint   prev_page_no;
3731
3369
        ulint   hint_page_no;
3732
3370
        ulint   i;
3733
3371
        mtr_t   mtr;
3734
 
        mem_heap_t* heap = NULL;
3735
 
        page_zip_des_t* page_zip;
3736
 
        z_stream c_stream;
3737
3372
 
3738
3373
        ut_ad(rec_offs_validate(rec, index, offsets));
3739
3374
        ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
3740
3375
                                MTR_MEMO_X_LOCK));
3741
 
        ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
3742
 
        ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
3743
 
        ut_a(dict_index_is_clust(index));
3744
 
 
3745
 
        page_zip = buf_block_get_page_zip(rec_block);
3746
 
        ut_a(dict_table_zip_size(index->table)
3747
 
             == buf_block_get_zip_size(rec_block));
3748
 
 
3749
 
        space_id = buf_block_get_space(rec_block);
3750
 
        zip_size = buf_block_get_zip_size(rec_block);
3751
 
        rec_page_no = buf_block_get_page_no(rec_block);
3752
 
        ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
3753
 
 
3754
 
        if (UNIV_LIKELY_NULL(page_zip)) {
3755
 
                int     err;
3756
 
 
3757
 
                /* Zlib deflate needs 128 kilobytes for the default
3758
 
                window size, plus 512 << memLevel, plus a few
3759
 
                kilobytes for small objects.  We use reduced memLevel
3760
 
                to limit the memory consumption, and preallocate the
3761
 
                heap, hoping to avoid memory fragmentation. */
3762
 
                heap = mem_heap_create(250000);
3763
 
                page_zip_set_alloc(&c_stream, heap);
3764
 
 
3765
 
                err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
3766
 
                                   Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
3767
 
                ut_a(err == Z_OK);
3768
 
        }
 
3376
        ut_ad(mtr_memo_contains(local_mtr, buf_block_align(rec),
 
3377
                                MTR_MEMO_PAGE_X_FIX));
 
3378
        ut_a(index->type & DICT_CLUSTERED);
 
3379
 
 
3380
        space_id = buf_frame_get_space_id(rec);
3769
3381
 
3770
3382
        /* We have to create a file segment to the tablespace
3771
3383
        for each field and put the pointer to the field in rec */
3772
3384
 
3773
3385
        for (i = 0; i < big_rec_vec->n_fields; i++) {
3774
 
                ut_ad(rec_offs_nth_extern(offsets,
3775
 
                                          big_rec_vec->fields[i].field_no));
3776
 
                {
3777
 
                        ulint   local_len;
3778
 
                        field_ref = rec_get_nth_field(
3779
 
                                rec, offsets, big_rec_vec->fields[i].field_no,
3780
 
                                &local_len);
3781
 
                        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
3782
 
                        local_len -= BTR_EXTERN_FIELD_REF_SIZE;
3783
 
                        field_ref += local_len;
3784
 
                }
 
3386
 
 
3387
                data = rec_get_nth_field(rec, offsets,
 
3388
                                         big_rec_vec->fields[i].field_no,
 
3389
                                         &local_len);
 
3390
                ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
 
3391
                local_len -= BTR_EXTERN_FIELD_REF_SIZE;
3785
3392
                extern_len = big_rec_vec->fields[i].len;
3786
3393
 
3787
3394
                ut_a(extern_len > 0);
3788
3395
 
3789
3396
                prev_page_no = FIL_NULL;
3790
3397
 
3791
 
                if (UNIV_LIKELY_NULL(page_zip)) {
3792
 
                        int     err = deflateReset(&c_stream);
3793
 
                        ut_a(err == Z_OK);
3794
 
 
3795
 
                        c_stream.next_in = (void*) big_rec_vec->fields[i].data;
3796
 
                        c_stream.avail_in = extern_len;
3797
 
                }
3798
 
 
3799
 
                for (;;) {
3800
 
                        buf_block_t*    block;
3801
 
                        page_t*         page;
3802
 
 
 
3398
                while (extern_len > 0) {
3803
3399
                        mtr_start(&mtr);
3804
3400
 
3805
3401
                        if (prev_page_no == FIL_NULL) {
3806
 
                                hint_page_no = 1 + rec_page_no;
 
3402
                                hint_page_no = buf_frame_get_page_no(rec) + 1;
3807
3403
                        } else {
3808
3404
                                hint_page_no = prev_page_no + 1;
3809
3405
                        }
3810
3406
 
3811
 
                        block = btr_page_alloc(index, hint_page_no,
3812
 
                                               FSP_NO_DIR, 0, &mtr);
3813
 
                        if (UNIV_UNLIKELY(block == NULL)) {
 
3407
                        page = btr_page_alloc(index, hint_page_no,
 
3408
                                              FSP_NO_DIR, 0, &mtr);
 
3409
                        if (page == NULL) {
3814
3410
 
3815
3411
                                mtr_commit(&mtr);
3816
3412
 
3817
 
                                if (UNIV_LIKELY_NULL(page_zip)) {
3818
 
                                        deflateEnd(&c_stream);
3819
 
                                        mem_heap_free(heap);
3820
 
                                }
3821
 
 
3822
3413
                                return(DB_OUT_OF_FILE_SPACE);
3823
3414
                        }
3824
3415
 
3825
 
                        page_no = buf_block_get_page_no(block);
3826
 
                        page = buf_block_get_frame(block);
 
3416
                        mlog_write_ulint(page + FIL_PAGE_TYPE,
 
3417
                                         FIL_PAGE_TYPE_BLOB,
 
3418
                                         MLOG_2BYTES, &mtr);
 
3419
 
 
3420
                        page_no = buf_frame_get_page_no(page);
3827
3421
 
3828
3422
                        if (prev_page_no != FIL_NULL) {
3829
 
                                buf_block_t*    prev_block;
3830
 
                                page_t*         prev_page;
3831
 
 
3832
 
                                prev_block = buf_page_get(space_id, zip_size,
3833
 
                                                          prev_page_no,
3834
 
                                                          RW_X_LATCH, &mtr);
3835
 
#ifdef UNIV_SYNC_DEBUG
3836
 
                                buf_block_dbg_add_level(prev_block,
3837
 
                                                        SYNC_EXTERN_STORAGE);
3838
 
#endif /* UNIV_SYNC_DEBUG */
3839
 
                                prev_page = buf_block_get_frame(prev_block);
3840
 
 
3841
 
                                if (UNIV_LIKELY_NULL(page_zip)) {
3842
 
                                        mlog_write_ulint(
3843
 
                                                prev_page + FIL_PAGE_NEXT,
3844
 
                                                page_no, MLOG_4BYTES, &mtr);
3845
 
                                        memcpy(buf_block_get_page_zip(
3846
 
                                                       prev_block)
3847
 
                                               ->data + FIL_PAGE_NEXT,
3848
 
                                               prev_page + FIL_PAGE_NEXT, 4);
3849
 
                                } else {
3850
 
                                        mlog_write_ulint(
3851
 
                                                prev_page + FIL_PAGE_DATA
3852
 
                                                + BTR_BLOB_HDR_NEXT_PAGE_NO,
3853
 
                                                page_no, MLOG_4BYTES, &mtr);
3854
 
                                }
3855
 
 
3856
 
                        }
3857
 
 
3858
 
                        if (UNIV_LIKELY_NULL(page_zip)) {
3859
 
                                int             err;
3860
 
                                page_zip_des_t* blob_page_zip;
3861
 
 
3862
 
                                mach_write_to_2(page + FIL_PAGE_TYPE,
3863
 
                                                prev_page_no == FIL_NULL
3864
 
                                                ? FIL_PAGE_TYPE_ZBLOB
3865
 
                                                : FIL_PAGE_TYPE_ZBLOB2);
3866
 
 
3867
 
                                c_stream.next_out = page
3868
 
                                        + FIL_PAGE_DATA;
3869
 
                                c_stream.avail_out
3870
 
                                        = page_zip_get_size(page_zip)
3871
 
                                        - FIL_PAGE_DATA;
3872
 
 
3873
 
                                err = deflate(&c_stream, Z_FINISH);
3874
 
                                ut_a(err == Z_OK || err == Z_STREAM_END);
3875
 
                                ut_a(err == Z_STREAM_END
3876
 
                                     || c_stream.avail_out == 0);
3877
 
 
3878
 
                                /* Write the "next BLOB page" pointer */
3879
 
                                mlog_write_ulint(page + FIL_PAGE_NEXT,
3880
 
                                                 FIL_NULL, MLOG_4BYTES, &mtr);
3881
 
                                /* Initialize the unused "prev page" pointer */
3882
 
                                mlog_write_ulint(page + FIL_PAGE_PREV,
3883
 
                                                 FIL_NULL, MLOG_4BYTES, &mtr);
3884
 
                                /* Write a back pointer to the record
3885
 
                                into the otherwise unused area.  This
3886
 
                                information could be useful in
3887
 
                                debugging.  Later, we might want to
3888
 
                                implement the possibility to relocate
3889
 
                                BLOB pages.  Then, we would need to be
3890
 
                                able to adjust the BLOB pointer in the
3891
 
                                record.  We do not store the heap
3892
 
                                number of the record, because it can
3893
 
                                change in page_zip_reorganize() or
3894
 
                                btr_page_reorganize().  However, also
3895
 
                                the page number of the record may
3896
 
                                change when B-tree nodes are split or
3897
 
                                merged. */
3898
 
                                mlog_write_ulint(page
3899
 
                                                 + FIL_PAGE_FILE_FLUSH_LSN,
 
3423
                                prev_page = buf_page_get(space_id,
 
3424
                                                         prev_page_no,
 
3425
                                                         RW_X_LATCH, &mtr);
 
3426
 
 
3427
#ifdef UNIV_SYNC_DEBUG
 
3428
                                buf_page_dbg_add_level(prev_page,
 
3429
                                                       SYNC_EXTERN_STORAGE);
 
3430
#endif /* UNIV_SYNC_DEBUG */
 
3431
 
 
3432
                                mlog_write_ulint(prev_page + FIL_PAGE_DATA
 
3433
                                                 + BTR_BLOB_HDR_NEXT_PAGE_NO,
 
3434
                                                 page_no, MLOG_4BYTES, &mtr);
 
3435
                        }
 
3436
 
 
3437
                        if (extern_len > (UNIV_PAGE_SIZE - FIL_PAGE_DATA
 
3438
                                          - BTR_BLOB_HDR_SIZE
 
3439
                                          - FIL_PAGE_DATA_END)) {
 
3440
                                store_len = UNIV_PAGE_SIZE - FIL_PAGE_DATA
 
3441
                                        - BTR_BLOB_HDR_SIZE
 
3442
                                        - FIL_PAGE_DATA_END;
 
3443
                        } else {
 
3444
                                store_len = extern_len;
 
3445
                        }
 
3446
 
 
3447
                        mlog_write_string(page + FIL_PAGE_DATA
 
3448
                                          + BTR_BLOB_HDR_SIZE,
 
3449
                                          big_rec_vec->fields[i].data
 
3450
                                          + big_rec_vec->fields[i].len
 
3451
                                          - extern_len,
 
3452
                                          store_len, &mtr);
 
3453
                        mlog_write_ulint(page + FIL_PAGE_DATA
 
3454
                                         + BTR_BLOB_HDR_PART_LEN,
 
3455
                                         store_len, MLOG_4BYTES, &mtr);
 
3456
                        mlog_write_ulint(page + FIL_PAGE_DATA
 
3457
                                         + BTR_BLOB_HDR_NEXT_PAGE_NO,
 
3458
                                         FIL_NULL, MLOG_4BYTES, &mtr);
 
3459
 
 
3460
                        extern_len -= store_len;
 
3461
 
 
3462
                        rec_page = buf_page_get(space_id,
 
3463
                                                buf_frame_get_page_no(data),
 
3464
                                                RW_X_LATCH, &mtr);
 
3465
#ifdef UNIV_SYNC_DEBUG
 
3466
                        buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
 
3467
#endif /* UNIV_SYNC_DEBUG */
 
3468
                        mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, 0,
 
3469
                                         MLOG_4BYTES, &mtr);
 
3470
                        mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
 
3471
                                         big_rec_vec->fields[i].len
 
3472
                                         - extern_len,
 
3473
                                         MLOG_4BYTES, &mtr);
 
3474
 
 
3475
                        if (prev_page_no == FIL_NULL) {
 
3476
                                mlog_write_ulint(data + local_len
 
3477
                                                 + BTR_EXTERN_SPACE_ID,
3900
3478
                                                 space_id,
3901
3479
                                                 MLOG_4BYTES, &mtr);
3902
 
                                mlog_write_ulint(page
3903
 
                                                 + FIL_PAGE_FILE_FLUSH_LSN + 4,
3904
 
                                                 rec_page_no,
3905
 
                                                 MLOG_4BYTES, &mtr);
3906
 
 
3907
 
                                /* Zero out the unused part of the page. */
3908
 
                                memset(page + page_zip_get_size(page_zip)
3909
 
                                       - c_stream.avail_out,
3910
 
                                       0, c_stream.avail_out);
3911
 
                                mlog_log_string(page + FIL_PAGE_TYPE,
3912
 
                                                page_zip_get_size(page_zip)
3913
 
                                                - FIL_PAGE_TYPE,
3914
 
                                                &mtr);
3915
 
                                /* Copy the page to compressed storage,
3916
 
                                because it will be flushed to disk
3917
 
                                from there. */
3918
 
                                blob_page_zip = buf_block_get_page_zip(block);
3919
 
                                ut_ad(blob_page_zip);
3920
 
                                ut_ad(page_zip_get_size(blob_page_zip)
3921
 
                                      == page_zip_get_size(page_zip));
3922
 
                                memcpy(blob_page_zip->data, page,
3923
 
                                       page_zip_get_size(page_zip));
3924
 
 
3925
 
                                if (err == Z_OK && prev_page_no != FIL_NULL) {
3926
 
 
3927
 
                                        goto next_zip_page;
3928
 
                                }
3929
 
 
3930
 
                                rec_block = buf_page_get(space_id, zip_size,
3931
 
                                                         rec_page_no,
3932
 
                                                         RW_X_LATCH, &mtr);
3933
 
#ifdef UNIV_SYNC_DEBUG
3934
 
                                buf_block_dbg_add_level(rec_block,
3935
 
                                                        SYNC_NO_ORDER_CHECK);
3936
 
#endif /* UNIV_SYNC_DEBUG */
3937
 
                                if (err == Z_STREAM_END) {
3938
 
                                        mach_write_to_4(field_ref
3939
 
                                                        + BTR_EXTERN_LEN, 0);
3940
 
                                        mach_write_to_4(field_ref
3941
 
                                                        + BTR_EXTERN_LEN + 4,
3942
 
                                                        c_stream.total_in);
3943
 
                                } else {
3944
 
                                        memset(field_ref + BTR_EXTERN_LEN,
3945
 
                                               0, 8);
3946
 
                                }
3947
 
 
3948
 
                                if (prev_page_no == FIL_NULL) {
3949
 
                                        mach_write_to_4(field_ref
3950
 
                                                        + BTR_EXTERN_SPACE_ID,
3951
 
                                                        space_id);
3952
 
 
3953
 
                                        mach_write_to_4(field_ref
3954
 
                                                        + BTR_EXTERN_PAGE_NO,
3955
 
                                                        page_no);
3956
 
 
3957
 
                                        mach_write_to_4(field_ref
3958
 
                                                        + BTR_EXTERN_OFFSET,
3959
 
                                                        FIL_PAGE_NEXT);
3960
 
                                }
3961
 
 
3962
 
                                page_zip_write_blob_ptr(
3963
 
                                        page_zip, rec, index, offsets,
3964
 
                                        big_rec_vec->fields[i].field_no, &mtr);
3965
 
 
3966
 
next_zip_page:
3967
 
                                prev_page_no = page_no;
3968
 
 
3969
 
                                /* Commit mtr and release the
3970
 
                                uncompressed page frame to save memory. */
3971
 
                                btr_blob_free(block, FALSE, &mtr);
3972
 
 
3973
 
                                if (err == Z_STREAM_END) {
3974
 
                                        break;
3975
 
                                }
3976
 
                        } else {
3977
 
                                mlog_write_ulint(page + FIL_PAGE_TYPE,
3978
 
                                                 FIL_PAGE_TYPE_BLOB,
3979
 
                                                 MLOG_2BYTES, &mtr);
3980
 
 
3981
 
                                if (extern_len > (UNIV_PAGE_SIZE
3982
 
                                                  - FIL_PAGE_DATA
3983
 
                                                  - BTR_BLOB_HDR_SIZE
3984
 
                                                  - FIL_PAGE_DATA_END)) {
3985
 
                                        store_len = UNIV_PAGE_SIZE
3986
 
                                                - FIL_PAGE_DATA
3987
 
                                                - BTR_BLOB_HDR_SIZE
3988
 
                                                - FIL_PAGE_DATA_END;
3989
 
                                } else {
3990
 
                                        store_len = extern_len;
3991
 
                                }
3992
 
 
3993
 
                                mlog_write_string(page + FIL_PAGE_DATA
3994
 
                                                  + BTR_BLOB_HDR_SIZE,
3995
 
                                                  (const byte*)
3996
 
                                                  big_rec_vec->fields[i].data
3997
 
                                                  + big_rec_vec->fields[i].len
3998
 
                                                  - extern_len,
3999
 
                                                  store_len, &mtr);
4000
 
                                mlog_write_ulint(page + FIL_PAGE_DATA
4001
 
                                                 + BTR_BLOB_HDR_PART_LEN,
4002
 
                                                 store_len, MLOG_4BYTES, &mtr);
4003
 
                                mlog_write_ulint(page + FIL_PAGE_DATA
4004
 
                                                 + BTR_BLOB_HDR_NEXT_PAGE_NO,
4005
 
                                                 FIL_NULL, MLOG_4BYTES, &mtr);
4006
 
 
4007
 
                                extern_len -= store_len;
4008
 
 
4009
 
                                rec_block = buf_page_get(space_id, zip_size,
4010
 
                                                         rec_page_no,
4011
 
                                                         RW_X_LATCH, &mtr);
4012
 
#ifdef UNIV_SYNC_DEBUG
4013
 
                                buf_block_dbg_add_level(rec_block,
4014
 
                                                        SYNC_NO_ORDER_CHECK);
4015
 
#endif /* UNIV_SYNC_DEBUG */
4016
 
 
4017
 
                                mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
4018
 
                                                 MLOG_4BYTES, &mtr);
4019
 
                                mlog_write_ulint(field_ref
4020
 
                                                 + BTR_EXTERN_LEN + 4,
4021
 
                                                 big_rec_vec->fields[i].len
4022
 
                                                 - extern_len,
4023
 
                                                 MLOG_4BYTES, &mtr);
4024
 
 
4025
 
                                if (prev_page_no == FIL_NULL) {
4026
 
                                        mlog_write_ulint(field_ref
4027
 
                                                         + BTR_EXTERN_SPACE_ID,
4028
 
                                                         space_id,
4029
 
                                                         MLOG_4BYTES, &mtr);
4030
 
 
4031
 
                                        mlog_write_ulint(field_ref
4032
 
                                                         + BTR_EXTERN_PAGE_NO,
4033
 
                                                         page_no,
4034
 
                                                         MLOG_4BYTES, &mtr);
4035
 
 
4036
 
                                        mlog_write_ulint(field_ref
4037
 
                                                         + BTR_EXTERN_OFFSET,
4038
 
                                                         FIL_PAGE_DATA,
4039
 
                                                         MLOG_4BYTES, &mtr);
4040
 
                                }
4041
 
 
4042
 
                                prev_page_no = page_no;
4043
 
 
4044
 
                                mtr_commit(&mtr);
4045
 
 
4046
 
                                if (extern_len == 0) {
4047
 
                                        break;
4048
 
                                }
 
3480
 
 
3481
                                mlog_write_ulint(data + local_len
 
3482
                                                 + BTR_EXTERN_PAGE_NO,
 
3483
                                                 page_no,
 
3484
                                                 MLOG_4BYTES, &mtr);
 
3485
 
 
3486
                                mlog_write_ulint(data + local_len
 
3487
                                                 + BTR_EXTERN_OFFSET,
 
3488
                                                 FIL_PAGE_DATA,
 
3489
                                                 MLOG_4BYTES, &mtr);
 
3490
 
 
3491
                                /* Set the bit denoting that this field
 
3492
                                in rec is stored externally */
 
3493
 
 
3494
                                rec_set_nth_field_extern_bit(
 
3495
                                        rec, index,
 
3496
                                        big_rec_vec->fields[i].field_no,
 
3497
                                        TRUE, &mtr);
4049
3498
                        }
 
3499
 
 
3500
                        prev_page_no = page_no;
 
3501
 
 
3502
                        mtr_commit(&mtr);
4050
3503
                }
4051
3504
        }
4052
3505
 
4053
 
        if (UNIV_LIKELY_NULL(page_zip)) {
4054
 
                deflateEnd(&c_stream);
4055
 
                mem_heap_free(heap);
4056
 
        }
4057
 
 
4058
3506
        return(DB_SUCCESS);
4059
3507
}
4060
3508
 
4061
3509
/***********************************************************************
4062
3510
Frees the space in an externally stored field to the file space
4063
 
management if the field in data is owned by the externally stored field,
 
3511
management if the field in data is owned by the externally stored field,
4064
3512
in a rollback we may have the additional condition that the field must
4065
3513
not be inherited. */
4066
 
UNIV_INTERN
 
3514
 
4067
3515
void
4068
3516
btr_free_externally_stored_field(
4069
3517
/*=============================*/
4075
3523
                                        from purge where 'data' is located on
4076
3524
                                        an undo log page, not an index
4077
3525
                                        page) */
4078
 
        byte*           field_ref,      /* in/out: field reference */
4079
 
        const rec_t*    rec,            /* in: record containing field_ref, for
4080
 
                                        page_zip_write_blob_ptr(), or NULL */
4081
 
        const ulint*    offsets,        /* in: rec_get_offsets(rec, index),
4082
 
                                        or NULL */
4083
 
        page_zip_des_t* page_zip,       /* in: compressed page corresponding
4084
 
                                        to rec, or NULL if rec == NULL */
4085
 
        ulint           i,              /* in: field number of field_ref;
4086
 
                                        ignored if rec == NULL */
 
3526
        byte*           data,           /* in: internally stored data
 
3527
                                        + reference to the externally
 
3528
                                        stored part */
 
3529
        ulint           local_len,      /* in: length of data */
4087
3530
        ibool           do_not_free_inherited,/* in: TRUE if called in a
4088
3531
                                        rollback and we do not want to free
4089
3532
                                        inherited fields */
4091
3534
                                        containing the latch to data and an
4092
3535
                                        X-latch to the index tree */
4093
3536
{
4094
 
        page_t*         page;
4095
 
        ulint           space_id;
4096
 
        ulint           rec_zip_size = dict_table_zip_size(index->table);
4097
 
        ulint           ext_zip_size;
4098
 
        ulint           page_no;
4099
 
        ulint           next_page_no;
4100
 
        mtr_t           mtr;
4101
 
#ifdef UNIV_DEBUG
 
3537
        page_t* page;
 
3538
        page_t* rec_page;
 
3539
        ulint   space_id;
 
3540
        ulint   page_no;
 
3541
        ulint   offset;
 
3542
        ulint   extern_len;
 
3543
        ulint   next_page_no;
 
3544
        ulint   part_len;
 
3545
        mtr_t   mtr;
 
3546
 
 
3547
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
4102
3548
        ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
4103
3549
                                MTR_MEMO_X_LOCK));
4104
 
        ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
4105
 
                                     MTR_MEMO_PAGE_X_FIX));
4106
 
        ut_ad(!rec || rec_offs_validate(rec, index, offsets));
4107
 
 
4108
 
        if (rec) {
4109
 
                ulint   local_len;
4110
 
                const byte*     f = rec_get_nth_field(rec, offsets,
4111
 
                                                      i, &local_len);
4112
 
                ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
4113
 
                local_len -= BTR_EXTERN_FIELD_REF_SIZE;
4114
 
                f += local_len;
4115
 
                ut_ad(f == field_ref);
4116
 
        }
4117
 
#endif /* UNIV_DEBUG */
4118
 
 
4119
 
        space_id = mach_read_from_4(field_ref + BTR_EXTERN_SPACE_ID);
4120
 
 
4121
 
        if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) {
4122
 
                ext_zip_size = fil_space_get_zip_size(space_id);
4123
 
                /* This must be an undo log record in the system tablespace,
4124
 
                that is, in row_purge_upd_exist_or_extern().
4125
 
                Currently, externally stored records are stored in the
4126
 
                same tablespace as the referring records. */
4127
 
                ut_ad(!page_get_space_id(page_align(field_ref)));
4128
 
                ut_ad(!rec);
4129
 
                ut_ad(!page_zip);
4130
 
        } else {
4131
 
                ext_zip_size = rec_zip_size;
4132
 
        }
4133
 
 
4134
 
        if (!rec) {
4135
 
                /* This is a call from row_purge_upd_exist_or_extern(). */
4136
 
                ut_ad(!page_zip);
4137
 
                rec_zip_size = 0;
4138
 
        }
 
3550
        ut_ad(mtr_memo_contains(local_mtr, buf_block_align(data),
 
3551
                                MTR_MEMO_PAGE_X_FIX));
 
3552
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
 
3553
        local_len -= BTR_EXTERN_FIELD_REF_SIZE;
4139
3554
 
4140
3555
        for (;;) {
4141
 
                buf_block_t*    rec_block;
4142
 
                buf_block_t*    ext_block;
4143
 
 
4144
3556
                mtr_start(&mtr);
4145
3557
 
4146
 
                rec_block = buf_page_get(page_get_space_id(
4147
 
                                                 page_align(field_ref)),
4148
 
                                         rec_zip_size,
4149
 
                                         page_get_page_no(
4150
 
                                                 page_align(field_ref)),
4151
 
                                         RW_X_LATCH, &mtr);
4152
 
#ifdef UNIV_SYNC_DEBUG
4153
 
                buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK);
4154
 
#endif /* UNIV_SYNC_DEBUG */
4155
 
                page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
4156
 
 
4157
 
                if (/* There is no external storage data */
4158
 
                    page_no == FIL_NULL
4159
 
                    /* This field does not own the externally stored field */
4160
 
                    || (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
4161
 
                        & BTR_EXTERN_OWNER_FLAG)
4162
 
                    /* Rollback and inherited field */
4163
 
                    || (do_not_free_inherited
4164
 
                        && (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
4165
 
                            & BTR_EXTERN_INHERITED_FLAG))) {
4166
 
 
4167
 
                        /* Do not free */
4168
 
                        mtr_commit(&mtr);
4169
 
 
4170
 
                        return;
4171
 
                }
4172
 
 
4173
 
                ext_block = buf_page_get(space_id, ext_zip_size, page_no,
4174
 
                                         RW_X_LATCH, &mtr);
4175
 
#ifdef UNIV_SYNC_DEBUG
4176
 
                buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE);
4177
 
#endif /* UNIV_SYNC_DEBUG */
4178
 
                page = buf_block_get_frame(ext_block);
4179
 
 
4180
 
                if (ext_zip_size) {
4181
 
                        /* Note that page_zip will be NULL
4182
 
                        in row_purge_upd_exist_or_extern(). */
4183
 
                        switch (fil_page_get_type(page)) {
4184
 
                        case FIL_PAGE_TYPE_ZBLOB:
4185
 
                        case FIL_PAGE_TYPE_ZBLOB2:
4186
 
                                break;
4187
 
                        default:
4188
 
                                ut_error;
4189
 
                        }
4190
 
                        next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
4191
 
 
4192
 
                        btr_page_free_low(index, ext_block, 0, &mtr);
4193
 
 
4194
 
                        if (UNIV_LIKELY(page_zip != NULL)) {
4195
 
                                mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO,
4196
 
                                                next_page_no);
4197
 
                                mach_write_to_4(field_ref + BTR_EXTERN_LEN + 4,
4198
 
                                                0);
4199
 
                                page_zip_write_blob_ptr(page_zip, rec, index,
4200
 
                                                        offsets, i, &mtr);
4201
 
                        } else {
4202
 
                                mlog_write_ulint(field_ref
4203
 
                                                 + BTR_EXTERN_PAGE_NO,
4204
 
                                                 next_page_no,
4205
 
                                                 MLOG_4BYTES, &mtr);
4206
 
                                mlog_write_ulint(field_ref
4207
 
                                                 + BTR_EXTERN_LEN + 4, 0,
4208
 
                                                 MLOG_4BYTES, &mtr);
4209
 
                        }
4210
 
                } else {
4211
 
                        ulint   extern_len      = mach_read_from_4(
4212
 
                                field_ref + BTR_EXTERN_LEN + 4);
4213
 
                        ulint   part_len        = btr_blob_get_part_len(
4214
 
                                page + FIL_PAGE_DATA);
4215
 
 
4216
 
                        ut_a(fil_page_get_type(page) == FIL_PAGE_TYPE_BLOB);
4217
 
                        ut_a(!page_zip);
4218
 
                        ut_a(extern_len >= part_len);
4219
 
 
4220
 
                        next_page_no = mach_read_from_4(
4221
 
                                page + FIL_PAGE_DATA
4222
 
                                + BTR_BLOB_HDR_NEXT_PAGE_NO);
4223
 
 
4224
 
                        /* We must supply the page level (= 0) as an argument
4225
 
                        because we did not store it on the page (we save the
4226
 
                        space overhead from an index page header). */
4227
 
 
4228
 
                        ut_a(space_id == page_get_space_id(page));
4229
 
                        ut_a(page_no == page_get_page_no(page));
4230
 
 
4231
 
                        btr_page_free_low(index, ext_block, 0, &mtr);
4232
 
 
4233
 
                        mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
4234
 
                                         next_page_no,
4235
 
                                         MLOG_4BYTES, &mtr);
4236
 
                        mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4,
4237
 
                                         extern_len - part_len,
4238
 
                                         MLOG_4BYTES, &mtr);
4239
 
                        if (next_page_no == FIL_NULL) {
4240
 
                                ut_a(extern_len - part_len == 0);
4241
 
                        }
4242
 
 
4243
 
                        if (extern_len - part_len == 0) {
4244
 
                                ut_a(next_page_no == FIL_NULL);
4245
 
                        }
4246
 
                }
4247
 
 
4248
 
                /* Commit mtr and release the BLOB block to save memory. */
4249
 
                btr_blob_free(ext_block, TRUE, &mtr);
 
3558
                rec_page = buf_page_get(buf_frame_get_space_id(data),
 
3559
                                        buf_frame_get_page_no(data),
 
3560
                                        RW_X_LATCH, &mtr);
 
3561
#ifdef UNIV_SYNC_DEBUG
 
3562
                buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK);
 
3563
#endif /* UNIV_SYNC_DEBUG */
 
3564
                space_id = mach_read_from_4(data + local_len
 
3565
                                            + BTR_EXTERN_SPACE_ID);
 
3566
 
 
3567
                page_no = mach_read_from_4(data + local_len
 
3568
                                           + BTR_EXTERN_PAGE_NO);
 
3569
 
 
3570
                offset = mach_read_from_4(data + local_len
 
3571
                                          + BTR_EXTERN_OFFSET);
 
3572
                extern_len = mach_read_from_4(data + local_len
 
3573
                                              + BTR_EXTERN_LEN + 4);
 
3574
 
 
3575
                /* If extern len is 0, then there is no external storage data
 
3576
                at all */
 
3577
 
 
3578
                if (extern_len == 0) {
 
3579
 
 
3580
                        mtr_commit(&mtr);
 
3581
 
 
3582
                        return;
 
3583
                }
 
3584
 
 
3585
                if (mach_read_from_1(data + local_len + BTR_EXTERN_LEN)
 
3586
                    & BTR_EXTERN_OWNER_FLAG) {
 
3587
                        /* This field does not own the externally
 
3588
                        stored field: do not free! */
 
3589
 
 
3590
                        mtr_commit(&mtr);
 
3591
 
 
3592
                        return;
 
3593
                }
 
3594
 
 
3595
                if (do_not_free_inherited
 
3596
                    && mach_read_from_1(data + local_len + BTR_EXTERN_LEN)
 
3597
                    & BTR_EXTERN_INHERITED_FLAG) {
 
3598
                        /* Rollback and inherited field: do not free! */
 
3599
 
 
3600
                        mtr_commit(&mtr);
 
3601
 
 
3602
                        return;
 
3603
                }
 
3604
 
 
3605
                page = buf_page_get(space_id, page_no, RW_X_LATCH, &mtr);
 
3606
#ifdef UNIV_SYNC_DEBUG
 
3607
                buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE);
 
3608
#endif /* UNIV_SYNC_DEBUG */
 
3609
                next_page_no = mach_read_from_4(page + FIL_PAGE_DATA
 
3610
                                                + BTR_BLOB_HDR_NEXT_PAGE_NO);
 
3611
 
 
3612
                part_len = btr_blob_get_part_len(page + FIL_PAGE_DATA);
 
3613
 
 
3614
                ut_a(extern_len >= part_len);
 
3615
 
 
3616
                /* We must supply the page level (= 0) as an argument
 
3617
                because we did not store it on the page (we save the space
 
3618
                overhead from an index page header. */
 
3619
 
 
3620
                btr_page_free_low(index, page, 0, &mtr);
 
3621
 
 
3622
                mlog_write_ulint(data + local_len + BTR_EXTERN_PAGE_NO,
 
3623
                                 next_page_no,
 
3624
                                 MLOG_4BYTES, &mtr);
 
3625
                mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4,
 
3626
                                 extern_len - part_len,
 
3627
                                 MLOG_4BYTES, &mtr);
 
3628
                if (next_page_no == FIL_NULL) {
 
3629
                        ut_a(extern_len - part_len == 0);
 
3630
                }
 
3631
 
 
3632
                if (extern_len - part_len == 0) {
 
3633
                        ut_a(next_page_no == FIL_NULL);
 
3634
                }
 
3635
 
 
3636
                mtr_commit(&mtr);
4250
3637
        }
4251
3638
}
4252
3639
 
4253
3640
/***************************************************************
4254
3641
Frees the externally stored fields for a record. */
4255
 
static
 
3642
 
4256
3643
void
4257
3644
btr_rec_free_externally_stored_fields(
4258
3645
/*==================================*/
4259
3646
        dict_index_t*   index,  /* in: index of the data, the index
4260
3647
                                tree MUST be X-latched */
4261
 
        rec_t*          rec,    /* in/out: record */
 
3648
        rec_t*          rec,    /* in: record */
4262
3649
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
4263
 
        page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
4264
 
                                part will be updated, or NULL */
4265
3650
        ibool           do_not_free_inherited,/* in: TRUE if called in a
4266
3651
                                rollback and we do not want to free
4267
3652
                                inherited fields */
4270
3655
                                tree */
4271
3656
{
4272
3657
        ulint   n_fields;
 
3658
        byte*   data;
 
3659
        ulint   len;
4273
3660
        ulint   i;
4274
3661
 
4275
3662
        ut_ad(rec_offs_validate(rec, index, offsets));
4276
 
        ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
 
3663
        ut_ad(mtr_memo_contains(mtr, buf_block_align(rec),
 
3664
                                MTR_MEMO_PAGE_X_FIX));
4277
3665
        /* Free possible externally stored fields in the record */
4278
3666
 
4279
3667
        ut_ad(dict_table_is_comp(index->table) == !!rec_offs_comp(offsets));
4281
3669
 
4282
3670
        for (i = 0; i < n_fields; i++) {
4283
3671
                if (rec_offs_nth_extern(offsets, i)) {
4284
 
                        ulint   len;
4285
 
                        byte*   data
4286
 
                                = rec_get_nth_field(rec, offsets, i, &len);
4287
 
                        ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
4288
3672
 
4289
 
                        btr_free_externally_stored_field(
4290
 
                                index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
4291
 
                                rec, offsets, page_zip, i,
4292
 
                                do_not_free_inherited, mtr);
 
3673
                        data = rec_get_nth_field(rec, offsets, i, &len);
 
3674
                        btr_free_externally_stored_field(index, data, len,
 
3675
                                                         do_not_free_inherited,
 
3676
                                                         mtr);
4293
3677
                }
4294
3678
        }
4295
3679
}
4303
3687
/*===============================*/
4304
3688
        dict_index_t*   index,  /* in: index of rec; the index tree MUST be
4305
3689
                                X-latched */
4306
 
        rec_t*          rec,    /* in/out: record */
4307
 
        page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
4308
 
                                part will be updated, or NULL */
 
3690
        rec_t*          rec,    /* in: record */
4309
3691
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
4310
 
        const upd_t*    update, /* in: update vector */
 
3692
        upd_t*          update, /* in: update vector */
 
3693
        ibool           do_not_free_inherited,/* in: TRUE if called in a
 
3694
                                rollback and we do not want to free
 
3695
                                inherited fields */
4311
3696
        mtr_t*          mtr)    /* in: mini-transaction handle which contains
4312
3697
                                an X-latch to record page and to the tree */
4313
3698
{
4314
 
        ulint   n_fields;
4315
 
        ulint   i;
 
3699
        upd_field_t*    ufield;
 
3700
        ulint           n_fields;
 
3701
        byte*           data;
 
3702
        ulint           len;
 
3703
        ulint           i;
4316
3704
 
4317
3705
        ut_ad(rec_offs_validate(rec, index, offsets));
4318
 
        ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
 
3706
        ut_ad(mtr_memo_contains(mtr, buf_block_align(rec),
 
3707
                                MTR_MEMO_PAGE_X_FIX));
4319
3708
 
4320
3709
        /* Free possible externally stored fields in the record */
4321
3710
 
4322
3711
        n_fields = upd_get_n_fields(update);
4323
3712
 
4324
3713
        for (i = 0; i < n_fields; i++) {
4325
 
                const upd_field_t* ufield = upd_get_nth_field(update, i);
 
3714
                ufield = upd_get_nth_field(update, i);
4326
3715
 
4327
3716
                if (rec_offs_nth_extern(offsets, ufield->field_no)) {
4328
 
                        ulint   len;
4329
 
                        byte*   data = rec_get_nth_field(
4330
 
                                rec, offsets, ufield->field_no, &len);
4331
 
                        ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
4332
 
 
4333
 
                        btr_free_externally_stored_field(
4334
 
                                index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
4335
 
                                rec, offsets, page_zip,
4336
 
                                ufield->field_no, TRUE, mtr);
4337
 
                }
4338
 
        }
4339
 
}
4340
 
 
4341
 
/***********************************************************************
4342
 
Copies the prefix of an uncompressed BLOB.  The clustered index record
4343
 
that points to this BLOB must be protected by a lock or a page latch. */
4344
 
static
4345
 
ulint
4346
 
btr_copy_blob_prefix(
4347
 
/*=================*/
4348
 
                                /* out: number of bytes written to buf */
4349
 
        byte*           buf,    /* out: the externally stored part of
4350
 
                                the field, or a prefix of it */
4351
 
        ulint           len,    /* in: length of buf, in bytes */
4352
 
        ulint           space_id,/* in: space id of the BLOB pages */
4353
 
        ulint           page_no,/* in: page number of the first BLOB page */
4354
 
        ulint           offset) /* in: offset on the first BLOB page */
4355
 
{
4356
 
        ulint   copied_len      = 0;
4357
 
 
4358
 
        for (;;) {
4359
 
                mtr_t           mtr;
4360
 
                buf_block_t*    block;
4361
 
                const page_t*   page;
4362
 
                const byte*     blob_header;
4363
 
                ulint           part_len;
4364
 
                ulint           copy_len;
4365
 
 
4366
 
                mtr_start(&mtr);
4367
 
 
4368
 
                block = buf_page_get(space_id, 0, page_no, RW_S_LATCH, &mtr);
4369
 
#ifdef UNIV_SYNC_DEBUG
4370
 
                buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE);
4371
 
#endif /* UNIV_SYNC_DEBUG */
4372
 
                page = buf_block_get_frame(block);
4373
 
 
4374
 
                /* Unfortunately, FIL_PAGE_TYPE was uninitialized for
4375
 
                many pages until MySQL/InnoDB 5.1.7. */
4376
 
                /* ut_ad(fil_page_get_type(page) == FIL_PAGE_TYPE_BLOB); */
4377
 
                blob_header = page + offset;
4378
 
                part_len = btr_blob_get_part_len(blob_header);
4379
 
                copy_len = ut_min(part_len, len - copied_len);
4380
 
 
4381
 
                memcpy(buf + copied_len,
4382
 
                       blob_header + BTR_BLOB_HDR_SIZE, copy_len);
4383
 
                copied_len += copy_len;
4384
 
 
4385
 
                page_no = btr_blob_get_next_page_no(blob_header);
4386
 
 
4387
 
                mtr_commit(&mtr);
4388
 
 
4389
 
                if (page_no == FIL_NULL || copy_len != part_len) {
4390
 
                        return(copied_len);
4391
 
                }
4392
 
 
4393
 
                /* On other BLOB pages except the first the BLOB header
4394
 
                always is at the page data start: */
4395
 
 
4396
 
                offset = FIL_PAGE_DATA;
4397
 
 
4398
 
                ut_ad(copied_len <= len);
4399
 
        }
4400
 
}
4401
 
 
4402
 
/***********************************************************************
4403
 
Copies the prefix of a compressed BLOB.  The clustered index record
4404
 
that points to this BLOB must be protected by a lock or a page latch. */
4405
 
static
4406
 
void
4407
 
btr_copy_zblob_prefix(
4408
 
/*==================*/
4409
 
        z_stream*       d_stream,/* in/out: the decompressing stream */
4410
 
        ulint           zip_size,/* in: compressed BLOB page size */
4411
 
        ulint           space_id,/* in: space id of the BLOB pages */
4412
 
        ulint           page_no,/* in: page number of the first BLOB page */
4413
 
        ulint           offset) /* in: offset on the first BLOB page */
4414
 
{
4415
 
        ulint   page_type = FIL_PAGE_TYPE_ZBLOB;
4416
 
 
4417
 
        ut_ad(ut_is_2pow(zip_size));
4418
 
        ut_ad(zip_size >= PAGE_ZIP_MIN_SIZE);
4419
 
        ut_ad(zip_size <= UNIV_PAGE_SIZE);
4420
 
        ut_ad(space_id);
4421
 
 
4422
 
        for (;;) {
4423
 
                buf_page_t*     bpage;
4424
 
                int             err;
4425
 
                ulint           next_page_no;
4426
 
 
4427
 
                /* There is no latch on bpage directly.  Instead,
4428
 
                bpage is protected by the B-tree page latch that
4429
 
                is being held on the clustered index record, or,
4430
 
                in row_merge_copy_blobs(), by an exclusive table lock. */
4431
 
                bpage = buf_page_get_zip(space_id, zip_size, page_no);
4432
 
 
4433
 
                if (UNIV_UNLIKELY(!bpage)) {
4434
 
                        ut_print_timestamp(stderr);
4435
 
                        fprintf(stderr,
4436
 
                                "  InnoDB: Cannot load"
4437
 
                                " compressed BLOB"
4438
 
                                " page %lu space %lu\n",
4439
 
                                (ulong) page_no, (ulong) space_id);
4440
 
                        return;
4441
 
                }
4442
 
 
4443
 
                if (UNIV_UNLIKELY
4444
 
                    (fil_page_get_type(bpage->zip.data) != page_type)) {
4445
 
                        ut_print_timestamp(stderr);
4446
 
                        fprintf(stderr,
4447
 
                                "  InnoDB: Unexpected type %lu of"
4448
 
                                " compressed BLOB"
4449
 
                                " page %lu space %lu\n",
4450
 
                                (ulong) fil_page_get_type(bpage->zip.data),
4451
 
                                (ulong) page_no, (ulong) space_id);
4452
 
                        goto end_of_blob;
4453
 
                }
4454
 
 
4455
 
                next_page_no = mach_read_from_4(bpage->zip.data + offset);
4456
 
 
4457
 
                if (UNIV_LIKELY(offset == FIL_PAGE_NEXT)) {
4458
 
                        /* When the BLOB begins at page header,
4459
 
                        the compressed data payload does not
4460
 
                        immediately follow the next page pointer. */
4461
 
                        offset = FIL_PAGE_DATA;
4462
 
                } else {
4463
 
                        offset += 4;
4464
 
                }
4465
 
 
4466
 
                d_stream->next_in = bpage->zip.data + offset;
4467
 
                d_stream->avail_in = zip_size - offset;
4468
 
 
4469
 
                err = inflate(d_stream, Z_NO_FLUSH);
4470
 
                switch (err) {
4471
 
                case Z_OK:
4472
 
                        if (!d_stream->avail_out) {
4473
 
                                goto end_of_blob;
4474
 
                        }
4475
 
                        break;
4476
 
                case Z_STREAM_END:
4477
 
                        if (next_page_no == FIL_NULL) {
4478
 
                                goto end_of_blob;
4479
 
                        }
4480
 
                        /* fall through */
4481
 
                default:
4482
 
inflate_error:
4483
 
                        ut_print_timestamp(stderr);
4484
 
                        fprintf(stderr,
4485
 
                                "  InnoDB: inflate() of"
4486
 
                                " compressed BLOB"
4487
 
                                " page %lu space %lu returned %d (%s)\n",
4488
 
                                (ulong) page_no, (ulong) space_id,
4489
 
                                err, d_stream->msg);
4490
 
                case Z_BUF_ERROR:
4491
 
                        goto end_of_blob;
4492
 
                }
4493
 
 
4494
 
                if (next_page_no == FIL_NULL) {
4495
 
                        if (!d_stream->avail_in) {
4496
 
                                ut_print_timestamp(stderr);
4497
 
                                fprintf(stderr,
4498
 
                                        "  InnoDB: unexpected end of"
4499
 
                                        " compressed BLOB"
4500
 
                                        " page %lu space %lu\n",
4501
 
                                        (ulong) page_no,
4502
 
                                        (ulong) space_id);
4503
 
                        } else {
4504
 
                                err = inflate(d_stream, Z_FINISH);
4505
 
                                switch (err) {
4506
 
                                case Z_STREAM_END:
4507
 
                                case Z_BUF_ERROR:
4508
 
                                        break;
4509
 
                                default:
4510
 
                                        goto inflate_error;
4511
 
                                }
4512
 
                        }
4513
 
 
4514
 
end_of_blob:
4515
 
                        buf_page_release_zip(bpage);
4516
 
                        return;
4517
 
                }
4518
 
 
4519
 
                buf_page_release_zip(bpage);
4520
 
 
4521
 
                /* On other BLOB pages except the first
4522
 
                the BLOB header always is at the page header: */
4523
 
 
4524
 
                page_no = next_page_no;
4525
 
                offset = FIL_PAGE_NEXT;
4526
 
                page_type = FIL_PAGE_TYPE_ZBLOB2;
4527
 
        }
4528
 
}
4529
 
 
4530
 
/***********************************************************************
4531
 
Copies the prefix of an externally stored field of a record.  The
4532
 
clustered index record that points to this BLOB must be protected by a
4533
 
lock or a page latch. */
4534
 
static
4535
 
ulint
4536
 
btr_copy_externally_stored_field_prefix_low(
4537
 
/*========================================*/
4538
 
                                /* out: number of bytes written to buf */
4539
 
        byte*           buf,    /* out: the externally stored part of
4540
 
                                the field, or a prefix of it */
4541
 
        ulint           len,    /* in: length of buf, in bytes */
4542
 
        ulint           zip_size,/* in: nonzero=compressed BLOB page size,
4543
 
                                zero for uncompressed BLOBs */
4544
 
        ulint           space_id,/* in: space id of the first BLOB page */
4545
 
        ulint           page_no,/* in: page number of the first BLOB page */
4546
 
        ulint           offset) /* in: offset on the first BLOB page */
4547
 
{
4548
 
        if (UNIV_UNLIKELY(len == 0)) {
4549
 
                return(0);
4550
 
        }
4551
 
 
4552
 
        if (UNIV_UNLIKELY(zip_size)) {
4553
 
                int             err;
4554
 
                z_stream        d_stream;
4555
 
                mem_heap_t*     heap;
4556
 
 
4557
 
                /* Zlib inflate needs 32 kilobytes for the default
4558
 
                window size, plus a few kilobytes for small objects. */
4559
 
                heap = mem_heap_create(40000);
4560
 
                page_zip_set_alloc(&d_stream, heap);
4561
 
 
4562
 
                err = inflateInit(&d_stream);
4563
 
                ut_a(err == Z_OK);
4564
 
 
4565
 
                d_stream.next_out = buf;
4566
 
                d_stream.avail_out = len;
4567
 
                d_stream.avail_in = 0;
4568
 
 
4569
 
                btr_copy_zblob_prefix(&d_stream, zip_size,
4570
 
                                      space_id, page_no, offset);
4571
 
                inflateEnd(&d_stream);
4572
 
                mem_heap_free(heap);
4573
 
                return(d_stream.total_out);
4574
 
        } else {
4575
 
                return(btr_copy_blob_prefix(buf, len, space_id,
4576
 
                                            page_no, offset));
4577
 
        }
4578
 
}
4579
 
 
4580
 
/***********************************************************************
4581
 
Copies the prefix of an externally stored field of a record.  The
4582
 
clustered index record must be protected by a lock or a page latch. */
4583
 
UNIV_INTERN
4584
 
ulint
4585
 
btr_copy_externally_stored_field_prefix(
4586
 
/*====================================*/
4587
 
                                /* out: the length of the copied field */
4588
 
        byte*           buf,    /* out: the field, or a prefix of it */
4589
 
        ulint           len,    /* in: length of buf, in bytes */
4590
 
        ulint           zip_size,/* in: nonzero=compressed BLOB page size,
4591
 
                                zero for uncompressed BLOBs */
4592
 
        const byte*     data,   /* in: 'internally' stored part of the
4593
 
                                field containing also the reference to
4594
 
                                the external part; must be protected by
4595
 
                                a lock or a page latch */
4596
 
        ulint           local_len)/* in: length of data, in bytes */
4597
 
{
4598
 
        ulint   space_id;
4599
 
        ulint   page_no;
4600
 
        ulint   offset;
4601
 
 
4602
 
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
4603
 
 
4604
 
        local_len -= BTR_EXTERN_FIELD_REF_SIZE;
4605
 
 
4606
 
        if (UNIV_UNLIKELY(local_len >= len)) {
4607
 
                memcpy(buf, data, len);
4608
 
                return(len);
4609
 
        }
4610
 
 
4611
 
        memcpy(buf, data, local_len);
4612
 
        data += local_len;
4613
 
 
4614
 
        ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));
4615
 
 
4616
 
        space_id = mach_read_from_4(data + BTR_EXTERN_SPACE_ID);
4617
 
 
4618
 
        page_no = mach_read_from_4(data + BTR_EXTERN_PAGE_NO);
4619
 
 
4620
 
        offset = mach_read_from_4(data + BTR_EXTERN_OFFSET);
4621
 
 
4622
 
        return(local_len
4623
 
               + btr_copy_externally_stored_field_prefix_low(buf + local_len,
4624
 
                                                             len - local_len,
4625
 
                                                             zip_size,
4626
 
                                                             space_id, page_no,
4627
 
                                                             offset));
4628
 
}
4629
 
 
4630
 
/***********************************************************************
4631
 
Copies an externally stored field of a record to mem heap.  The
4632
 
clustered index record must be protected by a lock or a page latch. */
4633
 
static
 
3717
 
 
3718
                        data = rec_get_nth_field(rec, offsets,
 
3719
                                                 ufield->field_no, &len);
 
3720
                        btr_free_externally_stored_field(index, data, len,
 
3721
                                                         do_not_free_inherited,
 
3722
                                                         mtr);
 
3723
                }
 
3724
        }
 
3725
}
 
3726
 
 
3727
/***********************************************************************
 
3728
Copies an externally stored field of a record to mem heap. Parameter
 
3729
data contains a pointer to 'internally' stored part of the field:
 
3730
possibly some data, and the reference to the externally stored part in
 
3731
the last 20 bytes of data. */
 
3732
 
4634
3733
byte*
4635
3734
btr_copy_externally_stored_field(
4636
3735
/*=============================*/
4637
3736
                                /* out: the whole field copied to heap */
4638
3737
        ulint*          len,    /* out: length of the whole field */
4639
 
        const byte*     data,   /* in: 'internally' stored part of the
 
3738
        byte*           data,   /* in: 'internally' stored part of the
4640
3739
                                field containing also the reference to
4641
 
                                the external part; must be protected by
4642
 
                                a lock or a page latch */
4643
 
        ulint           zip_size,/* in: nonzero=compressed BLOB page size,
4644
 
                                zero for uncompressed BLOBs */
 
3740
                                the external part */
4645
3741
        ulint           local_len,/* in: length of data */
4646
3742
        mem_heap_t*     heap)   /* in: mem heap */
4647
3743
{
 
3744
        page_t* page;
4648
3745
        ulint   space_id;
4649
3746
        ulint   page_no;
4650
3747
        ulint   offset;
4651
3748
        ulint   extern_len;
 
3749
        byte*   blob_header;
 
3750
        ulint   part_len;
4652
3751
        byte*   buf;
 
3752
        ulint   copied_len;
 
3753
        mtr_t   mtr;
4653
3754
 
4654
3755
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
4655
3756
 
4661
3762
 
4662
3763
        offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET);
4663
3764
 
4664
 
        /* Currently a BLOB cannot be bigger than 4 GB; we
 
3765
        /* Currently a BLOB cannot be bigger that 4 GB; we
4665
3766
        leave the 4 upper bytes in the length field unused */
4666
3767
 
4667
3768
        extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4);
4668
3769
 
4669
3770
        buf = mem_heap_alloc(heap, local_len + extern_len);
4670
3771
 
4671
 
        memcpy(buf, data, local_len);
4672
 
        *len = local_len
4673
 
                + btr_copy_externally_stored_field_prefix_low(buf + local_len,
4674
 
                                                              extern_len,
4675
 
                                                              zip_size,
4676
 
                                                              space_id,
4677
 
                                                              page_no, offset);
4678
 
 
4679
 
        return(buf);
 
3772
        ut_memcpy(buf, data, local_len);
 
3773
        copied_len = local_len;
 
3774
 
 
3775
        if (extern_len == 0) {
 
3776
                *len = copied_len;
 
3777
 
 
3778
                return(buf);
 
3779
        }
 
3780
 
 
3781
        for (;;) {
 
3782
                mtr_start(&mtr);
 
3783
 
 
3784
                page = buf_page_get(space_id, page_no, RW_S_LATCH, &mtr);
 
3785
#ifdef UNIV_SYNC_DEBUG
 
3786
                buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE);
 
3787
#endif /* UNIV_SYNC_DEBUG */
 
3788
                blob_header = page + offset;
 
3789
 
 
3790
                part_len = btr_blob_get_part_len(blob_header);
 
3791
 
 
3792
                ut_memcpy(buf + copied_len, blob_header + BTR_BLOB_HDR_SIZE,
 
3793
                          part_len);
 
3794
                copied_len += part_len;
 
3795
 
 
3796
                page_no = btr_blob_get_next_page_no(blob_header);
 
3797
 
 
3798
                mtr_commit(&mtr);
 
3799
 
 
3800
                if (page_no == FIL_NULL) {
 
3801
                        ut_a(copied_len == local_len + extern_len);
 
3802
 
 
3803
                        *len = copied_len;
 
3804
 
 
3805
                        return(buf);
 
3806
                }
 
3807
 
 
3808
                /* On other BLOB pages except the first the BLOB header
 
3809
                always is at the page data start: */
 
3810
 
 
3811
                offset = FIL_PAGE_DATA;
 
3812
 
 
3813
                ut_a(copied_len < local_len + extern_len);
 
3814
        }
4680
3815
}
4681
3816
 
4682
3817
/***********************************************************************
4683
3818
Copies an externally stored field of a record to mem heap. */
4684
 
UNIV_INTERN
 
3819
 
4685
3820
byte*
4686
3821
btr_rec_copy_externally_stored_field(
4687
3822
/*=================================*/
4688
3823
                                /* out: the field copied to heap */
4689
 
        const rec_t*    rec,    /* in: record in a clustered index;
4690
 
                                must be protected by a lock or a page latch */
 
3824
        rec_t*          rec,    /* in: record */
4691
3825
        const ulint*    offsets,/* in: array returned by rec_get_offsets() */
4692
 
        ulint           zip_size,/* in: nonzero=compressed BLOB page size,
4693
 
                                zero for uncompressed BLOBs */
4694
3826
        ulint           no,     /* in: field number */
4695
3827
        ulint*          len,    /* out: length of the field */
4696
3828
        mem_heap_t*     heap)   /* in: mem heap */
4697
3829
{
4698
 
        ulint           local_len;
4699
 
        const byte*     data;
 
3830
        ulint   local_len;
 
3831
        byte*   data;
4700
3832
 
 
3833
        ut_ad(rec_offs_validate(rec, NULL, offsets));
4701
3834
        ut_a(rec_offs_nth_extern(offsets, no));
4702
3835
 
4703
3836
        /* An externally stored field can contain some initial
4711
3844
 
4712
3845
        data = rec_get_nth_field(rec, offsets, no, &local_len);
4713
3846
 
4714
 
        return(btr_copy_externally_stored_field(len, data,
4715
 
                                                zip_size, local_len, heap));
 
3847
        return(btr_copy_externally_stored_field(len, data, local_len, heap));
4716
3848
}