~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to storage/innobase/page/page0zip.c

  • Committer: Monty Taylor
  • Date: 2008-10-02 01:27:37 UTC
  • Revision ID: monty@inaugust.com-20081002012737-3uxmdovii2l14uqe
Removed unused crud.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/******************************************************
2
 
Compressed page interface
3
 
 
4
 
(c) 2005 Innobase Oy
5
 
 
6
 
Created June 2005 by Marko Makela
7
 
*******************************************************/
8
 
 
9
 
#define THIS_MODULE
10
 
#include "page0zip.h"
11
 
#ifdef UNIV_NONINL
12
 
# include "page0zip.ic"
13
 
#endif
14
 
#undef THIS_MODULE
15
 
#include "page0page.h"
16
 
#include "mtr0log.h"
17
 
#include "ut0sort.h"
18
 
#include "dict0boot.h"
19
 
#include "dict0dict.h"
20
 
#include "btr0sea.h"
21
 
#include "btr0cur.h"
22
 
#include "page0types.h"
23
 
#include "lock0lock.h"
24
 
#include "log0recv.h"
25
 
#include "zlib.h"
26
 
#include "buf0lru.h"
27
 
 
28
 
/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
29
 
UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
30
 
 
31
 
/* Please refer to ../include/page0zip.ic for a description of the
32
 
compressed page format. */
33
 
 
34
 
/* The infimum and supremum records are omitted from the compressed page.
35
 
On compress, we compare that the records are there, and on uncompress we
36
 
restore the records. */
37
 
/* Leading header bytes of the infimum record.  The 2-byte "next" pointer
that follows them is not part of the template, because it varies
(first user record, or supremum). */
static const byte infimum_extra[] = {
        0x01,           /* info_bits=0, n_owned=1 */
        0x00, 0x02      /* heap_no=0, status=2 */
};

/* Data payload of the infimum record: "infimum\0" */
static const byte infimum_data[] = {
        0x69, 0x6e, 0x66, 0x69, 0x6d, 0x75, 0x6d, 0x00
};

/* Header and data payload of the supremum record.  The first header byte
(info_bits=0, n_owned=1..8) is not part of the template, because
n_owned varies. */
static const byte supremum_extra_data[] = {
        0x00, 0x0b,     /* heap_no=1, status=3 */
        0x00, 0x00,     /* next=0 */
        0x73, 0x75, 0x70, 0x72, 0x65, 0x6d, 0x75, 0x6d  /* "supremum" */
};
53
 
 
54
 
/** Debug assertion: the memory block starting at b, of length s bytes,
contains only zero bytes.  Only the first sizeof(field_ref_zero) bytes
are compared when s is larger than that. */
#define ASSERT_ZERO(b, s) \
        ut_ad(!memcmp((b), field_ref_zero, \
                      ut_min((s), sizeof field_ref_zero)))
/** Debug assertion: the BLOB pointer at b contains only zero bytes
(sizeof(field_ref_zero) bytes are compared). */
#define ASSERT_ZERO_BLOB(b) \
        ut_ad(!memcmp((b), field_ref_zero, sizeof field_ref_zero))
61
 
 
62
 
/* Enable some extra debugging output.  This code can be enabled
63
 
independently of any UNIV_ debugging conditions. */
64
 
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
65
 
# include <stdarg.h>
66
 
/**************************************************************************
Report a failure to decompress or compress.  The message is formatted
like printf(3) and written to stderr. */
__attribute__((format (printf, 1, 2)))
static
int
page_zip_fail_func(
/*===============*/
                                /* out: return value of vfprintf(): the
                                number of characters printed */
        const char*     fmt,    /* in: printf(3) format string */
        ...)                    /* in: arguments corresponding to fmt */
{
        va_list args;
        int     n_printed;

        va_start(args, fmt);
        n_printed = vfprintf(stderr, fmt, args);
        va_end(args);

        return(n_printed);
}
86
 
# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
87
 
#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
88
 
# define page_zip_fail(fmt_args) /* empty */
89
 
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
90
 
 
91
 
/**************************************************************************
92
 
Determine the guaranteed free space on an empty page. */
93
 
UNIV_INTERN
94
 
ulint
95
 
page_zip_empty_size(
96
 
/*================*/
97
 
                                /* out: minimum payload size on the page */
98
 
        ulint   n_fields,       /* in: number of columns in the index */
99
 
        ulint   zip_size)       /* in: compressed page size in bytes */
100
 
{
101
 
        lint    size = zip_size
102
 
                /* subtract the page header and the longest
103
 
                uncompressed data needed for one record */
104
 
                - (PAGE_DATA
105
 
                   + PAGE_ZIP_DIR_SLOT_SIZE
106
 
                   + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
107
 
                   + 1/* encoded heap_no==2 in page_zip_write_rec() */
108
 
                   + 1/* end of modification log */
109
 
                   - REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
110
 
                /* subtract the space for page_zip_fields_encode() */
111
 
                - compressBound(2 * (n_fields + 1));
112
 
        return(size > 0 ? (ulint) size : 0);
113
 
}
114
 
 
115
 
/*****************************************************************
116
 
Gets the size of the compressed page trailer (the dense page directory),
117
 
including deleted records (the free list). */
118
 
UNIV_INLINE
119
 
ulint
120
 
page_zip_dir_size(
121
 
/*==============*/
122
 
                                                /* out: length of dense page
123
 
                                                directory, in bytes */
124
 
        const page_zip_des_t*   page_zip)       /* in: compressed page */
125
 
{
126
 
        /* Exclude the page infimum and supremum from the record count. */
127
 
        ulint   size = PAGE_ZIP_DIR_SLOT_SIZE
128
 
                * (page_dir_get_n_heap(page_zip->data)
129
 
                   - PAGE_HEAP_NO_USER_LOW);
130
 
        return(size);
131
 
}
132
 
 
133
 
/*****************************************************************
134
 
Gets the size of the compressed page trailer (the dense page directory),
135
 
only including user records (excluding the free list). */
136
 
UNIV_INLINE
137
 
ulint
138
 
page_zip_dir_user_size(
139
 
/*===================*/
140
 
                                                /* out: length of dense page
141
 
                                                directory comprising existing
142
 
                                                records, in bytes */
143
 
        const page_zip_des_t*   page_zip)       /* in: compressed page */
144
 
{
145
 
        ulint   size = PAGE_ZIP_DIR_SLOT_SIZE
146
 
                * page_get_n_recs(page_zip->data);
147
 
        ut_ad(size <= page_zip_dir_size(page_zip));
148
 
        return(size);
149
 
}
150
 
 
151
 
/*****************************************************************
152
 
Find the slot of the given record in the dense page directory. */
153
 
UNIV_INLINE
154
 
byte*
155
 
page_zip_dir_find_low(
156
 
/*==================*/
157
 
                                        /* out: dense directory slot,
158
 
                                        or NULL if record not found */
159
 
        byte*   slot,                   /* in: start of records */
160
 
        byte*   end,                    /* in: end of records */
161
 
        ulint   offset)                 /* in: offset of user record */
162
 
{
163
 
        ut_ad(slot <= end);
164
 
 
165
 
        for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) {
166
 
                if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK)
167
 
                    == offset) {
168
 
                        return(slot);
169
 
                }
170
 
        }
171
 
 
172
 
        return(NULL);
173
 
}
174
 
 
175
 
/*****************************************************************
176
 
Find the slot of the given non-free record in the dense page directory. */
177
 
UNIV_INLINE
178
 
byte*
179
 
page_zip_dir_find(
180
 
/*==============*/
181
 
                                                /* out: dense directory slot,
182
 
                                                or NULL if record not found */
183
 
        page_zip_des_t* page_zip,               /* in: compressed page */
184
 
        ulint           offset)                 /* in: offset of user record */
185
 
{
186
 
        byte*   end     = page_zip->data + page_zip_get_size(page_zip);
187
 
 
188
 
        ut_ad(page_zip_simple_validate(page_zip));
189
 
 
190
 
        return(page_zip_dir_find_low(end - page_zip_dir_user_size(page_zip),
191
 
                                     end,
192
 
                                     offset));
193
 
}
194
 
 
195
 
/*****************************************************************
196
 
Find the slot of the given free record in the dense page directory. */
197
 
UNIV_INLINE
198
 
byte*
199
 
page_zip_dir_find_free(
200
 
/*===================*/
201
 
                                                /* out: dense directory slot,
202
 
                                                or NULL if record not found */
203
 
        page_zip_des_t* page_zip,               /* in: compressed page */
204
 
        ulint           offset)                 /* in: offset of user record */
205
 
{
206
 
        byte*   end     = page_zip->data + page_zip_get_size(page_zip);
207
 
 
208
 
        ut_ad(page_zip_simple_validate(page_zip));
209
 
 
210
 
        return(page_zip_dir_find_low(end - page_zip_dir_size(page_zip),
211
 
                                     end - page_zip_dir_user_size(page_zip),
212
 
                                     offset));
213
 
}
214
 
 
215
 
/*****************************************************************
216
 
Read a given slot in the dense page directory. */
217
 
UNIV_INLINE
218
 
ulint
219
 
page_zip_dir_get(
220
 
/*=============*/
221
 
                                                /* out: record offset
222
 
                                                on the uncompressed page,
223
 
                                                possibly ORed with
224
 
                                                PAGE_ZIP_DIR_SLOT_DEL or
225
 
                                                PAGE_ZIP_DIR_SLOT_OWNED */
226
 
        const page_zip_des_t*   page_zip,       /* in: compressed page */
227
 
        ulint                   slot)           /* in: slot
228
 
                                                (0=first user record) */
229
 
{
230
 
        ut_ad(page_zip_simple_validate(page_zip));
231
 
        ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE);
232
 
        return(mach_read_from_2(page_zip->data + page_zip_get_size(page_zip)
233
 
                                - PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
234
 
}
235
 
 
236
 
/**************************************************************************
237
 
Write a log record of compressing an index page. */
238
 
static
239
 
void
240
 
page_zip_compress_write_log(
241
 
/*========================*/
242
 
        const page_zip_des_t*   page_zip,/* in: compressed page */
243
 
        const page_t*           page,   /* in: uncompressed page */
244
 
        dict_index_t*           index,  /* in: index of the B-tree node */
245
 
        mtr_t*                  mtr)    /* in: mini-transaction */
246
 
{
247
 
        byte*   log_ptr;
248
 
        ulint   trailer_size;
249
 
 
250
 
        log_ptr = mlog_open(mtr, 11 + 2 + 2);
251
 
 
252
 
        if (!log_ptr) {
253
 
 
254
 
                return;
255
 
        }
256
 
 
257
 
        /* Read the number of user records. */
258
 
        trailer_size = page_dir_get_n_heap(page_zip->data)
259
 
                - PAGE_HEAP_NO_USER_LOW;
260
 
        /* Multiply by uncompressed of size stored per record */
261
 
        if (!page_is_leaf(page)) {
262
 
                trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
263
 
        } else if (dict_index_is_clust(index)) {
264
 
                trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
265
 
                        + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
266
 
        } else {
267
 
                trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
268
 
        }
269
 
        /* Add the space occupied by BLOB pointers. */
270
 
        trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
271
 
        ut_a(page_zip->m_end > PAGE_DATA);
272
 
#if FIL_PAGE_DATA > PAGE_DATA
273
 
# error "FIL_PAGE_DATA > PAGE_DATA"
274
 
#endif
275
 
        ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));
276
 
 
277
 
        log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
278
 
                                                     MLOG_ZIP_PAGE_COMPRESS,
279
 
                                                     log_ptr, mtr);
280
 
        mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE);
281
 
        log_ptr += 2;
282
 
        mach_write_to_2(log_ptr, trailer_size);
283
 
        log_ptr += 2;
284
 
        mlog_close(mtr, log_ptr);
285
 
 
286
 
        /* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
287
 
        mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
288
 
        mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
289
 
        /* Write most of the page header, the compressed stream and
290
 
        the modification log. */
291
 
        mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
292
 
                             page_zip->m_end - FIL_PAGE_TYPE);
293
 
        /* Write the uncompressed trailer of the compressed page. */
294
 
        mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
295
 
                             - trailer_size, trailer_size);
296
 
}
297
 
 
298
 
/**********************************************************
299
 
Determine how many externally stored columns are contained
300
 
in existing records with smaller heap_no than rec. */
301
 
static
302
 
ulint
303
 
page_zip_get_n_prev_extern(
304
 
/*=======================*/
305
 
        const page_zip_des_t*   page_zip,/* in: dense page directory on
306
 
                                        compressed page */
307
 
        const rec_t*            rec,    /* in: compact physical record
308
 
                                        on a B-tree leaf page */
309
 
        dict_index_t*           index)  /* in: record descriptor */
310
 
{
311
 
        const page_t*   page    = page_align(rec);
312
 
        ulint           n_ext   = 0;
313
 
        ulint           i;
314
 
        ulint           left;
315
 
        ulint           heap_no;
316
 
        ulint           n_recs  = page_get_n_recs(page_zip->data);
317
 
 
318
 
        ut_ad(page_is_leaf(page));
319
 
        ut_ad(page_is_comp(page));
320
 
        ut_ad(dict_table_is_comp(index->table));
321
 
        ut_ad(dict_index_is_clust(index));
322
 
 
323
 
        heap_no = rec_get_heap_no_new(rec);
324
 
        ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
325
 
        left = heap_no - PAGE_HEAP_NO_USER_LOW;
326
 
        if (UNIV_UNLIKELY(!left)) {
327
 
                return(0);
328
 
        }
329
 
 
330
 
        for (i = 0; i < n_recs; i++) {
331
 
                const rec_t*    r       = page + (page_zip_dir_get(page_zip, i)
332
 
                                                  & PAGE_ZIP_DIR_SLOT_MASK);
333
 
 
334
 
                if (rec_get_heap_no_new(r) < heap_no) {
335
 
                        n_ext += rec_get_n_extern_new(r, index,
336
 
                                                      ULINT_UNDEFINED);
337
 
                        if (!--left) {
338
 
                                break;
339
 
                        }
340
 
                }
341
 
        }
342
 
 
343
 
        return(n_ext);
344
 
}
345
 
 
346
 
/**************************************************************************
347
 
Encode the length of a fixed-length column. */
348
 
static
349
 
byte*
350
 
page_zip_fixed_field_encode(
351
 
/*========================*/
352
 
                        /* out: buf + length of encoded val */
353
 
        byte*   buf,    /* in: pointer to buffer where to write */
354
 
        ulint   val)    /* in: value to write */
355
 
{
356
 
        ut_ad(val >= 2);
357
 
 
358
 
        if (UNIV_LIKELY(val < 126)) {
359
 
                /*
360
 
                0 = nullable variable field of at most 255 bytes length;
361
 
                1 = not null variable field of at most 255 bytes length;
362
 
                126 = nullable variable field with maximum length >255;
363
 
                127 = not null variable field with maximum length >255
364
 
                */
365
 
                *buf++ = (byte) val;
366
 
        } else {
367
 
                *buf++ = (byte) (0x80 | val >> 8);
368
 
                *buf++ = (byte) val;
369
 
        }
370
 
 
371
 
        return(buf);
372
 
}
373
 
 
374
 
/**************************************************************************
375
 
Write the index information for the compressed page. */
376
 
static
377
 
ulint
378
 
page_zip_fields_encode(
379
 
/*===================*/
380
 
                                /* out: used size of buf */
381
 
        ulint           n,      /* in: number of fields to compress */
382
 
        dict_index_t*   index,  /* in: index comprising at least n fields */
383
 
        ulint           trx_id_pos,/* in: position of the trx_id column
384
 
                                in the index, or ULINT_UNDEFINED if
385
 
                                this is a non-leaf page */
386
 
        byte*           buf)    /* out: buffer of (n + 1) * 2 bytes */
387
 
{
388
 
        const byte*     buf_start       = buf;
389
 
        ulint           i;
390
 
        ulint           col;
391
 
        ulint           trx_id_col      = 0;
392
 
        /* sum of lengths of preceding non-nullable fixed fields, or 0 */
393
 
        ulint           fixed_sum       = 0;
394
 
 
395
 
        ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);
396
 
 
397
 
        for (i = col = 0; i < n; i++) {
398
 
                dict_field_t*   field = dict_index_get_nth_field(index, i);
399
 
                ulint           val;
400
 
 
401
 
                if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
402
 
                        val = 1; /* set the "not nullable" flag */
403
 
                } else {
404
 
                        val = 0; /* nullable field */
405
 
                }
406
 
 
407
 
                if (!field->fixed_len) {
408
 
                        /* variable-length field */
409
 
                        const dict_col_t*       column
410
 
                                = dict_field_get_col(field);
411
 
 
412
 
                        if (UNIV_UNLIKELY(column->len > 255)
413
 
                            || UNIV_UNLIKELY(column->mtype == DATA_BLOB)) {
414
 
                                val |= 0x7e; /* max > 255 bytes */
415
 
                        }
416
 
 
417
 
                        if (fixed_sum) {
418
 
                                /* write out the length of any
419
 
                                preceding non-nullable fields */
420
 
                                buf = page_zip_fixed_field_encode(
421
 
                                        buf, fixed_sum << 1 | 1);
422
 
                                fixed_sum = 0;
423
 
                                col++;
424
 
                        }
425
 
 
426
 
                        *buf++ = (byte) val;
427
 
                        col++;
428
 
                } else if (val) {
429
 
                        /* fixed-length non-nullable field */
430
 
 
431
 
                        if (fixed_sum && UNIV_UNLIKELY
432
 
                            (fixed_sum + field->fixed_len
433
 
                             > DICT_MAX_INDEX_COL_LEN)) {
434
 
                                /* Write out the length of the
435
 
                                preceding non-nullable fields,
436
 
                                to avoid exceeding the maximum
437
 
                                length of a fixed-length column. */
438
 
                                buf = page_zip_fixed_field_encode(
439
 
                                        buf, fixed_sum << 1 | 1);
440
 
                                fixed_sum = 0;
441
 
                                col++;
442
 
                        }
443
 
 
444
 
                        if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
445
 
                                if (fixed_sum) {
446
 
                                        /* Write out the length of any
447
 
                                        preceding non-nullable fields,
448
 
                                        and start a new trx_id column. */
449
 
                                        buf = page_zip_fixed_field_encode(
450
 
                                                buf, fixed_sum << 1 | 1);
451
 
                                        col++;
452
 
                                }
453
 
 
454
 
                                trx_id_col = col;
455
 
                                fixed_sum = field->fixed_len;
456
 
                        } else {
457
 
                                /* add to the sum */
458
 
                                fixed_sum += field->fixed_len;
459
 
                        }
460
 
                } else {
461
 
                        /* fixed-length nullable field */
462
 
 
463
 
                        if (fixed_sum) {
464
 
                                /* write out the length of any
465
 
                                preceding non-nullable fields */
466
 
                                buf = page_zip_fixed_field_encode(
467
 
                                        buf, fixed_sum << 1 | 1);
468
 
                                fixed_sum = 0;
469
 
                                col++;
470
 
                        }
471
 
 
472
 
                        buf = page_zip_fixed_field_encode(
473
 
                                buf, field->fixed_len << 1);
474
 
                        col++;
475
 
                }
476
 
        }
477
 
 
478
 
        if (fixed_sum) {
479
 
                /* Write out the lengths of last fixed-length columns. */
480
 
                buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
481
 
        }
482
 
 
483
 
        if (trx_id_pos != ULINT_UNDEFINED) {
484
 
                /* Write out the position of the trx_id column */
485
 
                i = trx_id_col;
486
 
        } else {
487
 
                /* Write out the number of nullable fields */
488
 
                i = index->n_nullable;
489
 
        }
490
 
 
491
 
        if (i < 128) {
492
 
                *buf++ = (byte) i;
493
 
        } else {
494
 
                *buf++ = (byte) (0x80 | i >> 8);
495
 
                *buf++ = (byte) i;
496
 
        }
497
 
 
498
 
        ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
499
 
        return((ulint) (buf - buf_start));
500
 
}
501
 
 
502
 
/**************************************************************************
503
 
Populate the dense page directory from the sparse directory. */
504
 
static
505
 
void
506
 
page_zip_dir_encode(
507
 
/*================*/
508
 
        const page_t*   page,   /* in: compact page */
509
 
        byte*           buf,    /* in: pointer to dense page directory[-1];
510
 
                                out: dense directory on compressed page */
511
 
        const rec_t**   recs)   /* in: pointer to an array of 0, or NULL;
512
 
                                out: dense page directory sorted by ascending
513
 
                                address (and heap_no) */
514
 
{
515
 
        const byte*     rec;
516
 
        ulint           status;
517
 
        ulint           min_mark;
518
 
        ulint           heap_no;
519
 
        ulint           i;
520
 
        ulint           n_heap;
521
 
        ulint           offs;
522
 
 
523
 
        min_mark = 0;
524
 
 
525
 
        if (page_is_leaf(page)) {
526
 
                status = REC_STATUS_ORDINARY;
527
 
        } else {
528
 
                status = REC_STATUS_NODE_PTR;
529
 
                if (UNIV_UNLIKELY
530
 
                    (mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) {
531
 
                        min_mark = REC_INFO_MIN_REC_FLAG;
532
 
                }
533
 
        }
534
 
 
535
 
        n_heap = page_dir_get_n_heap(page);
536
 
 
537
 
        /* Traverse the list of stored records in the collation order,
538
 
        starting from the first user record. */
539
 
 
540
 
        rec = page + PAGE_NEW_INFIMUM, TRUE;
541
 
 
542
 
        i = 0;
543
 
 
544
 
        for (;;) {
545
 
                ulint   info_bits;
546
 
                offs = rec_get_next_offs(rec, TRUE);
547
 
                if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
548
 
                        break;
549
 
                }
550
 
                rec = page + offs;
551
 
                heap_no = rec_get_heap_no_new(rec);
552
 
                ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
553
 
                ut_a(heap_no < n_heap);
554
 
                ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR);
555
 
                ut_a(offs >= PAGE_ZIP_START);
556
 
#if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1)
557
 
# error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2"
558
 
#endif
559
 
#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1
560
 
# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1"
561
 
#endif
562
 
                if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
563
 
                        offs |= PAGE_ZIP_DIR_SLOT_OWNED;
564
 
                }
565
 
 
566
 
                info_bits = rec_get_info_bits(rec, TRUE);
567
 
                if (UNIV_UNLIKELY(info_bits & REC_INFO_DELETED_FLAG)) {
568
 
                        info_bits &= ~REC_INFO_DELETED_FLAG;
569
 
                        offs |= PAGE_ZIP_DIR_SLOT_DEL;
570
 
                }
571
 
                ut_a(info_bits == min_mark);
572
 
                /* Only the smallest user record can have
573
 
                REC_INFO_MIN_REC_FLAG set. */
574
 
                min_mark = 0;
575
 
 
576
 
                mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
577
 
 
578
 
                if (UNIV_LIKELY_NULL(recs)) {
579
 
                        /* Ensure that each heap_no occurs at most once. */
580
 
                        ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
581
 
                        /* exclude infimum and supremum */
582
 
                        recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
583
 
                }
584
 
 
585
 
                ut_a(rec_get_status(rec) == status);
586
 
        }
587
 
 
588
 
        offs = page_header_get_field(page, PAGE_FREE);
589
 
 
590
 
        /* Traverse the free list (of deleted records). */
591
 
        while (offs) {
592
 
                ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
593
 
                rec = page + offs;
594
 
 
595
 
                heap_no = rec_get_heap_no_new(rec);
596
 
                ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
597
 
                ut_a(heap_no < n_heap);
598
 
 
599
 
                ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
600
 
                ut_a(rec_get_status(rec) == status);
601
 
 
602
 
                mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
603
 
 
604
 
                if (UNIV_LIKELY_NULL(recs)) {
605
 
                        /* Ensure that each heap_no occurs at most once. */
606
 
                        ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
607
 
                        /* exclude infimum and supremum */
608
 
                        recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
609
 
                }
610
 
 
611
 
                offs = rec_get_next_offs(rec, TRUE);
612
 
        }
613
 
 
614
 
        /* Ensure that each heap no occurs at least once. */
615
 
        ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap);
616
 
}
617
 
 
618
 
/**************************************************************************
619
 
Allocate memory for zlib. */
620
 
static
621
 
void*
622
 
page_zip_malloc(
623
 
/*============*/
624
 
        void*   opaque,
625
 
        uInt    items,
626
 
        uInt    size)
627
 
{
628
 
        return(mem_heap_alloc(opaque, items * size));
629
 
}
630
 
 
631
 
/**************************************************************************
Deallocate memory for zlib.  This is the zfree callback installed by
page_zip_set_alloc(); it intentionally does nothing with its arguments. */
static
void
page_zip_free(
/*==========*/
        void*   opaque __attribute__((unused)),  /* in: not used */
        void*   address __attribute__((unused))) /* in: not used */
{
        /* Nothing is released here. */
}
641
 
 
642
 
/**************************************************************************
643
 
Configure the zlib allocator to use the given memory heap. */
644
 
UNIV_INTERN
645
 
void
646
 
page_zip_set_alloc(
647
 
/*===============*/
648
 
        void*           stream,         /* in/out: zlib stream */
649
 
        mem_heap_t*     heap)           /* in: memory heap to use */
650
 
{
651
 
        z_stream*       strm = stream;
652
 
 
653
 
        strm->zalloc = page_zip_malloc;
654
 
        strm->zfree = page_zip_free;
655
 
        strm->opaque = heap;
656
 
}
657
 
 
658
 
#if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
659
 
# define PAGE_ZIP_COMPRESS_DBG
660
 
#endif
661
 
 
662
 
#ifdef PAGE_ZIP_COMPRESS_DBG
663
 
/* Set this variable in a debugger to enable
664
 
excessive logging in page_zip_compress(). */
665
 
UNIV_INTERN ibool       page_zip_compress_dbg;
666
 
/* Set this variable in a debugger to enable
667
 
binary logging of the data passed to deflate().
668
 
When this variable is nonzero, it will act
669
 
as a log file name generator. */
670
 
UNIV_INTERN unsigned    page_zip_compress_log;
671
 
 
672
 
/**************************************************************************
673
 
Wrapper for deflate().  Log the operation if page_zip_compress_dbg is set. */
674
 
static
675
 
ibool
676
 
page_zip_compress_deflate(
677
 
/*======================*/
678
 
        FILE*           logfile,/* in: log file, or NULL */
679
 
        z_streamp       strm,   /* in/out: compressed stream for deflate() */
680
 
        int             flush)  /* in: deflate() flushing method */
681
 
{
682
 
        int     status;
683
 
        if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
684
 
                ut_print_buf(stderr, strm->next_in, strm->avail_in);
685
 
        }
686
 
        if (UNIV_LIKELY_NULL(logfile)) {
687
 
                fwrite(strm->next_in, 1, strm->avail_in, logfile);
688
 
        }
689
 
        status = deflate(strm, flush);
690
 
        if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
691
 
                fprintf(stderr, " -> %d\n", status);
692
 
        }
693
 
        return(status);
694
 
}
695
 
 
696
 
/* Redefine deflate(). */
697
 
# undef deflate
698
 
# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
699
 
# define FILE_LOGFILE FILE* logfile,
700
 
# define LOGFILE logfile,
701
 
#else /* PAGE_ZIP_COMPRESS_DBG */
702
 
# define FILE_LOGFILE
703
 
# define LOGFILE
704
 
#endif /* PAGE_ZIP_COMPRESS_DBG */
705
 
 
706
 
/**************************************************************************
707
 
Compress the records of a node pointer page. */
708
 
static
709
 
int
710
 
page_zip_compress_node_ptrs(
711
 
/*========================*/
712
 
                                        /* out: Z_OK, or a zlib error code */
713
 
        FILE_LOGFILE
714
 
        z_stream*       c_stream,       /* in/out: compressed page stream */
715
 
        const rec_t**   recs,           /* in: dense page directory
716
 
                                        sorted by address */
717
 
        ulint           n_dense,        /* in: size of recs[] */
718
 
        dict_index_t*   index,          /* in: the index of the page */
719
 
        byte*           storage,        /* in: end of dense page directory */
720
 
        mem_heap_t*     heap)           /* in: temporary memory heap */
721
 
{
722
 
        int     err     = Z_OK;
723
 
        ulint*  offsets = NULL;
724
 
 
725
 
        do {
726
 
                const rec_t*    rec = *recs++;
727
 
 
728
 
                offsets = rec_get_offsets(rec, index, offsets,
729
 
                                          ULINT_UNDEFINED, &heap);
730
 
                /* Only leaf nodes may contain externally stored columns. */
731
 
                ut_ad(!rec_offs_any_extern(offsets));
732
 
 
733
 
                UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
734
 
                UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
735
 
                                   rec_offs_extra_size(offsets));
736
 
 
737
 
                /* Compress the extra bytes. */
738
 
                c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
739
 
                        - c_stream->next_in;
740
 
 
741
 
                if (c_stream->avail_in) {
742
 
                        err = deflate(c_stream, Z_NO_FLUSH);
743
 
                        if (UNIV_UNLIKELY(err != Z_OK)) {
744
 
                                break;
745
 
                        }
746
 
                }
747
 
                ut_ad(!c_stream->avail_in);
748
 
 
749
 
                /* Compress the data bytes, except node_ptr. */
750
 
                c_stream->next_in = (byte*) rec;
751
 
                c_stream->avail_in = rec_offs_data_size(offsets)
752
 
                        - REC_NODE_PTR_SIZE;
753
 
                ut_ad(c_stream->avail_in);
754
 
 
755
 
                err = deflate(c_stream, Z_NO_FLUSH);
756
 
                if (UNIV_UNLIKELY(err != Z_OK)) {
757
 
                        break;
758
 
                }
759
 
 
760
 
                ut_ad(!c_stream->avail_in);
761
 
 
762
 
                memcpy(storage - REC_NODE_PTR_SIZE
763
 
                       * (rec_get_heap_no_new(rec) - 1),
764
 
                       c_stream->next_in, REC_NODE_PTR_SIZE);
765
 
                c_stream->next_in += REC_NODE_PTR_SIZE;
766
 
        } while (--n_dense);
767
 
 
768
 
        return(err);
769
 
}
770
 
 
771
 
/**************************************************************************
772
 
Compress the records of a leaf node of a secondary index. */
773
 
static
774
 
int
775
 
page_zip_compress_sec(
776
 
/*==================*/
777
 
                                        /* out: Z_OK, or a zlib error code */
778
 
        FILE_LOGFILE
779
 
        z_stream*       c_stream,       /* in/out: compressed page stream */
780
 
        const rec_t**   recs,           /* in: dense page directory
781
 
                                        sorted by address */
782
 
        ulint           n_dense)        /* in: size of recs[] */
783
 
{
784
 
        int             err     = Z_OK;
785
 
 
786
 
        ut_ad(n_dense > 0);
787
 
 
788
 
        do {
789
 
                const rec_t*    rec = *recs++;
790
 
 
791
 
                /* Compress everything up to this record. */
792
 
                c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
793
 
                        - c_stream->next_in;
794
 
 
795
 
                if (UNIV_LIKELY(c_stream->avail_in)) {
796
 
                        UNIV_MEM_ASSERT_RW(c_stream->next_in,
797
 
                                           c_stream->avail_in);
798
 
                        err = deflate(c_stream, Z_NO_FLUSH);
799
 
                        if (UNIV_UNLIKELY(err != Z_OK)) {
800
 
                                break;
801
 
                        }
802
 
                }
803
 
 
804
 
                ut_ad(!c_stream->avail_in);
805
 
                ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
806
 
 
807
 
                /* Skip the REC_N_NEW_EXTRA_BYTES. */
808
 
 
809
 
                c_stream->next_in = (byte*) rec;
810
 
        } while (--n_dense);
811
 
 
812
 
        return(err);
813
 
}
814
 
 
815
 
/**************************************************************************
816
 
Compress a record of a leaf node of a clustered index that contains
817
 
externally stored columns. */
818
 
static
819
 
int
820
 
page_zip_compress_clust_ext(
821
 
/*========================*/
822
 
                                        /* out: Z_OK, or a zlib error code */
823
 
        FILE_LOGFILE
824
 
        z_stream*       c_stream,       /* in/out: compressed page stream */
825
 
        const rec_t*    rec,            /* in: record */
826
 
        const ulint*    offsets,        /* in: rec_get_offsets(rec) */
827
 
        ulint           trx_id_col,     /* in: position of of DB_TRX_ID */
828
 
        byte*           deleted,        /* in: dense directory entry pointing
829
 
                                        to the head of the free list */
830
 
        byte*           storage,        /* in: end of dense page directory */
831
 
        byte**          externs,        /* in/out: pointer to the next
832
 
                                        available BLOB pointer */
833
 
        ulint*          n_blobs)        /* in/out: number of
834
 
                                        externally stored columns */
835
 
{
836
 
        int     err;
837
 
        ulint   i;
838
 
 
839
 
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
840
 
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
841
 
                           rec_offs_extra_size(offsets));
842
 
 
843
 
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
844
 
                ulint           len;
845
 
                const byte*     src;
846
 
 
847
 
                if (UNIV_UNLIKELY(i == trx_id_col)) {
848
 
                        ut_ad(!rec_offs_nth_extern(offsets, i));
849
 
                        /* Store trx_id and roll_ptr
850
 
                        in uncompressed form. */
851
 
                        src = rec_get_nth_field(rec, offsets, i, &len);
852
 
                        ut_ad(src + DATA_TRX_ID_LEN
853
 
                              == rec_get_nth_field(rec, offsets,
854
 
                                                   i + 1, &len));
855
 
                        ut_ad(len == DATA_ROLL_PTR_LEN);
856
 
 
857
 
                        /* Compress any preceding bytes. */
858
 
                        c_stream->avail_in
859
 
                                = src - c_stream->next_in;
860
 
 
861
 
                        if (c_stream->avail_in) {
862
 
                                err = deflate(c_stream, Z_NO_FLUSH);
863
 
                                if (UNIV_UNLIKELY(err != Z_OK)) {
864
 
 
865
 
                                        return(err);
866
 
                                }
867
 
                        }
868
 
 
869
 
                        ut_ad(!c_stream->avail_in);
870
 
                        ut_ad(c_stream->next_in == src);
871
 
 
872
 
                        memcpy(storage
873
 
                               - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
874
 
                               * (rec_get_heap_no_new(rec) - 1),
875
 
                               c_stream->next_in,
876
 
                               DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
877
 
 
878
 
                        c_stream->next_in
879
 
                                += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
880
 
 
881
 
                        /* Skip also roll_ptr */
882
 
                        i++;
883
 
                } else if (rec_offs_nth_extern(offsets, i)) {
884
 
                        src = rec_get_nth_field(rec, offsets, i, &len);
885
 
                        ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
886
 
                        src += len - BTR_EXTERN_FIELD_REF_SIZE;
887
 
 
888
 
                        c_stream->avail_in = src
889
 
                                - c_stream->next_in;
890
 
                        if (UNIV_LIKELY(c_stream->avail_in)) {
891
 
                                err = deflate(c_stream, Z_NO_FLUSH);
892
 
                                if (UNIV_UNLIKELY(err != Z_OK)) {
893
 
 
894
 
                                        return(err);
895
 
                                }
896
 
                        }
897
 
 
898
 
                        ut_ad(!c_stream->avail_in);
899
 
                        ut_ad(c_stream->next_in == src);
900
 
 
901
 
                        /* Reserve space for the data at
902
 
                        the end of the space reserved for
903
 
                        the compressed data and the page
904
 
                        modification log. */
905
 
 
906
 
                        if (UNIV_UNLIKELY
907
 
                            (c_stream->avail_out
908
 
                             <= BTR_EXTERN_FIELD_REF_SIZE)) {
909
 
                                /* out of space */
910
 
                                return(Z_BUF_ERROR);
911
 
                        }
912
 
 
913
 
                        ut_ad(*externs == c_stream->next_out
914
 
                              + c_stream->avail_out
915
 
                              + 1/* end of modif. log */);
916
 
 
917
 
                        c_stream->next_in
918
 
                                += BTR_EXTERN_FIELD_REF_SIZE;
919
 
 
920
 
                        /* Skip deleted records. */
921
 
                        if (UNIV_LIKELY_NULL
922
 
                            (page_zip_dir_find_low(
923
 
                                    storage, deleted,
924
 
                                    page_offset(rec)))) {
925
 
                                continue;
926
 
                        }
927
 
 
928
 
                        (*n_blobs)++;
929
 
                        c_stream->avail_out
930
 
                                -= BTR_EXTERN_FIELD_REF_SIZE;
931
 
                        *externs -= BTR_EXTERN_FIELD_REF_SIZE;
932
 
 
933
 
                        /* Copy the BLOB pointer */
934
 
                        memcpy(*externs, c_stream->next_in
935
 
                               - BTR_EXTERN_FIELD_REF_SIZE,
936
 
                               BTR_EXTERN_FIELD_REF_SIZE);
937
 
                }
938
 
        }
939
 
 
940
 
        return(Z_OK);
941
 
}
942
 
 
943
 
/**************************************************************************
944
 
Compress the records of a leaf node of a clustered index. */
945
 
static
946
 
int
947
 
page_zip_compress_clust(
948
 
/*====================*/
949
 
                                        /* out: Z_OK, or a zlib error code */
950
 
        FILE_LOGFILE
951
 
        z_stream*       c_stream,       /* in/out: compressed page stream */
952
 
        const rec_t**   recs,           /* in: dense page directory
953
 
                                        sorted by address */
954
 
        ulint           n_dense,        /* in: size of recs[] */
955
 
        dict_index_t*   index,          /* in: the index of the page */
956
 
        ulint*          n_blobs,        /* in: 0; out: number of
957
 
                                        externally stored columns */
958
 
        ulint           trx_id_col,     /* index of the trx_id column */
959
 
        byte*           deleted,        /* in: dense directory entry pointing
960
 
                                        to the head of the free list */
961
 
        byte*           storage,        /* in: end of dense page directory */
962
 
        mem_heap_t*     heap)           /* in: temporary memory heap */
963
 
{
964
 
        int     err             = Z_OK;
965
 
        ulint*  offsets         = NULL;
966
 
        /* BTR_EXTERN_FIELD_REF storage */
967
 
        byte*   externs         = storage - n_dense
968
 
                * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
969
 
 
970
 
        ut_ad(*n_blobs == 0);
971
 
 
972
 
        do {
973
 
                const rec_t*    rec = *recs++;
974
 
 
975
 
                offsets = rec_get_offsets(rec, index, offsets,
976
 
                                          ULINT_UNDEFINED, &heap);
977
 
                ut_ad(rec_offs_n_fields(offsets)
978
 
                      == dict_index_get_n_fields(index));
979
 
                UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
980
 
                UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
981
 
                                   rec_offs_extra_size(offsets));
982
 
 
983
 
                /* Compress the extra bytes. */
984
 
                c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
985
 
                        - c_stream->next_in;
986
 
 
987
 
                if (c_stream->avail_in) {
988
 
                        err = deflate(c_stream, Z_NO_FLUSH);
989
 
                        if (UNIV_UNLIKELY(err != Z_OK)) {
990
 
 
991
 
                                goto func_exit;
992
 
                        }
993
 
                }
994
 
                ut_ad(!c_stream->avail_in);
995
 
                ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
996
 
 
997
 
                /* Compress the data bytes. */
998
 
 
999
 
                c_stream->next_in = (byte*) rec;
1000
 
 
1001
 
                /* Check if there are any externally stored columns.
1002
 
                For each externally stored column, store the
1003
 
                BTR_EXTERN_FIELD_REF separately. */
1004
 
                if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
1005
 
                        ut_ad(dict_index_is_clust(index));
1006
 
 
1007
 
                        err = page_zip_compress_clust_ext(
1008
 
                                LOGFILE
1009
 
                                c_stream, rec, offsets, trx_id_col,
1010
 
                                deleted, storage, &externs, n_blobs);
1011
 
 
1012
 
                        if (UNIV_UNLIKELY(err != Z_OK)) {
1013
 
 
1014
 
                                goto func_exit;
1015
 
                        }
1016
 
                } else {
1017
 
                        ulint           len;
1018
 
                        const byte*     src;
1019
 
 
1020
 
                        /* Store trx_id and roll_ptr in uncompressed form. */
1021
 
                        src = rec_get_nth_field(rec, offsets,
1022
 
                                                trx_id_col, &len);
1023
 
                        ut_ad(src + DATA_TRX_ID_LEN
1024
 
                              == rec_get_nth_field(rec, offsets,
1025
 
                                                   trx_id_col + 1, &len));
1026
 
                        ut_ad(len == DATA_ROLL_PTR_LEN);
1027
 
                        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
1028
 
                        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
1029
 
                                           rec_offs_extra_size(offsets));
1030
 
 
1031
 
                        /* Compress any preceding bytes. */
1032
 
                        c_stream->avail_in = src - c_stream->next_in;
1033
 
 
1034
 
                        if (c_stream->avail_in) {
1035
 
                                err = deflate(c_stream, Z_NO_FLUSH);
1036
 
                                if (UNIV_UNLIKELY(err != Z_OK)) {
1037
 
 
1038
 
                                        return(err);
1039
 
                                }
1040
 
                        }
1041
 
 
1042
 
                        ut_ad(!c_stream->avail_in);
1043
 
                        ut_ad(c_stream->next_in == src);
1044
 
 
1045
 
                        memcpy(storage
1046
 
                               - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
1047
 
                               * (rec_get_heap_no_new(rec) - 1),
1048
 
                               c_stream->next_in,
1049
 
                               DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
1050
 
 
1051
 
                        c_stream->next_in
1052
 
                                += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1053
 
 
1054
 
                        /* Skip also roll_ptr */
1055
 
                        ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets));
1056
 
                }
1057
 
 
1058
 
                /* Compress the last bytes of the record. */
1059
 
                c_stream->avail_in = rec + rec_offs_data_size(offsets)
1060
 
                        - c_stream->next_in;
1061
 
 
1062
 
                if (c_stream->avail_in) {
1063
 
                        err = deflate(c_stream, Z_NO_FLUSH);
1064
 
                        if (UNIV_UNLIKELY(err != Z_OK)) {
1065
 
 
1066
 
                                goto func_exit;
1067
 
                        }
1068
 
                }
1069
 
                ut_ad(!c_stream->avail_in);
1070
 
        } while (--n_dense);
1071
 
 
1072
 
func_exit:
1073
 
        return(err);
1074
 
}
1075
 
 
1076
 
/**************************************************************************
1077
 
Compress a page. */
1078
 
UNIV_INTERN
1079
 
ibool
1080
 
page_zip_compress(
1081
 
/*==============*/
1082
 
                                /* out: TRUE on success, FALSE on failure;
1083
 
                                page_zip will be left intact on failure. */
1084
 
        page_zip_des_t* page_zip,/* in: size; out: data, n_blobs,
1085
 
                                m_start, m_end, m_nonempty */
1086
 
        const page_t*   page,   /* in: uncompressed page */
1087
 
        dict_index_t*   index,  /* in: index of the B-tree node */
1088
 
        mtr_t*          mtr)    /* in: mini-transaction, or NULL */
1089
 
{
1090
 
        z_stream        c_stream;
1091
 
        int             err;
1092
 
        ulint           n_fields;/* number of index fields needed */
1093
 
        byte*           fields; /* index field information */
1094
 
        byte*           buf;    /* compressed payload of the page */
1095
 
        byte*           buf_end;/* end of buf */
1096
 
        ulint           n_dense;
1097
 
        ulint           slot_size;/* amount of uncompressed bytes per record */
1098
 
        const rec_t**   recs;   /* dense page directory, sorted by address */
1099
 
        mem_heap_t*     heap;
1100
 
        ulint           trx_id_col;
1101
 
        ulint*          offsets = NULL;
1102
 
        ulint           n_blobs = 0;
1103
 
        byte*           storage;/* storage of uncompressed columns */
1104
 
        ullint          usec = ut_time_us(NULL);
1105
 
#ifdef PAGE_ZIP_COMPRESS_DBG
1106
 
        FILE*           logfile = NULL;
1107
 
#endif
1108
 
 
1109
 
        ut_a(page_is_comp(page));
1110
 
        ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
1111
 
        ut_ad(page_simple_validate_new((page_t*) page));
1112
 
        ut_ad(page_zip_simple_validate(page_zip));
1113
 
 
1114
 
        UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
1115
 
 
1116
 
        /* Check the data that will be omitted. */
1117
 
        ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
1118
 
                     infimum_extra, sizeof infimum_extra));
1119
 
        ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
1120
 
                     infimum_data, sizeof infimum_data));
1121
 
        ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
1122
 
             /* info_bits == 0, n_owned <= max */
1123
 
             <= PAGE_DIR_SLOT_MAX_N_OWNED);
1124
 
        ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
1125
 
                     supremum_extra_data, sizeof supremum_extra_data));
1126
 
 
1127
 
        if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
1128
 
                ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
1129
 
                     == PAGE_NEW_SUPREMUM);
1130
 
        }
1131
 
 
1132
 
        if (page_is_leaf(page)) {
1133
 
                n_fields = dict_index_get_n_fields(index);
1134
 
        } else {
1135
 
                n_fields = dict_index_get_n_unique_in_tree(index);
1136
 
        }
1137
 
 
1138
 
        /* The dense directory excludes the infimum and supremum records. */
1139
 
        n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
1140
 
#ifdef PAGE_ZIP_COMPRESS_DBG
1141
 
        if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
1142
 
                fprintf(stderr, "compress %p %p %lu %lu %lu\n",
1143
 
                        (void*) page_zip, (void*) page,
1144
 
                        page_is_leaf(page),
1145
 
                        n_fields, n_dense);
1146
 
        }
1147
 
        if (UNIV_UNLIKELY(page_zip_compress_log)) {
1148
 
                /* Create a log file for every compression attempt. */
1149
 
                char    logfilename[9];
1150
 
                ut_snprintf(logfilename, sizeof logfilename,
1151
 
                            "%08x", page_zip_compress_log++);
1152
 
                logfile = fopen(logfilename, "wb");
1153
 
 
1154
 
                if (logfile) {
1155
 
                        /* Write the uncompressed page to the log. */
1156
 
                        fwrite(page, 1, UNIV_PAGE_SIZE, logfile);
1157
 
                        /* Record the compressed size as zero.
1158
 
                        This will be overwritten at successful exit. */
1159
 
                        putc(0, logfile);
1160
 
                        putc(0, logfile);
1161
 
                        putc(0, logfile);
1162
 
                        putc(0, logfile);
1163
 
                }
1164
 
        }
1165
 
#endif /* PAGE_ZIP_COMPRESS_DBG */
1166
 
        page_zip_stat[page_zip->ssize - 1].compressed++;
1167
 
 
1168
 
        if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
1169
 
                          >= page_zip_get_size(page_zip))) {
1170
 
 
1171
 
                goto err_exit;
1172
 
        }
1173
 
 
1174
 
        heap = mem_heap_create(page_zip_get_size(page_zip)
1175
 
                               + n_fields * (2 + sizeof *offsets)
1176
 
                               + n_dense * ((sizeof *recs)
1177
 
                                            - PAGE_ZIP_DIR_SLOT_SIZE)
1178
 
                               + UNIV_PAGE_SIZE * 4
1179
 
                               + (512 << MAX_MEM_LEVEL));
1180
 
 
1181
 
        recs = mem_heap_zalloc(heap, n_dense * sizeof *recs);
1182
 
 
1183
 
        fields = mem_heap_alloc(heap, (n_fields + 1) * 2);
1184
 
 
1185
 
        buf = mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA);
1186
 
        buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA;
1187
 
 
1188
 
        /* Compress the data payload. */
1189
 
        page_zip_set_alloc(&c_stream, heap);
1190
 
 
1191
 
        err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
1192
 
                           Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
1193
 
                           MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
1194
 
        ut_a(err == Z_OK);
1195
 
 
1196
 
        c_stream.next_out = buf;
1197
 
        /* Subtract the space reserved for uncompressed data. */
1198
 
        /* Page header and the end marker of the modification log */
1199
 
        c_stream.avail_out = buf_end - buf - 1;
1200
 
        /* Dense page directory and uncompressed columns, if any */
1201
 
        if (page_is_leaf(page)) {
1202
 
                if (dict_index_is_clust(index)) {
1203
 
                        trx_id_col = dict_index_get_sys_col_pos(
1204
 
                                index, DATA_TRX_ID);
1205
 
                        ut_ad(trx_id_col > 0);
1206
 
                        ut_ad(trx_id_col != ULINT_UNDEFINED);
1207
 
 
1208
 
                        slot_size = PAGE_ZIP_DIR_SLOT_SIZE
1209
 
                                + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1210
 
                } else {
1211
 
                        /* Signal the absence of trx_id
1212
 
                        in page_zip_fields_encode() */
1213
 
                        ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
1214
 
                              == ULINT_UNDEFINED);
1215
 
                        trx_id_col = 0;
1216
 
                        slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
1217
 
                }
1218
 
        } else {
1219
 
                slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
1220
 
                trx_id_col = ULINT_UNDEFINED;
1221
 
        }
1222
 
 
1223
 
        if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size
1224
 
                          + 6/* sizeof(zlib header and footer) */)) {
1225
 
                goto zlib_error;
1226
 
        }
1227
 
 
1228
 
        c_stream.avail_out -= n_dense * slot_size;
1229
 
        c_stream.avail_in = page_zip_fields_encode(n_fields, index,
1230
 
                                                   trx_id_col, fields);
1231
 
        c_stream.next_in = fields;
1232
 
        if (UNIV_LIKELY(!trx_id_col)) {
1233
 
                trx_id_col = ULINT_UNDEFINED;
1234
 
        }
1235
 
 
1236
 
        UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1237
 
        err = deflate(&c_stream, Z_FULL_FLUSH);
1238
 
        if (err != Z_OK) {
1239
 
                goto zlib_error;
1240
 
        }
1241
 
 
1242
 
        ut_ad(!c_stream.avail_in);
1243
 
 
1244
 
        page_zip_dir_encode(page, buf_end, recs);
1245
 
 
1246
 
        c_stream.next_in = (byte*) page + PAGE_ZIP_START;
1247
 
 
1248
 
        storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
1249
 
 
1250
 
        /* Compress the records in heap_no order. */
1251
 
        if (UNIV_UNLIKELY(!n_dense)) {
1252
 
        } else if (!page_is_leaf(page)) {
1253
 
                /* This is a node pointer page. */
1254
 
                err = page_zip_compress_node_ptrs(LOGFILE
1255
 
                                                  &c_stream, recs, n_dense,
1256
 
                                                  index, storage, heap);
1257
 
                if (UNIV_UNLIKELY(err != Z_OK)) {
1258
 
                        goto zlib_error;
1259
 
                }
1260
 
        } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
1261
 
                /* This is a leaf page in a secondary index. */
1262
 
                err = page_zip_compress_sec(LOGFILE
1263
 
                                            &c_stream, recs, n_dense);
1264
 
                if (UNIV_UNLIKELY(err != Z_OK)) {
1265
 
                        goto zlib_error;
1266
 
                }
1267
 
        } else {
1268
 
                /* This is a leaf page in a clustered index. */
1269
 
                err = page_zip_compress_clust(LOGFILE
1270
 
                                              &c_stream, recs, n_dense,
1271
 
                                              index, &n_blobs, trx_id_col,
1272
 
                                              buf_end - PAGE_ZIP_DIR_SLOT_SIZE
1273
 
                                              * page_get_n_recs(page),
1274
 
                                              storage, heap);
1275
 
                if (UNIV_UNLIKELY(err != Z_OK)) {
1276
 
                        goto zlib_error;
1277
 
                }
1278
 
        }
1279
 
 
1280
 
        /* Finish the compression. */
1281
 
        ut_ad(!c_stream.avail_in);
1282
 
        /* Compress any trailing garbage, in case the last record was
1283
 
        allocated from an originally longer space on the free list,
1284
 
        or the data of the last record from page_zip_compress_sec(). */
1285
 
        c_stream.avail_in
1286
 
                = page_header_get_field(page, PAGE_HEAP_TOP)
1287
 
                - (c_stream.next_in - page);
1288
 
        ut_a(c_stream.avail_in <= UNIV_PAGE_SIZE - PAGE_ZIP_START - PAGE_DIR);
1289
 
 
1290
 
        UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1291
 
        err = deflate(&c_stream, Z_FINISH);
1292
 
 
1293
 
        if (UNIV_UNLIKELY(err != Z_STREAM_END)) {
1294
 
zlib_error:
1295
 
                deflateEnd(&c_stream);
1296
 
                mem_heap_free(heap);
1297
 
err_exit:
1298
 
#ifdef PAGE_ZIP_COMPRESS_DBG
1299
 
                if (logfile) {
1300
 
                        fclose(logfile);
1301
 
                }
1302
 
#endif /* PAGE_ZIP_COMPRESS_DBG */
1303
 
                page_zip_stat[page_zip->ssize - 1].compressed_usec
1304
 
                        += ut_time_us(NULL) - usec;
1305
 
                return(FALSE);
1306
 
        }
1307
 
 
1308
 
        err = deflateEnd(&c_stream);
1309
 
        ut_a(err == Z_OK);
1310
 
 
1311
 
        ut_ad(buf + c_stream.total_out == c_stream.next_out);
1312
 
        ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out);
1313
 
 
1314
 
        /* Valgrind believes that zlib does not initialize some bits
1315
 
        in the last 7 or 8 bytes of the stream.  Make Valgrind happy. */
1316
 
        UNIV_MEM_VALID(buf, c_stream.total_out);
1317
 
 
1318
 
        /* Zero out the area reserved for the modification log.
1319
 
        Space for the end marker of the modification log is not
1320
 
        included in avail_out. */
1321
 
        memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */);
1322
 
 
1323
 
#ifdef UNIV_DEBUG
1324
 
        page_zip->m_start =
1325
 
#endif /* UNIV_DEBUG */
1326
 
                page_zip->m_end = PAGE_DATA + c_stream.total_out;
1327
 
        page_zip->m_nonempty = FALSE;
1328
 
        page_zip->n_blobs = n_blobs;
1329
 
        /* Copy those header fields that will not be written
1330
 
        in buf_flush_init_for_writing() */
1331
 
        memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
1332
 
               FIL_PAGE_LSN - FIL_PAGE_PREV);
1333
 
        memcpy(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2);
1334
 
        memcpy(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
1335
 
               PAGE_DATA - FIL_PAGE_DATA);
1336
 
        /* Copy the rest of the compressed page */
1337
 
        memcpy(page_zip->data + PAGE_DATA, buf,
1338
 
               page_zip_get_size(page_zip) - PAGE_DATA);
1339
 
        mem_heap_free(heap);
1340
 
#ifdef UNIV_ZIP_DEBUG
1341
 
        ut_a(page_zip_validate(page_zip, page));
1342
 
#endif /* UNIV_ZIP_DEBUG */
1343
 
 
1344
 
        if (mtr) {
1345
 
                page_zip_compress_write_log(page_zip, page, index, mtr);
1346
 
        }
1347
 
 
1348
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
1349
 
 
1350
 
#ifdef PAGE_ZIP_COMPRESS_DBG
1351
 
        if (logfile) {
1352
 
                /* Record the compressed size of the block. */
1353
 
                byte sz[4];
1354
 
                mach_write_to_4(sz, c_stream.total_out);
1355
 
                fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET);
1356
 
                fwrite(sz, 1, sizeof sz, logfile);
1357
 
                fclose(logfile);
1358
 
        }
1359
 
#endif /* PAGE_ZIP_COMPRESS_DBG */
1360
 
        {
1361
 
                page_zip_stat_t*        zip_stat
1362
 
                        = &page_zip_stat[page_zip->ssize - 1];
1363
 
                zip_stat->compressed_ok++;
1364
 
                zip_stat->compressed_usec += ut_time_us(NULL) - usec;
1365
 
        }
1366
 
 
1367
 
        return(TRUE);
1368
 
}
1369
 
 
1370
 
/**************************************************************************
1371
 
Compare two page directory entries. */
1372
 
UNIV_INLINE
1373
 
ibool
1374
 
page_zip_dir_cmp(
1375
 
/*=============*/
1376
 
                                /* out: positive if rec1 > rec2 */
1377
 
        const rec_t*    rec1,   /* in: rec1 */
1378
 
        const rec_t*    rec2)   /* in: rec2 */
1379
 
{
1380
 
        return(rec1 > rec2);
1381
 
}
1382
 
 
1383
 
/**************************************************************************
1384
 
Sort the dense page directory by address (heap_no). */
1385
 
static
1386
 
void
1387
 
page_zip_dir_sort(
1388
 
/*==============*/
1389
 
        rec_t** arr,    /* in/out: dense page directory */
1390
 
        rec_t** aux_arr,/* in/out: work area */
1391
 
        ulint   low,    /* in: lower bound of the sorting area, inclusive */
1392
 
        ulint   high)   /* in: upper bound of the sorting area, exclusive */
1393
 
{
1394
 
        UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
1395
 
                              page_zip_dir_cmp);
1396
 
}
1397
 
 
1398
 
/**************************************************************************
1399
 
Deallocate the index information initialized by page_zip_fields_decode(). */
1400
 
static
1401
 
void
1402
 
page_zip_fields_free(
1403
 
/*=================*/
1404
 
        dict_index_t*   index)  /* in: dummy index to be freed */
1405
 
{
1406
 
        if (index) {
1407
 
                dict_table_t*   table = index->table;
1408
 
                mem_heap_free(index->heap);
1409
 
                mutex_free(&(table->autoinc_mutex));
1410
 
                mem_heap_free(table->heap);
1411
 
        }
1412
 
}
1413
 
 
1414
 
/**************************************************************************
1415
 
Read the index information for the compressed page. */
1416
 
static
1417
 
dict_index_t*
1418
 
page_zip_fields_decode(
1419
 
/*===================*/
1420
 
                                /* out,own: dummy index describing the page,
1421
 
                                or NULL on error */
1422
 
        const byte*     buf,    /* in: index information */
1423
 
        const byte*     end,    /* in: end of buf */
1424
 
        ulint*          trx_id_col)/* in: NULL for non-leaf pages;
1425
 
                                for leaf pages, pointer to where to store
1426
 
                                the position of the trx_id column */
1427
 
{
1428
 
        const byte*     b;
1429
 
        ulint           n;
1430
 
        ulint           i;
1431
 
        ulint           val;
1432
 
        dict_table_t*   table;
1433
 
        dict_index_t*   index;
1434
 
 
1435
 
        /* Determine the number of fields. */
1436
 
        for (b = buf, n = 0; b < end; n++) {
1437
 
                if (*b++ & 0x80) {
1438
 
                        b++; /* skip the second byte */
1439
 
                }
1440
 
        }
1441
 
 
1442
 
        n--; /* n_nullable or trx_id */
1443
 
 
1444
 
        if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
1445
 
 
1446
 
                page_zip_fail(("page_zip_fields_decode: n = %lu\n",
1447
 
                               (ulong) n));
1448
 
                return(NULL);
1449
 
        }
1450
 
 
1451
 
        if (UNIV_UNLIKELY(b > end)) {
1452
 
 
1453
 
                page_zip_fail(("page_zip_fields_decode: %p > %p\n",
1454
 
                               (const void*) b, (const void*) end));
1455
 
                return(NULL);
1456
 
        }
1457
 
 
1458
 
        table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n,
1459
 
                                      DICT_TF_COMPACT);
1460
 
        index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
1461
 
                                      DICT_HDR_SPACE, 0, n);
1462
 
        index->table = table;
1463
 
        index->n_uniq = n;
1464
 
        /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
1465
 
        index->cached = TRUE;
1466
 
 
1467
 
        /* Initialize the fields. */
1468
 
        for (b = buf, i = 0; i < n; i++) {
1469
 
                ulint   mtype;
1470
 
                ulint   len;
1471
 
 
1472
 
                val = *b++;
1473
 
 
1474
 
                if (UNIV_UNLIKELY(val & 0x80)) {
1475
 
                        /* fixed length > 62 bytes */
1476
 
                        val = (val & 0x7f) << 8 | *b++;
1477
 
                        len = val >> 1;
1478
 
                        mtype = DATA_FIXBINARY;
1479
 
                } else if (UNIV_UNLIKELY(val >= 126)) {
1480
 
                        /* variable length with max > 255 bytes */
1481
 
                        len = 0x7fff;
1482
 
                        mtype = DATA_BINARY;
1483
 
                } else if (val <= 1) {
1484
 
                        /* variable length with max <= 255 bytes */
1485
 
                        len = 0;
1486
 
                        mtype = DATA_BINARY;
1487
 
                } else {
1488
 
                        /* fixed length < 62 bytes */
1489
 
                        len = val >> 1;
1490
 
                        mtype = DATA_FIXBINARY;
1491
 
                }
1492
 
 
1493
 
                dict_mem_table_add_col(table, NULL, NULL, mtype,
1494
 
                                       val & 1 ? DATA_NOT_NULL : 0, len);
1495
 
                dict_index_add_col(index, table,
1496
 
                                   dict_table_get_nth_col(table, i), 0);
1497
 
        }
1498
 
 
1499
 
        val = *b++;
1500
 
        if (UNIV_UNLIKELY(val & 0x80)) {
1501
 
                val = (val & 0x7f) << 8 | *b++;
1502
 
        }
1503
 
 
1504
 
        /* Decode the position of the trx_id column. */
1505
 
        if (trx_id_col) {
1506
 
                if (!val) {
1507
 
                        val = ULINT_UNDEFINED;
1508
 
                } else if (UNIV_UNLIKELY(val >= n)) {
1509
 
                        page_zip_fields_free(index);
1510
 
                        index = NULL;
1511
 
                } else {
1512
 
                        index->type = DICT_CLUSTERED;
1513
 
                }
1514
 
 
1515
 
                *trx_id_col = val;
1516
 
        } else {
1517
 
                /* Decode the number of nullable fields. */
1518
 
                if (UNIV_UNLIKELY(index->n_nullable > val)) {
1519
 
                        page_zip_fields_free(index);
1520
 
                        index = NULL;
1521
 
                } else {
1522
 
                        index->n_nullable = val;
1523
 
                }
1524
 
        }
1525
 
 
1526
 
        ut_ad(b == end);
1527
 
 
1528
 
        return(index);
1529
 
}
1530
 
 
1531
 
/**************************************************************************
1532
 
Populate the sparse page directory from the dense directory. */
1533
 
static
1534
 
ibool
1535
 
page_zip_dir_decode(
1536
 
/*================*/
1537
 
                                        /* out: TRUE on success,
1538
 
                                        FALSE on failure */
1539
 
        const page_zip_des_t*   page_zip,/* in: dense page directory on
1540
 
                                        compressed page */
1541
 
        page_t*                 page,   /* in: compact page with valid header;
1542
 
                                        out: trailer and sparse page directory
1543
 
                                        filled in */
1544
 
        rec_t**                 recs,   /* out: dense page directory sorted by
1545
 
                                        ascending address (and heap_no) */
1546
 
        rec_t**                 recs_aux,/* in/out: scratch area */
1547
 
        ulint                   n_dense)/* in: number of user records, and
1548
 
                                        size of recs[] and recs_aux[] */
1549
 
{
1550
 
        ulint   i;
1551
 
        ulint   n_recs;
1552
 
        byte*   slot;
1553
 
 
1554
 
        n_recs = page_get_n_recs(page);
1555
 
 
1556
 
        if (UNIV_UNLIKELY(n_recs > n_dense)) {
1557
 
                page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n",
1558
 
                               (ulong) n_recs, (ulong) n_dense));
1559
 
                return(FALSE);
1560
 
        }
1561
 
 
1562
 
        /* Traverse the list of stored records in the sorting order,
1563
 
        starting from the first user record. */
1564
 
 
1565
 
        slot = page + (UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
1566
 
        UNIV_PREFETCH_RW(slot);
1567
 
 
1568
 
        /* Zero out the page trailer. */
1569
 
        memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);
1570
 
 
1571
 
        mach_write_to_2(slot, PAGE_NEW_INFIMUM);
1572
 
        slot -= PAGE_DIR_SLOT_SIZE;
1573
 
        UNIV_PREFETCH_RW(slot);
1574
 
 
1575
 
        /* Initialize the sparse directory and copy the dense directory. */
1576
 
        for (i = 0; i < n_recs; i++) {
1577
 
                ulint   offs = page_zip_dir_get(page_zip, i);
1578
 
 
1579
 
                if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
1580
 
                        mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
1581
 
                        slot -= PAGE_DIR_SLOT_SIZE;
1582
 
                        UNIV_PREFETCH_RW(slot);
1583
 
                }
1584
 
 
1585
 
                if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
1586
 
                                  < PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
1587
 
                        page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n",
1588
 
                                       (unsigned) i, (unsigned) n_recs,
1589
 
                                       (ulong) offs));
1590
 
                        return(FALSE);
1591
 
                }
1592
 
 
1593
 
                recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
1594
 
        }
1595
 
 
1596
 
        mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
1597
 
        {
1598
 
                const page_dir_slot_t*  last_slot = page_dir_get_nth_slot(
1599
 
                        page, page_dir_get_n_slots(page) - 1);
1600
 
 
1601
 
                if (UNIV_UNLIKELY(slot != last_slot)) {
1602
 
                        page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
1603
 
                                       (const void*) slot,
1604
 
                                       (const void*) last_slot));
1605
 
                        return(FALSE);
1606
 
                }
1607
 
        }
1608
 
 
1609
 
        /* Copy the rest of the dense directory. */
1610
 
        for (; i < n_dense; i++) {
1611
 
                ulint   offs = page_zip_dir_get(page_zip, i);
1612
 
 
1613
 
                if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
1614
 
                        page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n",
1615
 
                                       (unsigned) i, (unsigned) n_dense,
1616
 
                                       (ulong) offs));
1617
 
                        return(FALSE);
1618
 
                }
1619
 
 
1620
 
                recs[i] = page + offs;
1621
 
        }
1622
 
 
1623
 
        if (UNIV_LIKELY(n_dense > 1)) {
1624
 
                page_zip_dir_sort(recs, recs_aux, 0, n_dense);
1625
 
        }
1626
 
        return(TRUE);
1627
 
}
1628
 
 
1629
 
/**************************************************************************
1630
 
Initialize the REC_N_NEW_EXTRA_BYTES of each record. */
1631
 
static
1632
 
ibool
1633
 
page_zip_set_extra_bytes(
1634
 
/*=====================*/
1635
 
                                        /* out: TRUE on success,
1636
 
                                        FALSE on failure */
1637
 
        const page_zip_des_t*   page_zip,/* in: compressed page */
1638
 
        page_t*                 page,   /* in/out: uncompressed page */
1639
 
        ulint                   info_bits)/* in: REC_INFO_MIN_REC_FLAG or 0 */
1640
 
{
1641
 
        ulint   n;
1642
 
        ulint   i;
1643
 
        ulint   n_owned = 1;
1644
 
        ulint   offs;
1645
 
        rec_t*  rec;
1646
 
 
1647
 
        n = page_get_n_recs(page);
1648
 
        rec = page + PAGE_NEW_INFIMUM;
1649
 
 
1650
 
        for (i = 0; i < n; i++) {
1651
 
                offs = page_zip_dir_get(page_zip, i);
1652
 
 
1653
 
                if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_DEL)) {
1654
 
                        info_bits |= REC_INFO_DELETED_FLAG;
1655
 
                }
1656
 
                if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
1657
 
                        info_bits |= n_owned;
1658
 
                        n_owned = 1;
1659
 
                } else {
1660
 
                        n_owned++;
1661
 
                }
1662
 
                offs &= PAGE_ZIP_DIR_SLOT_MASK;
1663
 
                if (UNIV_UNLIKELY(offs < PAGE_ZIP_START
1664
 
                                  + REC_N_NEW_EXTRA_BYTES)) {
1665
 
                        page_zip_fail(("page_zip_set_extra_bytes 1:"
1666
 
                                       " %u %u %lx\n",
1667
 
                                       (unsigned) i, (unsigned) n,
1668
 
                                       (ulong) offs));
1669
 
                        return(FALSE);
1670
 
                }
1671
 
 
1672
 
                rec_set_next_offs_new(rec, offs);
1673
 
                rec = page + offs;
1674
 
                rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
1675
 
                info_bits = 0;
1676
 
        }
1677
 
 
1678
 
        /* Set the next pointer of the last user record. */
1679
 
        rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);
1680
 
 
1681
 
        /* Set n_owned of the supremum record. */
1682
 
        page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;
1683
 
 
1684
 
        /* The dense directory excludes the infimum and supremum records. */
1685
 
        n = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
1686
 
 
1687
 
        if (i >= n) {
1688
 
                if (UNIV_LIKELY(i == n)) {
1689
 
                        return(TRUE);
1690
 
                }
1691
 
 
1692
 
                page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n",
1693
 
                               (unsigned) i, (unsigned) n));
1694
 
                return(FALSE);
1695
 
        }
1696
 
 
1697
 
        offs = page_zip_dir_get(page_zip, i);
1698
 
 
1699
 
        /* Set the extra bytes of deleted records on the free list. */
1700
 
        for (;;) {
1701
 
                if (UNIV_UNLIKELY(!offs)
1702
 
                    || UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
1703
 
 
1704
 
                        page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n",
1705
 
                                       (ulong) offs));
1706
 
                        return(FALSE);
1707
 
                }
1708
 
 
1709
 
                rec = page + offs;
1710
 
                rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
1711
 
 
1712
 
                if (++i == n) {
1713
 
                        break;
1714
 
                }
1715
 
 
1716
 
                offs = page_zip_dir_get(page_zip, i);
1717
 
                rec_set_next_offs_new(rec, offs);
1718
 
        }
1719
 
 
1720
 
        /* Terminate the free list. */
1721
 
        rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
1722
 
        rec_set_next_offs_new(rec, 0);
1723
 
 
1724
 
        return(TRUE);
1725
 
}
1726
 
 
1727
 
/**************************************************************************
1728
 
Apply the modification log to a record containing externally stored
1729
 
columns.  Do not copy the fields that are stored separately. */
1730
 
static
1731
 
const byte*
1732
 
page_zip_apply_log_ext(
1733
 
/*===================*/
1734
 
                                        /* out: pointer to modification log,
1735
 
                                        or NULL on failure */
1736
 
        rec_t*          rec,            /* in/out: record */
1737
 
        const ulint*    offsets,        /* in: rec_get_offsets(rec) */
1738
 
        ulint           trx_id_col,     /* in: position of of DB_TRX_ID */
1739
 
        const byte*     data,           /* in: modification log */
1740
 
        const byte*     end)            /* in: end of modification log */
1741
 
{
1742
 
        ulint   i;
1743
 
        ulint   len;
1744
 
        byte*   next_out = rec;
1745
 
 
1746
 
        /* Check if there are any externally stored columns.
1747
 
        For each externally stored column, skip the
1748
 
        BTR_EXTERN_FIELD_REF. */
1749
 
 
1750
 
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
1751
 
                byte*   dst;
1752
 
 
1753
 
                if (UNIV_UNLIKELY(i == trx_id_col)) {
1754
 
                        /* Skip trx_id and roll_ptr */
1755
 
                        dst = rec_get_nth_field(rec, offsets,
1756
 
                                                i, &len);
1757
 
                        if (UNIV_UNLIKELY(dst - next_out >= end - data)
1758
 
                            || UNIV_UNLIKELY
1759
 
                            (len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
1760
 
                            || rec_offs_nth_extern(offsets, i)) {
1761
 
                                page_zip_fail(("page_zip_apply_log_ext:"
1762
 
                                               " trx_id len %lu,"
1763
 
                                               " %p - %p >= %p - %p\n",
1764
 
                                               (ulong) len,
1765
 
                                               (const void*) dst,
1766
 
                                               (const void*) next_out,
1767
 
                                               (const void*) end,
1768
 
                                               (const void*) data));
1769
 
                                return(NULL);
1770
 
                        }
1771
 
 
1772
 
                        memcpy(next_out, data, dst - next_out);
1773
 
                        data += dst - next_out;
1774
 
                        next_out = dst + (DATA_TRX_ID_LEN
1775
 
                                          + DATA_ROLL_PTR_LEN);
1776
 
                } else if (rec_offs_nth_extern(offsets, i)) {
1777
 
                        dst = rec_get_nth_field(rec, offsets,
1778
 
                                                i, &len);
1779
 
                        ut_ad(len
1780
 
                              >= BTR_EXTERN_FIELD_REF_SIZE);
1781
 
 
1782
 
                        len += dst - next_out
1783
 
                                - BTR_EXTERN_FIELD_REF_SIZE;
1784
 
 
1785
 
                        if (UNIV_UNLIKELY(data + len >= end)) {
1786
 
                                page_zip_fail(("page_zip_apply_log_ext: "
1787
 
                                               "ext %p+%lu >= %p\n",
1788
 
                                               (const void*) data,
1789
 
                                               (ulong) len,
1790
 
                                               (const void*) end));
1791
 
                                return(NULL);
1792
 
                        }
1793
 
 
1794
 
                        memcpy(next_out, data, len);
1795
 
                        data += len;
1796
 
                        next_out += len
1797
 
                                + BTR_EXTERN_FIELD_REF_SIZE;
1798
 
                }
1799
 
        }
1800
 
 
1801
 
        /* Copy the last bytes of the record. */
1802
 
        len = rec_get_end(rec, offsets) - next_out;
1803
 
        if (UNIV_UNLIKELY(data + len >= end)) {
1804
 
                page_zip_fail(("page_zip_apply_log_ext: "
1805
 
                               "last %p+%lu >= %p\n",
1806
 
                               (const void*) data,
1807
 
                               (ulong) len,
1808
 
                               (const void*) end));
1809
 
                return(NULL);
1810
 
        }
1811
 
        memcpy(next_out, data, len);
1812
 
        data += len;
1813
 
 
1814
 
        return(data);
1815
 
}
1816
 
 
1817
 
/**************************************************************************
1818
 
Apply the modification log to an uncompressed page.
1819
 
Do not copy the fields that are stored separately. */
1820
 
static
1821
 
const byte*
1822
 
page_zip_apply_log(
1823
 
/*===============*/
1824
 
                                /* out: pointer to end of modification log,
1825
 
                                or NULL on failure */
1826
 
        const byte*     data,   /* in: modification log */
1827
 
        ulint           size,   /* in: maximum length of the log, in bytes */
1828
 
        rec_t**         recs,   /* in: dense page directory,
1829
 
                                sorted by address (indexed by
1830
 
                                heap_no - PAGE_HEAP_NO_USER_LOW) */
1831
 
        ulint           n_dense,/* in: size of recs[] */
1832
 
        ulint           trx_id_col,/* in: column number of trx_id in the index,
1833
 
                                or ULINT_UNDEFINED if none */
1834
 
        ulint           heap_status,
1835
 
                                /* in: heap_no and status bits for
1836
 
                                the next record to uncompress */
1837
 
        dict_index_t*   index,  /* in: index of the page */
1838
 
        ulint*          offsets)/* in/out: work area for
1839
 
                                rec_get_offsets_reverse() */
1840
 
{
1841
 
        const byte* const end = data + size;
1842
 
 
1843
 
        for (;;) {
1844
 
                ulint   val;
1845
 
                rec_t*  rec;
1846
 
                ulint   len;
1847
 
                ulint   hs;
1848
 
 
1849
 
                val = *data++;
1850
 
                if (UNIV_UNLIKELY(!val)) {
1851
 
                        return(data - 1);
1852
 
                }
1853
 
                if (val & 0x80) {
1854
 
                        val = (val & 0x7f) << 8 | *data++;
1855
 
                        if (UNIV_UNLIKELY(!val)) {
1856
 
                                page_zip_fail(("page_zip_apply_log:"
1857
 
                                               " invalid val %x%x\n",
1858
 
                                               data[-2], data[-1]));
1859
 
                                return(NULL);
1860
 
                        }
1861
 
                }
1862
 
                if (UNIV_UNLIKELY(data >= end)) {
1863
 
                        page_zip_fail(("page_zip_apply_log: %p >= %p\n",
1864
 
                                       (const void*) data,
1865
 
                                       (const void*) end));
1866
 
                        return(NULL);
1867
 
                }
1868
 
                if (UNIV_UNLIKELY((val >> 1) > n_dense)) {
1869
 
                        page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n",
1870
 
                                       (ulong) val, (ulong) n_dense));
1871
 
                        return(NULL);
1872
 
                }
1873
 
 
1874
 
                /* Determine the heap number and status bits of the record. */
1875
 
                rec = recs[(val >> 1) - 1];
1876
 
 
1877
 
                hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT;
1878
 
                hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);
1879
 
 
1880
 
                /* This may either be an old record that is being
1881
 
                overwritten (updated in place, or allocated from
1882
 
                the free list), or a new record, with the next
1883
 
                available_heap_no. */
1884
 
                if (UNIV_UNLIKELY(hs > heap_status)) {
1885
 
                        page_zip_fail(("page_zip_apply_log: %lu > %lu\n",
1886
 
                                       (ulong) hs, (ulong) heap_status));
1887
 
                        return(NULL);
1888
 
                } else if (hs == heap_status) {
1889
 
                        /* A new record was allocated from the heap. */
1890
 
                        if (UNIV_UNLIKELY(val & 1)) {
1891
 
                                /* Only existing records may be cleared. */
1892
 
                                page_zip_fail(("page_zip_apply_log:"
1893
 
                                               " attempting to create"
1894
 
                                               " deleted rec %lu\n",
1895
 
                                               (ulong) hs));
1896
 
                                return(NULL);
1897
 
                        }
1898
 
                        heap_status += 1 << REC_HEAP_NO_SHIFT;
1899
 
                }
1900
 
 
1901
 
                mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);
1902
 
 
1903
 
                if (val & 1) {
1904
 
                        /* Clear the data bytes of the record. */
1905
 
                        mem_heap_t*     heap    = NULL;
1906
 
                        ulint*          offs;
1907
 
                        offs = rec_get_offsets(rec, index, offsets,
1908
 
                                               ULINT_UNDEFINED, &heap);
1909
 
                        memset(rec, 0, rec_offs_data_size(offs));
1910
 
 
1911
 
                        if (UNIV_LIKELY_NULL(heap)) {
1912
 
                                mem_heap_free(heap);
1913
 
                        }
1914
 
                        continue;
1915
 
                }
1916
 
 
1917
 
#if REC_STATUS_NODE_PTR != TRUE
1918
 
# error "REC_STATUS_NODE_PTR != TRUE"
1919
 
#endif
1920
 
                rec_get_offsets_reverse(data, index,
1921
 
                                        hs & REC_STATUS_NODE_PTR,
1922
 
                                        offsets);
1923
 
                rec_offs_make_valid(rec, index, offsets);
1924
 
 
1925
 
                /* Copy the extra bytes (backwards). */
1926
 
                {
1927
 
                        byte*   start   = rec_get_start(rec, offsets);
1928
 
                        byte*   b       = rec - REC_N_NEW_EXTRA_BYTES;
1929
 
                        while (b != start) {
1930
 
                                *--b = *data++;
1931
 
                        }
1932
 
                }
1933
 
 
1934
 
                /* Copy the data bytes. */
1935
 
                if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
1936
 
                        /* Non-leaf nodes should not contain any
1937
 
                        externally stored columns. */
1938
 
                        if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
1939
 
                                page_zip_fail(("page_zip_apply_log: "
1940
 
                                               "%lu&REC_STATUS_NODE_PTR\n",
1941
 
                                               (ulong) hs));
1942
 
                                return(NULL);
1943
 
                        }
1944
 
 
1945
 
                        data = page_zip_apply_log_ext(
1946
 
                                rec, offsets, trx_id_col, data, end);
1947
 
 
1948
 
                        if (UNIV_UNLIKELY(!data)) {
1949
 
                                return(NULL);
1950
 
                        }
1951
 
                } else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
1952
 
                        len = rec_offs_data_size(offsets)
1953
 
                                - REC_NODE_PTR_SIZE;
1954
 
                        /* Copy the data bytes, except node_ptr. */
1955
 
                        if (UNIV_UNLIKELY(data + len >= end)) {
1956
 
                                page_zip_fail(("page_zip_apply_log: "
1957
 
                                               "node_ptr %p+%lu >= %p\n",
1958
 
                                               (const void*) data,
1959
 
                                               (ulong) len,
1960
 
                                               (const void*) end));
1961
 
                                return(NULL);
1962
 
                        }
1963
 
                        memcpy(rec, data, len);
1964
 
                        data += len;
1965
 
                } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
1966
 
                        len = rec_offs_data_size(offsets);
1967
 
 
1968
 
                        /* Copy all data bytes of
1969
 
                        a record in a secondary index. */
1970
 
                        if (UNIV_UNLIKELY(data + len >= end)) {
1971
 
                                page_zip_fail(("page_zip_apply_log: "
1972
 
                                               "sec %p+%lu >= %p\n",
1973
 
                                               (const void*) data,
1974
 
                                               (ulong) len,
1975
 
                                               (const void*) end));
1976
 
                                return(NULL);
1977
 
                        }
1978
 
 
1979
 
                        memcpy(rec, data, len);
1980
 
                        data += len;
1981
 
                } else {
1982
 
                        /* Skip DB_TRX_ID and DB_ROLL_PTR. */
1983
 
                        ulint   l = rec_get_nth_field_offs(offsets,
1984
 
                                                           trx_id_col, &len);
1985
 
                        byte*   b;
1986
 
 
1987
 
                        if (UNIV_UNLIKELY(data + l >= end)
1988
 
                            || UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
1989
 
                                                    + DATA_ROLL_PTR_LEN))) {
1990
 
                                page_zip_fail(("page_zip_apply_log: "
1991
 
                                               "trx_id %p+%lu >= %p\n",
1992
 
                                               (const void*) data,
1993
 
                                               (ulong) l,
1994
 
                                               (const void*) end));
1995
 
                                return(NULL);
1996
 
                        }
1997
 
 
1998
 
                        /* Copy any preceding data bytes. */
1999
 
                        memcpy(rec, data, l);
2000
 
                        data += l;
2001
 
 
2002
 
                        /* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
2003
 
                        b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2004
 
                        len = rec_get_end(rec, offsets) - b;
2005
 
                        if (UNIV_UNLIKELY(data + len >= end)) {
2006
 
                                page_zip_fail(("page_zip_apply_log: "
2007
 
                                               "clust %p+%lu >= %p\n",
2008
 
                                               (const void*) data,
2009
 
                                               (ulong) len,
2010
 
                                               (const void*) end));
2011
 
                                return(NULL);
2012
 
                        }
2013
 
                        memcpy(b, data, len);
2014
 
                        data += len;
2015
 
                }
2016
 
        }
2017
 
}
2018
 
 
2019
 
/**************************************************************************
2020
 
Decompress the records of a node pointer page. */
2021
 
static
2022
 
ibool
2023
 
page_zip_decompress_node_ptrs(
2024
 
/*==========================*/
2025
 
                                        /* out: TRUE on success,
2026
 
                                        FALSE on failure */
2027
 
        page_zip_des_t* page_zip,       /* in/out: compressed page */
2028
 
        z_stream*       d_stream,       /* in/out: compressed page stream */
2029
 
        rec_t**         recs,           /* in: dense page directory
2030
 
                                        sorted by address */
2031
 
        ulint           n_dense,        /* in: size of recs[] */
2032
 
        dict_index_t*   index,          /* in: the index of the page */
2033
 
        ulint*          offsets,        /* in/out: temporary offsets */
2034
 
        mem_heap_t*     heap)           /* in: temporary memory heap */
2035
 
{
2036
 
        ulint           heap_status = REC_STATUS_NODE_PTR
2037
 
                | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2038
 
        ulint           slot;
2039
 
        const byte*     storage;
2040
 
 
2041
 
        /* Subtract the space reserved for uncompressed data. */
2042
 
        d_stream->avail_in -= n_dense
2043
 
                * (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE);
2044
 
 
2045
 
        /* Decompress the records in heap_no order. */
2046
 
        for (slot = 0; slot < n_dense; slot++) {
2047
 
                rec_t*  rec = recs[slot];
2048
 
 
2049
 
                d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2050
 
                        - d_stream->next_out;
2051
 
 
2052
 
                ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
2053
 
                      - PAGE_ZIP_START - PAGE_DIR);
2054
 
                switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2055
 
                case Z_STREAM_END:
2056
 
                        /* Apparently, n_dense has grown
2057
 
                        since the time the page was last compressed. */
2058
 
                        goto zlib_done;
2059
 
                case Z_OK:
2060
 
                case Z_BUF_ERROR:
2061
 
                        if (!d_stream->avail_out) {
2062
 
                                break;
2063
 
                        }
2064
 
                        /* fall through */
2065
 
                default:
2066
 
                        page_zip_fail(("page_zip_decompress_node_ptrs:"
2067
 
                                       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
2068
 
                                       d_stream->msg));
2069
 
                        goto zlib_error;
2070
 
                }
2071
 
 
2072
 
                ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2073
 
                /* Prepare to decompress the data bytes. */
2074
 
                d_stream->next_out = rec;
2075
 
                /* Set heap_no and the status bits. */
2076
 
                mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2077
 
                heap_status += 1 << REC_HEAP_NO_SHIFT;
2078
 
 
2079
 
                /* Read the offsets. The status bits are needed here. */
2080
 
                offsets = rec_get_offsets(rec, index, offsets,
2081
 
                                          ULINT_UNDEFINED, &heap);
2082
 
 
2083
 
                /* Non-leaf nodes should not have any externally
2084
 
                stored columns. */
2085
 
                ut_ad(!rec_offs_any_extern(offsets));
2086
 
 
2087
 
                /* Decompress the data bytes, except node_ptr. */
2088
 
                d_stream->avail_out = rec_offs_data_size(offsets)
2089
 
                        - REC_NODE_PTR_SIZE;
2090
 
 
2091
 
                switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2092
 
                case Z_STREAM_END:
2093
 
                        goto zlib_done;
2094
 
                case Z_OK:
2095
 
                case Z_BUF_ERROR:
2096
 
                        if (!d_stream->avail_out) {
2097
 
                                break;
2098
 
                        }
2099
 
                        /* fall through */
2100
 
                default:
2101
 
                        page_zip_fail(("page_zip_decompress_node_ptrs:"
2102
 
                                       " 2 inflate(Z_SYNC_FLUSH)=%s\n",
2103
 
                                       d_stream->msg));
2104
 
                        goto zlib_error;
2105
 
                }
2106
 
 
2107
 
                /* Clear the node pointer in case the record
2108
 
                will be deleted and the space will be reallocated
2109
 
                to a smaller record. */
2110
 
                memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE);
2111
 
                d_stream->next_out += REC_NODE_PTR_SIZE;
2112
 
 
2113
 
                ut_ad(d_stream->next_out == rec_get_end(rec, offsets));
2114
 
        }
2115
 
 
2116
 
        /* Decompress any trailing garbage, in case the last record was
2117
 
        allocated from an originally longer space on the free list. */
2118
 
        d_stream->avail_out = page_header_get_field(page_zip->data,
2119
 
                                                    PAGE_HEAP_TOP)
2120
 
                - page_offset(d_stream->next_out);
2121
 
        if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2122
 
                          - PAGE_ZIP_START - PAGE_DIR)) {
2123
 
 
2124
 
                page_zip_fail(("page_zip_decompress_node_ptrs:"
2125
 
                               " avail_out = %u\n",
2126
 
                               d_stream->avail_out));
2127
 
                goto zlib_error;
2128
 
        }
2129
 
 
2130
 
        if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2131
 
                page_zip_fail(("page_zip_decompress_node_ptrs:"
2132
 
                               " inflate(Z_FINISH)=%s\n",
2133
 
                               d_stream->msg));
2134
 
zlib_error:
2135
 
                inflateEnd(d_stream);
2136
 
                return(FALSE);
2137
 
        }
2138
 
 
2139
 
        /* Note that d_stream->avail_out > 0 may hold here
2140
 
        if the modification log is nonempty. */
2141
 
 
2142
 
zlib_done:
2143
 
        if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2144
 
                ut_error;
2145
 
        }
2146
 
 
2147
 
        {
2148
 
                page_t* page = page_align(d_stream->next_out);
2149
 
 
2150
 
                /* Clear the unused heap space on the uncompressed page. */
2151
 
                memset(d_stream->next_out, 0,
2152
 
                       page_dir_get_nth_slot(page,
2153
 
                                             page_dir_get_n_slots(page) - 1)
2154
 
                       - d_stream->next_out);
2155
 
        }
2156
 
 
2157
 
#ifdef UNIV_DEBUG
2158
 
        page_zip->m_start = PAGE_DATA + d_stream->total_in;
2159
 
#endif /* UNIV_DEBUG */
2160
 
 
2161
 
        /* Apply the modification log. */
2162
 
        {
2163
 
                const byte*     mod_log_ptr;
2164
 
                mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2165
 
                                                 d_stream->avail_in + 1,
2166
 
                                                 recs, n_dense,
2167
 
                                                 ULINT_UNDEFINED, heap_status,
2168
 
                                                 index, offsets);
2169
 
 
2170
 
                if (UNIV_UNLIKELY(!mod_log_ptr)) {
2171
 
                        return(FALSE);
2172
 
                }
2173
 
                page_zip->m_end = mod_log_ptr - page_zip->data;
2174
 
                page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2175
 
        }
2176
 
 
2177
 
        if (UNIV_UNLIKELY
2178
 
            (page_zip_get_trailer_len(page_zip,
2179
 
                                      dict_index_is_clust(index), NULL)
2180
 
             + page_zip->m_end >= page_zip_get_size(page_zip))) {
2181
 
                page_zip_fail(("page_zip_decompress_node_ptrs:"
2182
 
                               " %lu + %lu >= %lu, %lu\n",
2183
 
                               (ulong) page_zip_get_trailer_len(
2184
 
                                       page_zip, dict_index_is_clust(index),
2185
 
                                       NULL),
2186
 
                               (ulong) page_zip->m_end,
2187
 
                               (ulong) page_zip_get_size(page_zip),
2188
 
                               (ulong) dict_index_is_clust(index)));
2189
 
                return(FALSE);
2190
 
        }
2191
 
 
2192
 
        /* Restore the uncompressed columns in heap_no order. */
2193
 
        storage = page_zip->data + page_zip_get_size(page_zip)
2194
 
                - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2195
 
 
2196
 
        for (slot = 0; slot < n_dense; slot++) {
2197
 
                rec_t*          rec     = recs[slot];
2198
 
 
2199
 
                offsets = rec_get_offsets(rec, index, offsets,
2200
 
                                          ULINT_UNDEFINED, &heap);
2201
 
                /* Non-leaf nodes should not have any externally
2202
 
                stored columns. */
2203
 
                ut_ad(!rec_offs_any_extern(offsets));
2204
 
                storage -= REC_NODE_PTR_SIZE;
2205
 
 
2206
 
                memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
2207
 
                       storage, REC_NODE_PTR_SIZE);
2208
 
        }
2209
 
 
2210
 
        return(TRUE);
2211
 
}
2212
 
 
2213
 
/**************************************************************************
2214
 
Decompress the records of a leaf node of a secondary index. */
2215
 
static
2216
 
ibool
2217
 
page_zip_decompress_sec(
2218
 
/*====================*/
2219
 
                                        /* out: TRUE on success,
2220
 
                                        FALSE on failure */
2221
 
        page_zip_des_t* page_zip,       /* in/out: compressed page */
2222
 
        z_stream*       d_stream,       /* in/out: compressed page stream */
2223
 
        rec_t**         recs,           /* in: dense page directory
2224
 
                                        sorted by address */
2225
 
        ulint           n_dense,        /* in: size of recs[] */
2226
 
        dict_index_t*   index,          /* in: the index of the page */
2227
 
        ulint*          offsets)        /* in/out: temporary offsets */
2228
 
{
2229
 
        ulint   heap_status     = REC_STATUS_ORDINARY
2230
 
                | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2231
 
        ulint   slot;
2232
 
 
2233
 
        ut_a(!dict_index_is_clust(index));
2234
 
 
2235
 
        /* Subtract the space reserved for uncompressed data. */
2236
 
        d_stream->avail_in -= n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2237
 
 
2238
 
        for (slot = 0; slot < n_dense; slot++) {
2239
 
                rec_t*  rec = recs[slot];
2240
 
 
2241
 
                /* Decompress everything up to this record. */
2242
 
                d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2243
 
                        - d_stream->next_out;
2244
 
 
2245
 
                if (UNIV_LIKELY(d_stream->avail_out)) {
2246
 
                        switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2247
 
                        case Z_STREAM_END:
2248
 
                                /* Apparently, n_dense has grown
2249
 
                                since the time the page was last compressed. */
2250
 
                                goto zlib_done;
2251
 
                        case Z_OK:
2252
 
                        case Z_BUF_ERROR:
2253
 
                                if (!d_stream->avail_out) {
2254
 
                                        break;
2255
 
                                }
2256
 
                                /* fall through */
2257
 
                        default:
2258
 
                                page_zip_fail(("page_zip_decompress_sec:"
2259
 
                                               " inflate(Z_SYNC_FLUSH)=%s\n",
2260
 
                                               d_stream->msg));
2261
 
                                goto zlib_error;
2262
 
                        }
2263
 
                }
2264
 
 
2265
 
                ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2266
 
 
2267
 
                /* Skip the REC_N_NEW_EXTRA_BYTES. */
2268
 
 
2269
 
                d_stream->next_out = rec;
2270
 
 
2271
 
                /* Set heap_no and the status bits. */
2272
 
                mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2273
 
                heap_status += 1 << REC_HEAP_NO_SHIFT;
2274
 
        }
2275
 
 
2276
 
        /* Decompress the data of the last record and any trailing garbage,
2277
 
        in case the last record was allocated from an originally longer space
2278
 
        on the free list. */
2279
 
        d_stream->avail_out = page_header_get_field(page_zip->data,
2280
 
                                                    PAGE_HEAP_TOP)
2281
 
                - page_offset(d_stream->next_out);
2282
 
        if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2283
 
                          - PAGE_ZIP_START - PAGE_DIR)) {
2284
 
 
2285
 
                page_zip_fail(("page_zip_decompress_sec:"
2286
 
                               " avail_out = %u\n",
2287
 
                               d_stream->avail_out));
2288
 
                goto zlib_error;
2289
 
        }
2290
 
 
2291
 
        if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2292
 
                page_zip_fail(("page_zip_decompress_sec:"
2293
 
                               " inflate(Z_FINISH)=%s\n",
2294
 
                               d_stream->msg));
2295
 
zlib_error:
2296
 
                inflateEnd(d_stream);
2297
 
                return(FALSE);
2298
 
        }
2299
 
 
2300
 
        /* Note that d_stream->avail_out > 0 may hold here
2301
 
        if the modification log is nonempty. */
2302
 
 
2303
 
zlib_done:
2304
 
        if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2305
 
                ut_error;
2306
 
        }
2307
 
 
2308
 
        {
2309
 
                page_t* page = page_align(d_stream->next_out);
2310
 
 
2311
 
                /* Clear the unused heap space on the uncompressed page. */
2312
 
                memset(d_stream->next_out, 0,
2313
 
                       page_dir_get_nth_slot(page,
2314
 
                                             page_dir_get_n_slots(page) - 1)
2315
 
                       - d_stream->next_out);
2316
 
        }
2317
 
 
2318
 
#ifdef UNIV_DEBUG
2319
 
        page_zip->m_start = PAGE_DATA + d_stream->total_in;
2320
 
#endif /* UNIV_DEBUG */
2321
 
 
2322
 
        /* Apply the modification log. */
2323
 
        {
2324
 
                const byte*     mod_log_ptr;
2325
 
                mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2326
 
                                                 d_stream->avail_in + 1,
2327
 
                                                 recs, n_dense,
2328
 
                                                 ULINT_UNDEFINED, heap_status,
2329
 
                                                 index, offsets);
2330
 
 
2331
 
                if (UNIV_UNLIKELY(!mod_log_ptr)) {
2332
 
                        return(FALSE);
2333
 
                }
2334
 
                page_zip->m_end = mod_log_ptr - page_zip->data;
2335
 
                page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2336
 
        }
2337
 
 
2338
 
        if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE, NULL)
2339
 
                          + page_zip->m_end >= page_zip_get_size(page_zip))) {
2340
 
 
2341
 
                page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n",
2342
 
                               (ulong) page_zip_get_trailer_len(
2343
 
                                       page_zip, FALSE, NULL),
2344
 
                               (ulong) page_zip->m_end,
2345
 
                               (ulong) page_zip_get_size(page_zip)));
2346
 
                return(FALSE);
2347
 
        }
2348
 
 
2349
 
        /* There are no uncompressed columns on leaf pages of
2350
 
        secondary indexes. */
2351
 
 
2352
 
        return(TRUE);
2353
 
}
2354
 
 
2355
 
/**************************************************************************
2356
 
Decompress a record of a leaf node of a clustered index that contains
2357
 
externally stored columns. */
2358
 
static
2359
 
ibool
2360
 
page_zip_decompress_clust_ext(
2361
 
/*==========================*/
2362
 
                                        /* out: TRUE on success */
2363
 
        z_stream*       d_stream,       /* in/out: compressed page stream */
2364
 
        rec_t*          rec,            /* in/out: record */
2365
 
        const ulint*    offsets,        /* in: rec_get_offsets(rec) */
2366
 
        ulint           trx_id_col)     /* in: position of of DB_TRX_ID */
2367
 
{
2368
 
        ulint   i;
2369
 
 
2370
 
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
2371
 
                ulint   len;
2372
 
                byte*   dst;
2373
 
 
2374
 
                if (UNIV_UNLIKELY(i == trx_id_col)) {
2375
 
                        /* Skip trx_id and roll_ptr */
2376
 
                        dst = rec_get_nth_field(rec, offsets, i, &len);
2377
 
                        if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
2378
 
                                          + DATA_ROLL_PTR_LEN)) {
2379
 
 
2380
 
                                page_zip_fail(("page_zip_decompress_clust_ext:"
2381
 
                                               " len[%lu] = %lu\n",
2382
 
                                               (ulong) i, (ulong) len));
2383
 
                                return(FALSE);
2384
 
                        }
2385
 
 
2386
 
                        if (rec_offs_nth_extern(offsets, i)) {
2387
 
 
2388
 
                                page_zip_fail(("page_zip_decompress_clust_ext:"
2389
 
                                               " DB_TRX_ID at %lu is ext\n",
2390
 
                                               (ulong) i));
2391
 
                                return(FALSE);
2392
 
                        }
2393
 
 
2394
 
                        d_stream->avail_out = dst - d_stream->next_out;
2395
 
 
2396
 
                        switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2397
 
                        case Z_STREAM_END:
2398
 
                        case Z_OK:
2399
 
                        case Z_BUF_ERROR:
2400
 
                                if (!d_stream->avail_out) {
2401
 
                                        break;
2402
 
                                }
2403
 
                                /* fall through */
2404
 
                        default:
2405
 
                                page_zip_fail(("page_zip_decompress_clust_ext:"
2406
 
                                               " 1 inflate(Z_SYNC_FLUSH)=%s\n",
2407
 
                                               d_stream->msg));
2408
 
                                return(FALSE);
2409
 
                        }
2410
 
 
2411
 
                        ut_ad(d_stream->next_out == dst);
2412
 
 
2413
 
                        /* Clear DB_TRX_ID and DB_ROLL_PTR in order to
2414
 
                        avoid uninitialized bytes in case the record
2415
 
                        is affected by page_zip_apply_log(). */
2416
 
                        memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2417
 
 
2418
 
                        d_stream->next_out += DATA_TRX_ID_LEN
2419
 
                                + DATA_ROLL_PTR_LEN;
2420
 
                } else if (rec_offs_nth_extern(offsets, i)) {
2421
 
                        dst = rec_get_nth_field(rec, offsets, i, &len);
2422
 
                        ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
2423
 
                        dst += len - BTR_EXTERN_FIELD_REF_SIZE;
2424
 
 
2425
 
                        d_stream->avail_out = dst - d_stream->next_out;
2426
 
                        switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2427
 
                        case Z_STREAM_END:
2428
 
                        case Z_OK:
2429
 
                        case Z_BUF_ERROR:
2430
 
                                if (!d_stream->avail_out) {
2431
 
                                        break;
2432
 
                                }
2433
 
                                /* fall through */
2434
 
                        default:
2435
 
                                page_zip_fail(("page_zip_decompress_clust_ext:"
2436
 
                                               " 2 inflate(Z_SYNC_FLUSH)=%s\n",
2437
 
                                               d_stream->msg));
2438
 
                                return(FALSE);
2439
 
                        }
2440
 
 
2441
 
                        ut_ad(d_stream->next_out == dst);
2442
 
 
2443
 
                        /* Clear the BLOB pointer in case
2444
 
                        the record will be deleted and the
2445
 
                        space will not be reused.  Note that
2446
 
                        the final initialization of the BLOB
2447
 
                        pointers (copying from "externs"
2448
 
                        or clearing) will have to take place
2449
 
                        only after the page modification log
2450
 
                        has been applied.  Otherwise, we
2451
 
                        could end up with an uninitialized
2452
 
                        BLOB pointer when a record is deleted,
2453
 
                        reallocated and deleted. */
2454
 
                        memset(d_stream->next_out, 0,
2455
 
                               BTR_EXTERN_FIELD_REF_SIZE);
2456
 
                        d_stream->next_out
2457
 
                                += BTR_EXTERN_FIELD_REF_SIZE;
2458
 
                }
2459
 
        }
2460
 
 
2461
 
        return(TRUE);
2462
 
}
2463
 
 
2464
 
/**************************************************************************
2465
 
Decompress the records of a leaf node of a clustered index. */
2466
 
static
2467
 
ibool
2468
 
page_zip_decompress_clust(
2469
 
/*======================*/
2470
 
                                        /* out: TRUE on success,
2471
 
                                        FALSE on failure */
2472
 
        page_zip_des_t* page_zip,       /* in/out: compressed page */
2473
 
        z_stream*       d_stream,       /* in/out: compressed page stream */
2474
 
        rec_t**         recs,           /* in: dense page directory
2475
 
                                        sorted by address */
2476
 
        ulint           n_dense,        /* in: size of recs[] */
2477
 
        dict_index_t*   index,          /* in: the index of the page */
2478
 
        ulint           trx_id_col,     /* index of the trx_id column */
2479
 
        ulint*          offsets,        /* in/out: temporary offsets */
2480
 
        mem_heap_t*     heap)           /* in: temporary memory heap */
2481
 
{
2482
 
        int             err;
2483
 
        ulint           slot;
2484
 
        ulint           heap_status     = REC_STATUS_ORDINARY
2485
 
                | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2486
 
        const byte*     storage;
2487
 
        const byte*     externs;
2488
 
 
2489
 
        ut_a(dict_index_is_clust(index));
2490
 
 
2491
 
        /* Subtract the space reserved for uncompressed data. */
2492
 
        d_stream->avail_in -= n_dense * (PAGE_ZIP_DIR_SLOT_SIZE
2493
 
                                         + DATA_TRX_ID_LEN
2494
 
                                         + DATA_ROLL_PTR_LEN);
2495
 
 
2496
 
        /* Decompress the records in heap_no order. */
2497
 
        for (slot = 0; slot < n_dense; slot++) {
2498
 
                rec_t*  rec     = recs[slot];
2499
 
 
2500
 
                d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2501
 
                        - d_stream->next_out;
2502
 
 
2503
 
                ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
2504
 
                      - PAGE_ZIP_START - PAGE_DIR);
2505
 
                err = inflate(d_stream, Z_SYNC_FLUSH);
2506
 
                switch (err) {
2507
 
                case Z_STREAM_END:
2508
 
                        /* Apparently, n_dense has grown
2509
 
                        since the time the page was last compressed. */
2510
 
                        goto zlib_done;
2511
 
                case Z_OK:
2512
 
                case Z_BUF_ERROR:
2513
 
                        if (UNIV_LIKELY(!d_stream->avail_out)) {
2514
 
                                break;
2515
 
                        }
2516
 
                        /* fall through */
2517
 
                default:
2518
 
                        page_zip_fail(("page_zip_decompress_clust:"
2519
 
                                       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
2520
 
                                       d_stream->msg));
2521
 
                        goto zlib_error;
2522
 
                }
2523
 
 
2524
 
                ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2525
 
                /* Prepare to decompress the data bytes. */
2526
 
                d_stream->next_out = rec;
2527
 
                /* Set heap_no and the status bits. */
2528
 
                mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2529
 
                heap_status += 1 << REC_HEAP_NO_SHIFT;
2530
 
 
2531
 
                /* Read the offsets. The status bits are needed here. */
2532
 
                offsets = rec_get_offsets(rec, index, offsets,
2533
 
                                          ULINT_UNDEFINED, &heap);
2534
 
 
2535
 
                /* This is a leaf page in a clustered index. */
2536
 
 
2537
 
                /* Check if there are any externally stored columns.
2538
 
                For each externally stored column, restore the
2539
 
                BTR_EXTERN_FIELD_REF separately. */
2540
 
 
2541
 
                if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
2542
 
                        if (UNIV_UNLIKELY
2543
 
                            (!page_zip_decompress_clust_ext(
2544
 
                                    d_stream, rec, offsets, trx_id_col))) {
2545
 
 
2546
 
                                goto zlib_error;
2547
 
                        }
2548
 
                } else {
2549
 
                        /* Skip trx_id and roll_ptr */
2550
 
                        ulint   len;
2551
 
                        byte*   dst = rec_get_nth_field(rec, offsets,
2552
 
                                                        trx_id_col, &len);
2553
 
                        if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
2554
 
                                          + DATA_ROLL_PTR_LEN)) {
2555
 
 
2556
 
                                page_zip_fail(("page_zip_decompress_clust:"
2557
 
                                               " len = %lu\n", (ulong) len));
2558
 
                                goto zlib_error;
2559
 
                        }
2560
 
 
2561
 
                        d_stream->avail_out = dst - d_stream->next_out;
2562
 
 
2563
 
                        switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2564
 
                        case Z_STREAM_END:
2565
 
                        case Z_OK:
2566
 
                        case Z_BUF_ERROR:
2567
 
                                if (!d_stream->avail_out) {
2568
 
                                        break;
2569
 
                                }
2570
 
                                /* fall through */
2571
 
                        default:
2572
 
                                page_zip_fail(("page_zip_decompress_clust:"
2573
 
                                               " 2 inflate(Z_SYNC_FLUSH)=%s\n",
2574
 
                                               d_stream->msg));
2575
 
                                goto zlib_error;
2576
 
                        }
2577
 
 
2578
 
                        ut_ad(d_stream->next_out == dst);
2579
 
 
2580
 
                        /* Clear DB_TRX_ID and DB_ROLL_PTR in order to
2581
 
                        avoid uninitialized bytes in case the record
2582
 
                        is affected by page_zip_apply_log(). */
2583
 
                        memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2584
 
 
2585
 
                        d_stream->next_out += DATA_TRX_ID_LEN
2586
 
                                + DATA_ROLL_PTR_LEN;
2587
 
                }
2588
 
 
2589
 
                /* Decompress the last bytes of the record. */
2590
 
                d_stream->avail_out = rec_get_end(rec, offsets)
2591
 
                        - d_stream->next_out;
2592
 
 
2593
 
                switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2594
 
                case Z_STREAM_END:
2595
 
                case Z_OK:
2596
 
                case Z_BUF_ERROR:
2597
 
                        if (!d_stream->avail_out) {
2598
 
                                break;
2599
 
                        }
2600
 
                        /* fall through */
2601
 
                default:
2602
 
                        page_zip_fail(("page_zip_decompress_clust:"
2603
 
                                       " 3 inflate(Z_SYNC_FLUSH)=%s\n",
2604
 
                                       d_stream->msg));
2605
 
                        goto zlib_error;
2606
 
                }
2607
 
        }
2608
 
 
2609
 
        /* Decompress any trailing garbage, in case the last record was
2610
 
        allocated from an originally longer space on the free list. */
2611
 
        d_stream->avail_out = page_header_get_field(page_zip->data,
2612
 
                                                    PAGE_HEAP_TOP)
2613
 
                - page_offset(d_stream->next_out);
2614
 
        if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2615
 
                          - PAGE_ZIP_START - PAGE_DIR)) {
2616
 
 
2617
 
                page_zip_fail(("page_zip_decompress_clust:"
2618
 
                               " avail_out = %u\n",
2619
 
                               d_stream->avail_out));
2620
 
                goto zlib_error;
2621
 
        }
2622
 
 
2623
 
        if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2624
 
                page_zip_fail(("page_zip_decompress_clust:"
2625
 
                               " inflate(Z_FINISH)=%s\n",
2626
 
                               d_stream->msg));
2627
 
zlib_error:
2628
 
                inflateEnd(d_stream);
2629
 
                return(FALSE);
2630
 
        }
2631
 
 
2632
 
        /* Note that d_stream->avail_out > 0 may hold here
2633
 
        if the modification log is nonempty. */
2634
 
 
2635
 
zlib_done:
2636
 
        if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2637
 
                ut_error;
2638
 
        }
2639
 
 
2640
 
        {
2641
 
                page_t* page = page_align(d_stream->next_out);
2642
 
 
2643
 
                /* Clear the unused heap space on the uncompressed page. */
2644
 
                memset(d_stream->next_out, 0,
2645
 
                       page_dir_get_nth_slot(page,
2646
 
                                             page_dir_get_n_slots(page) - 1)
2647
 
                       - d_stream->next_out);
2648
 
        }
2649
 
 
2650
 
#ifdef UNIV_DEBUG
2651
 
        page_zip->m_start = PAGE_DATA + d_stream->total_in;
2652
 
#endif /* UNIV_DEBUG */
2653
 
 
2654
 
        /* Apply the modification log. */
2655
 
        {
2656
 
                const byte*     mod_log_ptr;
2657
 
                mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2658
 
                                                 d_stream->avail_in + 1,
2659
 
                                                 recs, n_dense,
2660
 
                                                 trx_id_col, heap_status,
2661
 
                                                 index, offsets);
2662
 
 
2663
 
                if (UNIV_UNLIKELY(!mod_log_ptr)) {
2664
 
                        return(FALSE);
2665
 
                }
2666
 
                page_zip->m_end = mod_log_ptr - page_zip->data;
2667
 
                page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2668
 
        }
2669
 
 
2670
 
        if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE, NULL)
2671
 
                          + page_zip->m_end >= page_zip_get_size(page_zip))) {
2672
 
 
2673
 
                page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n",
2674
 
                               (ulong) page_zip_get_trailer_len(
2675
 
                                       page_zip, TRUE, NULL),
2676
 
                               (ulong) page_zip->m_end,
2677
 
                               (ulong) page_zip_get_size(page_zip)));
2678
 
                return(FALSE);
2679
 
        }
2680
 
 
2681
 
        storage = page_zip->data + page_zip_get_size(page_zip)
2682
 
                - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2683
 
 
2684
 
        externs = storage - n_dense
2685
 
                * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2686
 
 
2687
 
        /* Restore the uncompressed columns in heap_no order. */
2688
 
 
2689
 
        for (slot = 0; slot < n_dense; slot++) {
2690
 
                ulint   i;
2691
 
                ulint   len;
2692
 
                byte*   dst;
2693
 
                rec_t*  rec     = recs[slot];
2694
 
                ibool   exists  = !page_zip_dir_find_free(
2695
 
                        page_zip, page_offset(rec));
2696
 
                offsets = rec_get_offsets(rec, index, offsets,
2697
 
                                          ULINT_UNDEFINED, &heap);
2698
 
 
2699
 
                dst = rec_get_nth_field(rec, offsets,
2700
 
                                        trx_id_col, &len);
2701
 
                ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2702
 
                storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
2703
 
                memcpy(dst, storage,
2704
 
                       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2705
 
 
2706
 
                /* Check if there are any externally stored
2707
 
                columns in this record.  For each externally
2708
 
                stored column, restore or clear the
2709
 
                BTR_EXTERN_FIELD_REF. */
2710
 
                if (!rec_offs_any_extern(offsets)) {
2711
 
                        continue;
2712
 
                }
2713
 
 
2714
 
                for (i = 0; i < rec_offs_n_fields(offsets); i++) {
2715
 
                        if (!rec_offs_nth_extern(offsets, i)) {
2716
 
                                continue;
2717
 
                        }
2718
 
                        dst = rec_get_nth_field(rec, offsets, i, &len);
2719
 
 
2720
 
                        if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) {
2721
 
                                page_zip_fail(("page_zip_decompress_clust:"
2722
 
                                               " %lu < 20\n",
2723
 
                                               (ulong) len));
2724
 
                                return(FALSE);
2725
 
                        }
2726
 
 
2727
 
                        dst += len - BTR_EXTERN_FIELD_REF_SIZE;
2728
 
 
2729
 
                        if (UNIV_LIKELY(exists)) {
2730
 
                                /* Existing record:
2731
 
                                restore the BLOB pointer */
2732
 
                                externs -= BTR_EXTERN_FIELD_REF_SIZE;
2733
 
 
2734
 
                                if (UNIV_UNLIKELY
2735
 
                                    (externs < page_zip->data
2736
 
                                     + page_zip->m_end)) {
2737
 
                                        page_zip_fail(("page_zip_"
2738
 
                                                       "decompress_clust: "
2739
 
                                                       "%p < %p + %lu\n",
2740
 
                                                       (const void*) externs,
2741
 
                                                       (const void*)
2742
 
                                                       page_zip->data,
2743
 
                                                       (ulong)
2744
 
                                                       page_zip->m_end));
2745
 
                                        return(FALSE);
2746
 
                                }
2747
 
 
2748
 
                                memcpy(dst, externs,
2749
 
                                       BTR_EXTERN_FIELD_REF_SIZE);
2750
 
 
2751
 
                                page_zip->n_blobs++;
2752
 
                        } else {
2753
 
                                /* Deleted record:
2754
 
                                clear the BLOB pointer */
2755
 
                                memset(dst, 0,
2756
 
                                       BTR_EXTERN_FIELD_REF_SIZE);
2757
 
                        }
2758
 
                }
2759
 
        }
2760
 
 
2761
 
        return(TRUE);
2762
 
}
2763
 
 
2764
 
/**************************************************************************
2765
 
Decompress a page.  This function should tolerate errors on the compressed
2766
 
page.  Instead of letting assertions fail, it will return FALSE if an
2767
 
inconsistency is detected. */
2768
 
UNIV_INTERN
2769
 
ibool
2770
 
page_zip_decompress(
2771
 
/*================*/
2772
 
                                /* out: TRUE on success, FALSE on failure */
2773
 
        page_zip_des_t* page_zip,/* in: data, ssize;
2774
 
                                out: m_start, m_end, m_nonempty, n_blobs */
2775
 
        page_t*         page)   /* out: uncompressed page, may be trashed */
2776
 
{
2777
 
        z_stream        d_stream;
2778
 
        dict_index_t*   index   = NULL;
2779
 
        rec_t**         recs;   /* dense page directory, sorted by address */
2780
 
        ulint           n_dense;/* number of user records on the page */
2781
 
        ulint           trx_id_col = ULINT_UNDEFINED;
2782
 
        mem_heap_t*     heap;
2783
 
        ulint*          offsets;
2784
 
        ullint          usec = ut_time_us(NULL);
2785
 
 
2786
 
        ut_ad(page_zip_simple_validate(page_zip));
2787
 
        UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
2788
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
2789
 
 
2790
 
        /* The dense directory excludes the infimum and supremum records. */
2791
 
        n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW;
2792
 
        if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
2793
 
                          >= page_zip_get_size(page_zip))) {
2794
 
                page_zip_fail(("page_zip_decompress 1: %lu %lu\n",
2795
 
                               (ulong) n_dense,
2796
 
                               (ulong) page_zip_get_size(page_zip)));
2797
 
                return(FALSE);
2798
 
        }
2799
 
 
2800
 
        heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE);
2801
 
        recs = mem_heap_alloc(heap, n_dense * (2 * sizeof *recs));
2802
 
 
2803
 
#ifdef UNIV_ZIP_DEBUG
2804
 
        /* Clear the page. */
2805
 
        memset(page, 0x55, UNIV_PAGE_SIZE);
2806
 
#endif /* UNIV_ZIP_DEBUG */
2807
 
        UNIV_MEM_INVALID(page, UNIV_PAGE_SIZE);
2808
 
        /* Copy the page header. */
2809
 
        memcpy(page, page_zip->data, PAGE_DATA);
2810
 
 
2811
 
        /* Copy the page directory. */
2812
 
        if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
2813
 
                                               recs + n_dense, n_dense))) {
2814
 
zlib_error:
2815
 
                mem_heap_free(heap);
2816
 
                return(FALSE);
2817
 
        }
2818
 
 
2819
 
        /* Copy the infimum and supremum records. */
2820
 
        memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
2821
 
               infimum_extra, sizeof infimum_extra);
2822
 
        if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
2823
 
                rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
2824
 
                                      PAGE_NEW_SUPREMUM);
2825
 
        } else {
2826
 
                rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
2827
 
                                      page_zip_dir_get(page_zip, 0)
2828
 
                                      & PAGE_ZIP_DIR_SLOT_MASK);
2829
 
        }
2830
 
        memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data);
2831
 
        memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
2832
 
               supremum_extra_data, sizeof supremum_extra_data);
2833
 
 
2834
 
        page_zip_set_alloc(&d_stream, heap);
2835
 
 
2836
 
        if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
2837
 
                          != Z_OK)) {
2838
 
                ut_error;
2839
 
        }
2840
 
 
2841
 
        d_stream.next_in = page_zip->data + PAGE_DATA;
2842
 
        /* Subtract the space reserved for
2843
 
        the page header and the end marker of the modification log. */
2844
 
        d_stream.avail_in = page_zip_get_size(page_zip) - (PAGE_DATA + 1);
2845
 
 
2846
 
        d_stream.next_out = page + PAGE_ZIP_START;
2847
 
        d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
2848
 
 
2849
 
        /* Decode the zlib header and the index information. */
2850
 
        if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
2851
 
 
2852
 
                page_zip_fail(("page_zip_decompress:"
2853
 
                               " 1 inflate(Z_BLOCK)=%s\n", d_stream.msg));
2854
 
                goto zlib_error;
2855
 
        }
2856
 
 
2857
 
        if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
2858
 
 
2859
 
                page_zip_fail(("page_zip_decompress:"
2860
 
                               " 2 inflate(Z_BLOCK)=%s\n", d_stream.msg));
2861
 
                goto zlib_error;
2862
 
        }
2863
 
 
2864
 
        index = page_zip_fields_decode(
2865
 
                page + PAGE_ZIP_START, d_stream.next_out,
2866
 
                page_is_leaf(page) ? &trx_id_col : NULL);
2867
 
 
2868
 
        if (UNIV_UNLIKELY(!index)) {
2869
 
 
2870
 
                goto zlib_error;
2871
 
        }
2872
 
 
2873
 
        /* Decompress the user records. */
2874
 
        page_zip->n_blobs = 0;
2875
 
        d_stream.next_out = page + PAGE_ZIP_START;
2876
 
 
2877
 
        {
2878
 
                /* Pre-allocate the offsets for rec_get_offsets_reverse(). */
2879
 
                ulint   n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE
2880
 
                        + dict_index_get_n_fields(index);
2881
 
                offsets = mem_heap_alloc(heap, n * sizeof(ulint));
2882
 
                *offsets = n;
2883
 
        }
2884
 
 
2885
 
        /* Decompress the records in heap_no order. */
2886
 
        if (!page_is_leaf(page)) {
2887
 
                /* This is a node pointer page. */
2888
 
                ulint   info_bits;
2889
 
 
2890
 
                if (UNIV_UNLIKELY
2891
 
                    (!page_zip_decompress_node_ptrs(page_zip, &d_stream,
2892
 
                                                    recs, n_dense, index,
2893
 
                                                    offsets, heap))) {
2894
 
                        goto err_exit;
2895
 
                }
2896
 
 
2897
 
                info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL
2898
 
                        ? REC_INFO_MIN_REC_FLAG : 0;
2899
 
 
2900
 
                if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
2901
 
                                                            info_bits))) {
2902
 
                        goto err_exit;
2903
 
                }
2904
 
        } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
2905
 
                /* This is a leaf page in a secondary index. */
2906
 
                if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream,
2907
 
                                                           recs, n_dense,
2908
 
                                                           index, offsets))) {
2909
 
                        goto err_exit;
2910
 
                }
2911
 
 
2912
 
                if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
2913
 
                                                            page, 0))) {
2914
 
err_exit:
2915
 
                        page_zip_fields_free(index);
2916
 
                        mem_heap_free(heap);
2917
 
                        return(FALSE);
2918
 
                }
2919
 
        } else {
2920
 
                /* This is a leaf page in a clustered index. */
2921
 
                if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip,
2922
 
                                                             &d_stream, recs,
2923
 
                                                             n_dense, index,
2924
 
                                                             trx_id_col,
2925
 
                                                             offsets, heap))) {
2926
 
                        goto err_exit;
2927
 
                }
2928
 
 
2929
 
                if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
2930
 
                                                            page, 0))) {
2931
 
                        goto err_exit;
2932
 
                }
2933
 
        }
2934
 
 
2935
 
        ut_a(page_is_comp(page));
2936
 
        UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
2937
 
 
2938
 
        page_zip_fields_free(index);
2939
 
        mem_heap_free(heap);
2940
 
        {
2941
 
                page_zip_stat_t*        zip_stat
2942
 
                        = &page_zip_stat[page_zip->ssize - 1];
2943
 
                zip_stat->decompressed++;
2944
 
                zip_stat->decompressed_usec += ut_time_us(NULL) - usec;
2945
 
        }
2946
 
 
2947
 
        /* Update the stat counter for LRU policy. */
2948
 
        buf_LRU_stat_inc_unzip();
2949
 
 
2950
 
        return(TRUE);
2951
 
}
2952
 
 
2953
 
#ifdef UNIV_ZIP_DEBUG
/* Flag: when set, page_zip_validate() stops after comparing the headers */
UNIV_INTERN ibool	page_zip_validate_header_only = FALSE;

/**************************************************************************
Check a compressed page against its uncompressed counterpart.

The header fields are compared first.  Unless
page_zip_validate_header_only is set, the compressed page is then
decompressed into a scratch buffer, and both the resulting descriptor
fields and the page contents are compared with the given ones.  Every
mismatch found after a successful decompression is reported, not only
the first one. */
UNIV_INTERN
ibool
page_zip_validate(
/*==============*/
					/* out: TRUE if valid, FALSE if not */
	const page_zip_des_t*	page_zip,/* in: compressed page */
	const page_t*		page)	/* in: uncompressed page */
{
	page_zip_des_t	zip_copy;	/* descriptor handed to the
					decompressor */
	byte*		raw_buf;	/* unaligned scratch allocation */
	page_t*		scratch_page;	/* aligned page within raw_buf */
	ibool		ok;

	if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
		   FIL_PAGE_LSN - FIL_PAGE_PREV) != 0
	    || memcmp(page_zip->data + FIL_PAGE_TYPE,
		      page + FIL_PAGE_TYPE, 2) != 0
	    || memcmp(page_zip->data + FIL_PAGE_DATA,
		      page + FIL_PAGE_DATA,
		      PAGE_DATA - FIL_PAGE_DATA) != 0) {
		page_zip_fail(("page_zip_validate: page header\n"));
		return(FALSE);
	}

	ut_a(page_is_comp(page));

	if (page_zip_validate_header_only) {
		return(TRUE);
	}

	/* Allocate twice the page size so that a UNIV_PAGE_SIZE
	aligned page, as page_zip_decompress() expects, fits inside. */
	raw_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
	scratch_page = ut_align(raw_buf, UNIV_PAGE_SIZE);

#ifdef UNIV_DEBUG_VALGRIND
	/* Fetch the validity bits so that detailed information is
	at hand in case the UNIV_MEM_ASSERT_RW() checks fail. */
	VALGRIND_GET_VBITS(page, scratch_page, UNIV_PAGE_SIZE);
	UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
	VALGRIND_GET_VBITS(page_zip, &zip_copy, sizeof zip_copy);
	UNIV_MEM_ASSERT_RW(page_zip, sizeof *page_zip);
	VALGRIND_GET_VBITS(page_zip->data, scratch_page,
			   page_zip_get_size(page_zip));
	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
#endif /* UNIV_DEBUG_VALGRIND */

	/* Decompress through a copy of the descriptor, because
	the caller's descriptor is const. */
	zip_copy = *page_zip;
	ok = page_zip_decompress(&zip_copy, scratch_page);

	if (!ok) {
		fputs("page_zip_validate(): failed to decompress\n", stderr);
	} else {
		if (page_zip->n_blobs != zip_copy.n_blobs) {
			page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n",
				       page_zip->n_blobs, zip_copy.n_blobs));
			ok = FALSE;
		}
#ifdef UNIV_DEBUG
		if (page_zip->m_start != zip_copy.m_start) {
			page_zip_fail(("page_zip_validate: m_start: %u!=%u\n",
				       page_zip->m_start, zip_copy.m_start));
			ok = FALSE;
		}
#endif /* UNIV_DEBUG */
		if (page_zip->m_end != zip_copy.m_end) {
			page_zip_fail(("page_zip_validate: m_end: %u!=%u\n",
				       page_zip->m_end, zip_copy.m_end));
			ok = FALSE;
		}

		if (page_zip->m_nonempty != zip_copy.m_nonempty) {
			page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n",
				       page_zip->m_nonempty,
				       zip_copy.m_nonempty));
			ok = FALSE;
		}

		/* Compare everything from PAGE_HEADER up to
		the page trailer. */
		if (memcmp(page + PAGE_HEADER, scratch_page + PAGE_HEADER,
			   UNIV_PAGE_SIZE - PAGE_HEADER
			   - FIL_PAGE_DATA_END) != 0) {
			page_zip_fail(("page_zip_validate: content\n"));
			ok = FALSE;
		}
	}

	ut_free(raw_buf);
	return(ok);
}
#endif /* UNIV_ZIP_DEBUG */
3044
 
 
3045
 
#ifdef UNIV_DEBUG
/**************************************************************************
Debug check that the header fields of a compressed page equal those of
the uncompressed page.  A difference triggers a debug assertion; the
function itself always returns TRUE. */
static
ibool
page_zip_header_cmp(
/*================*/
					/* out: TRUE */
	const page_zip_des_t*	page_zip,/* in: compressed page */
	const byte*		page)	/* in: uncompressed page */
{
	const byte*	zip_data = page_zip->data;

	/* Bytes from FIL_PAGE_PREV up to, not including, FIL_PAGE_LSN */
	ut_ad(memcmp(zip_data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
		     FIL_PAGE_LSN - FIL_PAGE_PREV) == 0);

	/* The two bytes at FIL_PAGE_TYPE */
	ut_ad(memcmp(zip_data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2) == 0);

	/* Bytes from FIL_PAGE_DATA up to, not including, PAGE_DATA */
	ut_ad(memcmp(zip_data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
		     PAGE_DATA - FIL_PAGE_DATA) == 0);

	return(TRUE);
}
#endif /* UNIV_DEBUG */
3064
 
 
3065
 
/**************************************************************************
3066
 
Write a record on the compressed page that contains externally stored
3067
 
columns.  The data must already have been written to the uncompressed page. */
3068
 
static
3069
 
byte*
3070
 
page_zip_write_rec_ext(
3071
 
/*===================*/
3072
 
                                        /* out: end of modification log */
3073
 
        page_zip_des_t* page_zip,       /* in/out: compressed page */
3074
 
        const page_t*   page,           /* in: page containing rec */
3075
 
        const byte*     rec,            /* in: record being written */
3076
 
        dict_index_t*   index,          /* in: record descriptor */
3077
 
        const ulint*    offsets,        /* in: rec_get_offsets(rec, index) */
3078
 
        ulint           create,         /* in: nonzero=insert, zero=update */
3079
 
        ulint           trx_id_col,     /* in: position of DB_TRX_ID */
3080
 
        ulint           heap_no,        /* in: heap number of rec */
3081
 
        byte*           storage,        /* in: end of dense page directory */
3082
 
        byte*           data)           /* in: end of modification log */
3083
 
{
3084
 
        const byte*     start   = rec;
3085
 
        ulint           i;
3086
 
        ulint           len;
3087
 
        byte*           externs = storage;
3088
 
        ulint           n_ext   = rec_offs_n_extern(offsets);
3089
 
 
3090
 
        ut_ad(rec_offs_validate(rec, index, offsets));
3091
 
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3092
 
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3093
 
                           rec_offs_extra_size(offsets));
3094
 
 
3095
 
        externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3096
 
                * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW);
3097
 
 
3098
 
        /* Note that this will not take into account
3099
 
        the BLOB columns of rec if create==TRUE. */
3100
 
        ut_ad(data + rec_offs_data_size(offsets)
3101
 
              - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3102
 
              - n_ext * BTR_EXTERN_FIELD_REF_SIZE
3103
 
              < externs - BTR_EXTERN_FIELD_REF_SIZE * page_zip->n_blobs);
3104
 
 
3105
 
        {
3106
 
                ulint   blob_no = page_zip_get_n_prev_extern(
3107
 
                        page_zip, rec, index);
3108
 
                byte*   ext_end = externs - page_zip->n_blobs
3109
 
                        * BTR_EXTERN_FIELD_REF_SIZE;
3110
 
                ut_ad(blob_no <= page_zip->n_blobs);
3111
 
                externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
3112
 
 
3113
 
                if (create) {
3114
 
                        page_zip->n_blobs += n_ext;
3115
 
                        ASSERT_ZERO_BLOB(ext_end - n_ext
3116
 
                                         * BTR_EXTERN_FIELD_REF_SIZE);
3117
 
                        memmove(ext_end - n_ext
3118
 
                                * BTR_EXTERN_FIELD_REF_SIZE,
3119
 
                                ext_end,
3120
 
                                externs - ext_end);
3121
 
                }
3122
 
 
3123
 
                ut_a(blob_no + n_ext <= page_zip->n_blobs);
3124
 
        }
3125
 
 
3126
 
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
3127
 
                const byte*     src;
3128
 
 
3129
 
                if (UNIV_UNLIKELY(i == trx_id_col)) {
3130
 
                        ut_ad(!rec_offs_nth_extern(offsets,
3131
 
                                                   i));
3132
 
                        ut_ad(!rec_offs_nth_extern(offsets,
3133
 
                                                   i + 1));
3134
 
                        /* Locate trx_id and roll_ptr. */
3135
 
                        src = rec_get_nth_field(rec, offsets,
3136
 
                                                i, &len);
3137
 
                        ut_ad(len == DATA_TRX_ID_LEN);
3138
 
                        ut_ad(src + DATA_TRX_ID_LEN
3139
 
                              == rec_get_nth_field(
3140
 
                                      rec, offsets,
3141
 
                                      i + 1, &len));
3142
 
                        ut_ad(len == DATA_ROLL_PTR_LEN);
3143
 
 
3144
 
                        /* Log the preceding fields. */
3145
 
                        ASSERT_ZERO(data, src - start);
3146
 
                        memcpy(data, start, src - start);
3147
 
                        data += src - start;
3148
 
                        start = src + (DATA_TRX_ID_LEN
3149
 
                                       + DATA_ROLL_PTR_LEN);
3150
 
 
3151
 
                        /* Store trx_id and roll_ptr. */
3152
 
                        memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3153
 
                               * (heap_no - 1),
3154
 
                               src, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3155
 
                        i++; /* skip also roll_ptr */
3156
 
                } else if (rec_offs_nth_extern(offsets, i)) {
3157
 
                        src = rec_get_nth_field(rec, offsets,
3158
 
                                                i, &len);
3159
 
 
3160
 
                        ut_ad(dict_index_is_clust(index));
3161
 
                        ut_ad(len
3162
 
                              >= BTR_EXTERN_FIELD_REF_SIZE);
3163
 
                        src += len - BTR_EXTERN_FIELD_REF_SIZE;
3164
 
 
3165
 
                        ASSERT_ZERO(data, src - start);
3166
 
                        memcpy(data, start, src - start);
3167
 
                        data += src - start;
3168
 
                        start = src + BTR_EXTERN_FIELD_REF_SIZE;
3169
 
 
3170
 
                        /* Store the BLOB pointer. */
3171
 
                        externs -= BTR_EXTERN_FIELD_REF_SIZE;
3172
 
                        ut_ad(data < externs);
3173
 
                        memcpy(externs, src, BTR_EXTERN_FIELD_REF_SIZE);
3174
 
                }
3175
 
        }
3176
 
 
3177
 
        /* Log the last bytes of the record. */
3178
 
        len = rec_offs_data_size(offsets) - (start - rec);
3179
 
 
3180
 
        ASSERT_ZERO(data, len);
3181
 
        memcpy(data, start, len);
3182
 
        data += len;
3183
 
 
3184
 
        return(data);
3185
 
}
3186
 
 
3187
 
/**************************************************************************
3188
 
Write an entire record on the compressed page.  The data must already
3189
 
have been written to the uncompressed page. */
3190
 
UNIV_INTERN
3191
 
void
3192
 
page_zip_write_rec(
3193
 
/*===============*/
3194
 
        page_zip_des_t* page_zip,/* in/out: compressed page */
3195
 
        const byte*     rec,    /* in: record being written */
3196
 
        dict_index_t*   index,  /* in: the index the record belongs to */
3197
 
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
3198
 
        ulint           create) /* in: nonzero=insert, zero=update */
3199
 
{
3200
 
        const page_t*   page;
3201
 
        byte*           data;
3202
 
        byte*           storage;
3203
 
        ulint           heap_no;
3204
 
        byte*           slot;
3205
 
 
3206
 
        ut_ad(buf_frame_get_page_zip(rec) == page_zip);
3207
 
        ut_ad(page_zip_simple_validate(page_zip));
3208
 
        ut_ad(page_zip_get_size(page_zip)
3209
 
              > PAGE_DATA + page_zip_dir_size(page_zip));
3210
 
        ut_ad(rec_offs_comp(offsets));
3211
 
        ut_ad(rec_offs_validate(rec, index, offsets));
3212
 
 
3213
 
        ut_ad(page_zip->m_start >= PAGE_DATA);
3214
 
 
3215
 
        page = page_align(rec);
3216
 
 
3217
 
        ut_ad(page_zip_header_cmp(page_zip, page));
3218
 
        ut_ad(page_simple_validate_new((page_t*) page));
3219
 
 
3220
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3221
 
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3222
 
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3223
 
                           rec_offs_extra_size(offsets));
3224
 
 
3225
 
        slot = page_zip_dir_find(page_zip, page_offset(rec));
3226
 
        ut_a(slot);
3227
 
        /* Copy the delete mark. */
3228
 
        if (rec_get_deleted_flag(rec, TRUE)) {
3229
 
                *slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8;
3230
 
        } else {
3231
 
                *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
3232
 
        }
3233
 
 
3234
 
        ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
3235
 
        ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + UNIV_PAGE_SIZE
3236
 
              - PAGE_DIR - PAGE_DIR_SLOT_SIZE
3237
 
              * page_dir_get_n_slots(page));
3238
 
 
3239
 
        heap_no = rec_get_heap_no_new(rec);
3240
 
        ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */
3241
 
        ut_ad(heap_no < page_dir_get_n_heap(page));
3242
 
 
3243
 
        /* Append to the modification log. */
3244
 
        data = page_zip->data + page_zip->m_end;
3245
 
        ut_ad(!*data);
3246
 
 
3247
 
        /* Identify the record by writing its heap number - 1.
3248
 
        0 is reserved to indicate the end of the modification log. */
3249
 
 
3250
 
        if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
3251
 
                *data++ = (byte) (0x80 | (heap_no - 1) >> 7);
3252
 
                ut_ad(!*data);
3253
 
        }
3254
 
        *data++ = (byte) ((heap_no - 1) << 1);
3255
 
        ut_ad(!*data);
3256
 
 
3257
 
        {
3258
 
                const byte*     start   = rec - rec_offs_extra_size(offsets);
3259
 
                const byte*     b       = rec - REC_N_NEW_EXTRA_BYTES;
3260
 
 
3261
 
                /* Write the extra bytes backwards, so that
3262
 
                rec_offs_extra_size() can be easily computed in
3263
 
                page_zip_apply_log() by invoking
3264
 
                rec_get_offsets_reverse(). */
3265
 
 
3266
 
                while (b != start) {
3267
 
                        *data++ = *--b;
3268
 
                        ut_ad(!*data);
3269
 
                }
3270
 
        }
3271
 
 
3272
 
        /* Write the data bytes.  Store the uncompressed bytes separately. */
3273
 
        storage = page_zip->data + page_zip_get_size(page_zip)
3274
 
                - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3275
 
                * PAGE_ZIP_DIR_SLOT_SIZE;
3276
 
 
3277
 
        if (page_is_leaf(page)) {
3278
 
                ulint           len;
3279
 
 
3280
 
                if (dict_index_is_clust(index)) {
3281
 
                        ulint           trx_id_col;
3282
 
 
3283
 
                        trx_id_col = dict_index_get_sys_col_pos(index,
3284
 
                                                                DATA_TRX_ID);
3285
 
                        ut_ad(trx_id_col != ULINT_UNDEFINED);
3286
 
 
3287
 
                        /* Store separately trx_id, roll_ptr and
3288
 
                        the BTR_EXTERN_FIELD_REF of each BLOB column. */
3289
 
                        if (rec_offs_any_extern(offsets)) {
3290
 
                                data = page_zip_write_rec_ext(
3291
 
                                        page_zip, page,
3292
 
                                        rec, index, offsets, create,
3293
 
                                        trx_id_col, heap_no, storage, data);
3294
 
                        } else {
3295
 
                                /* Locate trx_id and roll_ptr. */
3296
 
                                const byte*     src
3297
 
                                        = rec_get_nth_field(rec, offsets,
3298
 
                                                            trx_id_col, &len);
3299
 
                                ut_ad(len == DATA_TRX_ID_LEN);
3300
 
                                ut_ad(src + DATA_TRX_ID_LEN
3301
 
                                      == rec_get_nth_field(
3302
 
                                              rec, offsets,
3303
 
                                              trx_id_col + 1, &len));
3304
 
                                ut_ad(len == DATA_ROLL_PTR_LEN);
3305
 
 
3306
 
                                /* Log the preceding fields. */
3307
 
                                ASSERT_ZERO(data, src - rec);
3308
 
                                memcpy(data, rec, src - rec);
3309
 
                                data += src - rec;
3310
 
 
3311
 
                                /* Store trx_id and roll_ptr. */
3312
 
                                memcpy(storage
3313
 
                                       - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3314
 
                                       * (heap_no - 1),
3315
 
                                       src,
3316
 
                                       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3317
 
 
3318
 
                                src += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
3319
 
 
3320
 
                                /* Log the last bytes of the record. */
3321
 
                                len = rec_offs_data_size(offsets)
3322
 
                                        - (src - rec);
3323
 
 
3324
 
                                ASSERT_ZERO(data, len);
3325
 
                                memcpy(data, src, len);
3326
 
                                data += len;
3327
 
                        }
3328
 
                } else {
3329
 
                        /* Leaf page of a secondary index:
3330
 
                        no externally stored columns */
3331
 
                        ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
3332
 
                              == ULINT_UNDEFINED);
3333
 
                        ut_ad(!rec_offs_any_extern(offsets));
3334
 
 
3335
 
                        /* Log the entire record. */
3336
 
                        len = rec_offs_data_size(offsets);
3337
 
 
3338
 
                        ASSERT_ZERO(data, len);
3339
 
                        memcpy(data, rec, len);
3340
 
                        data += len;
3341
 
                }
3342
 
        } else {
3343
 
                /* This is a node pointer page. */
3344
 
                ulint   len;
3345
 
 
3346
 
                /* Non-leaf nodes should not have any externally
3347
 
                stored columns. */
3348
 
                ut_ad(!rec_offs_any_extern(offsets));
3349
 
 
3350
 
                /* Copy the data bytes, except node_ptr. */
3351
 
                len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE;
3352
 
                ut_ad(data + len < storage - REC_NODE_PTR_SIZE
3353
 
                      * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW));
3354
 
                ASSERT_ZERO(data, len);
3355
 
                memcpy(data, rec, len);
3356
 
                data += len;
3357
 
 
3358
 
                /* Copy the node pointer to the uncompressed area. */
3359
 
                memcpy(storage - REC_NODE_PTR_SIZE
3360
 
                       * (heap_no - 1),
3361
 
                       rec + len,
3362
 
                       REC_NODE_PTR_SIZE);
3363
 
        }
3364
 
 
3365
 
        ut_a(!*data);
3366
 
        ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip));
3367
 
        page_zip->m_end = data - page_zip->data;
3368
 
        page_zip->m_nonempty = TRUE;
3369
 
 
3370
 
#ifdef UNIV_ZIP_DEBUG
3371
 
        ut_a(page_zip_validate(page_zip, page_align(rec)));
3372
 
#endif /* UNIV_ZIP_DEBUG */
3373
 
}
3374
 
 
3375
 
/***************************************************************
3376
 
Parses a log record of writing a BLOB pointer of a record. */
3377
 
UNIV_INTERN
3378
 
byte*
3379
 
page_zip_parse_write_blob_ptr(
3380
 
/*==========================*/
3381
 
                                /* out: end of log record or NULL */
3382
 
        byte*           ptr,    /* in: redo log buffer */
3383
 
        byte*           end_ptr,/* in: redo log buffer end */
3384
 
        page_t*         page,   /* in/out: uncompressed page */
3385
 
        page_zip_des_t* page_zip)/* in/out: compressed page */
3386
 
{
3387
 
        ulint   offset;
3388
 
        ulint   z_offset;
3389
 
 
3390
 
        ut_ad(!page == !page_zip);
3391
 
 
3392
 
        if (UNIV_UNLIKELY
3393
 
            (end_ptr < ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE))) {
3394
 
 
3395
 
                return(NULL);
3396
 
        }
3397
 
 
3398
 
        offset = mach_read_from_2(ptr);
3399
 
        z_offset = mach_read_from_2(ptr + 2);
3400
 
 
3401
 
        if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3402
 
            || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3403
 
            || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3404
 
corrupt:
3405
 
                recv_sys->found_corrupt_log = TRUE;
3406
 
 
3407
 
                return(NULL);
3408
 
        }
3409
 
 
3410
 
        if (page) {
3411
 
                if (UNIV_UNLIKELY(!page_zip)
3412
 
                    || UNIV_UNLIKELY(!page_is_leaf(page))) {
3413
 
 
3414
 
                        goto corrupt;
3415
 
                }
3416
 
 
3417
 
#ifdef UNIV_ZIP_DEBUG
3418
 
                ut_a(page_zip_validate(page_zip, page));
3419
 
#endif /* UNIV_ZIP_DEBUG */
3420
 
 
3421
 
                memcpy(page + offset,
3422
 
                       ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3423
 
                memcpy(page_zip->data + z_offset,
3424
 
                       ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3425
 
 
3426
 
#ifdef UNIV_ZIP_DEBUG
3427
 
                ut_a(page_zip_validate(page_zip, page));
3428
 
#endif /* UNIV_ZIP_DEBUG */
3429
 
        }
3430
 
 
3431
 
        return(ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE));
3432
 
}
3433
 
 
3434
 
/**************************************************************************
3435
 
Write a BLOB pointer of a record on the leaf page of a clustered index.
3436
 
The information must already have been updated on the uncompressed page. */
3437
 
UNIV_INTERN
3438
 
void
3439
 
page_zip_write_blob_ptr(
3440
 
/*====================*/
3441
 
        page_zip_des_t* page_zip,/* in/out: compressed page */
3442
 
        const byte*     rec,    /* in/out: record whose data is being
3443
 
                                written */
3444
 
        dict_index_t*   index,  /* in: index of the page */
3445
 
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
3446
 
        ulint           n,      /* in: column index */
3447
 
        mtr_t*          mtr)    /* in: mini-transaction handle,
3448
 
                                or NULL if no logging is needed */
3449
 
{
3450
 
        const byte*     field;
3451
 
        byte*           externs;
3452
 
        const page_t*   page    = page_align(rec);
3453
 
        ulint           blob_no;
3454
 
        ulint           len;
3455
 
 
3456
 
        ut_ad(buf_frame_get_page_zip(rec) == page_zip);
3457
 
        ut_ad(page_simple_validate_new((page_t*) page));
3458
 
        ut_ad(page_zip_simple_validate(page_zip));
3459
 
        ut_ad(page_zip_get_size(page_zip)
3460
 
              > PAGE_DATA + page_zip_dir_size(page_zip));
3461
 
        ut_ad(rec_offs_comp(offsets));
3462
 
        ut_ad(rec_offs_validate(rec, NULL, offsets));
3463
 
        ut_ad(rec_offs_any_extern(offsets));
3464
 
        ut_ad(rec_offs_nth_extern(offsets, n));
3465
 
 
3466
 
        ut_ad(page_zip->m_start >= PAGE_DATA);
3467
 
        ut_ad(page_zip_header_cmp(page_zip, page));
3468
 
 
3469
 
        ut_ad(page_is_leaf(page));
3470
 
        ut_ad(dict_index_is_clust(index));
3471
 
 
3472
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3473
 
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3474
 
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3475
 
                           rec_offs_extra_size(offsets));
3476
 
 
3477
 
        blob_no = page_zip_get_n_prev_extern(page_zip, rec, index)
3478
 
                + rec_get_n_extern_new(rec, index, n);
3479
 
        ut_a(blob_no < page_zip->n_blobs);
3480
 
 
3481
 
        externs = page_zip->data + page_zip_get_size(page_zip)
3482
 
                - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3483
 
                * (PAGE_ZIP_DIR_SLOT_SIZE
3484
 
                   + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3485
 
 
3486
 
        field = rec_get_nth_field(rec, offsets, n, &len);
3487
 
 
3488
 
        externs -= (blob_no + 1) * BTR_EXTERN_FIELD_REF_SIZE;
3489
 
        field += len - BTR_EXTERN_FIELD_REF_SIZE;
3490
 
 
3491
 
        memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE);
3492
 
 
3493
 
#ifdef UNIV_ZIP_DEBUG
3494
 
        ut_a(page_zip_validate(page_zip, page));
3495
 
#endif /* UNIV_ZIP_DEBUG */
3496
 
 
3497
 
        if (mtr) {
3498
 
                byte*   log_ptr = mlog_open(
3499
 
                        mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE);
3500
 
                if (UNIV_UNLIKELY(!log_ptr)) {
3501
 
                        return;
3502
 
                }
3503
 
 
3504
 
                log_ptr = mlog_write_initial_log_record_fast(
3505
 
                        (byte*) field, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr);
3506
 
                mach_write_to_2(log_ptr, page_offset(field));
3507
 
                log_ptr += 2;
3508
 
                mach_write_to_2(log_ptr, externs - page_zip->data);
3509
 
                log_ptr += 2;
3510
 
                memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE);
3511
 
                log_ptr += BTR_EXTERN_FIELD_REF_SIZE;
3512
 
                mlog_close(mtr, log_ptr);
3513
 
        }
3514
 
}
3515
 
 
3516
 
/***************************************************************
3517
 
Parses a log record of writing the node pointer of a record. */
3518
 
UNIV_INTERN
3519
 
byte*
3520
 
page_zip_parse_write_node_ptr(
3521
 
/*==========================*/
3522
 
                                /* out: end of log record or NULL */
3523
 
        byte*           ptr,    /* in: redo log buffer */
3524
 
        byte*           end_ptr,/* in: redo log buffer end */
3525
 
        page_t*         page,   /* in/out: uncompressed page */
3526
 
        page_zip_des_t* page_zip)/* in/out: compressed page */
3527
 
{
3528
 
        ulint   offset;
3529
 
        ulint   z_offset;
3530
 
 
3531
 
        ut_ad(!page == !page_zip);
3532
 
 
3533
 
        if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) {
3534
 
 
3535
 
                return(NULL);
3536
 
        }
3537
 
 
3538
 
        offset = mach_read_from_2(ptr);
3539
 
        z_offset = mach_read_from_2(ptr + 2);
3540
 
 
3541
 
        if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3542
 
            || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3543
 
            || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3544
 
corrupt:
3545
 
                recv_sys->found_corrupt_log = TRUE;
3546
 
 
3547
 
                return(NULL);
3548
 
        }
3549
 
 
3550
 
        if (page) {
3551
 
                byte*   storage_end;
3552
 
                byte*   field;
3553
 
                byte*   storage;
3554
 
                ulint   heap_no;
3555
 
 
3556
 
                if (UNIV_UNLIKELY(!page_zip)
3557
 
                    || UNIV_UNLIKELY(page_is_leaf(page))) {
3558
 
 
3559
 
                        goto corrupt;
3560
 
                }
3561
 
 
3562
 
#ifdef UNIV_ZIP_DEBUG
3563
 
                ut_a(page_zip_validate(page_zip, page));
3564
 
#endif /* UNIV_ZIP_DEBUG */
3565
 
 
3566
 
                field = page + offset;
3567
 
                storage = page_zip->data + z_offset;
3568
 
 
3569
 
                storage_end = page_zip->data + page_zip_get_size(page_zip)
3570
 
                        - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3571
 
                        * PAGE_ZIP_DIR_SLOT_SIZE;
3572
 
 
3573
 
                heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE;
3574
 
 
3575
 
                if (UNIV_UNLIKELY((storage_end - storage) % REC_NODE_PTR_SIZE)
3576
 
                    || UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW)
3577
 
                    || UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) {
3578
 
 
3579
 
                        goto corrupt;
3580
 
                }
3581
 
 
3582
 
                memcpy(field, ptr + 4, REC_NODE_PTR_SIZE);
3583
 
                memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE);
3584
 
 
3585
 
#ifdef UNIV_ZIP_DEBUG
3586
 
                ut_a(page_zip_validate(page_zip, page));
3587
 
#endif /* UNIV_ZIP_DEBUG */
3588
 
        }
3589
 
 
3590
 
        return(ptr + (2 + 2 + REC_NODE_PTR_SIZE));
3591
 
}
3592
 
 
3593
 
/**************************************************************************
3594
 
Write the node pointer of a record on a non-leaf compressed page. */
3595
 
UNIV_INTERN
3596
 
void
3597
 
page_zip_write_node_ptr(
3598
 
/*====================*/
3599
 
        page_zip_des_t* page_zip,/* in/out: compressed page */
3600
 
        byte*           rec,    /* in/out: record */
3601
 
        ulint           size,   /* in: data size of rec */
3602
 
        ulint           ptr,    /* in: node pointer */
3603
 
        mtr_t*          mtr)    /* in: mini-transaction, or NULL */
3604
 
{
3605
 
        byte*   field;
3606
 
        byte*   storage;
3607
 
        page_t* page    = page_align(rec);
3608
 
 
3609
 
        ut_ad(buf_frame_get_page_zip(rec) == page_zip);
3610
 
        ut_ad(page_simple_validate_new(page));
3611
 
        ut_ad(page_zip_simple_validate(page_zip));
3612
 
        ut_ad(page_zip_get_size(page_zip)
3613
 
              > PAGE_DATA + page_zip_dir_size(page_zip));
3614
 
        ut_ad(page_rec_is_comp(rec));
3615
 
 
3616
 
        ut_ad(page_zip->m_start >= PAGE_DATA);
3617
 
        ut_ad(page_zip_header_cmp(page_zip, page));
3618
 
 
3619
 
        ut_ad(!page_is_leaf(page));
3620
 
 
3621
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3622
 
        UNIV_MEM_ASSERT_RW(rec, size);
3623
 
 
3624
 
        storage = page_zip->data + page_zip_get_size(page_zip)
3625
 
                - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3626
 
                * PAGE_ZIP_DIR_SLOT_SIZE
3627
 
                - (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
3628
 
        field = rec + size - REC_NODE_PTR_SIZE;
3629
 
 
3630
 
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3631
 
        ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE));
3632
 
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3633
 
#if REC_NODE_PTR_SIZE != 4
3634
 
# error "REC_NODE_PTR_SIZE != 4"
3635
 
#endif
3636
 
        mach_write_to_4(field, ptr);
3637
 
        memcpy(storage, field, REC_NODE_PTR_SIZE);
3638
 
 
3639
 
        if (mtr) {
3640
 
                byte*   log_ptr = mlog_open(mtr,
3641
 
                                            11 + 2 + 2 + REC_NODE_PTR_SIZE);
3642
 
                if (UNIV_UNLIKELY(!log_ptr)) {
3643
 
                        return;
3644
 
                }
3645
 
 
3646
 
                log_ptr = mlog_write_initial_log_record_fast(
3647
 
                        field, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr);
3648
 
                mach_write_to_2(log_ptr, page_offset(field));
3649
 
                log_ptr += 2;
3650
 
                mach_write_to_2(log_ptr, storage - page_zip->data);
3651
 
                log_ptr += 2;
3652
 
                memcpy(log_ptr, field, REC_NODE_PTR_SIZE);
3653
 
                log_ptr += REC_NODE_PTR_SIZE;
3654
 
                mlog_close(mtr, log_ptr);
3655
 
        }
3656
 
}
3657
 
 
3658
 
/**************************************************************************
3659
 
Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
3660
 
UNIV_INTERN
3661
 
void
3662
 
page_zip_write_trx_id_and_roll_ptr(
3663
 
/*===============================*/
3664
 
        page_zip_des_t* page_zip,/* in/out: compressed page */
3665
 
        byte*           rec,    /* in/out: record */
3666
 
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
3667
 
        ulint           trx_id_col,/* in: column number of TRX_ID in rec */
3668
 
        dulint          trx_id, /* in: transaction identifier */
3669
 
        dulint          roll_ptr)/* in: roll_ptr */
3670
 
{
3671
 
        byte*   field;
3672
 
        byte*   storage;
3673
 
        page_t* page    = page_align(rec);
3674
 
        ulint   len;
3675
 
 
3676
 
        ut_ad(buf_frame_get_page_zip(rec) == page_zip);
3677
 
        ut_ad(page_simple_validate_new(page));
3678
 
        ut_ad(page_zip_simple_validate(page_zip));
3679
 
        ut_ad(page_zip_get_size(page_zip)
3680
 
              > PAGE_DATA + page_zip_dir_size(page_zip));
3681
 
        ut_ad(rec_offs_validate(rec, NULL, offsets));
3682
 
        ut_ad(rec_offs_comp(offsets));
3683
 
 
3684
 
        ut_ad(page_zip->m_start >= PAGE_DATA);
3685
 
        ut_ad(page_zip_header_cmp(page_zip, page));
3686
 
 
3687
 
        ut_ad(page_is_leaf(page));
3688
 
 
3689
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3690
 
 
3691
 
        storage = page_zip->data + page_zip_get_size(page_zip)
3692
 
                - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3693
 
                * PAGE_ZIP_DIR_SLOT_SIZE
3694
 
                - (rec_get_heap_no_new(rec) - 1)
3695
 
                * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3696
 
 
3697
 
#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
3698
 
# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
3699
 
#endif
3700
 
        field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
3701
 
        ut_ad(len == DATA_TRX_ID_LEN);
3702
 
        ut_ad(field + DATA_TRX_ID_LEN
3703
 
              == rec_get_nth_field(rec, offsets, trx_id_col + 1, &len));
3704
 
        ut_ad(len == DATA_ROLL_PTR_LEN);
3705
 
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3706
 
        ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
3707
 
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3708
 
#if DATA_TRX_ID_LEN != 6
3709
 
# error "DATA_TRX_ID_LEN != 6"
3710
 
#endif
3711
 
        mach_write_to_6(field, trx_id);
3712
 
#if DATA_ROLL_PTR_LEN != 7
3713
 
# error "DATA_ROLL_PTR_LEN != 7"
3714
 
#endif
3715
 
        mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
3716
 
        memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3717
 
 
3718
 
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3719
 
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3720
 
                           rec_offs_extra_size(offsets));
3721
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3722
 
}
3723
 
 
3724
 
#ifdef UNIV_ZIP_DEBUG
3725
 
/* Set this variable in a debugger to disable page_zip_clear_rec().
The only observable effect should be a change in the compression ratio,
because deleted records are then not zeroed out.  In rare cases,
page_zip_validate() may fail on the node_ptr, trx_id and roll_ptr
columns if the space is reallocated for a smaller record. */
3730
 
UNIV_INTERN ibool       page_zip_clear_rec_disable;
3731
 
#endif /* UNIV_ZIP_DEBUG */
3732
 
 
3733
 
/**************************************************************************
3734
 
Clear an area on the uncompressed and compressed page, if possible. */
3735
 
static
3736
 
void
3737
 
page_zip_clear_rec(
3738
 
/*===============*/
3739
 
        page_zip_des_t* page_zip,/* in/out: compressed page */
3740
 
        byte*           rec,    /* in: record to clear */
3741
 
        dict_index_t*   index,  /* in: index of rec */
3742
 
        const ulint*    offsets)/* in: rec_get_offsets(rec, index) */
3743
 
{
3744
 
        ulint   heap_no;
3745
 
        page_t* page    = page_align(rec);
3746
 
        /* page_zip_validate() would fail here if a record
3747
 
        containing externally stored columns is being deleted. */
3748
 
        ut_ad(rec_offs_validate(rec, index, offsets));
3749
 
        ut_ad(!page_zip_dir_find(page_zip, page_offset(rec)));
3750
 
        ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec)));
3751
 
        ut_ad(page_zip_header_cmp(page_zip, page));
3752
 
 
3753
 
        heap_no = rec_get_heap_no_new(rec);
3754
 
        ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
3755
 
 
3756
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3757
 
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3758
 
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3759
 
                           rec_offs_extra_size(offsets));
3760
 
 
3761
 
        if (
3762
 
#ifdef UNIV_ZIP_DEBUG
3763
 
            !page_zip_clear_rec_disable &&
3764
 
#endif /* UNIV_ZIP_DEBUG */
3765
 
            page_zip->m_end
3766
 
            + 1 + ((heap_no - 1) >= 64)/* size of the log entry */
3767
 
            + page_zip_get_trailer_len(page_zip,
3768
 
                                       dict_index_is_clust(index), NULL)
3769
 
            < page_zip_get_size(page_zip)) {
3770
 
                byte*   data;
3771
 
 
3772
 
                /* Clear only the data bytes, because the allocator and
3773
 
                the decompressor depend on the extra bytes. */
3774
 
                memset(rec, 0, rec_offs_data_size(offsets));
3775
 
 
3776
 
                if (!page_is_leaf(page)) {
3777
 
                        /* Clear node_ptr on the compressed page. */
3778
 
                        byte*   storage = page_zip->data
3779
 
                                + page_zip_get_size(page_zip)
3780
 
                                - (page_dir_get_n_heap(page)
3781
 
                                   - PAGE_HEAP_NO_USER_LOW)
3782
 
                                * PAGE_ZIP_DIR_SLOT_SIZE;
3783
 
 
3784
 
                        memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
3785
 
                               0, REC_NODE_PTR_SIZE);
3786
 
                } else if (dict_index_is_clust(index)) {
3787
 
                        /* Clear trx_id and roll_ptr on the compressed page. */
3788
 
                        byte*   storage = page_zip->data
3789
 
                                + page_zip_get_size(page_zip)
3790
 
                                - (page_dir_get_n_heap(page)
3791
 
                                   - PAGE_HEAP_NO_USER_LOW)
3792
 
                                * PAGE_ZIP_DIR_SLOT_SIZE;
3793
 
 
3794
 
                        memset(storage - (heap_no - 1)
3795
 
                               * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
3796
 
                               0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3797
 
                }
3798
 
 
3799
 
                /* Log that the data was zeroed out. */
3800
 
                data = page_zip->data + page_zip->m_end;
3801
 
                ut_ad(!*data);
3802
 
                if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
3803
 
                        *data++ = (byte) (0x80 | (heap_no - 1) >> 7);
3804
 
                        ut_ad(!*data);
3805
 
                }
3806
 
                *data++ = (byte) ((heap_no - 1) << 1 | 1);
3807
 
                ut_ad(!*data);
3808
 
                ut_ad((ulint) (data - page_zip->data)
3809
 
                      < page_zip_get_size(page_zip));
3810
 
                page_zip->m_end = data - page_zip->data;
3811
 
                page_zip->m_nonempty = TRUE;
3812
 
        } else if (page_is_leaf(page) && dict_index_is_clust(index)) {
3813
 
                /* Do not clear the record, because there is not enough space
3814
 
                to log the operation. */
3815
 
 
3816
 
                if (rec_offs_any_extern(offsets)) {
3817
 
                        ulint   i;
3818
 
 
3819
 
                        for (i = rec_offs_n_fields(offsets); i--; ) {
3820
 
                                /* Clear all BLOB pointers in order to make
3821
 
                                page_zip_validate() pass. */
3822
 
                                if (rec_offs_nth_extern(offsets, i)) {
3823
 
                                        ulint   len;
3824
 
                                        byte*   field = rec_get_nth_field(
3825
 
                                                rec, offsets, i, &len);
3826
 
                                        memset(field + len
3827
 
                                               - BTR_EXTERN_FIELD_REF_SIZE,
3828
 
                                               0, BTR_EXTERN_FIELD_REF_SIZE);
3829
 
                                }
3830
 
                        }
3831
 
                }
3832
 
        }
3833
 
 
3834
 
#ifdef UNIV_ZIP_DEBUG
3835
 
        ut_a(page_zip_validate(page_zip, page));
3836
 
#endif /* UNIV_ZIP_DEBUG */
3837
 
}
3838
 
 
3839
 
/**************************************************************************
3840
 
Write the "deleted" flag of a record on a compressed page.  The flag must
3841
 
already have been written on the uncompressed page. */
3842
 
UNIV_INTERN
3843
 
void
3844
 
page_zip_rec_set_deleted(
3845
 
/*=====================*/
3846
 
        page_zip_des_t* page_zip,/* in/out: compressed page */
3847
 
        const byte*     rec,    /* in: record on the uncompressed page */
3848
 
        ulint           flag)   /* in: the deleted flag (nonzero=TRUE) */
3849
 
{
3850
 
        byte*   slot = page_zip_dir_find(page_zip, page_offset(rec));
3851
 
        ut_a(slot);
3852
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3853
 
        if (flag) {
3854
 
                *slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8);
3855
 
        } else {
3856
 
                *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
3857
 
        }
3858
 
#ifdef UNIV_ZIP_DEBUG
3859
 
        ut_a(page_zip_validate(page_zip, page_align(rec)));
3860
 
#endif /* UNIV_ZIP_DEBUG */
3861
 
}
3862
 
 
3863
 
/**************************************************************************
3864
 
Write the "owned" flag of a record on a compressed page.  The n_owned field
3865
 
must already have been written on the uncompressed page. */
3866
 
UNIV_INTERN
3867
 
void
3868
 
page_zip_rec_set_owned(
3869
 
/*===================*/
3870
 
        page_zip_des_t* page_zip,/* in/out: compressed page */
3871
 
        const byte*     rec,    /* in: record on the uncompressed page */
3872
 
        ulint           flag)   /* in: the owned flag (nonzero=TRUE) */
3873
 
{
3874
 
        byte*   slot = page_zip_dir_find(page_zip, page_offset(rec));
3875
 
        ut_a(slot);
3876
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3877
 
        if (flag) {
3878
 
                *slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
3879
 
        } else {
3880
 
                *slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8);
3881
 
        }
3882
 
}
3883
 
 
3884
 
/**************************************************************************
3885
 
Insert a record to the dense page directory. */
3886
 
UNIV_INTERN
3887
 
void
3888
 
page_zip_dir_insert(
3889
 
/*================*/
3890
 
        page_zip_des_t* page_zip,/* in/out: compressed page */
3891
 
        const byte*     prev_rec,/* in: record after which to insert */
3892
 
        const byte*     free_rec,/* in: record from which rec was
3893
 
                                allocated, or NULL */
3894
 
        byte*           rec)    /* in: record to insert */
3895
 
{
3896
 
        ulint   n_dense;
3897
 
        byte*   slot_rec;
3898
 
        byte*   slot_free;
3899
 
 
3900
 
        ut_ad(prev_rec != rec);
3901
 
        ut_ad(page_rec_get_next((rec_t*) prev_rec) == rec);
3902
 
        ut_ad(page_zip_simple_validate(page_zip));
3903
 
 
3904
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3905
 
 
3906
 
        if (page_rec_is_infimum(prev_rec)) {
3907
 
                /* Use the first slot. */
3908
 
                slot_rec = page_zip->data + page_zip_get_size(page_zip);
3909
 
        } else {
3910
 
                byte*   end     = page_zip->data + page_zip_get_size(page_zip);
3911
 
                byte*   start   = end - page_zip_dir_user_size(page_zip);
3912
 
 
3913
 
                if (UNIV_LIKELY(!free_rec)) {
3914
 
                        /* PAGE_N_RECS was already incremented
3915
 
                        in page_cur_insert_rec_zip(), but the
3916
 
                        dense directory slot at that position
3917
 
                        contains garbage.  Skip it. */
3918
 
                        start += PAGE_ZIP_DIR_SLOT_SIZE;
3919
 
                }
3920
 
 
3921
 
                slot_rec = page_zip_dir_find_low(start, end,
3922
 
                                                 page_offset(prev_rec));
3923
 
                ut_a(slot_rec);
3924
 
        }
3925
 
 
3926
 
        /* Read the old n_dense (n_heap may have been incremented). */
3927
 
        n_dense = page_dir_get_n_heap(page_zip->data)
3928
 
                - (PAGE_HEAP_NO_USER_LOW + 1);
3929
 
 
3930
 
        if (UNIV_LIKELY_NULL(free_rec)) {
3931
 
                /* The record was allocated from the free list.
3932
 
                Shift the dense directory only up to that slot.
3933
 
                Note that in this case, n_dense is actually
3934
 
                off by one, because page_cur_insert_rec_zip()
3935
 
                did not increment n_heap. */
3936
 
                ut_ad(rec_get_heap_no_new(rec) < n_dense + 1
3937
 
                      + PAGE_HEAP_NO_USER_LOW);
3938
 
                ut_ad(rec >= free_rec);
3939
 
                slot_free = page_zip_dir_find(page_zip, page_offset(free_rec));
3940
 
                ut_ad(slot_free);
3941
 
                slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
3942
 
        } else {
3943
 
                /* The record was allocated from the heap.
3944
 
                Shift the entire dense directory. */
3945
 
                ut_ad(rec_get_heap_no_new(rec) == n_dense
3946
 
                      + PAGE_HEAP_NO_USER_LOW);
3947
 
 
3948
 
                /* Shift to the end of the dense page directory. */
3949
 
                slot_free = page_zip->data + page_zip_get_size(page_zip)
3950
 
                        - PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
3951
 
        }
3952
 
 
3953
 
        /* Shift the dense directory to allocate place for rec. */
3954
 
        memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free,
3955
 
                slot_rec - slot_free);
3956
 
 
3957
 
        /* Write the entry for the inserted record.
3958
 
        The "owned" and "deleted" flags must be zero. */
3959
 
        mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, page_offset(rec));
3960
 
}
3961
 
 
3962
 
/**************************************************************************
3963
 
Shift the dense page directory and the array of BLOB pointers
3964
 
when a record is deleted. */
3965
 
UNIV_INTERN
3966
 
void
3967
 
page_zip_dir_delete(
3968
 
/*================*/
3969
 
        page_zip_des_t* page_zip,/* in/out: compressed page */
3970
 
        byte*           rec,    /* in: record to delete */
3971
 
        dict_index_t*   index,  /* in: index of rec */
3972
 
        const ulint*    offsets,/* in: rec_get_offsets(rec) */
3973
 
        const byte*     free)   /* in: previous start of the free list */
3974
 
{
3975
 
        byte*   slot_rec;
3976
 
        byte*   slot_free;
3977
 
        ulint   n_ext;
3978
 
        page_t* page    = page_align(rec);
3979
 
 
3980
 
        ut_ad(rec_offs_validate(rec, index, offsets));
3981
 
        ut_ad(rec_offs_comp(offsets));
3982
 
 
3983
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3984
 
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3985
 
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3986
 
                           rec_offs_extra_size(offsets));
3987
 
 
3988
 
        slot_rec = page_zip_dir_find(page_zip, page_offset(rec));
3989
 
 
3990
 
        ut_a(slot_rec);
3991
 
 
3992
 
        /* This could not be done before page_zip_dir_find(). */
3993
 
        page_header_set_field(page, page_zip, PAGE_N_RECS,
3994
 
                              (ulint)(page_get_n_recs(page) - 1));
3995
 
 
3996
 
        if (UNIV_UNLIKELY(!free)) {
3997
 
                /* Make the last slot the start of the free list. */
3998
 
                slot_free = page_zip->data + page_zip_get_size(page_zip)
3999
 
                        - PAGE_ZIP_DIR_SLOT_SIZE
4000
 
                        * (page_dir_get_n_heap(page_zip->data)
4001
 
                           - PAGE_HEAP_NO_USER_LOW);
4002
 
        } else {
4003
 
                slot_free = page_zip_dir_find_free(page_zip,
4004
 
                                                   page_offset(free));
4005
 
                ut_a(slot_free < slot_rec);
4006
 
                /* Grow the free list by one slot by moving the start. */
4007
 
                slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
4008
 
        }
4009
 
 
4010
 
        if (UNIV_LIKELY(slot_rec > slot_free)) {
4011
 
                memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
4012
 
                        slot_free,
4013
 
                        slot_rec - slot_free);
4014
 
        }
4015
 
 
4016
 
        /* Write the entry for the deleted record.
4017
 
        The "owned" and "deleted" flags will be cleared. */
4018
 
        mach_write_to_2(slot_free, page_offset(rec));
4019
 
 
4020
 
        if (!page_is_leaf(page) || !dict_index_is_clust(index)) {
4021
 
                ut_ad(!rec_offs_any_extern(offsets));
4022
 
                goto skip_blobs;
4023
 
        }
4024
 
 
4025
 
        n_ext = rec_offs_n_extern(offsets);
4026
 
        if (UNIV_UNLIKELY(n_ext)) {
4027
 
                /* Shift and zero fill the array of BLOB pointers. */
4028
 
                ulint   blob_no;
4029
 
                byte*   externs;
4030
 
                byte*   ext_end;
4031
 
 
4032
 
                blob_no = page_zip_get_n_prev_extern(page_zip, rec, index);
4033
 
                ut_a(blob_no + n_ext <= page_zip->n_blobs);
4034
 
 
4035
 
                externs = page_zip->data + page_zip_get_size(page_zip)
4036
 
                        - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
4037
 
                        * (PAGE_ZIP_DIR_SLOT_SIZE
4038
 
                           + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4039
 
 
4040
 
                ext_end = externs - page_zip->n_blobs
4041
 
                        * BTR_EXTERN_FIELD_REF_SIZE;
4042
 
                externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
4043
 
 
4044
 
                page_zip->n_blobs -= n_ext;
4045
 
                /* Shift and zero fill the array. */
4046
 
                memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end,
4047
 
                        (page_zip->n_blobs - blob_no)
4048
 
                        * BTR_EXTERN_FIELD_REF_SIZE);
4049
 
                memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE);
4050
 
        }
4051
 
 
4052
 
skip_blobs:
4053
 
        /* The compression algorithm expects info_bits and n_owned
4054
 
        to be 0 for deleted records. */
4055
 
        rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
4056
 
 
4057
 
        page_zip_clear_rec(page_zip, rec, index, offsets);
4058
 
}
4059
 
 
4060
 
/**************************************************************************
4061
 
Add a slot to the dense page directory. */
4062
 
UNIV_INTERN
4063
 
void
4064
 
page_zip_dir_add_slot(
4065
 
/*==================*/
4066
 
        page_zip_des_t* page_zip,       /* in/out: compressed page */
4067
 
        ulint           is_clustered)   /* in: nonzero for clustered index,
4068
 
                                        zero for others */
4069
 
{
4070
 
        ulint   n_dense;
4071
 
        byte*   dir;
4072
 
        byte*   stored;
4073
 
 
4074
 
        ut_ad(page_is_comp(page_zip->data));
4075
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4076
 
 
4077
 
        /* Read the old n_dense (n_heap has already been incremented). */
4078
 
        n_dense = page_dir_get_n_heap(page_zip->data)
4079
 
                - (PAGE_HEAP_NO_USER_LOW + 1);
4080
 
 
4081
 
        dir = page_zip->data + page_zip_get_size(page_zip)
4082
 
                - PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
4083
 
 
4084
 
        if (!page_is_leaf(page_zip->data)) {
4085
 
                ut_ad(!page_zip->n_blobs);
4086
 
                stored = dir - n_dense * REC_NODE_PTR_SIZE;
4087
 
        } else if (UNIV_UNLIKELY(is_clustered)) {
4088
 
                /* Move the BLOB pointer array backwards to make space for the
4089
 
                roll_ptr and trx_id columns and the dense directory slot. */
4090
 
                byte*   externs;
4091
 
 
4092
 
                stored = dir - n_dense
4093
 
                        * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4094
 
                externs = stored
4095
 
                        - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4096
 
                ASSERT_ZERO(externs
4097
 
                            - (PAGE_ZIP_DIR_SLOT_SIZE
4098
 
                               + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4099
 
                            PAGE_ZIP_DIR_SLOT_SIZE
4100
 
                            + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4101
 
                memmove(externs - (PAGE_ZIP_DIR_SLOT_SIZE
4102
 
                                   + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4103
 
                        externs, stored - externs);
4104
 
        } else {
4105
 
                stored = dir
4106
 
                        - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4107
 
                ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE,
4108
 
                            PAGE_ZIP_DIR_SLOT_SIZE);
4109
 
        }
4110
 
 
4111
 
        /* Move the uncompressed area backwards to make space
4112
 
        for one directory slot. */
4113
 
        memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, dir - stored);
4114
 
}
4115
 
 
4116
 
/***************************************************************
4117
 
Parses a log record of writing to the header of a page. */
4118
 
UNIV_INTERN
4119
 
byte*
4120
 
page_zip_parse_write_header(
4121
 
/*========================*/
4122
 
                                /* out: end of log record or NULL */
4123
 
        byte*           ptr,    /* in: redo log buffer */
4124
 
        byte*           end_ptr,/* in: redo log buffer end */
4125
 
        page_t*         page,   /* in/out: uncompressed page */
4126
 
        page_zip_des_t* page_zip)/* in/out: compressed page */
4127
 
{
4128
 
        ulint   offset;
4129
 
        ulint   len;
4130
 
 
4131
 
        ut_ad(ptr && end_ptr);
4132
 
        ut_ad(!page == !page_zip);
4133
 
 
4134
 
        if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) {
4135
 
 
4136
 
                return(NULL);
4137
 
        }
4138
 
 
4139
 
        offset = (ulint) *ptr++;
4140
 
        len = (ulint) *ptr++;
4141
 
 
4142
 
        if (UNIV_UNLIKELY(!len) || UNIV_UNLIKELY(offset + len >= PAGE_DATA)) {
4143
 
corrupt:
4144
 
                recv_sys->found_corrupt_log = TRUE;
4145
 
 
4146
 
                return(NULL);
4147
 
        }
4148
 
 
4149
 
        if (UNIV_UNLIKELY(end_ptr < ptr + len)) {
4150
 
 
4151
 
                return(NULL);
4152
 
        }
4153
 
 
4154
 
        if (page) {
4155
 
                if (UNIV_UNLIKELY(!page_zip)) {
4156
 
 
4157
 
                        goto corrupt;
4158
 
                }
4159
 
#ifdef UNIV_ZIP_DEBUG
4160
 
                ut_a(page_zip_validate(page_zip, page));
4161
 
#endif /* UNIV_ZIP_DEBUG */
4162
 
 
4163
 
                memcpy(page + offset, ptr, len);
4164
 
                memcpy(page_zip->data + offset, ptr, len);
4165
 
 
4166
 
#ifdef UNIV_ZIP_DEBUG
4167
 
                ut_a(page_zip_validate(page_zip, page));
4168
 
#endif /* UNIV_ZIP_DEBUG */
4169
 
        }
4170
 
 
4171
 
        return(ptr + len);
4172
 
}
4173
 
 
4174
 
/**************************************************************************
4175
 
Write a log record of writing to the uncompressed header portion of a page. */
4176
 
UNIV_INTERN
4177
 
void
4178
 
page_zip_write_header_log(
4179
 
/*======================*/
4180
 
        const byte*     data,   /* in: data on the uncompressed page */
4181
 
        ulint           length, /* in: length of the data */
4182
 
        mtr_t*          mtr)    /* in: mini-transaction */
4183
 
{
4184
 
        byte*   log_ptr = mlog_open(mtr, 11 + 1 + 1);
4185
 
        ulint   offset  = page_offset(data);
4186
 
 
4187
 
        ut_ad(offset < PAGE_DATA);
4188
 
        ut_ad(offset + length < PAGE_DATA);
4189
 
#if PAGE_DATA > 255
4190
 
# error "PAGE_DATA > 255"
4191
 
#endif
4192
 
        ut_ad(length < 256);
4193
 
 
4194
 
        /* If no logging is requested, we may return now */
4195
 
        if (UNIV_UNLIKELY(!log_ptr)) {
4196
 
 
4197
 
                return;
4198
 
        }
4199
 
 
4200
 
        log_ptr = mlog_write_initial_log_record_fast(
4201
 
                (byte*) data, MLOG_ZIP_WRITE_HEADER, log_ptr, mtr);
4202
 
        *log_ptr++ = (byte) offset;
4203
 
        *log_ptr++ = (byte) length;
4204
 
        mlog_close(mtr, log_ptr);
4205
 
 
4206
 
        mlog_catenate_string(mtr, data, length);
4207
 
}
4208
 
 
4209
 
/**************************************************************************
4210
 
Reorganize and compress a page.  This is a low-level operation for
4211
 
compressed pages, to be used when page_zip_compress() fails.
4212
 
On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
4213
 
The function btr_page_reorganize() should be preferred whenever possible.
4214
 
IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
4215
 
non-clustered index, the caller must update the insert buffer free
4216
 
bits in the same mini-transaction in such a way that the modification
4217
 
will be redo-logged. */
4218
 
UNIV_INTERN
4219
 
ibool
4220
 
page_zip_reorganize(
4221
 
/*================*/
4222
 
                                /* out: TRUE on success, FALSE on failure;
4223
 
                                page and page_zip will be left intact
4224
 
                                on failure. */
4225
 
        buf_block_t*    block,  /* in/out: page with compressed page;
4226
 
                                on the compressed page, in: size;
4227
 
                                out: data, n_blobs,
4228
 
                                m_start, m_end, m_nonempty */
4229
 
        dict_index_t*   index,  /* in: index of the B-tree node */
4230
 
        mtr_t*          mtr)    /* in: mini-transaction */
4231
 
{
4232
 
        page_zip_des_t* page_zip        = buf_block_get_page_zip(block);
4233
 
        page_t*         page            = buf_block_get_frame(block);
4234
 
        buf_block_t*    temp_block;
4235
 
        page_t*         temp_page;
4236
 
        ulint           log_mode;
4237
 
 
4238
 
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4239
 
        ut_ad(page_is_comp(page));
4240
 
        /* Note that page_zip_validate(page_zip, page) may fail here. */
4241
 
        UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
4242
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4243
 
 
4244
 
        /* Disable logging */
4245
 
        log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
4246
 
 
4247
 
        temp_block = buf_block_alloc(0);
4248
 
        temp_page = temp_block->frame;
4249
 
 
4250
 
        btr_search_drop_page_hash_index(block);
4251
 
 
4252
 
        /* Copy the old page to temporary space */
4253
 
        buf_frame_copy(temp_page, page);
4254
 
 
4255
 
        /* Recreate the page: note that global data on page (possible
4256
 
        segment headers, next page-field, etc.) is preserved intact */
4257
 
 
4258
 
        page_create(block, mtr, dict_table_is_comp(index->table));
4259
 
        block->check_index_page_at_flush = TRUE;
4260
 
 
4261
 
        /* Copy the records from the temporary space to the recreated page;
4262
 
        do not copy the lock bits yet */
4263
 
 
4264
 
        page_copy_rec_list_end_no_locks(block, temp_block,
4265
 
                                        page_get_infimum_rec(temp_page),
4266
 
                                        index, mtr);
4267
 
        /* Copy max trx id to recreated page */
4268
 
        page_set_max_trx_id(block, NULL, page_get_max_trx_id(temp_page));
4269
 
 
4270
 
        /* Restore logging. */
4271
 
        mtr_set_log_mode(mtr, log_mode);
4272
 
 
4273
 
        if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
4274
 
 
4275
 
                /* Restore the old page and exit. */
4276
 
                buf_frame_copy(page, temp_page);
4277
 
 
4278
 
                buf_block_free(temp_block);
4279
 
                return(FALSE);
4280
 
        }
4281
 
 
4282
 
        lock_move_reorganize_page(block, temp_block);
4283
 
 
4284
 
        buf_block_free(temp_block);
4285
 
        return(TRUE);
4286
 
}
4287
 
 
4288
 
/**************************************************************************
4289
 
Copy a page byte for byte, except for the file page header and trailer. */
4290
 
UNIV_INTERN
4291
 
void
4292
 
page_zip_copy(
4293
 
/*==========*/
4294
 
        page_zip_des_t*         page_zip,       /* out: copy of src_zip
4295
 
                                                (n_blobs, m_start, m_end,
4296
 
                                                m_nonempty, data[0..size-1]) */
4297
 
        page_t*                 page,           /* out: copy of src */
4298
 
        const page_zip_des_t*   src_zip,        /* in: compressed page */
4299
 
        const page_t*           src,            /* in: page */
4300
 
        dict_index_t*           index,          /* in: index of the B-tree */
4301
 
        mtr_t*                  mtr)            /* in: mini-transaction */
4302
 
{
4303
 
        ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
4304
 
        ut_ad(mtr_memo_contains_page(mtr, (page_t*) src, MTR_MEMO_PAGE_X_FIX));
4305
 
#ifdef UNIV_ZIP_DEBUG
4306
 
        ut_a(page_zip_validate(src_zip, src));
4307
 
#endif /* UNIV_ZIP_DEBUG */
4308
 
        ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
4309
 
        if (UNIV_UNLIKELY(src_zip->n_blobs)) {
4310
 
                ut_a(page_is_leaf(src));
4311
 
                ut_a(dict_index_is_clust(index));
4312
 
        }
4313
 
 
4314
 
        UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
4315
 
        UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip));
4316
 
        UNIV_MEM_ASSERT_RW(src, UNIV_PAGE_SIZE);
4317
 
        UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip));
4318
 
 
4319
 
        /* Skip the file page header and trailer. */
4320
 
        memcpy(page + FIL_PAGE_DATA, src + FIL_PAGE_DATA,
4321
 
               UNIV_PAGE_SIZE - FIL_PAGE_DATA
4322
 
               - FIL_PAGE_DATA_END);
4323
 
        memcpy(page_zip->data + FIL_PAGE_DATA,
4324
 
               src_zip->data + FIL_PAGE_DATA,
4325
 
               page_zip_get_size(page_zip) - FIL_PAGE_DATA);
4326
 
 
4327
 
        {
4328
 
                page_zip_t*     data = page_zip->data;
4329
 
                memcpy(page_zip, src_zip, sizeof *page_zip);
4330
 
                page_zip->data = data;
4331
 
        }
4332
 
        ut_ad(page_zip_get_trailer_len(page_zip,
4333
 
                                       dict_index_is_clust(index), NULL)
4334
 
              + page_zip->m_end < page_zip_get_size(page_zip));
4335
 
 
4336
 
        if (!page_is_leaf(src)
4337
 
            && UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL)
4338
 
            && UNIV_LIKELY(mach_read_from_4(page
4339
 
                                            + FIL_PAGE_PREV) != FIL_NULL)) {
4340
 
                /* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */
4341
 
                ulint   offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
4342
 
                                                 TRUE);
4343
 
                if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) {
4344
 
                        rec_t*  rec = page + offs;
4345
 
                        ut_a(rec[-REC_N_NEW_EXTRA_BYTES]
4346
 
                             & REC_INFO_MIN_REC_FLAG);
4347
 
                        rec[-REC_N_NEW_EXTRA_BYTES] &= ~ REC_INFO_MIN_REC_FLAG;
4348
 
                }
4349
 
        }
4350
 
 
4351
 
#ifdef UNIV_ZIP_DEBUG
4352
 
        ut_a(page_zip_validate(page_zip, page));
4353
 
#endif /* UNIV_ZIP_DEBUG */
4354
 
 
4355
 
        page_zip_compress_write_log(page_zip, page, index, mtr);
4356
 
}
4357
 
 
4358
 
/**************************************************************************
4359
 
Parses a log record of compressing an index page. */
4360
 
UNIV_INTERN
4361
 
byte*
4362
 
page_zip_parse_compress(
4363
 
/*====================*/
4364
 
                                /* out: end of log record or NULL */
4365
 
        byte*           ptr,    /* in: buffer */
4366
 
        byte*           end_ptr,/* in: buffer end */
4367
 
        page_t*         page,   /* out: uncompressed page */
4368
 
        page_zip_des_t* page_zip)/* out: compressed page */
4369
 
{
4370
 
        ulint   size;
4371
 
        ulint   trailer_size;
4372
 
 
4373
 
        ut_ad(ptr && end_ptr);
4374
 
        ut_ad(!page == !page_zip);
4375
 
 
4376
 
        if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) {
4377
 
 
4378
 
                return(NULL);
4379
 
        }
4380
 
 
4381
 
        size = mach_read_from_2(ptr);
4382
 
        ptr += 2;
4383
 
        trailer_size = mach_read_from_2(ptr);
4384
 
        ptr += 2;
4385
 
 
4386
 
        if (UNIV_UNLIKELY(ptr + 8 + size + trailer_size > end_ptr)) {
4387
 
 
4388
 
                return(NULL);
4389
 
        }
4390
 
 
4391
 
        if (page) {
4392
 
                if (UNIV_UNLIKELY(!page_zip)
4393
 
                    || UNIV_UNLIKELY(page_zip_get_size(page_zip) < size)) {
4394
 
corrupt:
4395
 
                        recv_sys->found_corrupt_log = TRUE;
4396
 
 
4397
 
                        return(NULL);
4398
 
                }
4399
 
 
4400
 
                memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4);
4401
 
                memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4);
4402
 
                memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size);
4403
 
                memset(page_zip->data + FIL_PAGE_TYPE + size, 0,
4404
 
                       page_zip_get_size(page_zip) - trailer_size
4405
 
                       - (FIL_PAGE_TYPE + size));
4406
 
                memcpy(page_zip->data + page_zip_get_size(page_zip)
4407
 
                       - trailer_size, ptr + 8 + size, trailer_size);
4408
 
 
4409
 
                if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page))) {
4410
 
 
4411
 
                        goto corrupt;
4412
 
                }
4413
 
        }
4414
 
 
4415
 
        return(ptr + 8 + size + trailer_size);
4416
 
}
4417
 
 
4418
 
/**************************************************************************
4419
 
Calculate the compressed page checksum. */
4420
 
UNIV_INTERN
4421
 
ulint
4422
 
page_zip_calc_checksum(
4423
 
/*===================*/
4424
 
                                /* out: page checksum */
4425
 
        const void*     data,   /* in: compressed page */
4426
 
        ulint           size)   /* in: size of compressed page */
4427
 
{
4428
 
        /* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
4429
 
        and FIL_PAGE_FILE_FLUSH_LSN from the checksum. */
4430
 
 
4431
 
        const Bytef*    s       = data;
4432
 
        uLong           adler;
4433
 
 
4434
 
        ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
4435
 
 
4436
 
        adler = adler32(0L, s + FIL_PAGE_OFFSET,
4437
 
                        FIL_PAGE_LSN - FIL_PAGE_OFFSET);
4438
 
        adler = adler32(adler, s + FIL_PAGE_TYPE, 2);
4439
 
        adler = adler32(adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
4440
 
                        size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
4441
 
 
4442
 
        return((ulint) adler);
4443
 
}