~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to storage/innobase/page/page0zip.c

  • Committer: Elan Ruusamäe
  • Date: 2008-12-04 19:38:26 UTC
  • mfrom: (644 drizzle)
  • mto: (641.3.10 devel)
  • mto: This revision was merged to the branch mainline in revision 649.
  • Revision ID: glen@haarber.alkohol.ee-20081204193826-xxyhd45ag121pf3z
- pull from trunk

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/******************************************************
 
2
Compressed page interface
 
3
 
 
4
(c) 2005 Innobase Oy
 
5
 
 
6
Created June 2005 by Marko Makela
 
7
*******************************************************/
 
8
 
 
9
#define THIS_MODULE
 
10
#include "page0zip.h"
 
11
#ifdef UNIV_NONINL
 
12
# include "page0zip.ic"
 
13
#endif
 
14
#undef THIS_MODULE
 
15
#include "page0page.h"
 
16
#include "mtr0log.h"
 
17
#include "ut0sort.h"
 
18
#include "dict0boot.h"
 
19
#include "dict0dict.h"
 
20
#include "btr0sea.h"
 
21
#include "btr0cur.h"
 
22
#include "page0types.h"
 
23
#include "lock0lock.h"
 
24
#include "log0recv.h"
 
25
#include "zlib.h"
 
26
#include "buf0lru.h"
 
27
 
 
28
/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
 
29
UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
 
30
 
 
31
/* Please refer to ../include/page0zip.ic for a description of the
 
32
compressed page format. */
 
33
 
 
34
/* The infimum and supremum records are omitted from the compressed page.
 
35
On compress, we compare that the records are there, and on uncompress we
 
36
restore the records. */
 
37
static const byte infimum_extra[] = {
 
38
        0x01,                   /* info_bits=0, n_owned=1 */
 
39
        0x00, 0x02              /* heap_no=0, status=2 */
 
40
        /* ?, ? */              /* next=(first user rec, or supremum) */
 
41
};
 
42
static const byte infimum_data[] = {
 
43
        0x69, 0x6e, 0x66, 0x69,
 
44
        0x6d, 0x75, 0x6d, 0x00  /* "infimum\0" */
 
45
};
 
46
static const byte supremum_extra_data[] = {
 
47
        /* 0x0?, */             /* info_bits=0, n_owned=1..8 */
 
48
        0x00, 0x0b,             /* heap_no=1, status=3 */
 
49
        0x00, 0x00,             /* next=0 */
 
50
        0x73, 0x75, 0x70, 0x72,
 
51
        0x65, 0x6d, 0x75, 0x6d  /* "supremum" */
 
52
};
 
53
 
 
54
/** Assert that a block of memory is filled with zero bytes.
Compare at most sizeof(field_ref_zero) bytes.
The macro arguments are parenthesized so that expressions such as
"ptr + n" or "a ? b : c" can be passed safely.
@param b	in: start of the memory block
@param s	in: length of the memory block, in bytes */
#define ASSERT_ZERO(b, s) \
        ut_ad(!memcmp((b), field_ref_zero, \
                      ut_min((s), sizeof field_ref_zero)))
/** Assert that a BLOB pointer is filled with zero bytes.
Exactly sizeof(field_ref_zero) bytes are compared.
@param b	in: BLOB pointer */
#define ASSERT_ZERO_BLOB(b) \
        ut_ad(!memcmp((b), field_ref_zero, sizeof field_ref_zero))
 
61
 
 
62
/* Optional diagnostic output.  It is compiled in whenever UNIV_DEBUG or
UNIV_ZIP_DEBUG is defined, independently of any other UNIV_ debugging
conditions; otherwise page_zip_fail() expands to nothing. */
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
# include <stdarg.h>
/**************************************************************************
Report a failure to decompress or compress.  The message is written to
stderr, preceded by the output of ut_print_timestamp() and the prefix
"  InnoDB: ". */
__attribute__((format (printf, 1, 2)))
static
int
page_zip_fail_func(
/*===============*/
                                /* out: return value of vfprintf() for
                                the formatted message, i.e. the number
                                of characters printed */
        const char*     fmt,    /* in: printf(3) format string */
        ...)                    /* in: arguments corresponding to fmt */
{
        va_list args;
        int     n_printed;

        ut_print_timestamp(stderr);
        fputs("  InnoDB: ", stderr);

        va_start(args, fmt);
        n_printed = vfprintf(stderr, fmt, args);
        va_end(args);

        return(n_printed);
}
/* The whole argument list is passed in an extra pair of parentheses,
page_zip_fail((fmt, ...)), so that the call can vanish completely
when the debugging output is disabled. */
# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
# define page_zip_fail(fmt_args) /* empty */
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
 
92
 
 
93
/**************************************************************************
 
94
Determine the guaranteed free space on an empty page. */
 
95
UNIV_INTERN
 
96
ulint
 
97
page_zip_empty_size(
 
98
/*================*/
 
99
                                /* out: minimum payload size on the page */
 
100
        ulint   n_fields,       /* in: number of columns in the index */
 
101
        ulint   zip_size)       /* in: compressed page size in bytes */
 
102
{
 
103
        lint    size = zip_size
 
104
                /* subtract the page header and the longest
 
105
                uncompressed data needed for one record */
 
106
                - (PAGE_DATA
 
107
                   + PAGE_ZIP_DIR_SLOT_SIZE
 
108
                   + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
 
109
                   + 1/* encoded heap_no==2 in page_zip_write_rec() */
 
110
                   + 1/* end of modification log */
 
111
                   - REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
 
112
                /* subtract the space for page_zip_fields_encode() */
 
113
                - compressBound(2 * (n_fields + 1));
 
114
        return(size > 0 ? (ulint) size : 0);
 
115
}
 
116
 
 
117
/*****************************************************************
 
118
Gets the size of the compressed page trailer (the dense page directory),
 
119
including deleted records (the free list). */
 
120
UNIV_INLINE
 
121
ulint
 
122
page_zip_dir_size(
 
123
/*==============*/
 
124
                                                /* out: length of dense page
 
125
                                                directory, in bytes */
 
126
        const page_zip_des_t*   page_zip)       /* in: compressed page */
 
127
{
 
128
        /* Exclude the page infimum and supremum from the record count. */
 
129
        ulint   size = PAGE_ZIP_DIR_SLOT_SIZE
 
130
                * (page_dir_get_n_heap(page_zip->data)
 
131
                   - PAGE_HEAP_NO_USER_LOW);
 
132
        return(size);
 
133
}
 
134
 
 
135
/*****************************************************************
 
136
Gets the size of the compressed page trailer (the dense page directory),
 
137
only including user records (excluding the free list). */
 
138
UNIV_INLINE
 
139
ulint
 
140
page_zip_dir_user_size(
 
141
/*===================*/
 
142
                                                /* out: length of dense page
 
143
                                                directory comprising existing
 
144
                                                records, in bytes */
 
145
        const page_zip_des_t*   page_zip)       /* in: compressed page */
 
146
{
 
147
        ulint   size = PAGE_ZIP_DIR_SLOT_SIZE
 
148
                * page_get_n_recs(page_zip->data);
 
149
        ut_ad(size <= page_zip_dir_size(page_zip));
 
150
        return(size);
 
151
}
 
152
 
 
153
/*****************************************************************
 
154
Find the slot of the given record in the dense page directory. */
 
155
UNIV_INLINE
 
156
byte*
 
157
page_zip_dir_find_low(
 
158
/*==================*/
 
159
                                        /* out: dense directory slot,
 
160
                                        or NULL if record not found */
 
161
        byte*   slot,                   /* in: start of records */
 
162
        byte*   end,                    /* in: end of records */
 
163
        ulint   offset)                 /* in: offset of user record */
 
164
{
 
165
        ut_ad(slot <= end);
 
166
 
 
167
        for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) {
 
168
                if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK)
 
169
                    == offset) {
 
170
                        return(slot);
 
171
                }
 
172
        }
 
173
 
 
174
        return(NULL);
 
175
}
 
176
 
 
177
/*****************************************************************
 
178
Find the slot of the given non-free record in the dense page directory. */
 
179
UNIV_INLINE
 
180
byte*
 
181
page_zip_dir_find(
 
182
/*==============*/
 
183
                                                /* out: dense directory slot,
 
184
                                                or NULL if record not found */
 
185
        page_zip_des_t* page_zip,               /* in: compressed page */
 
186
        ulint           offset)                 /* in: offset of user record */
 
187
{
 
188
        byte*   end     = page_zip->data + page_zip_get_size(page_zip);
 
189
 
 
190
        ut_ad(page_zip_simple_validate(page_zip));
 
191
 
 
192
        return(page_zip_dir_find_low(end - page_zip_dir_user_size(page_zip),
 
193
                                     end,
 
194
                                     offset));
 
195
}
 
196
 
 
197
/*****************************************************************
 
198
Find the slot of the given free record in the dense page directory. */
 
199
UNIV_INLINE
 
200
byte*
 
201
page_zip_dir_find_free(
 
202
/*===================*/
 
203
                                                /* out: dense directory slot,
 
204
                                                or NULL if record not found */
 
205
        page_zip_des_t* page_zip,               /* in: compressed page */
 
206
        ulint           offset)                 /* in: offset of user record */
 
207
{
 
208
        byte*   end     = page_zip->data + page_zip_get_size(page_zip);
 
209
 
 
210
        ut_ad(page_zip_simple_validate(page_zip));
 
211
 
 
212
        return(page_zip_dir_find_low(end - page_zip_dir_size(page_zip),
 
213
                                     end - page_zip_dir_user_size(page_zip),
 
214
                                     offset));
 
215
}
 
216
 
 
217
/*****************************************************************
 
218
Read a given slot in the dense page directory. */
 
219
UNIV_INLINE
 
220
ulint
 
221
page_zip_dir_get(
 
222
/*=============*/
 
223
                                                /* out: record offset
 
224
                                                on the uncompressed page,
 
225
                                                possibly ORed with
 
226
                                                PAGE_ZIP_DIR_SLOT_DEL or
 
227
                                                PAGE_ZIP_DIR_SLOT_OWNED */
 
228
        const page_zip_des_t*   page_zip,       /* in: compressed page */
 
229
        ulint                   slot)           /* in: slot
 
230
                                                (0=first user record) */
 
231
{
 
232
        ut_ad(page_zip_simple_validate(page_zip));
 
233
        ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE);
 
234
        return(mach_read_from_2(page_zip->data + page_zip_get_size(page_zip)
 
235
                                - PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
 
236
}
 
237
 
 
238
/**************************************************************************
 
239
Write a log record of compressing an index page. */
 
240
static
 
241
void
 
242
page_zip_compress_write_log(
 
243
/*========================*/
 
244
        const page_zip_des_t*   page_zip,/* in: compressed page */
 
245
        const page_t*           page,   /* in: uncompressed page */
 
246
        dict_index_t*           index,  /* in: index of the B-tree node */
 
247
        mtr_t*                  mtr)    /* in: mini-transaction */
 
248
{
 
249
        byte*   log_ptr;
 
250
        ulint   trailer_size;
 
251
 
 
252
        log_ptr = mlog_open(mtr, 11 + 2 + 2);
 
253
 
 
254
        if (!log_ptr) {
 
255
 
 
256
                return;
 
257
        }
 
258
 
 
259
        /* Read the number of user records. */
 
260
        trailer_size = page_dir_get_n_heap(page_zip->data)
 
261
                - PAGE_HEAP_NO_USER_LOW;
 
262
        /* Multiply by uncompressed of size stored per record */
 
263
        if (!page_is_leaf(page)) {
 
264
                trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
 
265
        } else if (dict_index_is_clust(index)) {
 
266
                trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
 
267
                        + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
 
268
        } else {
 
269
                trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
 
270
        }
 
271
        /* Add the space occupied by BLOB pointers. */
 
272
        trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
 
273
        ut_a(page_zip->m_end > PAGE_DATA);
 
274
#if FIL_PAGE_DATA > PAGE_DATA
 
275
# error "FIL_PAGE_DATA > PAGE_DATA"
 
276
#endif
 
277
        ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));
 
278
 
 
279
        log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
 
280
                                                     MLOG_ZIP_PAGE_COMPRESS,
 
281
                                                     log_ptr, mtr);
 
282
        mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE);
 
283
        log_ptr += 2;
 
284
        mach_write_to_2(log_ptr, trailer_size);
 
285
        log_ptr += 2;
 
286
        mlog_close(mtr, log_ptr);
 
287
 
 
288
        /* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
 
289
        mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
 
290
        mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
 
291
        /* Write most of the page header, the compressed stream and
 
292
        the modification log. */
 
293
        mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
 
294
                             page_zip->m_end - FIL_PAGE_TYPE);
 
295
        /* Write the uncompressed trailer of the compressed page. */
 
296
        mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
 
297
                             - trailer_size, trailer_size);
 
298
}
 
299
 
 
300
/**********************************************************
 
301
Determine how many externally stored columns are contained
 
302
in existing records with smaller heap_no than rec. */
 
303
static
 
304
ulint
 
305
page_zip_get_n_prev_extern(
 
306
/*=======================*/
 
307
        const page_zip_des_t*   page_zip,/* in: dense page directory on
 
308
                                        compressed page */
 
309
        const rec_t*            rec,    /* in: compact physical record
 
310
                                        on a B-tree leaf page */
 
311
        dict_index_t*           index)  /* in: record descriptor */
 
312
{
 
313
        const page_t*   page    = page_align(rec);
 
314
        ulint           n_ext   = 0;
 
315
        ulint           i;
 
316
        ulint           left;
 
317
        ulint           heap_no;
 
318
        ulint           n_recs  = page_get_n_recs(page_zip->data);
 
319
 
 
320
        ut_ad(page_is_leaf(page));
 
321
        ut_ad(page_is_comp(page));
 
322
        ut_ad(dict_table_is_comp(index->table));
 
323
        ut_ad(dict_index_is_clust(index));
 
324
 
 
325
        heap_no = rec_get_heap_no_new(rec);
 
326
        ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
 
327
        left = heap_no - PAGE_HEAP_NO_USER_LOW;
 
328
        if (UNIV_UNLIKELY(!left)) {
 
329
                return(0);
 
330
        }
 
331
 
 
332
        for (i = 0; i < n_recs; i++) {
 
333
                const rec_t*    r       = page + (page_zip_dir_get(page_zip, i)
 
334
                                                  & PAGE_ZIP_DIR_SLOT_MASK);
 
335
 
 
336
                if (rec_get_heap_no_new(r) < heap_no) {
 
337
                        n_ext += rec_get_n_extern_new(r, index,
 
338
                                                      ULINT_UNDEFINED);
 
339
                        if (!--left) {
 
340
                                break;
 
341
                        }
 
342
                }
 
343
        }
 
344
 
 
345
        return(n_ext);
 
346
}
 
347
 
 
348
/**************************************************************************
 
349
Encode the length of a fixed-length column. */
 
350
static
 
351
byte*
 
352
page_zip_fixed_field_encode(
 
353
/*========================*/
 
354
                        /* out: buf + length of encoded val */
 
355
        byte*   buf,    /* in: pointer to buffer where to write */
 
356
        ulint   val)    /* in: value to write */
 
357
{
 
358
        ut_ad(val >= 2);
 
359
 
 
360
        if (UNIV_LIKELY(val < 126)) {
 
361
                /*
 
362
                0 = nullable variable field of at most 255 bytes length;
 
363
                1 = not null variable field of at most 255 bytes length;
 
364
                126 = nullable variable field with maximum length >255;
 
365
                127 = not null variable field with maximum length >255
 
366
                */
 
367
                *buf++ = (byte) val;
 
368
        } else {
 
369
                *buf++ = (byte) (0x80 | val >> 8);
 
370
                *buf++ = (byte) val;
 
371
        }
 
372
 
 
373
        return(buf);
 
374
}
 
375
 
 
376
/**************************************************************************
 
377
Write the index information for the compressed page. */
 
378
static
 
379
ulint
 
380
page_zip_fields_encode(
 
381
/*===================*/
 
382
                                /* out: used size of buf */
 
383
        ulint           n,      /* in: number of fields to compress */
 
384
        dict_index_t*   index,  /* in: index comprising at least n fields */
 
385
        ulint           trx_id_pos,/* in: position of the trx_id column
 
386
                                in the index, or ULINT_UNDEFINED if
 
387
                                this is a non-leaf page */
 
388
        byte*           buf)    /* out: buffer of (n + 1) * 2 bytes */
 
389
{
 
390
        const byte*     buf_start       = buf;
 
391
        ulint           i;
 
392
        ulint           col;
 
393
        ulint           trx_id_col      = 0;
 
394
        /* sum of lengths of preceding non-nullable fixed fields, or 0 */
 
395
        ulint           fixed_sum       = 0;
 
396
 
 
397
        ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);
 
398
 
 
399
        for (i = col = 0; i < n; i++) {
 
400
                dict_field_t*   field = dict_index_get_nth_field(index, i);
 
401
                ulint           val;
 
402
 
 
403
                if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
 
404
                        val = 1; /* set the "not nullable" flag */
 
405
                } else {
 
406
                        val = 0; /* nullable field */
 
407
                }
 
408
 
 
409
                if (!field->fixed_len) {
 
410
                        /* variable-length field */
 
411
                        const dict_col_t*       column
 
412
                                = dict_field_get_col(field);
 
413
 
 
414
                        if (UNIV_UNLIKELY(column->len > 255)
 
415
                            || UNIV_UNLIKELY(column->mtype == DATA_BLOB)) {
 
416
                                val |= 0x7e; /* max > 255 bytes */
 
417
                        }
 
418
 
 
419
                        if (fixed_sum) {
 
420
                                /* write out the length of any
 
421
                                preceding non-nullable fields */
 
422
                                buf = page_zip_fixed_field_encode(
 
423
                                        buf, fixed_sum << 1 | 1);
 
424
                                fixed_sum = 0;
 
425
                                col++;
 
426
                        }
 
427
 
 
428
                        *buf++ = (byte) val;
 
429
                        col++;
 
430
                } else if (val) {
 
431
                        /* fixed-length non-nullable field */
 
432
 
 
433
                        if (fixed_sum && UNIV_UNLIKELY
 
434
                            (fixed_sum + field->fixed_len
 
435
                             > DICT_MAX_INDEX_COL_LEN)) {
 
436
                                /* Write out the length of the
 
437
                                preceding non-nullable fields,
 
438
                                to avoid exceeding the maximum
 
439
                                length of a fixed-length column. */
 
440
                                buf = page_zip_fixed_field_encode(
 
441
                                        buf, fixed_sum << 1 | 1);
 
442
                                fixed_sum = 0;
 
443
                                col++;
 
444
                        }
 
445
 
 
446
                        if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
 
447
                                if (fixed_sum) {
 
448
                                        /* Write out the length of any
 
449
                                        preceding non-nullable fields,
 
450
                                        and start a new trx_id column. */
 
451
                                        buf = page_zip_fixed_field_encode(
 
452
                                                buf, fixed_sum << 1 | 1);
 
453
                                        col++;
 
454
                                }
 
455
 
 
456
                                trx_id_col = col;
 
457
                                fixed_sum = field->fixed_len;
 
458
                        } else {
 
459
                                /* add to the sum */
 
460
                                fixed_sum += field->fixed_len;
 
461
                        }
 
462
                } else {
 
463
                        /* fixed-length nullable field */
 
464
 
 
465
                        if (fixed_sum) {
 
466
                                /* write out the length of any
 
467
                                preceding non-nullable fields */
 
468
                                buf = page_zip_fixed_field_encode(
 
469
                                        buf, fixed_sum << 1 | 1);
 
470
                                fixed_sum = 0;
 
471
                                col++;
 
472
                        }
 
473
 
 
474
                        buf = page_zip_fixed_field_encode(
 
475
                                buf, field->fixed_len << 1);
 
476
                        col++;
 
477
                }
 
478
        }
 
479
 
 
480
        if (fixed_sum) {
 
481
                /* Write out the lengths of last fixed-length columns. */
 
482
                buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
 
483
        }
 
484
 
 
485
        if (trx_id_pos != ULINT_UNDEFINED) {
 
486
                /* Write out the position of the trx_id column */
 
487
                i = trx_id_col;
 
488
        } else {
 
489
                /* Write out the number of nullable fields */
 
490
                i = index->n_nullable;
 
491
        }
 
492
 
 
493
        if (i < 128) {
 
494
                *buf++ = (byte) i;
 
495
        } else {
 
496
                *buf++ = (byte) (0x80 | i >> 8);
 
497
                *buf++ = (byte) i;
 
498
        }
 
499
 
 
500
        ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
 
501
        return((ulint) (buf - buf_start));
 
502
}
 
503
 
 
504
/**************************************************************************
 
505
Populate the dense page directory from the sparse directory. */
 
506
static
 
507
void
 
508
page_zip_dir_encode(
 
509
/*================*/
 
510
        const page_t*   page,   /* in: compact page */
 
511
        byte*           buf,    /* in: pointer to dense page directory[-1];
 
512
                                out: dense directory on compressed page */
 
513
        const rec_t**   recs)   /* in: pointer to an array of 0, or NULL;
 
514
                                out: dense page directory sorted by ascending
 
515
                                address (and heap_no) */
 
516
{
 
517
        const byte*     rec;
 
518
        ulint           status;
 
519
        ulint           min_mark;
 
520
        ulint           heap_no;
 
521
        ulint           i;
 
522
        ulint           n_heap;
 
523
        ulint           offs;
 
524
 
 
525
        min_mark = 0;
 
526
 
 
527
        if (page_is_leaf(page)) {
 
528
                status = REC_STATUS_ORDINARY;
 
529
        } else {
 
530
                status = REC_STATUS_NODE_PTR;
 
531
                if (UNIV_UNLIKELY
 
532
                    (mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) {
 
533
                        min_mark = REC_INFO_MIN_REC_FLAG;
 
534
                }
 
535
        }
 
536
 
 
537
        n_heap = page_dir_get_n_heap(page);
 
538
 
 
539
        /* Traverse the list of stored records in the collation order,
 
540
        starting from the first user record. */
 
541
 
 
542
        rec = page + PAGE_NEW_INFIMUM, TRUE;
 
543
 
 
544
        i = 0;
 
545
 
 
546
        for (;;) {
 
547
                ulint   info_bits;
 
548
                offs = rec_get_next_offs(rec, TRUE);
 
549
                if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
 
550
                        break;
 
551
                }
 
552
                rec = page + offs;
 
553
                heap_no = rec_get_heap_no_new(rec);
 
554
                ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
 
555
                ut_a(heap_no < n_heap);
 
556
                ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR);
 
557
                ut_a(offs >= PAGE_ZIP_START);
 
558
#if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1)
 
559
# error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2"
 
560
#endif
 
561
#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1
 
562
# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1"
 
563
#endif
 
564
                if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
 
565
                        offs |= PAGE_ZIP_DIR_SLOT_OWNED;
 
566
                }
 
567
 
 
568
                info_bits = rec_get_info_bits(rec, TRUE);
 
569
                if (UNIV_UNLIKELY(info_bits & REC_INFO_DELETED_FLAG)) {
 
570
                        info_bits &= ~REC_INFO_DELETED_FLAG;
 
571
                        offs |= PAGE_ZIP_DIR_SLOT_DEL;
 
572
                }
 
573
                ut_a(info_bits == min_mark);
 
574
                /* Only the smallest user record can have
 
575
                REC_INFO_MIN_REC_FLAG set. */
 
576
                min_mark = 0;
 
577
 
 
578
                mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
 
579
 
 
580
                if (UNIV_LIKELY_NULL(recs)) {
 
581
                        /* Ensure that each heap_no occurs at most once. */
 
582
                        ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
 
583
                        /* exclude infimum and supremum */
 
584
                        recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
 
585
                }
 
586
 
 
587
                ut_a(rec_get_status(rec) == status);
 
588
        }
 
589
 
 
590
        offs = page_header_get_field(page, PAGE_FREE);
 
591
 
 
592
        /* Traverse the free list (of deleted records). */
 
593
        while (offs) {
 
594
                ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
 
595
                rec = page + offs;
 
596
 
 
597
                heap_no = rec_get_heap_no_new(rec);
 
598
                ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
 
599
                ut_a(heap_no < n_heap);
 
600
 
 
601
                ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
 
602
                ut_a(rec_get_status(rec) == status);
 
603
 
 
604
                mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
 
605
 
 
606
                if (UNIV_LIKELY_NULL(recs)) {
 
607
                        /* Ensure that each heap_no occurs at most once. */
 
608
                        ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
 
609
                        /* exclude infimum and supremum */
 
610
                        recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
 
611
                }
 
612
 
 
613
                offs = rec_get_next_offs(rec, TRUE);
 
614
        }
 
615
 
 
616
        /* Ensure that each heap no occurs at least once. */
 
617
        ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap);
 
618
}
 
619
 
 
620
/**************************************************************************
 
621
Allocate memory for zlib. */
 
622
static
 
623
void*
 
624
page_zip_malloc(
 
625
/*============*/
 
626
        void*   opaque,
 
627
        uInt    items,
 
628
        uInt    size)
 
629
{
 
630
        return(mem_heap_alloc(opaque, items * size));
 
631
}
 
632
 
 
633
/**************************************************************************
Deallocate memory for zlib.  This is the zfree callback that
page_zip_set_alloc() installs.  It intentionally does nothing.
NOTE(review): presumably the memory is released together with the memory
heap that page_zip_malloc() allocated it from - confirm. */
static
void
page_zip_free(
/*==========*/
        void*   opaque __attribute__((unused)), /* in: ignored */
        void*   address __attribute__((unused)))/* in: ignored */
{
        /* nothing to do */
        return;
}
 
643
 
 
644
/**************************************************************************
 
645
Configure the zlib allocator to use the given memory heap. */
 
646
UNIV_INTERN
 
647
void
 
648
page_zip_set_alloc(
 
649
/*===============*/
 
650
        void*           stream,         /* in/out: zlib stream */
 
651
        mem_heap_t*     heap)           /* in: memory heap to use */
 
652
{
 
653
        z_stream*       strm = stream;
 
654
 
 
655
        strm->zalloc = page_zip_malloc;
 
656
        strm->zfree = page_zip_free;
 
657
        strm->opaque = heap;
 
658
}
 
659
 
 
660
#if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
 
661
# define PAGE_ZIP_COMPRESS_DBG
 
662
#endif
 
663
 
 
664
#ifdef PAGE_ZIP_COMPRESS_DBG
/* Set this variable in a debugger to enable
excessive logging in page_zip_compress(). */
UNIV_INTERN ibool       page_zip_compress_dbg;
/* Set this variable in a debugger to enable
binary logging of the data passed to deflate().
When this variable is nonzero, it will act
as a log file name generator. */
UNIV_INTERN unsigned    page_zip_compress_log;

/**************************************************************************
Wrapper for deflate().  Log the operation if page_zip_compress_dbg is set.
If a log file is given, the pending input of the stream is also appended
to it in binary form before deflate() is called.

The zlib status codes are signed (the error codes are negative), so the
function returns int rather than a Boolean type. */
static
int
page_zip_compress_deflate(
/*======================*/
                                /* out: the return value of deflate() */
        FILE*           logfile,/* in: log file, or NULL */
        z_streamp       strm,   /* in/out: compressed stream for deflate() */
        int             flush)  /* in: deflate() flushing method */
{
        int     status;
        if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
                ut_print_buf(stderr, strm->next_in, strm->avail_in);
        }
        if (UNIV_LIKELY_NULL(logfile)) {
                /* The log is only a debugging aid: report a short
                write, but carry on with the compression. */
                if (fwrite(strm->next_in, 1, strm->avail_in, logfile)
                    != strm->avail_in) {
                        perror("fwrite");
                }
        }
        /* This calls the real deflate(); the name is redefined
        only after this function. */
        status = deflate(strm, flush);
        if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
                fprintf(stderr, " -> %d\n", status);
        }
        return(status);
}

/* Redefine deflate().  From here on, every deflate() call goes through
the wrapper and requires a variable called logfile to be in scope.
FILE_LOGFILE and LOGFILE add that extra parameter to function
declarations and to argument lists, respectively. */
# undef deflate
# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
# define FILE_LOGFILE FILE* logfile,
# define LOGFILE logfile,
#else /* PAGE_ZIP_COMPRESS_DBG */
# define FILE_LOGFILE
# define LOGFILE
#endif /* PAGE_ZIP_COMPRESS_DBG */
 
707
 
 
708
/**************************************************************************
 
709
Compress the records of a node pointer page. */
 
710
static
 
711
int
 
712
page_zip_compress_node_ptrs(
 
713
/*========================*/
 
714
                                        /* out: Z_OK, or a zlib error code */
 
715
        FILE_LOGFILE
 
716
        z_stream*       c_stream,       /* in/out: compressed page stream */
 
717
        const rec_t**   recs,           /* in: dense page directory
 
718
                                        sorted by address */
 
719
        ulint           n_dense,        /* in: size of recs[] */
 
720
        dict_index_t*   index,          /* in: the index of the page */
 
721
        byte*           storage,        /* in: end of dense page directory */
 
722
        mem_heap_t*     heap)           /* in: temporary memory heap */
 
723
{
 
724
        int     err     = Z_OK;
 
725
        ulint*  offsets = NULL;
 
726
 
 
727
        do {
 
728
                const rec_t*    rec = *recs++;
 
729
 
 
730
                offsets = rec_get_offsets(rec, index, offsets,
 
731
                                          ULINT_UNDEFINED, &heap);
 
732
                /* Only leaf nodes may contain externally stored columns. */
 
733
                ut_ad(!rec_offs_any_extern(offsets));
 
734
 
 
735
                UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
 
736
                UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
 
737
                                   rec_offs_extra_size(offsets));
 
738
 
 
739
                /* Compress the extra bytes. */
 
740
                c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
 
741
                        - c_stream->next_in;
 
742
 
 
743
                if (c_stream->avail_in) {
 
744
                        err = deflate(c_stream, Z_NO_FLUSH);
 
745
                        if (UNIV_UNLIKELY(err != Z_OK)) {
 
746
                                break;
 
747
                        }
 
748
                }
 
749
                ut_ad(!c_stream->avail_in);
 
750
 
 
751
                /* Compress the data bytes, except node_ptr. */
 
752
                c_stream->next_in = (byte*) rec;
 
753
                c_stream->avail_in = rec_offs_data_size(offsets)
 
754
                        - REC_NODE_PTR_SIZE;
 
755
                ut_ad(c_stream->avail_in);
 
756
 
 
757
                err = deflate(c_stream, Z_NO_FLUSH);
 
758
                if (UNIV_UNLIKELY(err != Z_OK)) {
 
759
                        break;
 
760
                }
 
761
 
 
762
                ut_ad(!c_stream->avail_in);
 
763
 
 
764
                memcpy(storage - REC_NODE_PTR_SIZE
 
765
                       * (rec_get_heap_no_new(rec) - 1),
 
766
                       c_stream->next_in, REC_NODE_PTR_SIZE);
 
767
                c_stream->next_in += REC_NODE_PTR_SIZE;
 
768
        } while (--n_dense);
 
769
 
 
770
        return(err);
 
771
}
 
772
 
 
773
/**************************************************************************
 
774
Compress the records of a leaf node of a secondary index. */
 
775
static
 
776
int
 
777
page_zip_compress_sec(
 
778
/*==================*/
 
779
                                        /* out: Z_OK, or a zlib error code */
 
780
        FILE_LOGFILE
 
781
        z_stream*       c_stream,       /* in/out: compressed page stream */
 
782
        const rec_t**   recs,           /* in: dense page directory
 
783
                                        sorted by address */
 
784
        ulint           n_dense)        /* in: size of recs[] */
 
785
{
 
786
        int             err     = Z_OK;
 
787
 
 
788
        ut_ad(n_dense > 0);
 
789
 
 
790
        do {
 
791
                const rec_t*    rec = *recs++;
 
792
 
 
793
                /* Compress everything up to this record. */
 
794
                c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
 
795
                        - c_stream->next_in;
 
796
 
 
797
                if (UNIV_LIKELY(c_stream->avail_in)) {
 
798
                        UNIV_MEM_ASSERT_RW(c_stream->next_in,
 
799
                                           c_stream->avail_in);
 
800
                        err = deflate(c_stream, Z_NO_FLUSH);
 
801
                        if (UNIV_UNLIKELY(err != Z_OK)) {
 
802
                                break;
 
803
                        }
 
804
                }
 
805
 
 
806
                ut_ad(!c_stream->avail_in);
 
807
                ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
 
808
 
 
809
                /* Skip the REC_N_NEW_EXTRA_BYTES. */
 
810
 
 
811
                c_stream->next_in = (byte*) rec;
 
812
        } while (--n_dense);
 
813
 
 
814
        return(err);
 
815
}
 
816
 
 
817
/**************************************************************************
 
818
Compress a record of a leaf node of a clustered index that contains
 
819
externally stored columns. */
 
820
static
 
821
int
 
822
page_zip_compress_clust_ext(
 
823
/*========================*/
 
824
                                        /* out: Z_OK, or a zlib error code */
 
825
        FILE_LOGFILE
 
826
        z_stream*       c_stream,       /* in/out: compressed page stream */
 
827
        const rec_t*    rec,            /* in: record */
 
828
        const ulint*    offsets,        /* in: rec_get_offsets(rec) */
 
829
        ulint           trx_id_col,     /* in: position of of DB_TRX_ID */
 
830
        byte*           deleted,        /* in: dense directory entry pointing
 
831
                                        to the head of the free list */
 
832
        byte*           storage,        /* in: end of dense page directory */
 
833
        byte**          externs,        /* in/out: pointer to the next
 
834
                                        available BLOB pointer */
 
835
        ulint*          n_blobs)        /* in/out: number of
 
836
                                        externally stored columns */
 
837
{
 
838
        int     err;
 
839
        ulint   i;
 
840
 
 
841
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
 
842
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
 
843
                           rec_offs_extra_size(offsets));
 
844
 
 
845
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
 
846
                ulint           len;
 
847
                const byte*     src;
 
848
 
 
849
                if (UNIV_UNLIKELY(i == trx_id_col)) {
 
850
                        ut_ad(!rec_offs_nth_extern(offsets, i));
 
851
                        /* Store trx_id and roll_ptr
 
852
                        in uncompressed form. */
 
853
                        src = rec_get_nth_field(rec, offsets, i, &len);
 
854
                        ut_ad(src + DATA_TRX_ID_LEN
 
855
                              == rec_get_nth_field(rec, offsets,
 
856
                                                   i + 1, &len));
 
857
                        ut_ad(len == DATA_ROLL_PTR_LEN);
 
858
 
 
859
                        /* Compress any preceding bytes. */
 
860
                        c_stream->avail_in
 
861
                                = src - c_stream->next_in;
 
862
 
 
863
                        if (c_stream->avail_in) {
 
864
                                err = deflate(c_stream, Z_NO_FLUSH);
 
865
                                if (UNIV_UNLIKELY(err != Z_OK)) {
 
866
 
 
867
                                        return(err);
 
868
                                }
 
869
                        }
 
870
 
 
871
                        ut_ad(!c_stream->avail_in);
 
872
                        ut_ad(c_stream->next_in == src);
 
873
 
 
874
                        memcpy(storage
 
875
                               - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
 
876
                               * (rec_get_heap_no_new(rec) - 1),
 
877
                               c_stream->next_in,
 
878
                               DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
879
 
 
880
                        c_stream->next_in
 
881
                                += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
 
882
 
 
883
                        /* Skip also roll_ptr */
 
884
                        i++;
 
885
                } else if (rec_offs_nth_extern(offsets, i)) {
 
886
                        src = rec_get_nth_field(rec, offsets, i, &len);
 
887
                        ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
 
888
                        src += len - BTR_EXTERN_FIELD_REF_SIZE;
 
889
 
 
890
                        c_stream->avail_in = src
 
891
                                - c_stream->next_in;
 
892
                        if (UNIV_LIKELY(c_stream->avail_in)) {
 
893
                                err = deflate(c_stream, Z_NO_FLUSH);
 
894
                                if (UNIV_UNLIKELY(err != Z_OK)) {
 
895
 
 
896
                                        return(err);
 
897
                                }
 
898
                        }
 
899
 
 
900
                        ut_ad(!c_stream->avail_in);
 
901
                        ut_ad(c_stream->next_in == src);
 
902
 
 
903
                        /* Reserve space for the data at
 
904
                        the end of the space reserved for
 
905
                        the compressed data and the page
 
906
                        modification log. */
 
907
 
 
908
                        if (UNIV_UNLIKELY
 
909
                            (c_stream->avail_out
 
910
                             <= BTR_EXTERN_FIELD_REF_SIZE)) {
 
911
                                /* out of space */
 
912
                                return(Z_BUF_ERROR);
 
913
                        }
 
914
 
 
915
                        ut_ad(*externs == c_stream->next_out
 
916
                              + c_stream->avail_out
 
917
                              + 1/* end of modif. log */);
 
918
 
 
919
                        c_stream->next_in
 
920
                                += BTR_EXTERN_FIELD_REF_SIZE;
 
921
 
 
922
                        /* Skip deleted records. */
 
923
                        if (UNIV_LIKELY_NULL
 
924
                            (page_zip_dir_find_low(
 
925
                                    storage, deleted,
 
926
                                    page_offset(rec)))) {
 
927
                                continue;
 
928
                        }
 
929
 
 
930
                        (*n_blobs)++;
 
931
                        c_stream->avail_out
 
932
                                -= BTR_EXTERN_FIELD_REF_SIZE;
 
933
                        *externs -= BTR_EXTERN_FIELD_REF_SIZE;
 
934
 
 
935
                        /* Copy the BLOB pointer */
 
936
                        memcpy(*externs, c_stream->next_in
 
937
                               - BTR_EXTERN_FIELD_REF_SIZE,
 
938
                               BTR_EXTERN_FIELD_REF_SIZE);
 
939
                }
 
940
        }
 
941
 
 
942
        return(Z_OK);
 
943
}
 
944
 
 
945
/**************************************************************************
 
946
Compress the records of a leaf node of a clustered index. */
 
947
static
 
948
int
 
949
page_zip_compress_clust(
 
950
/*====================*/
 
951
                                        /* out: Z_OK, or a zlib error code */
 
952
        FILE_LOGFILE
 
953
        z_stream*       c_stream,       /* in/out: compressed page stream */
 
954
        const rec_t**   recs,           /* in: dense page directory
 
955
                                        sorted by address */
 
956
        ulint           n_dense,        /* in: size of recs[] */
 
957
        dict_index_t*   index,          /* in: the index of the page */
 
958
        ulint*          n_blobs,        /* in: 0; out: number of
 
959
                                        externally stored columns */
 
960
        ulint           trx_id_col,     /* index of the trx_id column */
 
961
        byte*           deleted,        /* in: dense directory entry pointing
 
962
                                        to the head of the free list */
 
963
        byte*           storage,        /* in: end of dense page directory */
 
964
        mem_heap_t*     heap)           /* in: temporary memory heap */
 
965
{
 
966
        int     err             = Z_OK;
 
967
        ulint*  offsets         = NULL;
 
968
        /* BTR_EXTERN_FIELD_REF storage */
 
969
        byte*   externs         = storage - n_dense
 
970
                * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
971
 
 
972
        ut_ad(*n_blobs == 0);
 
973
 
 
974
        do {
 
975
                const rec_t*    rec = *recs++;
 
976
 
 
977
                offsets = rec_get_offsets(rec, index, offsets,
 
978
                                          ULINT_UNDEFINED, &heap);
 
979
                ut_ad(rec_offs_n_fields(offsets)
 
980
                      == dict_index_get_n_fields(index));
 
981
                UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
 
982
                UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
 
983
                                   rec_offs_extra_size(offsets));
 
984
 
 
985
                /* Compress the extra bytes. */
 
986
                c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
 
987
                        - c_stream->next_in;
 
988
 
 
989
                if (c_stream->avail_in) {
 
990
                        err = deflate(c_stream, Z_NO_FLUSH);
 
991
                        if (UNIV_UNLIKELY(err != Z_OK)) {
 
992
 
 
993
                                goto func_exit;
 
994
                        }
 
995
                }
 
996
                ut_ad(!c_stream->avail_in);
 
997
                ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
 
998
 
 
999
                /* Compress the data bytes. */
 
1000
 
 
1001
                c_stream->next_in = (byte*) rec;
 
1002
 
 
1003
                /* Check if there are any externally stored columns.
 
1004
                For each externally stored column, store the
 
1005
                BTR_EXTERN_FIELD_REF separately. */
 
1006
                if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
 
1007
                        ut_ad(dict_index_is_clust(index));
 
1008
 
 
1009
                        err = page_zip_compress_clust_ext(
 
1010
                                LOGFILE
 
1011
                                c_stream, rec, offsets, trx_id_col,
 
1012
                                deleted, storage, &externs, n_blobs);
 
1013
 
 
1014
                        if (UNIV_UNLIKELY(err != Z_OK)) {
 
1015
 
 
1016
                                goto func_exit;
 
1017
                        }
 
1018
                } else {
 
1019
                        ulint           len;
 
1020
                        const byte*     src;
 
1021
 
 
1022
                        /* Store trx_id and roll_ptr in uncompressed form. */
 
1023
                        src = rec_get_nth_field(rec, offsets,
 
1024
                                                trx_id_col, &len);
 
1025
                        ut_ad(src + DATA_TRX_ID_LEN
 
1026
                              == rec_get_nth_field(rec, offsets,
 
1027
                                                   trx_id_col + 1, &len));
 
1028
                        ut_ad(len == DATA_ROLL_PTR_LEN);
 
1029
                        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
 
1030
                        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
 
1031
                                           rec_offs_extra_size(offsets));
 
1032
 
 
1033
                        /* Compress any preceding bytes. */
 
1034
                        c_stream->avail_in = src - c_stream->next_in;
 
1035
 
 
1036
                        if (c_stream->avail_in) {
 
1037
                                err = deflate(c_stream, Z_NO_FLUSH);
 
1038
                                if (UNIV_UNLIKELY(err != Z_OK)) {
 
1039
 
 
1040
                                        return(err);
 
1041
                                }
 
1042
                        }
 
1043
 
 
1044
                        ut_ad(!c_stream->avail_in);
 
1045
                        ut_ad(c_stream->next_in == src);
 
1046
 
 
1047
                        memcpy(storage
 
1048
                               - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
 
1049
                               * (rec_get_heap_no_new(rec) - 1),
 
1050
                               c_stream->next_in,
 
1051
                               DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
1052
 
 
1053
                        c_stream->next_in
 
1054
                                += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
 
1055
 
 
1056
                        /* Skip also roll_ptr */
 
1057
                        ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets));
 
1058
                }
 
1059
 
 
1060
                /* Compress the last bytes of the record. */
 
1061
                c_stream->avail_in = rec + rec_offs_data_size(offsets)
 
1062
                        - c_stream->next_in;
 
1063
 
 
1064
                if (c_stream->avail_in) {
 
1065
                        err = deflate(c_stream, Z_NO_FLUSH);
 
1066
                        if (UNIV_UNLIKELY(err != Z_OK)) {
 
1067
 
 
1068
                                goto func_exit;
 
1069
                        }
 
1070
                }
 
1071
                ut_ad(!c_stream->avail_in);
 
1072
        } while (--n_dense);
 
1073
 
 
1074
func_exit:
 
1075
        return(err);
 
1076
}
 
1077
 
 
1078
/**************************************************************************
 
1079
Compress a page. */
 
1080
UNIV_INTERN
 
1081
ibool
 
1082
page_zip_compress(
 
1083
/*==============*/
 
1084
                                /* out: TRUE on success, FALSE on failure;
 
1085
                                page_zip will be left intact on failure. */
 
1086
        page_zip_des_t* page_zip,/* in: size; out: data, n_blobs,
 
1087
                                m_start, m_end, m_nonempty */
 
1088
        const page_t*   page,   /* in: uncompressed page */
 
1089
        dict_index_t*   index,  /* in: index of the B-tree node */
 
1090
        mtr_t*          mtr)    /* in: mini-transaction, or NULL */
 
1091
{
 
1092
        z_stream        c_stream;
 
1093
        int             err;
 
1094
        ulint           n_fields;/* number of index fields needed */
 
1095
        byte*           fields; /* index field information */
 
1096
        byte*           buf;    /* compressed payload of the page */
 
1097
        byte*           buf_end;/* end of buf */
 
1098
        ulint           n_dense;
 
1099
        ulint           slot_size;/* amount of uncompressed bytes per record */
 
1100
        const rec_t**   recs;   /* dense page directory, sorted by address */
 
1101
        mem_heap_t*     heap;
 
1102
        ulint           trx_id_col;
 
1103
        ulint*          offsets = NULL;
 
1104
        ulint           n_blobs = 0;
 
1105
        byte*           storage;/* storage of uncompressed columns */
 
1106
        ullint          usec = ut_time_us(NULL);
 
1107
#ifdef PAGE_ZIP_COMPRESS_DBG
 
1108
        FILE*           logfile = NULL;
 
1109
#endif
 
1110
 
 
1111
        ut_a(page_is_comp(page));
 
1112
        ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
 
1113
        ut_ad(page_simple_validate_new((page_t*) page));
 
1114
        ut_ad(page_zip_simple_validate(page_zip));
 
1115
 
 
1116
        UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
 
1117
 
 
1118
        /* Check the data that will be omitted. */
 
1119
        ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
 
1120
                     infimum_extra, sizeof infimum_extra));
 
1121
        ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
 
1122
                     infimum_data, sizeof infimum_data));
 
1123
        ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
 
1124
             /* info_bits == 0, n_owned <= max */
 
1125
             <= PAGE_DIR_SLOT_MAX_N_OWNED);
 
1126
        ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
 
1127
                     supremum_extra_data, sizeof supremum_extra_data));
 
1128
 
 
1129
        if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
 
1130
                ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
 
1131
                     == PAGE_NEW_SUPREMUM);
 
1132
        }
 
1133
 
 
1134
        if (page_is_leaf(page)) {
 
1135
                n_fields = dict_index_get_n_fields(index);
 
1136
        } else {
 
1137
                n_fields = dict_index_get_n_unique_in_tree(index);
 
1138
        }
 
1139
 
 
1140
        /* The dense directory excludes the infimum and supremum records. */
 
1141
        n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
 
1142
#ifdef PAGE_ZIP_COMPRESS_DBG
 
1143
        if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
 
1144
                fprintf(stderr, "compress %p %p %lu %lu %lu\n",
 
1145
                        (void*) page_zip, (void*) page,
 
1146
                        page_is_leaf(page),
 
1147
                        n_fields, n_dense);
 
1148
        }
 
1149
        if (UNIV_UNLIKELY(page_zip_compress_log)) {
 
1150
                /* Create a log file for every compression attempt. */
 
1151
                char    logfilename[9];
 
1152
                ut_snprintf(logfilename, sizeof logfilename,
 
1153
                            "%08x", page_zip_compress_log++);
 
1154
                logfile = fopen(logfilename, "wb");
 
1155
 
 
1156
                if (logfile) {
 
1157
                        /* Write the uncompressed page to the log. */
 
1158
                        fwrite(page, 1, UNIV_PAGE_SIZE, logfile);
 
1159
                        /* Record the compressed size as zero.
 
1160
                        This will be overwritten at successful exit. */
 
1161
                        putc(0, logfile);
 
1162
                        putc(0, logfile);
 
1163
                        putc(0, logfile);
 
1164
                        putc(0, logfile);
 
1165
                }
 
1166
        }
 
1167
#endif /* PAGE_ZIP_COMPRESS_DBG */
 
1168
        page_zip_stat[page_zip->ssize - 1].compressed++;
 
1169
 
 
1170
        if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
 
1171
                          >= page_zip_get_size(page_zip))) {
 
1172
 
 
1173
                goto err_exit;
 
1174
        }
 
1175
 
 
1176
        heap = mem_heap_create(page_zip_get_size(page_zip)
 
1177
                               + n_fields * (2 + sizeof *offsets)
 
1178
                               + n_dense * ((sizeof *recs)
 
1179
                                            - PAGE_ZIP_DIR_SLOT_SIZE)
 
1180
                               + UNIV_PAGE_SIZE * 4
 
1181
                               + (512 << MAX_MEM_LEVEL));
 
1182
 
 
1183
        recs = mem_heap_zalloc(heap, n_dense * sizeof *recs);
 
1184
 
 
1185
        fields = mem_heap_alloc(heap, (n_fields + 1) * 2);
 
1186
 
 
1187
        buf = mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA);
 
1188
        buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA;
 
1189
 
 
1190
        /* Compress the data payload. */
 
1191
        page_zip_set_alloc(&c_stream, heap);
 
1192
 
 
1193
        err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
 
1194
                           Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
 
1195
                           MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
 
1196
        ut_a(err == Z_OK);
 
1197
 
 
1198
        c_stream.next_out = buf;
 
1199
        /* Subtract the space reserved for uncompressed data. */
 
1200
        /* Page header and the end marker of the modification log */
 
1201
        c_stream.avail_out = buf_end - buf - 1;
 
1202
        /* Dense page directory and uncompressed columns, if any */
 
1203
        if (page_is_leaf(page)) {
 
1204
                if (dict_index_is_clust(index)) {
 
1205
                        trx_id_col = dict_index_get_sys_col_pos(
 
1206
                                index, DATA_TRX_ID);
 
1207
                        ut_ad(trx_id_col > 0);
 
1208
                        ut_ad(trx_id_col != ULINT_UNDEFINED);
 
1209
 
 
1210
                        slot_size = PAGE_ZIP_DIR_SLOT_SIZE
 
1211
                                + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
 
1212
                } else {
 
1213
                        /* Signal the absence of trx_id
 
1214
                        in page_zip_fields_encode() */
 
1215
                        ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
 
1216
                              == ULINT_UNDEFINED);
 
1217
                        trx_id_col = 0;
 
1218
                        slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
 
1219
                }
 
1220
        } else {
 
1221
                slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
 
1222
                trx_id_col = ULINT_UNDEFINED;
 
1223
        }
 
1224
 
 
1225
        if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size
 
1226
                          + 6/* sizeof(zlib header and footer) */)) {
 
1227
                goto zlib_error;
 
1228
        }
 
1229
 
 
1230
        c_stream.avail_out -= n_dense * slot_size;
 
1231
        c_stream.avail_in = page_zip_fields_encode(n_fields, index,
 
1232
                                                   trx_id_col, fields);
 
1233
        c_stream.next_in = fields;
 
1234
        if (UNIV_LIKELY(!trx_id_col)) {
 
1235
                trx_id_col = ULINT_UNDEFINED;
 
1236
        }
 
1237
 
 
1238
        UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
 
1239
        err = deflate(&c_stream, Z_FULL_FLUSH);
 
1240
        if (err != Z_OK) {
 
1241
                goto zlib_error;
 
1242
        }
 
1243
 
 
1244
        ut_ad(!c_stream.avail_in);
 
1245
 
 
1246
        page_zip_dir_encode(page, buf_end, recs);
 
1247
 
 
1248
        c_stream.next_in = (byte*) page + PAGE_ZIP_START;
 
1249
 
 
1250
        storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
 
1251
 
 
1252
        /* Compress the records in heap_no order. */
 
1253
        if (UNIV_UNLIKELY(!n_dense)) {
 
1254
        } else if (!page_is_leaf(page)) {
 
1255
                /* This is a node pointer page. */
 
1256
                err = page_zip_compress_node_ptrs(LOGFILE
 
1257
                                                  &c_stream, recs, n_dense,
 
1258
                                                  index, storage, heap);
 
1259
                if (UNIV_UNLIKELY(err != Z_OK)) {
 
1260
                        goto zlib_error;
 
1261
                }
 
1262
        } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
 
1263
                /* This is a leaf page in a secondary index. */
 
1264
                err = page_zip_compress_sec(LOGFILE
 
1265
                                            &c_stream, recs, n_dense);
 
1266
                if (UNIV_UNLIKELY(err != Z_OK)) {
 
1267
                        goto zlib_error;
 
1268
                }
 
1269
        } else {
 
1270
                /* This is a leaf page in a clustered index. */
 
1271
                err = page_zip_compress_clust(LOGFILE
 
1272
                                              &c_stream, recs, n_dense,
 
1273
                                              index, &n_blobs, trx_id_col,
 
1274
                                              buf_end - PAGE_ZIP_DIR_SLOT_SIZE
 
1275
                                              * page_get_n_recs(page),
 
1276
                                              storage, heap);
 
1277
                if (UNIV_UNLIKELY(err != Z_OK)) {
 
1278
                        goto zlib_error;
 
1279
                }
 
1280
        }
 
1281
 
 
1282
        /* Finish the compression. */
 
1283
        ut_ad(!c_stream.avail_in);
 
1284
        /* Compress any trailing garbage, in case the last record was
 
1285
        allocated from an originally longer space on the free list,
 
1286
        or the data of the last record from page_zip_compress_sec(). */
 
1287
        c_stream.avail_in
 
1288
                = page_header_get_field(page, PAGE_HEAP_TOP)
 
1289
                - (c_stream.next_in - page);
 
1290
        ut_a(c_stream.avail_in <= UNIV_PAGE_SIZE - PAGE_ZIP_START - PAGE_DIR);
 
1291
 
 
1292
        UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
 
1293
        err = deflate(&c_stream, Z_FINISH);
 
1294
 
 
1295
        if (UNIV_UNLIKELY(err != Z_STREAM_END)) {
 
1296
zlib_error:
 
1297
                deflateEnd(&c_stream);
 
1298
                mem_heap_free(heap);
 
1299
err_exit:
 
1300
#ifdef PAGE_ZIP_COMPRESS_DBG
 
1301
                if (logfile) {
 
1302
                        fclose(logfile);
 
1303
                }
 
1304
#endif /* PAGE_ZIP_COMPRESS_DBG */
 
1305
                page_zip_stat[page_zip->ssize - 1].compressed_usec
 
1306
                        += ut_time_us(NULL) - usec;
 
1307
                return(FALSE);
 
1308
        }
 
1309
 
 
1310
        err = deflateEnd(&c_stream);
 
1311
        ut_a(err == Z_OK);
 
1312
 
 
1313
        ut_ad(buf + c_stream.total_out == c_stream.next_out);
 
1314
        ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out);
 
1315
 
 
1316
        /* Valgrind believes that zlib does not initialize some bits
 
1317
        in the last 7 or 8 bytes of the stream.  Make Valgrind happy. */
 
1318
        UNIV_MEM_VALID(buf, c_stream.total_out);
 
1319
 
 
1320
        /* Zero out the area reserved for the modification log.
 
1321
        Space for the end marker of the modification log is not
 
1322
        included in avail_out. */
 
1323
        memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */);
 
1324
 
 
1325
#ifdef UNIV_DEBUG
 
1326
        page_zip->m_start =
 
1327
#endif /* UNIV_DEBUG */
 
1328
                page_zip->m_end = PAGE_DATA + c_stream.total_out;
 
1329
        page_zip->m_nonempty = FALSE;
 
1330
        page_zip->n_blobs = n_blobs;
 
1331
        /* Copy those header fields that will not be written
 
1332
        in buf_flush_init_for_writing() */
 
1333
        memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
 
1334
               FIL_PAGE_LSN - FIL_PAGE_PREV);
 
1335
        memcpy(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2);
 
1336
        memcpy(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
 
1337
               PAGE_DATA - FIL_PAGE_DATA);
 
1338
        /* Copy the rest of the compressed page */
 
1339
        memcpy(page_zip->data + PAGE_DATA, buf,
 
1340
               page_zip_get_size(page_zip) - PAGE_DATA);
 
1341
        mem_heap_free(heap);
 
1342
#ifdef UNIV_ZIP_DEBUG
 
1343
        ut_a(page_zip_validate(page_zip, page));
 
1344
#endif /* UNIV_ZIP_DEBUG */
 
1345
 
 
1346
        if (mtr) {
 
1347
                page_zip_compress_write_log(page_zip, page, index, mtr);
 
1348
        }
 
1349
 
 
1350
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
1351
 
 
1352
#ifdef PAGE_ZIP_COMPRESS_DBG
 
1353
        if (logfile) {
 
1354
                /* Record the compressed size of the block. */
 
1355
                byte sz[4];
 
1356
                mach_write_to_4(sz, c_stream.total_out);
 
1357
                fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET);
 
1358
                fwrite(sz, 1, sizeof sz, logfile);
 
1359
                fclose(logfile);
 
1360
        }
 
1361
#endif /* PAGE_ZIP_COMPRESS_DBG */
 
1362
        {
 
1363
                page_zip_stat_t*        zip_stat
 
1364
                        = &page_zip_stat[page_zip->ssize - 1];
 
1365
                zip_stat->compressed_ok++;
 
1366
                zip_stat->compressed_usec += ut_time_us(NULL) - usec;
 
1367
        }
 
1368
 
 
1369
        return(TRUE);
 
1370
}
 
1371
 
 
1372
/**************************************************************************
 
1373
Compare two page directory entries. */
 
1374
UNIV_INLINE
 
1375
ibool
 
1376
page_zip_dir_cmp(
 
1377
/*=============*/
 
1378
                                /* out: positive if rec1 > rec2 */
 
1379
        const rec_t*    rec1,   /* in: rec1 */
 
1380
        const rec_t*    rec2)   /* in: rec2 */
 
1381
{
 
1382
        return(rec1 > rec2);
 
1383
}
 
1384
 
 
1385
/**************************************************************************
 
1386
Sort the dense page directory by address (heap_no). */
 
1387
static
 
1388
void
 
1389
page_zip_dir_sort(
 
1390
/*==============*/
 
1391
        rec_t** arr,    /* in/out: dense page directory */
 
1392
        rec_t** aux_arr,/* in/out: work area */
 
1393
        ulint   low,    /* in: lower bound of the sorting area, inclusive */
 
1394
        ulint   high)   /* in: upper bound of the sorting area, exclusive */
 
1395
{
 
1396
        UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
 
1397
                              page_zip_dir_cmp);
 
1398
}
 
1399
 
 
1400
/**************************************************************************
 
1401
Deallocate the index information initialized by page_zip_fields_decode(). */
 
1402
static
 
1403
void
 
1404
page_zip_fields_free(
 
1405
/*=================*/
 
1406
        dict_index_t*   index)  /* in: dummy index to be freed */
 
1407
{
 
1408
        if (index) {
 
1409
                dict_table_t*   table = index->table;
 
1410
                mem_heap_free(index->heap);
 
1411
                mutex_free(&(table->autoinc_mutex));
 
1412
                mem_heap_free(table->heap);
 
1413
        }
 
1414
}
 
1415
 
 
1416
/**************************************************************************
 
1417
Read the index information for the compressed page. */
 
1418
static
 
1419
dict_index_t*
 
1420
page_zip_fields_decode(
 
1421
/*===================*/
 
1422
                                /* out,own: dummy index describing the page,
 
1423
                                or NULL on error */
 
1424
        const byte*     buf,    /* in: index information */
 
1425
        const byte*     end,    /* in: end of buf */
 
1426
        ulint*          trx_id_col)/* in: NULL for non-leaf pages;
 
1427
                                for leaf pages, pointer to where to store
 
1428
                                the position of the trx_id column */
 
1429
{
 
1430
        const byte*     b;
 
1431
        ulint           n;
 
1432
        ulint           i;
 
1433
        ulint           val;
 
1434
        dict_table_t*   table;
 
1435
        dict_index_t*   index;
 
1436
 
 
1437
        /* Determine the number of fields. */
 
1438
        for (b = buf, n = 0; b < end; n++) {
 
1439
                if (*b++ & 0x80) {
 
1440
                        b++; /* skip the second byte */
 
1441
                }
 
1442
        }
 
1443
 
 
1444
        n--; /* n_nullable or trx_id */
 
1445
 
 
1446
        if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
 
1447
 
 
1448
                page_zip_fail(("page_zip_fields_decode: n = %lu\n",
 
1449
                               (ulong) n));
 
1450
                return(NULL);
 
1451
        }
 
1452
 
 
1453
        if (UNIV_UNLIKELY(b > end)) {
 
1454
 
 
1455
                page_zip_fail(("page_zip_fields_decode: %p > %p\n",
 
1456
                               (const void*) b, (const void*) end));
 
1457
                return(NULL);
 
1458
        }
 
1459
 
 
1460
        table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n,
 
1461
                                      DICT_TF_COMPACT);
 
1462
        index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
 
1463
                                      DICT_HDR_SPACE, 0, n);
 
1464
        index->table = table;
 
1465
        index->n_uniq = n;
 
1466
        /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
 
1467
        index->cached = TRUE;
 
1468
 
 
1469
        /* Initialize the fields. */
 
1470
        for (b = buf, i = 0; i < n; i++) {
 
1471
                ulint   mtype;
 
1472
                ulint   len;
 
1473
 
 
1474
                val = *b++;
 
1475
 
 
1476
                if (UNIV_UNLIKELY(val & 0x80)) {
 
1477
                        /* fixed length > 62 bytes */
 
1478
                        val = (val & 0x7f) << 8 | *b++;
 
1479
                        len = val >> 1;
 
1480
                        mtype = DATA_FIXBINARY;
 
1481
                } else if (UNIV_UNLIKELY(val >= 126)) {
 
1482
                        /* variable length with max > 255 bytes */
 
1483
                        len = 0x7fff;
 
1484
                        mtype = DATA_BINARY;
 
1485
                } else if (val <= 1) {
 
1486
                        /* variable length with max <= 255 bytes */
 
1487
                        len = 0;
 
1488
                        mtype = DATA_BINARY;
 
1489
                } else {
 
1490
                        /* fixed length < 62 bytes */
 
1491
                        len = val >> 1;
 
1492
                        mtype = DATA_FIXBINARY;
 
1493
                }
 
1494
 
 
1495
                dict_mem_table_add_col(table, NULL, NULL, mtype,
 
1496
                                       val & 1 ? DATA_NOT_NULL : 0, len);
 
1497
                dict_index_add_col(index, table,
 
1498
                                   dict_table_get_nth_col(table, i), 0);
 
1499
        }
 
1500
 
 
1501
        val = *b++;
 
1502
        if (UNIV_UNLIKELY(val & 0x80)) {
 
1503
                val = (val & 0x7f) << 8 | *b++;
 
1504
        }
 
1505
 
 
1506
        /* Decode the position of the trx_id column. */
 
1507
        if (trx_id_col) {
 
1508
                if (!val) {
 
1509
                        val = ULINT_UNDEFINED;
 
1510
                } else if (UNIV_UNLIKELY(val >= n)) {
 
1511
                        page_zip_fields_free(index);
 
1512
                        index = NULL;
 
1513
                } else {
 
1514
                        index->type = DICT_CLUSTERED;
 
1515
                }
 
1516
 
 
1517
                *trx_id_col = val;
 
1518
        } else {
 
1519
                /* Decode the number of nullable fields. */
 
1520
                if (UNIV_UNLIKELY(index->n_nullable > val)) {
 
1521
                        page_zip_fields_free(index);
 
1522
                        index = NULL;
 
1523
                } else {
 
1524
                        index->n_nullable = val;
 
1525
                }
 
1526
        }
 
1527
 
 
1528
        ut_ad(b == end);
 
1529
 
 
1530
        return(index);
 
1531
}
 
1532
 
 
1533
/**************************************************************************
 
1534
Populate the sparse page directory from the dense directory. */
 
1535
static
 
1536
ibool
 
1537
page_zip_dir_decode(
 
1538
/*================*/
 
1539
                                        /* out: TRUE on success,
 
1540
                                        FALSE on failure */
 
1541
        const page_zip_des_t*   page_zip,/* in: dense page directory on
 
1542
                                        compressed page */
 
1543
        page_t*                 page,   /* in: compact page with valid header;
 
1544
                                        out: trailer and sparse page directory
 
1545
                                        filled in */
 
1546
        rec_t**                 recs,   /* out: dense page directory sorted by
 
1547
                                        ascending address (and heap_no) */
 
1548
        rec_t**                 recs_aux,/* in/out: scratch area */
 
1549
        ulint                   n_dense)/* in: number of user records, and
 
1550
                                        size of recs[] and recs_aux[] */
 
1551
{
 
1552
        ulint   i;
 
1553
        ulint   n_recs;
 
1554
        byte*   slot;
 
1555
 
 
1556
        n_recs = page_get_n_recs(page);
 
1557
 
 
1558
        if (UNIV_UNLIKELY(n_recs > n_dense)) {
 
1559
                page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n",
 
1560
                               (ulong) n_recs, (ulong) n_dense));
 
1561
                return(FALSE);
 
1562
        }
 
1563
 
 
1564
        /* Traverse the list of stored records in the sorting order,
 
1565
        starting from the first user record. */
 
1566
 
 
1567
        slot = page + (UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
 
1568
        UNIV_PREFETCH_RW(slot);
 
1569
 
 
1570
        /* Zero out the page trailer. */
 
1571
        memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);
 
1572
 
 
1573
        mach_write_to_2(slot, PAGE_NEW_INFIMUM);
 
1574
        slot -= PAGE_DIR_SLOT_SIZE;
 
1575
        UNIV_PREFETCH_RW(slot);
 
1576
 
 
1577
        /* Initialize the sparse directory and copy the dense directory. */
 
1578
        for (i = 0; i < n_recs; i++) {
 
1579
                ulint   offs = page_zip_dir_get(page_zip, i);
 
1580
 
 
1581
                if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
 
1582
                        mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
 
1583
                        slot -= PAGE_DIR_SLOT_SIZE;
 
1584
                        UNIV_PREFETCH_RW(slot);
 
1585
                }
 
1586
 
 
1587
                if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
 
1588
                                  < PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
 
1589
                        page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n",
 
1590
                                       (unsigned) i, (unsigned) n_recs,
 
1591
                                       (ulong) offs));
 
1592
                        return(FALSE);
 
1593
                }
 
1594
 
 
1595
                recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
 
1596
        }
 
1597
 
 
1598
        mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
 
1599
        {
 
1600
                const page_dir_slot_t*  last_slot = page_dir_get_nth_slot(
 
1601
                        page, page_dir_get_n_slots(page) - 1);
 
1602
 
 
1603
                if (UNIV_UNLIKELY(slot != last_slot)) {
 
1604
                        page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
 
1605
                                       (const void*) slot,
 
1606
                                       (const void*) last_slot));
 
1607
                        return(FALSE);
 
1608
                }
 
1609
        }
 
1610
 
 
1611
        /* Copy the rest of the dense directory. */
 
1612
        for (; i < n_dense; i++) {
 
1613
                ulint   offs = page_zip_dir_get(page_zip, i);
 
1614
 
 
1615
                if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
 
1616
                        page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n",
 
1617
                                       (unsigned) i, (unsigned) n_dense,
 
1618
                                       (ulong) offs));
 
1619
                        return(FALSE);
 
1620
                }
 
1621
 
 
1622
                recs[i] = page + offs;
 
1623
        }
 
1624
 
 
1625
        if (UNIV_LIKELY(n_dense > 1)) {
 
1626
                page_zip_dir_sort(recs, recs_aux, 0, n_dense);
 
1627
        }
 
1628
        return(TRUE);
 
1629
}
 
1630
 
 
1631
/**************************************************************************
 
1632
Initialize the REC_N_NEW_EXTRA_BYTES of each record. */
 
1633
static
 
1634
ibool
 
1635
page_zip_set_extra_bytes(
 
1636
/*=====================*/
 
1637
                                        /* out: TRUE on success,
 
1638
                                        FALSE on failure */
 
1639
        const page_zip_des_t*   page_zip,/* in: compressed page */
 
1640
        page_t*                 page,   /* in/out: uncompressed page */
 
1641
        ulint                   info_bits)/* in: REC_INFO_MIN_REC_FLAG or 0 */
 
1642
{
 
1643
        ulint   n;
 
1644
        ulint   i;
 
1645
        ulint   n_owned = 1;
 
1646
        ulint   offs;
 
1647
        rec_t*  rec;
 
1648
 
 
1649
        n = page_get_n_recs(page);
 
1650
        rec = page + PAGE_NEW_INFIMUM;
 
1651
 
 
1652
        for (i = 0; i < n; i++) {
 
1653
                offs = page_zip_dir_get(page_zip, i);
 
1654
 
 
1655
                if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_DEL)) {
 
1656
                        info_bits |= REC_INFO_DELETED_FLAG;
 
1657
                }
 
1658
                if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
 
1659
                        info_bits |= n_owned;
 
1660
                        n_owned = 1;
 
1661
                } else {
 
1662
                        n_owned++;
 
1663
                }
 
1664
                offs &= PAGE_ZIP_DIR_SLOT_MASK;
 
1665
                if (UNIV_UNLIKELY(offs < PAGE_ZIP_START
 
1666
                                  + REC_N_NEW_EXTRA_BYTES)) {
 
1667
                        page_zip_fail(("page_zip_set_extra_bytes 1:"
 
1668
                                       " %u %u %lx\n",
 
1669
                                       (unsigned) i, (unsigned) n,
 
1670
                                       (ulong) offs));
 
1671
                        return(FALSE);
 
1672
                }
 
1673
 
 
1674
                rec_set_next_offs_new(rec, offs);
 
1675
                rec = page + offs;
 
1676
                rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
 
1677
                info_bits = 0;
 
1678
        }
 
1679
 
 
1680
        /* Set the next pointer of the last user record. */
 
1681
        rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);
 
1682
 
 
1683
        /* Set n_owned of the supremum record. */
 
1684
        page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;
 
1685
 
 
1686
        /* The dense directory excludes the infimum and supremum records. */
 
1687
        n = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
 
1688
 
 
1689
        if (i >= n) {
 
1690
                if (UNIV_LIKELY(i == n)) {
 
1691
                        return(TRUE);
 
1692
                }
 
1693
 
 
1694
                page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n",
 
1695
                               (unsigned) i, (unsigned) n));
 
1696
                return(FALSE);
 
1697
        }
 
1698
 
 
1699
        offs = page_zip_dir_get(page_zip, i);
 
1700
 
 
1701
        /* Set the extra bytes of deleted records on the free list. */
 
1702
        for (;;) {
 
1703
                if (UNIV_UNLIKELY(!offs)
 
1704
                    || UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
 
1705
 
 
1706
                        page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n",
 
1707
                                       (ulong) offs));
 
1708
                        return(FALSE);
 
1709
                }
 
1710
 
 
1711
                rec = page + offs;
 
1712
                rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
 
1713
 
 
1714
                if (++i == n) {
 
1715
                        break;
 
1716
                }
 
1717
 
 
1718
                offs = page_zip_dir_get(page_zip, i);
 
1719
                rec_set_next_offs_new(rec, offs);
 
1720
        }
 
1721
 
 
1722
        /* Terminate the free list. */
 
1723
        rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
 
1724
        rec_set_next_offs_new(rec, 0);
 
1725
 
 
1726
        return(TRUE);
 
1727
}
 
1728
 
 
1729
/**************************************************************************
 
1730
Apply the modification log to a record containing externally stored
 
1731
columns.  Do not copy the fields that are stored separately. */
 
1732
static
 
1733
const byte*
 
1734
page_zip_apply_log_ext(
 
1735
/*===================*/
 
1736
                                        /* out: pointer to modification log,
 
1737
                                        or NULL on failure */
 
1738
        rec_t*          rec,            /* in/out: record */
 
1739
        const ulint*    offsets,        /* in: rec_get_offsets(rec) */
 
1740
        ulint           trx_id_col,     /* in: position of of DB_TRX_ID */
 
1741
        const byte*     data,           /* in: modification log */
 
1742
        const byte*     end)            /* in: end of modification log */
 
1743
{
 
1744
        ulint   i;
 
1745
        ulint   len;
 
1746
        byte*   next_out = rec;
 
1747
 
 
1748
        /* Check if there are any externally stored columns.
 
1749
        For each externally stored column, skip the
 
1750
        BTR_EXTERN_FIELD_REF. */
 
1751
 
 
1752
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
 
1753
                byte*   dst;
 
1754
 
 
1755
                if (UNIV_UNLIKELY(i == trx_id_col)) {
 
1756
                        /* Skip trx_id and roll_ptr */
 
1757
                        dst = rec_get_nth_field(rec, offsets,
 
1758
                                                i, &len);
 
1759
                        if (UNIV_UNLIKELY(dst - next_out >= end - data)
 
1760
                            || UNIV_UNLIKELY
 
1761
                            (len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
 
1762
                            || rec_offs_nth_extern(offsets, i)) {
 
1763
                                page_zip_fail(("page_zip_apply_log_ext:"
 
1764
                                               " trx_id len %lu,"
 
1765
                                               " %p - %p >= %p - %p\n",
 
1766
                                               (ulong) len,
 
1767
                                               (const void*) dst,
 
1768
                                               (const void*) next_out,
 
1769
                                               (const void*) end,
 
1770
                                               (const void*) data));
 
1771
                                return(NULL);
 
1772
                        }
 
1773
 
 
1774
                        memcpy(next_out, data, dst - next_out);
 
1775
                        data += dst - next_out;
 
1776
                        next_out = dst + (DATA_TRX_ID_LEN
 
1777
                                          + DATA_ROLL_PTR_LEN);
 
1778
                } else if (rec_offs_nth_extern(offsets, i)) {
 
1779
                        dst = rec_get_nth_field(rec, offsets,
 
1780
                                                i, &len);
 
1781
                        ut_ad(len
 
1782
                              >= BTR_EXTERN_FIELD_REF_SIZE);
 
1783
 
 
1784
                        len += dst - next_out
 
1785
                                - BTR_EXTERN_FIELD_REF_SIZE;
 
1786
 
 
1787
                        if (UNIV_UNLIKELY(data + len >= end)) {
 
1788
                                page_zip_fail(("page_zip_apply_log_ext: "
 
1789
                                               "ext %p+%lu >= %p\n",
 
1790
                                               (const void*) data,
 
1791
                                               (ulong) len,
 
1792
                                               (const void*) end));
 
1793
                                return(NULL);
 
1794
                        }
 
1795
 
 
1796
                        memcpy(next_out, data, len);
 
1797
                        data += len;
 
1798
                        next_out += len
 
1799
                                + BTR_EXTERN_FIELD_REF_SIZE;
 
1800
                }
 
1801
        }
 
1802
 
 
1803
        /* Copy the last bytes of the record. */
 
1804
        len = rec_get_end(rec, offsets) - next_out;
 
1805
        if (UNIV_UNLIKELY(data + len >= end)) {
 
1806
                page_zip_fail(("page_zip_apply_log_ext: "
 
1807
                               "last %p+%lu >= %p\n",
 
1808
                               (const void*) data,
 
1809
                               (ulong) len,
 
1810
                               (const void*) end));
 
1811
                return(NULL);
 
1812
        }
 
1813
        memcpy(next_out, data, len);
 
1814
        data += len;
 
1815
 
 
1816
        return(data);
 
1817
}
 
1818
 
 
1819
/**************************************************************************
 
1820
Apply the modification log to an uncompressed page.
 
1821
Do not copy the fields that are stored separately. */
 
1822
static
 
1823
const byte*
 
1824
page_zip_apply_log(
 
1825
/*===============*/
 
1826
                                /* out: pointer to end of modification log,
 
1827
                                or NULL on failure */
 
1828
        const byte*     data,   /* in: modification log */
 
1829
        ulint           size,   /* in: maximum length of the log, in bytes */
 
1830
        rec_t**         recs,   /* in: dense page directory,
 
1831
                                sorted by address (indexed by
 
1832
                                heap_no - PAGE_HEAP_NO_USER_LOW) */
 
1833
        ulint           n_dense,/* in: size of recs[] */
 
1834
        ulint           trx_id_col,/* in: column number of trx_id in the index,
 
1835
                                or ULINT_UNDEFINED if none */
 
1836
        ulint           heap_status,
 
1837
                                /* in: heap_no and status bits for
 
1838
                                the next record to uncompress */
 
1839
        dict_index_t*   index,  /* in: index of the page */
 
1840
        ulint*          offsets)/* in/out: work area for
 
1841
                                rec_get_offsets_reverse() */
 
1842
{
 
1843
        const byte* const end = data + size;
 
1844
 
 
1845
        for (;;) {
 
1846
                ulint   val;
 
1847
                rec_t*  rec;
 
1848
                ulint   len;
 
1849
                ulint   hs;
 
1850
 
 
1851
                val = *data++;
 
1852
                if (UNIV_UNLIKELY(!val)) {
 
1853
                        return(data - 1);
 
1854
                }
 
1855
                if (val & 0x80) {
 
1856
                        val = (val & 0x7f) << 8 | *data++;
 
1857
                        if (UNIV_UNLIKELY(!val)) {
 
1858
                                page_zip_fail(("page_zip_apply_log:"
 
1859
                                               " invalid val %x%x\n",
 
1860
                                               data[-2], data[-1]));
 
1861
                                return(NULL);
 
1862
                        }
 
1863
                }
 
1864
                if (UNIV_UNLIKELY(data >= end)) {
 
1865
                        page_zip_fail(("page_zip_apply_log: %p >= %p\n",
 
1866
                                       (const void*) data,
 
1867
                                       (const void*) end));
 
1868
                        return(NULL);
 
1869
                }
 
1870
                if (UNIV_UNLIKELY((val >> 1) > n_dense)) {
 
1871
                        page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n",
 
1872
                                       (ulong) val, (ulong) n_dense));
 
1873
                        return(NULL);
 
1874
                }
 
1875
 
 
1876
                /* Determine the heap number and status bits of the record. */
 
1877
                rec = recs[(val >> 1) - 1];
 
1878
 
 
1879
                hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT;
 
1880
                hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);
 
1881
 
 
1882
                /* This may either be an old record that is being
 
1883
                overwritten (updated in place, or allocated from
 
1884
                the free list), or a new record, with the next
 
1885
                available_heap_no. */
 
1886
                if (UNIV_UNLIKELY(hs > heap_status)) {
 
1887
                        page_zip_fail(("page_zip_apply_log: %lu > %lu\n",
 
1888
                                       (ulong) hs, (ulong) heap_status));
 
1889
                        return(NULL);
 
1890
                } else if (hs == heap_status) {
 
1891
                        /* A new record was allocated from the heap. */
 
1892
                        if (UNIV_UNLIKELY(val & 1)) {
 
1893
                                /* Only existing records may be cleared. */
 
1894
                                page_zip_fail(("page_zip_apply_log:"
 
1895
                                               " attempting to create"
 
1896
                                               " deleted rec %lu\n",
 
1897
                                               (ulong) hs));
 
1898
                                return(NULL);
 
1899
                        }
 
1900
                        heap_status += 1 << REC_HEAP_NO_SHIFT;
 
1901
                }
 
1902
 
 
1903
                mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);
 
1904
 
 
1905
                if (val & 1) {
 
1906
                        /* Clear the data bytes of the record. */
 
1907
                        mem_heap_t*     heap    = NULL;
 
1908
                        ulint*          offs;
 
1909
                        offs = rec_get_offsets(rec, index, offsets,
 
1910
                                               ULINT_UNDEFINED, &heap);
 
1911
                        memset(rec, 0, rec_offs_data_size(offs));
 
1912
 
 
1913
                        if (UNIV_LIKELY_NULL(heap)) {
 
1914
                                mem_heap_free(heap);
 
1915
                        }
 
1916
                        continue;
 
1917
                }
 
1918
 
 
1919
#if REC_STATUS_NODE_PTR != TRUE
 
1920
# error "REC_STATUS_NODE_PTR != TRUE"
 
1921
#endif
 
1922
                rec_get_offsets_reverse(data, index,
 
1923
                                        hs & REC_STATUS_NODE_PTR,
 
1924
                                        offsets);
 
1925
                rec_offs_make_valid(rec, index, offsets);
 
1926
 
 
1927
                /* Copy the extra bytes (backwards). */
 
1928
                {
 
1929
                        byte*   start   = rec_get_start(rec, offsets);
 
1930
                        byte*   b       = rec - REC_N_NEW_EXTRA_BYTES;
 
1931
                        while (b != start) {
 
1932
                                *--b = *data++;
 
1933
                        }
 
1934
                }
 
1935
 
 
1936
                /* Copy the data bytes. */
 
1937
                if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
 
1938
                        /* Non-leaf nodes should not contain any
 
1939
                        externally stored columns. */
 
1940
                        if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
 
1941
                                page_zip_fail(("page_zip_apply_log: "
 
1942
                                               "%lu&REC_STATUS_NODE_PTR\n",
 
1943
                                               (ulong) hs));
 
1944
                                return(NULL);
 
1945
                        }
 
1946
 
 
1947
                        data = page_zip_apply_log_ext(
 
1948
                                rec, offsets, trx_id_col, data, end);
 
1949
 
 
1950
                        if (UNIV_UNLIKELY(!data)) {
 
1951
                                return(NULL);
 
1952
                        }
 
1953
                } else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
 
1954
                        len = rec_offs_data_size(offsets)
 
1955
                                - REC_NODE_PTR_SIZE;
 
1956
                        /* Copy the data bytes, except node_ptr. */
 
1957
                        if (UNIV_UNLIKELY(data + len >= end)) {
 
1958
                                page_zip_fail(("page_zip_apply_log: "
 
1959
                                               "node_ptr %p+%lu >= %p\n",
 
1960
                                               (const void*) data,
 
1961
                                               (ulong) len,
 
1962
                                               (const void*) end));
 
1963
                                return(NULL);
 
1964
                        }
 
1965
                        memcpy(rec, data, len);
 
1966
                        data += len;
 
1967
                } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
 
1968
                        len = rec_offs_data_size(offsets);
 
1969
 
 
1970
                        /* Copy all data bytes of
 
1971
                        a record in a secondary index. */
 
1972
                        if (UNIV_UNLIKELY(data + len >= end)) {
 
1973
                                page_zip_fail(("page_zip_apply_log: "
 
1974
                                               "sec %p+%lu >= %p\n",
 
1975
                                               (const void*) data,
 
1976
                                               (ulong) len,
 
1977
                                               (const void*) end));
 
1978
                                return(NULL);
 
1979
                        }
 
1980
 
 
1981
                        memcpy(rec, data, len);
 
1982
                        data += len;
 
1983
                } else {
 
1984
                        /* Skip DB_TRX_ID and DB_ROLL_PTR. */
 
1985
                        ulint   l = rec_get_nth_field_offs(offsets,
 
1986
                                                           trx_id_col, &len);
 
1987
                        byte*   b;
 
1988
 
 
1989
                        if (UNIV_UNLIKELY(data + l >= end)
 
1990
                            || UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
 
1991
                                                    + DATA_ROLL_PTR_LEN))) {
 
1992
                                page_zip_fail(("page_zip_apply_log: "
 
1993
                                               "trx_id %p+%lu >= %p\n",
 
1994
                                               (const void*) data,
 
1995
                                               (ulong) l,
 
1996
                                               (const void*) end));
 
1997
                                return(NULL);
 
1998
                        }
 
1999
 
 
2000
                        /* Copy any preceding data bytes. */
 
2001
                        memcpy(rec, data, l);
 
2002
                        data += l;
 
2003
 
 
2004
                        /* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
 
2005
                        b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
2006
                        len = rec_get_end(rec, offsets) - b;
 
2007
                        if (UNIV_UNLIKELY(data + len >= end)) {
 
2008
                                page_zip_fail(("page_zip_apply_log: "
 
2009
                                               "clust %p+%lu >= %p\n",
 
2010
                                               (const void*) data,
 
2011
                                               (ulong) len,
 
2012
                                               (const void*) end));
 
2013
                                return(NULL);
 
2014
                        }
 
2015
                        memcpy(b, data, len);
 
2016
                        data += len;
 
2017
                }
 
2018
        }
 
2019
}
 
2020
 
 
2021
/**************************************************************************
 
2022
Decompress the records of a node pointer page. */
 
2023
static
 
2024
ibool
 
2025
page_zip_decompress_node_ptrs(
 
2026
/*==========================*/
 
2027
                                        /* out: TRUE on success,
 
2028
                                        FALSE on failure */
 
2029
        page_zip_des_t* page_zip,       /* in/out: compressed page */
 
2030
        z_stream*       d_stream,       /* in/out: compressed page stream */
 
2031
        rec_t**         recs,           /* in: dense page directory
 
2032
                                        sorted by address */
 
2033
        ulint           n_dense,        /* in: size of recs[] */
 
2034
        dict_index_t*   index,          /* in: the index of the page */
 
2035
        ulint*          offsets,        /* in/out: temporary offsets */
 
2036
        mem_heap_t*     heap)           /* in: temporary memory heap */
 
2037
{
 
2038
        ulint           heap_status = REC_STATUS_NODE_PTR
 
2039
                | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
 
2040
        ulint           slot;
 
2041
        const byte*     storage;
 
2042
 
 
2043
        /* Subtract the space reserved for uncompressed data. */
 
2044
        d_stream->avail_in -= n_dense
 
2045
                * (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE);
 
2046
 
 
2047
        /* Decompress the records in heap_no order. */
 
2048
        for (slot = 0; slot < n_dense; slot++) {
 
2049
                rec_t*  rec = recs[slot];
 
2050
 
 
2051
                d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
 
2052
                        - d_stream->next_out;
 
2053
 
 
2054
                ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
 
2055
                      - PAGE_ZIP_START - PAGE_DIR);
 
2056
                switch (inflate(d_stream, Z_SYNC_FLUSH)) {
 
2057
                case Z_STREAM_END:
 
2058
                        /* Apparently, n_dense has grown
 
2059
                        since the time the page was last compressed. */
 
2060
                        goto zlib_done;
 
2061
                case Z_OK:
 
2062
                case Z_BUF_ERROR:
 
2063
                        if (!d_stream->avail_out) {
 
2064
                                break;
 
2065
                        }
 
2066
                        /* fall through */
 
2067
                default:
 
2068
                        page_zip_fail(("page_zip_decompress_node_ptrs:"
 
2069
                                       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
 
2070
                                       d_stream->msg));
 
2071
                        goto zlib_error;
 
2072
                }
 
2073
 
 
2074
                ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
 
2075
                /* Prepare to decompress the data bytes. */
 
2076
                d_stream->next_out = rec;
 
2077
                /* Set heap_no and the status bits. */
 
2078
                mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
 
2079
                heap_status += 1 << REC_HEAP_NO_SHIFT;
 
2080
 
 
2081
                /* Read the offsets. The status bits are needed here. */
 
2082
                offsets = rec_get_offsets(rec, index, offsets,
 
2083
                                          ULINT_UNDEFINED, &heap);
 
2084
 
 
2085
                /* Non-leaf nodes should not have any externally
 
2086
                stored columns. */
 
2087
                ut_ad(!rec_offs_any_extern(offsets));
 
2088
 
 
2089
                /* Decompress the data bytes, except node_ptr. */
 
2090
                d_stream->avail_out = rec_offs_data_size(offsets)
 
2091
                        - REC_NODE_PTR_SIZE;
 
2092
 
 
2093
                switch (inflate(d_stream, Z_SYNC_FLUSH)) {
 
2094
                case Z_STREAM_END:
 
2095
                        goto zlib_done;
 
2096
                case Z_OK:
 
2097
                case Z_BUF_ERROR:
 
2098
                        if (!d_stream->avail_out) {
 
2099
                                break;
 
2100
                        }
 
2101
                        /* fall through */
 
2102
                default:
 
2103
                        page_zip_fail(("page_zip_decompress_node_ptrs:"
 
2104
                                       " 2 inflate(Z_SYNC_FLUSH)=%s\n",
 
2105
                                       d_stream->msg));
 
2106
                        goto zlib_error;
 
2107
                }
 
2108
 
 
2109
                /* Clear the node pointer in case the record
 
2110
                will be deleted and the space will be reallocated
 
2111
                to a smaller record. */
 
2112
                memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE);
 
2113
                d_stream->next_out += REC_NODE_PTR_SIZE;
 
2114
 
 
2115
                ut_ad(d_stream->next_out == rec_get_end(rec, offsets));
 
2116
        }
 
2117
 
 
2118
        /* Decompress any trailing garbage, in case the last record was
 
2119
        allocated from an originally longer space on the free list. */
 
2120
        d_stream->avail_out = page_header_get_field(page_zip->data,
 
2121
                                                    PAGE_HEAP_TOP)
 
2122
                - page_offset(d_stream->next_out);
 
2123
        if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
 
2124
                          - PAGE_ZIP_START - PAGE_DIR)) {
 
2125
 
 
2126
                page_zip_fail(("page_zip_decompress_node_ptrs:"
 
2127
                               " avail_out = %u\n",
 
2128
                               d_stream->avail_out));
 
2129
                goto zlib_error;
 
2130
        }
 
2131
 
 
2132
        if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
 
2133
                page_zip_fail(("page_zip_decompress_node_ptrs:"
 
2134
                               " inflate(Z_FINISH)=%s\n",
 
2135
                               d_stream->msg));
 
2136
zlib_error:
 
2137
                inflateEnd(d_stream);
 
2138
                return(FALSE);
 
2139
        }
 
2140
 
 
2141
        /* Note that d_stream->avail_out > 0 may hold here
 
2142
        if the modification log is nonempty. */
 
2143
 
 
2144
zlib_done:
 
2145
        if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
 
2146
                ut_error;
 
2147
        }
 
2148
 
 
2149
        {
 
2150
                page_t* page = page_align(d_stream->next_out);
 
2151
 
 
2152
                /* Clear the unused heap space on the uncompressed page. */
 
2153
                memset(d_stream->next_out, 0,
 
2154
                       page_dir_get_nth_slot(page,
 
2155
                                             page_dir_get_n_slots(page) - 1)
 
2156
                       - d_stream->next_out);
 
2157
        }
 
2158
 
 
2159
#ifdef UNIV_DEBUG
 
2160
        page_zip->m_start = PAGE_DATA + d_stream->total_in;
 
2161
#endif /* UNIV_DEBUG */
 
2162
 
 
2163
        /* Apply the modification log. */
 
2164
        {
 
2165
                const byte*     mod_log_ptr;
 
2166
                mod_log_ptr = page_zip_apply_log(d_stream->next_in,
 
2167
                                                 d_stream->avail_in + 1,
 
2168
                                                 recs, n_dense,
 
2169
                                                 ULINT_UNDEFINED, heap_status,
 
2170
                                                 index, offsets);
 
2171
 
 
2172
                if (UNIV_UNLIKELY(!mod_log_ptr)) {
 
2173
                        return(FALSE);
 
2174
                }
 
2175
                page_zip->m_end = mod_log_ptr - page_zip->data;
 
2176
                page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
 
2177
        }
 
2178
 
 
2179
        if (UNIV_UNLIKELY
 
2180
            (page_zip_get_trailer_len(page_zip,
 
2181
                                      dict_index_is_clust(index), NULL)
 
2182
             + page_zip->m_end >= page_zip_get_size(page_zip))) {
 
2183
                page_zip_fail(("page_zip_decompress_node_ptrs:"
 
2184
                               " %lu + %lu >= %lu, %lu\n",
 
2185
                               (ulong) page_zip_get_trailer_len(
 
2186
                                       page_zip, dict_index_is_clust(index),
 
2187
                                       NULL),
 
2188
                               (ulong) page_zip->m_end,
 
2189
                               (ulong) page_zip_get_size(page_zip),
 
2190
                               (ulong) dict_index_is_clust(index)));
 
2191
                return(FALSE);
 
2192
        }
 
2193
 
 
2194
        /* Restore the uncompressed columns in heap_no order. */
 
2195
        storage = page_zip->data + page_zip_get_size(page_zip)
 
2196
                - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
 
2197
 
 
2198
        for (slot = 0; slot < n_dense; slot++) {
 
2199
                rec_t*          rec     = recs[slot];
 
2200
 
 
2201
                offsets = rec_get_offsets(rec, index, offsets,
 
2202
                                          ULINT_UNDEFINED, &heap);
 
2203
                /* Non-leaf nodes should not have any externally
 
2204
                stored columns. */
 
2205
                ut_ad(!rec_offs_any_extern(offsets));
 
2206
                storage -= REC_NODE_PTR_SIZE;
 
2207
 
 
2208
                memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
 
2209
                       storage, REC_NODE_PTR_SIZE);
 
2210
        }
 
2211
 
 
2212
        return(TRUE);
 
2213
}
 
2214
 
 
2215
/**************************************************************************
 
2216
Decompress the records of a leaf node of a secondary index. */
 
2217
static
 
2218
ibool
 
2219
page_zip_decompress_sec(
 
2220
/*====================*/
 
2221
                                        /* out: TRUE on success,
 
2222
                                        FALSE on failure */
 
2223
        page_zip_des_t* page_zip,       /* in/out: compressed page */
 
2224
        z_stream*       d_stream,       /* in/out: compressed page stream */
 
2225
        rec_t**         recs,           /* in: dense page directory
 
2226
                                        sorted by address */
 
2227
        ulint           n_dense,        /* in: size of recs[] */
 
2228
        dict_index_t*   index,          /* in: the index of the page */
 
2229
        ulint*          offsets)        /* in/out: temporary offsets */
 
2230
{
 
2231
        ulint   heap_status     = REC_STATUS_ORDINARY
 
2232
                | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
 
2233
        ulint   slot;
 
2234
 
 
2235
        ut_a(!dict_index_is_clust(index));
 
2236
 
 
2237
        /* Subtract the space reserved for uncompressed data. */
 
2238
        d_stream->avail_in -= n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
 
2239
 
 
2240
        for (slot = 0; slot < n_dense; slot++) {
 
2241
                rec_t*  rec = recs[slot];
 
2242
 
 
2243
                /* Decompress everything up to this record. */
 
2244
                d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
 
2245
                        - d_stream->next_out;
 
2246
 
 
2247
                if (UNIV_LIKELY(d_stream->avail_out)) {
 
2248
                        switch (inflate(d_stream, Z_SYNC_FLUSH)) {
 
2249
                        case Z_STREAM_END:
 
2250
                                /* Apparently, n_dense has grown
 
2251
                                since the time the page was last compressed. */
 
2252
                                goto zlib_done;
 
2253
                        case Z_OK:
 
2254
                        case Z_BUF_ERROR:
 
2255
                                if (!d_stream->avail_out) {
 
2256
                                        break;
 
2257
                                }
 
2258
                                /* fall through */
 
2259
                        default:
 
2260
                                page_zip_fail(("page_zip_decompress_sec:"
 
2261
                                               " inflate(Z_SYNC_FLUSH)=%s\n",
 
2262
                                               d_stream->msg));
 
2263
                                goto zlib_error;
 
2264
                        }
 
2265
                }
 
2266
 
 
2267
                ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
 
2268
 
 
2269
                /* Skip the REC_N_NEW_EXTRA_BYTES. */
 
2270
 
 
2271
                d_stream->next_out = rec;
 
2272
 
 
2273
                /* Set heap_no and the status bits. */
 
2274
                mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
 
2275
                heap_status += 1 << REC_HEAP_NO_SHIFT;
 
2276
        }
 
2277
 
 
2278
        /* Decompress the data of the last record and any trailing garbage,
 
2279
        in case the last record was allocated from an originally longer space
 
2280
        on the free list. */
 
2281
        d_stream->avail_out = page_header_get_field(page_zip->data,
 
2282
                                                    PAGE_HEAP_TOP)
 
2283
                - page_offset(d_stream->next_out);
 
2284
        if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
 
2285
                          - PAGE_ZIP_START - PAGE_DIR)) {
 
2286
 
 
2287
                page_zip_fail(("page_zip_decompress_sec:"
 
2288
                               " avail_out = %u\n",
 
2289
                               d_stream->avail_out));
 
2290
                goto zlib_error;
 
2291
        }
 
2292
 
 
2293
        if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
 
2294
                page_zip_fail(("page_zip_decompress_sec:"
 
2295
                               " inflate(Z_FINISH)=%s\n",
 
2296
                               d_stream->msg));
 
2297
zlib_error:
 
2298
                inflateEnd(d_stream);
 
2299
                return(FALSE);
 
2300
        }
 
2301
 
 
2302
        /* Note that d_stream->avail_out > 0 may hold here
 
2303
        if the modification log is nonempty. */
 
2304
 
 
2305
zlib_done:
 
2306
        if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
 
2307
                ut_error;
 
2308
        }
 
2309
 
 
2310
        {
 
2311
                page_t* page = page_align(d_stream->next_out);
 
2312
 
 
2313
                /* Clear the unused heap space on the uncompressed page. */
 
2314
                memset(d_stream->next_out, 0,
 
2315
                       page_dir_get_nth_slot(page,
 
2316
                                             page_dir_get_n_slots(page) - 1)
 
2317
                       - d_stream->next_out);
 
2318
        }
 
2319
 
 
2320
#ifdef UNIV_DEBUG
 
2321
        page_zip->m_start = PAGE_DATA + d_stream->total_in;
 
2322
#endif /* UNIV_DEBUG */
 
2323
 
 
2324
        /* Apply the modification log. */
 
2325
        {
 
2326
                const byte*     mod_log_ptr;
 
2327
                mod_log_ptr = page_zip_apply_log(d_stream->next_in,
 
2328
                                                 d_stream->avail_in + 1,
 
2329
                                                 recs, n_dense,
 
2330
                                                 ULINT_UNDEFINED, heap_status,
 
2331
                                                 index, offsets);
 
2332
 
 
2333
                if (UNIV_UNLIKELY(!mod_log_ptr)) {
 
2334
                        return(FALSE);
 
2335
                }
 
2336
                page_zip->m_end = mod_log_ptr - page_zip->data;
 
2337
                page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
 
2338
        }
 
2339
 
 
2340
        if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE, NULL)
 
2341
                          + page_zip->m_end >= page_zip_get_size(page_zip))) {
 
2342
 
 
2343
                page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n",
 
2344
                               (ulong) page_zip_get_trailer_len(
 
2345
                                       page_zip, FALSE, NULL),
 
2346
                               (ulong) page_zip->m_end,
 
2347
                               (ulong) page_zip_get_size(page_zip)));
 
2348
                return(FALSE);
 
2349
        }
 
2350
 
 
2351
        /* There are no uncompressed columns on leaf pages of
 
2352
        secondary indexes. */
 
2353
 
 
2354
        return(TRUE);
 
2355
}
 
2356
 
 
2357
/**************************************************************************
 
2358
Decompress a record of a leaf node of a clustered index that contains
 
2359
externally stored columns. */
 
2360
static
 
2361
ibool
 
2362
page_zip_decompress_clust_ext(
 
2363
/*==========================*/
 
2364
                                        /* out: TRUE on success */
 
2365
        z_stream*       d_stream,       /* in/out: compressed page stream */
 
2366
        rec_t*          rec,            /* in/out: record */
 
2367
        const ulint*    offsets,        /* in: rec_get_offsets(rec) */
 
2368
        ulint           trx_id_col)     /* in: position of of DB_TRX_ID */
 
2369
{
 
2370
        ulint   i;
 
2371
 
 
2372
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
 
2373
                ulint   len;
 
2374
                byte*   dst;
 
2375
 
 
2376
                if (UNIV_UNLIKELY(i == trx_id_col)) {
 
2377
                        /* Skip trx_id and roll_ptr */
 
2378
                        dst = rec_get_nth_field(rec, offsets, i, &len);
 
2379
                        if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
 
2380
                                          + DATA_ROLL_PTR_LEN)) {
 
2381
 
 
2382
                                page_zip_fail(("page_zip_decompress_clust_ext:"
 
2383
                                               " len[%lu] = %lu\n",
 
2384
                                               (ulong) i, (ulong) len));
 
2385
                                return(FALSE);
 
2386
                        }
 
2387
 
 
2388
                        if (rec_offs_nth_extern(offsets, i)) {
 
2389
 
 
2390
                                page_zip_fail(("page_zip_decompress_clust_ext:"
 
2391
                                               " DB_TRX_ID at %lu is ext\n",
 
2392
                                               (ulong) i));
 
2393
                                return(FALSE);
 
2394
                        }
 
2395
 
 
2396
                        d_stream->avail_out = dst - d_stream->next_out;
 
2397
 
 
2398
                        switch (inflate(d_stream, Z_SYNC_FLUSH)) {
 
2399
                        case Z_STREAM_END:
 
2400
                        case Z_OK:
 
2401
                        case Z_BUF_ERROR:
 
2402
                                if (!d_stream->avail_out) {
 
2403
                                        break;
 
2404
                                }
 
2405
                                /* fall through */
 
2406
                        default:
 
2407
                                page_zip_fail(("page_zip_decompress_clust_ext:"
 
2408
                                               " 1 inflate(Z_SYNC_FLUSH)=%s\n",
 
2409
                                               d_stream->msg));
 
2410
                                return(FALSE);
 
2411
                        }
 
2412
 
 
2413
                        ut_ad(d_stream->next_out == dst);
 
2414
 
 
2415
                        /* Clear DB_TRX_ID and DB_ROLL_PTR in order to
 
2416
                        avoid uninitialized bytes in case the record
 
2417
                        is affected by page_zip_apply_log(). */
 
2418
                        memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
2419
 
 
2420
                        d_stream->next_out += DATA_TRX_ID_LEN
 
2421
                                + DATA_ROLL_PTR_LEN;
 
2422
                } else if (rec_offs_nth_extern(offsets, i)) {
 
2423
                        dst = rec_get_nth_field(rec, offsets, i, &len);
 
2424
                        ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
 
2425
                        dst += len - BTR_EXTERN_FIELD_REF_SIZE;
 
2426
 
 
2427
                        d_stream->avail_out = dst - d_stream->next_out;
 
2428
                        switch (inflate(d_stream, Z_SYNC_FLUSH)) {
 
2429
                        case Z_STREAM_END:
 
2430
                        case Z_OK:
 
2431
                        case Z_BUF_ERROR:
 
2432
                                if (!d_stream->avail_out) {
 
2433
                                        break;
 
2434
                                }
 
2435
                                /* fall through */
 
2436
                        default:
 
2437
                                page_zip_fail(("page_zip_decompress_clust_ext:"
 
2438
                                               " 2 inflate(Z_SYNC_FLUSH)=%s\n",
 
2439
                                               d_stream->msg));
 
2440
                                return(FALSE);
 
2441
                        }
 
2442
 
 
2443
                        ut_ad(d_stream->next_out == dst);
 
2444
 
 
2445
                        /* Clear the BLOB pointer in case
 
2446
                        the record will be deleted and the
 
2447
                        space will not be reused.  Note that
 
2448
                        the final initialization of the BLOB
 
2449
                        pointers (copying from "externs"
 
2450
                        or clearing) will have to take place
 
2451
                        only after the page modification log
 
2452
                        has been applied.  Otherwise, we
 
2453
                        could end up with an uninitialized
 
2454
                        BLOB pointer when a record is deleted,
 
2455
                        reallocated and deleted. */
 
2456
                        memset(d_stream->next_out, 0,
 
2457
                               BTR_EXTERN_FIELD_REF_SIZE);
 
2458
                        d_stream->next_out
 
2459
                                += BTR_EXTERN_FIELD_REF_SIZE;
 
2460
                }
 
2461
        }
 
2462
 
 
2463
        return(TRUE);
 
2464
}
 
2465
 
 
2466
/**************************************************************************
 
2467
Decompress the records of a leaf node of a clustered index. */
 
2468
static
 
2469
ibool
 
2470
page_zip_decompress_clust(
 
2471
/*======================*/
 
2472
                                        /* out: TRUE on success,
 
2473
                                        FALSE on failure */
 
2474
        page_zip_des_t* page_zip,       /* in/out: compressed page */
 
2475
        z_stream*       d_stream,       /* in/out: compressed page stream */
 
2476
        rec_t**         recs,           /* in: dense page directory
 
2477
                                        sorted by address */
 
2478
        ulint           n_dense,        /* in: size of recs[] */
 
2479
        dict_index_t*   index,          /* in: the index of the page */
 
2480
        ulint           trx_id_col,     /* index of the trx_id column */
 
2481
        ulint*          offsets,        /* in/out: temporary offsets */
 
2482
        mem_heap_t*     heap)           /* in: temporary memory heap */
 
2483
{
 
2484
        int             err;
 
2485
        ulint           slot;
 
2486
        ulint           heap_status     = REC_STATUS_ORDINARY
 
2487
                | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
 
2488
        const byte*     storage;
 
2489
        const byte*     externs;
 
2490
 
 
2491
        ut_a(dict_index_is_clust(index));
 
2492
 
 
2493
        /* Subtract the space reserved for uncompressed data. */
 
2494
        d_stream->avail_in -= n_dense * (PAGE_ZIP_DIR_SLOT_SIZE
 
2495
                                         + DATA_TRX_ID_LEN
 
2496
                                         + DATA_ROLL_PTR_LEN);
 
2497
 
 
2498
        /* Decompress the records in heap_no order. */
 
2499
        for (slot = 0; slot < n_dense; slot++) {
 
2500
                rec_t*  rec     = recs[slot];
 
2501
 
 
2502
                d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
 
2503
                        - d_stream->next_out;
 
2504
 
 
2505
                ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
 
2506
                      - PAGE_ZIP_START - PAGE_DIR);
 
2507
                err = inflate(d_stream, Z_SYNC_FLUSH);
 
2508
                switch (err) {
 
2509
                case Z_STREAM_END:
 
2510
                        /* Apparently, n_dense has grown
 
2511
                        since the time the page was last compressed. */
 
2512
                        goto zlib_done;
 
2513
                case Z_OK:
 
2514
                case Z_BUF_ERROR:
 
2515
                        if (UNIV_LIKELY(!d_stream->avail_out)) {
 
2516
                                break;
 
2517
                        }
 
2518
                        /* fall through */
 
2519
                default:
 
2520
                        page_zip_fail(("page_zip_decompress_clust:"
 
2521
                                       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
 
2522
                                       d_stream->msg));
 
2523
                        goto zlib_error;
 
2524
                }
 
2525
 
 
2526
                ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
 
2527
                /* Prepare to decompress the data bytes. */
 
2528
                d_stream->next_out = rec;
 
2529
                /* Set heap_no and the status bits. */
 
2530
                mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
 
2531
                heap_status += 1 << REC_HEAP_NO_SHIFT;
 
2532
 
 
2533
                /* Read the offsets. The status bits are needed here. */
 
2534
                offsets = rec_get_offsets(rec, index, offsets,
 
2535
                                          ULINT_UNDEFINED, &heap);
 
2536
 
 
2537
                /* This is a leaf page in a clustered index. */
 
2538
 
 
2539
                /* Check if there are any externally stored columns.
 
2540
                For each externally stored column, restore the
 
2541
                BTR_EXTERN_FIELD_REF separately. */
 
2542
 
 
2543
                if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
 
2544
                        if (UNIV_UNLIKELY
 
2545
                            (!page_zip_decompress_clust_ext(
 
2546
                                    d_stream, rec, offsets, trx_id_col))) {
 
2547
 
 
2548
                                goto zlib_error;
 
2549
                        }
 
2550
                } else {
 
2551
                        /* Skip trx_id and roll_ptr */
 
2552
                        ulint   len;
 
2553
                        byte*   dst = rec_get_nth_field(rec, offsets,
 
2554
                                                        trx_id_col, &len);
 
2555
                        if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
 
2556
                                          + DATA_ROLL_PTR_LEN)) {
 
2557
 
 
2558
                                page_zip_fail(("page_zip_decompress_clust:"
 
2559
                                               " len = %lu\n", (ulong) len));
 
2560
                                goto zlib_error;
 
2561
                        }
 
2562
 
 
2563
                        d_stream->avail_out = dst - d_stream->next_out;
 
2564
 
 
2565
                        switch (inflate(d_stream, Z_SYNC_FLUSH)) {
 
2566
                        case Z_STREAM_END:
 
2567
                        case Z_OK:
 
2568
                        case Z_BUF_ERROR:
 
2569
                                if (!d_stream->avail_out) {
 
2570
                                        break;
 
2571
                                }
 
2572
                                /* fall through */
 
2573
                        default:
 
2574
                                page_zip_fail(("page_zip_decompress_clust:"
 
2575
                                               " 2 inflate(Z_SYNC_FLUSH)=%s\n",
 
2576
                                               d_stream->msg));
 
2577
                                goto zlib_error;
 
2578
                        }
 
2579
 
 
2580
                        ut_ad(d_stream->next_out == dst);
 
2581
 
 
2582
                        /* Clear DB_TRX_ID and DB_ROLL_PTR in order to
 
2583
                        avoid uninitialized bytes in case the record
 
2584
                        is affected by page_zip_apply_log(). */
 
2585
                        memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
2586
 
 
2587
                        d_stream->next_out += DATA_TRX_ID_LEN
 
2588
                                + DATA_ROLL_PTR_LEN;
 
2589
                }
 
2590
 
 
2591
                /* Decompress the last bytes of the record. */
 
2592
                d_stream->avail_out = rec_get_end(rec, offsets)
 
2593
                        - d_stream->next_out;
 
2594
 
 
2595
                switch (inflate(d_stream, Z_SYNC_FLUSH)) {
 
2596
                case Z_STREAM_END:
 
2597
                case Z_OK:
 
2598
                case Z_BUF_ERROR:
 
2599
                        if (!d_stream->avail_out) {
 
2600
                                break;
 
2601
                        }
 
2602
                        /* fall through */
 
2603
                default:
 
2604
                        page_zip_fail(("page_zip_decompress_clust:"
 
2605
                                       " 3 inflate(Z_SYNC_FLUSH)=%s\n",
 
2606
                                       d_stream->msg));
 
2607
                        goto zlib_error;
 
2608
                }
 
2609
        }
 
2610
 
 
2611
        /* Decompress any trailing garbage, in case the last record was
 
2612
        allocated from an originally longer space on the free list. */
 
2613
        d_stream->avail_out = page_header_get_field(page_zip->data,
 
2614
                                                    PAGE_HEAP_TOP)
 
2615
                - page_offset(d_stream->next_out);
 
2616
        if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
 
2617
                          - PAGE_ZIP_START - PAGE_DIR)) {
 
2618
 
 
2619
                page_zip_fail(("page_zip_decompress_clust:"
 
2620
                               " avail_out = %u\n",
 
2621
                               d_stream->avail_out));
 
2622
                goto zlib_error;
 
2623
        }
 
2624
 
 
2625
        if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
 
2626
                page_zip_fail(("page_zip_decompress_clust:"
 
2627
                               " inflate(Z_FINISH)=%s\n",
 
2628
                               d_stream->msg));
 
2629
zlib_error:
 
2630
                inflateEnd(d_stream);
 
2631
                return(FALSE);
 
2632
        }
 
2633
 
 
2634
        /* Note that d_stream->avail_out > 0 may hold here
 
2635
        if the modification log is nonempty. */
 
2636
 
 
2637
zlib_done:
 
2638
        if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
 
2639
                ut_error;
 
2640
        }
 
2641
 
 
2642
        {
 
2643
                page_t* page = page_align(d_stream->next_out);
 
2644
 
 
2645
                /* Clear the unused heap space on the uncompressed page. */
 
2646
                memset(d_stream->next_out, 0,
 
2647
                       page_dir_get_nth_slot(page,
 
2648
                                             page_dir_get_n_slots(page) - 1)
 
2649
                       - d_stream->next_out);
 
2650
        }
 
2651
 
 
2652
#ifdef UNIV_DEBUG
 
2653
        page_zip->m_start = PAGE_DATA + d_stream->total_in;
 
2654
#endif /* UNIV_DEBUG */
 
2655
 
 
2656
        /* Apply the modification log. */
 
2657
        {
 
2658
                const byte*     mod_log_ptr;
 
2659
                mod_log_ptr = page_zip_apply_log(d_stream->next_in,
 
2660
                                                 d_stream->avail_in + 1,
 
2661
                                                 recs, n_dense,
 
2662
                                                 trx_id_col, heap_status,
 
2663
                                                 index, offsets);
 
2664
 
 
2665
                if (UNIV_UNLIKELY(!mod_log_ptr)) {
 
2666
                        return(FALSE);
 
2667
                }
 
2668
                page_zip->m_end = mod_log_ptr - page_zip->data;
 
2669
                page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
 
2670
        }
 
2671
 
 
2672
        if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE, NULL)
 
2673
                          + page_zip->m_end >= page_zip_get_size(page_zip))) {
 
2674
 
 
2675
                page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n",
 
2676
                               (ulong) page_zip_get_trailer_len(
 
2677
                                       page_zip, TRUE, NULL),
 
2678
                               (ulong) page_zip->m_end,
 
2679
                               (ulong) page_zip_get_size(page_zip)));
 
2680
                return(FALSE);
 
2681
        }
 
2682
 
 
2683
        storage = page_zip->data + page_zip_get_size(page_zip)
 
2684
                - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
 
2685
 
 
2686
        externs = storage - n_dense
 
2687
                * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
2688
 
 
2689
        /* Restore the uncompressed columns in heap_no order. */
 
2690
 
 
2691
        for (slot = 0; slot < n_dense; slot++) {
 
2692
                ulint   i;
 
2693
                ulint   len;
 
2694
                byte*   dst;
 
2695
                rec_t*  rec     = recs[slot];
 
2696
                ibool   exists  = !page_zip_dir_find_free(
 
2697
                        page_zip, page_offset(rec));
 
2698
                offsets = rec_get_offsets(rec, index, offsets,
 
2699
                                          ULINT_UNDEFINED, &heap);
 
2700
 
 
2701
                dst = rec_get_nth_field(rec, offsets,
 
2702
                                        trx_id_col, &len);
 
2703
                ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
2704
                storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
 
2705
                memcpy(dst, storage,
 
2706
                       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
2707
 
 
2708
                /* Check if there are any externally stored
 
2709
                columns in this record.  For each externally
 
2710
                stored column, restore or clear the
 
2711
                BTR_EXTERN_FIELD_REF. */
 
2712
                if (!rec_offs_any_extern(offsets)) {
 
2713
                        continue;
 
2714
                }
 
2715
 
 
2716
                for (i = 0; i < rec_offs_n_fields(offsets); i++) {
 
2717
                        if (!rec_offs_nth_extern(offsets, i)) {
 
2718
                                continue;
 
2719
                        }
 
2720
                        dst = rec_get_nth_field(rec, offsets, i, &len);
 
2721
 
 
2722
                        if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) {
 
2723
                                page_zip_fail(("page_zip_decompress_clust:"
 
2724
                                               " %lu < 20\n",
 
2725
                                               (ulong) len));
 
2726
                                return(FALSE);
 
2727
                        }
 
2728
 
 
2729
                        dst += len - BTR_EXTERN_FIELD_REF_SIZE;
 
2730
 
 
2731
                        if (UNIV_LIKELY(exists)) {
 
2732
                                /* Existing record:
 
2733
                                restore the BLOB pointer */
 
2734
                                externs -= BTR_EXTERN_FIELD_REF_SIZE;
 
2735
 
 
2736
                                if (UNIV_UNLIKELY
 
2737
                                    (externs < page_zip->data
 
2738
                                     + page_zip->m_end)) {
 
2739
                                        page_zip_fail(("page_zip_"
 
2740
                                                       "decompress_clust: "
 
2741
                                                       "%p < %p + %lu\n",
 
2742
                                                       (const void*) externs,
 
2743
                                                       (const void*)
 
2744
                                                       page_zip->data,
 
2745
                                                       (ulong)
 
2746
                                                       page_zip->m_end));
 
2747
                                        return(FALSE);
 
2748
                                }
 
2749
 
 
2750
                                memcpy(dst, externs,
 
2751
                                       BTR_EXTERN_FIELD_REF_SIZE);
 
2752
 
 
2753
                                page_zip->n_blobs++;
 
2754
                        } else {
 
2755
                                /* Deleted record:
 
2756
                                clear the BLOB pointer */
 
2757
                                memset(dst, 0,
 
2758
                                       BTR_EXTERN_FIELD_REF_SIZE);
 
2759
                        }
 
2760
                }
 
2761
        }
 
2762
 
 
2763
        return(TRUE);
 
2764
}
 
2765
 
 
2766
/**************************************************************************
 
2767
Decompress a page.  This function should tolerate errors on the compressed
 
2768
page.  Instead of letting assertions fail, it will return FALSE if an
 
2769
inconsistency is detected. */
 
2770
UNIV_INTERN
 
2771
ibool
 
2772
page_zip_decompress(
 
2773
/*================*/
 
2774
                                /* out: TRUE on success, FALSE on failure */
 
2775
        page_zip_des_t* page_zip,/* in: data, ssize;
 
2776
                                out: m_start, m_end, m_nonempty, n_blobs */
 
2777
        page_t*         page)   /* out: uncompressed page, may be trashed */
 
2778
{
 
2779
        z_stream        d_stream;
 
2780
        dict_index_t*   index   = NULL;
 
2781
        rec_t**         recs;   /* dense page directory, sorted by address */
 
2782
        ulint           n_dense;/* number of user records on the page */
 
2783
        ulint           trx_id_col = ULINT_UNDEFINED;
 
2784
        mem_heap_t*     heap;
 
2785
        ulint*          offsets;
 
2786
        ullint          usec = ut_time_us(NULL);
 
2787
 
 
2788
        ut_ad(page_zip_simple_validate(page_zip));
 
2789
        UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
 
2790
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
2791
 
 
2792
        /* The dense directory excludes the infimum and supremum records. */
 
2793
        n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW;
 
2794
        if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
 
2795
                          >= page_zip_get_size(page_zip))) {
 
2796
                page_zip_fail(("page_zip_decompress 1: %lu %lu\n",
 
2797
                               (ulong) n_dense,
 
2798
                               (ulong) page_zip_get_size(page_zip)));
 
2799
                return(FALSE);
 
2800
        }
 
2801
 
 
2802
        heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE);
 
2803
        recs = mem_heap_alloc(heap, n_dense * (2 * sizeof *recs));
 
2804
 
 
2805
#ifdef UNIV_ZIP_DEBUG
 
2806
        /* Clear the page. */
 
2807
        memset(page, 0x55, UNIV_PAGE_SIZE);
 
2808
#endif /* UNIV_ZIP_DEBUG */
 
2809
        UNIV_MEM_INVALID(page, UNIV_PAGE_SIZE);
 
2810
        /* Copy the page header. */
 
2811
        memcpy(page, page_zip->data, PAGE_DATA);
 
2812
 
 
2813
        /* Copy the page directory. */
 
2814
        if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
 
2815
                                               recs + n_dense, n_dense))) {
 
2816
zlib_error:
 
2817
                mem_heap_free(heap);
 
2818
                return(FALSE);
 
2819
        }
 
2820
 
 
2821
        /* Copy the infimum and supremum records. */
 
2822
        memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
 
2823
               infimum_extra, sizeof infimum_extra);
 
2824
        if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
 
2825
                rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
 
2826
                                      PAGE_NEW_SUPREMUM);
 
2827
        } else {
 
2828
                rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
 
2829
                                      page_zip_dir_get(page_zip, 0)
 
2830
                                      & PAGE_ZIP_DIR_SLOT_MASK);
 
2831
        }
 
2832
        memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data);
 
2833
        memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
 
2834
               supremum_extra_data, sizeof supremum_extra_data);
 
2835
 
 
2836
        page_zip_set_alloc(&d_stream, heap);
 
2837
 
 
2838
        if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
 
2839
                          != Z_OK)) {
 
2840
                ut_error;
 
2841
        }
 
2842
 
 
2843
        d_stream.next_in = page_zip->data + PAGE_DATA;
 
2844
        /* Subtract the space reserved for
 
2845
        the page header and the end marker of the modification log. */
 
2846
        d_stream.avail_in = page_zip_get_size(page_zip) - (PAGE_DATA + 1);
 
2847
 
 
2848
        d_stream.next_out = page + PAGE_ZIP_START;
 
2849
        d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
 
2850
 
 
2851
        /* Decode the zlib header and the index information. */
 
2852
        if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
 
2853
 
 
2854
                page_zip_fail(("page_zip_decompress:"
 
2855
                               " 1 inflate(Z_BLOCK)=%s\n", d_stream.msg));
 
2856
                goto zlib_error;
 
2857
        }
 
2858
 
 
2859
        if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
 
2860
 
 
2861
                page_zip_fail(("page_zip_decompress:"
 
2862
                               " 2 inflate(Z_BLOCK)=%s\n", d_stream.msg));
 
2863
                goto zlib_error;
 
2864
        }
 
2865
 
 
2866
        index = page_zip_fields_decode(
 
2867
                page + PAGE_ZIP_START, d_stream.next_out,
 
2868
                page_is_leaf(page) ? &trx_id_col : NULL);
 
2869
 
 
2870
        if (UNIV_UNLIKELY(!index)) {
 
2871
 
 
2872
                goto zlib_error;
 
2873
        }
 
2874
 
 
2875
        /* Decompress the user records. */
 
2876
        page_zip->n_blobs = 0;
 
2877
        d_stream.next_out = page + PAGE_ZIP_START;
 
2878
 
 
2879
        {
 
2880
                /* Pre-allocate the offsets for rec_get_offsets_reverse(). */
 
2881
                ulint   n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE
 
2882
                        + dict_index_get_n_fields(index);
 
2883
                offsets = mem_heap_alloc(heap, n * sizeof(ulint));
 
2884
                *offsets = n;
 
2885
        }
 
2886
 
 
2887
        /* Decompress the records in heap_no order. */
 
2888
        if (!page_is_leaf(page)) {
 
2889
                /* This is a node pointer page. */
 
2890
                ulint   info_bits;
 
2891
 
 
2892
                if (UNIV_UNLIKELY
 
2893
                    (!page_zip_decompress_node_ptrs(page_zip, &d_stream,
 
2894
                                                    recs, n_dense, index,
 
2895
                                                    offsets, heap))) {
 
2896
                        goto err_exit;
 
2897
                }
 
2898
 
 
2899
                info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL
 
2900
                        ? REC_INFO_MIN_REC_FLAG : 0;
 
2901
 
 
2902
                if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
 
2903
                                                            info_bits))) {
 
2904
                        goto err_exit;
 
2905
                }
 
2906
        } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
 
2907
                /* This is a leaf page in a secondary index. */
 
2908
                if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream,
 
2909
                                                           recs, n_dense,
 
2910
                                                           index, offsets))) {
 
2911
                        goto err_exit;
 
2912
                }
 
2913
 
 
2914
                if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
 
2915
                                                            page, 0))) {
 
2916
err_exit:
 
2917
                        page_zip_fields_free(index);
 
2918
                        mem_heap_free(heap);
 
2919
                        return(FALSE);
 
2920
                }
 
2921
        } else {
 
2922
                /* This is a leaf page in a clustered index. */
 
2923
                if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip,
 
2924
                                                             &d_stream, recs,
 
2925
                                                             n_dense, index,
 
2926
                                                             trx_id_col,
 
2927
                                                             offsets, heap))) {
 
2928
                        goto err_exit;
 
2929
                }
 
2930
 
 
2931
                if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
 
2932
                                                            page, 0))) {
 
2933
                        goto err_exit;
 
2934
                }
 
2935
        }
 
2936
 
 
2937
        ut_a(page_is_comp(page));
 
2938
        UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
 
2939
 
 
2940
        page_zip_fields_free(index);
 
2941
        mem_heap_free(heap);
 
2942
        {
 
2943
                page_zip_stat_t*        zip_stat
 
2944
                        = &page_zip_stat[page_zip->ssize - 1];
 
2945
                zip_stat->decompressed++;
 
2946
                zip_stat->decompressed_usec += ut_time_us(NULL) - usec;
 
2947
        }
 
2948
 
 
2949
        /* Update the stat counter for LRU policy. */
 
2950
        buf_LRU_stat_inc_unzip();
 
2951
 
 
2952
        return(TRUE);
 
2953
}
 
2954
 
 
2955
#ifdef UNIV_ZIP_DEBUG
 
2956
/**************************************************************************
 
2957
Dump a block of memory on the standard error stream. */
 
2958
static
 
2959
void
 
2960
page_zip_hexdump_func(
 
2961
/*==================*/
 
2962
        const char*     name,   /* in: name of the data structure */
 
2963
        const void*     buf,    /* in: data */
 
2964
        ulint           size)   /* in: length of the data, in bytes */
 
2965
{
 
2966
        const byte*     s       = buf;
 
2967
        ulint           addr;
 
2968
        const ulint     width   = 32; /* bytes per line */
 
2969
 
 
2970
        fprintf(stderr, "%s:\n", name);
 
2971
 
 
2972
        for (addr = 0; addr < size; addr += width) {
 
2973
                ulint   i;
 
2974
 
 
2975
                fprintf(stderr, "%04lx ", (ulong) addr);
 
2976
 
 
2977
                i = ut_min(width, size - addr);
 
2978
 
 
2979
                while (i--) {
 
2980
                        fprintf(stderr, "%02x", *s++);
 
2981
                }
 
2982
 
 
2983
                putc('\n', stderr);
 
2984
        }
 
2985
}
 
2986
 
 
2987
/* Convenience wrapper for page_zip_hexdump_func(): the expression given
as buf is stringified and passed as the name argument. */
#define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size)
 
2988
 
 
2989
/* Flag: make page_zip_validate() compare page headers only */
 
2990
/* page_zip_validate_low() tests this flag once the page header comparison
has passed; when it is set, TRUE is returned without decompressing. */
UNIV_INTERN ibool       page_zip_validate_header_only = FALSE;
 
2991
 
 
2992
/**************************************************************************
 
2993
Check that the compressed and decompressed pages match. */
 
2994
UNIV_INTERN
 
2995
ibool
 
2996
page_zip_validate_low(
 
2997
/*==================*/
 
2998
                                        /* out: TRUE if valid, FALSE if not */
 
2999
        const page_zip_des_t*   page_zip,/* in: compressed page */
 
3000
        const page_t*           page,   /* in: uncompressed page */
 
3001
        ibool                   sloppy) /* in: FALSE=strict,
 
3002
                                        TRUE=ignore the MIN_REC_FLAG */
 
3003
{
 
3004
        page_zip_des_t  temp_page_zip;
 
3005
        byte*           temp_page_buf;
 
3006
        page_t*         temp_page;
 
3007
        ibool           valid;
 
3008
 
 
3009
        if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
 
3010
                   FIL_PAGE_LSN - FIL_PAGE_PREV)
 
3011
            || memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2)
 
3012
            || memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
 
3013
                      PAGE_DATA - FIL_PAGE_DATA)) {
 
3014
                page_zip_fail(("page_zip_validate: page header\n"));
 
3015
                page_zip_hexdump(page_zip, sizeof *page_zip);
 
3016
                page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
 
3017
                page_zip_hexdump(page, UNIV_PAGE_SIZE);
 
3018
                return(FALSE);
 
3019
        }
 
3020
 
 
3021
        ut_a(page_is_comp(page));
 
3022
 
 
3023
        if (page_zip_validate_header_only) {
 
3024
                return(TRUE);
 
3025
        }
 
3026
 
 
3027
        /* page_zip_decompress() expects the uncompressed page to be
 
3028
        UNIV_PAGE_SIZE aligned. */
 
3029
        temp_page_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
 
3030
        temp_page = ut_align(temp_page_buf, UNIV_PAGE_SIZE);
 
3031
 
 
3032
#ifdef UNIV_DEBUG_VALGRIND
 
3033
        /* Get detailed information on the valid bits in case the
 
3034
        UNIV_MEM_ASSERT_RW() checks fail.  The v-bits of page[],
 
3035
        page_zip->data[] or page_zip could be viewed at temp_page[] or
 
3036
        temp_page_zip in a debugger when running valgrind --db-attach. */
 
3037
        VALGRIND_GET_VBITS(page, temp_page, UNIV_PAGE_SIZE);
 
3038
        UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
 
3039
        VALGRIND_GET_VBITS(page_zip, &temp_page_zip, sizeof temp_page_zip);
 
3040
        UNIV_MEM_ASSERT_RW(page_zip, sizeof *page_zip);
 
3041
        VALGRIND_GET_VBITS(page_zip->data, temp_page,
 
3042
                           page_zip_get_size(page_zip));
 
3043
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
3044
#endif /* UNIV_DEBUG_VALGRIND */
 
3045
 
 
3046
        temp_page_zip = *page_zip;
 
3047
        valid = page_zip_decompress(&temp_page_zip, temp_page);
 
3048
        if (!valid) {
 
3049
                fputs("page_zip_validate(): failed to decompress\n", stderr);
 
3050
                goto func_exit;
 
3051
        }
 
3052
        if (page_zip->n_blobs != temp_page_zip.n_blobs) {
 
3053
                page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n",
 
3054
                               page_zip->n_blobs, temp_page_zip.n_blobs));
 
3055
                valid = FALSE;
 
3056
        }
 
3057
#ifdef UNIV_DEBUG
 
3058
        if (page_zip->m_start != temp_page_zip.m_start) {
 
3059
                page_zip_fail(("page_zip_validate: m_start: %u!=%u\n",
 
3060
                               page_zip->m_start, temp_page_zip.m_start));
 
3061
                valid = FALSE;
 
3062
        }
 
3063
#endif /* UNIV_DEBUG */
 
3064
        if (page_zip->m_end != temp_page_zip.m_end) {
 
3065
                page_zip_fail(("page_zip_validate: m_end: %u!=%u\n",
 
3066
                               page_zip->m_end, temp_page_zip.m_end));
 
3067
                valid = FALSE;
 
3068
        }
 
3069
        if (page_zip->m_nonempty != temp_page_zip.m_nonempty) {
 
3070
                page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n",
 
3071
                               page_zip->m_nonempty,
 
3072
                               temp_page_zip.m_nonempty));
 
3073
                valid = FALSE;
 
3074
        }
 
3075
        if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER,
 
3076
                   UNIV_PAGE_SIZE - PAGE_HEADER - FIL_PAGE_DATA_END)) {
 
3077
 
 
3078
                /* In crash recovery, the "minimum record" flag may be
 
3079
                set incorrectly until the mini-transaction is
 
3080
                committed.  Let us tolerate that difference when we
 
3081
                are performing a sloppy validation. */
 
3082
 
 
3083
                if (sloppy) {
 
3084
                        byte    info_bits_diff;
 
3085
                        ulint   offset
 
3086
                                = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
 
3087
                                                    TRUE);
 
3088
                        ut_a(offset >= PAGE_NEW_SUPREMUM);
 
3089
                        offset -= 5 /* REC_NEW_INFO_BITS */;
 
3090
 
 
3091
                        info_bits_diff = page[offset] ^ temp_page[offset];
 
3092
 
 
3093
                        if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
 
3094
                                temp_page[offset] = page[offset];
 
3095
 
 
3096
                                if (!memcmp(page + PAGE_HEADER,
 
3097
                                            temp_page + PAGE_HEADER,
 
3098
                                            UNIV_PAGE_SIZE - PAGE_HEADER
 
3099
                                            - FIL_PAGE_DATA_END)) {
 
3100
 
 
3101
                                        /* Only the minimum record flag
 
3102
                                        differed.  Let us ignore it. */
 
3103
                                        page_zip_fail(("page_zip_validate: "
 
3104
                                                       "min_rec_flag "
 
3105
                                                       "(ignored, "
 
3106
                                                       "%lu,%lu,0x%02lx)\n",
 
3107
                                                       page_get_space_id(page),
 
3108
                                                       page_get_page_no(page),
 
3109
                                                       (ulong) page[offset]));
 
3110
                                        goto func_exit;
 
3111
                                }
 
3112
                        }
 
3113
                }
 
3114
                page_zip_fail(("page_zip_validate: content\n"));
 
3115
                valid = FALSE;
 
3116
        }
 
3117
 
 
3118
func_exit:
 
3119
        if (!valid) {
 
3120
                page_zip_hexdump(page_zip, sizeof *page_zip);
 
3121
                page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
 
3122
                page_zip_hexdump(page, UNIV_PAGE_SIZE);
 
3123
                page_zip_hexdump(temp_page, UNIV_PAGE_SIZE);
 
3124
        }
 
3125
        ut_free(temp_page_buf);
 
3126
        return(valid);
 
3127
}
 
3128
 
 
3129
/**************************************************************************
 
3130
Check that the compressed and decompressed pages match. */
 
3131
UNIV_INTERN
 
3132
ibool
 
3133
page_zip_validate(
 
3134
/*==============*/
 
3135
                                        /* out: TRUE if valid, FALSE if not */
 
3136
        const page_zip_des_t*   page_zip,/* in: compressed page */
 
3137
        const page_t*           page)   /* in: uncompressed page */
 
3138
{
 
3139
        return(page_zip_validate_low(page_zip, page,
 
3140
                                     recv_recovery_is_on()));
 
3141
}
 
3142
#endif /* UNIV_ZIP_DEBUG */
 
3143
 
 
3144
#ifdef UNIV_DEBUG
 
3145
static
 
3146
ibool
 
3147
page_zip_header_cmp(
 
3148
/*================*/
 
3149
                                        /* out: TRUE */
 
3150
        const page_zip_des_t*   page_zip,/* in: compressed page */
 
3151
        const byte*             page)   /* in: uncompressed page */
 
3152
{
 
3153
        ut_ad(!memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
 
3154
                      FIL_PAGE_LSN - FIL_PAGE_PREV));
 
3155
        ut_ad(!memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE,
 
3156
                      2));
 
3157
        ut_ad(!memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
 
3158
                      PAGE_DATA - FIL_PAGE_DATA));
 
3159
 
 
3160
        return(TRUE);
 
3161
}
 
3162
#endif /* UNIV_DEBUG */
 
3163
 
 
3164
/**************************************************************************
 
3165
Write a record on the compressed page that contains externally stored
 
3166
columns.  The data must already have been written to the uncompressed page. */
 
3167
static
 
3168
byte*
 
3169
page_zip_write_rec_ext(
 
3170
/*===================*/
 
3171
                                        /* out: end of modification log */
 
3172
        page_zip_des_t* page_zip,       /* in/out: compressed page */
 
3173
        const page_t*   page,           /* in: page containing rec */
 
3174
        const byte*     rec,            /* in: record being written */
 
3175
        dict_index_t*   index,          /* in: record descriptor */
 
3176
        const ulint*    offsets,        /* in: rec_get_offsets(rec, index) */
 
3177
        ulint           create,         /* in: nonzero=insert, zero=update */
 
3178
        ulint           trx_id_col,     /* in: position of DB_TRX_ID */
 
3179
        ulint           heap_no,        /* in: heap number of rec */
 
3180
        byte*           storage,        /* in: end of dense page directory */
 
3181
        byte*           data)           /* in: end of modification log */
 
3182
{
 
3183
        const byte*     start   = rec;
 
3184
        ulint           i;
 
3185
        ulint           len;
 
3186
        byte*           externs = storage;
 
3187
        ulint           n_ext   = rec_offs_n_extern(offsets);
 
3188
 
 
3189
        ut_ad(rec_offs_validate(rec, index, offsets));
 
3190
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
 
3191
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
 
3192
                           rec_offs_extra_size(offsets));
 
3193
 
 
3194
        externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
 
3195
                * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW);
 
3196
 
 
3197
        /* Note that this will not take into account
 
3198
        the BLOB columns of rec if create==TRUE. */
 
3199
        ut_ad(data + rec_offs_data_size(offsets)
 
3200
              - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
 
3201
              - n_ext * BTR_EXTERN_FIELD_REF_SIZE
 
3202
              < externs - BTR_EXTERN_FIELD_REF_SIZE * page_zip->n_blobs);
 
3203
 
 
3204
        {
 
3205
                ulint   blob_no = page_zip_get_n_prev_extern(
 
3206
                        page_zip, rec, index);
 
3207
                byte*   ext_end = externs - page_zip->n_blobs
 
3208
                        * BTR_EXTERN_FIELD_REF_SIZE;
 
3209
                ut_ad(blob_no <= page_zip->n_blobs);
 
3210
                externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
 
3211
 
 
3212
                if (create) {
 
3213
                        page_zip->n_blobs += n_ext;
 
3214
                        ASSERT_ZERO_BLOB(ext_end - n_ext
 
3215
                                         * BTR_EXTERN_FIELD_REF_SIZE);
 
3216
                        memmove(ext_end - n_ext
 
3217
                                * BTR_EXTERN_FIELD_REF_SIZE,
 
3218
                                ext_end,
 
3219
                                externs - ext_end);
 
3220
                }
 
3221
 
 
3222
                ut_a(blob_no + n_ext <= page_zip->n_blobs);
 
3223
        }
 
3224
 
 
3225
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
 
3226
                const byte*     src;
 
3227
 
 
3228
                if (UNIV_UNLIKELY(i == trx_id_col)) {
 
3229
                        ut_ad(!rec_offs_nth_extern(offsets,
 
3230
                                                   i));
 
3231
                        ut_ad(!rec_offs_nth_extern(offsets,
 
3232
                                                   i + 1));
 
3233
                        /* Locate trx_id and roll_ptr. */
 
3234
                        src = rec_get_nth_field(rec, offsets,
 
3235
                                                i, &len);
 
3236
                        ut_ad(len == DATA_TRX_ID_LEN);
 
3237
                        ut_ad(src + DATA_TRX_ID_LEN
 
3238
                              == rec_get_nth_field(
 
3239
                                      rec, offsets,
 
3240
                                      i + 1, &len));
 
3241
                        ut_ad(len == DATA_ROLL_PTR_LEN);
 
3242
 
 
3243
                        /* Log the preceding fields. */
 
3244
                        ASSERT_ZERO(data, src - start);
 
3245
                        memcpy(data, start, src - start);
 
3246
                        data += src - start;
 
3247
                        start = src + (DATA_TRX_ID_LEN
 
3248
                                       + DATA_ROLL_PTR_LEN);
 
3249
 
 
3250
                        /* Store trx_id and roll_ptr. */
 
3251
                        memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
 
3252
                               * (heap_no - 1),
 
3253
                               src, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
3254
                        i++; /* skip also roll_ptr */
 
3255
                } else if (rec_offs_nth_extern(offsets, i)) {
 
3256
                        src = rec_get_nth_field(rec, offsets,
 
3257
                                                i, &len);
 
3258
 
 
3259
                        ut_ad(dict_index_is_clust(index));
 
3260
                        ut_ad(len
 
3261
                              >= BTR_EXTERN_FIELD_REF_SIZE);
 
3262
                        src += len - BTR_EXTERN_FIELD_REF_SIZE;
 
3263
 
 
3264
                        ASSERT_ZERO(data, src - start);
 
3265
                        memcpy(data, start, src - start);
 
3266
                        data += src - start;
 
3267
                        start = src + BTR_EXTERN_FIELD_REF_SIZE;
 
3268
 
 
3269
                        /* Store the BLOB pointer. */
 
3270
                        externs -= BTR_EXTERN_FIELD_REF_SIZE;
 
3271
                        ut_ad(data < externs);
 
3272
                        memcpy(externs, src, BTR_EXTERN_FIELD_REF_SIZE);
 
3273
                }
 
3274
        }
 
3275
 
 
3276
        /* Log the last bytes of the record. */
 
3277
        len = rec_offs_data_size(offsets) - (start - rec);
 
3278
 
 
3279
        ASSERT_ZERO(data, len);
 
3280
        memcpy(data, start, len);
 
3281
        data += len;
 
3282
 
 
3283
        return(data);
 
3284
}
 
3285
 
 
3286
/**************************************************************************
 
3287
Write an entire record on the compressed page.  The data must already
 
3288
have been written to the uncompressed page. */
 
3289
UNIV_INTERN
 
3290
void
 
3291
page_zip_write_rec(
 
3292
/*===============*/
 
3293
        page_zip_des_t* page_zip,/* in/out: compressed page */
 
3294
        const byte*     rec,    /* in: record being written */
 
3295
        dict_index_t*   index,  /* in: the index the record belongs to */
 
3296
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
 
3297
        ulint           create) /* in: nonzero=insert, zero=update */
 
3298
{
 
3299
        const page_t*   page;
 
3300
        byte*           data;
 
3301
        byte*           storage;
 
3302
        ulint           heap_no;
 
3303
        byte*           slot;
 
3304
 
 
3305
        ut_ad(buf_frame_get_page_zip(rec) == page_zip);
 
3306
        ut_ad(page_zip_simple_validate(page_zip));
 
3307
        ut_ad(page_zip_get_size(page_zip)
 
3308
              > PAGE_DATA + page_zip_dir_size(page_zip));
 
3309
        ut_ad(rec_offs_comp(offsets));
 
3310
        ut_ad(rec_offs_validate(rec, index, offsets));
 
3311
 
 
3312
        ut_ad(page_zip->m_start >= PAGE_DATA);
 
3313
 
 
3314
        page = page_align(rec);
 
3315
 
 
3316
        ut_ad(page_zip_header_cmp(page_zip, page));
 
3317
        ut_ad(page_simple_validate_new((page_t*) page));
 
3318
 
 
3319
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
3320
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
 
3321
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
 
3322
                           rec_offs_extra_size(offsets));
 
3323
 
 
3324
        slot = page_zip_dir_find(page_zip, page_offset(rec));
 
3325
        ut_a(slot);
 
3326
        /* Copy the delete mark. */
 
3327
        if (rec_get_deleted_flag(rec, TRUE)) {
 
3328
                *slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8;
 
3329
        } else {
 
3330
                *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
 
3331
        }
 
3332
 
 
3333
        ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
 
3334
        ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + UNIV_PAGE_SIZE
 
3335
              - PAGE_DIR - PAGE_DIR_SLOT_SIZE
 
3336
              * page_dir_get_n_slots(page));
 
3337
 
 
3338
        heap_no = rec_get_heap_no_new(rec);
 
3339
        ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */
 
3340
        ut_ad(heap_no < page_dir_get_n_heap(page));
 
3341
 
 
3342
        /* Append to the modification log. */
 
3343
        data = page_zip->data + page_zip->m_end;
 
3344
        ut_ad(!*data);
 
3345
 
 
3346
        /* Identify the record by writing its heap number - 1.
 
3347
        0 is reserved to indicate the end of the modification log. */
 
3348
 
 
3349
        if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
 
3350
                *data++ = (byte) (0x80 | (heap_no - 1) >> 7);
 
3351
                ut_ad(!*data);
 
3352
        }
 
3353
        *data++ = (byte) ((heap_no - 1) << 1);
 
3354
        ut_ad(!*data);
 
3355
 
 
3356
        {
 
3357
                const byte*     start   = rec - rec_offs_extra_size(offsets);
 
3358
                const byte*     b       = rec - REC_N_NEW_EXTRA_BYTES;
 
3359
 
 
3360
                /* Write the extra bytes backwards, so that
 
3361
                rec_offs_extra_size() can be easily computed in
 
3362
                page_zip_apply_log() by invoking
 
3363
                rec_get_offsets_reverse(). */
 
3364
 
 
3365
                while (b != start) {
 
3366
                        *data++ = *--b;
 
3367
                        ut_ad(!*data);
 
3368
                }
 
3369
        }
 
3370
 
 
3371
        /* Write the data bytes.  Store the uncompressed bytes separately. */
 
3372
        storage = page_zip->data + page_zip_get_size(page_zip)
 
3373
                - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
 
3374
                * PAGE_ZIP_DIR_SLOT_SIZE;
 
3375
 
 
3376
        if (page_is_leaf(page)) {
 
3377
                ulint           len;
 
3378
 
 
3379
                if (dict_index_is_clust(index)) {
 
3380
                        ulint           trx_id_col;
 
3381
 
 
3382
                        trx_id_col = dict_index_get_sys_col_pos(index,
 
3383
                                                                DATA_TRX_ID);
 
3384
                        ut_ad(trx_id_col != ULINT_UNDEFINED);
 
3385
 
 
3386
                        /* Store separately trx_id, roll_ptr and
 
3387
                        the BTR_EXTERN_FIELD_REF of each BLOB column. */
 
3388
                        if (rec_offs_any_extern(offsets)) {
 
3389
                                data = page_zip_write_rec_ext(
 
3390
                                        page_zip, page,
 
3391
                                        rec, index, offsets, create,
 
3392
                                        trx_id_col, heap_no, storage, data);
 
3393
                        } else {
 
3394
                                /* Locate trx_id and roll_ptr. */
 
3395
                                const byte*     src
 
3396
                                        = rec_get_nth_field(rec, offsets,
 
3397
                                                            trx_id_col, &len);
 
3398
                                ut_ad(len == DATA_TRX_ID_LEN);
 
3399
                                ut_ad(src + DATA_TRX_ID_LEN
 
3400
                                      == rec_get_nth_field(
 
3401
                                              rec, offsets,
 
3402
                                              trx_id_col + 1, &len));
 
3403
                                ut_ad(len == DATA_ROLL_PTR_LEN);
 
3404
 
 
3405
                                /* Log the preceding fields. */
 
3406
                                ASSERT_ZERO(data, src - rec);
 
3407
                                memcpy(data, rec, src - rec);
 
3408
                                data += src - rec;
 
3409
 
 
3410
                                /* Store trx_id and roll_ptr. */
 
3411
                                memcpy(storage
 
3412
                                       - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
 
3413
                                       * (heap_no - 1),
 
3414
                                       src,
 
3415
                                       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
3416
 
 
3417
                                src += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
 
3418
 
 
3419
                                /* Log the last bytes of the record. */
 
3420
                                len = rec_offs_data_size(offsets)
 
3421
                                        - (src - rec);
 
3422
 
 
3423
                                ASSERT_ZERO(data, len);
 
3424
                                memcpy(data, src, len);
 
3425
                                data += len;
 
3426
                        }
 
3427
                } else {
 
3428
                        /* Leaf page of a secondary index:
 
3429
                        no externally stored columns */
 
3430
                        ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
 
3431
                              == ULINT_UNDEFINED);
 
3432
                        ut_ad(!rec_offs_any_extern(offsets));
 
3433
 
 
3434
                        /* Log the entire record. */
 
3435
                        len = rec_offs_data_size(offsets);
 
3436
 
 
3437
                        ASSERT_ZERO(data, len);
 
3438
                        memcpy(data, rec, len);
 
3439
                        data += len;
 
3440
                }
 
3441
        } else {
 
3442
                /* This is a node pointer page. */
 
3443
                ulint   len;
 
3444
 
 
3445
                /* Non-leaf nodes should not have any externally
 
3446
                stored columns. */
 
3447
                ut_ad(!rec_offs_any_extern(offsets));
 
3448
 
 
3449
                /* Copy the data bytes, except node_ptr. */
 
3450
                len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE;
 
3451
                ut_ad(data + len < storage - REC_NODE_PTR_SIZE
 
3452
                      * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW));
 
3453
                ASSERT_ZERO(data, len);
 
3454
                memcpy(data, rec, len);
 
3455
                data += len;
 
3456
 
 
3457
                /* Copy the node pointer to the uncompressed area. */
 
3458
                memcpy(storage - REC_NODE_PTR_SIZE
 
3459
                       * (heap_no - 1),
 
3460
                       rec + len,
 
3461
                       REC_NODE_PTR_SIZE);
 
3462
        }
 
3463
 
 
3464
        ut_a(!*data);
 
3465
        ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip));
 
3466
        page_zip->m_end = data - page_zip->data;
 
3467
        page_zip->m_nonempty = TRUE;
 
3468
 
 
3469
#ifdef UNIV_ZIP_DEBUG
 
3470
        ut_a(page_zip_validate(page_zip, page_align(rec)));
 
3471
#endif /* UNIV_ZIP_DEBUG */
 
3472
}
 
3473
 
 
3474
/***************************************************************
 
3475
Parses a log record of writing a BLOB pointer of a record. */
 
3476
UNIV_INTERN
 
3477
byte*
 
3478
page_zip_parse_write_blob_ptr(
 
3479
/*==========================*/
 
3480
                                /* out: end of log record or NULL */
 
3481
        byte*           ptr,    /* in: redo log buffer */
 
3482
        byte*           end_ptr,/* in: redo log buffer end */
 
3483
        page_t*         page,   /* in/out: uncompressed page */
 
3484
        page_zip_des_t* page_zip)/* in/out: compressed page */
 
3485
{
 
3486
        ulint   offset;
 
3487
        ulint   z_offset;
 
3488
 
 
3489
        ut_ad(!page == !page_zip);
 
3490
 
 
3491
        if (UNIV_UNLIKELY
 
3492
            (end_ptr < ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE))) {
 
3493
 
 
3494
                return(NULL);
 
3495
        }
 
3496
 
 
3497
        offset = mach_read_from_2(ptr);
 
3498
        z_offset = mach_read_from_2(ptr + 2);
 
3499
 
 
3500
        if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
 
3501
            || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
 
3502
            || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
 
3503
corrupt:
 
3504
                recv_sys->found_corrupt_log = TRUE;
 
3505
 
 
3506
                return(NULL);
 
3507
        }
 
3508
 
 
3509
        if (page) {
 
3510
                if (UNIV_UNLIKELY(!page_zip)
 
3511
                    || UNIV_UNLIKELY(!page_is_leaf(page))) {
 
3512
 
 
3513
                        goto corrupt;
 
3514
                }
 
3515
 
 
3516
#ifdef UNIV_ZIP_DEBUG
 
3517
                ut_a(page_zip_validate(page_zip, page));
 
3518
#endif /* UNIV_ZIP_DEBUG */
 
3519
 
 
3520
                memcpy(page + offset,
 
3521
                       ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
 
3522
                memcpy(page_zip->data + z_offset,
 
3523
                       ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
 
3524
 
 
3525
#ifdef UNIV_ZIP_DEBUG
 
3526
                ut_a(page_zip_validate(page_zip, page));
 
3527
#endif /* UNIV_ZIP_DEBUG */
 
3528
        }
 
3529
 
 
3530
        return(ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE));
 
3531
}
 
3532
 
 
3533
/**************************************************************************
 
3534
Write a BLOB pointer of a record on the leaf page of a clustered index.
 
3535
The information must already have been updated on the uncompressed page. */
 
3536
UNIV_INTERN
 
3537
void
 
3538
page_zip_write_blob_ptr(
 
3539
/*====================*/
 
3540
        page_zip_des_t* page_zip,/* in/out: compressed page */
 
3541
        const byte*     rec,    /* in/out: record whose data is being
 
3542
                                written */
 
3543
        dict_index_t*   index,  /* in: index of the page */
 
3544
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
 
3545
        ulint           n,      /* in: column index */
 
3546
        mtr_t*          mtr)    /* in: mini-transaction handle,
 
3547
                                or NULL if no logging is needed */
 
3548
{
 
3549
        const byte*     field;
 
3550
        byte*           externs;
 
3551
        const page_t*   page    = page_align(rec);
 
3552
        ulint           blob_no;
 
3553
        ulint           len;
 
3554
 
 
3555
        ut_ad(buf_frame_get_page_zip(rec) == page_zip);
 
3556
        ut_ad(page_simple_validate_new((page_t*) page));
 
3557
        ut_ad(page_zip_simple_validate(page_zip));
 
3558
        ut_ad(page_zip_get_size(page_zip)
 
3559
              > PAGE_DATA + page_zip_dir_size(page_zip));
 
3560
        ut_ad(rec_offs_comp(offsets));
 
3561
        ut_ad(rec_offs_validate(rec, NULL, offsets));
 
3562
        ut_ad(rec_offs_any_extern(offsets));
 
3563
        ut_ad(rec_offs_nth_extern(offsets, n));
 
3564
 
 
3565
        ut_ad(page_zip->m_start >= PAGE_DATA);
 
3566
        ut_ad(page_zip_header_cmp(page_zip, page));
 
3567
 
 
3568
        ut_ad(page_is_leaf(page));
 
3569
        ut_ad(dict_index_is_clust(index));
 
3570
 
 
3571
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
3572
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
 
3573
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
 
3574
                           rec_offs_extra_size(offsets));
 
3575
 
 
3576
        blob_no = page_zip_get_n_prev_extern(page_zip, rec, index)
 
3577
                + rec_get_n_extern_new(rec, index, n);
 
3578
        ut_a(blob_no < page_zip->n_blobs);
 
3579
 
 
3580
        externs = page_zip->data + page_zip_get_size(page_zip)
 
3581
                - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
 
3582
                * (PAGE_ZIP_DIR_SLOT_SIZE
 
3583
                   + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
3584
 
 
3585
        field = rec_get_nth_field(rec, offsets, n, &len);
 
3586
 
 
3587
        externs -= (blob_no + 1) * BTR_EXTERN_FIELD_REF_SIZE;
 
3588
        field += len - BTR_EXTERN_FIELD_REF_SIZE;
 
3589
 
 
3590
        memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE);
 
3591
 
 
3592
#ifdef UNIV_ZIP_DEBUG
 
3593
        ut_a(page_zip_validate(page_zip, page));
 
3594
#endif /* UNIV_ZIP_DEBUG */
 
3595
 
 
3596
        if (mtr) {
 
3597
                byte*   log_ptr = mlog_open(
 
3598
                        mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE);
 
3599
                if (UNIV_UNLIKELY(!log_ptr)) {
 
3600
                        return;
 
3601
                }
 
3602
 
 
3603
                log_ptr = mlog_write_initial_log_record_fast(
 
3604
                        (byte*) field, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr);
 
3605
                mach_write_to_2(log_ptr, page_offset(field));
 
3606
                log_ptr += 2;
 
3607
                mach_write_to_2(log_ptr, externs - page_zip->data);
 
3608
                log_ptr += 2;
 
3609
                memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE);
 
3610
                log_ptr += BTR_EXTERN_FIELD_REF_SIZE;
 
3611
                mlog_close(mtr, log_ptr);
 
3612
        }
 
3613
}
 
3614
 
 
3615
/***************************************************************
 
3616
Parses a log record of writing the node pointer of a record. */
 
3617
UNIV_INTERN
 
3618
byte*
 
3619
page_zip_parse_write_node_ptr(
 
3620
/*==========================*/
 
3621
                                /* out: end of log record or NULL */
 
3622
        byte*           ptr,    /* in: redo log buffer */
 
3623
        byte*           end_ptr,/* in: redo log buffer end */
 
3624
        page_t*         page,   /* in/out: uncompressed page */
 
3625
        page_zip_des_t* page_zip)/* in/out: compressed page */
 
3626
{
 
3627
        ulint   offset;
 
3628
        ulint   z_offset;
 
3629
 
 
3630
        ut_ad(!page == !page_zip);
 
3631
 
 
3632
        if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) {
 
3633
 
 
3634
                return(NULL);
 
3635
        }
 
3636
 
 
3637
        offset = mach_read_from_2(ptr);
 
3638
        z_offset = mach_read_from_2(ptr + 2);
 
3639
 
 
3640
        if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
 
3641
            || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
 
3642
            || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
 
3643
corrupt:
 
3644
                recv_sys->found_corrupt_log = TRUE;
 
3645
 
 
3646
                return(NULL);
 
3647
        }
 
3648
 
 
3649
        if (page) {
 
3650
                byte*   storage_end;
 
3651
                byte*   field;
 
3652
                byte*   storage;
 
3653
                ulint   heap_no;
 
3654
 
 
3655
                if (UNIV_UNLIKELY(!page_zip)
 
3656
                    || UNIV_UNLIKELY(page_is_leaf(page))) {
 
3657
 
 
3658
                        goto corrupt;
 
3659
                }
 
3660
 
 
3661
#ifdef UNIV_ZIP_DEBUG
 
3662
                ut_a(page_zip_validate(page_zip, page));
 
3663
#endif /* UNIV_ZIP_DEBUG */
 
3664
 
 
3665
                field = page + offset;
 
3666
                storage = page_zip->data + z_offset;
 
3667
 
 
3668
                storage_end = page_zip->data + page_zip_get_size(page_zip)
 
3669
                        - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
 
3670
                        * PAGE_ZIP_DIR_SLOT_SIZE;
 
3671
 
 
3672
                heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE;
 
3673
 
 
3674
                if (UNIV_UNLIKELY((storage_end - storage) % REC_NODE_PTR_SIZE)
 
3675
                    || UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW)
 
3676
                    || UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) {
 
3677
 
 
3678
                        goto corrupt;
 
3679
                }
 
3680
 
 
3681
                memcpy(field, ptr + 4, REC_NODE_PTR_SIZE);
 
3682
                memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE);
 
3683
 
 
3684
#ifdef UNIV_ZIP_DEBUG
 
3685
                ut_a(page_zip_validate(page_zip, page));
 
3686
#endif /* UNIV_ZIP_DEBUG */
 
3687
        }
 
3688
 
 
3689
        return(ptr + (2 + 2 + REC_NODE_PTR_SIZE));
 
3690
}
 
3691
 
 
3692
/**************************************************************************
 
3693
Write the node pointer of a record on a non-leaf compressed page. */
 
3694
UNIV_INTERN
 
3695
void
 
3696
page_zip_write_node_ptr(
 
3697
/*====================*/
 
3698
        page_zip_des_t* page_zip,/* in/out: compressed page */
 
3699
        byte*           rec,    /* in/out: record */
 
3700
        ulint           size,   /* in: data size of rec */
 
3701
        ulint           ptr,    /* in: node pointer */
 
3702
        mtr_t*          mtr)    /* in: mini-transaction, or NULL */
 
3703
{
 
3704
        byte*   field;
 
3705
        byte*   storage;
 
3706
        page_t* page    = page_align(rec);
 
3707
 
 
3708
        ut_ad(buf_frame_get_page_zip(rec) == page_zip);
 
3709
        ut_ad(page_simple_validate_new(page));
 
3710
        ut_ad(page_zip_simple_validate(page_zip));
 
3711
        ut_ad(page_zip_get_size(page_zip)
 
3712
              > PAGE_DATA + page_zip_dir_size(page_zip));
 
3713
        ut_ad(page_rec_is_comp(rec));
 
3714
 
 
3715
        ut_ad(page_zip->m_start >= PAGE_DATA);
 
3716
        ut_ad(page_zip_header_cmp(page_zip, page));
 
3717
 
 
3718
        ut_ad(!page_is_leaf(page));
 
3719
 
 
3720
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
3721
        UNIV_MEM_ASSERT_RW(rec, size);
 
3722
 
 
3723
        storage = page_zip->data + page_zip_get_size(page_zip)
 
3724
                - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
 
3725
                * PAGE_ZIP_DIR_SLOT_SIZE
 
3726
                - (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
 
3727
        field = rec + size - REC_NODE_PTR_SIZE;
 
3728
 
 
3729
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
 
3730
        ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE));
 
3731
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
 
3732
#if REC_NODE_PTR_SIZE != 4
 
3733
# error "REC_NODE_PTR_SIZE != 4"
 
3734
#endif
 
3735
        mach_write_to_4(field, ptr);
 
3736
        memcpy(storage, field, REC_NODE_PTR_SIZE);
 
3737
 
 
3738
        if (mtr) {
 
3739
                byte*   log_ptr = mlog_open(mtr,
 
3740
                                            11 + 2 + 2 + REC_NODE_PTR_SIZE);
 
3741
                if (UNIV_UNLIKELY(!log_ptr)) {
 
3742
                        return;
 
3743
                }
 
3744
 
 
3745
                log_ptr = mlog_write_initial_log_record_fast(
 
3746
                        field, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr);
 
3747
                mach_write_to_2(log_ptr, page_offset(field));
 
3748
                log_ptr += 2;
 
3749
                mach_write_to_2(log_ptr, storage - page_zip->data);
 
3750
                log_ptr += 2;
 
3751
                memcpy(log_ptr, field, REC_NODE_PTR_SIZE);
 
3752
                log_ptr += REC_NODE_PTR_SIZE;
 
3753
                mlog_close(mtr, log_ptr);
 
3754
        }
 
3755
}
 
3756
 
 
3757
/**************************************************************************
 
3758
Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
 
3759
UNIV_INTERN
 
3760
void
 
3761
page_zip_write_trx_id_and_roll_ptr(
 
3762
/*===============================*/
 
3763
        page_zip_des_t* page_zip,/* in/out: compressed page */
 
3764
        byte*           rec,    /* in/out: record */
 
3765
        const ulint*    offsets,/* in: rec_get_offsets(rec, index) */
 
3766
        ulint           trx_id_col,/* in: column number of TRX_ID in rec */
 
3767
        dulint          trx_id, /* in: transaction identifier */
 
3768
        dulint          roll_ptr)/* in: roll_ptr */
 
3769
{
 
3770
        byte*   field;
 
3771
        byte*   storage;
 
3772
        page_t* page    = page_align(rec);
 
3773
        ulint   len;
 
3774
 
 
3775
        ut_ad(buf_frame_get_page_zip(rec) == page_zip);
 
3776
        ut_ad(page_simple_validate_new(page));
 
3777
        ut_ad(page_zip_simple_validate(page_zip));
 
3778
        ut_ad(page_zip_get_size(page_zip)
 
3779
              > PAGE_DATA + page_zip_dir_size(page_zip));
 
3780
        ut_ad(rec_offs_validate(rec, NULL, offsets));
 
3781
        ut_ad(rec_offs_comp(offsets));
 
3782
 
 
3783
        ut_ad(page_zip->m_start >= PAGE_DATA);
 
3784
        ut_ad(page_zip_header_cmp(page_zip, page));
 
3785
 
 
3786
        ut_ad(page_is_leaf(page));
 
3787
 
 
3788
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
3789
 
 
3790
        storage = page_zip->data + page_zip_get_size(page_zip)
 
3791
                - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
 
3792
                * PAGE_ZIP_DIR_SLOT_SIZE
 
3793
                - (rec_get_heap_no_new(rec) - 1)
 
3794
                * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
3795
 
 
3796
#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
 
3797
# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
 
3798
#endif
 
3799
        field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
 
3800
        ut_ad(len == DATA_TRX_ID_LEN);
 
3801
        ut_ad(field + DATA_TRX_ID_LEN
 
3802
              == rec_get_nth_field(rec, offsets, trx_id_col + 1, &len));
 
3803
        ut_ad(len == DATA_ROLL_PTR_LEN);
 
3804
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
 
3805
        ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
 
3806
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
 
3807
#if DATA_TRX_ID_LEN != 6
 
3808
# error "DATA_TRX_ID_LEN != 6"
 
3809
#endif
 
3810
        mach_write_to_6(field, trx_id);
 
3811
#if DATA_ROLL_PTR_LEN != 7
 
3812
# error "DATA_ROLL_PTR_LEN != 7"
 
3813
#endif
 
3814
        mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
 
3815
        memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
3816
 
 
3817
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
 
3818
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
 
3819
                           rec_offs_extra_size(offsets));
 
3820
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
3821
}
 
3822
 
 
3823
#ifdef UNIV_ZIP_DEBUG
 
3824
/* Set this variable in a debugger to disable page_zip_clear_rec().
 
3825
The only observable effect should be the compression ratio due to
 
3826
deleted records not being zeroed out.  In rare cases, there can be
 
3827
page_zip_validate() failures on the node_ptr, trx_id and roll_ptr
 
3828
columns if the space is reallocated for a smaller record. */
 
3829
UNIV_INTERN ibool       page_zip_clear_rec_disable;
 
3830
#endif /* UNIV_ZIP_DEBUG */
 
3831
 
 
3832
/**************************************************************************
 
3833
Clear an area on the uncompressed and compressed page, if possible. */
 
3834
static
 
3835
void
 
3836
page_zip_clear_rec(
 
3837
/*===============*/
 
3838
        page_zip_des_t* page_zip,/* in/out: compressed page */
 
3839
        byte*           rec,    /* in: record to clear */
 
3840
        dict_index_t*   index,  /* in: index of rec */
 
3841
        const ulint*    offsets)/* in: rec_get_offsets(rec, index) */
 
3842
{
 
3843
        ulint   heap_no;
 
3844
        page_t* page    = page_align(rec);
 
3845
        /* page_zip_validate() would fail here if a record
 
3846
        containing externally stored columns is being deleted. */
 
3847
        ut_ad(rec_offs_validate(rec, index, offsets));
 
3848
        ut_ad(!page_zip_dir_find(page_zip, page_offset(rec)));
 
3849
        ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec)));
 
3850
        ut_ad(page_zip_header_cmp(page_zip, page));
 
3851
 
 
3852
        heap_no = rec_get_heap_no_new(rec);
 
3853
        ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
 
3854
 
 
3855
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
3856
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
 
3857
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
 
3858
                           rec_offs_extra_size(offsets));
 
3859
 
 
3860
        if (
 
3861
#ifdef UNIV_ZIP_DEBUG
 
3862
            !page_zip_clear_rec_disable &&
 
3863
#endif /* UNIV_ZIP_DEBUG */
 
3864
            page_zip->m_end
 
3865
            + 1 + ((heap_no - 1) >= 64)/* size of the log entry */
 
3866
            + page_zip_get_trailer_len(page_zip,
 
3867
                                       dict_index_is_clust(index), NULL)
 
3868
            < page_zip_get_size(page_zip)) {
 
3869
                byte*   data;
 
3870
 
 
3871
                /* Clear only the data bytes, because the allocator and
 
3872
                the decompressor depend on the extra bytes. */
 
3873
                memset(rec, 0, rec_offs_data_size(offsets));
 
3874
 
 
3875
                if (!page_is_leaf(page)) {
 
3876
                        /* Clear node_ptr on the compressed page. */
 
3877
                        byte*   storage = page_zip->data
 
3878
                                + page_zip_get_size(page_zip)
 
3879
                                - (page_dir_get_n_heap(page)
 
3880
                                   - PAGE_HEAP_NO_USER_LOW)
 
3881
                                * PAGE_ZIP_DIR_SLOT_SIZE;
 
3882
 
 
3883
                        memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
 
3884
                               0, REC_NODE_PTR_SIZE);
 
3885
                } else if (dict_index_is_clust(index)) {
 
3886
                        /* Clear trx_id and roll_ptr on the compressed page. */
 
3887
                        byte*   storage = page_zip->data
 
3888
                                + page_zip_get_size(page_zip)
 
3889
                                - (page_dir_get_n_heap(page)
 
3890
                                   - PAGE_HEAP_NO_USER_LOW)
 
3891
                                * PAGE_ZIP_DIR_SLOT_SIZE;
 
3892
 
 
3893
                        memset(storage - (heap_no - 1)
 
3894
                               * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
 
3895
                               0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
3896
                }
 
3897
 
 
3898
                /* Log that the data was zeroed out. */
 
3899
                data = page_zip->data + page_zip->m_end;
 
3900
                ut_ad(!*data);
 
3901
                if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
 
3902
                        *data++ = (byte) (0x80 | (heap_no - 1) >> 7);
 
3903
                        ut_ad(!*data);
 
3904
                }
 
3905
                *data++ = (byte) ((heap_no - 1) << 1 | 1);
 
3906
                ut_ad(!*data);
 
3907
                ut_ad((ulint) (data - page_zip->data)
 
3908
                      < page_zip_get_size(page_zip));
 
3909
                page_zip->m_end = data - page_zip->data;
 
3910
                page_zip->m_nonempty = TRUE;
 
3911
        } else if (page_is_leaf(page) && dict_index_is_clust(index)) {
 
3912
                /* Do not clear the record, because there is not enough space
 
3913
                to log the operation. */
 
3914
 
 
3915
                if (rec_offs_any_extern(offsets)) {
 
3916
                        ulint   i;
 
3917
 
 
3918
                        for (i = rec_offs_n_fields(offsets); i--; ) {
 
3919
                                /* Clear all BLOB pointers in order to make
 
3920
                                page_zip_validate() pass. */
 
3921
                                if (rec_offs_nth_extern(offsets, i)) {
 
3922
                                        ulint   len;
 
3923
                                        byte*   field = rec_get_nth_field(
 
3924
                                                rec, offsets, i, &len);
 
3925
                                        memset(field + len
 
3926
                                               - BTR_EXTERN_FIELD_REF_SIZE,
 
3927
                                               0, BTR_EXTERN_FIELD_REF_SIZE);
 
3928
                                }
 
3929
                        }
 
3930
                }
 
3931
        }
 
3932
 
 
3933
#ifdef UNIV_ZIP_DEBUG
 
3934
        ut_a(page_zip_validate(page_zip, page));
 
3935
#endif /* UNIV_ZIP_DEBUG */
 
3936
}
 
3937
 
 
3938
/**************************************************************************
 
3939
Write the "deleted" flag of a record on a compressed page.  The flag must
 
3940
already have been written on the uncompressed page. */
 
3941
UNIV_INTERN
 
3942
void
 
3943
page_zip_rec_set_deleted(
 
3944
/*=====================*/
 
3945
        page_zip_des_t* page_zip,/* in/out: compressed page */
 
3946
        const byte*     rec,    /* in: record on the uncompressed page */
 
3947
        ulint           flag)   /* in: the deleted flag (nonzero=TRUE) */
 
3948
{
 
3949
        byte*   slot = page_zip_dir_find(page_zip, page_offset(rec));
 
3950
        ut_a(slot);
 
3951
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
3952
        if (flag) {
 
3953
                *slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8);
 
3954
        } else {
 
3955
                *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
 
3956
        }
 
3957
#ifdef UNIV_ZIP_DEBUG
 
3958
        ut_a(page_zip_validate(page_zip, page_align(rec)));
 
3959
#endif /* UNIV_ZIP_DEBUG */
 
3960
}
 
3961
 
 
3962
/**************************************************************************
 
3963
Write the "owned" flag of a record on a compressed page.  The n_owned field
 
3964
must already have been written on the uncompressed page. */
 
3965
UNIV_INTERN
 
3966
void
 
3967
page_zip_rec_set_owned(
 
3968
/*===================*/
 
3969
        page_zip_des_t* page_zip,/* in/out: compressed page */
 
3970
        const byte*     rec,    /* in: record on the uncompressed page */
 
3971
        ulint           flag)   /* in: the owned flag (nonzero=TRUE) */
 
3972
{
 
3973
        byte*   slot = page_zip_dir_find(page_zip, page_offset(rec));
 
3974
        ut_a(slot);
 
3975
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
3976
        if (flag) {
 
3977
                *slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
 
3978
        } else {
 
3979
                *slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8);
 
3980
        }
 
3981
}
 
3982
 
 
3983
/**************************************************************************
 
3984
Insert a record to the dense page directory. */
 
3985
UNIV_INTERN
 
3986
void
 
3987
page_zip_dir_insert(
 
3988
/*================*/
 
3989
        page_zip_des_t* page_zip,/* in/out: compressed page */
 
3990
        const byte*     prev_rec,/* in: record after which to insert */
 
3991
        const byte*     free_rec,/* in: record from which rec was
 
3992
                                allocated, or NULL */
 
3993
        byte*           rec)    /* in: record to insert */
 
3994
{
 
3995
        ulint   n_dense;
 
3996
        byte*   slot_rec;
 
3997
        byte*   slot_free;
 
3998
 
 
3999
        ut_ad(prev_rec != rec);
 
4000
        ut_ad(page_rec_get_next((rec_t*) prev_rec) == rec);
 
4001
        ut_ad(page_zip_simple_validate(page_zip));
 
4002
 
 
4003
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
4004
 
 
4005
        if (page_rec_is_infimum(prev_rec)) {
 
4006
                /* Use the first slot. */
 
4007
                slot_rec = page_zip->data + page_zip_get_size(page_zip);
 
4008
        } else {
 
4009
                byte*   end     = page_zip->data + page_zip_get_size(page_zip);
 
4010
                byte*   start   = end - page_zip_dir_user_size(page_zip);
 
4011
 
 
4012
                if (UNIV_LIKELY(!free_rec)) {
 
4013
                        /* PAGE_N_RECS was already incremented
 
4014
                        in page_cur_insert_rec_zip(), but the
 
4015
                        dense directory slot at that position
 
4016
                        contains garbage.  Skip it. */
 
4017
                        start += PAGE_ZIP_DIR_SLOT_SIZE;
 
4018
                }
 
4019
 
 
4020
                slot_rec = page_zip_dir_find_low(start, end,
 
4021
                                                 page_offset(prev_rec));
 
4022
                ut_a(slot_rec);
 
4023
        }
 
4024
 
 
4025
        /* Read the old n_dense (n_heap may have been incremented). */
 
4026
        n_dense = page_dir_get_n_heap(page_zip->data)
 
4027
                - (PAGE_HEAP_NO_USER_LOW + 1);
 
4028
 
 
4029
        if (UNIV_LIKELY_NULL(free_rec)) {
 
4030
                /* The record was allocated from the free list.
 
4031
                Shift the dense directory only up to that slot.
 
4032
                Note that in this case, n_dense is actually
 
4033
                off by one, because page_cur_insert_rec_zip()
 
4034
                did not increment n_heap. */
 
4035
                ut_ad(rec_get_heap_no_new(rec) < n_dense + 1
 
4036
                      + PAGE_HEAP_NO_USER_LOW);
 
4037
                ut_ad(rec >= free_rec);
 
4038
                slot_free = page_zip_dir_find(page_zip, page_offset(free_rec));
 
4039
                ut_ad(slot_free);
 
4040
                slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
 
4041
        } else {
 
4042
                /* The record was allocated from the heap.
 
4043
                Shift the entire dense directory. */
 
4044
                ut_ad(rec_get_heap_no_new(rec) == n_dense
 
4045
                      + PAGE_HEAP_NO_USER_LOW);
 
4046
 
 
4047
                /* Shift to the end of the dense page directory. */
 
4048
                slot_free = page_zip->data + page_zip_get_size(page_zip)
 
4049
                        - PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
 
4050
        }
 
4051
 
 
4052
        /* Shift the dense directory to allocate place for rec. */
 
4053
        memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free,
 
4054
                slot_rec - slot_free);
 
4055
 
 
4056
        /* Write the entry for the inserted record.
 
4057
        The "owned" and "deleted" flags must be zero. */
 
4058
        mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, page_offset(rec));
 
4059
}
 
4060
 
 
4061
/**************************************************************************
 
4062
Shift the dense page directory and the array of BLOB pointers
 
4063
when a record is deleted. */
 
4064
UNIV_INTERN
 
4065
void
 
4066
page_zip_dir_delete(
 
4067
/*================*/
 
4068
        page_zip_des_t* page_zip,/* in/out: compressed page */
 
4069
        byte*           rec,    /* in: record to delete */
 
4070
        dict_index_t*   index,  /* in: index of rec */
 
4071
        const ulint*    offsets,/* in: rec_get_offsets(rec) */
 
4072
        const byte*     free)   /* in: previous start of the free list */
 
4073
{
 
4074
        byte*   slot_rec;
 
4075
        byte*   slot_free;
 
4076
        ulint   n_ext;
 
4077
        page_t* page    = page_align(rec);
 
4078
 
 
4079
        ut_ad(rec_offs_validate(rec, index, offsets));
 
4080
        ut_ad(rec_offs_comp(offsets));
 
4081
 
 
4082
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
4083
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
 
4084
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
 
4085
                           rec_offs_extra_size(offsets));
 
4086
 
 
4087
        slot_rec = page_zip_dir_find(page_zip, page_offset(rec));
 
4088
 
 
4089
        ut_a(slot_rec);
 
4090
 
 
4091
        /* This could not be done before page_zip_dir_find(). */
 
4092
        page_header_set_field(page, page_zip, PAGE_N_RECS,
 
4093
                              (ulint)(page_get_n_recs(page) - 1));
 
4094
 
 
4095
        if (UNIV_UNLIKELY(!free)) {
 
4096
                /* Make the last slot the start of the free list. */
 
4097
                slot_free = page_zip->data + page_zip_get_size(page_zip)
 
4098
                        - PAGE_ZIP_DIR_SLOT_SIZE
 
4099
                        * (page_dir_get_n_heap(page_zip->data)
 
4100
                           - PAGE_HEAP_NO_USER_LOW);
 
4101
        } else {
 
4102
                slot_free = page_zip_dir_find_free(page_zip,
 
4103
                                                   page_offset(free));
 
4104
                ut_a(slot_free < slot_rec);
 
4105
                /* Grow the free list by one slot by moving the start. */
 
4106
                slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
 
4107
        }
 
4108
 
 
4109
        if (UNIV_LIKELY(slot_rec > slot_free)) {
 
4110
                memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
 
4111
                        slot_free,
 
4112
                        slot_rec - slot_free);
 
4113
        }
 
4114
 
 
4115
        /* Write the entry for the deleted record.
 
4116
        The "owned" and "deleted" flags will be cleared. */
 
4117
        mach_write_to_2(slot_free, page_offset(rec));
 
4118
 
 
4119
        if (!page_is_leaf(page) || !dict_index_is_clust(index)) {
 
4120
                ut_ad(!rec_offs_any_extern(offsets));
 
4121
                goto skip_blobs;
 
4122
        }
 
4123
 
 
4124
        n_ext = rec_offs_n_extern(offsets);
 
4125
        if (UNIV_UNLIKELY(n_ext)) {
 
4126
                /* Shift and zero fill the array of BLOB pointers. */
 
4127
                ulint   blob_no;
 
4128
                byte*   externs;
 
4129
                byte*   ext_end;
 
4130
 
 
4131
                blob_no = page_zip_get_n_prev_extern(page_zip, rec, index);
 
4132
                ut_a(blob_no + n_ext <= page_zip->n_blobs);
 
4133
 
 
4134
                externs = page_zip->data + page_zip_get_size(page_zip)
 
4135
                        - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
 
4136
                        * (PAGE_ZIP_DIR_SLOT_SIZE
 
4137
                           + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
4138
 
 
4139
                ext_end = externs - page_zip->n_blobs
 
4140
                        * BTR_EXTERN_FIELD_REF_SIZE;
 
4141
                externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
 
4142
 
 
4143
                page_zip->n_blobs -= n_ext;
 
4144
                /* Shift and zero fill the array. */
 
4145
                memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end,
 
4146
                        (page_zip->n_blobs - blob_no)
 
4147
                        * BTR_EXTERN_FIELD_REF_SIZE);
 
4148
                memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE);
 
4149
        }
 
4150
 
 
4151
skip_blobs:
 
4152
        /* The compression algorithm expects info_bits and n_owned
 
4153
        to be 0 for deleted records. */
 
4154
        rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
 
4155
 
 
4156
        page_zip_clear_rec(page_zip, rec, index, offsets);
 
4157
}
 
4158
 
 
4159
/**************************************************************************
 
4160
Add a slot to the dense page directory. */
 
4161
UNIV_INTERN
 
4162
void
 
4163
page_zip_dir_add_slot(
 
4164
/*==================*/
 
4165
        page_zip_des_t* page_zip,       /* in/out: compressed page */
 
4166
        ulint           is_clustered)   /* in: nonzero for clustered index,
 
4167
                                        zero for others */
 
4168
{
 
4169
        ulint   n_dense;
 
4170
        byte*   dir;
 
4171
        byte*   stored;
 
4172
 
 
4173
        ut_ad(page_is_comp(page_zip->data));
 
4174
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
4175
 
 
4176
        /* Read the old n_dense (n_heap has already been incremented). */
 
4177
        n_dense = page_dir_get_n_heap(page_zip->data)
 
4178
                - (PAGE_HEAP_NO_USER_LOW + 1);
 
4179
 
 
4180
        dir = page_zip->data + page_zip_get_size(page_zip)
 
4181
                - PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
 
4182
 
 
4183
        if (!page_is_leaf(page_zip->data)) {
 
4184
                ut_ad(!page_zip->n_blobs);
 
4185
                stored = dir - n_dense * REC_NODE_PTR_SIZE;
 
4186
        } else if (UNIV_UNLIKELY(is_clustered)) {
 
4187
                /* Move the BLOB pointer array backwards to make space for the
 
4188
                roll_ptr and trx_id columns and the dense directory slot. */
 
4189
                byte*   externs;
 
4190
 
 
4191
                stored = dir - n_dense
 
4192
                        * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
4193
                externs = stored
 
4194
                        - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
 
4195
                ASSERT_ZERO(externs
 
4196
                            - (PAGE_ZIP_DIR_SLOT_SIZE
 
4197
                               + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
 
4198
                            PAGE_ZIP_DIR_SLOT_SIZE
 
4199
                            + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
 
4200
                memmove(externs - (PAGE_ZIP_DIR_SLOT_SIZE
 
4201
                                   + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
 
4202
                        externs, stored - externs);
 
4203
        } else {
 
4204
                stored = dir
 
4205
                        - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
 
4206
                ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE,
 
4207
                            PAGE_ZIP_DIR_SLOT_SIZE);
 
4208
        }
 
4209
 
 
4210
        /* Move the uncompressed area backwards to make space
 
4211
        for one directory slot. */
 
4212
        memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, dir - stored);
 
4213
}
 
4214
 
 
4215
/***************************************************************
 
4216
Parses a log record of writing to the header of a page. */
 
4217
UNIV_INTERN
 
4218
byte*
 
4219
page_zip_parse_write_header(
 
4220
/*========================*/
 
4221
                                /* out: end of log record or NULL */
 
4222
        byte*           ptr,    /* in: redo log buffer */
 
4223
        byte*           end_ptr,/* in: redo log buffer end */
 
4224
        page_t*         page,   /* in/out: uncompressed page */
 
4225
        page_zip_des_t* page_zip)/* in/out: compressed page */
 
4226
{
 
4227
        ulint   offset;
 
4228
        ulint   len;
 
4229
 
 
4230
        ut_ad(ptr && end_ptr);
 
4231
        ut_ad(!page == !page_zip);
 
4232
 
 
4233
        if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) {
 
4234
 
 
4235
                return(NULL);
 
4236
        }
 
4237
 
 
4238
        offset = (ulint) *ptr++;
 
4239
        len = (ulint) *ptr++;
 
4240
 
 
4241
        if (UNIV_UNLIKELY(!len) || UNIV_UNLIKELY(offset + len >= PAGE_DATA)) {
 
4242
corrupt:
 
4243
                recv_sys->found_corrupt_log = TRUE;
 
4244
 
 
4245
                return(NULL);
 
4246
        }
 
4247
 
 
4248
        if (UNIV_UNLIKELY(end_ptr < ptr + len)) {
 
4249
 
 
4250
                return(NULL);
 
4251
        }
 
4252
 
 
4253
        if (page) {
 
4254
                if (UNIV_UNLIKELY(!page_zip)) {
 
4255
 
 
4256
                        goto corrupt;
 
4257
                }
 
4258
#ifdef UNIV_ZIP_DEBUG
 
4259
                ut_a(page_zip_validate(page_zip, page));
 
4260
#endif /* UNIV_ZIP_DEBUG */
 
4261
 
 
4262
                memcpy(page + offset, ptr, len);
 
4263
                memcpy(page_zip->data + offset, ptr, len);
 
4264
 
 
4265
#ifdef UNIV_ZIP_DEBUG
 
4266
                ut_a(page_zip_validate(page_zip, page));
 
4267
#endif /* UNIV_ZIP_DEBUG */
 
4268
        }
 
4269
 
 
4270
        return(ptr + len);
 
4271
}
 
4272
 
 
4273
/**************************************************************************
 
4274
Write a log record of writing to the uncompressed header portion of a page. */
 
4275
UNIV_INTERN
 
4276
void
 
4277
page_zip_write_header_log(
 
4278
/*======================*/
 
4279
        const byte*     data,   /* in: data on the uncompressed page */
 
4280
        ulint           length, /* in: length of the data */
 
4281
        mtr_t*          mtr)    /* in: mini-transaction */
 
4282
{
 
4283
        byte*   log_ptr = mlog_open(mtr, 11 + 1 + 1);
 
4284
        ulint   offset  = page_offset(data);
 
4285
 
 
4286
        ut_ad(offset < PAGE_DATA);
 
4287
        ut_ad(offset + length < PAGE_DATA);
 
4288
#if PAGE_DATA > 255
 
4289
# error "PAGE_DATA > 255"
 
4290
#endif
 
4291
        ut_ad(length < 256);
 
4292
 
 
4293
        /* If no logging is requested, we may return now */
 
4294
        if (UNIV_UNLIKELY(!log_ptr)) {
 
4295
 
 
4296
                return;
 
4297
        }
 
4298
 
 
4299
        log_ptr = mlog_write_initial_log_record_fast(
 
4300
                (byte*) data, MLOG_ZIP_WRITE_HEADER, log_ptr, mtr);
 
4301
        *log_ptr++ = (byte) offset;
 
4302
        *log_ptr++ = (byte) length;
 
4303
        mlog_close(mtr, log_ptr);
 
4304
 
 
4305
        mlog_catenate_string(mtr, data, length);
 
4306
}
 
4307
 
 
4308
/**************************************************************************
 
4309
Reorganize and compress a page.  This is a low-level operation for
 
4310
compressed pages, to be used when page_zip_compress() fails.
 
4311
On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
 
4312
The function btr_page_reorganize() should be preferred whenever possible.
 
4313
IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
 
4314
non-clustered index, the caller must update the insert buffer free
 
4315
bits in the same mini-transaction in such a way that the modification
 
4316
will be redo-logged. */
 
4317
UNIV_INTERN
 
4318
ibool
 
4319
page_zip_reorganize(
 
4320
/*================*/
 
4321
                                /* out: TRUE on success, FALSE on failure;
 
4322
                                page and page_zip will be left intact
 
4323
                                on failure. */
 
4324
        buf_block_t*    block,  /* in/out: page with compressed page;
 
4325
                                on the compressed page, in: size;
 
4326
                                out: data, n_blobs,
 
4327
                                m_start, m_end, m_nonempty */
 
4328
        dict_index_t*   index,  /* in: index of the B-tree node */
 
4329
        mtr_t*          mtr)    /* in: mini-transaction */
 
4330
{
 
4331
        page_zip_des_t* page_zip        = buf_block_get_page_zip(block);
 
4332
        page_t*         page            = buf_block_get_frame(block);
 
4333
        buf_block_t*    temp_block;
 
4334
        page_t*         temp_page;
 
4335
        ulint           log_mode;
 
4336
 
 
4337
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
 
4338
        ut_ad(page_is_comp(page));
 
4339
        /* Note that page_zip_validate(page_zip, page) may fail here. */
 
4340
        UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
 
4341
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
 
4342
 
 
4343
        /* Disable logging */
 
4344
        log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
 
4345
 
 
4346
        temp_block = buf_block_alloc(0);
 
4347
        temp_page = temp_block->frame;
 
4348
 
 
4349
        btr_search_drop_page_hash_index(block);
 
4350
 
 
4351
        /* Copy the old page to temporary space */
 
4352
        buf_frame_copy(temp_page, page);
 
4353
 
 
4354
        /* Recreate the page: note that global data on page (possible
 
4355
        segment headers, next page-field, etc.) is preserved intact */
 
4356
 
 
4357
        page_create(block, mtr, TRUE);
 
4358
        block->check_index_page_at_flush = TRUE;
 
4359
 
 
4360
        /* Copy the records from the temporary space to the recreated page;
 
4361
        do not copy the lock bits yet */
 
4362
 
 
4363
        page_copy_rec_list_end_no_locks(block, temp_block,
 
4364
                                        page_get_infimum_rec(temp_page),
 
4365
                                        index, mtr);
 
4366
        /* Copy max trx id to recreated page */
 
4367
        page_set_max_trx_id(block, NULL, page_get_max_trx_id(temp_page));
 
4368
 
 
4369
        /* Restore logging. */
 
4370
        mtr_set_log_mode(mtr, log_mode);
 
4371
 
 
4372
        if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
 
4373
 
 
4374
                /* Restore the old page and exit. */
 
4375
                buf_frame_copy(page, temp_page);
 
4376
 
 
4377
                buf_block_free(temp_block);
 
4378
                return(FALSE);
 
4379
        }
 
4380
 
 
4381
        lock_move_reorganize_page(block, temp_block);
 
4382
 
 
4383
        buf_block_free(temp_block);
 
4384
        return(TRUE);
 
4385
}
 
4386
 
 
4387
/**************************************************************************
 
4388
Copy the records of a page byte for byte.  Do not copy the page header
 
4389
or trailer, except those B-tree header fields that are directly
 
4390
related to the storage of records.  Also copy PAGE_MAX_TRX_ID.
 
4391
NOTE: The caller must update the lock table and the adaptive hash index. */
 
4392
UNIV_INTERN
 
4393
void
 
4394
page_zip_copy_recs(
 
4395
/*===============*/
 
4396
        page_zip_des_t*         page_zip,       /* out: copy of src_zip
 
4397
                                                (n_blobs, m_start, m_end,
 
4398
                                                m_nonempty, data[0..size-1]) */
 
4399
        page_t*                 page,           /* out: copy of src */
 
4400
        const page_zip_des_t*   src_zip,        /* in: compressed page */
 
4401
        const page_t*           src,            /* in: page */
 
4402
        dict_index_t*           index,          /* in: index of the B-tree */
 
4403
        mtr_t*                  mtr)            /* in: mini-transaction */
 
4404
{
 
4405
        ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
 
4406
        ut_ad(mtr_memo_contains_page(mtr, (page_t*) src, MTR_MEMO_PAGE_X_FIX));
 
4407
#ifdef UNIV_ZIP_DEBUG
 
4408
        /* The B-tree operations that call this function may set
 
4409
        FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
 
4410
        mismatch.  A strict page_zip_validate() will be executed later
 
4411
        during the B-tree operations. */
 
4412
        ut_a(page_zip_validate_low(src_zip, src, TRUE));
 
4413
#endif /* UNIV_ZIP_DEBUG */
 
4414
        ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
 
4415
        if (UNIV_UNLIKELY(src_zip->n_blobs)) {
 
4416
                ut_a(page_is_leaf(src));
 
4417
                ut_a(dict_index_is_clust(index));
 
4418
        }
 
4419
 
 
4420
        UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
 
4421
        UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip));
 
4422
        UNIV_MEM_ASSERT_RW(src, UNIV_PAGE_SIZE);
 
4423
        UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip));
 
4424
 
 
4425
        /* Copy those B-tree page header fields that are related to
 
4426
        the records stored in the page.  Also copy the field
 
4427
        PAGE_MAX_TRX_ID.  Skip the rest of the page header and
 
4428
        trailer.  On the compressed page, there is no trailer. */
 
4429
#if PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END
 
4430
# error "PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END"
 
4431
#endif
 
4432
        memcpy(PAGE_HEADER + page, PAGE_HEADER + src,
 
4433
               PAGE_HEADER_PRIV_END);
 
4434
        memcpy(PAGE_DATA + page, PAGE_DATA + src,
 
4435
               UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
 
4436
        memcpy(PAGE_HEADER + page_zip->data, PAGE_HEADER + src_zip->data,
 
4437
               PAGE_HEADER_PRIV_END);
 
4438
        memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data,
 
4439
               page_zip_get_size(page_zip) - PAGE_DATA);
 
4440
 
 
4441
        /* Copy all fields of src_zip to page_zip, except the pointer
 
4442
        to the compressed data page. */
 
4443
        {
 
4444
                page_zip_t*     data = page_zip->data;
 
4445
                memcpy(page_zip, src_zip, sizeof *page_zip);
 
4446
                page_zip->data = data;
 
4447
        }
 
4448
        ut_ad(page_zip_get_trailer_len(page_zip,
 
4449
                                       dict_index_is_clust(index), NULL)
 
4450
              + page_zip->m_end < page_zip_get_size(page_zip));
 
4451
 
 
4452
        if (!page_is_leaf(src)
 
4453
            && UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL)
 
4454
            && UNIV_LIKELY(mach_read_from_4(page
 
4455
                                            + FIL_PAGE_PREV) != FIL_NULL)) {
 
4456
                /* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */
 
4457
                ulint   offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
 
4458
                                                 TRUE);
 
4459
                if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) {
 
4460
                        rec_t*  rec = page + offs;
 
4461
                        ut_a(rec[-REC_N_NEW_EXTRA_BYTES]
 
4462
                             & REC_INFO_MIN_REC_FLAG);
 
4463
                        rec[-REC_N_NEW_EXTRA_BYTES] &= ~ REC_INFO_MIN_REC_FLAG;
 
4464
                }
 
4465
        }
 
4466
 
 
4467
#ifdef UNIV_ZIP_DEBUG
 
4468
        ut_a(page_zip_validate(page_zip, page));
 
4469
#endif /* UNIV_ZIP_DEBUG */
 
4470
 
 
4471
        page_zip_compress_write_log(page_zip, page, index, mtr);
 
4472
}
 
4473
 
 
4474
/**************************************************************************
 
4475
Parses a log record of compressing an index page. */
 
4476
UNIV_INTERN
 
4477
byte*
 
4478
page_zip_parse_compress(
 
4479
/*====================*/
 
4480
                                /* out: end of log record or NULL */
 
4481
        byte*           ptr,    /* in: buffer */
 
4482
        byte*           end_ptr,/* in: buffer end */
 
4483
        page_t*         page,   /* out: uncompressed page */
 
4484
        page_zip_des_t* page_zip)/* out: compressed page */
 
4485
{
 
4486
        ulint   size;
 
4487
        ulint   trailer_size;
 
4488
 
 
4489
        ut_ad(ptr && end_ptr);
 
4490
        ut_ad(!page == !page_zip);
 
4491
 
 
4492
        if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) {
 
4493
 
 
4494
                return(NULL);
 
4495
        }
 
4496
 
 
4497
        size = mach_read_from_2(ptr);
 
4498
        ptr += 2;
 
4499
        trailer_size = mach_read_from_2(ptr);
 
4500
        ptr += 2;
 
4501
 
 
4502
        if (UNIV_UNLIKELY(ptr + 8 + size + trailer_size > end_ptr)) {
 
4503
 
 
4504
                return(NULL);
 
4505
        }
 
4506
 
 
4507
        if (page) {
 
4508
                if (UNIV_UNLIKELY(!page_zip)
 
4509
                    || UNIV_UNLIKELY(page_zip_get_size(page_zip) < size)) {
 
4510
corrupt:
 
4511
                        recv_sys->found_corrupt_log = TRUE;
 
4512
 
 
4513
                        return(NULL);
 
4514
                }
 
4515
 
 
4516
                memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4);
 
4517
                memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4);
 
4518
                memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size);
 
4519
                memset(page_zip->data + FIL_PAGE_TYPE + size, 0,
 
4520
                       page_zip_get_size(page_zip) - trailer_size
 
4521
                       - (FIL_PAGE_TYPE + size));
 
4522
                memcpy(page_zip->data + page_zip_get_size(page_zip)
 
4523
                       - trailer_size, ptr + 8 + size, trailer_size);
 
4524
 
 
4525
                if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page))) {
 
4526
 
 
4527
                        goto corrupt;
 
4528
                }
 
4529
        }
 
4530
 
 
4531
        return(ptr + 8 + size + trailer_size);
 
4532
}
 
4533
 
 
4534
/**************************************************************************
 
4535
Calculate the compressed page checksum. */
 
4536
UNIV_INTERN
 
4537
ulint
 
4538
page_zip_calc_checksum(
 
4539
/*===================*/
 
4540
                                /* out: page checksum */
 
4541
        const void*     data,   /* in: compressed page */
 
4542
        ulint           size)   /* in: size of compressed page */
 
4543
{
 
4544
        /* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
 
4545
        and FIL_PAGE_FILE_FLUSH_LSN from the checksum. */
 
4546
 
 
4547
        const Bytef*    s       = data;
 
4548
        uLong           adler;
 
4549
 
 
4550
        ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
 
4551
 
 
4552
        adler = adler32(0L, s + FIL_PAGE_OFFSET,
 
4553
                        FIL_PAGE_LSN - FIL_PAGE_OFFSET);
 
4554
        adler = adler32(adler, s + FIL_PAGE_TYPE, 2);
 
4555
        adler = adler32(adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
 
4556
                        size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
 
4557
 
 
4558
        return((ulint) adler);
 
4559
}