~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to plugin/innobase/page/page0zip.cc

  • Committer: Monty Taylor
  • Date: 2008-09-16 00:00:48 UTC
  • mto: This revision was merged to the branch mainline in revision 391.
  • Revision ID: monty@inaugust.com-20080916000048-3rvrv3gv9l0ad3gs
Fixed copyright headers in drizzled/

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*****************************************************************************
2
 
 
3
 
Copyright (C) 2005, 2009, Innobase Oy. All Rights Reserved.
4
 
 
5
 
This program is free software; you can redistribute it and/or modify it under
6
 
the terms of the GNU General Public License as published by the Free Software
7
 
Foundation; version 2 of the License.
8
 
 
9
 
This program is distributed in the hope that it will be useful, but WITHOUT
10
 
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
 
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12
 
 
13
 
You should have received a copy of the GNU General Public License along with
14
 
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15
 
St, Fifth Floor, Boston, MA 02110-1301 USA
16
 
 
17
 
*****************************************************************************/
18
 
 
19
 
/**************************************************//**
20
 
@file page/page0zip.cc
21
 
Compressed page interface
22
 
 
23
 
Created June 2005 by Marko Makela
24
 
*******************************************************/
25
 
 
26
 
#define THIS_MODULE
27
 
#include "page0zip.h"
28
 
#ifdef UNIV_NONINL
29
 
# include "page0zip.ic"
30
 
#endif
31
 
#undef THIS_MODULE
32
 
#include "page0page.h"
33
 
#include "mtr0log.h"
34
 
#include "ut0sort.h"
35
 
#include "dict0dict.h"
36
 
#include "btr0cur.h"
37
 
#include "page0types.h"
38
 
#include "log0recv.h"
39
 
#include "zlib.h"
40
 
#ifndef UNIV_HOTBACKUP
41
 
# include "buf0lru.h"
42
 
# include "btr0sea.h"
43
 
# include "dict0boot.h"
44
 
# include "lock0lock.h"
45
 
#else /* !UNIV_HOTBACKUP */
46
 
# define lock_move_reorganize_page(block, temp_block)   ((void) 0)
47
 
# define buf_LRU_stat_inc_unzip()                       ((void) 0)
48
 
#endif /* !UNIV_HOTBACKUP */
49
 
 
50
 
#ifndef UNIV_HOTBACKUP
51
 
/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
52
 
UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
53
 
#endif /* !UNIV_HOTBACKUP */
54
 
 
55
 
/* Please refer to ../include/page0zip.ic for a description of the
56
 
compressed page format. */
57
 
 
58
 
/* The infimum and supremum records are omitted from the compressed page.
59
 
On compress, we compare that the records are there, and on uncompress we
60
 
restore the records. */
61
 
/** Extra bytes of an infimum record */
62
 
static const byte infimum_extra[] = {
63
 
        0x01,                   /* info_bits=0, n_owned=1 */
64
 
        0x00, 0x02              /* heap_no=0, status=2 */
65
 
        /* ?, ? */              /* next=(first user rec, or supremum) */
66
 
};
67
 
/** Data bytes of an infimum record */
68
 
static const byte infimum_data[] = {
69
 
        0x69, 0x6e, 0x66, 0x69,
70
 
        0x6d, 0x75, 0x6d, 0x00  /* "infimum\0" */
71
 
};
72
 
/** Extra bytes and data bytes of a supremum record */
73
 
static const byte supremum_extra_data[] = {
74
 
        /* 0x0?, */             /* info_bits=0, n_owned=1..8 */
75
 
        0x00, 0x0b,             /* heap_no=1, status=3 */
76
 
        0x00, 0x00,             /* next=0 */
77
 
        0x73, 0x75, 0x70, 0x72,
78
 
        0x65, 0x6d, 0x75, 0x6d  /* "supremum" */
79
 
};
80
 
 
81
 
/** Assert that a block of memory is filled with zero bytes.
82
 
Compare at most sizeof(field_ref_zero) bytes.
83
 
@param b        in: memory block
84
 
@param s        in: size of the memory block, in bytes */
85
 
#define ASSERT_ZERO(b, s) \
86
 
        ut_ad(!memcmp(b, field_ref_zero, ut_min(s, sizeof field_ref_zero)))
87
 
/** Assert that a BLOB pointer is filled with zero bytes.
88
 
@param b        in: BLOB pointer */
89
 
#define ASSERT_ZERO_BLOB(b) \
90
 
        ut_ad(!memcmp(b, field_ref_zero, sizeof field_ref_zero))
91
 
 
92
 
/* Enable some extra debugging output.  This code can be enabled
93
 
independently of any UNIV_ debugging conditions. */
94
 
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
95
 
# include <stdarg.h>
96
 
__attribute__((format (printf, 1, 2)))
97
 
/**********************************************************************//**
98
 
Report a failure to decompress or compress.
99
 
@return number of characters printed */
100
 
static
101
 
int
102
 
page_zip_fail_func(
103
 
/*===============*/
104
 
        const char*     fmt,    /*!< in: printf(3) format string */
105
 
        ...)                    /*!< in: arguments corresponding to fmt */
106
 
{
107
 
        int     res;
108
 
        va_list ap;
109
 
 
110
 
        ut_print_timestamp(stderr);
111
 
        fputs("  InnoDB: ", stderr);
112
 
        va_start(ap, fmt);
113
 
        res = vfprintf(stderr, fmt, ap);
114
 
        va_end(ap);
115
 
 
116
 
        return(res);
117
 
}
118
 
/** Wrapper for page_zip_fail_func()
119
 
@param fmt_args in: printf(3) format string and arguments */
120
 
# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
121
 
#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
122
 
/** Dummy wrapper for page_zip_fail_func()
123
 
@param fmt_args ignored: printf(3) format string and arguments */
124
 
# define page_zip_fail(fmt_args) /* empty */
125
 
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
126
 
 
127
 
#ifndef UNIV_HOTBACKUP
128
 
/**********************************************************************//**
129
 
Determine the guaranteed free space on an empty page.
130
 
@return minimum payload size on the page */
131
 
UNIV_INTERN
132
 
ulint
133
 
page_zip_empty_size(
134
 
/*================*/
135
 
        ulint   n_fields,       /*!< in: number of columns in the index */
136
 
        ulint   zip_size)       /*!< in: compressed page size in bytes */
137
 
{
138
 
        lint    size = zip_size
139
 
                /* subtract the page header and the longest
140
 
                uncompressed data needed for one record */
141
 
                - (PAGE_DATA
142
 
                   + PAGE_ZIP_DIR_SLOT_SIZE
143
 
                   + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
144
 
                   + 1/* encoded heap_no==2 in page_zip_write_rec() */
145
 
                   + 1/* end of modification log */
146
 
                   - REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
147
 
                /* subtract the space for page_zip_fields_encode() */
148
 
                - compressBound(2 * (n_fields + 1));
149
 
        return(size > 0 ? (ulint) size : 0);
150
 
}
151
 
#endif /* !UNIV_HOTBACKUP */
152
 
 
153
 
/*************************************************************//**
154
 
Gets the size of the compressed page trailer (the dense page directory),
155
 
including deleted records (the free list).
156
 
@return length of dense page directory, in bytes */
157
 
UNIV_INLINE
158
 
ulint
159
 
page_zip_dir_size(
160
 
/*==============*/
161
 
        const page_zip_des_t*   page_zip)       /*!< in: compressed page */
162
 
{
163
 
        /* Exclude the page infimum and supremum from the record count. */
164
 
        ulint   size = PAGE_ZIP_DIR_SLOT_SIZE
165
 
                * (page_dir_get_n_heap(page_zip->data)
166
 
                   - PAGE_HEAP_NO_USER_LOW);
167
 
        return(size);
168
 
}
169
 
 
170
 
/*************************************************************//**
171
 
Gets the size of the compressed page trailer (the dense page directory),
172
 
only including user records (excluding the free list).
173
 
@return length of dense page directory comprising existing records, in bytes */
174
 
UNIV_INLINE
175
 
ulint
176
 
page_zip_dir_user_size(
177
 
/*===================*/
178
 
        const page_zip_des_t*   page_zip)       /*!< in: compressed page */
179
 
{
180
 
        ulint   size = PAGE_ZIP_DIR_SLOT_SIZE
181
 
                * page_get_n_recs(page_zip->data);
182
 
        ut_ad(size <= page_zip_dir_size(page_zip));
183
 
        return(size);
184
 
}
185
 
 
186
 
/*************************************************************//**
187
 
Find the slot of the given record in the dense page directory.
188
 
@return dense directory slot, or NULL if record not found */
189
 
UNIV_INLINE
190
 
byte*
191
 
page_zip_dir_find_low(
192
 
/*==================*/
193
 
        byte*   slot,                   /*!< in: start of records */
194
 
        byte*   end,                    /*!< in: end of records */
195
 
        ulint   offset)                 /*!< in: offset of user record */
196
 
{
197
 
        ut_ad(slot <= end);
198
 
 
199
 
        for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) {
200
 
                if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK)
201
 
                    == offset) {
202
 
                        return(slot);
203
 
                }
204
 
        }
205
 
 
206
 
        return(NULL);
207
 
}
208
 
 
209
 
/*************************************************************//**
210
 
Find the slot of the given non-free record in the dense page directory.
211
 
@return dense directory slot, or NULL if record not found */
212
 
UNIV_INLINE
213
 
byte*
214
 
page_zip_dir_find(
215
 
/*==============*/
216
 
        page_zip_des_t* page_zip,               /*!< in: compressed page */
217
 
        ulint           offset)                 /*!< in: offset of user record */
218
 
{
219
 
        byte*   end     = page_zip->data + page_zip_get_size(page_zip);
220
 
 
221
 
        ut_ad(page_zip_simple_validate(page_zip));
222
 
 
223
 
        return(page_zip_dir_find_low(end - page_zip_dir_user_size(page_zip),
224
 
                                     end,
225
 
                                     offset));
226
 
}
227
 
 
228
 
/*************************************************************//**
229
 
Find the slot of the given free record in the dense page directory.
230
 
@return dense directory slot, or NULL if record not found */
231
 
UNIV_INLINE
232
 
byte*
233
 
page_zip_dir_find_free(
234
 
/*===================*/
235
 
        page_zip_des_t* page_zip,               /*!< in: compressed page */
236
 
        ulint           offset)                 /*!< in: offset of user record */
237
 
{
238
 
        byte*   end     = page_zip->data + page_zip_get_size(page_zip);
239
 
 
240
 
        ut_ad(page_zip_simple_validate(page_zip));
241
 
 
242
 
        return(page_zip_dir_find_low(end - page_zip_dir_size(page_zip),
243
 
                                     end - page_zip_dir_user_size(page_zip),
244
 
                                     offset));
245
 
}
246
 
 
247
 
/*************************************************************//**
248
 
Read a given slot in the dense page directory.
249
 
@return record offset on the uncompressed page, possibly ORed with
250
 
PAGE_ZIP_DIR_SLOT_DEL or PAGE_ZIP_DIR_SLOT_OWNED */
251
 
UNIV_INLINE
252
 
ulint
253
 
page_zip_dir_get(
254
 
/*=============*/
255
 
        const page_zip_des_t*   page_zip,       /*!< in: compressed page */
256
 
        ulint                   slot)           /*!< in: slot
257
 
                                                (0=first user record) */
258
 
{
259
 
        ut_ad(page_zip_simple_validate(page_zip));
260
 
        ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE);
261
 
        return(mach_read_from_2(page_zip->data + page_zip_get_size(page_zip)
262
 
                                - PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
263
 
}
264
 
 
265
 
#ifndef UNIV_HOTBACKUP
266
 
/**********************************************************************//**
267
 
Write a log record of compressing an index page. */
268
 
static
269
 
void
270
 
page_zip_compress_write_log(
271
 
/*========================*/
272
 
        const page_zip_des_t*   page_zip,/*!< in: compressed page */
273
 
        const page_t*           page,   /*!< in: uncompressed page */
274
 
        dict_index_t*           index,  /*!< in: index of the B-tree node */
275
 
        mtr_t*                  mtr)    /*!< in: mini-transaction */
276
 
{
277
 
        byte*   log_ptr;
278
 
        ulint   trailer_size;
279
 
 
280
 
        ut_ad(!dict_index_is_ibuf(index));
281
 
 
282
 
        log_ptr = mlog_open(mtr, 11 + 2 + 2);
283
 
 
284
 
        if (!log_ptr) {
285
 
 
286
 
                return;
287
 
        }
288
 
 
289
 
        /* Read the number of user records. */
290
 
        trailer_size = page_dir_get_n_heap(page_zip->data)
291
 
                - PAGE_HEAP_NO_USER_LOW;
292
 
        /* Multiply by uncompressed of size stored per record */
293
 
        if (!page_is_leaf(page)) {
294
 
                trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
295
 
        } else if (dict_index_is_clust(index)) {
296
 
                trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
297
 
                        + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
298
 
        } else {
299
 
                trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
300
 
        }
301
 
        /* Add the space occupied by BLOB pointers. */
302
 
        trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
303
 
        ut_a(page_zip->m_end > PAGE_DATA);
304
 
#if FIL_PAGE_DATA > PAGE_DATA
305
 
# error "FIL_PAGE_DATA > PAGE_DATA"
306
 
#endif
307
 
        ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));
308
 
 
309
 
        log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
310
 
                                                     MLOG_ZIP_PAGE_COMPRESS,
311
 
                                                     log_ptr, mtr);
312
 
        mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE);
313
 
        log_ptr += 2;
314
 
        mach_write_to_2(log_ptr, trailer_size);
315
 
        log_ptr += 2;
316
 
        mlog_close(mtr, log_ptr);
317
 
 
318
 
        /* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
319
 
        mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
320
 
        mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
321
 
        /* Write most of the page header, the compressed stream and
322
 
        the modification log. */
323
 
        mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
324
 
                             page_zip->m_end - FIL_PAGE_TYPE);
325
 
        /* Write the uncompressed trailer of the compressed page. */
326
 
        mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
327
 
                             - trailer_size, trailer_size);
328
 
}
329
 
#endif /* !UNIV_HOTBACKUP */
330
 
 
331
 
/******************************************************//**
332
 
Determine how many externally stored columns are contained
333
 
in existing records with smaller heap_no than rec. */
334
 
static
335
 
ulint
336
 
page_zip_get_n_prev_extern(
337
 
/*=======================*/
338
 
        const page_zip_des_t*   page_zip,/*!< in: dense page directory on
339
 
                                        compressed page */
340
 
        const rec_t*            rec,    /*!< in: compact physical record
341
 
                                        on a B-tree leaf page */
342
 
        dict_index_t*           index)  /*!< in: record descriptor */
343
 
{
344
 
        const page_t*   page    = page_align(rec);
345
 
        ulint           n_ext   = 0;
346
 
        ulint           i;
347
 
        ulint           left;
348
 
        ulint           heap_no;
349
 
        ulint           n_recs  = page_get_n_recs(page_zip->data);
350
 
 
351
 
        ut_ad(page_is_leaf(page));
352
 
        ut_ad(page_is_comp(page));
353
 
        ut_ad(dict_table_is_comp(index->table));
354
 
        ut_ad(dict_index_is_clust(index));
355
 
        ut_ad(!dict_index_is_ibuf(index));
356
 
 
357
 
        heap_no = rec_get_heap_no_new(rec);
358
 
        ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
359
 
        left = heap_no - PAGE_HEAP_NO_USER_LOW;
360
 
        if (UNIV_UNLIKELY(!left)) {
361
 
                return(0);
362
 
        }
363
 
 
364
 
        for (i = 0; i < n_recs; i++) {
365
 
                const rec_t*    r       = page + (page_zip_dir_get(page_zip, i)
366
 
                                                  & PAGE_ZIP_DIR_SLOT_MASK);
367
 
 
368
 
                if (rec_get_heap_no_new(r) < heap_no) {
369
 
                        n_ext += rec_get_n_extern_new(r, index,
370
 
                                                      ULINT_UNDEFINED);
371
 
                        if (!--left) {
372
 
                                break;
373
 
                        }
374
 
                }
375
 
        }
376
 
 
377
 
        return(n_ext);
378
 
}
379
 
 
380
 
/**********************************************************************//**
381
 
Encode the length of a fixed-length column.
382
 
@return buf + length of encoded val */
383
 
static
384
 
byte*
385
 
page_zip_fixed_field_encode(
386
 
/*========================*/
387
 
        byte*   buf,    /*!< in: pointer to buffer where to write */
388
 
        ulint   val)    /*!< in: value to write */
389
 
{
390
 
        ut_ad(val >= 2);
391
 
 
392
 
        if (UNIV_LIKELY(val < 126)) {
393
 
                /*
394
 
                0 = nullable variable field of at most 255 bytes length;
395
 
                1 = not null variable field of at most 255 bytes length;
396
 
                126 = nullable variable field with maximum length >255;
397
 
                127 = not null variable field with maximum length >255
398
 
                */
399
 
                *buf++ = (byte) val;
400
 
        } else {
401
 
                *buf++ = (byte) (0x80 | val >> 8);
402
 
                *buf++ = (byte) val;
403
 
        }
404
 
 
405
 
        return(buf);
406
 
}
407
 
 
408
 
/**********************************************************************//**
409
 
Write the index information for the compressed page.
410
 
@return used size of buf */
411
 
static
412
 
ulint
413
 
page_zip_fields_encode(
414
 
/*===================*/
415
 
        ulint           n,      /*!< in: number of fields to compress */
416
 
        dict_index_t*   index,  /*!< in: index comprising at least n fields */
417
 
        ulint           trx_id_pos,/*!< in: position of the trx_id column
418
 
                                in the index, or ULINT_UNDEFINED if
419
 
                                this is a non-leaf page */
420
 
        byte*           buf)    /*!< out: buffer of (n + 1) * 2 bytes */
421
 
{
422
 
        const byte*     buf_start       = buf;
423
 
        ulint           i;
424
 
        ulint           col;
425
 
        ulint           trx_id_col      = 0;
426
 
        /* sum of lengths of preceding non-nullable fixed fields, or 0 */
427
 
        ulint           fixed_sum       = 0;
428
 
 
429
 
        ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);
430
 
 
431
 
        for (i = col = 0; i < n; i++) {
432
 
                dict_field_t*   field = dict_index_get_nth_field(index, i);
433
 
                ulint           val;
434
 
 
435
 
                if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
436
 
                        val = 1; /* set the "not nullable" flag */
437
 
                } else {
438
 
                        val = 0; /* nullable field */
439
 
                }
440
 
 
441
 
                if (!field->fixed_len) {
442
 
                        /* variable-length field */
443
 
                        const dict_col_t*       column
444
 
                                = dict_field_get_col(field);
445
 
 
446
 
                        if (UNIV_UNLIKELY(column->len > 255)
447
 
                            || UNIV_UNLIKELY(column->mtype == DATA_BLOB)) {
448
 
                                val |= 0x7e; /* max > 255 bytes */
449
 
                        }
450
 
 
451
 
                        if (fixed_sum) {
452
 
                                /* write out the length of any
453
 
                                preceding non-nullable fields */
454
 
                                buf = page_zip_fixed_field_encode(
455
 
                                        buf, fixed_sum << 1 | 1);
456
 
                                fixed_sum = 0;
457
 
                                col++;
458
 
                        }
459
 
 
460
 
                        *buf++ = (byte) val;
461
 
                        col++;
462
 
                } else if (val) {
463
 
                        /* fixed-length non-nullable field */
464
 
 
465
 
                        if (fixed_sum && UNIV_UNLIKELY
466
 
                            (fixed_sum + field->fixed_len
467
 
                             > DICT_MAX_INDEX_COL_LEN)) {
468
 
                                /* Write out the length of the
469
 
                                preceding non-nullable fields,
470
 
                                to avoid exceeding the maximum
471
 
                                length of a fixed-length column. */
472
 
                                buf = page_zip_fixed_field_encode(
473
 
                                        buf, fixed_sum << 1 | 1);
474
 
                                fixed_sum = 0;
475
 
                                col++;
476
 
                        }
477
 
 
478
 
                        if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
479
 
                                if (fixed_sum) {
480
 
                                        /* Write out the length of any
481
 
                                        preceding non-nullable fields,
482
 
                                        and start a new trx_id column. */
483
 
                                        buf = page_zip_fixed_field_encode(
484
 
                                                buf, fixed_sum << 1 | 1);
485
 
                                        col++;
486
 
                                }
487
 
 
488
 
                                trx_id_col = col;
489
 
                                fixed_sum = field->fixed_len;
490
 
                        } else {
491
 
                                /* add to the sum */
492
 
                                fixed_sum += field->fixed_len;
493
 
                        }
494
 
                } else {
495
 
                        /* fixed-length nullable field */
496
 
 
497
 
                        if (fixed_sum) {
498
 
                                /* write out the length of any
499
 
                                preceding non-nullable fields */
500
 
                                buf = page_zip_fixed_field_encode(
501
 
                                        buf, fixed_sum << 1 | 1);
502
 
                                fixed_sum = 0;
503
 
                                col++;
504
 
                        }
505
 
 
506
 
                        buf = page_zip_fixed_field_encode(
507
 
                                buf, field->fixed_len << 1);
508
 
                        col++;
509
 
                }
510
 
        }
511
 
 
512
 
        if (fixed_sum) {
513
 
                /* Write out the lengths of last fixed-length columns. */
514
 
                buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
515
 
        }
516
 
 
517
 
        if (trx_id_pos != ULINT_UNDEFINED) {
518
 
                /* Write out the position of the trx_id column */
519
 
                i = trx_id_col;
520
 
        } else {
521
 
                /* Write out the number of nullable fields */
522
 
                i = index->n_nullable;
523
 
        }
524
 
 
525
 
        if (i < 128) {
526
 
                *buf++ = (byte) i;
527
 
        } else {
528
 
                *buf++ = (byte) (0x80 | i >> 8);
529
 
                *buf++ = (byte) i;
530
 
        }
531
 
 
532
 
        ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
533
 
        return((ulint) (buf - buf_start));
534
 
}
535
 
 
536
 
/**********************************************************************//**
537
 
Populate the dense page directory from the sparse directory. */
538
 
static
539
 
void
540
 
page_zip_dir_encode(
541
 
/*================*/
542
 
        const page_t*   page,   /*!< in: compact page */
543
 
        byte*           buf,    /*!< in: pointer to dense page directory[-1];
544
 
                                out: dense directory on compressed page */
545
 
        const rec_t**   recs)   /*!< in: pointer to an array of 0, or NULL;
546
 
                                out: dense page directory sorted by ascending
547
 
                                address (and heap_no) */
548
 
{
549
 
        const byte*     rec;
550
 
        ulint           status;
551
 
        ulint           min_mark;
552
 
        ulint           heap_no;
553
 
        ulint           i;
554
 
        ulint           n_heap;
555
 
        ulint           offs;
556
 
 
557
 
        min_mark = 0;
558
 
 
559
 
        if (page_is_leaf(page)) {
560
 
                status = REC_STATUS_ORDINARY;
561
 
        } else {
562
 
                status = REC_STATUS_NODE_PTR;
563
 
                if (UNIV_UNLIKELY
564
 
                    (mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) {
565
 
                        min_mark = REC_INFO_MIN_REC_FLAG;
566
 
                }
567
 
        }
568
 
 
569
 
        n_heap = page_dir_get_n_heap(page);
570
 
 
571
 
        /* Traverse the list of stored records in the collation order,
572
 
        starting from the first user record. */
573
 
 
574
 
        rec = page + PAGE_NEW_INFIMUM;
575
 
 
576
 
        i = 0;
577
 
 
578
 
        for (;;) {
579
 
                ulint   info_bits;
580
 
                offs = rec_get_next_offs(rec, TRUE);
581
 
                if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
582
 
                        break;
583
 
                }
584
 
                rec = page + offs;
585
 
                heap_no = rec_get_heap_no_new(rec);
586
 
                ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
587
 
                ut_a(heap_no < n_heap);
588
 
                ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR);
589
 
                ut_a(offs >= PAGE_ZIP_START);
590
 
#if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1)
591
 
# error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2"
592
 
#endif
593
 
#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1
594
 
# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1"
595
 
#endif
596
 
                if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
597
 
                        offs |= PAGE_ZIP_DIR_SLOT_OWNED;
598
 
                }
599
 
 
600
 
                info_bits = rec_get_info_bits(rec, TRUE);
601
 
                if (UNIV_UNLIKELY(info_bits & REC_INFO_DELETED_FLAG)) {
602
 
                        info_bits &= ~REC_INFO_DELETED_FLAG;
603
 
                        offs |= PAGE_ZIP_DIR_SLOT_DEL;
604
 
                }
605
 
                ut_a(info_bits == min_mark);
606
 
                /* Only the smallest user record can have
607
 
                REC_INFO_MIN_REC_FLAG set. */
608
 
                min_mark = 0;
609
 
 
610
 
                mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
611
 
 
612
 
                if (UNIV_LIKELY_NULL(recs)) {
613
 
                        /* Ensure that each heap_no occurs at most once. */
614
 
                        ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
615
 
                        /* exclude infimum and supremum */
616
 
                        recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
617
 
                }
618
 
 
619
 
                ut_a(rec_get_status(rec) == status);
620
 
        }
621
 
 
622
 
        offs = page_header_get_field(page, PAGE_FREE);
623
 
 
624
 
        /* Traverse the free list (of deleted records). */
625
 
        while (offs) {
626
 
                ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
627
 
                rec = page + offs;
628
 
 
629
 
                heap_no = rec_get_heap_no_new(rec);
630
 
                ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
631
 
                ut_a(heap_no < n_heap);
632
 
 
633
 
                ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
634
 
                ut_a(rec_get_status(rec) == status);
635
 
 
636
 
                mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
637
 
 
638
 
                if (UNIV_LIKELY_NULL(recs)) {
639
 
                        /* Ensure that each heap_no occurs at most once. */
640
 
                        ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
641
 
                        /* exclude infimum and supremum */
642
 
                        recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
643
 
                }
644
 
 
645
 
                offs = rec_get_next_offs(rec, TRUE);
646
 
        }
647
 
 
648
 
        /* Ensure that each heap no occurs at least once. */
649
 
        ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap);
650
 
}
651
 
 
652
 
/**********************************************************************//**
653
 
Allocate memory for zlib. */
654
 
extern "C" void* page_zip_malloc(void* opaque, uInt items, uInt size);
655
 
 
656
 
extern "C" void* page_zip_malloc
657
 
(
658
 
/*============*/
659
 
        void*   opaque, /*!< in/out: memory heap */
660
 
        uInt    items,  /*!< in: number of items to allocate */
661
 
        uInt    size)   /*!< in: size of an item in bytes */
662
 
{
663
 
        return(mem_heap_alloc(static_cast<mem_block_info_t *>(opaque), items * size));
664
 
}
665
 
 
666
 
/**********************************************************************//**
Deallocate memory for zlib.  This is intentionally a no-op; presumably
the memory handed out by page_zip_malloc() is released together with
the memory heap it was taken from -- confirm against the callers of
page_zip_set_alloc(). */
extern "C" void page_zip_free(void *opaque, void *address);

extern "C" void page_zip_free(void *, void *)
{
	/* nothing to release here */
}
672
 
 
673
 
/**********************************************************************//**
674
 
Configure the zlib allocator to use the given memory heap. */
675
 
UNIV_INTERN
676
 
void
677
 
page_zip_set_alloc(
678
 
/*===============*/
679
 
        void*           stream,         /*!< in/out: zlib stream */
680
 
        mem_heap_t*     heap)           /*!< in: memory heap to use */
681
 
{
682
 
        z_stream*       strm = static_cast<z_stream *>(stream);
683
 
 
684
 
        strm->zalloc = page_zip_malloc;
685
 
        strm->zfree = page_zip_free;
686
 
        strm->opaque = heap;
687
 
}
688
 
 
689
 
#if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
690
 
/** Symbol for enabling compression and decompression diagnostics */
691
 
# define PAGE_ZIP_COMPRESS_DBG
692
 
#endif
693
 
 
694
 
#ifdef PAGE_ZIP_COMPRESS_DBG
695
 
/** Set this variable in a debugger to enable
696
 
excessive logging in page_zip_compress(). */
697
 
UNIV_INTERN ibool       page_zip_compress_dbg;
698
 
/** Set this variable in a debugger to enable
699
 
binary logging of the data passed to deflate().
700
 
When this variable is nonzero, it will act
701
 
as a log file name generator. */
702
 
UNIV_INTERN unsigned    page_zip_compress_log;
703
 
 
704
 
/**********************************************************************//**
705
 
Wrapper for deflate().  Log the operation if page_zip_compress_dbg is set.
706
 
@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
707
 
static
708
 
int
709
 
page_zip_compress_deflate(
710
 
/*======================*/
711
 
        FILE*           logfile,/*!< in: log file, or NULL */
712
 
        z_streamp       strm,   /*!< in/out: compressed stream for deflate() */
713
 
        int             flush)  /*!< in: deflate() flushing method */
714
 
{
715
 
        int     status;
716
 
        if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
717
 
                ut_print_buf(stderr, strm->next_in, strm->avail_in);
718
 
        }
719
 
        if (UNIV_LIKELY_NULL(logfile)) {
720
 
                fwrite(strm->next_in, 1, strm->avail_in, logfile);
721
 
        }
722
 
        status = deflate(strm, flush);
723
 
        if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
724
 
                fprintf(stderr, " -> %d\n", status);
725
 
        }
726
 
        return(status);
727
 
}
728
 
 
729
 
/* Redefine deflate(). */
730
 
# undef deflate
731
 
/** Debug wrapper for the zlib compression routine deflate().
732
 
Log the operation if page_zip_compress_dbg is set.
733
 
@param strm     in/out: compressed stream
734
 
@param flush    in: flushing method
735
 
@return         deflate() status: Z_OK, Z_BUF_ERROR, ... */
736
 
# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
737
 
/** Declaration of the logfile parameter */
738
 
# define FILE_LOGFILE FILE* logfile,
739
 
/** The logfile parameter */
740
 
# define LOGFILE logfile,
741
 
#else /* PAGE_ZIP_COMPRESS_DBG */
742
 
/** Empty declaration of the logfile parameter */
743
 
# define FILE_LOGFILE
744
 
/** Missing logfile parameter */
745
 
# define LOGFILE
746
 
#endif /* PAGE_ZIP_COMPRESS_DBG */
747
 
 
748
 
/**********************************************************************//**
749
 
Compress the records of a node pointer page.
750
 
@return Z_OK, or a zlib error code */
751
 
static
752
 
int
753
 
page_zip_compress_node_ptrs(
754
 
/*========================*/
755
 
        FILE_LOGFILE
756
 
        z_stream*       c_stream,       /*!< in/out: compressed page stream */
757
 
        const rec_t**   recs,           /*!< in: dense page directory
758
 
                                        sorted by address */
759
 
        ulint           n_dense,        /*!< in: size of recs[] */
760
 
        dict_index_t*   index,          /*!< in: the index of the page */
761
 
        byte*           storage,        /*!< in: end of dense page directory */
762
 
        mem_heap_t*     heap)           /*!< in: temporary memory heap */
763
 
{
764
 
        int     err     = Z_OK;
765
 
        ulint*  offsets = NULL;
766
 
 
767
 
        do {
768
 
                const rec_t*    rec = *recs++;
769
 
 
770
 
                offsets = rec_get_offsets(rec, index, offsets,
771
 
                                          ULINT_UNDEFINED, &heap);
772
 
                /* Only leaf nodes may contain externally stored columns. */
773
 
                ut_ad(!rec_offs_any_extern(offsets));
774
 
 
775
 
                UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
776
 
                UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
777
 
                                   rec_offs_extra_size(offsets));
778
 
 
779
 
                /* Compress the extra bytes. */
780
 
                c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
781
 
                        - c_stream->next_in;
782
 
 
783
 
                if (c_stream->avail_in) {
784
 
                        err = deflate(c_stream, Z_NO_FLUSH);
785
 
                        if (UNIV_UNLIKELY(err != Z_OK)) {
786
 
                                break;
787
 
                        }
788
 
                }
789
 
                ut_ad(!c_stream->avail_in);
790
 
 
791
 
                /* Compress the data bytes, except node_ptr. */
792
 
                c_stream->next_in = (byte*) rec;
793
 
                c_stream->avail_in = rec_offs_data_size(offsets)
794
 
                        - REC_NODE_PTR_SIZE;
795
 
                ut_ad(c_stream->avail_in);
796
 
 
797
 
                err = deflate(c_stream, Z_NO_FLUSH);
798
 
                if (UNIV_UNLIKELY(err != Z_OK)) {
799
 
                        break;
800
 
                }
801
 
 
802
 
                ut_ad(!c_stream->avail_in);
803
 
 
804
 
                memcpy(storage - REC_NODE_PTR_SIZE
805
 
                       * (rec_get_heap_no_new(rec) - 1),
806
 
                       c_stream->next_in, REC_NODE_PTR_SIZE);
807
 
                c_stream->next_in += REC_NODE_PTR_SIZE;
808
 
        } while (--n_dense);
809
 
 
810
 
        return(err);
811
 
}
812
 
 
813
 
/**********************************************************************//**
814
 
Compress the records of a leaf node of a secondary index.
815
 
@return Z_OK, or a zlib error code */
816
 
static
817
 
int
818
 
page_zip_compress_sec(
819
 
/*==================*/
820
 
        FILE_LOGFILE
821
 
        z_stream*       c_stream,       /*!< in/out: compressed page stream */
822
 
        const rec_t**   recs,           /*!< in: dense page directory
823
 
                                        sorted by address */
824
 
        ulint           n_dense)        /*!< in: size of recs[] */
825
 
{
826
 
        int             err     = Z_OK;
827
 
 
828
 
        ut_ad(n_dense > 0);
829
 
 
830
 
        do {
831
 
                const rec_t*    rec = *recs++;
832
 
 
833
 
                /* Compress everything up to this record. */
834
 
                c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
835
 
                        - c_stream->next_in;
836
 
 
837
 
                if (UNIV_LIKELY(c_stream->avail_in)) {
838
 
                        UNIV_MEM_ASSERT_RW(c_stream->next_in,
839
 
                                           c_stream->avail_in);
840
 
                        err = deflate(c_stream, Z_NO_FLUSH);
841
 
                        if (UNIV_UNLIKELY(err != Z_OK)) {
842
 
                                break;
843
 
                        }
844
 
                }
845
 
 
846
 
                ut_ad(!c_stream->avail_in);
847
 
                ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
848
 
 
849
 
                /* Skip the REC_N_NEW_EXTRA_BYTES. */
850
 
 
851
 
                c_stream->next_in = (byte*) rec;
852
 
        } while (--n_dense);
853
 
 
854
 
        return(err);
855
 
}
856
 
 
857
 
/**********************************************************************//**
858
 
Compress a record of a leaf node of a clustered index that contains
859
 
externally stored columns.
860
 
@return Z_OK, or a zlib error code */
861
 
static
862
 
int
863
 
page_zip_compress_clust_ext(
864
 
/*========================*/
865
 
        FILE_LOGFILE
866
 
        z_stream*       c_stream,       /*!< in/out: compressed page stream */
867
 
        const rec_t*    rec,            /*!< in: record */
868
 
        const ulint*    offsets,        /*!< in: rec_get_offsets(rec) */
869
 
        ulint           trx_id_col,     /*!< in: position of of DB_TRX_ID */
870
 
        byte*           deleted,        /*!< in: dense directory entry pointing
871
 
                                        to the head of the free list */
872
 
        byte*           storage,        /*!< in: end of dense page directory */
873
 
        byte**          externs,        /*!< in/out: pointer to the next
874
 
                                        available BLOB pointer */
875
 
        ulint*          n_blobs)        /*!< in/out: number of
876
 
                                        externally stored columns */
877
 
{
878
 
        int     err;
879
 
        ulint   i;
880
 
 
881
 
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
882
 
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
883
 
                           rec_offs_extra_size(offsets));
884
 
 
885
 
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
886
 
                ulint           len;
887
 
                const byte*     src;
888
 
 
889
 
                if (UNIV_UNLIKELY(i == trx_id_col)) {
890
 
                        ut_ad(!rec_offs_nth_extern(offsets, i));
891
 
                        /* Store trx_id and roll_ptr
892
 
                        in uncompressed form. */
893
 
                        src = rec_get_nth_field(rec, offsets, i, &len);
894
 
                        ut_ad(src + DATA_TRX_ID_LEN
895
 
                              == rec_get_nth_field(rec, offsets,
896
 
                                                   i + 1, &len));
897
 
                        ut_ad(len == DATA_ROLL_PTR_LEN);
898
 
 
899
 
                        /* Compress any preceding bytes. */
900
 
                        c_stream->avail_in
901
 
                                = src - c_stream->next_in;
902
 
 
903
 
                        if (c_stream->avail_in) {
904
 
                                err = deflate(c_stream, Z_NO_FLUSH);
905
 
                                if (UNIV_UNLIKELY(err != Z_OK)) {
906
 
 
907
 
                                        return(err);
908
 
                                }
909
 
                        }
910
 
 
911
 
                        ut_ad(!c_stream->avail_in);
912
 
                        ut_ad(c_stream->next_in == src);
913
 
 
914
 
                        memcpy(storage
915
 
                               - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
916
 
                               * (rec_get_heap_no_new(rec) - 1),
917
 
                               c_stream->next_in,
918
 
                               DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
919
 
 
920
 
                        c_stream->next_in
921
 
                                += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
922
 
 
923
 
                        /* Skip also roll_ptr */
924
 
                        i++;
925
 
                } else if (rec_offs_nth_extern(offsets, i)) {
926
 
                        src = rec_get_nth_field(rec, offsets, i, &len);
927
 
                        ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
928
 
                        src += len - BTR_EXTERN_FIELD_REF_SIZE;
929
 
 
930
 
                        c_stream->avail_in = src
931
 
                                - c_stream->next_in;
932
 
                        if (UNIV_LIKELY(c_stream->avail_in)) {
933
 
                                err = deflate(c_stream, Z_NO_FLUSH);
934
 
                                if (UNIV_UNLIKELY(err != Z_OK)) {
935
 
 
936
 
                                        return(err);
937
 
                                }
938
 
                        }
939
 
 
940
 
                        ut_ad(!c_stream->avail_in);
941
 
                        ut_ad(c_stream->next_in == src);
942
 
 
943
 
                        /* Reserve space for the data at
944
 
                        the end of the space reserved for
945
 
                        the compressed data and the page
946
 
                        modification log. */
947
 
 
948
 
                        if (UNIV_UNLIKELY
949
 
                            (c_stream->avail_out
950
 
                             <= BTR_EXTERN_FIELD_REF_SIZE)) {
951
 
                                /* out of space */
952
 
                                return(Z_BUF_ERROR);
953
 
                        }
954
 
 
955
 
                        ut_ad(*externs == c_stream->next_out
956
 
                              + c_stream->avail_out
957
 
                              + 1/* end of modif. log */);
958
 
 
959
 
                        c_stream->next_in
960
 
                                += BTR_EXTERN_FIELD_REF_SIZE;
961
 
 
962
 
                        /* Skip deleted records. */
963
 
                        if (UNIV_LIKELY_NULL
964
 
                            (page_zip_dir_find_low(
965
 
                                    storage, deleted,
966
 
                                    page_offset(rec)))) {
967
 
                                continue;
968
 
                        }
969
 
 
970
 
                        (*n_blobs)++;
971
 
                        c_stream->avail_out
972
 
                                -= BTR_EXTERN_FIELD_REF_SIZE;
973
 
                        *externs -= BTR_EXTERN_FIELD_REF_SIZE;
974
 
 
975
 
                        /* Copy the BLOB pointer */
976
 
                        memcpy(*externs, c_stream->next_in
977
 
                               - BTR_EXTERN_FIELD_REF_SIZE,
978
 
                               BTR_EXTERN_FIELD_REF_SIZE);
979
 
                }
980
 
        }
981
 
 
982
 
        return(Z_OK);
983
 
}
984
 
 
985
 
/**********************************************************************//**
986
 
Compress the records of a leaf node of a clustered index.
987
 
@return Z_OK, or a zlib error code */
988
 
static
989
 
int
990
 
page_zip_compress_clust(
991
 
/*====================*/
992
 
        FILE_LOGFILE
993
 
        z_stream*       c_stream,       /*!< in/out: compressed page stream */
994
 
        const rec_t**   recs,           /*!< in: dense page directory
995
 
                                        sorted by address */
996
 
        ulint           n_dense,        /*!< in: size of recs[] */
997
 
        dict_index_t*   index,          /*!< in: the index of the page */
998
 
        ulint*          n_blobs,        /*!< in: 0; out: number of
999
 
                                        externally stored columns */
1000
 
        ulint           trx_id_col,     /*!< index of the trx_id column */
1001
 
        byte*           deleted,        /*!< in: dense directory entry pointing
1002
 
                                        to the head of the free list */
1003
 
        byte*           storage,        /*!< in: end of dense page directory */
1004
 
        mem_heap_t*     heap)           /*!< in: temporary memory heap */
1005
 
{
1006
 
        int     err             = Z_OK;
1007
 
        ulint*  offsets         = NULL;
1008
 
        /* BTR_EXTERN_FIELD_REF storage */
1009
 
        byte*   externs         = storage - n_dense
1010
 
                * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
1011
 
 
1012
 
        ut_ad(*n_blobs == 0);
1013
 
 
1014
 
        do {
1015
 
                const rec_t*    rec = *recs++;
1016
 
 
1017
 
                offsets = rec_get_offsets(rec, index, offsets,
1018
 
                                          ULINT_UNDEFINED, &heap);
1019
 
                ut_ad(rec_offs_n_fields(offsets)
1020
 
                      == dict_index_get_n_fields(index));
1021
 
                UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
1022
 
                UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
1023
 
                                   rec_offs_extra_size(offsets));
1024
 
 
1025
 
                /* Compress the extra bytes. */
1026
 
                c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
1027
 
                        - c_stream->next_in;
1028
 
 
1029
 
                if (c_stream->avail_in) {
1030
 
                        err = deflate(c_stream, Z_NO_FLUSH);
1031
 
                        if (UNIV_UNLIKELY(err != Z_OK)) {
1032
 
 
1033
 
                                goto func_exit;
1034
 
                        }
1035
 
                }
1036
 
                ut_ad(!c_stream->avail_in);
1037
 
                ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
1038
 
 
1039
 
                /* Compress the data bytes. */
1040
 
 
1041
 
                c_stream->next_in = (byte*) rec;
1042
 
 
1043
 
                /* Check if there are any externally stored columns.
1044
 
                For each externally stored column, store the
1045
 
                BTR_EXTERN_FIELD_REF separately. */
1046
 
                if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
1047
 
                        ut_ad(dict_index_is_clust(index));
1048
 
 
1049
 
                        err = page_zip_compress_clust_ext(
1050
 
                                LOGFILE
1051
 
                                c_stream, rec, offsets, trx_id_col,
1052
 
                                deleted, storage, &externs, n_blobs);
1053
 
 
1054
 
                        if (UNIV_UNLIKELY(err != Z_OK)) {
1055
 
 
1056
 
                                goto func_exit;
1057
 
                        }
1058
 
                } else {
1059
 
                        ulint           len;
1060
 
                        const byte*     src;
1061
 
 
1062
 
                        /* Store trx_id and roll_ptr in uncompressed form. */
1063
 
                        src = rec_get_nth_field(rec, offsets,
1064
 
                                                trx_id_col, &len);
1065
 
                        ut_ad(src + DATA_TRX_ID_LEN
1066
 
                              == rec_get_nth_field(rec, offsets,
1067
 
                                                   trx_id_col + 1, &len));
1068
 
                        ut_ad(len == DATA_ROLL_PTR_LEN);
1069
 
                        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
1070
 
                        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
1071
 
                                           rec_offs_extra_size(offsets));
1072
 
 
1073
 
                        /* Compress any preceding bytes. */
1074
 
                        c_stream->avail_in = src - c_stream->next_in;
1075
 
 
1076
 
                        if (c_stream->avail_in) {
1077
 
                                err = deflate(c_stream, Z_NO_FLUSH);
1078
 
                                if (UNIV_UNLIKELY(err != Z_OK)) {
1079
 
 
1080
 
                                        return(err);
1081
 
                                }
1082
 
                        }
1083
 
 
1084
 
                        ut_ad(!c_stream->avail_in);
1085
 
                        ut_ad(c_stream->next_in == src);
1086
 
 
1087
 
                        memcpy(storage
1088
 
                               - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
1089
 
                               * (rec_get_heap_no_new(rec) - 1),
1090
 
                               c_stream->next_in,
1091
 
                               DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
1092
 
 
1093
 
                        c_stream->next_in
1094
 
                                += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1095
 
 
1096
 
                        /* Skip also roll_ptr */
1097
 
                        ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets));
1098
 
                }
1099
 
 
1100
 
                /* Compress the last bytes of the record. */
1101
 
                c_stream->avail_in = rec + rec_offs_data_size(offsets)
1102
 
                        - c_stream->next_in;
1103
 
 
1104
 
                if (c_stream->avail_in) {
1105
 
                        err = deflate(c_stream, Z_NO_FLUSH);
1106
 
                        if (UNIV_UNLIKELY(err != Z_OK)) {
1107
 
 
1108
 
                                goto func_exit;
1109
 
                        }
1110
 
                }
1111
 
                ut_ad(!c_stream->avail_in);
1112
 
        } while (--n_dense);
1113
 
 
1114
 
func_exit:
1115
 
        return(err);
1116
 
}
1117
 
 
1118
 
/**********************************************************************//**
1119
 
Compress a page.
1120
 
@return TRUE on success, FALSE on failure; page_zip will be left
1121
 
intact on failure. */
1122
 
UNIV_INTERN
1123
 
ibool
1124
 
page_zip_compress(
1125
 
/*==============*/
1126
 
        page_zip_des_t* page_zip,/*!< in: size; out: data, n_blobs,
1127
 
                                m_start, m_end, m_nonempty */
1128
 
        const page_t*   page,   /*!< in: uncompressed page */
1129
 
        dict_index_t*   index,  /*!< in: index of the B-tree node */
1130
 
        mtr_t*          mtr)    /*!< in: mini-transaction, or NULL */
1131
 
{
1132
 
        z_stream        c_stream;
1133
 
        int             err;
1134
 
        ulint           n_fields;/* number of index fields needed */
1135
 
        byte*           fields; /*!< index field information */
1136
 
        byte*           buf;    /*!< compressed payload of the page */
1137
 
        byte*           buf_end;/* end of buf */
1138
 
        ulint           n_dense;
1139
 
        ulint           slot_size;/* amount of uncompressed bytes per record */
1140
 
        const rec_t**   recs;   /*!< dense page directory, sorted by address */
1141
 
        mem_heap_t*     heap;
1142
 
        ulint           trx_id_col;
1143
 
        ulint*          offsets = NULL;
1144
 
        ulint           n_blobs = 0;
1145
 
        byte*           storage;/* storage of uncompressed columns */
1146
 
#ifndef UNIV_HOTBACKUP
1147
 
        ullint          usec = ut_time_us(NULL);
1148
 
#endif /* !UNIV_HOTBACKUP */
1149
 
#ifdef PAGE_ZIP_COMPRESS_DBG
1150
 
        FILE*           logfile = NULL;
1151
 
#endif
1152
 
 
1153
 
        ut_a(page_is_comp(page));
1154
 
        ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
1155
 
        ut_ad(page_simple_validate_new((page_t*) page));
1156
 
        ut_ad(page_zip_simple_validate(page_zip));
1157
 
        ut_ad(dict_table_is_comp(index->table));
1158
 
        ut_ad(!dict_index_is_ibuf(index));
1159
 
 
1160
 
        UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
1161
 
 
1162
 
        /* Check the data that will be omitted. */
1163
 
        ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
1164
 
                     infimum_extra, sizeof infimum_extra));
1165
 
        ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
1166
 
                     infimum_data, sizeof infimum_data));
1167
 
        ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
1168
 
             /* info_bits == 0, n_owned <= max */
1169
 
             <= PAGE_DIR_SLOT_MAX_N_OWNED);
1170
 
        ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
1171
 
                     supremum_extra_data, sizeof supremum_extra_data));
1172
 
 
1173
 
        if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
1174
 
                ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
1175
 
                     == PAGE_NEW_SUPREMUM);
1176
 
        }
1177
 
 
1178
 
        if (page_is_leaf(page)) {
1179
 
                n_fields = dict_index_get_n_fields(index);
1180
 
        } else {
1181
 
                n_fields = dict_index_get_n_unique_in_tree(index);
1182
 
        }
1183
 
 
1184
 
        /* The dense directory excludes the infimum and supremum records. */
1185
 
        n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
1186
 
#ifdef PAGE_ZIP_COMPRESS_DBG
1187
 
        if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
1188
 
                fprintf(stderr, "compress %p %p %lu %lu %lu\n",
1189
 
                        (void*) page_zip, (void*) page,
1190
 
                        page_is_leaf(page),
1191
 
                        n_fields, n_dense);
1192
 
        }
1193
 
        if (UNIV_UNLIKELY(page_zip_compress_log)) {
1194
 
                /* Create a log file for every compression attempt. */
1195
 
                char    logfilename[9];
1196
 
                ut_snprintf(logfilename, sizeof logfilename,
1197
 
                            "%08x", page_zip_compress_log++);
1198
 
                logfile = fopen(logfilename, "wb");
1199
 
 
1200
 
                if (logfile) {
1201
 
                        /* Write the uncompressed page to the log. */
1202
 
                        fwrite(page, 1, UNIV_PAGE_SIZE, logfile);
1203
 
                        /* Record the compressed size as zero.
1204
 
                        This will be overwritten at successful exit. */
1205
 
                        putc(0, logfile);
1206
 
                        putc(0, logfile);
1207
 
                        putc(0, logfile);
1208
 
                        putc(0, logfile);
1209
 
                }
1210
 
        }
1211
 
#endif /* PAGE_ZIP_COMPRESS_DBG */
1212
 
#ifndef UNIV_HOTBACKUP
1213
 
        page_zip_stat[page_zip->ssize - 1].compressed++;
1214
 
#endif /* !UNIV_HOTBACKUP */
1215
 
 
1216
 
        if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
1217
 
                          >= page_zip_get_size(page_zip))) {
1218
 
 
1219
 
                goto err_exit;
1220
 
        }
1221
 
 
1222
 
        heap = mem_heap_create(page_zip_get_size(page_zip)
1223
 
                               + n_fields * (2 + sizeof *offsets)
1224
 
                               + n_dense * ((sizeof *recs)
1225
 
                                            - PAGE_ZIP_DIR_SLOT_SIZE)
1226
 
                               + UNIV_PAGE_SIZE * 4
1227
 
                               + (512 << MAX_MEM_LEVEL));
1228
 
 
1229
 
        recs = static_cast<const unsigned char **>(mem_heap_zalloc(heap, n_dense * sizeof *recs));
1230
 
 
1231
 
        fields = static_cast<byte *>(mem_heap_alloc(heap, (n_fields + 1) * 2));
1232
 
 
1233
 
        buf = static_cast<byte *>(mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA));
1234
 
        buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA;
1235
 
 
1236
 
        /* Compress the data payload. */
1237
 
        page_zip_set_alloc(&c_stream, heap);
1238
 
 
1239
 
        err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
1240
 
                           Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
1241
 
                           MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
1242
 
        ut_a(err == Z_OK);
1243
 
 
1244
 
        c_stream.next_out = buf;
1245
 
        /* Subtract the space reserved for uncompressed data. */
1246
 
        /* Page header and the end marker of the modification log */
1247
 
        c_stream.avail_out = buf_end - buf - 1;
1248
 
        /* Dense page directory and uncompressed columns, if any */
1249
 
        if (page_is_leaf(page)) {
1250
 
                if (dict_index_is_clust(index)) {
1251
 
                        trx_id_col = dict_index_get_sys_col_pos(
1252
 
                                index, DATA_TRX_ID);
1253
 
                        ut_ad(trx_id_col > 0);
1254
 
                        ut_ad(trx_id_col != ULINT_UNDEFINED);
1255
 
 
1256
 
                        slot_size = PAGE_ZIP_DIR_SLOT_SIZE
1257
 
                                + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1258
 
                } else {
1259
 
                        /* Signal the absence of trx_id
1260
 
                        in page_zip_fields_encode() */
1261
 
                        ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
1262
 
                              == ULINT_UNDEFINED);
1263
 
                        trx_id_col = 0;
1264
 
                        slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
1265
 
                }
1266
 
        } else {
1267
 
                slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
1268
 
                trx_id_col = ULINT_UNDEFINED;
1269
 
        }
1270
 
 
1271
 
        if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size
1272
 
                          + 6/* sizeof(zlib header and footer) */)) {
1273
 
                goto zlib_error;
1274
 
        }
1275
 
 
1276
 
        c_stream.avail_out -= n_dense * slot_size;
1277
 
        c_stream.avail_in = page_zip_fields_encode(n_fields, index,
1278
 
                                                   trx_id_col, fields);
1279
 
        c_stream.next_in = fields;
1280
 
        if (UNIV_LIKELY(!trx_id_col)) {
1281
 
                trx_id_col = ULINT_UNDEFINED;
1282
 
        }
1283
 
 
1284
 
        UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1285
 
        err = deflate(&c_stream, Z_FULL_FLUSH);
1286
 
        if (err != Z_OK) {
1287
 
                goto zlib_error;
1288
 
        }
1289
 
 
1290
 
        ut_ad(!c_stream.avail_in);
1291
 
 
1292
 
        page_zip_dir_encode(page, buf_end, recs);
1293
 
 
1294
 
        c_stream.next_in = (byte*) page + PAGE_ZIP_START;
1295
 
 
1296
 
        storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
1297
 
 
1298
 
        /* Compress the records in heap_no order. */
1299
 
        if (UNIV_UNLIKELY(!n_dense)) {
1300
 
        } else if (!page_is_leaf(page)) {
1301
 
                /* This is a node pointer page. */
1302
 
                err = page_zip_compress_node_ptrs(LOGFILE
1303
 
                                                  &c_stream, recs, n_dense,
1304
 
                                                  index, storage, heap);
1305
 
                if (UNIV_UNLIKELY(err != Z_OK)) {
1306
 
                        goto zlib_error;
1307
 
                }
1308
 
        } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
1309
 
                /* This is a leaf page in a secondary index. */
1310
 
                err = page_zip_compress_sec(LOGFILE
1311
 
                                            &c_stream, recs, n_dense);
1312
 
                if (UNIV_UNLIKELY(err != Z_OK)) {
1313
 
                        goto zlib_error;
1314
 
                }
1315
 
        } else {
1316
 
                /* This is a leaf page in a clustered index. */
1317
 
                err = page_zip_compress_clust(LOGFILE
1318
 
                                              &c_stream, recs, n_dense,
1319
 
                                              index, &n_blobs, trx_id_col,
1320
 
                                              buf_end - PAGE_ZIP_DIR_SLOT_SIZE
1321
 
                                              * page_get_n_recs(page),
1322
 
                                              storage, heap);
1323
 
                if (UNIV_UNLIKELY(err != Z_OK)) {
1324
 
                        goto zlib_error;
1325
 
                }
1326
 
        }
1327
 
 
1328
 
        /* Finish the compression. */
1329
 
        ut_ad(!c_stream.avail_in);
1330
 
        /* Compress any trailing garbage, in case the last record was
1331
 
        allocated from an originally longer space on the free list,
1332
 
        or the data of the last record from page_zip_compress_sec(). */
1333
 
        c_stream.avail_in
1334
 
                = page_header_get_field(page, PAGE_HEAP_TOP)
1335
 
                - (c_stream.next_in - page);
1336
 
        ut_a(c_stream.avail_in <= UNIV_PAGE_SIZE - PAGE_ZIP_START - PAGE_DIR);
1337
 
 
1338
 
        UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1339
 
        err = deflate(&c_stream, Z_FINISH);
1340
 
 
1341
 
        if (UNIV_UNLIKELY(err != Z_STREAM_END)) {
1342
 
zlib_error:
1343
 
                deflateEnd(&c_stream);
1344
 
                mem_heap_free(heap);
1345
 
err_exit:
1346
 
#ifdef PAGE_ZIP_COMPRESS_DBG
1347
 
                if (logfile) {
1348
 
                        fclose(logfile);
1349
 
                }
1350
 
#endif /* PAGE_ZIP_COMPRESS_DBG */
1351
 
#ifndef UNIV_HOTBACKUP
1352
 
                page_zip_stat[page_zip->ssize - 1].compressed_usec
1353
 
                        += ut_time_us(NULL) - usec;
1354
 
#endif /* !UNIV_HOTBACKUP */
1355
 
                return(FALSE);
1356
 
        }
1357
 
 
1358
 
        err = deflateEnd(&c_stream);
1359
 
        ut_a(err == Z_OK);
1360
 
 
1361
 
        ut_ad(buf + c_stream.total_out == c_stream.next_out);
1362
 
        ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out);
1363
 
 
1364
 
        /* Valgrind believes that zlib does not initialize some bits
1365
 
        in the last 7 or 8 bytes of the stream.  Make Valgrind happy. */
1366
 
        UNIV_MEM_VALID(buf, c_stream.total_out);
1367
 
 
1368
 
        /* Zero out the area reserved for the modification log.
1369
 
        Space for the end marker of the modification log is not
1370
 
        included in avail_out. */
1371
 
        memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */);
1372
 
 
1373
 
#ifdef UNIV_DEBUG
1374
 
        page_zip->m_start =
1375
 
#endif /* UNIV_DEBUG */
1376
 
                page_zip->m_end = PAGE_DATA + c_stream.total_out;
1377
 
        page_zip->m_nonempty = FALSE;
1378
 
        page_zip->n_blobs = n_blobs;
1379
 
        /* Copy those header fields that will not be written
1380
 
        in buf_flush_init_for_writing() */
1381
 
        memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
1382
 
               FIL_PAGE_LSN - FIL_PAGE_PREV);
1383
 
        memcpy(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2);
1384
 
        memcpy(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
1385
 
               PAGE_DATA - FIL_PAGE_DATA);
1386
 
        /* Copy the rest of the compressed page */
1387
 
        memcpy(page_zip->data + PAGE_DATA, buf,
1388
 
               page_zip_get_size(page_zip) - PAGE_DATA);
1389
 
        mem_heap_free(heap);
1390
 
#ifdef UNIV_ZIP_DEBUG
1391
 
        ut_a(page_zip_validate(page_zip, page));
1392
 
#endif /* UNIV_ZIP_DEBUG */
1393
 
 
1394
 
        if (mtr) {
1395
 
#ifndef UNIV_HOTBACKUP
1396
 
                page_zip_compress_write_log(page_zip, page, index, mtr);
1397
 
#endif /* !UNIV_HOTBACKUP */
1398
 
        }
1399
 
 
1400
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
1401
 
 
1402
 
#ifdef PAGE_ZIP_COMPRESS_DBG
1403
 
        if (logfile) {
1404
 
                /* Record the compressed size of the block. */
1405
 
                byte sz[4];
1406
 
                mach_write_to_4(sz, c_stream.total_out);
1407
 
                fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET);
1408
 
                fwrite(sz, 1, sizeof sz, logfile);
1409
 
                fclose(logfile);
1410
 
        }
1411
 
#endif /* PAGE_ZIP_COMPRESS_DBG */
1412
 
#ifndef UNIV_HOTBACKUP
1413
 
        {
1414
 
                page_zip_stat_t*        zip_stat
1415
 
                        = &page_zip_stat[page_zip->ssize - 1];
1416
 
                zip_stat->compressed_ok++;
1417
 
                zip_stat->compressed_usec += ut_time_us(NULL) - usec;
1418
 
        }
1419
 
#endif /* !UNIV_HOTBACKUP */
1420
 
 
1421
 
        return(TRUE);
1422
 
}
1423
 
 
1424
 
/**********************************************************************//**
1425
 
Compare two page directory entries.
1426
 
@return positive if rec1 > rec2 */
1427
 
UNIV_INLINE
1428
 
ibool
1429
 
page_zip_dir_cmp(
1430
 
/*=============*/
1431
 
        const rec_t*    rec1,   /*!< in: rec1 */
1432
 
        const rec_t*    rec2)   /*!< in: rec2 */
1433
 
{
1434
 
        return(rec1 > rec2);
1435
 
}
1436
 
 
1437
 
/**********************************************************************//**
1438
 
Sort the dense page directory by address (heap_no). */
1439
 
static
1440
 
void
1441
 
page_zip_dir_sort(
1442
 
/*==============*/
1443
 
        rec_t** arr,    /*!< in/out: dense page directory */
1444
 
        rec_t** aux_arr,/*!< in/out: work area */
1445
 
        ulint   low,    /*!< in: lower bound of the sorting area, inclusive */
1446
 
        ulint   high)   /*!< in: upper bound of the sorting area, exclusive */
1447
 
{
1448
 
        UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
1449
 
                              page_zip_dir_cmp);
1450
 
}
1451
 
 
1452
 
/**********************************************************************//**
1453
 
Deallocate the index information initialized by page_zip_fields_decode(). */
1454
 
static
1455
 
void
1456
 
page_zip_fields_free(
1457
 
/*=================*/
1458
 
        dict_index_t*   index)  /*!< in: dummy index to be freed */
1459
 
{
1460
 
        if (index) {
1461
 
                dict_table_t*   table = index->table;
1462
 
                mem_heap_free(index->heap);
1463
 
                mutex_free(&(table->autoinc_mutex));
1464
 
                ut_free(table->name);
1465
 
                mem_heap_free(table->heap);
1466
 
        }
1467
 
}
1468
 
 
1469
 
/**********************************************************************//**
1470
 
Read the index information for the compressed page.
1471
 
@return own: dummy index describing the page, or NULL on error */
1472
 
static
1473
 
dict_index_t*
1474
 
page_zip_fields_decode(
1475
 
/*===================*/
1476
 
        const byte*     buf,    /*!< in: index information */
1477
 
        const byte*     end,    /*!< in: end of buf */
1478
 
        ulint*          trx_id_col)/*!< in: NULL for non-leaf pages;
1479
 
                                for leaf pages, pointer to where to store
1480
 
                                the position of the trx_id column */
1481
 
{
1482
 
        const byte*     b;
1483
 
        ulint           n;
1484
 
        ulint           i;
1485
 
        ulint           val;
1486
 
        dict_table_t*   table;
1487
 
        dict_index_t*   index;
1488
 
 
1489
 
        /* Determine the number of fields. */
1490
 
        for (b = buf, n = 0; b < end; n++) {
1491
 
                if (*b++ & 0x80) {
1492
 
                        b++; /* skip the second byte */
1493
 
                }
1494
 
        }
1495
 
 
1496
 
        n--; /* n_nullable or trx_id */
1497
 
 
1498
 
        if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
1499
 
 
1500
 
                page_zip_fail(("page_zip_fields_decode: n = %lu\n",
1501
 
                               (ulong) n));
1502
 
                return(NULL);
1503
 
        }
1504
 
 
1505
 
        if (UNIV_UNLIKELY(b > end)) {
1506
 
 
1507
 
                page_zip_fail(("page_zip_fields_decode: %p > %p\n",
1508
 
                               (const void*) b, (const void*) end));
1509
 
                return(NULL);
1510
 
        }
1511
 
 
1512
 
        table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n,
1513
 
                                      DICT_TF_COMPACT);
1514
 
        index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
1515
 
                                      DICT_HDR_SPACE, 0, n);
1516
 
        index->table = table;
1517
 
        index->n_uniq = n;
1518
 
        /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
1519
 
        index->cached = TRUE;
1520
 
 
1521
 
        /* Initialize the fields. */
1522
 
        for (b = buf, i = 0; i < n; i++) {
1523
 
                ulint   mtype;
1524
 
                ulint   len;
1525
 
 
1526
 
                val = *b++;
1527
 
 
1528
 
                if (UNIV_UNLIKELY(val & 0x80)) {
1529
 
                        /* fixed length > 62 bytes */
1530
 
                        val = (val & 0x7f) << 8 | *b++;
1531
 
                        len = val >> 1;
1532
 
                        mtype = DATA_FIXBINARY;
1533
 
                } else if (UNIV_UNLIKELY(val >= 126)) {
1534
 
                        /* variable length with max > 255 bytes */
1535
 
                        len = 0x7fff;
1536
 
                        mtype = DATA_BINARY;
1537
 
                } else if (val <= 1) {
1538
 
                        /* variable length with max <= 255 bytes */
1539
 
                        len = 0;
1540
 
                        mtype = DATA_BINARY;
1541
 
                } else {
1542
 
                        /* fixed length < 62 bytes */
1543
 
                        len = val >> 1;
1544
 
                        mtype = DATA_FIXBINARY;
1545
 
                }
1546
 
 
1547
 
                dict_mem_table_add_col(table, NULL, NULL, mtype,
1548
 
                                       val & 1 ? DATA_NOT_NULL : 0, len);
1549
 
                dict_index_add_col(index, table,
1550
 
                                   dict_table_get_nth_col(table, i), 0);
1551
 
        }
1552
 
 
1553
 
        val = *b++;
1554
 
        if (UNIV_UNLIKELY(val & 0x80)) {
1555
 
                val = (val & 0x7f) << 8 | *b++;
1556
 
        }
1557
 
 
1558
 
        /* Decode the position of the trx_id column. */
1559
 
        if (trx_id_col) {
1560
 
                if (!val) {
1561
 
                        val = ULINT_UNDEFINED;
1562
 
                } else if (UNIV_UNLIKELY(val >= n)) {
1563
 
                        page_zip_fields_free(index);
1564
 
                        index = NULL;
1565
 
                } else {
1566
 
                        index->type = DICT_CLUSTERED;
1567
 
                }
1568
 
 
1569
 
                *trx_id_col = val;
1570
 
        } else {
1571
 
                /* Decode the number of nullable fields. */
1572
 
                if (UNIV_UNLIKELY(index->n_nullable > val)) {
1573
 
                        page_zip_fields_free(index);
1574
 
                        index = NULL;
1575
 
                } else {
1576
 
                        index->n_nullable = val;
1577
 
                }
1578
 
        }
1579
 
 
1580
 
        ut_ad(b == end);
1581
 
 
1582
 
        return(index);
1583
 
}
1584
 
 
1585
 
/**********************************************************************//**
1586
 
Populate the sparse page directory from the dense directory.
1587
 
@return TRUE on success, FALSE on failure */
1588
 
static
1589
 
ibool
1590
 
page_zip_dir_decode(
1591
 
/*================*/
1592
 
        const page_zip_des_t*   page_zip,/*!< in: dense page directory on
1593
 
                                        compressed page */
1594
 
        page_t*                 page,   /*!< in: compact page with valid header;
1595
 
                                        out: trailer and sparse page directory
1596
 
                                        filled in */
1597
 
        rec_t**                 recs,   /*!< out: dense page directory sorted by
1598
 
                                        ascending address (and heap_no) */
1599
 
        rec_t**                 recs_aux,/*!< in/out: scratch area */
1600
 
        ulint                   n_dense)/*!< in: number of user records, and
1601
 
                                        size of recs[] and recs_aux[] */
1602
 
{
1603
 
        ulint   i;
1604
 
        ulint   n_recs;
1605
 
        byte*   slot;
1606
 
 
1607
 
        n_recs = page_get_n_recs(page);
1608
 
 
1609
 
        if (UNIV_UNLIKELY(n_recs > n_dense)) {
1610
 
                page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n",
1611
 
                               (ulong) n_recs, (ulong) n_dense));
1612
 
                return(FALSE);
1613
 
        }
1614
 
 
1615
 
        /* Traverse the list of stored records in the sorting order,
1616
 
        starting from the first user record. */
1617
 
 
1618
 
        slot = page + (UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
1619
 
        UNIV_PREFETCH_RW(slot);
1620
 
 
1621
 
        /* Zero out the page trailer. */
1622
 
        memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);
1623
 
 
1624
 
        mach_write_to_2(slot, PAGE_NEW_INFIMUM);
1625
 
        slot -= PAGE_DIR_SLOT_SIZE;
1626
 
        UNIV_PREFETCH_RW(slot);
1627
 
 
1628
 
        /* Initialize the sparse directory and copy the dense directory. */
1629
 
        for (i = 0; i < n_recs; i++) {
1630
 
                ulint   offs = page_zip_dir_get(page_zip, i);
1631
 
 
1632
 
                if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
1633
 
                        mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
1634
 
                        slot -= PAGE_DIR_SLOT_SIZE;
1635
 
                        UNIV_PREFETCH_RW(slot);
1636
 
                }
1637
 
 
1638
 
                if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
1639
 
                                  < PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
1640
 
                        page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n",
1641
 
                                       (unsigned) i, (unsigned) n_recs,
1642
 
                                       (ulong) offs));
1643
 
                        return(FALSE);
1644
 
                }
1645
 
 
1646
 
                recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
1647
 
        }
1648
 
 
1649
 
        mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
1650
 
        {
1651
 
                const page_dir_slot_t*  last_slot = page_dir_get_nth_slot(
1652
 
                        page, page_dir_get_n_slots(page) - 1);
1653
 
 
1654
 
                if (UNIV_UNLIKELY(slot != last_slot)) {
1655
 
                        page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
1656
 
                                       (const void*) slot,
1657
 
                                       (const void*) last_slot));
1658
 
                        return(FALSE);
1659
 
                }
1660
 
        }
1661
 
 
1662
 
        /* Copy the rest of the dense directory. */
1663
 
        for (; i < n_dense; i++) {
1664
 
                ulint   offs = page_zip_dir_get(page_zip, i);
1665
 
 
1666
 
                if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
1667
 
                        page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n",
1668
 
                                       (unsigned) i, (unsigned) n_dense,
1669
 
                                       (ulong) offs));
1670
 
                        return(FALSE);
1671
 
                }
1672
 
 
1673
 
                recs[i] = page + offs;
1674
 
        }
1675
 
 
1676
 
        if (UNIV_LIKELY(n_dense > 1)) {
1677
 
                page_zip_dir_sort(recs, recs_aux, 0, n_dense);
1678
 
        }
1679
 
        return(TRUE);
1680
 
}
1681
 
 
1682
 
/**********************************************************************//**
1683
 
Initialize the REC_N_NEW_EXTRA_BYTES of each record.
1684
 
@return TRUE on success, FALSE on failure */
1685
 
static
1686
 
ibool
1687
 
page_zip_set_extra_bytes(
1688
 
/*=====================*/
1689
 
        const page_zip_des_t*   page_zip,/*!< in: compressed page */
1690
 
        page_t*                 page,   /*!< in/out: uncompressed page */
1691
 
        ulint                   info_bits)/*!< in: REC_INFO_MIN_REC_FLAG or 0 */
1692
 
{
1693
 
        ulint   n;
1694
 
        ulint   i;
1695
 
        ulint   n_owned = 1;
1696
 
        ulint   offs;
1697
 
        rec_t*  rec;
1698
 
 
1699
 
        n = page_get_n_recs(page);
1700
 
        rec = page + PAGE_NEW_INFIMUM;
1701
 
 
1702
 
        for (i = 0; i < n; i++) {
1703
 
                offs = page_zip_dir_get(page_zip, i);
1704
 
 
1705
 
                if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_DEL)) {
1706
 
                        info_bits |= REC_INFO_DELETED_FLAG;
1707
 
                }
1708
 
                if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
1709
 
                        info_bits |= n_owned;
1710
 
                        n_owned = 1;
1711
 
                } else {
1712
 
                        n_owned++;
1713
 
                }
1714
 
                offs &= PAGE_ZIP_DIR_SLOT_MASK;
1715
 
                if (UNIV_UNLIKELY(offs < PAGE_ZIP_START
1716
 
                                  + REC_N_NEW_EXTRA_BYTES)) {
1717
 
                        page_zip_fail(("page_zip_set_extra_bytes 1:"
1718
 
                                       " %u %u %lx\n",
1719
 
                                       (unsigned) i, (unsigned) n,
1720
 
                                       (ulong) offs));
1721
 
                        return(FALSE);
1722
 
                }
1723
 
 
1724
 
                rec_set_next_offs_new(rec, offs);
1725
 
                rec = page + offs;
1726
 
                rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
1727
 
                info_bits = 0;
1728
 
        }
1729
 
 
1730
 
        /* Set the next pointer of the last user record. */
1731
 
        rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);
1732
 
 
1733
 
        /* Set n_owned of the supremum record. */
1734
 
        page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;
1735
 
 
1736
 
        /* The dense directory excludes the infimum and supremum records. */
1737
 
        n = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
1738
 
 
1739
 
        if (i >= n) {
1740
 
                if (UNIV_LIKELY(i == n)) {
1741
 
                        return(TRUE);
1742
 
                }
1743
 
 
1744
 
                page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n",
1745
 
                               (unsigned) i, (unsigned) n));
1746
 
                return(FALSE);
1747
 
        }
1748
 
 
1749
 
        offs = page_zip_dir_get(page_zip, i);
1750
 
 
1751
 
        /* Set the extra bytes of deleted records on the free list. */
1752
 
        for (;;) {
1753
 
                if (UNIV_UNLIKELY(!offs)
1754
 
                    || UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
1755
 
 
1756
 
                        page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n",
1757
 
                                       (ulong) offs));
1758
 
                        return(FALSE);
1759
 
                }
1760
 
 
1761
 
                rec = page + offs;
1762
 
                rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
1763
 
 
1764
 
                if (++i == n) {
1765
 
                        break;
1766
 
                }
1767
 
 
1768
 
                offs = page_zip_dir_get(page_zip, i);
1769
 
                rec_set_next_offs_new(rec, offs);
1770
 
        }
1771
 
 
1772
 
        /* Terminate the free list. */
1773
 
        rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
1774
 
        rec_set_next_offs_new(rec, 0);
1775
 
 
1776
 
        return(TRUE);
1777
 
}
1778
 
 
1779
 
/**********************************************************************//**
1780
 
Apply the modification log to a record containing externally stored
1781
 
columns.  Do not copy the fields that are stored separately.
1782
 
@return pointer to modification log, or NULL on failure */
1783
 
static
1784
 
const byte*
1785
 
page_zip_apply_log_ext(
1786
 
/*===================*/
1787
 
        rec_t*          rec,            /*!< in/out: record */
1788
 
        const ulint*    offsets,        /*!< in: rec_get_offsets(rec) */
1789
 
        ulint           trx_id_col,     /*!< in: position of of DB_TRX_ID */
1790
 
        const byte*     data,           /*!< in: modification log */
1791
 
        const byte*     end)            /*!< in: end of modification log */
1792
 
{
1793
 
        ulint   i;
1794
 
        ulint   len;
1795
 
        byte*   next_out = rec;
1796
 
 
1797
 
        /* Check if there are any externally stored columns.
1798
 
        For each externally stored column, skip the
1799
 
        BTR_EXTERN_FIELD_REF. */
1800
 
 
1801
 
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
1802
 
                byte*   dst;
1803
 
 
1804
 
                if (UNIV_UNLIKELY(i == trx_id_col)) {
1805
 
                        /* Skip trx_id and roll_ptr */
1806
 
                        dst = rec_get_nth_field(rec, offsets,
1807
 
                                                i, &len);
1808
 
                        if (UNIV_UNLIKELY(dst - next_out >= end - data)
1809
 
                            || UNIV_UNLIKELY
1810
 
                            (len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
1811
 
                            || rec_offs_nth_extern(offsets, i)) {
1812
 
                                page_zip_fail(("page_zip_apply_log_ext:"
1813
 
                                               " trx_id len %lu,"
1814
 
                                               " %p - %p >= %p - %p\n",
1815
 
                                               (ulong) len,
1816
 
                                               (const void*) dst,
1817
 
                                               (const void*) next_out,
1818
 
                                               (const void*) end,
1819
 
                                               (const void*) data));
1820
 
                                return(NULL);
1821
 
                        }
1822
 
 
1823
 
                        memcpy(next_out, data, dst - next_out);
1824
 
                        data += dst - next_out;
1825
 
                        next_out = dst + (DATA_TRX_ID_LEN
1826
 
                                          + DATA_ROLL_PTR_LEN);
1827
 
                } else if (rec_offs_nth_extern(offsets, i)) {
1828
 
                        dst = rec_get_nth_field(rec, offsets,
1829
 
                                                i, &len);
1830
 
                        ut_ad(len
1831
 
                              >= BTR_EXTERN_FIELD_REF_SIZE);
1832
 
 
1833
 
                        len += dst - next_out
1834
 
                                - BTR_EXTERN_FIELD_REF_SIZE;
1835
 
 
1836
 
                        if (UNIV_UNLIKELY(data + len >= end)) {
1837
 
                                page_zip_fail(("page_zip_apply_log_ext: "
1838
 
                                               "ext %p+%lu >= %p\n",
1839
 
                                               (const void*) data,
1840
 
                                               (ulong) len,
1841
 
                                               (const void*) end));
1842
 
                                return(NULL);
1843
 
                        }
1844
 
 
1845
 
                        memcpy(next_out, data, len);
1846
 
                        data += len;
1847
 
                        next_out += len
1848
 
                                + BTR_EXTERN_FIELD_REF_SIZE;
1849
 
                }
1850
 
        }
1851
 
 
1852
 
        /* Copy the last bytes of the record. */
1853
 
        len = rec_get_end(rec, offsets) - next_out;
1854
 
        if (UNIV_UNLIKELY(data + len >= end)) {
1855
 
                page_zip_fail(("page_zip_apply_log_ext: "
1856
 
                               "last %p+%lu >= %p\n",
1857
 
                               (const void*) data,
1858
 
                               (ulong) len,
1859
 
                               (const void*) end));
1860
 
                return(NULL);
1861
 
        }
1862
 
        memcpy(next_out, data, len);
1863
 
        data += len;
1864
 
 
1865
 
        return(data);
1866
 
}
1867
 
 
1868
 
/**********************************************************************//**
1869
 
Apply the modification log to an uncompressed page.
1870
 
Do not copy the fields that are stored separately.
1871
 
@return pointer to end of modification log, or NULL on failure */
1872
 
static
1873
 
const byte*
1874
 
page_zip_apply_log(
1875
 
/*===============*/
1876
 
        const byte*     data,   /*!< in: modification log */
1877
 
        ulint           size,   /*!< in: maximum length of the log, in bytes */
1878
 
        rec_t**         recs,   /*!< in: dense page directory,
1879
 
                                sorted by address (indexed by
1880
 
                                heap_no - PAGE_HEAP_NO_USER_LOW) */
1881
 
        ulint           n_dense,/*!< in: size of recs[] */
1882
 
        ulint           trx_id_col,/*!< in: column number of trx_id in the index,
1883
 
                                or ULINT_UNDEFINED if none */
1884
 
        ulint           heap_status,
1885
 
                                /*!< in: heap_no and status bits for
1886
 
                                the next record to uncompress */
1887
 
        dict_index_t*   index,  /*!< in: index of the page */
1888
 
        ulint*          offsets)/*!< in/out: work area for
1889
 
                                rec_get_offsets_reverse() */
1890
 
{
1891
 
        const byte* const end = data + size;
1892
 
 
1893
 
        for (;;) {
1894
 
                ulint   val;
1895
 
                rec_t*  rec;
1896
 
                ulint   len;
1897
 
                ulint   hs;
1898
 
 
1899
 
                val = *data++;
1900
 
                if (UNIV_UNLIKELY(!val)) {
1901
 
                        return(data - 1);
1902
 
                }
1903
 
                if (val & 0x80) {
1904
 
                        val = (val & 0x7f) << 8 | *data++;
1905
 
                        if (UNIV_UNLIKELY(!val)) {
1906
 
                                page_zip_fail(("page_zip_apply_log:"
1907
 
                                               " invalid val %x%x\n",
1908
 
                                               data[-2], data[-1]));
1909
 
                                return(NULL);
1910
 
                        }
1911
 
                }
1912
 
                if (UNIV_UNLIKELY(data >= end)) {
1913
 
                        page_zip_fail(("page_zip_apply_log: %p >= %p\n",
1914
 
                                       (const void*) data,
1915
 
                                       (const void*) end));
1916
 
                        return(NULL);
1917
 
                }
1918
 
                if (UNIV_UNLIKELY((val >> 1) > n_dense)) {
1919
 
                        page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n",
1920
 
                                       (ulong) val, (ulong) n_dense));
1921
 
                        return(NULL);
1922
 
                }
1923
 
 
1924
 
                /* Determine the heap number and status bits of the record. */
1925
 
                rec = recs[(val >> 1) - 1];
1926
 
 
1927
 
                hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT;
1928
 
                hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);
1929
 
 
1930
 
                /* This may either be an old record that is being
1931
 
                overwritten (updated in place, or allocated from
1932
 
                the free list), or a new record, with the next
1933
 
                available_heap_no. */
1934
 
                if (UNIV_UNLIKELY(hs > heap_status)) {
1935
 
                        page_zip_fail(("page_zip_apply_log: %lu > %lu\n",
1936
 
                                       (ulong) hs, (ulong) heap_status));
1937
 
                        return(NULL);
1938
 
                } else if (hs == heap_status) {
1939
 
                        /* A new record was allocated from the heap. */
1940
 
                        if (UNIV_UNLIKELY(val & 1)) {
1941
 
                                /* Only existing records may be cleared. */
1942
 
                                page_zip_fail(("page_zip_apply_log:"
1943
 
                                               " attempting to create"
1944
 
                                               " deleted rec %lu\n",
1945
 
                                               (ulong) hs));
1946
 
                                return(NULL);
1947
 
                        }
1948
 
                        heap_status += 1 << REC_HEAP_NO_SHIFT;
1949
 
                }
1950
 
 
1951
 
                mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);
1952
 
 
1953
 
                if (val & 1) {
1954
 
                        /* Clear the data bytes of the record. */
1955
 
                        mem_heap_t*     heap    = NULL;
1956
 
                        ulint*          offs;
1957
 
                        offs = rec_get_offsets(rec, index, offsets,
1958
 
                                               ULINT_UNDEFINED, &heap);
1959
 
                        memset(rec, 0, rec_offs_data_size(offs));
1960
 
 
1961
 
                        if (UNIV_LIKELY_NULL(heap)) {
1962
 
                                mem_heap_free(heap);
1963
 
                        }
1964
 
                        continue;
1965
 
                }
1966
 
 
1967
 
#if REC_STATUS_NODE_PTR != TRUE
1968
 
# error "REC_STATUS_NODE_PTR != TRUE"
1969
 
#endif
1970
 
                rec_get_offsets_reverse(data, index,
1971
 
                                        hs & REC_STATUS_NODE_PTR,
1972
 
                                        offsets);
1973
 
                rec_offs_make_valid(rec, index, offsets);
1974
 
 
1975
 
                /* Copy the extra bytes (backwards). */
1976
 
                {
1977
 
                        byte*   start   = rec_get_start(rec, offsets);
1978
 
                        byte*   b       = rec - REC_N_NEW_EXTRA_BYTES;
1979
 
                        while (b != start) {
1980
 
                                *--b = *data++;
1981
 
                        }
1982
 
                }
1983
 
 
1984
 
                /* Copy the data bytes. */
1985
 
                if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
1986
 
                        /* Non-leaf nodes should not contain any
1987
 
                        externally stored columns. */
1988
 
                        if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
1989
 
                                page_zip_fail(("page_zip_apply_log: "
1990
 
                                               "%lu&REC_STATUS_NODE_PTR\n",
1991
 
                                               (ulong) hs));
1992
 
                                return(NULL);
1993
 
                        }
1994
 
 
1995
 
                        data = page_zip_apply_log_ext(
1996
 
                                rec, offsets, trx_id_col, data, end);
1997
 
 
1998
 
                        if (UNIV_UNLIKELY(!data)) {
1999
 
                                return(NULL);
2000
 
                        }
2001
 
                } else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
2002
 
                        len = rec_offs_data_size(offsets)
2003
 
                                - REC_NODE_PTR_SIZE;
2004
 
                        /* Copy the data bytes, except node_ptr. */
2005
 
                        if (UNIV_UNLIKELY(data + len >= end)) {
2006
 
                                page_zip_fail(("page_zip_apply_log: "
2007
 
                                               "node_ptr %p+%lu >= %p\n",
2008
 
                                               (const void*) data,
2009
 
                                               (ulong) len,
2010
 
                                               (const void*) end));
2011
 
                                return(NULL);
2012
 
                        }
2013
 
                        memcpy(rec, data, len);
2014
 
                        data += len;
2015
 
                } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
2016
 
                        len = rec_offs_data_size(offsets);
2017
 
 
2018
 
                        /* Copy all data bytes of
2019
 
                        a record in a secondary index. */
2020
 
                        if (UNIV_UNLIKELY(data + len >= end)) {
2021
 
                                page_zip_fail(("page_zip_apply_log: "
2022
 
                                               "sec %p+%lu >= %p\n",
2023
 
                                               (const void*) data,
2024
 
                                               (ulong) len,
2025
 
                                               (const void*) end));
2026
 
                                return(NULL);
2027
 
                        }
2028
 
 
2029
 
                        memcpy(rec, data, len);
2030
 
                        data += len;
2031
 
                } else {
2032
 
                        /* Skip DB_TRX_ID and DB_ROLL_PTR. */
2033
 
                        ulint   l = rec_get_nth_field_offs(offsets,
2034
 
                                                           trx_id_col, &len);
2035
 
                        byte*   b;
2036
 
 
2037
 
                        if (UNIV_UNLIKELY(data + l >= end)
2038
 
                            || UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
2039
 
                                                    + DATA_ROLL_PTR_LEN))) {
2040
 
                                page_zip_fail(("page_zip_apply_log: "
2041
 
                                               "trx_id %p+%lu >= %p\n",
2042
 
                                               (const void*) data,
2043
 
                                               (ulong) l,
2044
 
                                               (const void*) end));
2045
 
                                return(NULL);
2046
 
                        }
2047
 
 
2048
 
                        /* Copy any preceding data bytes. */
2049
 
                        memcpy(rec, data, l);
2050
 
                        data += l;
2051
 
 
2052
 
                        /* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
2053
 
                        b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2054
 
                        len = rec_get_end(rec, offsets) - b;
2055
 
                        if (UNIV_UNLIKELY(data + len >= end)) {
2056
 
                                page_zip_fail(("page_zip_apply_log: "
2057
 
                                               "clust %p+%lu >= %p\n",
2058
 
                                               (const void*) data,
2059
 
                                               (ulong) len,
2060
 
                                               (const void*) end));
2061
 
                                return(NULL);
2062
 
                        }
2063
 
                        memcpy(b, data, len);
2064
 
                        data += len;
2065
 
                }
2066
 
        }
2067
 
}
2068
 
 
2069
 
/**********************************************************************//**
2070
 
Decompress the records of a node pointer page.
2071
 
@return TRUE on success, FALSE on failure */
2072
 
static
2073
 
ibool
2074
 
page_zip_decompress_node_ptrs(
2075
 
/*==========================*/
2076
 
        page_zip_des_t* page_zip,       /*!< in/out: compressed page */
2077
 
        z_stream*       d_stream,       /*!< in/out: compressed page stream */
2078
 
        rec_t**         recs,           /*!< in: dense page directory
2079
 
                                        sorted by address */
2080
 
        ulint           n_dense,        /*!< in: size of recs[] */
2081
 
        dict_index_t*   index,          /*!< in: the index of the page */
2082
 
        ulint*          offsets,        /*!< in/out: temporary offsets */
2083
 
        mem_heap_t*     heap)           /*!< in: temporary memory heap */
2084
 
{
2085
 
        ulint           heap_status = REC_STATUS_NODE_PTR
2086
 
                | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2087
 
        ulint           slot;
2088
 
        const byte*     storage;
2089
 
 
2090
 
        /* Subtract the space reserved for uncompressed data. */
2091
 
        d_stream->avail_in -= n_dense
2092
 
                * (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE);
2093
 
 
2094
 
        /* Decompress the records in heap_no order. */
2095
 
        for (slot = 0; slot < n_dense; slot++) {
2096
 
                rec_t*  rec = recs[slot];
2097
 
 
2098
 
                d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2099
 
                        - d_stream->next_out;
2100
 
 
2101
 
                ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
2102
 
                      - PAGE_ZIP_START - PAGE_DIR);
2103
 
                switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2104
 
                case Z_STREAM_END:
2105
 
                        /* Apparently, n_dense has grown
2106
 
                        since the time the page was last compressed. */
2107
 
                        goto zlib_done;
2108
 
                case Z_OK:
2109
 
                case Z_BUF_ERROR:
2110
 
                        if (!d_stream->avail_out) {
2111
 
                                break;
2112
 
                        }
2113
 
                        /* fall through */
2114
 
                default:
2115
 
                        page_zip_fail(("page_zip_decompress_node_ptrs:"
2116
 
                                       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
2117
 
                                       d_stream->msg));
2118
 
                        goto zlib_error;
2119
 
                }
2120
 
 
2121
 
                ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2122
 
                /* Prepare to decompress the data bytes. */
2123
 
                d_stream->next_out = rec;
2124
 
                /* Set heap_no and the status bits. */
2125
 
                mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2126
 
                heap_status += 1 << REC_HEAP_NO_SHIFT;
2127
 
 
2128
 
                /* Read the offsets. The status bits are needed here. */
2129
 
                offsets = rec_get_offsets(rec, index, offsets,
2130
 
                                          ULINT_UNDEFINED, &heap);
2131
 
 
2132
 
                /* Non-leaf nodes should not have any externally
2133
 
                stored columns. */
2134
 
                ut_ad(!rec_offs_any_extern(offsets));
2135
 
 
2136
 
                /* Decompress the data bytes, except node_ptr. */
2137
 
                d_stream->avail_out = rec_offs_data_size(offsets)
2138
 
                        - REC_NODE_PTR_SIZE;
2139
 
 
2140
 
                switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2141
 
                case Z_STREAM_END:
2142
 
                        goto zlib_done;
2143
 
                case Z_OK:
2144
 
                case Z_BUF_ERROR:
2145
 
                        if (!d_stream->avail_out) {
2146
 
                                break;
2147
 
                        }
2148
 
                        /* fall through */
2149
 
                default:
2150
 
                        page_zip_fail(("page_zip_decompress_node_ptrs:"
2151
 
                                       " 2 inflate(Z_SYNC_FLUSH)=%s\n",
2152
 
                                       d_stream->msg));
2153
 
                        goto zlib_error;
2154
 
                }
2155
 
 
2156
 
                /* Clear the node pointer in case the record
2157
 
                will be deleted and the space will be reallocated
2158
 
                to a smaller record. */
2159
 
                memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE);
2160
 
                d_stream->next_out += REC_NODE_PTR_SIZE;
2161
 
 
2162
 
                ut_ad(d_stream->next_out == rec_get_end(rec, offsets));
2163
 
        }
2164
 
 
2165
 
        /* Decompress any trailing garbage, in case the last record was
2166
 
        allocated from an originally longer space on the free list. */
2167
 
        d_stream->avail_out = page_header_get_field(page_zip->data,
2168
 
                                                    PAGE_HEAP_TOP)
2169
 
                - page_offset(d_stream->next_out);
2170
 
        if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2171
 
                          - PAGE_ZIP_START - PAGE_DIR)) {
2172
 
 
2173
 
                page_zip_fail(("page_zip_decompress_node_ptrs:"
2174
 
                               " avail_out = %u\n",
2175
 
                               d_stream->avail_out));
2176
 
                goto zlib_error;
2177
 
        }
2178
 
 
2179
 
        if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2180
 
                page_zip_fail(("page_zip_decompress_node_ptrs:"
2181
 
                               " inflate(Z_FINISH)=%s\n",
2182
 
                               d_stream->msg));
2183
 
zlib_error:
2184
 
                inflateEnd(d_stream);
2185
 
                return(FALSE);
2186
 
        }
2187
 
 
2188
 
        /* Note that d_stream->avail_out > 0 may hold here
2189
 
        if the modification log is nonempty. */
2190
 
 
2191
 
zlib_done:
2192
 
        if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2193
 
                ut_error;
2194
 
        }
2195
 
 
2196
 
        {
2197
 
                page_t* page = page_align(d_stream->next_out);
2198
 
 
2199
 
                /* Clear the unused heap space on the uncompressed page. */
2200
 
                memset(d_stream->next_out, 0,
2201
 
                       page_dir_get_nth_slot(page,
2202
 
                                             page_dir_get_n_slots(page) - 1)
2203
 
                       - d_stream->next_out);
2204
 
        }
2205
 
 
2206
 
#ifdef UNIV_DEBUG
2207
 
        page_zip->m_start = PAGE_DATA + d_stream->total_in;
2208
 
#endif /* UNIV_DEBUG */
2209
 
 
2210
 
        /* Apply the modification log. */
2211
 
        {
2212
 
                const byte*     mod_log_ptr;
2213
 
                mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2214
 
                                                 d_stream->avail_in + 1,
2215
 
                                                 recs, n_dense,
2216
 
                                                 ULINT_UNDEFINED, heap_status,
2217
 
                                                 index, offsets);
2218
 
 
2219
 
                if (UNIV_UNLIKELY(!mod_log_ptr)) {
2220
 
                        return(FALSE);
2221
 
                }
2222
 
                page_zip->m_end = mod_log_ptr - page_zip->data;
2223
 
                page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2224
 
        }
2225
 
 
2226
 
        if (UNIV_UNLIKELY
2227
 
            (page_zip_get_trailer_len(page_zip,
2228
 
                                      dict_index_is_clust(index), NULL)
2229
 
             + page_zip->m_end >= page_zip_get_size(page_zip))) {
2230
 
                page_zip_fail(("page_zip_decompress_node_ptrs:"
2231
 
                               " %lu + %lu >= %lu, %lu\n",
2232
 
                               (ulong) page_zip_get_trailer_len(
2233
 
                                       page_zip, dict_index_is_clust(index),
2234
 
                                       NULL),
2235
 
                               (ulong) page_zip->m_end,
2236
 
                               (ulong) page_zip_get_size(page_zip),
2237
 
                               (ulong) dict_index_is_clust(index)));
2238
 
                return(FALSE);
2239
 
        }
2240
 
 
2241
 
        /* Restore the uncompressed columns in heap_no order. */
2242
 
        storage = page_zip->data + page_zip_get_size(page_zip)
2243
 
                - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2244
 
 
2245
 
        for (slot = 0; slot < n_dense; slot++) {
2246
 
                rec_t*          rec     = recs[slot];
2247
 
 
2248
 
                offsets = rec_get_offsets(rec, index, offsets,
2249
 
                                          ULINT_UNDEFINED, &heap);
2250
 
                /* Non-leaf nodes should not have any externally
2251
 
                stored columns. */
2252
 
                ut_ad(!rec_offs_any_extern(offsets));
2253
 
                storage -= REC_NODE_PTR_SIZE;
2254
 
 
2255
 
                memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
2256
 
                       storage, REC_NODE_PTR_SIZE);
2257
 
        }
2258
 
 
2259
 
        return(TRUE);
2260
 
}
2261
 
 
2262
 
/**********************************************************************//**
2263
 
Decompress the records of a leaf node of a secondary index.
2264
 
@return TRUE on success, FALSE on failure */
2265
 
static
2266
 
ibool
2267
 
page_zip_decompress_sec(
2268
 
/*====================*/
2269
 
        page_zip_des_t* page_zip,       /*!< in/out: compressed page */
2270
 
        z_stream*       d_stream,       /*!< in/out: compressed page stream */
2271
 
        rec_t**         recs,           /*!< in: dense page directory
2272
 
                                        sorted by address */
2273
 
        ulint           n_dense,        /*!< in: size of recs[] */
2274
 
        dict_index_t*   index,          /*!< in: the index of the page */
2275
 
        ulint*          offsets)        /*!< in/out: temporary offsets */
2276
 
{
2277
 
        ulint   heap_status     = REC_STATUS_ORDINARY
2278
 
                | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2279
 
        ulint   slot;
2280
 
 
2281
 
        ut_a(!dict_index_is_clust(index));
2282
 
 
2283
 
        /* Subtract the space reserved for uncompressed data. */
2284
 
        d_stream->avail_in -= n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2285
 
 
2286
 
        for (slot = 0; slot < n_dense; slot++) {
2287
 
                rec_t*  rec = recs[slot];
2288
 
 
2289
 
                /* Decompress everything up to this record. */
2290
 
                d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2291
 
                        - d_stream->next_out;
2292
 
 
2293
 
                if (UNIV_LIKELY(d_stream->avail_out)) {
2294
 
                        switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2295
 
                        case Z_STREAM_END:
2296
 
                                /* Apparently, n_dense has grown
2297
 
                                since the time the page was last compressed. */
2298
 
                                goto zlib_done;
2299
 
                        case Z_OK:
2300
 
                        case Z_BUF_ERROR:
2301
 
                                if (!d_stream->avail_out) {
2302
 
                                        break;
2303
 
                                }
2304
 
                                /* fall through */
2305
 
                        default:
2306
 
                                page_zip_fail(("page_zip_decompress_sec:"
2307
 
                                               " inflate(Z_SYNC_FLUSH)=%s\n",
2308
 
                                               d_stream->msg));
2309
 
                                goto zlib_error;
2310
 
                        }
2311
 
                }
2312
 
 
2313
 
                ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2314
 
 
2315
 
                /* Skip the REC_N_NEW_EXTRA_BYTES. */
2316
 
 
2317
 
                d_stream->next_out = rec;
2318
 
 
2319
 
                /* Set heap_no and the status bits. */
2320
 
                mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2321
 
                heap_status += 1 << REC_HEAP_NO_SHIFT;
2322
 
        }
2323
 
 
2324
 
        /* Decompress the data of the last record and any trailing garbage,
2325
 
        in case the last record was allocated from an originally longer space
2326
 
        on the free list. */
2327
 
        d_stream->avail_out = page_header_get_field(page_zip->data,
2328
 
                                                    PAGE_HEAP_TOP)
2329
 
                - page_offset(d_stream->next_out);
2330
 
        if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2331
 
                          - PAGE_ZIP_START - PAGE_DIR)) {
2332
 
 
2333
 
                page_zip_fail(("page_zip_decompress_sec:"
2334
 
                               " avail_out = %u\n",
2335
 
                               d_stream->avail_out));
2336
 
                goto zlib_error;
2337
 
        }
2338
 
 
2339
 
        if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2340
 
                page_zip_fail(("page_zip_decompress_sec:"
2341
 
                               " inflate(Z_FINISH)=%s\n",
2342
 
                               d_stream->msg));
2343
 
zlib_error:
2344
 
                inflateEnd(d_stream);
2345
 
                return(FALSE);
2346
 
        }
2347
 
 
2348
 
        /* Note that d_stream->avail_out > 0 may hold here
2349
 
        if the modification log is nonempty. */
2350
 
 
2351
 
zlib_done:
2352
 
        if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2353
 
                ut_error;
2354
 
        }
2355
 
 
2356
 
        {
2357
 
                page_t* page = page_align(d_stream->next_out);
2358
 
 
2359
 
                /* Clear the unused heap space on the uncompressed page. */
2360
 
                memset(d_stream->next_out, 0,
2361
 
                       page_dir_get_nth_slot(page,
2362
 
                                             page_dir_get_n_slots(page) - 1)
2363
 
                       - d_stream->next_out);
2364
 
        }
2365
 
 
2366
 
#ifdef UNIV_DEBUG
2367
 
        page_zip->m_start = PAGE_DATA + d_stream->total_in;
2368
 
#endif /* UNIV_DEBUG */
2369
 
 
2370
 
        /* Apply the modification log. */
2371
 
        {
2372
 
                const byte*     mod_log_ptr;
2373
 
                mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2374
 
                                                 d_stream->avail_in + 1,
2375
 
                                                 recs, n_dense,
2376
 
                                                 ULINT_UNDEFINED, heap_status,
2377
 
                                                 index, offsets);
2378
 
 
2379
 
                if (UNIV_UNLIKELY(!mod_log_ptr)) {
2380
 
                        return(FALSE);
2381
 
                }
2382
 
                page_zip->m_end = mod_log_ptr - page_zip->data;
2383
 
                page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2384
 
        }
2385
 
 
2386
 
        if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE, NULL)
2387
 
                          + page_zip->m_end >= page_zip_get_size(page_zip))) {
2388
 
 
2389
 
                page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n",
2390
 
                               (ulong) page_zip_get_trailer_len(
2391
 
                                       page_zip, FALSE, NULL),
2392
 
                               (ulong) page_zip->m_end,
2393
 
                               (ulong) page_zip_get_size(page_zip)));
2394
 
                return(FALSE);
2395
 
        }
2396
 
 
2397
 
        /* There are no uncompressed columns on leaf pages of
2398
 
        secondary indexes. */
2399
 
 
2400
 
        return(TRUE);
2401
 
}
2402
 
 
2403
 
/**********************************************************************//**
2404
 
Decompress a record of a leaf node of a clustered index that contains
2405
 
externally stored columns.
2406
 
@return TRUE on success */
2407
 
static
2408
 
ibool
2409
 
page_zip_decompress_clust_ext(
2410
 
/*==========================*/
2411
 
        z_stream*       d_stream,       /*!< in/out: compressed page stream */
2412
 
        rec_t*          rec,            /*!< in/out: record */
2413
 
        const ulint*    offsets,        /*!< in: rec_get_offsets(rec) */
2414
 
        ulint           trx_id_col)     /*!< in: position of of DB_TRX_ID */
2415
 
{
2416
 
        ulint   i;
2417
 
 
2418
 
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
2419
 
                ulint   len;
2420
 
                byte*   dst;
2421
 
 
2422
 
                if (UNIV_UNLIKELY(i == trx_id_col)) {
2423
 
                        /* Skip trx_id and roll_ptr */
2424
 
                        dst = rec_get_nth_field(rec, offsets, i, &len);
2425
 
                        if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
2426
 
                                          + DATA_ROLL_PTR_LEN)) {
2427
 
 
2428
 
                                page_zip_fail(("page_zip_decompress_clust_ext:"
2429
 
                                               " len[%lu] = %lu\n",
2430
 
                                               (ulong) i, (ulong) len));
2431
 
                                return(FALSE);
2432
 
                        }
2433
 
 
2434
 
                        if (rec_offs_nth_extern(offsets, i)) {
2435
 
 
2436
 
                                page_zip_fail(("page_zip_decompress_clust_ext:"
2437
 
                                               " DB_TRX_ID at %lu is ext\n",
2438
 
                                               (ulong) i));
2439
 
                                return(FALSE);
2440
 
                        }
2441
 
 
2442
 
                        d_stream->avail_out = dst - d_stream->next_out;
2443
 
 
2444
 
                        switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2445
 
                        case Z_STREAM_END:
2446
 
                        case Z_OK:
2447
 
                        case Z_BUF_ERROR:
2448
 
                                if (!d_stream->avail_out) {
2449
 
                                        break;
2450
 
                                }
2451
 
                                /* fall through */
2452
 
                        default:
2453
 
                                page_zip_fail(("page_zip_decompress_clust_ext:"
2454
 
                                               " 1 inflate(Z_SYNC_FLUSH)=%s\n",
2455
 
                                               d_stream->msg));
2456
 
                                return(FALSE);
2457
 
                        }
2458
 
 
2459
 
                        ut_ad(d_stream->next_out == dst);
2460
 
 
2461
 
                        /* Clear DB_TRX_ID and DB_ROLL_PTR in order to
2462
 
                        avoid uninitialized bytes in case the record
2463
 
                        is affected by page_zip_apply_log(). */
2464
 
                        memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2465
 
 
2466
 
                        d_stream->next_out += DATA_TRX_ID_LEN
2467
 
                                + DATA_ROLL_PTR_LEN;
2468
 
                } else if (rec_offs_nth_extern(offsets, i)) {
2469
 
                        dst = rec_get_nth_field(rec, offsets, i, &len);
2470
 
                        ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
2471
 
                        dst += len - BTR_EXTERN_FIELD_REF_SIZE;
2472
 
 
2473
 
                        d_stream->avail_out = dst - d_stream->next_out;
2474
 
                        switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2475
 
                        case Z_STREAM_END:
2476
 
                        case Z_OK:
2477
 
                        case Z_BUF_ERROR:
2478
 
                                if (!d_stream->avail_out) {
2479
 
                                        break;
2480
 
                                }
2481
 
                                /* fall through */
2482
 
                        default:
2483
 
                                page_zip_fail(("page_zip_decompress_clust_ext:"
2484
 
                                               " 2 inflate(Z_SYNC_FLUSH)=%s\n",
2485
 
                                               d_stream->msg));
2486
 
                                return(FALSE);
2487
 
                        }
2488
 
 
2489
 
                        ut_ad(d_stream->next_out == dst);
2490
 
 
2491
 
                        /* Clear the BLOB pointer in case
2492
 
                        the record will be deleted and the
2493
 
                        space will not be reused.  Note that
2494
 
                        the final initialization of the BLOB
2495
 
                        pointers (copying from "externs"
2496
 
                        or clearing) will have to take place
2497
 
                        only after the page modification log
2498
 
                        has been applied.  Otherwise, we
2499
 
                        could end up with an uninitialized
2500
 
                        BLOB pointer when a record is deleted,
2501
 
                        reallocated and deleted. */
2502
 
                        memset(d_stream->next_out, 0,
2503
 
                               BTR_EXTERN_FIELD_REF_SIZE);
2504
 
                        d_stream->next_out
2505
 
                                += BTR_EXTERN_FIELD_REF_SIZE;
2506
 
                }
2507
 
        }
2508
 
 
2509
 
        return(TRUE);
2510
 
}
2511
 
 
2512
 
/**********************************************************************//**
2513
 
Decompress the records of a leaf node of a clustered index.
2514
 
@return TRUE on success, FALSE on failure */
2515
 
static
2516
 
ibool
2517
 
page_zip_decompress_clust(
2518
 
/*======================*/
2519
 
        page_zip_des_t* page_zip,       /*!< in/out: compressed page */
2520
 
        z_stream*       d_stream,       /*!< in/out: compressed page stream */
2521
 
        rec_t**         recs,           /*!< in: dense page directory
2522
 
                                        sorted by address */
2523
 
        ulint           n_dense,        /*!< in: size of recs[] */
2524
 
        dict_index_t*   index,          /*!< in: the index of the page */
2525
 
        ulint           trx_id_col,     /*!< index of the trx_id column */
2526
 
        ulint*          offsets,        /*!< in/out: temporary offsets */
2527
 
        mem_heap_t*     heap)           /*!< in: temporary memory heap */
2528
 
{
2529
 
        int             err;
2530
 
        ulint           slot;
2531
 
        ulint           heap_status     = REC_STATUS_ORDINARY
2532
 
                | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2533
 
        const byte*     storage;
2534
 
        const byte*     externs;
2535
 
 
2536
 
        ut_a(dict_index_is_clust(index));
2537
 
 
2538
 
        /* Subtract the space reserved for uncompressed data. */
2539
 
        d_stream->avail_in -= n_dense * (PAGE_ZIP_DIR_SLOT_SIZE
2540
 
                                         + DATA_TRX_ID_LEN
2541
 
                                         + DATA_ROLL_PTR_LEN);
2542
 
 
2543
 
        /* Decompress the records in heap_no order. */
2544
 
        for (slot = 0; slot < n_dense; slot++) {
2545
 
                rec_t*  rec     = recs[slot];
2546
 
 
2547
 
                d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2548
 
                        - d_stream->next_out;
2549
 
 
2550
 
                ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
2551
 
                      - PAGE_ZIP_START - PAGE_DIR);
2552
 
                err = inflate(d_stream, Z_SYNC_FLUSH);
2553
 
                switch (err) {
2554
 
                case Z_STREAM_END:
2555
 
                        /* Apparently, n_dense has grown
2556
 
                        since the time the page was last compressed. */
2557
 
                        goto zlib_done;
2558
 
                case Z_OK:
2559
 
                case Z_BUF_ERROR:
2560
 
                        if (UNIV_LIKELY(!d_stream->avail_out)) {
2561
 
                                break;
2562
 
                        }
2563
 
                        /* fall through */
2564
 
                default:
2565
 
                        page_zip_fail(("page_zip_decompress_clust:"
2566
 
                                       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
2567
 
                                       d_stream->msg));
2568
 
                        goto zlib_error;
2569
 
                }
2570
 
 
2571
 
                ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2572
 
                /* Prepare to decompress the data bytes. */
2573
 
                d_stream->next_out = rec;
2574
 
                /* Set heap_no and the status bits. */
2575
 
                mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2576
 
                heap_status += 1 << REC_HEAP_NO_SHIFT;
2577
 
 
2578
 
                /* Read the offsets. The status bits are needed here. */
2579
 
                offsets = rec_get_offsets(rec, index, offsets,
2580
 
                                          ULINT_UNDEFINED, &heap);
2581
 
 
2582
 
                /* This is a leaf page in a clustered index. */
2583
 
 
2584
 
                /* Check if there are any externally stored columns.
2585
 
                For each externally stored column, restore the
2586
 
                BTR_EXTERN_FIELD_REF separately. */
2587
 
 
2588
 
                if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
2589
 
                        if (UNIV_UNLIKELY
2590
 
                            (!page_zip_decompress_clust_ext(
2591
 
                                    d_stream, rec, offsets, trx_id_col))) {
2592
 
 
2593
 
                                goto zlib_error;
2594
 
                        }
2595
 
                } else {
2596
 
                        /* Skip trx_id and roll_ptr */
2597
 
                        ulint   len;
2598
 
                        byte*   dst = rec_get_nth_field(rec, offsets,
2599
 
                                                        trx_id_col, &len);
2600
 
                        if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
2601
 
                                          + DATA_ROLL_PTR_LEN)) {
2602
 
 
2603
 
                                page_zip_fail(("page_zip_decompress_clust:"
2604
 
                                               " len = %lu\n", (ulong) len));
2605
 
                                goto zlib_error;
2606
 
                        }
2607
 
 
2608
 
                        d_stream->avail_out = dst - d_stream->next_out;
2609
 
 
2610
 
                        switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2611
 
                        case Z_STREAM_END:
2612
 
                        case Z_OK:
2613
 
                        case Z_BUF_ERROR:
2614
 
                                if (!d_stream->avail_out) {
2615
 
                                        break;
2616
 
                                }
2617
 
                                /* fall through */
2618
 
                        default:
2619
 
                                page_zip_fail(("page_zip_decompress_clust:"
2620
 
                                               " 2 inflate(Z_SYNC_FLUSH)=%s\n",
2621
 
                                               d_stream->msg));
2622
 
                                goto zlib_error;
2623
 
                        }
2624
 
 
2625
 
                        ut_ad(d_stream->next_out == dst);
2626
 
 
2627
 
                        /* Clear DB_TRX_ID and DB_ROLL_PTR in order to
2628
 
                        avoid uninitialized bytes in case the record
2629
 
                        is affected by page_zip_apply_log(). */
2630
 
                        memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2631
 
 
2632
 
                        d_stream->next_out += DATA_TRX_ID_LEN
2633
 
                                + DATA_ROLL_PTR_LEN;
2634
 
                }
2635
 
 
2636
 
                /* Decompress the last bytes of the record. */
2637
 
                d_stream->avail_out = rec_get_end(rec, offsets)
2638
 
                        - d_stream->next_out;
2639
 
 
2640
 
                switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2641
 
                case Z_STREAM_END:
2642
 
                case Z_OK:
2643
 
                case Z_BUF_ERROR:
2644
 
                        if (!d_stream->avail_out) {
2645
 
                                break;
2646
 
                        }
2647
 
                        /* fall through */
2648
 
                default:
2649
 
                        page_zip_fail(("page_zip_decompress_clust:"
2650
 
                                       " 3 inflate(Z_SYNC_FLUSH)=%s\n",
2651
 
                                       d_stream->msg));
2652
 
                        goto zlib_error;
2653
 
                }
2654
 
        }
2655
 
 
2656
 
        /* Decompress any trailing garbage, in case the last record was
2657
 
        allocated from an originally longer space on the free list. */
2658
 
        d_stream->avail_out = page_header_get_field(page_zip->data,
2659
 
                                                    PAGE_HEAP_TOP)
2660
 
                - page_offset(d_stream->next_out);
2661
 
        if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2662
 
                          - PAGE_ZIP_START - PAGE_DIR)) {
2663
 
 
2664
 
                page_zip_fail(("page_zip_decompress_clust:"
2665
 
                               " avail_out = %u\n",
2666
 
                               d_stream->avail_out));
2667
 
                goto zlib_error;
2668
 
        }
2669
 
 
2670
 
        if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2671
 
                page_zip_fail(("page_zip_decompress_clust:"
2672
 
                               " inflate(Z_FINISH)=%s\n",
2673
 
                               d_stream->msg));
2674
 
zlib_error:
2675
 
                inflateEnd(d_stream);
2676
 
                return(FALSE);
2677
 
        }
2678
 
 
2679
 
        /* Note that d_stream->avail_out > 0 may hold here
2680
 
        if the modification log is nonempty. */
2681
 
 
2682
 
zlib_done:
2683
 
        if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2684
 
                ut_error;
2685
 
        }
2686
 
 
2687
 
        {
2688
 
                page_t* page = page_align(d_stream->next_out);
2689
 
 
2690
 
                /* Clear the unused heap space on the uncompressed page. */
2691
 
                memset(d_stream->next_out, 0,
2692
 
                       page_dir_get_nth_slot(page,
2693
 
                                             page_dir_get_n_slots(page) - 1)
2694
 
                       - d_stream->next_out);
2695
 
        }
2696
 
 
2697
 
#ifdef UNIV_DEBUG
2698
 
        page_zip->m_start = PAGE_DATA + d_stream->total_in;
2699
 
#endif /* UNIV_DEBUG */
2700
 
 
2701
 
        /* Apply the modification log. */
2702
 
        {
2703
 
                const byte*     mod_log_ptr;
2704
 
                mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2705
 
                                                 d_stream->avail_in + 1,
2706
 
                                                 recs, n_dense,
2707
 
                                                 trx_id_col, heap_status,
2708
 
                                                 index, offsets);
2709
 
 
2710
 
                if (UNIV_UNLIKELY(!mod_log_ptr)) {
2711
 
                        return(FALSE);
2712
 
                }
2713
 
                page_zip->m_end = mod_log_ptr - page_zip->data;
2714
 
                page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2715
 
        }
2716
 
 
2717
 
        if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE, NULL)
2718
 
                          + page_zip->m_end >= page_zip_get_size(page_zip))) {
2719
 
 
2720
 
                page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n",
2721
 
                               (ulong) page_zip_get_trailer_len(
2722
 
                                       page_zip, TRUE, NULL),
2723
 
                               (ulong) page_zip->m_end,
2724
 
                               (ulong) page_zip_get_size(page_zip)));
2725
 
                return(FALSE);
2726
 
        }
2727
 
 
2728
 
        storage = page_zip->data + page_zip_get_size(page_zip)
2729
 
                - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2730
 
 
2731
 
        externs = storage - n_dense
2732
 
                * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2733
 
 
2734
 
        /* Restore the uncompressed columns in heap_no order. */
2735
 
 
2736
 
        for (slot = 0; slot < n_dense; slot++) {
2737
 
                ulint   i;
2738
 
                ulint   len;
2739
 
                byte*   dst;
2740
 
                rec_t*  rec     = recs[slot];
2741
 
                ibool   exists  = !page_zip_dir_find_free(
2742
 
                        page_zip, page_offset(rec));
2743
 
                offsets = rec_get_offsets(rec, index, offsets,
2744
 
                                          ULINT_UNDEFINED, &heap);
2745
 
 
2746
 
                dst = rec_get_nth_field(rec, offsets,
2747
 
                                        trx_id_col, &len);
2748
 
                ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2749
 
                storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
2750
 
                memcpy(dst, storage,
2751
 
                       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2752
 
 
2753
 
                /* Check if there are any externally stored
2754
 
                columns in this record.  For each externally
2755
 
                stored column, restore or clear the
2756
 
                BTR_EXTERN_FIELD_REF. */
2757
 
                if (!rec_offs_any_extern(offsets)) {
2758
 
                        continue;
2759
 
                }
2760
 
 
2761
 
                for (i = 0; i < rec_offs_n_fields(offsets); i++) {
2762
 
                        if (!rec_offs_nth_extern(offsets, i)) {
2763
 
                                continue;
2764
 
                        }
2765
 
                        dst = rec_get_nth_field(rec, offsets, i, &len);
2766
 
 
2767
 
                        if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) {
2768
 
                                page_zip_fail(("page_zip_decompress_clust:"
2769
 
                                               " %lu < 20\n",
2770
 
                                               (ulong) len));
2771
 
                                return(FALSE);
2772
 
                        }
2773
 
 
2774
 
                        dst += len - BTR_EXTERN_FIELD_REF_SIZE;
2775
 
 
2776
 
                        if (UNIV_LIKELY(exists)) {
2777
 
                                /* Existing record:
2778
 
                                restore the BLOB pointer */
2779
 
                                externs -= BTR_EXTERN_FIELD_REF_SIZE;
2780
 
 
2781
 
                                if (UNIV_UNLIKELY
2782
 
                                    (externs < page_zip->data
2783
 
                                     + page_zip->m_end)) {
2784
 
                                        page_zip_fail(("page_zip_"
2785
 
                                                       "decompress_clust: "
2786
 
                                                       "%p < %p + %lu\n",
2787
 
                                                       (const void*) externs,
2788
 
                                                       (const void*)
2789
 
                                                       page_zip->data,
2790
 
                                                       (ulong)
2791
 
                                                       page_zip->m_end));
2792
 
                                        return(FALSE);
2793
 
                                }
2794
 
 
2795
 
                                memcpy(dst, externs,
2796
 
                                       BTR_EXTERN_FIELD_REF_SIZE);
2797
 
 
2798
 
                                page_zip->n_blobs++;
2799
 
                        } else {
2800
 
                                /* Deleted record:
2801
 
                                clear the BLOB pointer */
2802
 
                                memset(dst, 0,
2803
 
                                       BTR_EXTERN_FIELD_REF_SIZE);
2804
 
                        }
2805
 
                }
2806
 
        }
2807
 
 
2808
 
        return(TRUE);
2809
 
}
2810
 
 
2811
 
/**********************************************************************//**
2812
 
Decompress a page.  This function should tolerate errors on the compressed
2813
 
page.  Instead of letting assertions fail, it will return FALSE if an
2814
 
inconsistency is detected.
2815
 
@return TRUE on success, FALSE on failure */
2816
 
UNIV_INTERN
2817
 
ibool
2818
 
page_zip_decompress(
2819
 
/*================*/
2820
 
        page_zip_des_t* page_zip,/*!< in: data, ssize;
2821
 
                                out: m_start, m_end, m_nonempty, n_blobs */
2822
 
        page_t*         page,   /*!< out: uncompressed page, may be trashed */
2823
 
        ibool           all)    /*!< in: TRUE=decompress the whole page;
2824
 
                                FALSE=verify but do not copy some
2825
 
                                page header fields that should not change
2826
 
                                after page creation */
2827
 
{
2828
 
        z_stream        d_stream;
2829
 
        dict_index_t*   index   = NULL;
2830
 
        rec_t**         recs;   /*!< dense page directory, sorted by address */
2831
 
        ulint           n_dense;/* number of user records on the page */
2832
 
        ulint           trx_id_col = ULINT_UNDEFINED;
2833
 
        mem_heap_t*     heap;
2834
 
        ulint*          offsets;
2835
 
#ifndef UNIV_HOTBACKUP
2836
 
        ullint          usec = ut_time_us(NULL);
2837
 
#endif /* !UNIV_HOTBACKUP */
2838
 
 
2839
 
        ut_ad(page_zip_simple_validate(page_zip));
2840
 
        UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
2841
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
2842
 
 
2843
 
        /* The dense directory excludes the infimum and supremum records. */
2844
 
        n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW;
2845
 
        if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
2846
 
                          >= page_zip_get_size(page_zip))) {
2847
 
                page_zip_fail(("page_zip_decompress 1: %lu %lu\n",
2848
 
                               (ulong) n_dense,
2849
 
                               (ulong) page_zip_get_size(page_zip)));
2850
 
                return(FALSE);
2851
 
        }
2852
 
 
2853
 
        heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE);
2854
 
        recs = static_cast<byte **>(mem_heap_alloc(heap, n_dense * (2 * sizeof *recs)));
2855
 
 
2856
 
        if (all) {
2857
 
                /* Copy the page header. */
2858
 
                memcpy(page, page_zip->data, PAGE_DATA);
2859
 
        } else {
2860
 
                /* Check that the bytes that we skip are identical. */
2861
 
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
2862
 
                ut_a(!memcmp(FIL_PAGE_TYPE + page,
2863
 
                             FIL_PAGE_TYPE + page_zip->data,
2864
 
                             PAGE_HEADER - FIL_PAGE_TYPE));
2865
 
                ut_a(!memcmp(PAGE_HEADER + PAGE_LEVEL + page,
2866
 
                             PAGE_HEADER + PAGE_LEVEL + page_zip->data,
2867
 
                             PAGE_DATA - (PAGE_HEADER + PAGE_LEVEL)));
2868
 
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
2869
 
 
2870
 
                /* Copy the mutable parts of the page header. */
2871
 
                memcpy(page, page_zip->data, FIL_PAGE_TYPE);
2872
 
                memcpy(PAGE_HEADER + page, PAGE_HEADER + page_zip->data,
2873
 
                       PAGE_LEVEL - PAGE_N_DIR_SLOTS);
2874
 
 
2875
 
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
2876
 
                /* Check that the page headers match after copying. */
2877
 
                ut_a(!memcmp(page, page_zip->data, PAGE_DATA));
2878
 
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
2879
 
        }
2880
 
 
2881
 
#ifdef UNIV_ZIP_DEBUG
2882
 
        /* Clear the uncompressed page, except the header. */
2883
 
        memset(PAGE_DATA + page, 0x55, UNIV_PAGE_SIZE - PAGE_DATA);
2884
 
#endif /* UNIV_ZIP_DEBUG */
2885
 
        UNIV_MEM_INVALID(PAGE_DATA + page, UNIV_PAGE_SIZE - PAGE_DATA);
2886
 
 
2887
 
        /* Copy the page directory. */
2888
 
        if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
2889
 
                                               recs + n_dense, n_dense))) {
2890
 
zlib_error:
2891
 
                mem_heap_free(heap);
2892
 
                return(FALSE);
2893
 
        }
2894
 
 
2895
 
        /* Copy the infimum and supremum records. */
2896
 
        memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
2897
 
               infimum_extra, sizeof infimum_extra);
2898
 
        if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
2899
 
                rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
2900
 
                                      PAGE_NEW_SUPREMUM);
2901
 
        } else {
2902
 
                rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
2903
 
                                      page_zip_dir_get(page_zip, 0)
2904
 
                                      & PAGE_ZIP_DIR_SLOT_MASK);
2905
 
        }
2906
 
        memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data);
2907
 
        memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
2908
 
               supremum_extra_data, sizeof supremum_extra_data);
2909
 
 
2910
 
        page_zip_set_alloc(&d_stream, heap);
2911
 
 
2912
 
        if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
2913
 
                          != Z_OK)) {
2914
 
                ut_error;
2915
 
        }
2916
 
 
2917
 
        d_stream.next_in = page_zip->data + PAGE_DATA;
2918
 
        /* Subtract the space reserved for
2919
 
        the page header and the end marker of the modification log. */
2920
 
        d_stream.avail_in = page_zip_get_size(page_zip) - (PAGE_DATA + 1);
2921
 
 
2922
 
        d_stream.next_out = page + PAGE_ZIP_START;
2923
 
        d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
2924
 
 
2925
 
        /* Decode the zlib header and the index information. */
2926
 
        if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
2927
 
 
2928
 
                page_zip_fail(("page_zip_decompress:"
2929
 
                               " 1 inflate(Z_BLOCK)=%s\n", d_stream.msg));
2930
 
                goto zlib_error;
2931
 
        }
2932
 
 
2933
 
        if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
2934
 
 
2935
 
                page_zip_fail(("page_zip_decompress:"
2936
 
                               " 2 inflate(Z_BLOCK)=%s\n", d_stream.msg));
2937
 
                goto zlib_error;
2938
 
        }
2939
 
 
2940
 
        index = page_zip_fields_decode(
2941
 
                page + PAGE_ZIP_START, d_stream.next_out,
2942
 
                page_is_leaf(page) ? &trx_id_col : NULL);
2943
 
 
2944
 
        if (UNIV_UNLIKELY(!index)) {
2945
 
 
2946
 
                goto zlib_error;
2947
 
        }
2948
 
 
2949
 
        /* Decompress the user records. */
2950
 
        page_zip->n_blobs = 0;
2951
 
        d_stream.next_out = page + PAGE_ZIP_START;
2952
 
 
2953
 
        {
2954
 
                /* Pre-allocate the offsets for rec_get_offsets_reverse(). */
2955
 
                ulint   n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE
2956
 
                        + dict_index_get_n_fields(index);
2957
 
                offsets = static_cast<unsigned long *>(mem_heap_alloc(heap, n * sizeof(ulint)));
2958
 
                *offsets = n;
2959
 
        }
2960
 
 
2961
 
        /* Decompress the records in heap_no order. */
2962
 
        if (!page_is_leaf(page)) {
2963
 
                /* This is a node pointer page. */
2964
 
                ulint   info_bits;
2965
 
 
2966
 
                if (UNIV_UNLIKELY
2967
 
                    (!page_zip_decompress_node_ptrs(page_zip, &d_stream,
2968
 
                                                    recs, n_dense, index,
2969
 
                                                    offsets, heap))) {
2970
 
                        goto err_exit;
2971
 
                }
2972
 
 
2973
 
                info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL
2974
 
                        ? REC_INFO_MIN_REC_FLAG : 0;
2975
 
 
2976
 
                if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
2977
 
                                                            info_bits))) {
2978
 
                        goto err_exit;
2979
 
                }
2980
 
        } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
2981
 
                /* This is a leaf page in a secondary index. */
2982
 
                if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream,
2983
 
                                                           recs, n_dense,
2984
 
                                                           index, offsets))) {
2985
 
                        goto err_exit;
2986
 
                }
2987
 
 
2988
 
                if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
2989
 
                                                            page, 0))) {
2990
 
err_exit:
2991
 
                        page_zip_fields_free(index);
2992
 
                        mem_heap_free(heap);
2993
 
                        return(FALSE);
2994
 
                }
2995
 
        } else {
2996
 
                /* This is a leaf page in a clustered index. */
2997
 
                if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip,
2998
 
                                                             &d_stream, recs,
2999
 
                                                             n_dense, index,
3000
 
                                                             trx_id_col,
3001
 
                                                             offsets, heap))) {
3002
 
                        goto err_exit;
3003
 
                }
3004
 
 
3005
 
                if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
3006
 
                                                            page, 0))) {
3007
 
                        goto err_exit;
3008
 
                }
3009
 
        }
3010
 
 
3011
 
        ut_a(page_is_comp(page));
3012
 
        UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
3013
 
 
3014
 
        page_zip_fields_free(index);
3015
 
        mem_heap_free(heap);
3016
 
#ifndef UNIV_HOTBACKUP
3017
 
        {
3018
 
                page_zip_stat_t*        zip_stat
3019
 
                        = &page_zip_stat[page_zip->ssize - 1];
3020
 
                zip_stat->decompressed++;
3021
 
                zip_stat->decompressed_usec += ut_time_us(NULL) - usec;
3022
 
        }
3023
 
#endif /* !UNIV_HOTBACKUP */
3024
 
 
3025
 
        /* Update the stat counter for LRU policy. */
3026
 
        buf_LRU_stat_inc_unzip();
3027
 
 
3028
 
        return(TRUE);
3029
 
}
3030
 
 
3031
 
#ifdef UNIV_ZIP_DEBUG
3032
 
/**********************************************************************//**
3033
 
Dump a block of memory on the standard error stream. */
3034
 
static
3035
 
void
3036
 
page_zip_hexdump_func(
3037
 
/*==================*/
3038
 
        const char*     name,   /*!< in: name of the data structure */
3039
 
        const void*     buf,    /*!< in: data */
3040
 
        ulint           size)   /*!< in: length of the data, in bytes */
3041
 
{
3042
 
        const byte*     s       = buf;
3043
 
        ulint           addr;
3044
 
        const ulint     width   = 32; /* bytes per line */
3045
 
 
3046
 
        fprintf(stderr, "%s:\n", name);
3047
 
 
3048
 
        for (addr = 0; addr < size; addr += width) {
3049
 
                ulint   i;
3050
 
 
3051
 
                fprintf(stderr, "%04lx ", (ulong) addr);
3052
 
 
3053
 
                i = ut_min(width, size - addr);
3054
 
 
3055
 
                while (i--) {
3056
 
                        fprintf(stderr, "%02x", *s++);
3057
 
                }
3058
 
 
3059
 
                putc('\n', stderr);
3060
 
        }
3061
 
}
3062
 
 
3063
 
/** Dump a block of memory on the standard error stream, using the
source text of the buf argument as the heading of the dump.
@param buf      in: data
@param size     in: length of the data, in bytes */
#define page_zip_hexdump(buf, size)                             \
        page_zip_hexdump_func(#buf, (buf), (size))
3067
 
 
3068
 
/** Flag: make page_zip_validate() compare page headers only.
When nonzero, page_zip_validate_low() returns TRUE as soon as the page
header comparison has succeeded, without decompressing the page.
Initially FALSE. */
3069
 
UNIV_INTERN ibool       page_zip_validate_header_only = FALSE;
3070
 
 
3071
 
/**********************************************************************//**
3072
 
Check that the compressed and decompressed pages match.
3073
 
@return TRUE if valid, FALSE if not */
3074
 
UNIV_INTERN
3075
 
ibool
3076
 
page_zip_validate_low(
3077
 
/*==================*/
3078
 
        const page_zip_des_t*   page_zip,/*!< in: compressed page */
3079
 
        const page_t*           page,   /*!< in: uncompressed page */
3080
 
        ibool                   sloppy) /*!< in: FALSE=strict,
3081
 
                                        TRUE=ignore the MIN_REC_FLAG */
3082
 
{
3083
 
        page_zip_des_t  temp_page_zip;
3084
 
        byte*           temp_page_buf;
3085
 
        page_t*         temp_page;
3086
 
        ibool           valid;
3087
 
 
3088
 
        if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
3089
 
                   FIL_PAGE_LSN - FIL_PAGE_PREV)
3090
 
            || memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2)
3091
 
            || memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
3092
 
                      PAGE_DATA - FIL_PAGE_DATA)) {
3093
 
                page_zip_fail(("page_zip_validate: page header\n"));
3094
 
                page_zip_hexdump(page_zip, sizeof *page_zip);
3095
 
                page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
3096
 
                page_zip_hexdump(page, UNIV_PAGE_SIZE);
3097
 
                return(FALSE);
3098
 
        }
3099
 
 
3100
 
        ut_a(page_is_comp(page));
3101
 
 
3102
 
        if (page_zip_validate_header_only) {
3103
 
                return(TRUE);
3104
 
        }
3105
 
 
3106
 
        /* page_zip_decompress() expects the uncompressed page to be
3107
 
        UNIV_PAGE_SIZE aligned. */
3108
 
        temp_page_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
3109
 
        temp_page = ut_align(temp_page_buf, UNIV_PAGE_SIZE);
3110
 
 
3111
 
#ifdef UNIV_DEBUG_VALGRIND
3112
 
        /* Get detailed information on the valid bits in case the
3113
 
        UNIV_MEM_ASSERT_RW() checks fail.  The v-bits of page[],
3114
 
        page_zip->data[] or page_zip could be viewed at temp_page[] or
3115
 
        temp_page_zip in a debugger when running valgrind --db-attach. */
3116
 
        VALGRIND_GET_VBITS(page, temp_page, UNIV_PAGE_SIZE);
3117
 
        UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
3118
 
# if UNIV_WORD_SIZE == 4
3119
 
        VALGRIND_GET_VBITS(page_zip, &temp_page_zip, sizeof temp_page_zip);
3120
 
        /* On 32-bit systems, there is no padding in page_zip_des_t.
3121
 
        On other systems, Valgrind could complain about uninitialized
3122
 
        pad bytes. */
3123
 
        UNIV_MEM_ASSERT_RW(page_zip, sizeof *page_zip);
3124
 
# endif
3125
 
        VALGRIND_GET_VBITS(page_zip->data, temp_page,
3126
 
                           page_zip_get_size(page_zip));
3127
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3128
 
#endif /* UNIV_DEBUG_VALGRIND */
3129
 
 
3130
 
        temp_page_zip = *page_zip;
3131
 
        valid = page_zip_decompress(&temp_page_zip, temp_page, TRUE);
3132
 
        if (!valid) {
3133
 
                fputs("page_zip_validate(): failed to decompress\n", stderr);
3134
 
                goto func_exit;
3135
 
        }
3136
 
        if (page_zip->n_blobs != temp_page_zip.n_blobs) {
3137
 
                page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n",
3138
 
                               page_zip->n_blobs, temp_page_zip.n_blobs));
3139
 
                valid = FALSE;
3140
 
        }
3141
 
#ifdef UNIV_DEBUG
3142
 
        if (page_zip->m_start != temp_page_zip.m_start) {
3143
 
                page_zip_fail(("page_zip_validate: m_start: %u!=%u\n",
3144
 
                               page_zip->m_start, temp_page_zip.m_start));
3145
 
                valid = FALSE;
3146
 
        }
3147
 
#endif /* UNIV_DEBUG */
3148
 
        if (page_zip->m_end != temp_page_zip.m_end) {
3149
 
                page_zip_fail(("page_zip_validate: m_end: %u!=%u\n",
3150
 
                               page_zip->m_end, temp_page_zip.m_end));
3151
 
                valid = FALSE;
3152
 
        }
3153
 
        if (page_zip->m_nonempty != temp_page_zip.m_nonempty) {
3154
 
                page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n",
3155
 
                               page_zip->m_nonempty,
3156
 
                               temp_page_zip.m_nonempty));
3157
 
                valid = FALSE;
3158
 
        }
3159
 
        if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER,
3160
 
                   UNIV_PAGE_SIZE - PAGE_HEADER - FIL_PAGE_DATA_END)) {
3161
 
 
3162
 
                /* In crash recovery, the "minimum record" flag may be
3163
 
                set incorrectly until the mini-transaction is
3164
 
                committed.  Let us tolerate that difference when we
3165
 
                are performing a sloppy validation. */
3166
 
 
3167
 
                if (sloppy) {
3168
 
                        byte    info_bits_diff;
3169
 
                        ulint   offset
3170
 
                                = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
3171
 
                                                    TRUE);
3172
 
                        ut_a(offset >= PAGE_NEW_SUPREMUM);
3173
 
                        offset -= 5 /* REC_NEW_INFO_BITS */;
3174
 
 
3175
 
                        info_bits_diff = page[offset] ^ temp_page[offset];
3176
 
 
3177
 
                        if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
3178
 
                                temp_page[offset] = page[offset];
3179
 
 
3180
 
                                if (!memcmp(page + PAGE_HEADER,
3181
 
                                            temp_page + PAGE_HEADER,
3182
 
                                            UNIV_PAGE_SIZE - PAGE_HEADER
3183
 
                                            - FIL_PAGE_DATA_END)) {
3184
 
 
3185
 
                                        /* Only the minimum record flag
3186
 
                                        differed.  Let us ignore it. */
3187
 
                                        page_zip_fail(("page_zip_validate: "
3188
 
                                                       "min_rec_flag "
3189
 
                                                       "(ignored, "
3190
 
                                                       "%lu,%lu,0x%02lx)\n",
3191
 
                                                       page_get_space_id(page),
3192
 
                                                       page_get_page_no(page),
3193
 
                                                       (ulong) page[offset]));
3194
 
                                        goto func_exit;
3195
 
                                }
3196
 
                        }
3197
 
                }
3198
 
                page_zip_fail(("page_zip_validate: content\n"));
3199
 
                valid = FALSE;
3200
 
        }
3201
 
 
3202
 
func_exit:
3203
 
        if (!valid) {
3204
 
                page_zip_hexdump(page_zip, sizeof *page_zip);
3205
 
                page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
3206
 
                page_zip_hexdump(page, UNIV_PAGE_SIZE);
3207
 
                page_zip_hexdump(temp_page, UNIV_PAGE_SIZE);
3208
 
        }
3209
 
        ut_free(temp_page_buf);
3210
 
        return(valid);
3211
 
}
3212
 
 
3213
 
/**********************************************************************//**
3214
 
Check that the compressed and decompressed pages match.
3215
 
@return TRUE if valid, FALSE if not */
3216
 
UNIV_INTERN
3217
 
ibool
3218
 
page_zip_validate(
3219
 
/*==============*/
3220
 
        const page_zip_des_t*   page_zip,/*!< in: compressed page */
3221
 
        const page_t*           page)   /*!< in: uncompressed page */
3222
 
{
3223
 
        return(page_zip_validate_low(page_zip, page,
3224
 
                                     recv_recovery_is_on()));
3225
 
}
3226
 
#endif /* UNIV_ZIP_DEBUG */
3227
 
 
3228
 
#ifdef UNIV_DEBUG
3229
 
/**********************************************************************//**
3230
 
Assert that the compressed and decompressed page headers match.
3231
 
@return TRUE */
3232
 
static
3233
 
ibool
3234
 
page_zip_header_cmp(
3235
 
/*================*/
3236
 
        const page_zip_des_t*   page_zip,/*!< in: compressed page */
3237
 
        const byte*             page)   /*!< in: uncompressed page */
3238
 
{
3239
 
        ut_ad(!memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
3240
 
                      FIL_PAGE_LSN - FIL_PAGE_PREV));
3241
 
        ut_ad(!memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE,
3242
 
                      2));
3243
 
        ut_ad(!memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
3244
 
                      PAGE_DATA - FIL_PAGE_DATA));
3245
 
 
3246
 
        return(TRUE);
3247
 
}
3248
 
#endif /* UNIV_DEBUG */
3249
 
 
3250
 
/**********************************************************************//**
3251
 
Write a record on the compressed page that contains externally stored
3252
 
columns.  The data must already have been written to the uncompressed page.
3253
 
@return end of modification log */
3254
 
static
3255
 
byte*
3256
 
page_zip_write_rec_ext(
3257
 
/*===================*/
3258
 
        page_zip_des_t* page_zip,       /*!< in/out: compressed page */
3259
 
        const page_t*   page,           /*!< in: page containing rec */
3260
 
        const byte*     rec,            /*!< in: record being written */
3261
 
        dict_index_t*   index,          /*!< in: record descriptor */
3262
 
        const ulint*    offsets,        /*!< in: rec_get_offsets(rec, index) */
3263
 
        ulint           create,         /*!< in: nonzero=insert, zero=update */
3264
 
        ulint           trx_id_col,     /*!< in: position of DB_TRX_ID */
3265
 
        ulint           heap_no,        /*!< in: heap number of rec */
3266
 
        byte*           storage,        /*!< in: end of dense page directory */
3267
 
        byte*           data)           /*!< in: end of modification log */
3268
 
{
3269
 
        const byte*     start   = rec;
3270
 
        ulint           i;
3271
 
        ulint           len;
3272
 
        byte*           externs = storage;
3273
 
        ulint           n_ext   = rec_offs_n_extern(offsets);
3274
 
 
3275
 
        ut_ad(rec_offs_validate(rec, index, offsets));
3276
 
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3277
 
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3278
 
                           rec_offs_extra_size(offsets));
3279
 
 
3280
 
        externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3281
 
                * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW);
3282
 
 
3283
 
        /* Note that this will not take into account
3284
 
        the BLOB columns of rec if create==TRUE. */
3285
 
        ut_ad(data + rec_offs_data_size(offsets)
3286
 
              - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3287
 
              - n_ext * BTR_EXTERN_FIELD_REF_SIZE
3288
 
              < externs - BTR_EXTERN_FIELD_REF_SIZE * page_zip->n_blobs);
3289
 
 
3290
 
        {
3291
 
                ulint   blob_no = page_zip_get_n_prev_extern(
3292
 
                        page_zip, rec, index);
3293
 
                byte*   ext_end = externs - page_zip->n_blobs
3294
 
                        * BTR_EXTERN_FIELD_REF_SIZE;
3295
 
                ut_ad(blob_no <= page_zip->n_blobs);
3296
 
                externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
3297
 
 
3298
 
                if (create) {
3299
 
                        page_zip->n_blobs += n_ext;
3300
 
                        ASSERT_ZERO_BLOB(ext_end - n_ext
3301
 
                                         * BTR_EXTERN_FIELD_REF_SIZE);
3302
 
                        memmove(ext_end - n_ext
3303
 
                                * BTR_EXTERN_FIELD_REF_SIZE,
3304
 
                                ext_end,
3305
 
                                externs - ext_end);
3306
 
                }
3307
 
 
3308
 
                ut_a(blob_no + n_ext <= page_zip->n_blobs);
3309
 
        }
3310
 
 
3311
 
        for (i = 0; i < rec_offs_n_fields(offsets); i++) {
3312
 
                const byte*     src;
3313
 
 
3314
 
                if (UNIV_UNLIKELY(i == trx_id_col)) {
3315
 
                        ut_ad(!rec_offs_nth_extern(offsets,
3316
 
                                                   i));
3317
 
                        ut_ad(!rec_offs_nth_extern(offsets,
3318
 
                                                   i + 1));
3319
 
                        /* Locate trx_id and roll_ptr. */
3320
 
                        src = rec_get_nth_field(rec, offsets,
3321
 
                                                i, &len);
3322
 
                        ut_ad(len == DATA_TRX_ID_LEN);
3323
 
                        ut_ad(src + DATA_TRX_ID_LEN
3324
 
                              == rec_get_nth_field(
3325
 
                                      rec, offsets,
3326
 
                                      i + 1, &len));
3327
 
                        ut_ad(len == DATA_ROLL_PTR_LEN);
3328
 
 
3329
 
                        /* Log the preceding fields. */
3330
 
                        ASSERT_ZERO(data, src - start);
3331
 
                        memcpy(data, start, src - start);
3332
 
                        data += src - start;
3333
 
                        start = src + (DATA_TRX_ID_LEN
3334
 
                                       + DATA_ROLL_PTR_LEN);
3335
 
 
3336
 
                        /* Store trx_id and roll_ptr. */
3337
 
                        memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3338
 
                               * (heap_no - 1),
3339
 
                               src, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3340
 
                        i++; /* skip also roll_ptr */
3341
 
                } else if (rec_offs_nth_extern(offsets, i)) {
3342
 
                        src = rec_get_nth_field(rec, offsets,
3343
 
                                                i, &len);
3344
 
 
3345
 
                        ut_ad(dict_index_is_clust(index));
3346
 
                        ut_ad(len
3347
 
                              >= BTR_EXTERN_FIELD_REF_SIZE);
3348
 
                        src += len - BTR_EXTERN_FIELD_REF_SIZE;
3349
 
 
3350
 
                        ASSERT_ZERO(data, src - start);
3351
 
                        memcpy(data, start, src - start);
3352
 
                        data += src - start;
3353
 
                        start = src + BTR_EXTERN_FIELD_REF_SIZE;
3354
 
 
3355
 
                        /* Store the BLOB pointer. */
3356
 
                        externs -= BTR_EXTERN_FIELD_REF_SIZE;
3357
 
                        ut_ad(data < externs);
3358
 
                        memcpy(externs, src, BTR_EXTERN_FIELD_REF_SIZE);
3359
 
                }
3360
 
        }
3361
 
 
3362
 
        /* Log the last bytes of the record. */
3363
 
        len = rec_offs_data_size(offsets) - (start - rec);
3364
 
 
3365
 
        ASSERT_ZERO(data, len);
3366
 
        memcpy(data, start, len);
3367
 
        data += len;
3368
 
 
3369
 
        return(data);
3370
 
}
3371
 
 
3372
 
/**********************************************************************//**
3373
 
Write an entire record on the compressed page.  The data must already
3374
 
have been written to the uncompressed page. */
3375
 
UNIV_INTERN
3376
 
void
3377
 
page_zip_write_rec(
3378
 
/*===============*/
3379
 
        page_zip_des_t* page_zip,/*!< in/out: compressed page */
3380
 
        const byte*     rec,    /*!< in: record being written */
3381
 
        dict_index_t*   index,  /*!< in: the index the record belongs to */
3382
 
        const ulint*    offsets,/*!< in: rec_get_offsets(rec, index) */
3383
 
        ulint           create) /*!< in: nonzero=insert, zero=update */
3384
 
{
3385
 
        const page_t*   page;
3386
 
        byte*           data;
3387
 
        byte*           storage;
3388
 
        ulint           heap_no;
3389
 
        byte*           slot;
3390
 
 
3391
 
        ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3392
 
        ut_ad(page_zip_simple_validate(page_zip));
3393
 
        ut_ad(page_zip_get_size(page_zip)
3394
 
              > PAGE_DATA + page_zip_dir_size(page_zip));
3395
 
        ut_ad(rec_offs_comp(offsets));
3396
 
        ut_ad(rec_offs_validate(rec, index, offsets));
3397
 
 
3398
 
        ut_ad(page_zip->m_start >= PAGE_DATA);
3399
 
 
3400
 
        page = page_align(rec);
3401
 
 
3402
 
        ut_ad(page_zip_header_cmp(page_zip, page));
3403
 
        ut_ad(page_simple_validate_new((page_t*) page));
3404
 
 
3405
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3406
 
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3407
 
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3408
 
                           rec_offs_extra_size(offsets));
3409
 
 
3410
 
        slot = page_zip_dir_find(page_zip, page_offset(rec));
3411
 
        ut_a(slot);
3412
 
        /* Copy the delete mark. */
3413
 
        if (rec_get_deleted_flag(rec, TRUE)) {
3414
 
                *slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8;
3415
 
        } else {
3416
 
                *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
3417
 
        }
3418
 
 
3419
 
        ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
3420
 
        ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + UNIV_PAGE_SIZE
3421
 
              - PAGE_DIR - PAGE_DIR_SLOT_SIZE
3422
 
              * page_dir_get_n_slots(page));
3423
 
 
3424
 
        heap_no = rec_get_heap_no_new(rec);
3425
 
        ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */
3426
 
        ut_ad(heap_no < page_dir_get_n_heap(page));
3427
 
 
3428
 
        /* Append to the modification log. */
3429
 
        data = page_zip->data + page_zip->m_end;
3430
 
        ut_ad(!*data);
3431
 
 
3432
 
        /* Identify the record by writing its heap number - 1.
3433
 
        0 is reserved to indicate the end of the modification log. */
3434
 
 
3435
 
        if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
3436
 
                *data++ = (byte) (0x80 | (heap_no - 1) >> 7);
3437
 
                ut_ad(!*data);
3438
 
        }
3439
 
        *data++ = (byte) ((heap_no - 1) << 1);
3440
 
        ut_ad(!*data);
3441
 
 
3442
 
        {
3443
 
                const byte*     start   = rec - rec_offs_extra_size(offsets);
3444
 
                const byte*     b       = rec - REC_N_NEW_EXTRA_BYTES;
3445
 
 
3446
 
                /* Write the extra bytes backwards, so that
3447
 
                rec_offs_extra_size() can be easily computed in
3448
 
                page_zip_apply_log() by invoking
3449
 
                rec_get_offsets_reverse(). */
3450
 
 
3451
 
                while (b != start) {
3452
 
                        *data++ = *--b;
3453
 
                        ut_ad(!*data);
3454
 
                }
3455
 
        }
3456
 
 
3457
 
        /* Write the data bytes.  Store the uncompressed bytes separately. */
3458
 
        storage = page_zip->data + page_zip_get_size(page_zip)
3459
 
                - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3460
 
                * PAGE_ZIP_DIR_SLOT_SIZE;
3461
 
 
3462
 
        if (page_is_leaf(page)) {
3463
 
                ulint           len;
3464
 
 
3465
 
                if (dict_index_is_clust(index)) {
3466
 
                        ulint           trx_id_col;
3467
 
 
3468
 
                        trx_id_col = dict_index_get_sys_col_pos(index,
3469
 
                                                                DATA_TRX_ID);
3470
 
                        ut_ad(trx_id_col != ULINT_UNDEFINED);
3471
 
 
3472
 
                        /* Store separately trx_id, roll_ptr and
3473
 
                        the BTR_EXTERN_FIELD_REF of each BLOB column. */
3474
 
                        if (rec_offs_any_extern(offsets)) {
3475
 
                                data = page_zip_write_rec_ext(
3476
 
                                        page_zip, page,
3477
 
                                        rec, index, offsets, create,
3478
 
                                        trx_id_col, heap_no, storage, data);
3479
 
                        } else {
3480
 
                                /* Locate trx_id and roll_ptr. */
3481
 
                                const byte*     src
3482
 
                                        = rec_get_nth_field(rec, offsets,
3483
 
                                                            trx_id_col, &len);
3484
 
                                ut_ad(len == DATA_TRX_ID_LEN);
3485
 
                                ut_ad(src + DATA_TRX_ID_LEN
3486
 
                                      == rec_get_nth_field(
3487
 
                                              rec, offsets,
3488
 
                                              trx_id_col + 1, &len));
3489
 
                                ut_ad(len == DATA_ROLL_PTR_LEN);
3490
 
 
3491
 
                                /* Log the preceding fields. */
3492
 
                                ASSERT_ZERO(data, src - rec);
3493
 
                                memcpy(data, rec, src - rec);
3494
 
                                data += src - rec;
3495
 
 
3496
 
                                /* Store trx_id and roll_ptr. */
3497
 
                                memcpy(storage
3498
 
                                       - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3499
 
                                       * (heap_no - 1),
3500
 
                                       src,
3501
 
                                       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3502
 
 
3503
 
                                src += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
3504
 
 
3505
 
                                /* Log the last bytes of the record. */
3506
 
                                len = rec_offs_data_size(offsets)
3507
 
                                        - (src - rec);
3508
 
 
3509
 
                                ASSERT_ZERO(data, len);
3510
 
                                memcpy(data, src, len);
3511
 
                                data += len;
3512
 
                        }
3513
 
                } else {
3514
 
                        /* Leaf page of a secondary index:
3515
 
                        no externally stored columns */
3516
 
                        ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
3517
 
                              == ULINT_UNDEFINED);
3518
 
                        ut_ad(!rec_offs_any_extern(offsets));
3519
 
 
3520
 
                        /* Log the entire record. */
3521
 
                        len = rec_offs_data_size(offsets);
3522
 
 
3523
 
                        ASSERT_ZERO(data, len);
3524
 
                        memcpy(data, rec, len);
3525
 
                        data += len;
3526
 
                }
3527
 
        } else {
3528
 
                /* This is a node pointer page. */
3529
 
                ulint   len;
3530
 
 
3531
 
                /* Non-leaf nodes should not have any externally
3532
 
                stored columns. */
3533
 
                ut_ad(!rec_offs_any_extern(offsets));
3534
 
 
3535
 
                /* Copy the data bytes, except node_ptr. */
3536
 
                len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE;
3537
 
                ut_ad(data + len < storage - REC_NODE_PTR_SIZE
3538
 
                      * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW));
3539
 
                ASSERT_ZERO(data, len);
3540
 
                memcpy(data, rec, len);
3541
 
                data += len;
3542
 
 
3543
 
                /* Copy the node pointer to the uncompressed area. */
3544
 
                memcpy(storage - REC_NODE_PTR_SIZE
3545
 
                       * (heap_no - 1),
3546
 
                       rec + len,
3547
 
                       REC_NODE_PTR_SIZE);
3548
 
        }
3549
 
 
3550
 
        ut_a(!*data);
3551
 
        ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip));
3552
 
        page_zip->m_end = data - page_zip->data;
3553
 
        page_zip->m_nonempty = TRUE;
3554
 
 
3555
 
#ifdef UNIV_ZIP_DEBUG
3556
 
        ut_a(page_zip_validate(page_zip, page_align(rec)));
3557
 
#endif /* UNIV_ZIP_DEBUG */
3558
 
}
3559
 
 
3560
 
/***********************************************************//**
3561
 
Parses a log record of writing a BLOB pointer of a record.
3562
 
@return end of log record or NULL */
3563
 
UNIV_INTERN
3564
 
byte*
3565
 
page_zip_parse_write_blob_ptr(
3566
 
/*==========================*/
3567
 
        byte*           ptr,    /*!< in: redo log buffer */
3568
 
        byte*           end_ptr,/*!< in: redo log buffer end */
3569
 
        page_t*         page,   /*!< in/out: uncompressed page */
3570
 
        page_zip_des_t* page_zip)/*!< in/out: compressed page */
3571
 
{
3572
 
        ulint   offset;
3573
 
        ulint   z_offset;
3574
 
 
3575
 
        ut_ad(!page == !page_zip);
3576
 
 
3577
 
        if (UNIV_UNLIKELY
3578
 
            (end_ptr < ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE))) {
3579
 
 
3580
 
                return(NULL);
3581
 
        }
3582
 
 
3583
 
        offset = mach_read_from_2(ptr);
3584
 
        z_offset = mach_read_from_2(ptr + 2);
3585
 
 
3586
 
        if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3587
 
            || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3588
 
            || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3589
 
corrupt:
3590
 
                recv_sys->found_corrupt_log = TRUE;
3591
 
 
3592
 
                return(NULL);
3593
 
        }
3594
 
 
3595
 
        if (page) {
3596
 
                if (UNIV_UNLIKELY(!page_zip)
3597
 
                    || UNIV_UNLIKELY(!page_is_leaf(page))) {
3598
 
 
3599
 
                        goto corrupt;
3600
 
                }
3601
 
 
3602
 
#ifdef UNIV_ZIP_DEBUG
3603
 
                ut_a(page_zip_validate(page_zip, page));
3604
 
#endif /* UNIV_ZIP_DEBUG */
3605
 
 
3606
 
                memcpy(page + offset,
3607
 
                       ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3608
 
                memcpy(page_zip->data + z_offset,
3609
 
                       ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3610
 
 
3611
 
#ifdef UNIV_ZIP_DEBUG
3612
 
                ut_a(page_zip_validate(page_zip, page));
3613
 
#endif /* UNIV_ZIP_DEBUG */
3614
 
        }
3615
 
 
3616
 
        return(ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE));
3617
 
}
3618
 
 
3619
 
/**********************************************************************//**
3620
 
Write a BLOB pointer of a record on the leaf page of a clustered index.
3621
 
The information must already have been updated on the uncompressed page. */
3622
 
UNIV_INTERN
3623
 
void
3624
 
page_zip_write_blob_ptr(
3625
 
/*====================*/
3626
 
        page_zip_des_t* page_zip,/*!< in/out: compressed page */
3627
 
        const byte*     rec,    /*!< in/out: record whose data is being
3628
 
                                written */
3629
 
        dict_index_t*   index,  /*!< in: index of the page */
3630
 
        const ulint*    offsets,/*!< in: rec_get_offsets(rec, index) */
3631
 
        ulint           n,      /*!< in: column index */
3632
 
        mtr_t*          mtr)    /*!< in: mini-transaction handle,
3633
 
                                or NULL if no logging is needed */
3634
 
{
3635
 
        const byte*     field;
3636
 
        byte*           externs;
3637
 
        const page_t*   page    = page_align(rec);
3638
 
        ulint           blob_no;
3639
 
        ulint           len;
3640
 
 
3641
 
        ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3642
 
        ut_ad(page_simple_validate_new((page_t*) page));
3643
 
        ut_ad(page_zip_simple_validate(page_zip));
3644
 
        ut_ad(page_zip_get_size(page_zip)
3645
 
              > PAGE_DATA + page_zip_dir_size(page_zip));
3646
 
        ut_ad(rec_offs_comp(offsets));
3647
 
        ut_ad(rec_offs_validate(rec, NULL, offsets));
3648
 
        ut_ad(rec_offs_any_extern(offsets));
3649
 
        ut_ad(rec_offs_nth_extern(offsets, n));
3650
 
 
3651
 
        ut_ad(page_zip->m_start >= PAGE_DATA);
3652
 
        ut_ad(page_zip_header_cmp(page_zip, page));
3653
 
 
3654
 
        ut_ad(page_is_leaf(page));
3655
 
        ut_ad(dict_index_is_clust(index));
3656
 
 
3657
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3658
 
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3659
 
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3660
 
                           rec_offs_extra_size(offsets));
3661
 
 
3662
 
        blob_no = page_zip_get_n_prev_extern(page_zip, rec, index)
3663
 
                + rec_get_n_extern_new(rec, index, n);
3664
 
        ut_a(blob_no < page_zip->n_blobs);
3665
 
 
3666
 
        externs = page_zip->data + page_zip_get_size(page_zip)
3667
 
                - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3668
 
                * (PAGE_ZIP_DIR_SLOT_SIZE
3669
 
                   + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3670
 
 
3671
 
        field = rec_get_nth_field(rec, offsets, n, &len);
3672
 
 
3673
 
        externs -= (blob_no + 1) * BTR_EXTERN_FIELD_REF_SIZE;
3674
 
        field += len - BTR_EXTERN_FIELD_REF_SIZE;
3675
 
 
3676
 
        memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE);
3677
 
 
3678
 
#ifdef UNIV_ZIP_DEBUG
3679
 
        ut_a(page_zip_validate(page_zip, page));
3680
 
#endif /* UNIV_ZIP_DEBUG */
3681
 
 
3682
 
        if (mtr) {
3683
 
#ifndef UNIV_HOTBACKUP
3684
 
                byte*   log_ptr = mlog_open(
3685
 
                        mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE);
3686
 
                if (UNIV_UNLIKELY(!log_ptr)) {
3687
 
                        return;
3688
 
                }
3689
 
 
3690
 
                log_ptr = mlog_write_initial_log_record_fast(
3691
 
                        (byte*) field, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr);
3692
 
                mach_write_to_2(log_ptr, page_offset(field));
3693
 
                log_ptr += 2;
3694
 
                mach_write_to_2(log_ptr, externs - page_zip->data);
3695
 
                log_ptr += 2;
3696
 
                memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE);
3697
 
                log_ptr += BTR_EXTERN_FIELD_REF_SIZE;
3698
 
                mlog_close(mtr, log_ptr);
3699
 
#endif /* !UNIV_HOTBACKUP */
3700
 
        }
3701
 
}
3702
 
 
3703
 
/***********************************************************//**
3704
 
Parses a log record of writing the node pointer of a record.
3705
 
@return end of log record or NULL */
3706
 
UNIV_INTERN
3707
 
byte*
3708
 
page_zip_parse_write_node_ptr(
3709
 
/*==========================*/
3710
 
        byte*           ptr,    /*!< in: redo log buffer */
3711
 
        byte*           end_ptr,/*!< in: redo log buffer end */
3712
 
        page_t*         page,   /*!< in/out: uncompressed page */
3713
 
        page_zip_des_t* page_zip)/*!< in/out: compressed page */
3714
 
{
3715
 
        ulint   offset;
3716
 
        ulint   z_offset;
3717
 
 
3718
 
        ut_ad(!page == !page_zip);
3719
 
 
3720
 
        if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) {
3721
 
 
3722
 
                return(NULL);
3723
 
        }
3724
 
 
3725
 
        offset = mach_read_from_2(ptr);
3726
 
        z_offset = mach_read_from_2(ptr + 2);
3727
 
 
3728
 
        if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3729
 
            || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3730
 
            || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3731
 
corrupt:
3732
 
                recv_sys->found_corrupt_log = TRUE;
3733
 
 
3734
 
                return(NULL);
3735
 
        }
3736
 
 
3737
 
        if (page) {
3738
 
                byte*   storage_end;
3739
 
                byte*   field;
3740
 
                byte*   storage;
3741
 
                ulint   heap_no;
3742
 
 
3743
 
                if (UNIV_UNLIKELY(!page_zip)
3744
 
                    || UNIV_UNLIKELY(page_is_leaf(page))) {
3745
 
 
3746
 
                        goto corrupt;
3747
 
                }
3748
 
 
3749
 
#ifdef UNIV_ZIP_DEBUG
3750
 
                ut_a(page_zip_validate(page_zip, page));
3751
 
#endif /* UNIV_ZIP_DEBUG */
3752
 
 
3753
 
                field = page + offset;
3754
 
                storage = page_zip->data + z_offset;
3755
 
 
3756
 
                storage_end = page_zip->data + page_zip_get_size(page_zip)
3757
 
                        - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3758
 
                        * PAGE_ZIP_DIR_SLOT_SIZE;
3759
 
 
3760
 
                heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE;
3761
 
 
3762
 
                if (UNIV_UNLIKELY((storage_end - storage) % REC_NODE_PTR_SIZE)
3763
 
                    || UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW)
3764
 
                    || UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) {
3765
 
 
3766
 
                        goto corrupt;
3767
 
                }
3768
 
 
3769
 
                memcpy(field, ptr + 4, REC_NODE_PTR_SIZE);
3770
 
                memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE);
3771
 
 
3772
 
#ifdef UNIV_ZIP_DEBUG
3773
 
                ut_a(page_zip_validate(page_zip, page));
3774
 
#endif /* UNIV_ZIP_DEBUG */
3775
 
        }
3776
 
 
3777
 
        return(ptr + (2 + 2 + REC_NODE_PTR_SIZE));
3778
 
}
3779
 
 
3780
 
/**********************************************************************//**
3781
 
Write the node pointer of a record on a non-leaf compressed page. */
3782
 
UNIV_INTERN
3783
 
void
3784
 
page_zip_write_node_ptr(
3785
 
/*====================*/
3786
 
        page_zip_des_t* page_zip,/*!< in/out: compressed page */
3787
 
        byte*           rec,    /*!< in/out: record */
3788
 
        ulint           size,   /*!< in: data size of rec */
3789
 
        ulint           ptr,    /*!< in: node pointer */
3790
 
        mtr_t*          mtr)    /*!< in: mini-transaction, or NULL */
3791
 
{
3792
 
        byte*   field;
3793
 
        byte*   storage;
3794
 
        page_t* page    = page_align(rec);
3795
 
 
3796
 
        ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3797
 
        ut_ad(page_simple_validate_new(page));
3798
 
        ut_ad(page_zip_simple_validate(page_zip));
3799
 
        ut_ad(page_zip_get_size(page_zip)
3800
 
              > PAGE_DATA + page_zip_dir_size(page_zip));
3801
 
        ut_ad(page_rec_is_comp(rec));
3802
 
 
3803
 
        ut_ad(page_zip->m_start >= PAGE_DATA);
3804
 
        ut_ad(page_zip_header_cmp(page_zip, page));
3805
 
 
3806
 
        ut_ad(!page_is_leaf(page));
3807
 
 
3808
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3809
 
        UNIV_MEM_ASSERT_RW(rec, size);
3810
 
 
3811
 
        storage = page_zip->data + page_zip_get_size(page_zip)
3812
 
                - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3813
 
                * PAGE_ZIP_DIR_SLOT_SIZE
3814
 
                - (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
3815
 
        field = rec + size - REC_NODE_PTR_SIZE;
3816
 
 
3817
 
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3818
 
        ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE));
3819
 
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3820
 
#if REC_NODE_PTR_SIZE != 4
3821
 
# error "REC_NODE_PTR_SIZE != 4"
3822
 
#endif
3823
 
        mach_write_to_4(field, ptr);
3824
 
        memcpy(storage, field, REC_NODE_PTR_SIZE);
3825
 
 
3826
 
        if (mtr) {
3827
 
#ifndef UNIV_HOTBACKUP
3828
 
                byte*   log_ptr = mlog_open(mtr,
3829
 
                                            11 + 2 + 2 + REC_NODE_PTR_SIZE);
3830
 
                if (UNIV_UNLIKELY(!log_ptr)) {
3831
 
                        return;
3832
 
                }
3833
 
 
3834
 
                log_ptr = mlog_write_initial_log_record_fast(
3835
 
                        field, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr);
3836
 
                mach_write_to_2(log_ptr, page_offset(field));
3837
 
                log_ptr += 2;
3838
 
                mach_write_to_2(log_ptr, storage - page_zip->data);
3839
 
                log_ptr += 2;
3840
 
                memcpy(log_ptr, field, REC_NODE_PTR_SIZE);
3841
 
                log_ptr += REC_NODE_PTR_SIZE;
3842
 
                mlog_close(mtr, log_ptr);
3843
 
#endif /* !UNIV_HOTBACKUP */
3844
 
        }
3845
 
}
3846
 
 
3847
 
/**********************************************************************//**
3848
 
Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
3849
 
UNIV_INTERN
3850
 
void
3851
 
page_zip_write_trx_id_and_roll_ptr(
3852
 
/*===============================*/
3853
 
        page_zip_des_t* page_zip,/*!< in/out: compressed page */
3854
 
        byte*           rec,    /*!< in/out: record */
3855
 
        const ulint*    offsets,/*!< in: rec_get_offsets(rec, index) */
3856
 
        ulint           trx_id_col,/*!< in: column number of TRX_ID in rec */
3857
 
        trx_id_t        trx_id, /*!< in: transaction identifier */
3858
 
        roll_ptr_t      roll_ptr)/*!< in: roll_ptr */
3859
 
{
3860
 
        byte*   field;
3861
 
        byte*   storage;
3862
 
        page_t* page    = page_align(rec);
3863
 
        ulint   len;
3864
 
 
3865
 
        ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3866
 
        ut_ad(page_simple_validate_new(page));
3867
 
        ut_ad(page_zip_simple_validate(page_zip));
3868
 
        ut_ad(page_zip_get_size(page_zip)
3869
 
              > PAGE_DATA + page_zip_dir_size(page_zip));
3870
 
        ut_ad(rec_offs_validate(rec, NULL, offsets));
3871
 
        ut_ad(rec_offs_comp(offsets));
3872
 
 
3873
 
        ut_ad(page_zip->m_start >= PAGE_DATA);
3874
 
        ut_ad(page_zip_header_cmp(page_zip, page));
3875
 
 
3876
 
        ut_ad(page_is_leaf(page));
3877
 
 
3878
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3879
 
 
3880
 
        storage = page_zip->data + page_zip_get_size(page_zip)
3881
 
                - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3882
 
                * PAGE_ZIP_DIR_SLOT_SIZE
3883
 
                - (rec_get_heap_no_new(rec) - 1)
3884
 
                * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3885
 
 
3886
 
#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
3887
 
# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
3888
 
#endif
3889
 
        field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
3890
 
        ut_ad(len == DATA_TRX_ID_LEN);
3891
 
        ut_ad(field + DATA_TRX_ID_LEN
3892
 
              == rec_get_nth_field(rec, offsets, trx_id_col + 1, &len));
3893
 
        ut_ad(len == DATA_ROLL_PTR_LEN);
3894
 
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3895
 
        ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
3896
 
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3897
 
#if DATA_TRX_ID_LEN != 6
3898
 
# error "DATA_TRX_ID_LEN != 6"
3899
 
#endif
3900
 
        mach_write_to_6(field, trx_id);
3901
 
#if DATA_ROLL_PTR_LEN != 7
3902
 
# error "DATA_ROLL_PTR_LEN != 7"
3903
 
#endif
3904
 
        mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
3905
 
        memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3906
 
 
3907
 
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3908
 
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3909
 
                           rec_offs_extra_size(offsets));
3910
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3911
 
}
3912
 
 
3913
 
#ifdef UNIV_ZIP_DEBUG
3914
 
/** Debugging aid: set this variable in a debugger to disable
page_zip_clear_rec().  page_zip_clear_rec() tests this flag and skips
its clearing branch when the flag is nonzero.
The only observable effect should be the compression ratio due to
deleted records not being zeroed out.  In rare cases, there can be
page_zip_validate() failures on the node_ptr, trx_id and roll_ptr
columns if the space is reallocated for a smaller record. */
3919
 
UNIV_INTERN ibool       page_zip_clear_rec_disable;
3920
 
#endif /* UNIV_ZIP_DEBUG */
3921
 
 
3922
 
/**********************************************************************//**
3923
 
Clear an area on the uncompressed and compressed page, if possible. */
3924
 
static
3925
 
void
3926
 
page_zip_clear_rec(
3927
 
/*===============*/
3928
 
        page_zip_des_t* page_zip,/*!< in/out: compressed page */
3929
 
        byte*           rec,    /*!< in: record to clear */
3930
 
        dict_index_t*   index,  /*!< in: index of rec */
3931
 
        const ulint*    offsets)/*!< in: rec_get_offsets(rec, index) */
3932
 
{
3933
 
        ulint   heap_no;
3934
 
        page_t* page    = page_align(rec);
3935
 
        /* page_zip_validate() would fail here if a record
3936
 
        containing externally stored columns is being deleted. */
3937
 
        ut_ad(rec_offs_validate(rec, index, offsets));
3938
 
        ut_ad(!page_zip_dir_find(page_zip, page_offset(rec)));
3939
 
        ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec)));
3940
 
        ut_ad(page_zip_header_cmp(page_zip, page));
3941
 
 
3942
 
        heap_no = rec_get_heap_no_new(rec);
3943
 
        ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
3944
 
 
3945
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3946
 
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3947
 
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3948
 
                           rec_offs_extra_size(offsets));
3949
 
 
3950
 
        if (
3951
 
#ifdef UNIV_ZIP_DEBUG
3952
 
            !page_zip_clear_rec_disable &&
3953
 
#endif /* UNIV_ZIP_DEBUG */
3954
 
            page_zip->m_end
3955
 
            + 1 + ((heap_no - 1) >= 64)/* size of the log entry */
3956
 
            + page_zip_get_trailer_len(page_zip,
3957
 
                                       dict_index_is_clust(index), NULL)
3958
 
            < page_zip_get_size(page_zip)) {
3959
 
                byte*   data;
3960
 
 
3961
 
                /* Clear only the data bytes, because the allocator and
3962
 
                the decompressor depend on the extra bytes. */
3963
 
                memset(rec, 0, rec_offs_data_size(offsets));
3964
 
 
3965
 
                if (!page_is_leaf(page)) {
3966
 
                        /* Clear node_ptr on the compressed page. */
3967
 
                        byte*   storage = page_zip->data
3968
 
                                + page_zip_get_size(page_zip)
3969
 
                                - (page_dir_get_n_heap(page)
3970
 
                                   - PAGE_HEAP_NO_USER_LOW)
3971
 
                                * PAGE_ZIP_DIR_SLOT_SIZE;
3972
 
 
3973
 
                        memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
3974
 
                               0, REC_NODE_PTR_SIZE);
3975
 
                } else if (dict_index_is_clust(index)) {
3976
 
                        /* Clear trx_id and roll_ptr on the compressed page. */
3977
 
                        byte*   storage = page_zip->data
3978
 
                                + page_zip_get_size(page_zip)
3979
 
                                - (page_dir_get_n_heap(page)
3980
 
                                   - PAGE_HEAP_NO_USER_LOW)
3981
 
                                * PAGE_ZIP_DIR_SLOT_SIZE;
3982
 
 
3983
 
                        memset(storage - (heap_no - 1)
3984
 
                               * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
3985
 
                               0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3986
 
                }
3987
 
 
3988
 
                /* Log that the data was zeroed out. */
3989
 
                data = page_zip->data + page_zip->m_end;
3990
 
                ut_ad(!*data);
3991
 
                if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
3992
 
                        *data++ = (byte) (0x80 | (heap_no - 1) >> 7);
3993
 
                        ut_ad(!*data);
3994
 
                }
3995
 
                *data++ = (byte) ((heap_no - 1) << 1 | 1);
3996
 
                ut_ad(!*data);
3997
 
                ut_ad((ulint) (data - page_zip->data)
3998
 
                      < page_zip_get_size(page_zip));
3999
 
                page_zip->m_end = data - page_zip->data;
4000
 
                page_zip->m_nonempty = TRUE;
4001
 
        } else if (page_is_leaf(page) && dict_index_is_clust(index)) {
4002
 
                /* Do not clear the record, because there is not enough space
4003
 
                to log the operation. */
4004
 
 
4005
 
                if (rec_offs_any_extern(offsets)) {
4006
 
                        ulint   i;
4007
 
 
4008
 
                        for (i = rec_offs_n_fields(offsets); i--; ) {
4009
 
                                /* Clear all BLOB pointers in order to make
4010
 
                                page_zip_validate() pass. */
4011
 
                                if (rec_offs_nth_extern(offsets, i)) {
4012
 
                                        ulint   len;
4013
 
                                        byte*   field = rec_get_nth_field(
4014
 
                                                rec, offsets, i, &len);
4015
 
                                        memset(field + len
4016
 
                                               - BTR_EXTERN_FIELD_REF_SIZE,
4017
 
                                               0, BTR_EXTERN_FIELD_REF_SIZE);
4018
 
                                }
4019
 
                        }
4020
 
                }
4021
 
        }
4022
 
 
4023
 
#ifdef UNIV_ZIP_DEBUG
4024
 
        ut_a(page_zip_validate(page_zip, page));
4025
 
#endif /* UNIV_ZIP_DEBUG */
4026
 
}
4027
 
 
4028
 
/**********************************************************************//**
4029
 
Write the "deleted" flag of a record on a compressed page.  The flag must
4030
 
already have been written on the uncompressed page. */
4031
 
UNIV_INTERN
4032
 
void
4033
 
page_zip_rec_set_deleted(
4034
 
/*=====================*/
4035
 
        page_zip_des_t* page_zip,/*!< in/out: compressed page */
4036
 
        const byte*     rec,    /*!< in: record on the uncompressed page */
4037
 
        ulint           flag)   /*!< in: the deleted flag (nonzero=TRUE) */
4038
 
{
4039
 
        byte*   slot = page_zip_dir_find(page_zip, page_offset(rec));
4040
 
        ut_a(slot);
4041
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4042
 
        if (flag) {
4043
 
                *slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8);
4044
 
        } else {
4045
 
                *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
4046
 
        }
4047
 
#ifdef UNIV_ZIP_DEBUG
4048
 
        ut_a(page_zip_validate(page_zip, page_align(rec)));
4049
 
#endif /* UNIV_ZIP_DEBUG */
4050
 
}
4051
 
 
4052
 
/**********************************************************************//**
4053
 
Write the "owned" flag of a record on a compressed page.  The n_owned field
4054
 
must already have been written on the uncompressed page. */
4055
 
UNIV_INTERN
4056
 
void
4057
 
page_zip_rec_set_owned(
4058
 
/*===================*/
4059
 
        page_zip_des_t* page_zip,/*!< in/out: compressed page */
4060
 
        const byte*     rec,    /*!< in: record on the uncompressed page */
4061
 
        ulint           flag)   /*!< in: the owned flag (nonzero=TRUE) */
4062
 
{
4063
 
        byte*   slot = page_zip_dir_find(page_zip, page_offset(rec));
4064
 
        ut_a(slot);
4065
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4066
 
        if (flag) {
4067
 
                *slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
4068
 
        } else {
4069
 
                *slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8);
4070
 
        }
4071
 
}
4072
 
 
4073
 
/**********************************************************************//**
4074
 
Insert a record to the dense page directory. */
4075
 
UNIV_INTERN
4076
 
void
4077
 
page_zip_dir_insert(
4078
 
/*================*/
4079
 
        page_zip_des_t* page_zip,/*!< in/out: compressed page */
4080
 
        const byte*     prev_rec,/*!< in: record after which to insert */
4081
 
        const byte*     free_rec,/*!< in: record from which rec was
4082
 
                                allocated, or NULL */
4083
 
        byte*           rec)    /*!< in: record to insert */
4084
 
{
4085
 
        ulint   n_dense;
4086
 
        byte*   slot_rec;
4087
 
        byte*   slot_free;
4088
 
 
4089
 
        ut_ad(prev_rec != rec);
4090
 
        ut_ad(page_rec_get_next((rec_t*) prev_rec) == rec);
4091
 
        ut_ad(page_zip_simple_validate(page_zip));
4092
 
 
4093
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4094
 
 
4095
 
        if (page_rec_is_infimum(prev_rec)) {
4096
 
                /* Use the first slot. */
4097
 
                slot_rec = page_zip->data + page_zip_get_size(page_zip);
4098
 
        } else {
4099
 
                byte*   end     = page_zip->data + page_zip_get_size(page_zip);
4100
 
                byte*   start   = end - page_zip_dir_user_size(page_zip);
4101
 
 
4102
 
                if (UNIV_LIKELY(!free_rec)) {
4103
 
                        /* PAGE_N_RECS was already incremented
4104
 
                        in page_cur_insert_rec_zip(), but the
4105
 
                        dense directory slot at that position
4106
 
                        contains garbage.  Skip it. */
4107
 
                        start += PAGE_ZIP_DIR_SLOT_SIZE;
4108
 
                }
4109
 
 
4110
 
                slot_rec = page_zip_dir_find_low(start, end,
4111
 
                                                 page_offset(prev_rec));
4112
 
                ut_a(slot_rec);
4113
 
        }
4114
 
 
4115
 
        /* Read the old n_dense (n_heap may have been incremented). */
4116
 
        n_dense = page_dir_get_n_heap(page_zip->data)
4117
 
                - (PAGE_HEAP_NO_USER_LOW + 1);
4118
 
 
4119
 
        if (UNIV_LIKELY_NULL(free_rec)) {
4120
 
                /* The record was allocated from the free list.
4121
 
                Shift the dense directory only up to that slot.
4122
 
                Note that in this case, n_dense is actually
4123
 
                off by one, because page_cur_insert_rec_zip()
4124
 
                did not increment n_heap. */
4125
 
                ut_ad(rec_get_heap_no_new(rec) < n_dense + 1
4126
 
                      + PAGE_HEAP_NO_USER_LOW);
4127
 
                ut_ad(rec >= free_rec);
4128
 
                slot_free = page_zip_dir_find(page_zip, page_offset(free_rec));
4129
 
                ut_ad(slot_free);
4130
 
                slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
4131
 
        } else {
4132
 
                /* The record was allocated from the heap.
4133
 
                Shift the entire dense directory. */
4134
 
                ut_ad(rec_get_heap_no_new(rec) == n_dense
4135
 
                      + PAGE_HEAP_NO_USER_LOW);
4136
 
 
4137
 
                /* Shift to the end of the dense page directory. */
4138
 
                slot_free = page_zip->data + page_zip_get_size(page_zip)
4139
 
                        - PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
4140
 
        }
4141
 
 
4142
 
        /* Shift the dense directory to allocate place for rec. */
4143
 
        memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free,
4144
 
                slot_rec - slot_free);
4145
 
 
4146
 
        /* Write the entry for the inserted record.
4147
 
        The "owned" and "deleted" flags must be zero. */
4148
 
        mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, page_offset(rec));
4149
 
}
4150
 
 
4151
 
/**********************************************************************//**
4152
 
Shift the dense page directory and the array of BLOB pointers
4153
 
when a record is deleted. */
4154
 
UNIV_INTERN
4155
 
void
4156
 
page_zip_dir_delete(
4157
 
/*================*/
4158
 
        page_zip_des_t* page_zip,/*!< in/out: compressed page */
4159
 
        byte*           rec,    /*!< in: record to delete */
4160
 
        dict_index_t*   index,  /*!< in: index of rec */
4161
 
        const ulint*    offsets,/*!< in: rec_get_offsets(rec) */
4162
 
        const byte*     free)   /*!< in: previous start of the free list */
4163
 
{
4164
 
        byte*   slot_rec;
4165
 
        byte*   slot_free;
4166
 
        ulint   n_ext;
4167
 
        page_t* page    = page_align(rec);
4168
 
 
4169
 
        ut_ad(rec_offs_validate(rec, index, offsets));
4170
 
        ut_ad(rec_offs_comp(offsets));
4171
 
 
4172
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4173
 
        UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
4174
 
        UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
4175
 
                           rec_offs_extra_size(offsets));
4176
 
 
4177
 
        slot_rec = page_zip_dir_find(page_zip, page_offset(rec));
4178
 
 
4179
 
        ut_a(slot_rec);
4180
 
 
4181
 
        /* This could not be done before page_zip_dir_find(). */
4182
 
        page_header_set_field(page, page_zip, PAGE_N_RECS,
4183
 
                              (ulint)(page_get_n_recs(page) - 1));
4184
 
 
4185
 
        if (UNIV_UNLIKELY(!free)) {
4186
 
                /* Make the last slot the start of the free list. */
4187
 
                slot_free = page_zip->data + page_zip_get_size(page_zip)
4188
 
                        - PAGE_ZIP_DIR_SLOT_SIZE
4189
 
                        * (page_dir_get_n_heap(page_zip->data)
4190
 
                           - PAGE_HEAP_NO_USER_LOW);
4191
 
        } else {
4192
 
                slot_free = page_zip_dir_find_free(page_zip,
4193
 
                                                   page_offset(free));
4194
 
                ut_a(slot_free < slot_rec);
4195
 
                /* Grow the free list by one slot by moving the start. */
4196
 
                slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
4197
 
        }
4198
 
 
4199
 
        if (UNIV_LIKELY(slot_rec > slot_free)) {
4200
 
                memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
4201
 
                        slot_free,
4202
 
                        slot_rec - slot_free);
4203
 
        }
4204
 
 
4205
 
        /* Write the entry for the deleted record.
4206
 
        The "owned" and "deleted" flags will be cleared. */
4207
 
        mach_write_to_2(slot_free, page_offset(rec));
4208
 
 
4209
 
        if (!page_is_leaf(page) || !dict_index_is_clust(index)) {
4210
 
                ut_ad(!rec_offs_any_extern(offsets));
4211
 
                goto skip_blobs;
4212
 
        }
4213
 
 
4214
 
        n_ext = rec_offs_n_extern(offsets);
4215
 
        if (UNIV_UNLIKELY(n_ext)) {
4216
 
                /* Shift and zero fill the array of BLOB pointers. */
4217
 
                ulint   blob_no;
4218
 
                byte*   externs;
4219
 
                byte*   ext_end;
4220
 
 
4221
 
                blob_no = page_zip_get_n_prev_extern(page_zip, rec, index);
4222
 
                ut_a(blob_no + n_ext <= page_zip->n_blobs);
4223
 
 
4224
 
                externs = page_zip->data + page_zip_get_size(page_zip)
4225
 
                        - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
4226
 
                        * (PAGE_ZIP_DIR_SLOT_SIZE
4227
 
                           + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4228
 
 
4229
 
                ext_end = externs - page_zip->n_blobs
4230
 
                        * BTR_EXTERN_FIELD_REF_SIZE;
4231
 
                externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
4232
 
 
4233
 
                page_zip->n_blobs -= n_ext;
4234
 
                /* Shift and zero fill the array. */
4235
 
                memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end,
4236
 
                        (page_zip->n_blobs - blob_no)
4237
 
                        * BTR_EXTERN_FIELD_REF_SIZE);
4238
 
                memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE);
4239
 
        }
4240
 
 
4241
 
skip_blobs:
4242
 
        /* The compression algorithm expects info_bits and n_owned
4243
 
        to be 0 for deleted records. */
4244
 
        rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
4245
 
 
4246
 
        page_zip_clear_rec(page_zip, rec, index, offsets);
4247
 
}
4248
 
 
4249
 
/**********************************************************************//**
4250
 
Add a slot to the dense page directory. */
4251
 
UNIV_INTERN
4252
 
void
4253
 
page_zip_dir_add_slot(
4254
 
/*==================*/
4255
 
        page_zip_des_t* page_zip,       /*!< in/out: compressed page */
4256
 
        ulint           is_clustered)   /*!< in: nonzero for clustered index,
4257
 
                                        zero for others */
4258
 
{
4259
 
        ulint   n_dense;
4260
 
        byte*   dir;
4261
 
        byte*   stored;
4262
 
 
4263
 
        ut_ad(page_is_comp(page_zip->data));
4264
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4265
 
 
4266
 
        /* Read the old n_dense (n_heap has already been incremented). */
4267
 
        n_dense = page_dir_get_n_heap(page_zip->data)
4268
 
                - (PAGE_HEAP_NO_USER_LOW + 1);
4269
 
 
4270
 
        dir = page_zip->data + page_zip_get_size(page_zip)
4271
 
                - PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
4272
 
 
4273
 
        if (!page_is_leaf(page_zip->data)) {
4274
 
                ut_ad(!page_zip->n_blobs);
4275
 
                stored = dir - n_dense * REC_NODE_PTR_SIZE;
4276
 
        } else if (UNIV_UNLIKELY(is_clustered)) {
4277
 
                /* Move the BLOB pointer array backwards to make space for the
4278
 
                roll_ptr and trx_id columns and the dense directory slot. */
4279
 
                byte*   externs;
4280
 
 
4281
 
                stored = dir - n_dense
4282
 
                        * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4283
 
                externs = stored
4284
 
                        - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4285
 
                ASSERT_ZERO(externs
4286
 
                            - (PAGE_ZIP_DIR_SLOT_SIZE
4287
 
                               + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4288
 
                            PAGE_ZIP_DIR_SLOT_SIZE
4289
 
                            + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4290
 
                memmove(externs - (PAGE_ZIP_DIR_SLOT_SIZE
4291
 
                                   + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4292
 
                        externs, stored - externs);
4293
 
        } else {
4294
 
                stored = dir
4295
 
                        - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4296
 
                ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE,
4297
 
                            PAGE_ZIP_DIR_SLOT_SIZE);
4298
 
        }
4299
 
 
4300
 
        /* Move the uncompressed area backwards to make space
4301
 
        for one directory slot. */
4302
 
        memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, dir - stored);
4303
 
}
4304
 
 
4305
 
/***********************************************************//**
4306
 
Parses a log record of writing to the header of a page.
4307
 
@return end of log record or NULL */
4308
 
UNIV_INTERN
4309
 
byte*
4310
 
page_zip_parse_write_header(
4311
 
/*========================*/
4312
 
        byte*           ptr,    /*!< in: redo log buffer */
4313
 
        byte*           end_ptr,/*!< in: redo log buffer end */
4314
 
        page_t*         page,   /*!< in/out: uncompressed page */
4315
 
        page_zip_des_t* page_zip)/*!< in/out: compressed page */
4316
 
{
4317
 
        ulint   offset;
4318
 
        ulint   len;
4319
 
 
4320
 
        ut_ad(ptr && end_ptr);
4321
 
        ut_ad(!page == !page_zip);
4322
 
 
4323
 
        if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) {
4324
 
 
4325
 
                return(NULL);
4326
 
        }
4327
 
 
4328
 
        offset = (ulint) *ptr++;
4329
 
        len = (ulint) *ptr++;
4330
 
 
4331
 
        if (UNIV_UNLIKELY(!len) || UNIV_UNLIKELY(offset + len >= PAGE_DATA)) {
4332
 
corrupt:
4333
 
                recv_sys->found_corrupt_log = TRUE;
4334
 
 
4335
 
                return(NULL);
4336
 
        }
4337
 
 
4338
 
        if (UNIV_UNLIKELY(end_ptr < ptr + len)) {
4339
 
 
4340
 
                return(NULL);
4341
 
        }
4342
 
 
4343
 
        if (page) {
4344
 
                if (UNIV_UNLIKELY(!page_zip)) {
4345
 
 
4346
 
                        goto corrupt;
4347
 
                }
4348
 
#ifdef UNIV_ZIP_DEBUG
4349
 
                ut_a(page_zip_validate(page_zip, page));
4350
 
#endif /* UNIV_ZIP_DEBUG */
4351
 
 
4352
 
                memcpy(page + offset, ptr, len);
4353
 
                memcpy(page_zip->data + offset, ptr, len);
4354
 
 
4355
 
#ifdef UNIV_ZIP_DEBUG
4356
 
                ut_a(page_zip_validate(page_zip, page));
4357
 
#endif /* UNIV_ZIP_DEBUG */
4358
 
        }
4359
 
 
4360
 
        return(ptr + len);
4361
 
}
4362
 
 
4363
 
#ifndef UNIV_HOTBACKUP
4364
 
/**********************************************************************//**
4365
 
Write a log record of writing to the uncompressed header portion of a page. */
4366
 
UNIV_INTERN
4367
 
void
4368
 
page_zip_write_header_log(
4369
 
/*======================*/
4370
 
        const byte*     data,   /*!< in: data on the uncompressed page */
4371
 
        ulint           length, /*!< in: length of the data */
4372
 
        mtr_t*          mtr)    /*!< in: mini-transaction */
4373
 
{
4374
 
        byte*   log_ptr = mlog_open(mtr, 11 + 1 + 1);
4375
 
        ulint   offset  = page_offset(data);
4376
 
 
4377
 
        ut_ad(offset < PAGE_DATA);
4378
 
        ut_ad(offset + length < PAGE_DATA);
4379
 
#if PAGE_DATA > 255
4380
 
# error "PAGE_DATA > 255"
4381
 
#endif
4382
 
        ut_ad(length < 256);
4383
 
 
4384
 
        /* If no logging is requested, we may return now */
4385
 
        if (UNIV_UNLIKELY(!log_ptr)) {
4386
 
 
4387
 
                return;
4388
 
        }
4389
 
 
4390
 
        log_ptr = mlog_write_initial_log_record_fast(
4391
 
                (byte*) data, MLOG_ZIP_WRITE_HEADER, log_ptr, mtr);
4392
 
        *log_ptr++ = (byte) offset;
4393
 
        *log_ptr++ = (byte) length;
4394
 
        mlog_close(mtr, log_ptr);
4395
 
 
4396
 
        mlog_catenate_string(mtr, data, length);
4397
 
}
4398
 
#endif /* !UNIV_HOTBACKUP */
4399
 
 
4400
 
/**********************************************************************//**
4401
 
Reorganize and compress a page.  This is a low-level operation for
4402
 
compressed pages, to be used when page_zip_compress() fails.
4403
 
On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
4404
 
The function btr_page_reorganize() should be preferred whenever possible.
4405
 
IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
4406
 
non-clustered index, the caller must update the insert buffer free
4407
 
bits in the same mini-transaction in such a way that the modification
4408
 
will be redo-logged.
4409
 
@return TRUE on success, FALSE on failure; page_zip will be left
4410
 
intact on failure, but page will be overwritten. */
4411
 
UNIV_INTERN
4412
 
ibool
4413
 
page_zip_reorganize(
4414
 
/*================*/
4415
 
        buf_block_t*    block,  /*!< in/out: page with compressed page;
4416
 
                                on the compressed page, in: size;
4417
 
                                out: data, n_blobs,
4418
 
                                m_start, m_end, m_nonempty */
4419
 
        dict_index_t*   index,  /*!< in: index of the B-tree node */
4420
 
        mtr_t*          mtr)    /*!< in: mini-transaction */
4421
 
{
4422
 
        buf_pool_t*     buf_pool        = buf_pool_from_block(block);
4423
 
        page_zip_des_t* page_zip        = buf_block_get_page_zip(block);
4424
 
        page_t*         page            = buf_block_get_frame(block);
4425
 
        buf_block_t*    temp_block;
4426
 
        page_t*         temp_page;
4427
 
        ulint           log_mode;
4428
 
 
4429
 
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4430
 
        ut_ad(page_is_comp(page));
4431
 
        ut_ad(!dict_index_is_ibuf(index));
4432
 
        /* Note that page_zip_validate(page_zip, page) may fail here. */
4433
 
        UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
4434
 
        UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4435
 
 
4436
 
        /* Disable logging */
4437
 
        log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
4438
 
 
4439
 
#ifndef UNIV_HOTBACKUP
4440
 
        temp_block = buf_block_alloc(buf_pool, 0);
4441
 
        btr_search_drop_page_hash_index(block);
4442
 
        block->check_index_page_at_flush = TRUE;
4443
 
#else /* !UNIV_HOTBACKUP */
4444
 
        ut_ad(block == back_block1);
4445
 
        temp_block = back_block2;
4446
 
#endif /* !UNIV_HOTBACKUP */
4447
 
        temp_page = temp_block->frame;
4448
 
 
4449
 
        /* Copy the old page to temporary space */
4450
 
        buf_frame_copy(temp_page, page);
4451
 
 
4452
 
        /* Recreate the page: note that global data on page (possible
4453
 
        segment headers, next page-field, etc.) is preserved intact */
4454
 
 
4455
 
        page_create(block, mtr, TRUE);
4456
 
 
4457
 
        /* Copy the records from the temporary space to the recreated page;
4458
 
        do not copy the lock bits yet */
4459
 
 
4460
 
        page_copy_rec_list_end_no_locks(block, temp_block,
4461
 
                                        page_get_infimum_rec(temp_page),
4462
 
                                        index, mtr);
4463
 
 
4464
 
        if (!dict_index_is_clust(index) && page_is_leaf(temp_page)) {
4465
 
                /* Copy max trx id to recreated page */
4466
 
                trx_id_t        max_trx_id = page_get_max_trx_id(temp_page);
4467
 
                page_set_max_trx_id(block, NULL, max_trx_id, NULL);
4468
 
                ut_ad(max_trx_id != 0);
4469
 
        }
4470
 
 
4471
 
        /* Restore logging. */
4472
 
        mtr_set_log_mode(mtr, log_mode);
4473
 
 
4474
 
        if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
4475
 
 
4476
 
#ifndef UNIV_HOTBACKUP
4477
 
                buf_block_free(temp_block);
4478
 
#endif /* !UNIV_HOTBACKUP */
4479
 
                return(FALSE);
4480
 
        }
4481
 
 
4482
 
        lock_move_reorganize_page(block, temp_block);
4483
 
 
4484
 
#ifndef UNIV_HOTBACKUP
4485
 
        buf_block_free(temp_block);
4486
 
#endif /* !UNIV_HOTBACKUP */
4487
 
        return(TRUE);
4488
 
}
4489
 
 
4490
 
#ifndef UNIV_HOTBACKUP
4491
 
/**********************************************************************//**
4492
 
Copy the records of a page byte for byte.  Do not copy the page header
4493
 
or trailer, except those B-tree header fields that are directly
4494
 
related to the storage of records.  Also copy PAGE_MAX_TRX_ID.
4495
 
NOTE: The caller must update the lock table and the adaptive hash index. */
4496
 
UNIV_INTERN
4497
 
void
4498
 
page_zip_copy_recs(
4499
 
/*===============*/
4500
 
        page_zip_des_t*         page_zip,       /*!< out: copy of src_zip
4501
 
                                                (n_blobs, m_start, m_end,
4502
 
                                                m_nonempty, data[0..size-1]) */
4503
 
        page_t*                 page,           /*!< out: copy of src */
4504
 
        const page_zip_des_t*   src_zip,        /*!< in: compressed page */
4505
 
        const page_t*           src,            /*!< in: page */
4506
 
        dict_index_t*           index,          /*!< in: index of the B-tree */
4507
 
        mtr_t*                  mtr)            /*!< in: mini-transaction */
4508
 
{
4509
 
        ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
4510
 
        ut_ad(mtr_memo_contains_page(mtr, (page_t*) src, MTR_MEMO_PAGE_X_FIX));
4511
 
        ut_ad(!dict_index_is_ibuf(index));
4512
 
#ifdef UNIV_ZIP_DEBUG
4513
 
        /* The B-tree operations that call this function may set
4514
 
        FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
4515
 
        mismatch.  A strict page_zip_validate() will be executed later
4516
 
        during the B-tree operations. */
4517
 
        ut_a(page_zip_validate_low(src_zip, src, TRUE));
4518
 
#endif /* UNIV_ZIP_DEBUG */
4519
 
        ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
4520
 
        if (UNIV_UNLIKELY(src_zip->n_blobs)) {
4521
 
                ut_a(page_is_leaf(src));
4522
 
                ut_a(dict_index_is_clust(index));
4523
 
        }
4524
 
 
4525
 
        /* The PAGE_MAX_TRX_ID must be set on leaf pages of secondary
4526
 
        indexes.  It does not matter on other pages. */
4527
 
        ut_a(dict_index_is_clust(index) || !page_is_leaf(src)
4528
 
             || page_get_max_trx_id(src));
4529
 
 
4530
 
        UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
4531
 
        UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip));
4532
 
        UNIV_MEM_ASSERT_RW(src, UNIV_PAGE_SIZE);
4533
 
        UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip));
4534
 
 
4535
 
        /* Copy those B-tree page header fields that are related to
4536
 
        the records stored in the page.  Also copy the field
4537
 
        PAGE_MAX_TRX_ID.  Skip the rest of the page header and
4538
 
        trailer.  On the compressed page, there is no trailer. */
4539
 
#if PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END
4540
 
# error "PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END"
4541
 
#endif
4542
 
        memcpy(PAGE_HEADER + page, PAGE_HEADER + src,
4543
 
               PAGE_HEADER_PRIV_END);
4544
 
        memcpy(PAGE_DATA + page, PAGE_DATA + src,
4545
 
               UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
4546
 
        memcpy(PAGE_HEADER + page_zip->data, PAGE_HEADER + src_zip->data,
4547
 
               PAGE_HEADER_PRIV_END);
4548
 
        memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data,
4549
 
               page_zip_get_size(page_zip) - PAGE_DATA);
4550
 
 
4551
 
        /* Copy all fields of src_zip to page_zip, except the pointer
4552
 
        to the compressed data page. */
4553
 
        {
4554
 
                page_zip_t*     data = page_zip->data;
4555
 
                memcpy(page_zip, src_zip, sizeof *page_zip);
4556
 
                page_zip->data = data;
4557
 
        }
4558
 
        ut_ad(page_zip_get_trailer_len(page_zip,
4559
 
                                       dict_index_is_clust(index), NULL)
4560
 
              + page_zip->m_end < page_zip_get_size(page_zip));
4561
 
 
4562
 
        if (!page_is_leaf(src)
4563
 
            && UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL)
4564
 
            && UNIV_LIKELY(mach_read_from_4(page
4565
 
                                            + FIL_PAGE_PREV) != FIL_NULL)) {
4566
 
                /* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */
4567
 
                ulint   offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
4568
 
                                                 TRUE);
4569
 
                if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) {
4570
 
                        rec_t*  rec = page + offs;
4571
 
                        ut_a(rec[-REC_N_NEW_EXTRA_BYTES]
4572
 
                             & REC_INFO_MIN_REC_FLAG);
4573
 
                        rec[-REC_N_NEW_EXTRA_BYTES] &= ~ REC_INFO_MIN_REC_FLAG;
4574
 
                }
4575
 
        }
4576
 
 
4577
 
#ifdef UNIV_ZIP_DEBUG
4578
 
        ut_a(page_zip_validate(page_zip, page));
4579
 
#endif /* UNIV_ZIP_DEBUG */
4580
 
 
4581
 
        page_zip_compress_write_log(page_zip, page, index, mtr);
4582
 
}
4583
 
#endif /* !UNIV_HOTBACKUP */
4584
 
 
4585
 
/**********************************************************************//**
4586
 
Parses a log record of compressing an index page.
4587
 
@return end of log record or NULL */
4588
 
UNIV_INTERN
4589
 
byte*
4590
 
page_zip_parse_compress(
4591
 
/*====================*/
4592
 
        byte*           ptr,    /*!< in: buffer */
4593
 
        byte*           end_ptr,/*!< in: buffer end */
4594
 
        page_t*         page,   /*!< out: uncompressed page */
4595
 
        page_zip_des_t* page_zip)/*!< out: compressed page */
4596
 
{
4597
 
        ulint   size;
4598
 
        ulint   trailer_size;
4599
 
 
4600
 
        ut_ad(ptr && end_ptr);
4601
 
        ut_ad(!page == !page_zip);
4602
 
 
4603
 
        if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) {
4604
 
 
4605
 
                return(NULL);
4606
 
        }
4607
 
 
4608
 
        size = mach_read_from_2(ptr);
4609
 
        ptr += 2;
4610
 
        trailer_size = mach_read_from_2(ptr);
4611
 
        ptr += 2;
4612
 
 
4613
 
        if (UNIV_UNLIKELY(ptr + 8 + size + trailer_size > end_ptr)) {
4614
 
 
4615
 
                return(NULL);
4616
 
        }
4617
 
 
4618
 
        if (page) {
4619
 
                if (UNIV_UNLIKELY(!page_zip)
4620
 
                    || UNIV_UNLIKELY(page_zip_get_size(page_zip) < size)) {
4621
 
corrupt:
4622
 
                        recv_sys->found_corrupt_log = TRUE;
4623
 
 
4624
 
                        return(NULL);
4625
 
                }
4626
 
 
4627
 
                memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4);
4628
 
                memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4);
4629
 
                memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size);
4630
 
                memset(page_zip->data + FIL_PAGE_TYPE + size, 0,
4631
 
                       page_zip_get_size(page_zip) - trailer_size
4632
 
                       - (FIL_PAGE_TYPE + size));
4633
 
                memcpy(page_zip->data + page_zip_get_size(page_zip)
4634
 
                       - trailer_size, ptr + 8 + size, trailer_size);
4635
 
 
4636
 
                if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page,
4637
 
                                                       TRUE))) {
4638
 
 
4639
 
                        goto corrupt;
4640
 
                }
4641
 
        }
4642
 
 
4643
 
        return(ptr + 8 + size + trailer_size);
4644
 
}
4645
 
 
4646
 
/**********************************************************************//**
4647
 
Calculate the compressed page checksum.
4648
 
@return page checksum */
4649
 
UNIV_INTERN
4650
 
ulint
4651
 
page_zip_calc_checksum(
4652
 
/*===================*/
4653
 
        const void*     data,   /*!< in: compressed page */
4654
 
        ulint           size)   /*!< in: size of compressed page */
4655
 
{
4656
 
        /* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
4657
 
        and FIL_PAGE_FILE_FLUSH_LSN from the checksum. */
4658
 
 
4659
 
        const Bytef*    s       = static_cast<const Bytef *>(data);
4660
 
        uLong           adler;
4661
 
 
4662
 
        ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
4663
 
 
4664
 
        adler = adler32(0L, s + FIL_PAGE_OFFSET,
4665
 
                        FIL_PAGE_LSN - FIL_PAGE_OFFSET);
4666
 
        adler = adler32(adler, s + FIL_PAGE_TYPE, 2);
4667
 
        adler = adler32(adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
4668
 
                        size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
4669
 
 
4670
 
        return((ulint) adler);
4671
 
}