/*****************************************************************************

Copyright (C) 2005, 2009, Innobase Oy. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/

/**************************************************//**
Compressed page interface

Created June 2005 by Marko Makela
*******************************************************/
29
# include "page0zip.ic"
32
#include "page0page.h"
35
#include "dict0dict.h"
37
#include "page0types.h"
40
#ifndef UNIV_HOTBACKUP
43
# include "dict0boot.h"
44
# include "lock0lock.h"
45
#else /* !UNIV_HOTBACKUP */
46
# define lock_move_reorganize_page(block, temp_block) ((void) 0)
47
# define buf_LRU_stat_inc_unzip() ((void) 0)
48
#endif /* !UNIV_HOTBACKUP */
50
#ifndef UNIV_HOTBACKUP
51
/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
52
UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
53
#endif /* !UNIV_HOTBACKUP */
55
/* Please refer to ../include/page0zip.ic for a description of the
compressed page format. */

/* The infimum and supremum records are omitted from the compressed page.
On compress, we compare that the records are there, and on uncompress we
restore the records. */
61
/** Extra bytes of an infimum record */
62
static const byte infimum_extra[] = {
63
0x01, /* info_bits=0, n_owned=1 */
64
0x00, 0x02 /* heap_no=0, status=2 */
65
/* ?, ? */ /* next=(first user rec, or supremum) */
67
/** Data bytes of an infimum record */
68
static const byte infimum_data[] = {
69
0x69, 0x6e, 0x66, 0x69,
70
0x6d, 0x75, 0x6d, 0x00 /* "infimum\0" */
72
/** Extra bytes and data bytes of a supremum record */
73
static const byte supremum_extra_data[] = {
74
/* 0x0?, */ /* info_bits=0, n_owned=1..8 */
75
0x00, 0x0b, /* heap_no=1, status=3 */
76
0x00, 0x00, /* next=0 */
77
0x73, 0x75, 0x70, 0x72,
78
0x65, 0x6d, 0x75, 0x6d /* "supremum" */
81
/** Assert that a block of memory is filled with zero bytes.
Compare at most sizeof(field_ref_zero) bytes.
@param b in: memory block
@param s in: size of the memory block, in bytes */
85
#define ASSERT_ZERO(b, s) \
86
ut_ad(!memcmp(b, field_ref_zero, ut_min(s, sizeof field_ref_zero)))
87
/** Assert that a BLOB pointer is filled with zero bytes.
@param b in: BLOB pointer */
89
#define ASSERT_ZERO_BLOB(b) \
90
ut_ad(!memcmp(b, field_ref_zero, sizeof field_ref_zero))
92
/* Enable some extra debugging output. This code can be enabled
independently of any UNIV_ debugging conditions. */
94
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
96
__attribute__((format (printf, 1, 2)))
97
/**********************************************************************//**
98
Report a failure to decompress or compress.
99
@return number of characters printed */
104
const char* fmt, /*!< in: printf(3) format string */
105
...) /*!< in: arguments corresponding to fmt */
110
ut_print_timestamp(stderr);
111
fputs(" InnoDB: ", stderr);
113
res = vfprintf(stderr, fmt, ap);
118
/** Wrapper for page_zip_fail_func()
@param fmt_args in: printf(3) format string and arguments */
120
# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
121
#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
122
/** Dummy wrapper for page_zip_fail_func()
@param fmt_args ignored: printf(3) format string and arguments */
124
# define page_zip_fail(fmt_args) /* empty */
125
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
127
#ifndef UNIV_HOTBACKUP
128
/**********************************************************************//**
Determine the guaranteed free space on an empty page.
@return minimum payload size on the page */
135
ulint n_fields, /*!< in: number of columns in the index */
136
ulint zip_size) /*!< in: compressed page size in bytes */
139
/* subtract the page header and the longest
140
uncompressed data needed for one record */
142
+ PAGE_ZIP_DIR_SLOT_SIZE
143
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
144
+ 1/* encoded heap_no==2 in page_zip_write_rec() */
145
+ 1/* end of modification log */
146
- REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
147
/* subtract the space for page_zip_fields_encode() */
148
- compressBound(2 * (n_fields + 1));
149
return(size > 0 ? (ulint) size : 0);
151
#endif /* !UNIV_HOTBACKUP */
153
/*************************************************************//**
Gets the size of the compressed page trailer (the dense page directory),
including deleted records (the free list).
@return length of dense page directory, in bytes */
161
const page_zip_des_t* page_zip) /*!< in: compressed page */
163
/* Exclude the page infimum and supremum from the record count. */
164
ulint size = PAGE_ZIP_DIR_SLOT_SIZE
165
* (page_dir_get_n_heap(page_zip->data)
166
- PAGE_HEAP_NO_USER_LOW);
170
/*************************************************************//**
Gets the size of the compressed page trailer (the dense page directory),
only including user records (excluding the free list).
@return length of dense page directory comprising existing records, in bytes */
176
page_zip_dir_user_size(
177
/*===================*/
178
const page_zip_des_t* page_zip) /*!< in: compressed page */
180
ulint size = PAGE_ZIP_DIR_SLOT_SIZE
181
* page_get_n_recs(page_zip->data);
182
ut_ad(size <= page_zip_dir_size(page_zip));
186
/*************************************************************//**
Find the slot of the given record in the dense page directory.
@return dense directory slot, or NULL if record not found */
191
page_zip_dir_find_low(
192
/*==================*/
193
byte* slot, /*!< in: start of records */
194
byte* end, /*!< in: end of records */
195
ulint offset) /*!< in: offset of user record */
199
for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) {
200
if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK)
209
/*************************************************************//**
Find the slot of the given non-free record in the dense page directory.
@return dense directory slot, or NULL if record not found */
216
page_zip_des_t* page_zip, /*!< in: compressed page */
217
ulint offset) /*!< in: offset of user record */
219
byte* end = page_zip->data + page_zip_get_size(page_zip);
221
ut_ad(page_zip_simple_validate(page_zip));
223
return(page_zip_dir_find_low(end - page_zip_dir_user_size(page_zip),
228
/*************************************************************//**
Find the slot of the given free record in the dense page directory.
@return dense directory slot, or NULL if record not found */
233
page_zip_dir_find_free(
234
/*===================*/
235
page_zip_des_t* page_zip, /*!< in: compressed page */
236
ulint offset) /*!< in: offset of user record */
238
byte* end = page_zip->data + page_zip_get_size(page_zip);
240
ut_ad(page_zip_simple_validate(page_zip));
242
return(page_zip_dir_find_low(end - page_zip_dir_size(page_zip),
243
end - page_zip_dir_user_size(page_zip),
247
/*************************************************************//**
Read a given slot in the dense page directory.
@return record offset on the uncompressed page, possibly ORed with
PAGE_ZIP_DIR_SLOT_DEL or PAGE_ZIP_DIR_SLOT_OWNED */
255
const page_zip_des_t* page_zip, /*!< in: compressed page */
256
ulint slot) /*!< in: slot
257
(0=first user record) */
259
ut_ad(page_zip_simple_validate(page_zip));
260
ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE);
261
return(mach_read_from_2(page_zip->data + page_zip_get_size(page_zip)
262
- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
265
#ifndef UNIV_HOTBACKUP
266
/**********************************************************************//**
Write a log record of compressing an index page. */
270
page_zip_compress_write_log(
271
/*========================*/
272
const page_zip_des_t* page_zip,/*!< in: compressed page */
273
const page_t* page, /*!< in: uncompressed page */
274
dict_index_t* index, /*!< in: index of the B-tree node */
275
mtr_t* mtr) /*!< in: mini-transaction */
280
ut_ad(!dict_index_is_ibuf(index));
282
log_ptr = mlog_open(mtr, 11 + 2 + 2);
289
/* Read the number of user records. */
290
trailer_size = page_dir_get_n_heap(page_zip->data)
291
- PAGE_HEAP_NO_USER_LOW;
292
/* Multiply by uncompressed of size stored per record */
293
if (!page_is_leaf(page)) {
294
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
295
} else if (dict_index_is_clust(index)) {
296
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
297
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
299
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
301
/* Add the space occupied by BLOB pointers. */
302
trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
303
ut_a(page_zip->m_end > PAGE_DATA);
304
#if FIL_PAGE_DATA > PAGE_DATA
305
# error "FIL_PAGE_DATA > PAGE_DATA"
307
ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));
309
log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
310
MLOG_ZIP_PAGE_COMPRESS,
312
mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE);
314
mach_write_to_2(log_ptr, trailer_size);
316
mlog_close(mtr, log_ptr);
318
/* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
319
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
320
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
321
/* Write most of the page header, the compressed stream and
322
the modification log. */
323
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
324
page_zip->m_end - FIL_PAGE_TYPE);
325
/* Write the uncompressed trailer of the compressed page. */
326
mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
327
- trailer_size, trailer_size);
329
#endif /* !UNIV_HOTBACKUP */
331
/******************************************************//**
Determine how many externally stored columns are contained
in existing records with smaller heap_no than rec. */
336
page_zip_get_n_prev_extern(
337
/*=======================*/
338
const page_zip_des_t* page_zip,/*!< in: dense page directory on
340
const rec_t* rec, /*!< in: compact physical record
341
on a B-tree leaf page */
342
dict_index_t* index) /*!< in: record descriptor */
344
const page_t* page = page_align(rec);
349
ulint n_recs = page_get_n_recs(page_zip->data);
351
ut_ad(page_is_leaf(page));
352
ut_ad(page_is_comp(page));
353
ut_ad(dict_table_is_comp(index->table));
354
ut_ad(dict_index_is_clust(index));
355
ut_ad(!dict_index_is_ibuf(index));
357
heap_no = rec_get_heap_no_new(rec);
358
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
359
left = heap_no - PAGE_HEAP_NO_USER_LOW;
360
if (UNIV_UNLIKELY(!left)) {
364
for (i = 0; i < n_recs; i++) {
365
const rec_t* r = page + (page_zip_dir_get(page_zip, i)
366
& PAGE_ZIP_DIR_SLOT_MASK);
368
if (rec_get_heap_no_new(r) < heap_no) {
369
n_ext += rec_get_n_extern_new(r, index,
380
/**********************************************************************//**
Encode the length of a fixed-length column.
@return buf + length of encoded val */
385
page_zip_fixed_field_encode(
386
/*========================*/
387
byte* buf, /*!< in: pointer to buffer where to write */
388
ulint val) /*!< in: value to write */
392
if (UNIV_LIKELY(val < 126)) {
394
0 = nullable variable field of at most 255 bytes length;
395
1 = not null variable field of at most 255 bytes length;
396
126 = nullable variable field with maximum length >255;
397
127 = not null variable field with maximum length >255
401
*buf++ = (byte) (0x80 | val >> 8);
408
/**********************************************************************//**
Write the index information for the compressed page.
@return used size of buf */
413
page_zip_fields_encode(
414
/*===================*/
415
ulint n, /*!< in: number of fields to compress */
416
dict_index_t* index, /*!< in: index comprising at least n fields */
417
ulint trx_id_pos,/*!< in: position of the trx_id column
418
in the index, or ULINT_UNDEFINED if
419
this is a non-leaf page */
420
byte* buf) /*!< out: buffer of (n + 1) * 2 bytes */
422
const byte* buf_start = buf;
425
ulint trx_id_col = 0;
426
/* sum of lengths of preceding non-nullable fixed fields, or 0 */
429
ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);
431
for (i = col = 0; i < n; i++) {
432
dict_field_t* field = dict_index_get_nth_field(index, i);
435
if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
436
val = 1; /* set the "not nullable" flag */
438
val = 0; /* nullable field */
441
if (!field->fixed_len) {
442
/* variable-length field */
443
const dict_col_t* column
444
= dict_field_get_col(field);
446
if (UNIV_UNLIKELY(column->len > 255)
447
|| UNIV_UNLIKELY(column->mtype == DATA_BLOB)) {
448
val |= 0x7e; /* max > 255 bytes */
452
/* write out the length of any
453
preceding non-nullable fields */
454
buf = page_zip_fixed_field_encode(
455
buf, fixed_sum << 1 | 1);
463
/* fixed-length non-nullable field */
465
if (fixed_sum && UNIV_UNLIKELY
466
(fixed_sum + field->fixed_len
467
> DICT_MAX_INDEX_COL_LEN)) {
468
/* Write out the length of the
469
preceding non-nullable fields,
470
to avoid exceeding the maximum
471
length of a fixed-length column. */
472
buf = page_zip_fixed_field_encode(
473
buf, fixed_sum << 1 | 1);
478
if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
480
/* Write out the length of any
481
preceding non-nullable fields,
482
and start a new trx_id column. */
483
buf = page_zip_fixed_field_encode(
484
buf, fixed_sum << 1 | 1);
489
fixed_sum = field->fixed_len;
492
fixed_sum += field->fixed_len;
495
/* fixed-length nullable field */
498
/* write out the length of any
499
preceding non-nullable fields */
500
buf = page_zip_fixed_field_encode(
501
buf, fixed_sum << 1 | 1);
506
buf = page_zip_fixed_field_encode(
507
buf, field->fixed_len << 1);
513
/* Write out the lengths of last fixed-length columns. */
514
buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
517
if (trx_id_pos != ULINT_UNDEFINED) {
518
/* Write out the position of the trx_id column */
521
/* Write out the number of nullable fields */
522
i = index->n_nullable;
528
*buf++ = (byte) (0x80 | i >> 8);
532
ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
533
return((ulint) (buf - buf_start));
536
/**********************************************************************//**
Populate the dense page directory from the sparse directory. */
542
const page_t* page, /*!< in: compact page */
543
byte* buf, /*!< in: pointer to dense page directory[-1];
544
out: dense directory on compressed page */
545
const rec_t** recs) /*!< in: pointer to an array of 0, or NULL;
546
out: dense page directory sorted by ascending
547
address (and heap_no) */
559
if (page_is_leaf(page)) {
560
status = REC_STATUS_ORDINARY;
562
status = REC_STATUS_NODE_PTR;
564
(mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) {
565
min_mark = REC_INFO_MIN_REC_FLAG;
569
n_heap = page_dir_get_n_heap(page);
571
/* Traverse the list of stored records in the collation order,
572
starting from the first user record. */
574
rec = page + PAGE_NEW_INFIMUM;
580
offs = rec_get_next_offs(rec, TRUE);
581
if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
585
heap_no = rec_get_heap_no_new(rec);
586
ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
587
ut_a(heap_no < n_heap);
588
ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR);
589
ut_a(offs >= PAGE_ZIP_START);
590
#if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1)
591
# error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2"
593
#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1
594
# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1"
596
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
597
offs |= PAGE_ZIP_DIR_SLOT_OWNED;
600
info_bits = rec_get_info_bits(rec, TRUE);
601
if (UNIV_UNLIKELY(info_bits & REC_INFO_DELETED_FLAG)) {
602
info_bits &= ~REC_INFO_DELETED_FLAG;
603
offs |= PAGE_ZIP_DIR_SLOT_DEL;
605
ut_a(info_bits == min_mark);
606
/* Only the smallest user record can have
607
REC_INFO_MIN_REC_FLAG set. */
610
mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
612
if (UNIV_LIKELY_NULL(recs)) {
613
/* Ensure that each heap_no occurs at most once. */
614
ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
615
/* exclude infimum and supremum */
616
recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
619
ut_a(rec_get_status(rec) == status);
622
offs = page_header_get_field(page, PAGE_FREE);
624
/* Traverse the free list (of deleted records). */
626
ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
629
heap_no = rec_get_heap_no_new(rec);
630
ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
631
ut_a(heap_no < n_heap);
633
ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
634
ut_a(rec_get_status(rec) == status);
636
mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
638
if (UNIV_LIKELY_NULL(recs)) {
639
/* Ensure that each heap_no occurs at most once. */
640
ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
641
/* exclude infimum and supremum */
642
recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
645
offs = rec_get_next_offs(rec, TRUE);
648
/* Ensure that each heap no occurs at least once. */
649
ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap);
652
/**********************************************************************//**
Allocate memory for zlib. */
654
extern "C" void* page_zip_malloc(void* opaque, uInt items, uInt size);
656
extern "C" void* page_zip_malloc
659
void* opaque, /*!< in/out: memory heap */
660
uInt items, /*!< in: number of items to allocate */
661
uInt size) /*!< in: size of an item in bytes */
663
return(mem_heap_alloc(static_cast<mem_block_info_t *>(opaque), items * size));
666
/**********************************************************************//**
Deallocate memory for zlib. */
668
extern "C" void page_zip_free(void *opaque, void *address);
670
extern "C" void page_zip_free(void *, void *)
673
/**********************************************************************//**
Configure the zlib allocator to use the given memory heap. */
679
void* stream, /*!< in/out: zlib stream */
680
mem_heap_t* heap) /*!< in: memory heap to use */
682
z_stream* strm = static_cast<z_stream *>(stream);
684
strm->zalloc = page_zip_malloc;
685
strm->zfree = page_zip_free;
689
#if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
690
/** Symbol for enabling compression and decompression diagnostics */
691
# define PAGE_ZIP_COMPRESS_DBG
694
#ifdef PAGE_ZIP_COMPRESS_DBG
695
/** Set this variable in a debugger to enable
696
excessive logging in page_zip_compress(). */
697
UNIV_INTERN ibool page_zip_compress_dbg;
698
/** Set this variable in a debugger to enable
699
binary logging of the data passed to deflate().
700
When this variable is nonzero, it will act
701
as a log file name generator. */
702
UNIV_INTERN unsigned page_zip_compress_log;
704
/**********************************************************************//**
Wrapper for deflate(). Log the operation if page_zip_compress_dbg is set.
@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
709
page_zip_compress_deflate(
710
/*======================*/
711
FILE* logfile,/*!< in: log file, or NULL */
712
z_streamp strm, /*!< in/out: compressed stream for deflate() */
713
int flush) /*!< in: deflate() flushing method */
716
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
717
ut_print_buf(stderr, strm->next_in, strm->avail_in);
719
if (UNIV_LIKELY_NULL(logfile)) {
720
fwrite(strm->next_in, 1, strm->avail_in, logfile);
722
status = deflate(strm, flush);
723
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
724
fprintf(stderr, " -> %d\n", status);
729
/* Redefine deflate(). */
731
/** Debug wrapper for the zlib compression routine deflate().
Log the operation if page_zip_compress_dbg is set.
@param strm in/out: compressed stream
@param flush in: flushing method
@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
736
# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
737
/** Declaration of the logfile parameter */
738
# define FILE_LOGFILE FILE* logfile,
739
/** The logfile parameter */
740
# define LOGFILE logfile,
741
#else /* PAGE_ZIP_COMPRESS_DBG */
742
/** Empty declaration of the logfile parameter */
743
# define FILE_LOGFILE
744
/** Missing logfile parameter */
746
#endif /* PAGE_ZIP_COMPRESS_DBG */
748
/**********************************************************************//**
Compress the records of a node pointer page.
@return Z_OK, or a zlib error code */
753
page_zip_compress_node_ptrs(
754
/*========================*/
756
z_stream* c_stream, /*!< in/out: compressed page stream */
757
const rec_t** recs, /*!< in: dense page directory
759
ulint n_dense, /*!< in: size of recs[] */
760
dict_index_t* index, /*!< in: the index of the page */
761
byte* storage, /*!< in: end of dense page directory */
762
mem_heap_t* heap) /*!< in: temporary memory heap */
765
ulint* offsets = NULL;
768
const rec_t* rec = *recs++;
770
offsets = rec_get_offsets(rec, index, offsets,
771
ULINT_UNDEFINED, &heap);
772
/* Only leaf nodes may contain externally stored columns. */
773
ut_ad(!rec_offs_any_extern(offsets));
775
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
776
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
777
rec_offs_extra_size(offsets));
779
/* Compress the extra bytes. */
780
c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
783
if (c_stream->avail_in) {
784
err = deflate(c_stream, Z_NO_FLUSH);
785
if (UNIV_UNLIKELY(err != Z_OK)) {
789
ut_ad(!c_stream->avail_in);
791
/* Compress the data bytes, except node_ptr. */
792
c_stream->next_in = (byte*) rec;
793
c_stream->avail_in = rec_offs_data_size(offsets)
795
ut_ad(c_stream->avail_in);
797
err = deflate(c_stream, Z_NO_FLUSH);
798
if (UNIV_UNLIKELY(err != Z_OK)) {
802
ut_ad(!c_stream->avail_in);
804
memcpy(storage - REC_NODE_PTR_SIZE
805
* (rec_get_heap_no_new(rec) - 1),
806
c_stream->next_in, REC_NODE_PTR_SIZE);
807
c_stream->next_in += REC_NODE_PTR_SIZE;
813
/**********************************************************************//**
Compress the records of a leaf node of a secondary index.
@return Z_OK, or a zlib error code */
818
page_zip_compress_sec(
819
/*==================*/
821
z_stream* c_stream, /*!< in/out: compressed page stream */
822
const rec_t** recs, /*!< in: dense page directory
824
ulint n_dense) /*!< in: size of recs[] */
831
const rec_t* rec = *recs++;
833
/* Compress everything up to this record. */
834
c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
837
if (UNIV_LIKELY(c_stream->avail_in)) {
838
UNIV_MEM_ASSERT_RW(c_stream->next_in,
840
err = deflate(c_stream, Z_NO_FLUSH);
841
if (UNIV_UNLIKELY(err != Z_OK)) {
846
ut_ad(!c_stream->avail_in);
847
ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
849
/* Skip the REC_N_NEW_EXTRA_BYTES. */
851
c_stream->next_in = (byte*) rec;
857
/**********************************************************************//**
Compress a record of a leaf node of a clustered index that contains
externally stored columns.
@return Z_OK, or a zlib error code */
863
page_zip_compress_clust_ext(
864
/*========================*/
866
z_stream* c_stream, /*!< in/out: compressed page stream */
867
const rec_t* rec, /*!< in: record */
868
const ulint* offsets, /*!< in: rec_get_offsets(rec) */
869
ulint trx_id_col, /*!< in: position of of DB_TRX_ID */
870
byte* deleted, /*!< in: dense directory entry pointing
871
to the head of the free list */
872
byte* storage, /*!< in: end of dense page directory */
873
byte** externs, /*!< in/out: pointer to the next
874
available BLOB pointer */
875
ulint* n_blobs) /*!< in/out: number of
876
externally stored columns */
881
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
882
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
883
rec_offs_extra_size(offsets));
885
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
889
if (UNIV_UNLIKELY(i == trx_id_col)) {
890
ut_ad(!rec_offs_nth_extern(offsets, i));
891
/* Store trx_id and roll_ptr
892
in uncompressed form. */
893
src = rec_get_nth_field(rec, offsets, i, &len);
894
ut_ad(src + DATA_TRX_ID_LEN
895
== rec_get_nth_field(rec, offsets,
897
ut_ad(len == DATA_ROLL_PTR_LEN);
899
/* Compress any preceding bytes. */
901
= src - c_stream->next_in;
903
if (c_stream->avail_in) {
904
err = deflate(c_stream, Z_NO_FLUSH);
905
if (UNIV_UNLIKELY(err != Z_OK)) {
911
ut_ad(!c_stream->avail_in);
912
ut_ad(c_stream->next_in == src);
915
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
916
* (rec_get_heap_no_new(rec) - 1),
918
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
921
+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
923
/* Skip also roll_ptr */
925
} else if (rec_offs_nth_extern(offsets, i)) {
926
src = rec_get_nth_field(rec, offsets, i, &len);
927
ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
928
src += len - BTR_EXTERN_FIELD_REF_SIZE;
930
c_stream->avail_in = src
932
if (UNIV_LIKELY(c_stream->avail_in)) {
933
err = deflate(c_stream, Z_NO_FLUSH);
934
if (UNIV_UNLIKELY(err != Z_OK)) {
940
ut_ad(!c_stream->avail_in);
941
ut_ad(c_stream->next_in == src);
943
/* Reserve space for the data at
944
the end of the space reserved for
945
the compressed data and the page
950
<= BTR_EXTERN_FIELD_REF_SIZE)) {
955
ut_ad(*externs == c_stream->next_out
956
+ c_stream->avail_out
957
+ 1/* end of modif. log */);
960
+= BTR_EXTERN_FIELD_REF_SIZE;
962
/* Skip deleted records. */
964
(page_zip_dir_find_low(
966
page_offset(rec)))) {
972
-= BTR_EXTERN_FIELD_REF_SIZE;
973
*externs -= BTR_EXTERN_FIELD_REF_SIZE;
975
/* Copy the BLOB pointer */
976
memcpy(*externs, c_stream->next_in
977
- BTR_EXTERN_FIELD_REF_SIZE,
978
BTR_EXTERN_FIELD_REF_SIZE);
985
/**********************************************************************//**
Compress the records of a leaf node of a clustered index.
@return Z_OK, or a zlib error code */
990
page_zip_compress_clust(
991
/*====================*/
993
z_stream* c_stream, /*!< in/out: compressed page stream */
994
const rec_t** recs, /*!< in: dense page directory
996
ulint n_dense, /*!< in: size of recs[] */
997
dict_index_t* index, /*!< in: the index of the page */
998
ulint* n_blobs, /*!< in: 0; out: number of
999
externally stored columns */
1000
ulint trx_id_col, /*!< index of the trx_id column */
1001
byte* deleted, /*!< in: dense directory entry pointing
1002
to the head of the free list */
1003
byte* storage, /*!< in: end of dense page directory */
1004
mem_heap_t* heap) /*!< in: temporary memory heap */
1007
ulint* offsets = NULL;
1008
/* BTR_EXTERN_FIELD_REF storage */
1009
byte* externs = storage - n_dense
1010
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
1012
ut_ad(*n_blobs == 0);
1015
const rec_t* rec = *recs++;
1017
offsets = rec_get_offsets(rec, index, offsets,
1018
ULINT_UNDEFINED, &heap);
1019
ut_ad(rec_offs_n_fields(offsets)
1020
== dict_index_get_n_fields(index));
1021
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
1022
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
1023
rec_offs_extra_size(offsets));
1025
/* Compress the extra bytes. */
1026
c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
1027
- c_stream->next_in;
1029
if (c_stream->avail_in) {
1030
err = deflate(c_stream, Z_NO_FLUSH);
1031
if (UNIV_UNLIKELY(err != Z_OK)) {
1036
ut_ad(!c_stream->avail_in);
1037
ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
1039
/* Compress the data bytes. */
1041
c_stream->next_in = (byte*) rec;
1043
/* Check if there are any externally stored columns.
1044
For each externally stored column, store the
1045
BTR_EXTERN_FIELD_REF separately. */
1046
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
1047
ut_ad(dict_index_is_clust(index));
1049
err = page_zip_compress_clust_ext(
1051
c_stream, rec, offsets, trx_id_col,
1052
deleted, storage, &externs, n_blobs);
1054
if (UNIV_UNLIKELY(err != Z_OK)) {
1062
/* Store trx_id and roll_ptr in uncompressed form. */
1063
src = rec_get_nth_field(rec, offsets,
1065
ut_ad(src + DATA_TRX_ID_LEN
1066
== rec_get_nth_field(rec, offsets,
1067
trx_id_col + 1, &len));
1068
ut_ad(len == DATA_ROLL_PTR_LEN);
1069
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
1070
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
1071
rec_offs_extra_size(offsets));
1073
/* Compress any preceding bytes. */
1074
c_stream->avail_in = src - c_stream->next_in;
1076
if (c_stream->avail_in) {
1077
err = deflate(c_stream, Z_NO_FLUSH);
1078
if (UNIV_UNLIKELY(err != Z_OK)) {
1084
ut_ad(!c_stream->avail_in);
1085
ut_ad(c_stream->next_in == src);
1088
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
1089
* (rec_get_heap_no_new(rec) - 1),
1091
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
1094
+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1096
/* Skip also roll_ptr */
1097
ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets));
1100
/* Compress the last bytes of the record. */
1101
c_stream->avail_in = rec + rec_offs_data_size(offsets)
1102
- c_stream->next_in;
1104
if (c_stream->avail_in) {
1105
err = deflate(c_stream, Z_NO_FLUSH);
1106
if (UNIV_UNLIKELY(err != Z_OK)) {
1111
ut_ad(!c_stream->avail_in);
1112
} while (--n_dense);
1118
/**********************************************************************//**
@return TRUE on success, FALSE on failure; page_zip will be left
intact on failure. */
1126
page_zip_des_t* page_zip,/*!< in: size; out: data, n_blobs,
1127
m_start, m_end, m_nonempty */
1128
const page_t* page, /*!< in: uncompressed page */
1129
dict_index_t* index, /*!< in: index of the B-tree node */
1130
mtr_t* mtr) /*!< in: mini-transaction, or NULL */
1134
ulint n_fields;/* number of index fields needed */
1135
byte* fields; /*!< index field information */
1136
byte* buf; /*!< compressed payload of the page */
1137
byte* buf_end;/* end of buf */
1139
ulint slot_size;/* amount of uncompressed bytes per record */
1140
const rec_t** recs; /*!< dense page directory, sorted by address */
1143
ulint* offsets = NULL;
1145
byte* storage;/* storage of uncompressed columns */
1146
#ifndef UNIV_HOTBACKUP
1147
ullint usec = ut_time_us(NULL);
1148
#endif /* !UNIV_HOTBACKUP */
1149
#ifdef PAGE_ZIP_COMPRESS_DBG
1150
FILE* logfile = NULL;
1153
ut_a(page_is_comp(page));
1154
ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
1155
ut_ad(page_simple_validate_new((page_t*) page));
1156
ut_ad(page_zip_simple_validate(page_zip));
1157
ut_ad(dict_table_is_comp(index->table));
1158
ut_ad(!dict_index_is_ibuf(index));
1160
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
1162
/* Check the data that will be omitted. */
1163
ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
1164
infimum_extra, sizeof infimum_extra));
1165
ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
1166
infimum_data, sizeof infimum_data));
1167
ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
1168
/* info_bits == 0, n_owned <= max */
1169
<= PAGE_DIR_SLOT_MAX_N_OWNED);
1170
ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
1171
supremum_extra_data, sizeof supremum_extra_data));
1173
if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
1174
ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
1175
== PAGE_NEW_SUPREMUM);
1178
if (page_is_leaf(page)) {
1179
n_fields = dict_index_get_n_fields(index);
1181
n_fields = dict_index_get_n_unique_in_tree(index);
1184
/* The dense directory excludes the infimum and supremum records. */
1185
n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
1186
#ifdef PAGE_ZIP_COMPRESS_DBG
1187
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
1188
fprintf(stderr, "compress %p %p %lu %lu %lu\n",
1189
(void*) page_zip, (void*) page,
1193
if (UNIV_UNLIKELY(page_zip_compress_log)) {
1194
/* Create a log file for every compression attempt. */
1195
char logfilename[9];
1196
ut_snprintf(logfilename, sizeof logfilename,
1197
"%08x", page_zip_compress_log++);
1198
logfile = fopen(logfilename, "wb");
1201
/* Write the uncompressed page to the log. */
1202
fwrite(page, 1, UNIV_PAGE_SIZE, logfile);
1203
/* Record the compressed size as zero.
1204
This will be overwritten at successful exit. */
1211
#endif /* PAGE_ZIP_COMPRESS_DBG */
1212
#ifndef UNIV_HOTBACKUP
1213
page_zip_stat[page_zip->ssize - 1].compressed++;
1214
#endif /* !UNIV_HOTBACKUP */
1216
if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
1217
>= page_zip_get_size(page_zip))) {
1222
heap = mem_heap_create(page_zip_get_size(page_zip)
1223
+ n_fields * (2 + sizeof *offsets)
1224
+ n_dense * ((sizeof *recs)
1225
- PAGE_ZIP_DIR_SLOT_SIZE)
1226
+ UNIV_PAGE_SIZE * 4
1227
+ (512 << MAX_MEM_LEVEL));
1229
recs = static_cast<const unsigned char **>(mem_heap_zalloc(heap, n_dense * sizeof *recs));
1231
fields = static_cast<byte *>(mem_heap_alloc(heap, (n_fields + 1) * 2));
1233
buf = static_cast<byte *>(mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA));
1234
buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA;
1236
/* Compress the data payload. */
1237
page_zip_set_alloc(&c_stream, heap);
1239
err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
1240
Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
1241
MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
1244
c_stream.next_out = buf;
1245
/* Subtract the space reserved for uncompressed data. */
1246
/* Page header and the end marker of the modification log */
1247
c_stream.avail_out = buf_end - buf - 1;
1248
/* Dense page directory and uncompressed columns, if any */
1249
if (page_is_leaf(page)) {
1250
if (dict_index_is_clust(index)) {
1251
trx_id_col = dict_index_get_sys_col_pos(
1252
index, DATA_TRX_ID);
1253
ut_ad(trx_id_col > 0);
1254
ut_ad(trx_id_col != ULINT_UNDEFINED);
1256
slot_size = PAGE_ZIP_DIR_SLOT_SIZE
1257
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1259
/* Signal the absence of trx_id
1260
in page_zip_fields_encode() */
1261
ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
1262
== ULINT_UNDEFINED);
1264
slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
1267
slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
1268
trx_id_col = ULINT_UNDEFINED;
1271
if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size
1272
+ 6/* sizeof(zlib header and footer) */)) {
1276
c_stream.avail_out -= n_dense * slot_size;
1277
c_stream.avail_in = page_zip_fields_encode(n_fields, index,
1278
trx_id_col, fields);
1279
c_stream.next_in = fields;
1280
if (UNIV_LIKELY(!trx_id_col)) {
1281
trx_id_col = ULINT_UNDEFINED;
1284
UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1285
err = deflate(&c_stream, Z_FULL_FLUSH);
1290
ut_ad(!c_stream.avail_in);
1292
page_zip_dir_encode(page, buf_end, recs);
1294
c_stream.next_in = (byte*) page + PAGE_ZIP_START;
1296
storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
1298
/* Compress the records in heap_no order. */
1299
if (UNIV_UNLIKELY(!n_dense)) {
1300
} else if (!page_is_leaf(page)) {
1301
/* This is a node pointer page. */
1302
err = page_zip_compress_node_ptrs(LOGFILE
1303
&c_stream, recs, n_dense,
1304
index, storage, heap);
1305
if (UNIV_UNLIKELY(err != Z_OK)) {
1308
} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
1309
/* This is a leaf page in a secondary index. */
1310
err = page_zip_compress_sec(LOGFILE
1311
&c_stream, recs, n_dense);
1312
if (UNIV_UNLIKELY(err != Z_OK)) {
1316
/* This is a leaf page in a clustered index. */
1317
err = page_zip_compress_clust(LOGFILE
1318
&c_stream, recs, n_dense,
1319
index, &n_blobs, trx_id_col,
1320
buf_end - PAGE_ZIP_DIR_SLOT_SIZE
1321
* page_get_n_recs(page),
1323
if (UNIV_UNLIKELY(err != Z_OK)) {
1328
/* Finish the compression. */
1329
ut_ad(!c_stream.avail_in);
1330
/* Compress any trailing garbage, in case the last record was
1331
allocated from an originally longer space on the free list,
1332
or the data of the last record from page_zip_compress_sec(). */
1334
= page_header_get_field(page, PAGE_HEAP_TOP)
1335
- (c_stream.next_in - page);
1336
ut_a(c_stream.avail_in <= UNIV_PAGE_SIZE - PAGE_ZIP_START - PAGE_DIR);
1338
UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1339
err = deflate(&c_stream, Z_FINISH);
1341
if (UNIV_UNLIKELY(err != Z_STREAM_END)) {
1343
deflateEnd(&c_stream);
1344
mem_heap_free(heap);
1346
#ifdef PAGE_ZIP_COMPRESS_DBG
1350
#endif /* PAGE_ZIP_COMPRESS_DBG */
1351
#ifndef UNIV_HOTBACKUP
1352
page_zip_stat[page_zip->ssize - 1].compressed_usec
1353
+= ut_time_us(NULL) - usec;
1354
#endif /* !UNIV_HOTBACKUP */
1358
err = deflateEnd(&c_stream);
1361
ut_ad(buf + c_stream.total_out == c_stream.next_out);
1362
ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out);
1364
/* Valgrind believes that zlib does not initialize some bits
1365
in the last 7 or 8 bytes of the stream. Make Valgrind happy. */
1366
UNIV_MEM_VALID(buf, c_stream.total_out);
1368
/* Zero out the area reserved for the modification log.
1369
Space for the end marker of the modification log is not
1370
included in avail_out. */
1371
memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */);
1375
#endif /* UNIV_DEBUG */
1376
page_zip->m_end = PAGE_DATA + c_stream.total_out;
1377
page_zip->m_nonempty = FALSE;
1378
page_zip->n_blobs = n_blobs;
1379
/* Copy those header fields that will not be written
1380
in buf_flush_init_for_writing() */
1381
memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
1382
FIL_PAGE_LSN - FIL_PAGE_PREV);
1383
memcpy(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2);
1384
memcpy(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
1385
PAGE_DATA - FIL_PAGE_DATA);
1386
/* Copy the rest of the compressed page */
1387
memcpy(page_zip->data + PAGE_DATA, buf,
1388
page_zip_get_size(page_zip) - PAGE_DATA);
1389
mem_heap_free(heap);
1390
#ifdef UNIV_ZIP_DEBUG
1391
ut_a(page_zip_validate(page_zip, page));
1392
#endif /* UNIV_ZIP_DEBUG */
1395
#ifndef UNIV_HOTBACKUP
1396
page_zip_compress_write_log(page_zip, page, index, mtr);
1397
#endif /* !UNIV_HOTBACKUP */
1400
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
1402
#ifdef PAGE_ZIP_COMPRESS_DBG
1404
/* Record the compressed size of the block. */
1406
mach_write_to_4(sz, c_stream.total_out);
1407
fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET);
1408
fwrite(sz, 1, sizeof sz, logfile);
1411
#endif /* PAGE_ZIP_COMPRESS_DBG */
1412
#ifndef UNIV_HOTBACKUP
1414
page_zip_stat_t* zip_stat
1415
= &page_zip_stat[page_zip->ssize - 1];
1416
zip_stat->compressed_ok++;
1417
zip_stat->compressed_usec += ut_time_us(NULL) - usec;
1419
#endif /* !UNIV_HOTBACKUP */
1424
/**********************************************************************//**
1425
Compare two page directory entries.
1426
@return positive if rec1 > rec2 */
1431
/* NOTE(review): corrupted fragment -- the interleaved original line
numbers jump 1426 -> 1431, so the function header (static qualifier,
return type and the name line of page_zip_dir_cmp, original lines
1427-1430) plus the enclosing braces are missing from this chunk.
Confirm against a pristine page0zip.c before editing. */
const rec_t* rec1, /*!< in: rec1 */
1432
const rec_t* rec2) /*!< in: rec2 */
1434
/* Pointer comparison only: both records live on the same page, so
comparing their addresses orders them for the address-based sort in
page_zip_dir_sort() below. */
return(rec1 > rec2);
1437
/**********************************************************************//**
1438
Sort the dense page directory by address (heap_no). */
1443
/* NOTE(review): corrupted fragment -- the numbering jumps 1438 -> 1443
(the static/void/name lines of page_zip_dir_sort are missing) and the
chunk ends mid-statement: the UT_SORT_FUNCTION_BODY invocation below
is missing its final argument (presumably the page_zip_dir_cmp
comparator) and closing parenthesis. Verify against the original
source before touching this. */
rec_t** arr, /*!< in/out: dense page directory */
1444
rec_t** aux_arr,/*!< in/out: work area */
1445
ulint low, /*!< in: lower bound of the sorting area, inclusive */
1446
ulint high) /*!< in: upper bound of the sorting area, exclusive */
1448
/* UT_SORT_FUNCTION_BODY expands to a merge-sort over arr[low..high)
using aux_arr as scratch space -- TODO confirm against ut0sort.h. */
UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
1452
/**********************************************************************//**
1453
Deallocate the index information initialized by page_zip_fields_decode(). */
1456
/* NOTE(review): corrupted fragment -- the numbering jumps 1458 -> 1461
and stops at 1465, so the static/void header lines, the opening brace,
an apparent "if (index)" guard, and the closing braces are missing
from this chunk. Confirm against a pristine page0zip.c. */
page_zip_fields_free(
1457
/*=================*/
1458
dict_index_t* index) /*!< in: dummy index to be freed */
1461
/* Tear down the ZIP_DUMMY table/index pair built by
page_zip_fields_decode(): release the index heap first, then the
table's autoinc mutex, the separately allocated table name, and
finally the table heap that owns the remaining table metadata.
Order matters: index->heap must be freed before table->heap,
which presumably owns the index's backing storage references. */
dict_table_t* table = index->table;
1462
mem_heap_free(index->heap);
1463
mutex_free(&(table->autoinc_mutex));
1464
ut_free(table->name);
1465
mem_heap_free(table->heap);
1469
/**********************************************************************//**
1470
Read the index information for the compressed page.
1471
@return own: dummy index describing the page, or NULL on error */
1474
page_zip_fields_decode(
1475
/*===================*/
1476
const byte* buf, /*!< in: index information */
1477
const byte* end, /*!< in: end of buf */
1478
ulint* trx_id_col)/*!< in: NULL for non-leaf pages;
1479
for leaf pages, pointer to where to store
1480
the position of the trx_id column */
1486
dict_table_t* table;
1487
dict_index_t* index;
1489
/* Determine the number of fields. */
1490
for (b = buf, n = 0; b < end; n++) {
1492
b++; /* skip the second byte */
1496
n--; /* n_nullable or trx_id */
1498
if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
1500
page_zip_fail(("page_zip_fields_decode: n = %lu\n",
1505
if (UNIV_UNLIKELY(b > end)) {
1507
page_zip_fail(("page_zip_fields_decode: %p > %p\n",
1508
(const void*) b, (const void*) end));
1512
table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n,
1514
index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
1515
DICT_HDR_SPACE, 0, n);
1516
index->table = table;
1518
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
1519
index->cached = TRUE;
1521
/* Initialize the fields. */
1522
for (b = buf, i = 0; i < n; i++) {
1528
if (UNIV_UNLIKELY(val & 0x80)) {
1529
/* fixed length > 62 bytes */
1530
val = (val & 0x7f) << 8 | *b++;
1532
mtype = DATA_FIXBINARY;
1533
} else if (UNIV_UNLIKELY(val >= 126)) {
1534
/* variable length with max > 255 bytes */
1536
mtype = DATA_BINARY;
1537
} else if (val <= 1) {
1538
/* variable length with max <= 255 bytes */
1540
mtype = DATA_BINARY;
1542
/* fixed length < 62 bytes */
1544
mtype = DATA_FIXBINARY;
1547
dict_mem_table_add_col(table, NULL, NULL, mtype,
1548
val & 1 ? DATA_NOT_NULL : 0, len);
1549
dict_index_add_col(index, table,
1550
dict_table_get_nth_col(table, i), 0);
1554
if (UNIV_UNLIKELY(val & 0x80)) {
1555
val = (val & 0x7f) << 8 | *b++;
1558
/* Decode the position of the trx_id column. */
1561
val = ULINT_UNDEFINED;
1562
} else if (UNIV_UNLIKELY(val >= n)) {
1563
page_zip_fields_free(index);
1566
index->type = DICT_CLUSTERED;
1571
/* Decode the number of nullable fields. */
1572
if (UNIV_UNLIKELY(index->n_nullable > val)) {
1573
page_zip_fields_free(index);
1576
index->n_nullable = val;
1585
/**********************************************************************//**
1586
Populate the sparse page directory from the dense directory.
1587
@return TRUE on success, FALSE on failure */
1590
page_zip_dir_decode(
1591
/*================*/
1592
const page_zip_des_t* page_zip,/*!< in: dense page directory on
1594
page_t* page, /*!< in: compact page with valid header;
1595
out: trailer and sparse page directory
1597
rec_t** recs, /*!< out: dense page directory sorted by
1598
ascending address (and heap_no) */
1599
rec_t** recs_aux,/*!< in/out: scratch area */
1600
ulint n_dense)/*!< in: number of user records, and
1601
size of recs[] and recs_aux[] */
1607
n_recs = page_get_n_recs(page);
1609
if (UNIV_UNLIKELY(n_recs > n_dense)) {
1610
page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n",
1611
(ulong) n_recs, (ulong) n_dense));
1615
/* Traverse the list of stored records in the sorting order,
1616
starting from the first user record. */
1618
slot = page + (UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
1619
UNIV_PREFETCH_RW(slot);
1621
/* Zero out the page trailer. */
1622
memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);
1624
mach_write_to_2(slot, PAGE_NEW_INFIMUM);
1625
slot -= PAGE_DIR_SLOT_SIZE;
1626
UNIV_PREFETCH_RW(slot);
1628
/* Initialize the sparse directory and copy the dense directory. */
1629
for (i = 0; i < n_recs; i++) {
1630
ulint offs = page_zip_dir_get(page_zip, i);
1632
if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
1633
mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
1634
slot -= PAGE_DIR_SLOT_SIZE;
1635
UNIV_PREFETCH_RW(slot);
1638
if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
1639
< PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
1640
page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n",
1641
(unsigned) i, (unsigned) n_recs,
1646
recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
1649
mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
1651
const page_dir_slot_t* last_slot = page_dir_get_nth_slot(
1652
page, page_dir_get_n_slots(page) - 1);
1654
if (UNIV_UNLIKELY(slot != last_slot)) {
1655
page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
1657
(const void*) last_slot));
1662
/* Copy the rest of the dense directory. */
1663
for (; i < n_dense; i++) {
1664
ulint offs = page_zip_dir_get(page_zip, i);
1666
if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
1667
page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n",
1668
(unsigned) i, (unsigned) n_dense,
1673
recs[i] = page + offs;
1676
if (UNIV_LIKELY(n_dense > 1)) {
1677
page_zip_dir_sort(recs, recs_aux, 0, n_dense);
1682
/**********************************************************************//**
1683
Initialize the REC_N_NEW_EXTRA_BYTES of each record.
1684
@return TRUE on success, FALSE on failure */
1687
page_zip_set_extra_bytes(
1688
/*=====================*/
1689
const page_zip_des_t* page_zip,/*!< in: compressed page */
1690
page_t* page, /*!< in/out: uncompressed page */
1691
ulint info_bits)/*!< in: REC_INFO_MIN_REC_FLAG or 0 */
1699
n = page_get_n_recs(page);
1700
rec = page + PAGE_NEW_INFIMUM;
1702
for (i = 0; i < n; i++) {
1703
offs = page_zip_dir_get(page_zip, i);
1705
if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_DEL)) {
1706
info_bits |= REC_INFO_DELETED_FLAG;
1708
if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
1709
info_bits |= n_owned;
1714
offs &= PAGE_ZIP_DIR_SLOT_MASK;
1715
if (UNIV_UNLIKELY(offs < PAGE_ZIP_START
1716
+ REC_N_NEW_EXTRA_BYTES)) {
1717
page_zip_fail(("page_zip_set_extra_bytes 1:"
1719
(unsigned) i, (unsigned) n,
1724
rec_set_next_offs_new(rec, offs);
1726
rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
1730
/* Set the next pointer of the last user record. */
1731
rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);
1733
/* Set n_owned of the supremum record. */
1734
page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;
1736
/* The dense directory excludes the infimum and supremum records. */
1737
n = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
1740
if (UNIV_LIKELY(i == n)) {
1744
page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n",
1745
(unsigned) i, (unsigned) n));
1749
offs = page_zip_dir_get(page_zip, i);
1751
/* Set the extra bytes of deleted records on the free list. */
1753
if (UNIV_UNLIKELY(!offs)
1754
|| UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
1756
page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n",
1762
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
1768
offs = page_zip_dir_get(page_zip, i);
1769
rec_set_next_offs_new(rec, offs);
1772
/* Terminate the free list. */
1773
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
1774
rec_set_next_offs_new(rec, 0);
1779
/**********************************************************************//**
1780
Apply the modification log to a record containing externally stored
1781
columns. Do not copy the fields that are stored separately.
1782
@return pointer to modification log, or NULL on failure */
1785
page_zip_apply_log_ext(
1786
/*===================*/
1787
rec_t* rec, /*!< in/out: record */
1788
const ulint* offsets, /*!< in: rec_get_offsets(rec) */
1789
ulint trx_id_col, /*!< in: position of of DB_TRX_ID */
1790
const byte* data, /*!< in: modification log */
1791
const byte* end) /*!< in: end of modification log */
1795
byte* next_out = rec;
1797
/* Check if there are any externally stored columns.
1798
For each externally stored column, skip the
1799
BTR_EXTERN_FIELD_REF. */
1801
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
1804
if (UNIV_UNLIKELY(i == trx_id_col)) {
1805
/* Skip trx_id and roll_ptr */
1806
dst = rec_get_nth_field(rec, offsets,
1808
if (UNIV_UNLIKELY(dst - next_out >= end - data)
1810
(len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
1811
|| rec_offs_nth_extern(offsets, i)) {
1812
page_zip_fail(("page_zip_apply_log_ext:"
1814
" %p - %p >= %p - %p\n",
1817
(const void*) next_out,
1819
(const void*) data));
1823
memcpy(next_out, data, dst - next_out);
1824
data += dst - next_out;
1825
next_out = dst + (DATA_TRX_ID_LEN
1826
+ DATA_ROLL_PTR_LEN);
1827
} else if (rec_offs_nth_extern(offsets, i)) {
1828
dst = rec_get_nth_field(rec, offsets,
1831
>= BTR_EXTERN_FIELD_REF_SIZE);
1833
len += dst - next_out
1834
- BTR_EXTERN_FIELD_REF_SIZE;
1836
if (UNIV_UNLIKELY(data + len >= end)) {
1837
page_zip_fail(("page_zip_apply_log_ext: "
1838
"ext %p+%lu >= %p\n",
1841
(const void*) end));
1845
memcpy(next_out, data, len);
1848
+ BTR_EXTERN_FIELD_REF_SIZE;
1852
/* Copy the last bytes of the record. */
1853
len = rec_get_end(rec, offsets) - next_out;
1854
if (UNIV_UNLIKELY(data + len >= end)) {
1855
page_zip_fail(("page_zip_apply_log_ext: "
1856
"last %p+%lu >= %p\n",
1859
(const void*) end));
1862
memcpy(next_out, data, len);
1868
/**********************************************************************//**
1869
Apply the modification log to an uncompressed page.
1870
Do not copy the fields that are stored separately.
1871
@return pointer to end of modification log, or NULL on failure */
1876
const byte* data, /*!< in: modification log */
1877
ulint size, /*!< in: maximum length of the log, in bytes */
1878
rec_t** recs, /*!< in: dense page directory,
1879
sorted by address (indexed by
1880
heap_no - PAGE_HEAP_NO_USER_LOW) */
1881
ulint n_dense,/*!< in: size of recs[] */
1882
ulint trx_id_col,/*!< in: column number of trx_id in the index,
1883
or ULINT_UNDEFINED if none */
1885
/*!< in: heap_no and status bits for
1886
the next record to uncompress */
1887
dict_index_t* index, /*!< in: index of the page */
1888
ulint* offsets)/*!< in/out: work area for
1889
rec_get_offsets_reverse() */
1891
const byte* const end = data + size;
1900
if (UNIV_UNLIKELY(!val)) {
1904
val = (val & 0x7f) << 8 | *data++;
1905
if (UNIV_UNLIKELY(!val)) {
1906
page_zip_fail(("page_zip_apply_log:"
1907
" invalid val %x%x\n",
1908
data[-2], data[-1]));
1912
if (UNIV_UNLIKELY(data >= end)) {
1913
page_zip_fail(("page_zip_apply_log: %p >= %p\n",
1915
(const void*) end));
1918
if (UNIV_UNLIKELY((val >> 1) > n_dense)) {
1919
page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n",
1920
(ulong) val, (ulong) n_dense));
1924
/* Determine the heap number and status bits of the record. */
1925
rec = recs[(val >> 1) - 1];
1927
hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT;
1928
hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);
1930
/* This may either be an old record that is being
1931
overwritten (updated in place, or allocated from
1932
the free list), or a new record, with the next
1933
available_heap_no. */
1934
if (UNIV_UNLIKELY(hs > heap_status)) {
1935
page_zip_fail(("page_zip_apply_log: %lu > %lu\n",
1936
(ulong) hs, (ulong) heap_status));
1938
} else if (hs == heap_status) {
1939
/* A new record was allocated from the heap. */
1940
if (UNIV_UNLIKELY(val & 1)) {
1941
/* Only existing records may be cleared. */
1942
page_zip_fail(("page_zip_apply_log:"
1943
" attempting to create"
1944
" deleted rec %lu\n",
1948
heap_status += 1 << REC_HEAP_NO_SHIFT;
1951
mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);
1954
/* Clear the data bytes of the record. */
1955
mem_heap_t* heap = NULL;
1957
offs = rec_get_offsets(rec, index, offsets,
1958
ULINT_UNDEFINED, &heap);
1959
memset(rec, 0, rec_offs_data_size(offs));
1961
if (UNIV_LIKELY_NULL(heap)) {
1962
mem_heap_free(heap);
1967
#if REC_STATUS_NODE_PTR != TRUE
1968
# error "REC_STATUS_NODE_PTR != TRUE"
1970
rec_get_offsets_reverse(data, index,
1971
hs & REC_STATUS_NODE_PTR,
1973
rec_offs_make_valid(rec, index, offsets);
1975
/* Copy the extra bytes (backwards). */
1977
byte* start = rec_get_start(rec, offsets);
1978
byte* b = rec - REC_N_NEW_EXTRA_BYTES;
1979
while (b != start) {
1984
/* Copy the data bytes. */
1985
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
1986
/* Non-leaf nodes should not contain any
1987
externally stored columns. */
1988
if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
1989
page_zip_fail(("page_zip_apply_log: "
1990
"%lu&REC_STATUS_NODE_PTR\n",
1995
data = page_zip_apply_log_ext(
1996
rec, offsets, trx_id_col, data, end);
1998
if (UNIV_UNLIKELY(!data)) {
2001
} else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
2002
len = rec_offs_data_size(offsets)
2003
- REC_NODE_PTR_SIZE;
2004
/* Copy the data bytes, except node_ptr. */
2005
if (UNIV_UNLIKELY(data + len >= end)) {
2006
page_zip_fail(("page_zip_apply_log: "
2007
"node_ptr %p+%lu >= %p\n",
2010
(const void*) end));
2013
memcpy(rec, data, len);
2015
} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
2016
len = rec_offs_data_size(offsets);
2018
/* Copy all data bytes of
2019
a record in a secondary index. */
2020
if (UNIV_UNLIKELY(data + len >= end)) {
2021
page_zip_fail(("page_zip_apply_log: "
2022
"sec %p+%lu >= %p\n",
2025
(const void*) end));
2029
memcpy(rec, data, len);
2032
/* Skip DB_TRX_ID and DB_ROLL_PTR. */
2033
ulint l = rec_get_nth_field_offs(offsets,
2037
if (UNIV_UNLIKELY(data + l >= end)
2038
|| UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
2039
+ DATA_ROLL_PTR_LEN))) {
2040
page_zip_fail(("page_zip_apply_log: "
2041
"trx_id %p+%lu >= %p\n",
2044
(const void*) end));
2048
/* Copy any preceding data bytes. */
2049
memcpy(rec, data, l);
2052
/* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
2053
b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2054
len = rec_get_end(rec, offsets) - b;
2055
if (UNIV_UNLIKELY(data + len >= end)) {
2056
page_zip_fail(("page_zip_apply_log: "
2057
"clust %p+%lu >= %p\n",
2060
(const void*) end));
2063
memcpy(b, data, len);
2069
/**********************************************************************//**
2070
Decompress the records of a node pointer page.
2071
@return TRUE on success, FALSE on failure */
2074
page_zip_decompress_node_ptrs(
2075
/*==========================*/
2076
page_zip_des_t* page_zip, /*!< in/out: compressed page */
2077
z_stream* d_stream, /*!< in/out: compressed page stream */
2078
rec_t** recs, /*!< in: dense page directory
2079
sorted by address */
2080
ulint n_dense, /*!< in: size of recs[] */
2081
dict_index_t* index, /*!< in: the index of the page */
2082
ulint* offsets, /*!< in/out: temporary offsets */
2083
mem_heap_t* heap) /*!< in: temporary memory heap */
2085
ulint heap_status = REC_STATUS_NODE_PTR
2086
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2088
const byte* storage;
2090
/* Subtract the space reserved for uncompressed data. */
2091
d_stream->avail_in -= n_dense
2092
* (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE);
2094
/* Decompress the records in heap_no order. */
2095
for (slot = 0; slot < n_dense; slot++) {
2096
rec_t* rec = recs[slot];
2098
d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2099
- d_stream->next_out;
2101
ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
2102
- PAGE_ZIP_START - PAGE_DIR);
2103
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2105
/* Apparently, n_dense has grown
2106
since the time the page was last compressed. */
2110
if (!d_stream->avail_out) {
2115
page_zip_fail(("page_zip_decompress_node_ptrs:"
2116
" 1 inflate(Z_SYNC_FLUSH)=%s\n",
2121
ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2122
/* Prepare to decompress the data bytes. */
2123
d_stream->next_out = rec;
2124
/* Set heap_no and the status bits. */
2125
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2126
heap_status += 1 << REC_HEAP_NO_SHIFT;
2128
/* Read the offsets. The status bits are needed here. */
2129
offsets = rec_get_offsets(rec, index, offsets,
2130
ULINT_UNDEFINED, &heap);
2132
/* Non-leaf nodes should not have any externally
2134
ut_ad(!rec_offs_any_extern(offsets));
2136
/* Decompress the data bytes, except node_ptr. */
2137
d_stream->avail_out = rec_offs_data_size(offsets)
2138
- REC_NODE_PTR_SIZE;
2140
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2145
if (!d_stream->avail_out) {
2150
page_zip_fail(("page_zip_decompress_node_ptrs:"
2151
" 2 inflate(Z_SYNC_FLUSH)=%s\n",
2156
/* Clear the node pointer in case the record
2157
will be deleted and the space will be reallocated
2158
to a smaller record. */
2159
memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE);
2160
d_stream->next_out += REC_NODE_PTR_SIZE;
2162
ut_ad(d_stream->next_out == rec_get_end(rec, offsets));
2165
/* Decompress any trailing garbage, in case the last record was
2166
allocated from an originally longer space on the free list. */
2167
d_stream->avail_out = page_header_get_field(page_zip->data,
2169
- page_offset(d_stream->next_out);
2170
if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2171
- PAGE_ZIP_START - PAGE_DIR)) {
2173
page_zip_fail(("page_zip_decompress_node_ptrs:"
2174
" avail_out = %u\n",
2175
d_stream->avail_out));
2179
if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2180
page_zip_fail(("page_zip_decompress_node_ptrs:"
2181
" inflate(Z_FINISH)=%s\n",
2184
inflateEnd(d_stream);
2188
/* Note that d_stream->avail_out > 0 may hold here
2189
if the modification log is nonempty. */
2192
if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2197
page_t* page = page_align(d_stream->next_out);
2199
/* Clear the unused heap space on the uncompressed page. */
2200
memset(d_stream->next_out, 0,
2201
page_dir_get_nth_slot(page,
2202
page_dir_get_n_slots(page) - 1)
2203
- d_stream->next_out);
2207
page_zip->m_start = PAGE_DATA + d_stream->total_in;
2208
#endif /* UNIV_DEBUG */
2210
/* Apply the modification log. */
2212
const byte* mod_log_ptr;
2213
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2214
d_stream->avail_in + 1,
2216
ULINT_UNDEFINED, heap_status,
2219
if (UNIV_UNLIKELY(!mod_log_ptr)) {
2222
page_zip->m_end = mod_log_ptr - page_zip->data;
2223
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2227
(page_zip_get_trailer_len(page_zip,
2228
dict_index_is_clust(index), NULL)
2229
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
2230
page_zip_fail(("page_zip_decompress_node_ptrs:"
2231
" %lu + %lu >= %lu, %lu\n",
2232
(ulong) page_zip_get_trailer_len(
2233
page_zip, dict_index_is_clust(index),
2235
(ulong) page_zip->m_end,
2236
(ulong) page_zip_get_size(page_zip),
2237
(ulong) dict_index_is_clust(index)));
2241
/* Restore the uncompressed columns in heap_no order. */
2242
storage = page_zip->data + page_zip_get_size(page_zip)
2243
- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2245
for (slot = 0; slot < n_dense; slot++) {
2246
rec_t* rec = recs[slot];
2248
offsets = rec_get_offsets(rec, index, offsets,
2249
ULINT_UNDEFINED, &heap);
2250
/* Non-leaf nodes should not have any externally
2252
ut_ad(!rec_offs_any_extern(offsets));
2253
storage -= REC_NODE_PTR_SIZE;
2255
memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
2256
storage, REC_NODE_PTR_SIZE);
2262
/**********************************************************************//**
2263
Decompress the records of a leaf node of a secondary index.
2264
@return TRUE on success, FALSE on failure */
2267
page_zip_decompress_sec(
2268
/*====================*/
2269
page_zip_des_t* page_zip, /*!< in/out: compressed page */
2270
z_stream* d_stream, /*!< in/out: compressed page stream */
2271
rec_t** recs, /*!< in: dense page directory
2272
sorted by address */
2273
ulint n_dense, /*!< in: size of recs[] */
2274
dict_index_t* index, /*!< in: the index of the page */
2275
ulint* offsets) /*!< in/out: temporary offsets */
2277
ulint heap_status = REC_STATUS_ORDINARY
2278
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2281
ut_a(!dict_index_is_clust(index));
2283
/* Subtract the space reserved for uncompressed data. */
2284
d_stream->avail_in -= n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2286
for (slot = 0; slot < n_dense; slot++) {
2287
rec_t* rec = recs[slot];
2289
/* Decompress everything up to this record. */
2290
d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2291
- d_stream->next_out;
2293
if (UNIV_LIKELY(d_stream->avail_out)) {
2294
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2296
/* Apparently, n_dense has grown
2297
since the time the page was last compressed. */
2301
if (!d_stream->avail_out) {
2306
page_zip_fail(("page_zip_decompress_sec:"
2307
" inflate(Z_SYNC_FLUSH)=%s\n",
2313
ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2315
/* Skip the REC_N_NEW_EXTRA_BYTES. */
2317
d_stream->next_out = rec;
2319
/* Set heap_no and the status bits. */
2320
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2321
heap_status += 1 << REC_HEAP_NO_SHIFT;
2324
/* Decompress the data of the last record and any trailing garbage,
2325
in case the last record was allocated from an originally longer space
2326
on the free list. */
2327
d_stream->avail_out = page_header_get_field(page_zip->data,
2329
- page_offset(d_stream->next_out);
2330
if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2331
- PAGE_ZIP_START - PAGE_DIR)) {
2333
page_zip_fail(("page_zip_decompress_sec:"
2334
" avail_out = %u\n",
2335
d_stream->avail_out));
2339
if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2340
page_zip_fail(("page_zip_decompress_sec:"
2341
" inflate(Z_FINISH)=%s\n",
2344
inflateEnd(d_stream);
2348
/* Note that d_stream->avail_out > 0 may hold here
2349
if the modification log is nonempty. */
2352
if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2357
page_t* page = page_align(d_stream->next_out);
2359
/* Clear the unused heap space on the uncompressed page. */
2360
memset(d_stream->next_out, 0,
2361
page_dir_get_nth_slot(page,
2362
page_dir_get_n_slots(page) - 1)
2363
- d_stream->next_out);
2367
page_zip->m_start = PAGE_DATA + d_stream->total_in;
2368
#endif /* UNIV_DEBUG */
2370
/* Apply the modification log. */
2372
const byte* mod_log_ptr;
2373
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2374
d_stream->avail_in + 1,
2376
ULINT_UNDEFINED, heap_status,
2379
if (UNIV_UNLIKELY(!mod_log_ptr)) {
2382
page_zip->m_end = mod_log_ptr - page_zip->data;
2383
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2386
if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE, NULL)
2387
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
2389
page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n",
2390
(ulong) page_zip_get_trailer_len(
2391
page_zip, FALSE, NULL),
2392
(ulong) page_zip->m_end,
2393
(ulong) page_zip_get_size(page_zip)));
2397
/* There are no uncompressed columns on leaf pages of
2398
secondary indexes. */
2403
/**********************************************************************//**
2404
Decompress a record of a leaf node of a clustered index that contains
2405
externally stored columns.
2406
@return TRUE on success */
2409
page_zip_decompress_clust_ext(
2410
/*==========================*/
2411
z_stream* d_stream, /*!< in/out: compressed page stream */
2412
rec_t* rec, /*!< in/out: record */
2413
const ulint* offsets, /*!< in: rec_get_offsets(rec) */
2414
ulint trx_id_col) /*!< in: position of of DB_TRX_ID */
2418
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
2422
if (UNIV_UNLIKELY(i == trx_id_col)) {
2423
/* Skip trx_id and roll_ptr */
2424
dst = rec_get_nth_field(rec, offsets, i, &len);
2425
if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
2426
+ DATA_ROLL_PTR_LEN)) {
2428
page_zip_fail(("page_zip_decompress_clust_ext:"
2429
" len[%lu] = %lu\n",
2430
(ulong) i, (ulong) len));
2434
if (rec_offs_nth_extern(offsets, i)) {
2436
page_zip_fail(("page_zip_decompress_clust_ext:"
2437
" DB_TRX_ID at %lu is ext\n",
2442
d_stream->avail_out = dst - d_stream->next_out;
2444
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2448
if (!d_stream->avail_out) {
2453
page_zip_fail(("page_zip_decompress_clust_ext:"
2454
" 1 inflate(Z_SYNC_FLUSH)=%s\n",
2459
ut_ad(d_stream->next_out == dst);
2461
/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
2462
avoid uninitialized bytes in case the record
2463
is affected by page_zip_apply_log(). */
2464
memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2466
d_stream->next_out += DATA_TRX_ID_LEN
2467
+ DATA_ROLL_PTR_LEN;
2468
} else if (rec_offs_nth_extern(offsets, i)) {
2469
dst = rec_get_nth_field(rec, offsets, i, &len);
2470
ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
2471
dst += len - BTR_EXTERN_FIELD_REF_SIZE;
2473
d_stream->avail_out = dst - d_stream->next_out;
2474
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2478
if (!d_stream->avail_out) {
2483
page_zip_fail(("page_zip_decompress_clust_ext:"
2484
" 2 inflate(Z_SYNC_FLUSH)=%s\n",
2489
ut_ad(d_stream->next_out == dst);
2491
/* Clear the BLOB pointer in case
2492
the record will be deleted and the
2493
space will not be reused. Note that
2494
the final initialization of the BLOB
2495
pointers (copying from "externs"
2496
or clearing) will have to take place
2497
only after the page modification log
2498
has been applied. Otherwise, we
2499
could end up with an uninitialized
2500
BLOB pointer when a record is deleted,
2501
reallocated and deleted. */
2502
memset(d_stream->next_out, 0,
2503
BTR_EXTERN_FIELD_REF_SIZE);
2505
+= BTR_EXTERN_FIELD_REF_SIZE;
2512
/**********************************************************************//**
2513
Compress the records of a leaf node of a clustered index.
2514
@return TRUE on success, FALSE on failure */
2517
page_zip_decompress_clust(
2518
/*======================*/
2519
page_zip_des_t* page_zip, /*!< in/out: compressed page */
2520
z_stream* d_stream, /*!< in/out: compressed page stream */
2521
rec_t** recs, /*!< in: dense page directory
2522
sorted by address */
2523
ulint n_dense, /*!< in: size of recs[] */
2524
dict_index_t* index, /*!< in: the index of the page */
2525
ulint trx_id_col, /*!< index of the trx_id column */
2526
ulint* offsets, /*!< in/out: temporary offsets */
2527
mem_heap_t* heap) /*!< in: temporary memory heap */
2531
ulint heap_status = REC_STATUS_ORDINARY
2532
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2533
const byte* storage;
2534
const byte* externs;
2536
ut_a(dict_index_is_clust(index));
2538
/* Subtract the space reserved for uncompressed data. */
2539
d_stream->avail_in -= n_dense * (PAGE_ZIP_DIR_SLOT_SIZE
2541
+ DATA_ROLL_PTR_LEN);
2543
/* Decompress the records in heap_no order. */
2544
for (slot = 0; slot < n_dense; slot++) {
2545
rec_t* rec = recs[slot];
2547
d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2548
- d_stream->next_out;
2550
ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
2551
- PAGE_ZIP_START - PAGE_DIR);
2552
err = inflate(d_stream, Z_SYNC_FLUSH);
2555
/* Apparently, n_dense has grown
2556
since the time the page was last compressed. */
2560
if (UNIV_LIKELY(!d_stream->avail_out)) {
2565
page_zip_fail(("page_zip_decompress_clust:"
2566
" 1 inflate(Z_SYNC_FLUSH)=%s\n",
2571
ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2572
/* Prepare to decompress the data bytes. */
2573
d_stream->next_out = rec;
2574
/* Set heap_no and the status bits. */
2575
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2576
heap_status += 1 << REC_HEAP_NO_SHIFT;
2578
/* Read the offsets. The status bits are needed here. */
2579
offsets = rec_get_offsets(rec, index, offsets,
2580
ULINT_UNDEFINED, &heap);
2582
/* This is a leaf page in a clustered index. */
2584
/* Check if there are any externally stored columns.
2585
For each externally stored column, restore the
2586
BTR_EXTERN_FIELD_REF separately. */
2588
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
2590
(!page_zip_decompress_clust_ext(
2591
d_stream, rec, offsets, trx_id_col))) {
2596
/* Skip trx_id and roll_ptr */
2598
byte* dst = rec_get_nth_field(rec, offsets,
2600
if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
2601
+ DATA_ROLL_PTR_LEN)) {
2603
page_zip_fail(("page_zip_decompress_clust:"
2604
" len = %lu\n", (ulong) len));
2608
d_stream->avail_out = dst - d_stream->next_out;
2610
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2614
if (!d_stream->avail_out) {
2619
page_zip_fail(("page_zip_decompress_clust:"
2620
" 2 inflate(Z_SYNC_FLUSH)=%s\n",
2625
ut_ad(d_stream->next_out == dst);
2627
/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
2628
avoid uninitialized bytes in case the record
2629
is affected by page_zip_apply_log(). */
2630
memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2632
d_stream->next_out += DATA_TRX_ID_LEN
2633
+ DATA_ROLL_PTR_LEN;
2636
/* Decompress the last bytes of the record. */
2637
d_stream->avail_out = rec_get_end(rec, offsets)
2638
- d_stream->next_out;
2640
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2644
if (!d_stream->avail_out) {
2649
page_zip_fail(("page_zip_decompress_clust:"
2650
" 3 inflate(Z_SYNC_FLUSH)=%s\n",
2656
/* Decompress any trailing garbage, in case the last record was
2657
allocated from an originally longer space on the free list. */
2658
d_stream->avail_out = page_header_get_field(page_zip->data,
2660
- page_offset(d_stream->next_out);
2661
if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2662
- PAGE_ZIP_START - PAGE_DIR)) {
2664
page_zip_fail(("page_zip_decompress_clust:"
2665
" avail_out = %u\n",
2666
d_stream->avail_out));
2670
if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2671
page_zip_fail(("page_zip_decompress_clust:"
2672
" inflate(Z_FINISH)=%s\n",
2675
inflateEnd(d_stream);
2679
/* Note that d_stream->avail_out > 0 may hold here
2680
if the modification log is nonempty. */
2683
if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2688
page_t* page = page_align(d_stream->next_out);
2690
/* Clear the unused heap space on the uncompressed page. */
2691
memset(d_stream->next_out, 0,
2692
page_dir_get_nth_slot(page,
2693
page_dir_get_n_slots(page) - 1)
2694
- d_stream->next_out);
2698
page_zip->m_start = PAGE_DATA + d_stream->total_in;
2699
#endif /* UNIV_DEBUG */
2701
/* Apply the modification log. */
2703
const byte* mod_log_ptr;
2704
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2705
d_stream->avail_in + 1,
2707
trx_id_col, heap_status,
2710
if (UNIV_UNLIKELY(!mod_log_ptr)) {
2713
page_zip->m_end = mod_log_ptr - page_zip->data;
2714
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2717
if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE, NULL)
2718
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
2720
page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n",
2721
(ulong) page_zip_get_trailer_len(
2722
page_zip, TRUE, NULL),
2723
(ulong) page_zip->m_end,
2724
(ulong) page_zip_get_size(page_zip)));
2728
storage = page_zip->data + page_zip_get_size(page_zip)
2729
- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2731
externs = storage - n_dense
2732
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2734
/* Restore the uncompressed columns in heap_no order. */
2736
for (slot = 0; slot < n_dense; slot++) {
2740
rec_t* rec = recs[slot];
2741
ibool exists = !page_zip_dir_find_free(
2742
page_zip, page_offset(rec));
2743
offsets = rec_get_offsets(rec, index, offsets,
2744
ULINT_UNDEFINED, &heap);
2746
dst = rec_get_nth_field(rec, offsets,
2748
ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2749
storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
2750
memcpy(dst, storage,
2751
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2753
/* Check if there are any externally stored
2754
columns in this record. For each externally
2755
stored column, restore or clear the
2756
BTR_EXTERN_FIELD_REF. */
2757
if (!rec_offs_any_extern(offsets)) {
2761
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
2762
if (!rec_offs_nth_extern(offsets, i)) {
2765
dst = rec_get_nth_field(rec, offsets, i, &len);
2767
if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) {
2768
page_zip_fail(("page_zip_decompress_clust:"
2774
dst += len - BTR_EXTERN_FIELD_REF_SIZE;
2776
if (UNIV_LIKELY(exists)) {
2778
restore the BLOB pointer */
2779
externs -= BTR_EXTERN_FIELD_REF_SIZE;
2782
(externs < page_zip->data
2783
+ page_zip->m_end)) {
2784
page_zip_fail(("page_zip_"
2785
"decompress_clust: "
2787
(const void*) externs,
2795
memcpy(dst, externs,
2796
BTR_EXTERN_FIELD_REF_SIZE);
2798
page_zip->n_blobs++;
2801
clear the BLOB pointer */
2803
BTR_EXTERN_FIELD_REF_SIZE);
2811
/**********************************************************************//**
2812
Decompress a page. This function should tolerate errors on the compressed
2813
page. Instead of letting assertions fail, it will return FALSE if an
2814
inconsistency is detected.
2815
@return TRUE on success, FALSE on failure */
2818
page_zip_decompress(
2819
/*================*/
2820
page_zip_des_t* page_zip,/*!< in: data, ssize;
2821
out: m_start, m_end, m_nonempty, n_blobs */
2822
page_t* page, /*!< out: uncompressed page, may be trashed */
2823
ibool all) /*!< in: TRUE=decompress the whole page;
2824
FALSE=verify but do not copy some
2825
page header fields that should not change
2826
after page creation */
2829
dict_index_t* index = NULL;
2830
rec_t** recs; /*!< dense page directory, sorted by address */
2831
ulint n_dense;/* number of user records on the page */
2832
ulint trx_id_col = ULINT_UNDEFINED;
2835
#ifndef UNIV_HOTBACKUP
2836
ullint usec = ut_time_us(NULL);
2837
#endif /* !UNIV_HOTBACKUP */
2839
ut_ad(page_zip_simple_validate(page_zip));
2840
UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
2841
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
2843
/* The dense directory excludes the infimum and supremum records. */
2844
n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW;
2845
if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
2846
>= page_zip_get_size(page_zip))) {
2847
page_zip_fail(("page_zip_decompress 1: %lu %lu\n",
2849
(ulong) page_zip_get_size(page_zip)));
2853
heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE);
2854
recs = static_cast<byte **>(mem_heap_alloc(heap, n_dense * (2 * sizeof *recs)));
2857
/* Copy the page header. */
2858
memcpy(page, page_zip->data, PAGE_DATA);
2860
/* Check that the bytes that we skip are identical. */
2861
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
2862
ut_a(!memcmp(FIL_PAGE_TYPE + page,
2863
FIL_PAGE_TYPE + page_zip->data,
2864
PAGE_HEADER - FIL_PAGE_TYPE));
2865
ut_a(!memcmp(PAGE_HEADER + PAGE_LEVEL + page,
2866
PAGE_HEADER + PAGE_LEVEL + page_zip->data,
2867
PAGE_DATA - (PAGE_HEADER + PAGE_LEVEL)));
2868
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
2870
/* Copy the mutable parts of the page header. */
2871
memcpy(page, page_zip->data, FIL_PAGE_TYPE);
2872
memcpy(PAGE_HEADER + page, PAGE_HEADER + page_zip->data,
2873
PAGE_LEVEL - PAGE_N_DIR_SLOTS);
2875
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
2876
/* Check that the page headers match after copying. */
2877
ut_a(!memcmp(page, page_zip->data, PAGE_DATA));
2878
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
2881
#ifdef UNIV_ZIP_DEBUG
2882
/* Clear the uncompressed page, except the header. */
2883
memset(PAGE_DATA + page, 0x55, UNIV_PAGE_SIZE - PAGE_DATA);
2884
#endif /* UNIV_ZIP_DEBUG */
2885
UNIV_MEM_INVALID(PAGE_DATA + page, UNIV_PAGE_SIZE - PAGE_DATA);
2887
/* Copy the page directory. */
2888
if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
2889
recs + n_dense, n_dense))) {
2891
mem_heap_free(heap);
2895
/* Copy the infimum and supremum records. */
2896
memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
2897
infimum_extra, sizeof infimum_extra);
2898
if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
2899
rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
2902
rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
2903
page_zip_dir_get(page_zip, 0)
2904
& PAGE_ZIP_DIR_SLOT_MASK);
2906
memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data);
2907
memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
2908
supremum_extra_data, sizeof supremum_extra_data);
2910
page_zip_set_alloc(&d_stream, heap);
2912
if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
2917
d_stream.next_in = page_zip->data + PAGE_DATA;
2918
/* Subtract the space reserved for
2919
the page header and the end marker of the modification log. */
2920
d_stream.avail_in = page_zip_get_size(page_zip) - (PAGE_DATA + 1);
2922
d_stream.next_out = page + PAGE_ZIP_START;
2923
d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
2925
/* Decode the zlib header and the index information. */
2926
if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
2928
page_zip_fail(("page_zip_decompress:"
2929
" 1 inflate(Z_BLOCK)=%s\n", d_stream.msg));
2933
if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
2935
page_zip_fail(("page_zip_decompress:"
2936
" 2 inflate(Z_BLOCK)=%s\n", d_stream.msg));
2940
index = page_zip_fields_decode(
2941
page + PAGE_ZIP_START, d_stream.next_out,
2942
page_is_leaf(page) ? &trx_id_col : NULL);
2944
if (UNIV_UNLIKELY(!index)) {
2949
/* Decompress the user records. */
2950
page_zip->n_blobs = 0;
2951
d_stream.next_out = page + PAGE_ZIP_START;
2954
/* Pre-allocate the offsets for rec_get_offsets_reverse(). */
2955
ulint n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE
2956
+ dict_index_get_n_fields(index);
2957
offsets = static_cast<unsigned long *>(mem_heap_alloc(heap, n * sizeof(ulint)));
2961
/* Decompress the records in heap_no order. */
2962
if (!page_is_leaf(page)) {
2963
/* This is a node pointer page. */
2967
(!page_zip_decompress_node_ptrs(page_zip, &d_stream,
2968
recs, n_dense, index,
2973
info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL
2974
? REC_INFO_MIN_REC_FLAG : 0;
2976
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
2980
} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
2981
/* This is a leaf page in a secondary index. */
2982
if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream,
2988
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
2991
page_zip_fields_free(index);
2992
mem_heap_free(heap);
2996
/* This is a leaf page in a clustered index. */
2997
if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip,
3005
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
3011
ut_a(page_is_comp(page));
3012
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
3014
page_zip_fields_free(index);
3015
mem_heap_free(heap);
3016
#ifndef UNIV_HOTBACKUP
3018
page_zip_stat_t* zip_stat
3019
= &page_zip_stat[page_zip->ssize - 1];
3020
zip_stat->decompressed++;
3021
zip_stat->decompressed_usec += ut_time_us(NULL) - usec;
3023
#endif /* !UNIV_HOTBACKUP */
3025
/* Update the stat counter for LRU policy. */
3026
buf_LRU_stat_inc_unzip();
3031
#ifdef UNIV_ZIP_DEBUG
3032
/**********************************************************************//**
3033
Dump a block of memory on the standard error stream. */
3036
page_zip_hexdump_func(
3037
/*==================*/
3038
const char* name, /*!< in: name of the data structure */
3039
const void* buf, /*!< in: data */
3040
ulint size) /*!< in: length of the data, in bytes */
3042
const byte* s = buf;
3044
const ulint width = 32; /* bytes per line */
3046
fprintf(stderr, "%s:\n", name);
3048
for (addr = 0; addr < size; addr += width) {
3051
fprintf(stderr, "%04lx ", (ulong) addr);
3053
i = ut_min(width, size - addr);
3056
fprintf(stderr, "%02x", *s++);
3063
/** Dump a block of memory on the standard error stream.
3065
@param size in: length of the data, in bytes */
3066
#define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size)
3068
/** Flag: make page_zip_validate() compare page headers only */
3069
UNIV_INTERN ibool page_zip_validate_header_only = FALSE;
3071
/**********************************************************************//**
3072
Check that the compressed and decompressed pages match.
3073
@return TRUE if valid, FALSE if not */
3076
page_zip_validate_low(
3077
/*==================*/
3078
const page_zip_des_t* page_zip,/*!< in: compressed page */
3079
const page_t* page, /*!< in: uncompressed page */
3080
ibool sloppy) /*!< in: FALSE=strict,
3081
TRUE=ignore the MIN_REC_FLAG */
3083
page_zip_des_t temp_page_zip;
3084
byte* temp_page_buf;
3088
if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
3089
FIL_PAGE_LSN - FIL_PAGE_PREV)
3090
|| memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2)
3091
|| memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
3092
PAGE_DATA - FIL_PAGE_DATA)) {
3093
page_zip_fail(("page_zip_validate: page header\n"));
3094
page_zip_hexdump(page_zip, sizeof *page_zip);
3095
page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
3096
page_zip_hexdump(page, UNIV_PAGE_SIZE);
3100
ut_a(page_is_comp(page));
3102
if (page_zip_validate_header_only) {
3106
/* page_zip_decompress() expects the uncompressed page to be
3107
UNIV_PAGE_SIZE aligned. */
3108
temp_page_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
3109
temp_page = ut_align(temp_page_buf, UNIV_PAGE_SIZE);
3111
#ifdef UNIV_DEBUG_VALGRIND
3112
/* Get detailed information on the valid bits in case the
3113
UNIV_MEM_ASSERT_RW() checks fail. The v-bits of page[],
3114
page_zip->data[] or page_zip could be viewed at temp_page[] or
3115
temp_page_zip in a debugger when running valgrind --db-attach. */
3116
VALGRIND_GET_VBITS(page, temp_page, UNIV_PAGE_SIZE);
3117
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
3118
# if UNIV_WORD_SIZE == 4
3119
VALGRIND_GET_VBITS(page_zip, &temp_page_zip, sizeof temp_page_zip);
3120
/* On 32-bit systems, there is no padding in page_zip_des_t.
3121
On other systems, Valgrind could complain about uninitialized
3123
UNIV_MEM_ASSERT_RW(page_zip, sizeof *page_zip);
3125
VALGRIND_GET_VBITS(page_zip->data, temp_page,
3126
page_zip_get_size(page_zip));
3127
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3128
#endif /* UNIV_DEBUG_VALGRIND */
3130
temp_page_zip = *page_zip;
3131
valid = page_zip_decompress(&temp_page_zip, temp_page, TRUE);
3133
fputs("page_zip_validate(): failed to decompress\n", stderr);
3136
if (page_zip->n_blobs != temp_page_zip.n_blobs) {
3137
page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n",
3138
page_zip->n_blobs, temp_page_zip.n_blobs));
3142
if (page_zip->m_start != temp_page_zip.m_start) {
3143
page_zip_fail(("page_zip_validate: m_start: %u!=%u\n",
3144
page_zip->m_start, temp_page_zip.m_start));
3147
#endif /* UNIV_DEBUG */
3148
if (page_zip->m_end != temp_page_zip.m_end) {
3149
page_zip_fail(("page_zip_validate: m_end: %u!=%u\n",
3150
page_zip->m_end, temp_page_zip.m_end));
3153
if (page_zip->m_nonempty != temp_page_zip.m_nonempty) {
3154
page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n",
3155
page_zip->m_nonempty,
3156
temp_page_zip.m_nonempty));
3159
if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER,
3160
UNIV_PAGE_SIZE - PAGE_HEADER - FIL_PAGE_DATA_END)) {
3162
/* In crash recovery, the "minimum record" flag may be
3163
set incorrectly until the mini-transaction is
3164
committed. Let us tolerate that difference when we
3165
are performing a sloppy validation. */
3168
byte info_bits_diff;
3170
= rec_get_next_offs(page + PAGE_NEW_INFIMUM,
3172
ut_a(offset >= PAGE_NEW_SUPREMUM);
3173
offset -= 5 /* REC_NEW_INFO_BITS */;
3175
info_bits_diff = page[offset] ^ temp_page[offset];
3177
if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
3178
temp_page[offset] = page[offset];
3180
if (!memcmp(page + PAGE_HEADER,
3181
temp_page + PAGE_HEADER,
3182
UNIV_PAGE_SIZE - PAGE_HEADER
3183
- FIL_PAGE_DATA_END)) {
3185
/* Only the minimum record flag
3186
differed. Let us ignore it. */
3187
page_zip_fail(("page_zip_validate: "
3190
"%lu,%lu,0x%02lx)\n",
3191
page_get_space_id(page),
3192
page_get_page_no(page),
3193
(ulong) page[offset]));
3198
page_zip_fail(("page_zip_validate: content\n"));
3204
page_zip_hexdump(page_zip, sizeof *page_zip);
3205
page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
3206
page_zip_hexdump(page, UNIV_PAGE_SIZE);
3207
page_zip_hexdump(temp_page, UNIV_PAGE_SIZE);
3209
ut_free(temp_page_buf);
3213
/**********************************************************************//**
3214
Check that the compressed and decompressed pages match.
3215
@return TRUE if valid, FALSE if not */
3220
const page_zip_des_t* page_zip,/*!< in: compressed page */
3221
const page_t* page) /*!< in: uncompressed page */
3223
return(page_zip_validate_low(page_zip, page,
3224
recv_recovery_is_on()));
3226
#endif /* UNIV_ZIP_DEBUG */
3229
/**********************************************************************//**
3230
Assert that the compressed and decompressed page headers match.
3234
page_zip_header_cmp(
3235
/*================*/
3236
const page_zip_des_t* page_zip,/*!< in: compressed page */
3237
const byte* page) /*!< in: uncompressed page */
3239
ut_ad(!memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
3240
FIL_PAGE_LSN - FIL_PAGE_PREV));
3241
ut_ad(!memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE,
3243
ut_ad(!memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
3244
PAGE_DATA - FIL_PAGE_DATA));
3248
#endif /* UNIV_DEBUG */
3250
/**********************************************************************//**
3251
Write a record on the compressed page that contains externally stored
3252
columns. The data must already have been written to the uncompressed page.
3253
@return end of modification log */
3256
page_zip_write_rec_ext(
3257
/*===================*/
3258
page_zip_des_t* page_zip, /*!< in/out: compressed page */
3259
const page_t* page, /*!< in: page containing rec */
3260
const byte* rec, /*!< in: record being written */
3261
dict_index_t* index, /*!< in: record descriptor */
3262
const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
3263
ulint create, /*!< in: nonzero=insert, zero=update */
3264
ulint trx_id_col, /*!< in: position of DB_TRX_ID */
3265
ulint heap_no, /*!< in: heap number of rec */
3266
byte* storage, /*!< in: end of dense page directory */
3267
byte* data) /*!< in: end of modification log */
3269
const byte* start = rec;
3272
byte* externs = storage;
3273
ulint n_ext = rec_offs_n_extern(offsets);
3275
ut_ad(rec_offs_validate(rec, index, offsets));
3276
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3277
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3278
rec_offs_extra_size(offsets));
3280
externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3281
* (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW);
3283
/* Note that this will not take into account
3284
the BLOB columns of rec if create==TRUE. */
3285
ut_ad(data + rec_offs_data_size(offsets)
3286
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3287
- n_ext * BTR_EXTERN_FIELD_REF_SIZE
3288
< externs - BTR_EXTERN_FIELD_REF_SIZE * page_zip->n_blobs);
3291
ulint blob_no = page_zip_get_n_prev_extern(
3292
page_zip, rec, index);
3293
byte* ext_end = externs - page_zip->n_blobs
3294
* BTR_EXTERN_FIELD_REF_SIZE;
3295
ut_ad(blob_no <= page_zip->n_blobs);
3296
externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
3299
page_zip->n_blobs += n_ext;
3300
ASSERT_ZERO_BLOB(ext_end - n_ext
3301
* BTR_EXTERN_FIELD_REF_SIZE);
3302
memmove(ext_end - n_ext
3303
* BTR_EXTERN_FIELD_REF_SIZE,
3308
ut_a(blob_no + n_ext <= page_zip->n_blobs);
3311
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
3314
if (UNIV_UNLIKELY(i == trx_id_col)) {
3315
ut_ad(!rec_offs_nth_extern(offsets,
3317
ut_ad(!rec_offs_nth_extern(offsets,
3319
/* Locate trx_id and roll_ptr. */
3320
src = rec_get_nth_field(rec, offsets,
3322
ut_ad(len == DATA_TRX_ID_LEN);
3323
ut_ad(src + DATA_TRX_ID_LEN
3324
== rec_get_nth_field(
3327
ut_ad(len == DATA_ROLL_PTR_LEN);
3329
/* Log the preceding fields. */
3330
ASSERT_ZERO(data, src - start);
3331
memcpy(data, start, src - start);
3332
data += src - start;
3333
start = src + (DATA_TRX_ID_LEN
3334
+ DATA_ROLL_PTR_LEN);
3336
/* Store trx_id and roll_ptr. */
3337
memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3339
src, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3340
i++; /* skip also roll_ptr */
3341
} else if (rec_offs_nth_extern(offsets, i)) {
3342
src = rec_get_nth_field(rec, offsets,
3345
ut_ad(dict_index_is_clust(index));
3347
>= BTR_EXTERN_FIELD_REF_SIZE);
3348
src += len - BTR_EXTERN_FIELD_REF_SIZE;
3350
ASSERT_ZERO(data, src - start);
3351
memcpy(data, start, src - start);
3352
data += src - start;
3353
start = src + BTR_EXTERN_FIELD_REF_SIZE;
3355
/* Store the BLOB pointer. */
3356
externs -= BTR_EXTERN_FIELD_REF_SIZE;
3357
ut_ad(data < externs);
3358
memcpy(externs, src, BTR_EXTERN_FIELD_REF_SIZE);
3362
/* Log the last bytes of the record. */
3363
len = rec_offs_data_size(offsets) - (start - rec);
3365
ASSERT_ZERO(data, len);
3366
memcpy(data, start, len);
3372
/**********************************************************************//**
3373
Write an entire record on the compressed page. The data must already
3374
have been written to the uncompressed page. */
3379
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3380
const byte* rec, /*!< in: record being written */
3381
dict_index_t* index, /*!< in: the index the record belongs to */
3382
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
3383
ulint create) /*!< in: nonzero=insert, zero=update */
3391
ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3392
ut_ad(page_zip_simple_validate(page_zip));
3393
ut_ad(page_zip_get_size(page_zip)
3394
> PAGE_DATA + page_zip_dir_size(page_zip));
3395
ut_ad(rec_offs_comp(offsets));
3396
ut_ad(rec_offs_validate(rec, index, offsets));
3398
ut_ad(page_zip->m_start >= PAGE_DATA);
3400
page = page_align(rec);
3402
ut_ad(page_zip_header_cmp(page_zip, page));
3403
ut_ad(page_simple_validate_new((page_t*) page));
3405
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3406
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3407
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3408
rec_offs_extra_size(offsets));
3410
slot = page_zip_dir_find(page_zip, page_offset(rec));
3412
/* Copy the delete mark. */
3413
if (rec_get_deleted_flag(rec, TRUE)) {
3414
*slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8;
3416
*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
3419
ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
3420
ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + UNIV_PAGE_SIZE
3421
- PAGE_DIR - PAGE_DIR_SLOT_SIZE
3422
* page_dir_get_n_slots(page));
3424
heap_no = rec_get_heap_no_new(rec);
3425
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */
3426
ut_ad(heap_no < page_dir_get_n_heap(page));
3428
/* Append to the modification log. */
3429
data = page_zip->data + page_zip->m_end;
3432
/* Identify the record by writing its heap number - 1.
3433
0 is reserved to indicate the end of the modification log. */
3435
if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
3436
*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
3439
*data++ = (byte) ((heap_no - 1) << 1);
3443
const byte* start = rec - rec_offs_extra_size(offsets);
3444
const byte* b = rec - REC_N_NEW_EXTRA_BYTES;
3446
/* Write the extra bytes backwards, so that
3447
rec_offs_extra_size() can be easily computed in
3448
page_zip_apply_log() by invoking
3449
rec_get_offsets_reverse(). */
3451
while (b != start) {
3457
/* Write the data bytes. Store the uncompressed bytes separately. */
3458
storage = page_zip->data + page_zip_get_size(page_zip)
3459
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3460
* PAGE_ZIP_DIR_SLOT_SIZE;
3462
if (page_is_leaf(page)) {
3465
if (dict_index_is_clust(index)) {
3468
trx_id_col = dict_index_get_sys_col_pos(index,
3470
ut_ad(trx_id_col != ULINT_UNDEFINED);
3472
/* Store separately trx_id, roll_ptr and
3473
the BTR_EXTERN_FIELD_REF of each BLOB column. */
3474
if (rec_offs_any_extern(offsets)) {
3475
data = page_zip_write_rec_ext(
3477
rec, index, offsets, create,
3478
trx_id_col, heap_no, storage, data);
3480
/* Locate trx_id and roll_ptr. */
3482
= rec_get_nth_field(rec, offsets,
3484
ut_ad(len == DATA_TRX_ID_LEN);
3485
ut_ad(src + DATA_TRX_ID_LEN
3486
== rec_get_nth_field(
3488
trx_id_col + 1, &len));
3489
ut_ad(len == DATA_ROLL_PTR_LEN);
3491
/* Log the preceding fields. */
3492
ASSERT_ZERO(data, src - rec);
3493
memcpy(data, rec, src - rec);
3496
/* Store trx_id and roll_ptr. */
3498
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3501
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3503
src += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
3505
/* Log the last bytes of the record. */
3506
len = rec_offs_data_size(offsets)
3509
ASSERT_ZERO(data, len);
3510
memcpy(data, src, len);
3514
/* Leaf page of a secondary index:
3515
no externally stored columns */
3516
ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
3517
== ULINT_UNDEFINED);
3518
ut_ad(!rec_offs_any_extern(offsets));
3520
/* Log the entire record. */
3521
len = rec_offs_data_size(offsets);
3523
ASSERT_ZERO(data, len);
3524
memcpy(data, rec, len);
3528
/* This is a node pointer page. */
3531
/* Non-leaf nodes should not have any externally
3533
ut_ad(!rec_offs_any_extern(offsets));
3535
/* Copy the data bytes, except node_ptr. */
3536
len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE;
3537
ut_ad(data + len < storage - REC_NODE_PTR_SIZE
3538
* (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW));
3539
ASSERT_ZERO(data, len);
3540
memcpy(data, rec, len);
3543
/* Copy the node pointer to the uncompressed area. */
3544
memcpy(storage - REC_NODE_PTR_SIZE
3551
ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip));
3552
page_zip->m_end = data - page_zip->data;
3553
page_zip->m_nonempty = TRUE;
3555
#ifdef UNIV_ZIP_DEBUG
3556
ut_a(page_zip_validate(page_zip, page_align(rec)));
3557
#endif /* UNIV_ZIP_DEBUG */
3560
/***********************************************************//**
3561
Parses a log record of writing a BLOB pointer of a record.
3562
@return end of log record or NULL */
3565
page_zip_parse_write_blob_ptr(
3566
/*==========================*/
3567
byte* ptr, /*!< in: redo log buffer */
3568
byte* end_ptr,/*!< in: redo log buffer end */
3569
page_t* page, /*!< in/out: uncompressed page */
3570
page_zip_des_t* page_zip)/*!< in/out: compressed page */
3575
ut_ad(!page == !page_zip);
3578
(end_ptr < ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE))) {
3583
offset = mach_read_from_2(ptr);
3584
z_offset = mach_read_from_2(ptr + 2);
3586
if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3587
|| UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3588
|| UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3590
recv_sys->found_corrupt_log = TRUE;
3596
if (UNIV_UNLIKELY(!page_zip)
3597
|| UNIV_UNLIKELY(!page_is_leaf(page))) {
3602
#ifdef UNIV_ZIP_DEBUG
3603
ut_a(page_zip_validate(page_zip, page));
3604
#endif /* UNIV_ZIP_DEBUG */
3606
memcpy(page + offset,
3607
ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3608
memcpy(page_zip->data + z_offset,
3609
ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3611
#ifdef UNIV_ZIP_DEBUG
3612
ut_a(page_zip_validate(page_zip, page));
3613
#endif /* UNIV_ZIP_DEBUG */
3616
return(ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE));
3619
/**********************************************************************//**
3620
Write a BLOB pointer of a record on the leaf page of a clustered index.
3621
The information must already have been updated on the uncompressed page. */
3624
page_zip_write_blob_ptr(
3625
/*====================*/
3626
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3627
const byte* rec, /*!< in/out: record whose data is being
3629
dict_index_t* index, /*!< in: index of the page */
3630
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
3631
ulint n, /*!< in: column index */
3632
mtr_t* mtr) /*!< in: mini-transaction handle,
3633
or NULL if no logging is needed */
3637
const page_t* page = page_align(rec);
3641
ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3642
ut_ad(page_simple_validate_new((page_t*) page));
3643
ut_ad(page_zip_simple_validate(page_zip));
3644
ut_ad(page_zip_get_size(page_zip)
3645
> PAGE_DATA + page_zip_dir_size(page_zip));
3646
ut_ad(rec_offs_comp(offsets));
3647
ut_ad(rec_offs_validate(rec, NULL, offsets));
3648
ut_ad(rec_offs_any_extern(offsets));
3649
ut_ad(rec_offs_nth_extern(offsets, n));
3651
ut_ad(page_zip->m_start >= PAGE_DATA);
3652
ut_ad(page_zip_header_cmp(page_zip, page));
3654
ut_ad(page_is_leaf(page));
3655
ut_ad(dict_index_is_clust(index));
3657
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3658
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3659
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3660
rec_offs_extra_size(offsets));
3662
blob_no = page_zip_get_n_prev_extern(page_zip, rec, index)
3663
+ rec_get_n_extern_new(rec, index, n);
3664
ut_a(blob_no < page_zip->n_blobs);
3666
externs = page_zip->data + page_zip_get_size(page_zip)
3667
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3668
* (PAGE_ZIP_DIR_SLOT_SIZE
3669
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3671
field = rec_get_nth_field(rec, offsets, n, &len);
3673
externs -= (blob_no + 1) * BTR_EXTERN_FIELD_REF_SIZE;
3674
field += len - BTR_EXTERN_FIELD_REF_SIZE;
3676
memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE);
3678
#ifdef UNIV_ZIP_DEBUG
3679
ut_a(page_zip_validate(page_zip, page));
3680
#endif /* UNIV_ZIP_DEBUG */
3683
#ifndef UNIV_HOTBACKUP
3684
byte* log_ptr = mlog_open(
3685
mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE);
3686
if (UNIV_UNLIKELY(!log_ptr)) {
3690
log_ptr = mlog_write_initial_log_record_fast(
3691
(byte*) field, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr);
3692
mach_write_to_2(log_ptr, page_offset(field));
3694
mach_write_to_2(log_ptr, externs - page_zip->data);
3696
memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE);
3697
log_ptr += BTR_EXTERN_FIELD_REF_SIZE;
3698
mlog_close(mtr, log_ptr);
3699
#endif /* !UNIV_HOTBACKUP */
3703
/***********************************************************//**
3704
Parses a log record of writing the node pointer of a record.
3705
@return end of log record or NULL */
3708
page_zip_parse_write_node_ptr(
3709
/*==========================*/
3710
byte* ptr, /*!< in: redo log buffer */
3711
byte* end_ptr,/*!< in: redo log buffer end */
3712
page_t* page, /*!< in/out: uncompressed page */
3713
page_zip_des_t* page_zip)/*!< in/out: compressed page */
3718
ut_ad(!page == !page_zip);
3720
if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) {
3725
offset = mach_read_from_2(ptr);
3726
z_offset = mach_read_from_2(ptr + 2);
3728
if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3729
|| UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3730
|| UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3732
recv_sys->found_corrupt_log = TRUE;
3743
if (UNIV_UNLIKELY(!page_zip)
3744
|| UNIV_UNLIKELY(page_is_leaf(page))) {
3749
#ifdef UNIV_ZIP_DEBUG
3750
ut_a(page_zip_validate(page_zip, page));
3751
#endif /* UNIV_ZIP_DEBUG */
3753
field = page + offset;
3754
storage = page_zip->data + z_offset;
3756
storage_end = page_zip->data + page_zip_get_size(page_zip)
3757
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3758
* PAGE_ZIP_DIR_SLOT_SIZE;
3760
heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE;
3762
if (UNIV_UNLIKELY((storage_end - storage) % REC_NODE_PTR_SIZE)
3763
|| UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW)
3764
|| UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) {
3769
memcpy(field, ptr + 4, REC_NODE_PTR_SIZE);
3770
memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE);
3772
#ifdef UNIV_ZIP_DEBUG
3773
ut_a(page_zip_validate(page_zip, page));
3774
#endif /* UNIV_ZIP_DEBUG */
3777
return(ptr + (2 + 2 + REC_NODE_PTR_SIZE));
3780
/**********************************************************************//**
3781
Write the node pointer of a record on a non-leaf compressed page. */
3784
page_zip_write_node_ptr(
3785
/*====================*/
3786
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3787
byte* rec, /*!< in/out: record */
3788
ulint size, /*!< in: data size of rec */
3789
ulint ptr, /*!< in: node pointer */
3790
mtr_t* mtr) /*!< in: mini-transaction, or NULL */
3794
page_t* page = page_align(rec);
3796
ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3797
ut_ad(page_simple_validate_new(page));
3798
ut_ad(page_zip_simple_validate(page_zip));
3799
ut_ad(page_zip_get_size(page_zip)
3800
> PAGE_DATA + page_zip_dir_size(page_zip));
3801
ut_ad(page_rec_is_comp(rec));
3803
ut_ad(page_zip->m_start >= PAGE_DATA);
3804
ut_ad(page_zip_header_cmp(page_zip, page));
3806
ut_ad(!page_is_leaf(page));
3808
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3809
UNIV_MEM_ASSERT_RW(rec, size);
3811
storage = page_zip->data + page_zip_get_size(page_zip)
3812
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3813
* PAGE_ZIP_DIR_SLOT_SIZE
3814
- (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
3815
field = rec + size - REC_NODE_PTR_SIZE;
3817
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3818
ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE));
3819
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3820
#if REC_NODE_PTR_SIZE != 4
3821
# error "REC_NODE_PTR_SIZE != 4"
3823
mach_write_to_4(field, ptr);
3824
memcpy(storage, field, REC_NODE_PTR_SIZE);
3827
#ifndef UNIV_HOTBACKUP
3828
byte* log_ptr = mlog_open(mtr,
3829
11 + 2 + 2 + REC_NODE_PTR_SIZE);
3830
if (UNIV_UNLIKELY(!log_ptr)) {
3834
log_ptr = mlog_write_initial_log_record_fast(
3835
field, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr);
3836
mach_write_to_2(log_ptr, page_offset(field));
3838
mach_write_to_2(log_ptr, storage - page_zip->data);
3840
memcpy(log_ptr, field, REC_NODE_PTR_SIZE);
3841
log_ptr += REC_NODE_PTR_SIZE;
3842
mlog_close(mtr, log_ptr);
3843
#endif /* !UNIV_HOTBACKUP */
3847
/**********************************************************************//**
3848
Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
3851
page_zip_write_trx_id_and_roll_ptr(
3852
/*===============================*/
3853
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3854
byte* rec, /*!< in/out: record */
3855
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
3856
ulint trx_id_col,/*!< in: column number of TRX_ID in rec */
3857
trx_id_t trx_id, /*!< in: transaction identifier */
3858
roll_ptr_t roll_ptr)/*!< in: roll_ptr */
3862
page_t* page = page_align(rec);
3865
ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3866
ut_ad(page_simple_validate_new(page));
3867
ut_ad(page_zip_simple_validate(page_zip));
3868
ut_ad(page_zip_get_size(page_zip)
3869
> PAGE_DATA + page_zip_dir_size(page_zip));
3870
ut_ad(rec_offs_validate(rec, NULL, offsets));
3871
ut_ad(rec_offs_comp(offsets));
3873
ut_ad(page_zip->m_start >= PAGE_DATA);
3874
ut_ad(page_zip_header_cmp(page_zip, page));
3876
ut_ad(page_is_leaf(page));
3878
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3880
storage = page_zip->data + page_zip_get_size(page_zip)
3881
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3882
* PAGE_ZIP_DIR_SLOT_SIZE
3883
- (rec_get_heap_no_new(rec) - 1)
3884
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3886
#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
3887
# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
3889
field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
3890
ut_ad(len == DATA_TRX_ID_LEN);
3891
ut_ad(field + DATA_TRX_ID_LEN
3892
== rec_get_nth_field(rec, offsets, trx_id_col + 1, &len));
3893
ut_ad(len == DATA_ROLL_PTR_LEN);
3894
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3895
ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
3896
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3897
#if DATA_TRX_ID_LEN != 6
3898
# error "DATA_TRX_ID_LEN != 6"
3900
mach_write_to_6(field, trx_id);
3901
#if DATA_ROLL_PTR_LEN != 7
3902
# error "DATA_ROLL_PTR_LEN != 7"
3904
mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
3905
memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3907
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3908
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3909
rec_offs_extra_size(offsets));
3910
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3913
#ifdef UNIV_ZIP_DEBUG
3914
/** Set this variable in a debugger to disable page_zip_clear_rec().
3915
The only observable effect should be the compression ratio due to
3916
deleted records not being zeroed out. In rare cases, there can be
3917
page_zip_validate() failures on the node_ptr, trx_id and roll_ptr
3918
columns if the space is reallocated for a smaller record. */
3919
UNIV_INTERN ibool page_zip_clear_rec_disable;
3920
#endif /* UNIV_ZIP_DEBUG */
3922
/**********************************************************************//**
3923
Clear an area on the uncompressed and compressed page, if possible. */
3928
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3929
byte* rec, /*!< in: record to clear */
3930
dict_index_t* index, /*!< in: index of rec */
3931
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
3934
page_t* page = page_align(rec);
3935
/* page_zip_validate() would fail here if a record
3936
containing externally stored columns is being deleted. */
3937
ut_ad(rec_offs_validate(rec, index, offsets));
3938
ut_ad(!page_zip_dir_find(page_zip, page_offset(rec)));
3939
ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec)));
3940
ut_ad(page_zip_header_cmp(page_zip, page));
3942
heap_no = rec_get_heap_no_new(rec);
3943
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
3945
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3946
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3947
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3948
rec_offs_extra_size(offsets));
3951
#ifdef UNIV_ZIP_DEBUG
3952
!page_zip_clear_rec_disable &&
3953
#endif /* UNIV_ZIP_DEBUG */
3955
+ 1 + ((heap_no - 1) >= 64)/* size of the log entry */
3956
+ page_zip_get_trailer_len(page_zip,
3957
dict_index_is_clust(index), NULL)
3958
< page_zip_get_size(page_zip)) {
3961
/* Clear only the data bytes, because the allocator and
3962
the decompressor depend on the extra bytes. */
3963
memset(rec, 0, rec_offs_data_size(offsets));
3965
if (!page_is_leaf(page)) {
3966
/* Clear node_ptr on the compressed page. */
3967
byte* storage = page_zip->data
3968
+ page_zip_get_size(page_zip)
3969
- (page_dir_get_n_heap(page)
3970
- PAGE_HEAP_NO_USER_LOW)
3971
* PAGE_ZIP_DIR_SLOT_SIZE;
3973
memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
3974
0, REC_NODE_PTR_SIZE);
3975
} else if (dict_index_is_clust(index)) {
3976
/* Clear trx_id and roll_ptr on the compressed page. */
3977
byte* storage = page_zip->data
3978
+ page_zip_get_size(page_zip)
3979
- (page_dir_get_n_heap(page)
3980
- PAGE_HEAP_NO_USER_LOW)
3981
* PAGE_ZIP_DIR_SLOT_SIZE;
3983
memset(storage - (heap_no - 1)
3984
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
3985
0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3988
/* Log that the data was zeroed out. */
3989
data = page_zip->data + page_zip->m_end;
3991
if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
3992
*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
3995
*data++ = (byte) ((heap_no - 1) << 1 | 1);
3997
ut_ad((ulint) (data - page_zip->data)
3998
< page_zip_get_size(page_zip));
3999
page_zip->m_end = data - page_zip->data;
4000
page_zip->m_nonempty = TRUE;
4001
} else if (page_is_leaf(page) && dict_index_is_clust(index)) {
4002
/* Do not clear the record, because there is not enough space
4003
to log the operation. */
4005
if (rec_offs_any_extern(offsets)) {
4008
for (i = rec_offs_n_fields(offsets); i--; ) {
4009
/* Clear all BLOB pointers in order to make
4010
page_zip_validate() pass. */
4011
if (rec_offs_nth_extern(offsets, i)) {
4013
byte* field = rec_get_nth_field(
4014
rec, offsets, i, &len);
4016
- BTR_EXTERN_FIELD_REF_SIZE,
4017
0, BTR_EXTERN_FIELD_REF_SIZE);
4023
#ifdef UNIV_ZIP_DEBUG
4024
ut_a(page_zip_validate(page_zip, page));
4025
#endif /* UNIV_ZIP_DEBUG */
4028
/**********************************************************************//**
4029
Write the "deleted" flag of a record on a compressed page. The flag must
4030
already have been written on the uncompressed page. */
4033
page_zip_rec_set_deleted(
4034
/*=====================*/
4035
page_zip_des_t* page_zip,/*!< in/out: compressed page */
4036
const byte* rec, /*!< in: record on the uncompressed page */
4037
ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */
4039
byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
4041
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4043
*slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8);
4045
*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
4047
#ifdef UNIV_ZIP_DEBUG
4048
ut_a(page_zip_validate(page_zip, page_align(rec)));
4049
#endif /* UNIV_ZIP_DEBUG */
4052
/**********************************************************************//**
4053
Write the "owned" flag of a record on a compressed page. The n_owned field
4054
must already have been written on the uncompressed page. */
4057
page_zip_rec_set_owned(
4058
/*===================*/
4059
page_zip_des_t* page_zip,/*!< in/out: compressed page */
4060
const byte* rec, /*!< in: record on the uncompressed page */
4061
ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
4063
byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
4065
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4067
*slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
4069
*slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8);
4073
/**********************************************************************//**
4074
Insert a record to the dense page directory. */
4077
page_zip_dir_insert(
4078
/*================*/
4079
page_zip_des_t* page_zip,/*!< in/out: compressed page */
4080
const byte* prev_rec,/*!< in: record after which to insert */
4081
const byte* free_rec,/*!< in: record from which rec was
4082
allocated, or NULL */
4083
byte* rec) /*!< in: record to insert */
4089
ut_ad(prev_rec != rec);
4090
ut_ad(page_rec_get_next((rec_t*) prev_rec) == rec);
4091
ut_ad(page_zip_simple_validate(page_zip));
4093
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4095
if (page_rec_is_infimum(prev_rec)) {
4096
/* Use the first slot. */
4097
slot_rec = page_zip->data + page_zip_get_size(page_zip);
4099
byte* end = page_zip->data + page_zip_get_size(page_zip);
4100
byte* start = end - page_zip_dir_user_size(page_zip);
4102
if (UNIV_LIKELY(!free_rec)) {
4103
/* PAGE_N_RECS was already incremented
4104
in page_cur_insert_rec_zip(), but the
4105
dense directory slot at that position
4106
contains garbage. Skip it. */
4107
start += PAGE_ZIP_DIR_SLOT_SIZE;
4110
slot_rec = page_zip_dir_find_low(start, end,
4111
page_offset(prev_rec));
4115
/* Read the old n_dense (n_heap may have been incremented). */
4116
n_dense = page_dir_get_n_heap(page_zip->data)
4117
- (PAGE_HEAP_NO_USER_LOW + 1);
4119
if (UNIV_LIKELY_NULL(free_rec)) {
4120
/* The record was allocated from the free list.
4121
Shift the dense directory only up to that slot.
4122
Note that in this case, n_dense is actually
4123
off by one, because page_cur_insert_rec_zip()
4124
did not increment n_heap. */
4125
ut_ad(rec_get_heap_no_new(rec) < n_dense + 1
4126
+ PAGE_HEAP_NO_USER_LOW);
4127
ut_ad(rec >= free_rec);
4128
slot_free = page_zip_dir_find(page_zip, page_offset(free_rec));
4130
slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
4132
/* The record was allocated from the heap.
4133
Shift the entire dense directory. */
4134
ut_ad(rec_get_heap_no_new(rec) == n_dense
4135
+ PAGE_HEAP_NO_USER_LOW);
4137
/* Shift to the end of the dense page directory. */
4138
slot_free = page_zip->data + page_zip_get_size(page_zip)
4139
- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
4142
/* Shift the dense directory to allocate place for rec. */
4143
memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free,
4144
slot_rec - slot_free);
4146
/* Write the entry for the inserted record.
4147
The "owned" and "deleted" flags must be zero. */
4148
mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, page_offset(rec));
4151
/**********************************************************************//**
4152
Shift the dense page directory and the array of BLOB pointers
4153
when a record is deleted. */
4156
page_zip_dir_delete(
4157
/*================*/
4158
page_zip_des_t* page_zip,/*!< in/out: compressed page */
4159
byte* rec, /*!< in: record to delete */
4160
dict_index_t* index, /*!< in: index of rec */
4161
const ulint* offsets,/*!< in: rec_get_offsets(rec) */
4162
const byte* free) /*!< in: previous start of the free list */
4167
page_t* page = page_align(rec);
4169
ut_ad(rec_offs_validate(rec, index, offsets));
4170
ut_ad(rec_offs_comp(offsets));
4172
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4173
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
4174
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
4175
rec_offs_extra_size(offsets));
4177
slot_rec = page_zip_dir_find(page_zip, page_offset(rec));
4181
/* This could not be done before page_zip_dir_find(). */
4182
page_header_set_field(page, page_zip, PAGE_N_RECS,
4183
(ulint)(page_get_n_recs(page) - 1));
4185
if (UNIV_UNLIKELY(!free)) {
4186
/* Make the last slot the start of the free list. */
4187
slot_free = page_zip->data + page_zip_get_size(page_zip)
4188
- PAGE_ZIP_DIR_SLOT_SIZE
4189
* (page_dir_get_n_heap(page_zip->data)
4190
- PAGE_HEAP_NO_USER_LOW);
4192
slot_free = page_zip_dir_find_free(page_zip,
4194
ut_a(slot_free < slot_rec);
4195
/* Grow the free list by one slot by moving the start. */
4196
slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
4199
if (UNIV_LIKELY(slot_rec > slot_free)) {
4200
memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
4202
slot_rec - slot_free);
4205
/* Write the entry for the deleted record.
4206
The "owned" and "deleted" flags will be cleared. */
4207
mach_write_to_2(slot_free, page_offset(rec));
4209
if (!page_is_leaf(page) || !dict_index_is_clust(index)) {
4210
ut_ad(!rec_offs_any_extern(offsets));
4214
n_ext = rec_offs_n_extern(offsets);
4215
if (UNIV_UNLIKELY(n_ext)) {
4216
/* Shift and zero fill the array of BLOB pointers. */
4221
blob_no = page_zip_get_n_prev_extern(page_zip, rec, index);
4222
ut_a(blob_no + n_ext <= page_zip->n_blobs);
4224
externs = page_zip->data + page_zip_get_size(page_zip)
4225
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
4226
* (PAGE_ZIP_DIR_SLOT_SIZE
4227
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4229
ext_end = externs - page_zip->n_blobs
4230
* BTR_EXTERN_FIELD_REF_SIZE;
4231
externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
4233
page_zip->n_blobs -= n_ext;
4234
/* Shift and zero fill the array. */
4235
memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end,
4236
(page_zip->n_blobs - blob_no)
4237
* BTR_EXTERN_FIELD_REF_SIZE);
4238
memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE);
4242
/* The compression algorithm expects info_bits and n_owned
4243
to be 0 for deleted records. */
4244
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
4246
page_zip_clear_rec(page_zip, rec, index, offsets);
4249
/**********************************************************************//**
4250
Add a slot to the dense page directory. */
4253
page_zip_dir_add_slot(
4254
/*==================*/
4255
page_zip_des_t* page_zip, /*!< in/out: compressed page */
4256
ulint is_clustered) /*!< in: nonzero for clustered index,
4263
ut_ad(page_is_comp(page_zip->data));
4264
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4266
/* Read the old n_dense (n_heap has already been incremented). */
4267
n_dense = page_dir_get_n_heap(page_zip->data)
4268
- (PAGE_HEAP_NO_USER_LOW + 1);
4270
dir = page_zip->data + page_zip_get_size(page_zip)
4271
- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
4273
if (!page_is_leaf(page_zip->data)) {
4274
ut_ad(!page_zip->n_blobs);
4275
stored = dir - n_dense * REC_NODE_PTR_SIZE;
4276
} else if (UNIV_UNLIKELY(is_clustered)) {
4277
/* Move the BLOB pointer array backwards to make space for the
4278
roll_ptr and trx_id columns and the dense directory slot. */
4281
stored = dir - n_dense
4282
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4284
- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4286
- (PAGE_ZIP_DIR_SLOT_SIZE
4287
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4288
PAGE_ZIP_DIR_SLOT_SIZE
4289
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4290
memmove(externs - (PAGE_ZIP_DIR_SLOT_SIZE
4291
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4292
externs, stored - externs);
4295
- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4296
ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE,
4297
PAGE_ZIP_DIR_SLOT_SIZE);
4300
/* Move the uncompressed area backwards to make space
4301
for one directory slot. */
4302
memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, dir - stored);
4305
/***********************************************************//**
4306
Parses a log record of writing to the header of a page.
4307
@return end of log record or NULL */
4310
page_zip_parse_write_header(
4311
/*========================*/
4312
byte* ptr, /*!< in: redo log buffer */
4313
byte* end_ptr,/*!< in: redo log buffer end */
4314
page_t* page, /*!< in/out: uncompressed page */
4315
page_zip_des_t* page_zip)/*!< in/out: compressed page */
4320
ut_ad(ptr && end_ptr);
4321
ut_ad(!page == !page_zip);
4323
if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) {
4328
offset = (ulint) *ptr++;
4329
len = (ulint) *ptr++;
4331
if (UNIV_UNLIKELY(!len) || UNIV_UNLIKELY(offset + len >= PAGE_DATA)) {
4333
recv_sys->found_corrupt_log = TRUE;
4338
if (UNIV_UNLIKELY(end_ptr < ptr + len)) {
4344
if (UNIV_UNLIKELY(!page_zip)) {
4348
#ifdef UNIV_ZIP_DEBUG
4349
ut_a(page_zip_validate(page_zip, page));
4350
#endif /* UNIV_ZIP_DEBUG */
4352
memcpy(page + offset, ptr, len);
4353
memcpy(page_zip->data + offset, ptr, len);
4355
#ifdef UNIV_ZIP_DEBUG
4356
ut_a(page_zip_validate(page_zip, page));
4357
#endif /* UNIV_ZIP_DEBUG */
4363
#ifndef UNIV_HOTBACKUP
4364
/**********************************************************************//**
4365
Write a log record of writing to the uncompressed header portion of a page. */
4368
page_zip_write_header_log(
4369
/*======================*/
4370
const byte* data, /*!< in: data on the uncompressed page */
4371
ulint length, /*!< in: length of the data */
4372
mtr_t* mtr) /*!< in: mini-transaction */
4374
byte* log_ptr = mlog_open(mtr, 11 + 1 + 1);
4375
ulint offset = page_offset(data);
4377
ut_ad(offset < PAGE_DATA);
4378
ut_ad(offset + length < PAGE_DATA);
4380
# error "PAGE_DATA > 255"
4382
ut_ad(length < 256);
4384
/* If no logging is requested, we may return now */
4385
if (UNIV_UNLIKELY(!log_ptr)) {
4390
log_ptr = mlog_write_initial_log_record_fast(
4391
(byte*) data, MLOG_ZIP_WRITE_HEADER, log_ptr, mtr);
4392
*log_ptr++ = (byte) offset;
4393
*log_ptr++ = (byte) length;
4394
mlog_close(mtr, log_ptr);
4396
mlog_catenate_string(mtr, data, length);
4398
#endif /* !UNIV_HOTBACKUP */
4400
/**********************************************************************//**
4401
Reorganize and compress a page. This is a low-level operation for
4402
compressed pages, to be used when page_zip_compress() fails.
4403
On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
4404
The function btr_page_reorganize() should be preferred whenever possible.
4405
IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
4406
non-clustered index, the caller must update the insert buffer free
4407
bits in the same mini-transaction in such a way that the modification
4408
will be redo-logged.
4409
@return TRUE on success, FALSE on failure; page_zip will be left
4410
intact on failure, but page will be overwritten. */
4413
page_zip_reorganize(
4414
/*================*/
4415
buf_block_t* block, /*!< in/out: page with compressed page;
4416
on the compressed page, in: size;
4418
m_start, m_end, m_nonempty */
4419
dict_index_t* index, /*!< in: index of the B-tree node */
4420
mtr_t* mtr) /*!< in: mini-transaction */
4422
buf_pool_t* buf_pool = buf_pool_from_block(block);
4423
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
4424
page_t* page = buf_block_get_frame(block);
4425
buf_block_t* temp_block;
4429
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4430
ut_ad(page_is_comp(page));
4431
ut_ad(!dict_index_is_ibuf(index));
4432
/* Note that page_zip_validate(page_zip, page) may fail here. */
4433
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
4434
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4436
/* Disable logging */
4437
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
4439
#ifndef UNIV_HOTBACKUP
4440
temp_block = buf_block_alloc(buf_pool, 0);
4441
btr_search_drop_page_hash_index(block);
4442
block->check_index_page_at_flush = TRUE;
4443
#else /* !UNIV_HOTBACKUP */
4444
ut_ad(block == back_block1);
4445
temp_block = back_block2;
4446
#endif /* !UNIV_HOTBACKUP */
4447
temp_page = temp_block->frame;
4449
/* Copy the old page to temporary space */
4450
buf_frame_copy(temp_page, page);
4452
/* Recreate the page: note that global data on page (possible
4453
segment headers, next page-field, etc.) is preserved intact */
4455
page_create(block, mtr, TRUE);
4457
/* Copy the records from the temporary space to the recreated page;
4458
do not copy the lock bits yet */
4460
page_copy_rec_list_end_no_locks(block, temp_block,
4461
page_get_infimum_rec(temp_page),
4464
if (!dict_index_is_clust(index) && page_is_leaf(temp_page)) {
4465
/* Copy max trx id to recreated page */
4466
trx_id_t max_trx_id = page_get_max_trx_id(temp_page);
4467
page_set_max_trx_id(block, NULL, max_trx_id, NULL);
4468
ut_ad(max_trx_id != 0);
4471
/* Restore logging. */
4472
mtr_set_log_mode(mtr, log_mode);
4474
if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
4476
#ifndef UNIV_HOTBACKUP
4477
buf_block_free(temp_block);
4478
#endif /* !UNIV_HOTBACKUP */
4482
lock_move_reorganize_page(block, temp_block);
4484
#ifndef UNIV_HOTBACKUP
4485
buf_block_free(temp_block);
4486
#endif /* !UNIV_HOTBACKUP */
4490
#ifndef UNIV_HOTBACKUP
4491
/**********************************************************************//**
4492
Copy the records of a page byte for byte. Do not copy the page header
4493
or trailer, except those B-tree header fields that are directly
4494
related to the storage of records. Also copy PAGE_MAX_TRX_ID.
4495
NOTE: The caller must update the lock table and the adaptive hash index. */
4500
page_zip_des_t* page_zip, /*!< out: copy of src_zip
4501
(n_blobs, m_start, m_end,
4502
m_nonempty, data[0..size-1]) */
4503
page_t* page, /*!< out: copy of src */
4504
const page_zip_des_t* src_zip, /*!< in: compressed page */
4505
const page_t* src, /*!< in: page */
4506
dict_index_t* index, /*!< in: index of the B-tree */
4507
mtr_t* mtr) /*!< in: mini-transaction */
4509
ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
4510
ut_ad(mtr_memo_contains_page(mtr, (page_t*) src, MTR_MEMO_PAGE_X_FIX));
4511
ut_ad(!dict_index_is_ibuf(index));
4512
#ifdef UNIV_ZIP_DEBUG
4513
/* The B-tree operations that call this function may set
4514
FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
4515
mismatch. A strict page_zip_validate() will be executed later
4516
during the B-tree operations. */
4517
ut_a(page_zip_validate_low(src_zip, src, TRUE));
4518
#endif /* UNIV_ZIP_DEBUG */
4519
ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
4520
if (UNIV_UNLIKELY(src_zip->n_blobs)) {
4521
ut_a(page_is_leaf(src));
4522
ut_a(dict_index_is_clust(index));
4525
/* The PAGE_MAX_TRX_ID must be set on leaf pages of secondary
4526
indexes. It does not matter on other pages. */
4527
ut_a(dict_index_is_clust(index) || !page_is_leaf(src)
4528
|| page_get_max_trx_id(src));
4530
UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
4531
UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip));
4532
UNIV_MEM_ASSERT_RW(src, UNIV_PAGE_SIZE);
4533
UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip));
4535
/* Copy those B-tree page header fields that are related to
4536
the records stored in the page. Also copy the field
4537
PAGE_MAX_TRX_ID. Skip the rest of the page header and
4538
trailer. On the compressed page, there is no trailer. */
4539
#if PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END
4540
# error "PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END"
4542
memcpy(PAGE_HEADER + page, PAGE_HEADER + src,
4543
PAGE_HEADER_PRIV_END);
4544
memcpy(PAGE_DATA + page, PAGE_DATA + src,
4545
UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
4546
memcpy(PAGE_HEADER + page_zip->data, PAGE_HEADER + src_zip->data,
4547
PAGE_HEADER_PRIV_END);
4548
memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data,
4549
page_zip_get_size(page_zip) - PAGE_DATA);
4551
/* Copy all fields of src_zip to page_zip, except the pointer
4552
to the compressed data page. */
4554
page_zip_t* data = page_zip->data;
4555
memcpy(page_zip, src_zip, sizeof *page_zip);
4556
page_zip->data = data;
4558
ut_ad(page_zip_get_trailer_len(page_zip,
4559
dict_index_is_clust(index), NULL)
4560
+ page_zip->m_end < page_zip_get_size(page_zip));
4562
if (!page_is_leaf(src)
4563
&& UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL)
4564
&& UNIV_LIKELY(mach_read_from_4(page
4565
+ FIL_PAGE_PREV) != FIL_NULL)) {
4566
/* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */
4567
ulint offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
4569
if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) {
4570
rec_t* rec = page + offs;
4571
ut_a(rec[-REC_N_NEW_EXTRA_BYTES]
4572
& REC_INFO_MIN_REC_FLAG);
4573
rec[-REC_N_NEW_EXTRA_BYTES] &= ~ REC_INFO_MIN_REC_FLAG;
4577
#ifdef UNIV_ZIP_DEBUG
4578
ut_a(page_zip_validate(page_zip, page));
4579
#endif /* UNIV_ZIP_DEBUG */
4581
page_zip_compress_write_log(page_zip, page, index, mtr);
4583
#endif /* !UNIV_HOTBACKUP */
4585
/**********************************************************************//**
4586
Parses a log record of compressing an index page.
4587
@return end of log record or NULL */
4590
page_zip_parse_compress(
4591
/*====================*/
4592
byte* ptr, /*!< in: buffer */
4593
byte* end_ptr,/*!< in: buffer end */
4594
page_t* page, /*!< out: uncompressed page */
4595
page_zip_des_t* page_zip)/*!< out: compressed page */
4600
ut_ad(ptr && end_ptr);
4601
ut_ad(!page == !page_zip);
4603
if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) {
4608
size = mach_read_from_2(ptr);
4610
trailer_size = mach_read_from_2(ptr);
4613
if (UNIV_UNLIKELY(ptr + 8 + size + trailer_size > end_ptr)) {
4619
if (UNIV_UNLIKELY(!page_zip)
4620
|| UNIV_UNLIKELY(page_zip_get_size(page_zip) < size)) {
4622
recv_sys->found_corrupt_log = TRUE;
4627
memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4);
4628
memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4);
4629
memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size);
4630
memset(page_zip->data + FIL_PAGE_TYPE + size, 0,
4631
page_zip_get_size(page_zip) - trailer_size
4632
- (FIL_PAGE_TYPE + size));
4633
memcpy(page_zip->data + page_zip_get_size(page_zip)
4634
- trailer_size, ptr + 8 + size, trailer_size);
4636
if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page,
4643
return(ptr + 8 + size + trailer_size);
4646
/**********************************************************************//**
4647
Calculate the compressed page checksum.
4648
@return page checksum */
4651
page_zip_calc_checksum(
4652
/*===================*/
4653
const void* data, /*!< in: compressed page */
4654
ulint size) /*!< in: size of compressed page */
4656
/* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
4657
and FIL_PAGE_FILE_FLUSH_LSN from the checksum. */
4659
const Bytef* s = static_cast<const Bytef *>(data);
4662
ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
4664
adler = adler32(0L, s + FIL_PAGE_OFFSET,
4665
FIL_PAGE_LSN - FIL_PAGE_OFFSET);
4666
adler = adler32(adler, s + FIL_PAGE_TYPE, 2);
4667
adler = adler32(adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
4668
size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
4670
return((ulint) adler);