/*****************************************************************************

Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/

/**************************************************//**
@file page/page0zip.c
Compressed page interface

Created June 2005 by Marko Makela
*******************************************************/
29
# include "page0zip.ic"
32
#include "page0page.h"
35
#include "dict0dict.h"
37
#include "page0types.h"
40
#ifndef UNIV_HOTBACKUP
43
# include "dict0boot.h"
44
# include "lock0lock.h"
45
#else /* !UNIV_HOTBACKUP */
46
# define lock_move_reorganize_page(block, temp_block) ((void) 0)
47
# define buf_LRU_stat_inc_unzip() ((void) 0)
48
#endif /* !UNIV_HOTBACKUP */
50
#ifndef UNIV_HOTBACKUP
51
/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
52
UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
53
#endif /* !UNIV_HOTBACKUP */
55
/* Please refer to ../include/page0zip.ic for a description of the
compressed page format. */

/* The infimum and supremum records are omitted from the compressed page.
On compress, we compare that the records are there, and on uncompress we
restore the records. */
61
/** Extra bytes of an infimum record */
62
static const byte infimum_extra[] = {
63
0x01, /* info_bits=0, n_owned=1 */
64
0x00, 0x02 /* heap_no=0, status=2 */
65
/* ?, ? */ /* next=(first user rec, or supremum) */
67
/** Data bytes of an infimum record */
68
static const byte infimum_data[] = {
69
0x69, 0x6e, 0x66, 0x69,
70
0x6d, 0x75, 0x6d, 0x00 /* "infimum\0" */
72
/** Extra bytes and data bytes of a supremum record */
73
static const byte supremum_extra_data[] = {
74
/* 0x0?, */ /* info_bits=0, n_owned=1..8 */
75
0x00, 0x0b, /* heap_no=1, status=3 */
76
0x00, 0x00, /* next=0 */
77
0x73, 0x75, 0x70, 0x72,
78
0x65, 0x6d, 0x75, 0x6d /* "supremum" */
81
/** Assert that a block of memory is filled with zero bytes.
Compare at most sizeof(field_ref_zero) bytes.
@param b	in: memory block
@param s	in: size of the memory block, in bytes */
#define ASSERT_ZERO(b, s) \
	ut_ad(!memcmp(b, field_ref_zero, ut_min(s, sizeof field_ref_zero)))
87
/** Assert that a BLOB pointer is filled with zero bytes.
@param b	in: BLOB pointer */
#define ASSERT_ZERO_BLOB(b) \
	ut_ad(!memcmp(b, field_ref_zero, sizeof field_ref_zero))
92
/* Enable some extra debugging output.  This code can be enabled
independently of any UNIV_ debugging conditions. */
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
# include <stdarg.h>
__attribute__((format (printf, 1, 2)))
/**********************************************************************//**
Report a failure to decompress or compress.
Prints a timestamped diagnostic line to stderr via vfprintf(3).
@return number of characters printed */
static
int
page_zip_fail_func(
/*===============*/
	const char*	fmt,	/*!< in: printf(3) format string */
	...)			/*!< in: arguments corresponding to fmt */
{
	int	res;
	va_list	ap;

	ut_print_timestamp(stderr);
	fputs("  InnoDB: ", stderr);
	va_start(ap, fmt);
	res = vfprintf(stderr, fmt, ap);
	va_end(ap);

	return(res);
}
118
/** Wrapper for page_zip_fail_func()
@param fmt_args	in: printf(3) format string and arguments */
# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
/** Dummy wrapper for page_zip_fail_func()
@param fmt_args	ignored: printf(3) format string and arguments */
# define page_zip_fail(fmt_args) /* empty */
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
127
#ifndef UNIV_HOTBACKUP
128
/**********************************************************************//**
129
Determine the guaranteed free space on an empty page.
130
@return minimum payload size on the page */
135
ulint n_fields, /*!< in: number of columns in the index */
136
ulint zip_size) /*!< in: compressed page size in bytes */
139
/* subtract the page header and the longest
140
uncompressed data needed for one record */
142
+ PAGE_ZIP_DIR_SLOT_SIZE
143
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
144
+ 1/* encoded heap_no==2 in page_zip_write_rec() */
145
+ 1/* end of modification log */
146
- REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
147
/* subtract the space for page_zip_fields_encode() */
148
- compressBound(2 * (n_fields + 1));
149
return(size > 0 ? (ulint) size : 0);
151
#endif /* !UNIV_HOTBACKUP */
153
/*************************************************************//**
154
Gets the size of the compressed page trailer (the dense page directory),
155
including deleted records (the free list).
156
@return length of dense page directory, in bytes */
161
const page_zip_des_t* page_zip) /*!< in: compressed page */
163
/* Exclude the page infimum and supremum from the record count. */
164
ulint size = PAGE_ZIP_DIR_SLOT_SIZE
165
* (page_dir_get_n_heap(page_zip->data)
166
- PAGE_HEAP_NO_USER_LOW);
170
/*************************************************************//**
171
Gets the size of the compressed page trailer (the dense page directory),
172
only including user records (excluding the free list).
173
@return length of dense page directory comprising existing records, in bytes */
176
page_zip_dir_user_size(
177
/*===================*/
178
const page_zip_des_t* page_zip) /*!< in: compressed page */
180
ulint size = PAGE_ZIP_DIR_SLOT_SIZE
181
* page_get_n_recs(page_zip->data);
182
ut_ad(size <= page_zip_dir_size(page_zip));
186
/*************************************************************//**
187
Find the slot of the given record in the dense page directory.
188
@return dense directory slot, or NULL if record not found */
191
page_zip_dir_find_low(
192
/*==================*/
193
byte* slot, /*!< in: start of records */
194
byte* end, /*!< in: end of records */
195
ulint offset) /*!< in: offset of user record */
199
for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) {
200
if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK)
209
/*************************************************************//**
210
Find the slot of the given non-free record in the dense page directory.
211
@return dense directory slot, or NULL if record not found */
216
page_zip_des_t* page_zip, /*!< in: compressed page */
217
ulint offset) /*!< in: offset of user record */
219
byte* end = page_zip->data + page_zip_get_size(page_zip);
221
ut_ad(page_zip_simple_validate(page_zip));
223
return(page_zip_dir_find_low(end - page_zip_dir_user_size(page_zip),
228
/*************************************************************//**
229
Find the slot of the given free record in the dense page directory.
230
@return dense directory slot, or NULL if record not found */
233
page_zip_dir_find_free(
234
/*===================*/
235
page_zip_des_t* page_zip, /*!< in: compressed page */
236
ulint offset) /*!< in: offset of user record */
238
byte* end = page_zip->data + page_zip_get_size(page_zip);
240
ut_ad(page_zip_simple_validate(page_zip));
242
return(page_zip_dir_find_low(end - page_zip_dir_size(page_zip),
243
end - page_zip_dir_user_size(page_zip),
247
/*************************************************************//**
248
Read a given slot in the dense page directory.
249
@return record offset on the uncompressed page, possibly ORed with
250
PAGE_ZIP_DIR_SLOT_DEL or PAGE_ZIP_DIR_SLOT_OWNED */
255
const page_zip_des_t* page_zip, /*!< in: compressed page */
256
ulint slot) /*!< in: slot
257
(0=first user record) */
259
ut_ad(page_zip_simple_validate(page_zip));
260
ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE);
261
return(mach_read_from_2(page_zip->data + page_zip_get_size(page_zip)
262
- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
265
#ifndef UNIV_HOTBACKUP
266
/**********************************************************************//**
267
Write a log record of compressing an index page. */
270
page_zip_compress_write_log(
271
/*========================*/
272
const page_zip_des_t* page_zip,/*!< in: compressed page */
273
const page_t* page, /*!< in: uncompressed page */
274
dict_index_t* index, /*!< in: index of the B-tree node */
275
mtr_t* mtr) /*!< in: mini-transaction */
280
ut_ad(!dict_index_is_ibuf(index));
282
log_ptr = mlog_open(mtr, 11 + 2 + 2);
289
/* Read the number of user records. */
290
trailer_size = page_dir_get_n_heap(page_zip->data)
291
- PAGE_HEAP_NO_USER_LOW;
292
/* Multiply by uncompressed of size stored per record */
293
if (!page_is_leaf(page)) {
294
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
295
} else if (dict_index_is_clust(index)) {
296
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
297
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
299
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
301
/* Add the space occupied by BLOB pointers. */
302
trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
303
ut_a(page_zip->m_end > PAGE_DATA);
304
#if FIL_PAGE_DATA > PAGE_DATA
305
# error "FIL_PAGE_DATA > PAGE_DATA"
307
ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));
309
log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
310
MLOG_ZIP_PAGE_COMPRESS,
312
mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE);
314
mach_write_to_2(log_ptr, trailer_size);
316
mlog_close(mtr, log_ptr);
318
/* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
319
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
320
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
321
/* Write most of the page header, the compressed stream and
322
the modification log. */
323
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
324
page_zip->m_end - FIL_PAGE_TYPE);
325
/* Write the uncompressed trailer of the compressed page. */
326
mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
327
- trailer_size, trailer_size);
329
#endif /* !UNIV_HOTBACKUP */
331
/******************************************************//**
332
Determine how many externally stored columns are contained
333
in existing records with smaller heap_no than rec. */
336
page_zip_get_n_prev_extern(
337
/*=======================*/
338
const page_zip_des_t* page_zip,/*!< in: dense page directory on
340
const rec_t* rec, /*!< in: compact physical record
341
on a B-tree leaf page */
342
dict_index_t* index) /*!< in: record descriptor */
344
const page_t* page = page_align(rec);
349
ulint n_recs = page_get_n_recs(page_zip->data);
351
ut_ad(page_is_leaf(page));
352
ut_ad(page_is_comp(page));
353
ut_ad(dict_table_is_comp(index->table));
354
ut_ad(dict_index_is_clust(index));
355
ut_ad(!dict_index_is_ibuf(index));
357
heap_no = rec_get_heap_no_new(rec);
358
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
359
left = heap_no - PAGE_HEAP_NO_USER_LOW;
360
if (UNIV_UNLIKELY(!left)) {
364
for (i = 0; i < n_recs; i++) {
365
const rec_t* r = page + (page_zip_dir_get(page_zip, i)
366
& PAGE_ZIP_DIR_SLOT_MASK);
368
if (rec_get_heap_no_new(r) < heap_no) {
369
n_ext += rec_get_n_extern_new(r, index,
380
/**********************************************************************//**
381
Encode the length of a fixed-length column.
382
@return buf + length of encoded val */
385
page_zip_fixed_field_encode(
386
/*========================*/
387
byte* buf, /*!< in: pointer to buffer where to write */
388
ulint val) /*!< in: value to write */
392
if (UNIV_LIKELY(val < 126)) {
394
0 = nullable variable field of at most 255 bytes length;
395
1 = not null variable field of at most 255 bytes length;
396
126 = nullable variable field with maximum length >255;
397
127 = not null variable field with maximum length >255
401
*buf++ = (byte) (0x80 | val >> 8);
408
/**********************************************************************//**
409
Write the index information for the compressed page.
410
@return used size of buf */
413
page_zip_fields_encode(
414
/*===================*/
415
ulint n, /*!< in: number of fields to compress */
416
dict_index_t* index, /*!< in: index comprising at least n fields */
417
ulint trx_id_pos,/*!< in: position of the trx_id column
418
in the index, or ULINT_UNDEFINED if
419
this is a non-leaf page */
420
byte* buf) /*!< out: buffer of (n + 1) * 2 bytes */
422
const byte* buf_start = buf;
425
ulint trx_id_col = 0;
426
/* sum of lengths of preceding non-nullable fixed fields, or 0 */
429
ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);
431
for (i = col = 0; i < n; i++) {
432
dict_field_t* field = dict_index_get_nth_field(index, i);
435
if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
436
val = 1; /* set the "not nullable" flag */
438
val = 0; /* nullable field */
441
if (!field->fixed_len) {
442
/* variable-length field */
443
const dict_col_t* column
444
= dict_field_get_col(field);
446
if (UNIV_UNLIKELY(column->len > 255)
447
|| UNIV_UNLIKELY(column->mtype == DATA_BLOB)) {
448
val |= 0x7e; /* max > 255 bytes */
452
/* write out the length of any
453
preceding non-nullable fields */
454
buf = page_zip_fixed_field_encode(
455
buf, fixed_sum << 1 | 1);
463
/* fixed-length non-nullable field */
465
if (fixed_sum && UNIV_UNLIKELY
466
(fixed_sum + field->fixed_len
467
> DICT_MAX_INDEX_COL_LEN)) {
468
/* Write out the length of the
469
preceding non-nullable fields,
470
to avoid exceeding the maximum
471
length of a fixed-length column. */
472
buf = page_zip_fixed_field_encode(
473
buf, fixed_sum << 1 | 1);
478
if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
480
/* Write out the length of any
481
preceding non-nullable fields,
482
and start a new trx_id column. */
483
buf = page_zip_fixed_field_encode(
484
buf, fixed_sum << 1 | 1);
489
fixed_sum = field->fixed_len;
492
fixed_sum += field->fixed_len;
495
/* fixed-length nullable field */
498
/* write out the length of any
499
preceding non-nullable fields */
500
buf = page_zip_fixed_field_encode(
501
buf, fixed_sum << 1 | 1);
506
buf = page_zip_fixed_field_encode(
507
buf, field->fixed_len << 1);
513
/* Write out the lengths of last fixed-length columns. */
514
buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
517
if (trx_id_pos != ULINT_UNDEFINED) {
518
/* Write out the position of the trx_id column */
521
/* Write out the number of nullable fields */
522
i = index->n_nullable;
528
*buf++ = (byte) (0x80 | i >> 8);
532
ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
533
return((ulint) (buf - buf_start));
536
/**********************************************************************//**
537
Populate the dense page directory from the sparse directory. */
542
const page_t* page, /*!< in: compact page */
543
byte* buf, /*!< in: pointer to dense page directory[-1];
544
out: dense directory on compressed page */
545
const rec_t** recs) /*!< in: pointer to an array of 0, or NULL;
546
out: dense page directory sorted by ascending
547
address (and heap_no) */
559
if (page_is_leaf(page)) {
560
status = REC_STATUS_ORDINARY;
562
status = REC_STATUS_NODE_PTR;
564
(mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) {
565
min_mark = REC_INFO_MIN_REC_FLAG;
569
n_heap = page_dir_get_n_heap(page);
571
/* Traverse the list of stored records in the collation order,
572
starting from the first user record. */
574
rec = page + PAGE_NEW_INFIMUM;
580
offs = rec_get_next_offs(rec, TRUE);
581
if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
585
heap_no = rec_get_heap_no_new(rec);
586
ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
587
ut_a(heap_no < n_heap);
588
ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR);
589
ut_a(offs >= PAGE_ZIP_START);
590
#if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1)
591
# error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2"
593
#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1
594
# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1"
596
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
597
offs |= PAGE_ZIP_DIR_SLOT_OWNED;
600
info_bits = rec_get_info_bits(rec, TRUE);
601
if (UNIV_UNLIKELY(info_bits & REC_INFO_DELETED_FLAG)) {
602
info_bits &= ~REC_INFO_DELETED_FLAG;
603
offs |= PAGE_ZIP_DIR_SLOT_DEL;
605
ut_a(info_bits == min_mark);
606
/* Only the smallest user record can have
607
REC_INFO_MIN_REC_FLAG set. */
610
mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
612
if (UNIV_LIKELY_NULL(recs)) {
613
/* Ensure that each heap_no occurs at most once. */
614
ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
615
/* exclude infimum and supremum */
616
recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
619
ut_a(rec_get_status(rec) == status);
622
offs = page_header_get_field(page, PAGE_FREE);
624
/* Traverse the free list (of deleted records). */
626
ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
629
heap_no = rec_get_heap_no_new(rec);
630
ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
631
ut_a(heap_no < n_heap);
633
ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
634
ut_a(rec_get_status(rec) == status);
636
mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
638
if (UNIV_LIKELY_NULL(recs)) {
639
/* Ensure that each heap_no occurs at most once. */
640
ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
641
/* exclude infimum and supremum */
642
recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
645
offs = rec_get_next_offs(rec, TRUE);
648
/* Ensure that each heap no occurs at least once. */
649
ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap);
652
/**********************************************************************//**
653
Allocate memory for zlib. */
658
void* opaque, /*!< in/out: memory heap */
659
uInt items, /*!< in: number of items to allocate */
660
uInt size) /*!< in: size of an item in bytes */
662
return(mem_heap_alloc(opaque, items * size));
665
/**********************************************************************//**
Deallocate memory for zlib.
A no-op: all zlib memory comes from a mem_heap, which is freed
wholesale by the caller. */
static
void
page_zip_free(
/*==========*/
	void*	opaque __attribute__((unused)),	/*!< in: memory heap */
	void*	address __attribute__((unused)))/*!< in: object to free */
{
}
676
/**********************************************************************//**
677
Configure the zlib allocator to use the given memory heap. */
682
void* stream, /*!< in/out: zlib stream */
683
mem_heap_t* heap) /*!< in: memory heap to use */
685
z_stream* strm = stream;
687
strm->zalloc = page_zip_malloc;
688
strm->zfree = page_zip_free;
692
#if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
/** Symbol for enabling compression and decompression diagnostics */
# define PAGE_ZIP_COMPRESS_DBG
#endif
697
#ifdef PAGE_ZIP_COMPRESS_DBG
698
/** Set this variable in a debugger to enable
699
excessive logging in page_zip_compress(). */
700
UNIV_INTERN ibool page_zip_compress_dbg;
701
/** Set this variable in a debugger to enable
702
binary logging of the data passed to deflate().
703
When this variable is nonzero, it will act
704
as a log file name generator. */
705
UNIV_INTERN unsigned page_zip_compress_log;
707
/**********************************************************************//**
708
Wrapper for deflate(). Log the operation if page_zip_compress_dbg is set.
709
@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
712
page_zip_compress_deflate(
713
/*======================*/
714
FILE* logfile,/*!< in: log file, or NULL */
715
z_streamp strm, /*!< in/out: compressed stream for deflate() */
716
int flush) /*!< in: deflate() flushing method */
719
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
720
ut_print_buf(stderr, strm->next_in, strm->avail_in);
722
if (UNIV_LIKELY_NULL(logfile)) {
723
fwrite(strm->next_in, 1, strm->avail_in, logfile);
725
status = deflate(strm, flush);
726
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
727
fprintf(stderr, " -> %d\n", status);
732
/* Redefine deflate(). */
734
/** Debug wrapper for the zlib compression routine deflate().
735
Log the operation if page_zip_compress_dbg is set.
736
@param strm in/out: compressed stream
737
@param flush in: flushing method
738
@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
739
# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
740
/** Declaration of the logfile parameter */
741
# define FILE_LOGFILE FILE* logfile,
742
/** The logfile parameter */
743
# define LOGFILE logfile,
744
#else /* PAGE_ZIP_COMPRESS_DBG */
745
/** Empty declaration of the logfile parameter */
746
# define FILE_LOGFILE
747
/** Missing logfile parameter */
749
#endif /* PAGE_ZIP_COMPRESS_DBG */
751
/**********************************************************************//**
752
Compress the records of a node pointer page.
753
@return Z_OK, or a zlib error code */
756
page_zip_compress_node_ptrs(
757
/*========================*/
759
z_stream* c_stream, /*!< in/out: compressed page stream */
760
const rec_t** recs, /*!< in: dense page directory
762
ulint n_dense, /*!< in: size of recs[] */
763
dict_index_t* index, /*!< in: the index of the page */
764
byte* storage, /*!< in: end of dense page directory */
765
mem_heap_t* heap) /*!< in: temporary memory heap */
768
ulint* offsets = NULL;
771
const rec_t* rec = *recs++;
773
offsets = rec_get_offsets(rec, index, offsets,
774
ULINT_UNDEFINED, &heap);
775
/* Only leaf nodes may contain externally stored columns. */
776
ut_ad(!rec_offs_any_extern(offsets));
778
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
779
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
780
rec_offs_extra_size(offsets));
782
/* Compress the extra bytes. */
783
c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
786
if (c_stream->avail_in) {
787
err = deflate(c_stream, Z_NO_FLUSH);
788
if (UNIV_UNLIKELY(err != Z_OK)) {
792
ut_ad(!c_stream->avail_in);
794
/* Compress the data bytes, except node_ptr. */
795
c_stream->next_in = (byte*) rec;
796
c_stream->avail_in = rec_offs_data_size(offsets)
798
ut_ad(c_stream->avail_in);
800
err = deflate(c_stream, Z_NO_FLUSH);
801
if (UNIV_UNLIKELY(err != Z_OK)) {
805
ut_ad(!c_stream->avail_in);
807
memcpy(storage - REC_NODE_PTR_SIZE
808
* (rec_get_heap_no_new(rec) - 1),
809
c_stream->next_in, REC_NODE_PTR_SIZE);
810
c_stream->next_in += REC_NODE_PTR_SIZE;
816
/**********************************************************************//**
817
Compress the records of a leaf node of a secondary index.
818
@return Z_OK, or a zlib error code */
821
page_zip_compress_sec(
822
/*==================*/
824
z_stream* c_stream, /*!< in/out: compressed page stream */
825
const rec_t** recs, /*!< in: dense page directory
827
ulint n_dense) /*!< in: size of recs[] */
834
const rec_t* rec = *recs++;
836
/* Compress everything up to this record. */
837
c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
840
if (UNIV_LIKELY(c_stream->avail_in)) {
841
UNIV_MEM_ASSERT_RW(c_stream->next_in,
843
err = deflate(c_stream, Z_NO_FLUSH);
844
if (UNIV_UNLIKELY(err != Z_OK)) {
849
ut_ad(!c_stream->avail_in);
850
ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
852
/* Skip the REC_N_NEW_EXTRA_BYTES. */
854
c_stream->next_in = (byte*) rec;
860
/**********************************************************************//**
861
Compress a record of a leaf node of a clustered index that contains
862
externally stored columns.
863
@return Z_OK, or a zlib error code */
866
page_zip_compress_clust_ext(
867
/*========================*/
869
z_stream* c_stream, /*!< in/out: compressed page stream */
870
const rec_t* rec, /*!< in: record */
871
const ulint* offsets, /*!< in: rec_get_offsets(rec) */
872
ulint trx_id_col, /*!< in: position of of DB_TRX_ID */
873
byte* deleted, /*!< in: dense directory entry pointing
874
to the head of the free list */
875
byte* storage, /*!< in: end of dense page directory */
876
byte** externs, /*!< in/out: pointer to the next
877
available BLOB pointer */
878
ulint* n_blobs) /*!< in/out: number of
879
externally stored columns */
884
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
885
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
886
rec_offs_extra_size(offsets));
888
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
892
if (UNIV_UNLIKELY(i == trx_id_col)) {
893
ut_ad(!rec_offs_nth_extern(offsets, i));
894
/* Store trx_id and roll_ptr
895
in uncompressed form. */
896
src = rec_get_nth_field(rec, offsets, i, &len);
897
ut_ad(src + DATA_TRX_ID_LEN
898
== rec_get_nth_field(rec, offsets,
900
ut_ad(len == DATA_ROLL_PTR_LEN);
902
/* Compress any preceding bytes. */
904
= src - c_stream->next_in;
906
if (c_stream->avail_in) {
907
err = deflate(c_stream, Z_NO_FLUSH);
908
if (UNIV_UNLIKELY(err != Z_OK)) {
914
ut_ad(!c_stream->avail_in);
915
ut_ad(c_stream->next_in == src);
918
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
919
* (rec_get_heap_no_new(rec) - 1),
921
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
924
+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
926
/* Skip also roll_ptr */
928
} else if (rec_offs_nth_extern(offsets, i)) {
929
src = rec_get_nth_field(rec, offsets, i, &len);
930
ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
931
src += len - BTR_EXTERN_FIELD_REF_SIZE;
933
c_stream->avail_in = src
935
if (UNIV_LIKELY(c_stream->avail_in)) {
936
err = deflate(c_stream, Z_NO_FLUSH);
937
if (UNIV_UNLIKELY(err != Z_OK)) {
943
ut_ad(!c_stream->avail_in);
944
ut_ad(c_stream->next_in == src);
946
/* Reserve space for the data at
947
the end of the space reserved for
948
the compressed data and the page
953
<= BTR_EXTERN_FIELD_REF_SIZE)) {
958
ut_ad(*externs == c_stream->next_out
959
+ c_stream->avail_out
960
+ 1/* end of modif. log */);
963
+= BTR_EXTERN_FIELD_REF_SIZE;
965
/* Skip deleted records. */
967
(page_zip_dir_find_low(
969
page_offset(rec)))) {
975
-= BTR_EXTERN_FIELD_REF_SIZE;
976
*externs -= BTR_EXTERN_FIELD_REF_SIZE;
978
/* Copy the BLOB pointer */
979
memcpy(*externs, c_stream->next_in
980
- BTR_EXTERN_FIELD_REF_SIZE,
981
BTR_EXTERN_FIELD_REF_SIZE);
988
/**********************************************************************//**
989
Compress the records of a leaf node of a clustered index.
990
@return Z_OK, or a zlib error code */
993
page_zip_compress_clust(
994
/*====================*/
996
z_stream* c_stream, /*!< in/out: compressed page stream */
997
const rec_t** recs, /*!< in: dense page directory
999
ulint n_dense, /*!< in: size of recs[] */
1000
dict_index_t* index, /*!< in: the index of the page */
1001
ulint* n_blobs, /*!< in: 0; out: number of
1002
externally stored columns */
1003
ulint trx_id_col, /*!< index of the trx_id column */
1004
byte* deleted, /*!< in: dense directory entry pointing
1005
to the head of the free list */
1006
byte* storage, /*!< in: end of dense page directory */
1007
mem_heap_t* heap) /*!< in: temporary memory heap */
1010
ulint* offsets = NULL;
1011
/* BTR_EXTERN_FIELD_REF storage */
1012
byte* externs = storage - n_dense
1013
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
1015
ut_ad(*n_blobs == 0);
1018
const rec_t* rec = *recs++;
1020
offsets = rec_get_offsets(rec, index, offsets,
1021
ULINT_UNDEFINED, &heap);
1022
ut_ad(rec_offs_n_fields(offsets)
1023
== dict_index_get_n_fields(index));
1024
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
1025
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
1026
rec_offs_extra_size(offsets));
1028
/* Compress the extra bytes. */
1029
c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
1030
- c_stream->next_in;
1032
if (c_stream->avail_in) {
1033
err = deflate(c_stream, Z_NO_FLUSH);
1034
if (UNIV_UNLIKELY(err != Z_OK)) {
1039
ut_ad(!c_stream->avail_in);
1040
ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
1042
/* Compress the data bytes. */
1044
c_stream->next_in = (byte*) rec;
1046
/* Check if there are any externally stored columns.
1047
For each externally stored column, store the
1048
BTR_EXTERN_FIELD_REF separately. */
1049
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
1050
ut_ad(dict_index_is_clust(index));
1052
err = page_zip_compress_clust_ext(
1054
c_stream, rec, offsets, trx_id_col,
1055
deleted, storage, &externs, n_blobs);
1057
if (UNIV_UNLIKELY(err != Z_OK)) {
1065
/* Store trx_id and roll_ptr in uncompressed form. */
1066
src = rec_get_nth_field(rec, offsets,
1068
ut_ad(src + DATA_TRX_ID_LEN
1069
== rec_get_nth_field(rec, offsets,
1070
trx_id_col + 1, &len));
1071
ut_ad(len == DATA_ROLL_PTR_LEN);
1072
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
1073
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
1074
rec_offs_extra_size(offsets));
1076
/* Compress any preceding bytes. */
1077
c_stream->avail_in = src - c_stream->next_in;
1079
if (c_stream->avail_in) {
1080
err = deflate(c_stream, Z_NO_FLUSH);
1081
if (UNIV_UNLIKELY(err != Z_OK)) {
1087
ut_ad(!c_stream->avail_in);
1088
ut_ad(c_stream->next_in == src);
1091
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
1092
* (rec_get_heap_no_new(rec) - 1),
1094
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
1097
+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1099
/* Skip also roll_ptr */
1100
ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets));
1103
/* Compress the last bytes of the record. */
1104
c_stream->avail_in = rec + rec_offs_data_size(offsets)
1105
- c_stream->next_in;
1107
if (c_stream->avail_in) {
1108
err = deflate(c_stream, Z_NO_FLUSH);
1109
if (UNIV_UNLIKELY(err != Z_OK)) {
1114
ut_ad(!c_stream->avail_in);
1115
} while (--n_dense);
1121
/**********************************************************************//**
1123
@return TRUE on success, FALSE on failure; page_zip will be left
1124
intact on failure. */
1129
page_zip_des_t* page_zip,/*!< in: size; out: data, n_blobs,
1130
m_start, m_end, m_nonempty */
1131
const page_t* page, /*!< in: uncompressed page */
1132
dict_index_t* index, /*!< in: index of the B-tree node */
1133
mtr_t* mtr) /*!< in: mini-transaction, or NULL */
1137
ulint n_fields;/* number of index fields needed */
1138
byte* fields; /*!< index field information */
1139
byte* buf; /*!< compressed payload of the page */
1140
byte* buf_end;/* end of buf */
1142
ulint slot_size;/* amount of uncompressed bytes per record */
1143
const rec_t** recs; /*!< dense page directory, sorted by address */
1146
ulint* offsets = NULL;
1148
byte* storage;/* storage of uncompressed columns */
1149
#ifndef UNIV_HOTBACKUP
1150
ullint usec = ut_time_us(NULL);
1151
#endif /* !UNIV_HOTBACKUP */
1152
#ifdef PAGE_ZIP_COMPRESS_DBG
1153
FILE* logfile = NULL;
1156
ut_a(page_is_comp(page));
1157
ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
1158
ut_ad(page_simple_validate_new((page_t*) page));
1159
ut_ad(page_zip_simple_validate(page_zip));
1160
ut_ad(dict_table_is_comp(index->table));
1161
ut_ad(!dict_index_is_ibuf(index));
1163
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
1165
/* Check the data that will be omitted. */
1166
ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
1167
infimum_extra, sizeof infimum_extra));
1168
ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
1169
infimum_data, sizeof infimum_data));
1170
ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
1171
/* info_bits == 0, n_owned <= max */
1172
<= PAGE_DIR_SLOT_MAX_N_OWNED);
1173
ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
1174
supremum_extra_data, sizeof supremum_extra_data));
1176
if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
1177
ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
1178
== PAGE_NEW_SUPREMUM);
1181
if (page_is_leaf(page)) {
1182
n_fields = dict_index_get_n_fields(index);
1184
n_fields = dict_index_get_n_unique_in_tree(index);
1187
/* The dense directory excludes the infimum and supremum records. */
1188
n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
1189
#ifdef PAGE_ZIP_COMPRESS_DBG
1190
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
1191
fprintf(stderr, "compress %p %p %lu %lu %lu\n",
1192
(void*) page_zip, (void*) page,
1196
if (UNIV_UNLIKELY(page_zip_compress_log)) {
1197
/* Create a log file for every compression attempt. */
1198
char logfilename[9];
1199
ut_snprintf(logfilename, sizeof logfilename,
1200
"%08x", page_zip_compress_log++);
1201
logfile = fopen(logfilename, "wb");
1204
/* Write the uncompressed page to the log. */
1205
fwrite(page, 1, UNIV_PAGE_SIZE, logfile);
1206
/* Record the compressed size as zero.
1207
This will be overwritten at successful exit. */
1214
#endif /* PAGE_ZIP_COMPRESS_DBG */
1215
#ifndef UNIV_HOTBACKUP
1216
page_zip_stat[page_zip->ssize - 1].compressed++;
1217
#endif /* !UNIV_HOTBACKUP */
1219
if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
1220
>= page_zip_get_size(page_zip))) {
1225
heap = mem_heap_create(page_zip_get_size(page_zip)
1226
+ n_fields * (2 + sizeof *offsets)
1227
+ n_dense * ((sizeof *recs)
1228
- PAGE_ZIP_DIR_SLOT_SIZE)
1229
+ UNIV_PAGE_SIZE * 4
1230
+ (512 << MAX_MEM_LEVEL));
1232
recs = mem_heap_zalloc(heap, n_dense * sizeof *recs);
1234
fields = mem_heap_alloc(heap, (n_fields + 1) * 2);
1236
buf = mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA);
1237
buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA;
1239
/* Compress the data payload. */
1240
page_zip_set_alloc(&c_stream, heap);
1242
err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
1243
Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
1244
MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
1247
c_stream.next_out = buf;
1248
/* Subtract the space reserved for uncompressed data. */
1249
/* Page header and the end marker of the modification log */
1250
c_stream.avail_out = buf_end - buf - 1;
1251
/* Dense page directory and uncompressed columns, if any */
1252
if (page_is_leaf(page)) {
1253
if (dict_index_is_clust(index)) {
1254
trx_id_col = dict_index_get_sys_col_pos(
1255
index, DATA_TRX_ID);
1256
ut_ad(trx_id_col > 0);
1257
ut_ad(trx_id_col != ULINT_UNDEFINED);
1259
slot_size = PAGE_ZIP_DIR_SLOT_SIZE
1260
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1262
/* Signal the absence of trx_id
1263
in page_zip_fields_encode() */
1264
ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
1265
== ULINT_UNDEFINED);
1267
slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
1270
slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
1271
trx_id_col = ULINT_UNDEFINED;
1274
if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size
1275
+ 6/* sizeof(zlib header and footer) */)) {
1279
c_stream.avail_out -= n_dense * slot_size;
1280
c_stream.avail_in = page_zip_fields_encode(n_fields, index,
1281
trx_id_col, fields);
1282
c_stream.next_in = fields;
1283
if (UNIV_LIKELY(!trx_id_col)) {
1284
trx_id_col = ULINT_UNDEFINED;
1287
UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1288
err = deflate(&c_stream, Z_FULL_FLUSH);
1293
ut_ad(!c_stream.avail_in);
1295
page_zip_dir_encode(page, buf_end, recs);
1297
c_stream.next_in = (byte*) page + PAGE_ZIP_START;
1299
storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
1301
/* Compress the records in heap_no order. */
1302
if (UNIV_UNLIKELY(!n_dense)) {
1303
} else if (!page_is_leaf(page)) {
1304
/* This is a node pointer page. */
1305
err = page_zip_compress_node_ptrs(LOGFILE
1306
&c_stream, recs, n_dense,
1307
index, storage, heap);
1308
if (UNIV_UNLIKELY(err != Z_OK)) {
1311
} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
1312
/* This is a leaf page in a secondary index. */
1313
err = page_zip_compress_sec(LOGFILE
1314
&c_stream, recs, n_dense);
1315
if (UNIV_UNLIKELY(err != Z_OK)) {
1319
/* This is a leaf page in a clustered index. */
1320
err = page_zip_compress_clust(LOGFILE
1321
&c_stream, recs, n_dense,
1322
index, &n_blobs, trx_id_col,
1323
buf_end - PAGE_ZIP_DIR_SLOT_SIZE
1324
* page_get_n_recs(page),
1326
if (UNIV_UNLIKELY(err != Z_OK)) {
1331
/* Finish the compression. */
1332
ut_ad(!c_stream.avail_in);
1333
/* Compress any trailing garbage, in case the last record was
1334
allocated from an originally longer space on the free list,
1335
or the data of the last record from page_zip_compress_sec(). */
1337
= page_header_get_field(page, PAGE_HEAP_TOP)
1338
- (c_stream.next_in - page);
1339
ut_a(c_stream.avail_in <= UNIV_PAGE_SIZE - PAGE_ZIP_START - PAGE_DIR);
1341
UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1342
err = deflate(&c_stream, Z_FINISH);
1344
if (UNIV_UNLIKELY(err != Z_STREAM_END)) {
1346
deflateEnd(&c_stream);
1347
mem_heap_free(heap);
1349
#ifdef PAGE_ZIP_COMPRESS_DBG
1353
#endif /* PAGE_ZIP_COMPRESS_DBG */
1354
#ifndef UNIV_HOTBACKUP
1355
page_zip_stat[page_zip->ssize - 1].compressed_usec
1356
+= ut_time_us(NULL) - usec;
1357
#endif /* !UNIV_HOTBACKUP */
1361
err = deflateEnd(&c_stream);
1364
ut_ad(buf + c_stream.total_out == c_stream.next_out);
1365
ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out);
1367
/* Valgrind believes that zlib does not initialize some bits
1368
in the last 7 or 8 bytes of the stream. Make Valgrind happy. */
1369
UNIV_MEM_VALID(buf, c_stream.total_out);
1371
/* Zero out the area reserved for the modification log.
1372
Space for the end marker of the modification log is not
1373
included in avail_out. */
1374
memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */);
1378
#endif /* UNIV_DEBUG */
1379
page_zip->m_end = PAGE_DATA + c_stream.total_out;
1380
page_zip->m_nonempty = FALSE;
1381
page_zip->n_blobs = n_blobs;
1382
/* Copy those header fields that will not be written
1383
in buf_flush_init_for_writing() */
1384
memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
1385
FIL_PAGE_LSN - FIL_PAGE_PREV);
1386
memcpy(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2);
1387
memcpy(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
1388
PAGE_DATA - FIL_PAGE_DATA);
1389
/* Copy the rest of the compressed page */
1390
memcpy(page_zip->data + PAGE_DATA, buf,
1391
page_zip_get_size(page_zip) - PAGE_DATA);
1392
mem_heap_free(heap);
1393
#ifdef UNIV_ZIP_DEBUG
1394
ut_a(page_zip_validate(page_zip, page));
1395
#endif /* UNIV_ZIP_DEBUG */
1398
#ifndef UNIV_HOTBACKUP
1399
page_zip_compress_write_log(page_zip, page, index, mtr);
1400
#endif /* !UNIV_HOTBACKUP */
1403
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
1405
#ifdef PAGE_ZIP_COMPRESS_DBG
1407
/* Record the compressed size of the block. */
1409
mach_write_to_4(sz, c_stream.total_out);
1410
fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET);
1411
fwrite(sz, 1, sizeof sz, logfile);
1414
#endif /* PAGE_ZIP_COMPRESS_DBG */
1415
#ifndef UNIV_HOTBACKUP
1417
page_zip_stat_t* zip_stat
1418
= &page_zip_stat[page_zip->ssize - 1];
1419
zip_stat->compressed_ok++;
1420
zip_stat->compressed_usec += ut_time_us(NULL) - usec;
1422
#endif /* !UNIV_HOTBACKUP */
1427
/**********************************************************************//**
1428
Compare two page directory entries.
1429
@return positive if rec1 > rec2 */
1434
const rec_t* rec1, /*!< in: rec1 */
1435
const rec_t* rec2) /*!< in: rec2 */
1437
return(rec1 > rec2);
1440
/**********************************************************************//**
1441
Sort the dense page directory by address (heap_no). */
1446
rec_t** arr, /*!< in/out: dense page directory */
1447
rec_t** aux_arr,/*!< in/out: work area */
1448
ulint low, /*!< in: lower bound of the sorting area, inclusive */
1449
ulint high) /*!< in: upper bound of the sorting area, exclusive */
1451
UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
1455
/**********************************************************************//**
1456
Deallocate the index information initialized by page_zip_fields_decode(). */
1459
page_zip_fields_free(
1460
/*=================*/
1461
dict_index_t* index) /*!< in: dummy index to be freed */
1464
dict_table_t* table = index->table;
1465
mem_heap_free(index->heap);
1466
mutex_free(&(table->autoinc_mutex));
1467
ut_free(table->name);
1468
mem_heap_free(table->heap);
1472
/**********************************************************************//**
1473
Read the index information for the compressed page.
1474
@return own: dummy index describing the page, or NULL on error */
1477
page_zip_fields_decode(
1478
/*===================*/
1479
const byte* buf, /*!< in: index information */
1480
const byte* end, /*!< in: end of buf */
1481
ulint* trx_id_col)/*!< in: NULL for non-leaf pages;
1482
for leaf pages, pointer to where to store
1483
the position of the trx_id column */
1489
dict_table_t* table;
1490
dict_index_t* index;
1492
/* Determine the number of fields. */
1493
for (b = buf, n = 0; b < end; n++) {
1495
b++; /* skip the second byte */
1499
n--; /* n_nullable or trx_id */
1501
if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
1503
page_zip_fail(("page_zip_fields_decode: n = %lu\n",
1508
if (UNIV_UNLIKELY(b > end)) {
1510
page_zip_fail(("page_zip_fields_decode: %p > %p\n",
1511
(const void*) b, (const void*) end));
1515
table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n,
1517
index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
1518
DICT_HDR_SPACE, 0, n);
1519
index->table = table;
1521
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
1522
index->cached = TRUE;
1524
/* Initialize the fields. */
1525
for (b = buf, i = 0; i < n; i++) {
1531
if (UNIV_UNLIKELY(val & 0x80)) {
1532
/* fixed length > 62 bytes */
1533
val = (val & 0x7f) << 8 | *b++;
1535
mtype = DATA_FIXBINARY;
1536
} else if (UNIV_UNLIKELY(val >= 126)) {
1537
/* variable length with max > 255 bytes */
1539
mtype = DATA_BINARY;
1540
} else if (val <= 1) {
1541
/* variable length with max <= 255 bytes */
1543
mtype = DATA_BINARY;
1545
/* fixed length < 62 bytes */
1547
mtype = DATA_FIXBINARY;
1550
dict_mem_table_add_col(table, NULL, NULL, mtype,
1551
val & 1 ? DATA_NOT_NULL : 0, len);
1552
dict_index_add_col(index, table,
1553
dict_table_get_nth_col(table, i), 0);
1557
if (UNIV_UNLIKELY(val & 0x80)) {
1558
val = (val & 0x7f) << 8 | *b++;
1561
/* Decode the position of the trx_id column. */
1564
val = ULINT_UNDEFINED;
1565
} else if (UNIV_UNLIKELY(val >= n)) {
1566
page_zip_fields_free(index);
1569
index->type = DICT_CLUSTERED;
1574
/* Decode the number of nullable fields. */
1575
if (UNIV_UNLIKELY(index->n_nullable > val)) {
1576
page_zip_fields_free(index);
1579
index->n_nullable = val;
1588
/**********************************************************************//**
1589
Populate the sparse page directory from the dense directory.
1590
@return TRUE on success, FALSE on failure */
1593
page_zip_dir_decode(
1594
/*================*/
1595
const page_zip_des_t* page_zip,/*!< in: dense page directory on
1597
page_t* page, /*!< in: compact page with valid header;
1598
out: trailer and sparse page directory
1600
rec_t** recs, /*!< out: dense page directory sorted by
1601
ascending address (and heap_no) */
1602
rec_t** recs_aux,/*!< in/out: scratch area */
1603
ulint n_dense)/*!< in: number of user records, and
1604
size of recs[] and recs_aux[] */
1610
n_recs = page_get_n_recs(page);
1612
if (UNIV_UNLIKELY(n_recs > n_dense)) {
1613
page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n",
1614
(ulong) n_recs, (ulong) n_dense));
1618
/* Traverse the list of stored records in the sorting order,
1619
starting from the first user record. */
1621
slot = page + (UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
1622
UNIV_PREFETCH_RW(slot);
1624
/* Zero out the page trailer. */
1625
memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);
1627
mach_write_to_2(slot, PAGE_NEW_INFIMUM);
1628
slot -= PAGE_DIR_SLOT_SIZE;
1629
UNIV_PREFETCH_RW(slot);
1631
/* Initialize the sparse directory and copy the dense directory. */
1632
for (i = 0; i < n_recs; i++) {
1633
ulint offs = page_zip_dir_get(page_zip, i);
1635
if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
1636
mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
1637
slot -= PAGE_DIR_SLOT_SIZE;
1638
UNIV_PREFETCH_RW(slot);
1641
if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
1642
< PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
1643
page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n",
1644
(unsigned) i, (unsigned) n_recs,
1649
recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
1652
mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
1654
const page_dir_slot_t* last_slot = page_dir_get_nth_slot(
1655
page, page_dir_get_n_slots(page) - 1);
1657
if (UNIV_UNLIKELY(slot != last_slot)) {
1658
page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
1660
(const void*) last_slot));
1665
/* Copy the rest of the dense directory. */
1666
for (; i < n_dense; i++) {
1667
ulint offs = page_zip_dir_get(page_zip, i);
1669
if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
1670
page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n",
1671
(unsigned) i, (unsigned) n_dense,
1676
recs[i] = page + offs;
1679
if (UNIV_LIKELY(n_dense > 1)) {
1680
page_zip_dir_sort(recs, recs_aux, 0, n_dense);
1685
/**********************************************************************//**
1686
Initialize the REC_N_NEW_EXTRA_BYTES of each record.
1687
@return TRUE on success, FALSE on failure */
1690
page_zip_set_extra_bytes(
1691
/*=====================*/
1692
const page_zip_des_t* page_zip,/*!< in: compressed page */
1693
page_t* page, /*!< in/out: uncompressed page */
1694
ulint info_bits)/*!< in: REC_INFO_MIN_REC_FLAG or 0 */
1702
n = page_get_n_recs(page);
1703
rec = page + PAGE_NEW_INFIMUM;
1705
for (i = 0; i < n; i++) {
1706
offs = page_zip_dir_get(page_zip, i);
1708
if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_DEL)) {
1709
info_bits |= REC_INFO_DELETED_FLAG;
1711
if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
1712
info_bits |= n_owned;
1717
offs &= PAGE_ZIP_DIR_SLOT_MASK;
1718
if (UNIV_UNLIKELY(offs < PAGE_ZIP_START
1719
+ REC_N_NEW_EXTRA_BYTES)) {
1720
page_zip_fail(("page_zip_set_extra_bytes 1:"
1722
(unsigned) i, (unsigned) n,
1727
rec_set_next_offs_new(rec, offs);
1729
rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
1733
/* Set the next pointer of the last user record. */
1734
rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);
1736
/* Set n_owned of the supremum record. */
1737
page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;
1739
/* The dense directory excludes the infimum and supremum records. */
1740
n = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
1743
if (UNIV_LIKELY(i == n)) {
1747
page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n",
1748
(unsigned) i, (unsigned) n));
1752
offs = page_zip_dir_get(page_zip, i);
1754
/* Set the extra bytes of deleted records on the free list. */
1756
if (UNIV_UNLIKELY(!offs)
1757
|| UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
1759
page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n",
1765
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
1771
offs = page_zip_dir_get(page_zip, i);
1772
rec_set_next_offs_new(rec, offs);
1775
/* Terminate the free list. */
1776
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
1777
rec_set_next_offs_new(rec, 0);
1782
/**********************************************************************//**
1783
Apply the modification log to a record containing externally stored
1784
columns. Do not copy the fields that are stored separately.
1785
@return pointer to modification log, or NULL on failure */
1788
page_zip_apply_log_ext(
1789
/*===================*/
1790
rec_t* rec, /*!< in/out: record */
1791
const ulint* offsets, /*!< in: rec_get_offsets(rec) */
1792
ulint trx_id_col, /*!< in: position of of DB_TRX_ID */
1793
const byte* data, /*!< in: modification log */
1794
const byte* end) /*!< in: end of modification log */
1798
byte* next_out = rec;
1800
/* Check if there are any externally stored columns.
1801
For each externally stored column, skip the
1802
BTR_EXTERN_FIELD_REF. */
1804
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
1807
if (UNIV_UNLIKELY(i == trx_id_col)) {
1808
/* Skip trx_id and roll_ptr */
1809
dst = rec_get_nth_field(rec, offsets,
1811
if (UNIV_UNLIKELY(dst - next_out >= end - data)
1813
(len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
1814
|| rec_offs_nth_extern(offsets, i)) {
1815
page_zip_fail(("page_zip_apply_log_ext:"
1817
" %p - %p >= %p - %p\n",
1820
(const void*) next_out,
1822
(const void*) data));
1826
memcpy(next_out, data, dst - next_out);
1827
data += dst - next_out;
1828
next_out = dst + (DATA_TRX_ID_LEN
1829
+ DATA_ROLL_PTR_LEN);
1830
} else if (rec_offs_nth_extern(offsets, i)) {
1831
dst = rec_get_nth_field(rec, offsets,
1834
>= BTR_EXTERN_FIELD_REF_SIZE);
1836
len += dst - next_out
1837
- BTR_EXTERN_FIELD_REF_SIZE;
1839
if (UNIV_UNLIKELY(data + len >= end)) {
1840
page_zip_fail(("page_zip_apply_log_ext: "
1841
"ext %p+%lu >= %p\n",
1844
(const void*) end));
1848
memcpy(next_out, data, len);
1851
+ BTR_EXTERN_FIELD_REF_SIZE;
1855
/* Copy the last bytes of the record. */
1856
len = rec_get_end(rec, offsets) - next_out;
1857
if (UNIV_UNLIKELY(data + len >= end)) {
1858
page_zip_fail(("page_zip_apply_log_ext: "
1859
"last %p+%lu >= %p\n",
1862
(const void*) end));
1865
memcpy(next_out, data, len);
1871
/**********************************************************************//**
1872
Apply the modification log to an uncompressed page.
1873
Do not copy the fields that are stored separately.
1874
@return pointer to end of modification log, or NULL on failure */
1879
const byte* data, /*!< in: modification log */
1880
ulint size, /*!< in: maximum length of the log, in bytes */
1881
rec_t** recs, /*!< in: dense page directory,
1882
sorted by address (indexed by
1883
heap_no - PAGE_HEAP_NO_USER_LOW) */
1884
ulint n_dense,/*!< in: size of recs[] */
1885
ulint trx_id_col,/*!< in: column number of trx_id in the index,
1886
or ULINT_UNDEFINED if none */
1888
/*!< in: heap_no and status bits for
1889
the next record to uncompress */
1890
dict_index_t* index, /*!< in: index of the page */
1891
ulint* offsets)/*!< in/out: work area for
1892
rec_get_offsets_reverse() */
1894
const byte* const end = data + size;
1903
if (UNIV_UNLIKELY(!val)) {
1907
val = (val & 0x7f) << 8 | *data++;
1908
if (UNIV_UNLIKELY(!val)) {
1909
page_zip_fail(("page_zip_apply_log:"
1910
" invalid val %x%x\n",
1911
data[-2], data[-1]));
1915
if (UNIV_UNLIKELY(data >= end)) {
1916
page_zip_fail(("page_zip_apply_log: %p >= %p\n",
1918
(const void*) end));
1921
if (UNIV_UNLIKELY((val >> 1) > n_dense)) {
1922
page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n",
1923
(ulong) val, (ulong) n_dense));
1927
/* Determine the heap number and status bits of the record. */
1928
rec = recs[(val >> 1) - 1];
1930
hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT;
1931
hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);
1933
/* This may either be an old record that is being
1934
overwritten (updated in place, or allocated from
1935
the free list), or a new record, with the next
1936
available_heap_no. */
1937
if (UNIV_UNLIKELY(hs > heap_status)) {
1938
page_zip_fail(("page_zip_apply_log: %lu > %lu\n",
1939
(ulong) hs, (ulong) heap_status));
1941
} else if (hs == heap_status) {
1942
/* A new record was allocated from the heap. */
1943
if (UNIV_UNLIKELY(val & 1)) {
1944
/* Only existing records may be cleared. */
1945
page_zip_fail(("page_zip_apply_log:"
1946
" attempting to create"
1947
" deleted rec %lu\n",
1951
heap_status += 1 << REC_HEAP_NO_SHIFT;
1954
mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);
1957
/* Clear the data bytes of the record. */
1958
mem_heap_t* heap = NULL;
1960
offs = rec_get_offsets(rec, index, offsets,
1961
ULINT_UNDEFINED, &heap);
1962
memset(rec, 0, rec_offs_data_size(offs));
1964
if (UNIV_LIKELY_NULL(heap)) {
1965
mem_heap_free(heap);
1970
#if REC_STATUS_NODE_PTR != TRUE
1971
# error "REC_STATUS_NODE_PTR != TRUE"
1973
rec_get_offsets_reverse(data, index,
1974
hs & REC_STATUS_NODE_PTR,
1976
rec_offs_make_valid(rec, index, offsets);
1978
/* Copy the extra bytes (backwards). */
1980
byte* start = rec_get_start(rec, offsets);
1981
byte* b = rec - REC_N_NEW_EXTRA_BYTES;
1982
while (b != start) {
1987
/* Copy the data bytes. */
1988
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
1989
/* Non-leaf nodes should not contain any
1990
externally stored columns. */
1991
if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
1992
page_zip_fail(("page_zip_apply_log: "
1993
"%lu&REC_STATUS_NODE_PTR\n",
1998
data = page_zip_apply_log_ext(
1999
rec, offsets, trx_id_col, data, end);
2001
if (UNIV_UNLIKELY(!data)) {
2004
} else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
2005
len = rec_offs_data_size(offsets)
2006
- REC_NODE_PTR_SIZE;
2007
/* Copy the data bytes, except node_ptr. */
2008
if (UNIV_UNLIKELY(data + len >= end)) {
2009
page_zip_fail(("page_zip_apply_log: "
2010
"node_ptr %p+%lu >= %p\n",
2013
(const void*) end));
2016
memcpy(rec, data, len);
2018
} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
2019
len = rec_offs_data_size(offsets);
2021
/* Copy all data bytes of
2022
a record in a secondary index. */
2023
if (UNIV_UNLIKELY(data + len >= end)) {
2024
page_zip_fail(("page_zip_apply_log: "
2025
"sec %p+%lu >= %p\n",
2028
(const void*) end));
2032
memcpy(rec, data, len);
2035
/* Skip DB_TRX_ID and DB_ROLL_PTR. */
2036
ulint l = rec_get_nth_field_offs(offsets,
2040
if (UNIV_UNLIKELY(data + l >= end)
2041
|| UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
2042
+ DATA_ROLL_PTR_LEN))) {
2043
page_zip_fail(("page_zip_apply_log: "
2044
"trx_id %p+%lu >= %p\n",
2047
(const void*) end));
2051
/* Copy any preceding data bytes. */
2052
memcpy(rec, data, l);
2055
/* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
2056
b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2057
len = rec_get_end(rec, offsets) - b;
2058
if (UNIV_UNLIKELY(data + len >= end)) {
2059
page_zip_fail(("page_zip_apply_log: "
2060
"clust %p+%lu >= %p\n",
2063
(const void*) end));
2066
memcpy(b, data, len);
2072
/**********************************************************************//**
2073
Decompress the records of a node pointer page.
2074
@return TRUE on success, FALSE on failure */
2077
page_zip_decompress_node_ptrs(
2078
/*==========================*/
2079
page_zip_des_t* page_zip, /*!< in/out: compressed page */
2080
z_stream* d_stream, /*!< in/out: compressed page stream */
2081
rec_t** recs, /*!< in: dense page directory
2082
sorted by address */
2083
ulint n_dense, /*!< in: size of recs[] */
2084
dict_index_t* index, /*!< in: the index of the page */
2085
ulint* offsets, /*!< in/out: temporary offsets */
2086
mem_heap_t* heap) /*!< in: temporary memory heap */
2088
ulint heap_status = REC_STATUS_NODE_PTR
2089
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2091
const byte* storage;
2093
/* Subtract the space reserved for uncompressed data. */
2094
d_stream->avail_in -= n_dense
2095
* (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE);
2097
/* Decompress the records in heap_no order. */
2098
for (slot = 0; slot < n_dense; slot++) {
2099
rec_t* rec = recs[slot];
2101
d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2102
- d_stream->next_out;
2104
ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
2105
- PAGE_ZIP_START - PAGE_DIR);
2106
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2108
/* Apparently, n_dense has grown
2109
since the time the page was last compressed. */
2113
if (!d_stream->avail_out) {
2118
page_zip_fail(("page_zip_decompress_node_ptrs:"
2119
" 1 inflate(Z_SYNC_FLUSH)=%s\n",
2124
ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2125
/* Prepare to decompress the data bytes. */
2126
d_stream->next_out = rec;
2127
/* Set heap_no and the status bits. */
2128
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2129
heap_status += 1 << REC_HEAP_NO_SHIFT;
2131
/* Read the offsets. The status bits are needed here. */
2132
offsets = rec_get_offsets(rec, index, offsets,
2133
ULINT_UNDEFINED, &heap);
2135
/* Non-leaf nodes should not have any externally
2137
ut_ad(!rec_offs_any_extern(offsets));
2139
/* Decompress the data bytes, except node_ptr. */
2140
d_stream->avail_out = rec_offs_data_size(offsets)
2141
- REC_NODE_PTR_SIZE;
2143
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2148
if (!d_stream->avail_out) {
2153
page_zip_fail(("page_zip_decompress_node_ptrs:"
2154
" 2 inflate(Z_SYNC_FLUSH)=%s\n",
2159
/* Clear the node pointer in case the record
2160
will be deleted and the space will be reallocated
2161
to a smaller record. */
2162
memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE);
2163
d_stream->next_out += REC_NODE_PTR_SIZE;
2165
ut_ad(d_stream->next_out == rec_get_end(rec, offsets));
2168
/* Decompress any trailing garbage, in case the last record was
2169
allocated from an originally longer space on the free list. */
2170
d_stream->avail_out = page_header_get_field(page_zip->data,
2172
- page_offset(d_stream->next_out);
2173
if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2174
- PAGE_ZIP_START - PAGE_DIR)) {
2176
page_zip_fail(("page_zip_decompress_node_ptrs:"
2177
" avail_out = %u\n",
2178
d_stream->avail_out));
2182
if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2183
page_zip_fail(("page_zip_decompress_node_ptrs:"
2184
" inflate(Z_FINISH)=%s\n",
2187
inflateEnd(d_stream);
2191
/* Note that d_stream->avail_out > 0 may hold here
2192
if the modification log is nonempty. */
2195
if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2200
page_t* page = page_align(d_stream->next_out);
2202
/* Clear the unused heap space on the uncompressed page. */
2203
memset(d_stream->next_out, 0,
2204
page_dir_get_nth_slot(page,
2205
page_dir_get_n_slots(page) - 1)
2206
- d_stream->next_out);
2210
page_zip->m_start = PAGE_DATA + d_stream->total_in;
2211
#endif /* UNIV_DEBUG */
2213
/* Apply the modification log. */
2215
const byte* mod_log_ptr;
2216
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2217
d_stream->avail_in + 1,
2219
ULINT_UNDEFINED, heap_status,
2222
if (UNIV_UNLIKELY(!mod_log_ptr)) {
2225
page_zip->m_end = mod_log_ptr - page_zip->data;
2226
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2230
(page_zip_get_trailer_len(page_zip,
2231
dict_index_is_clust(index), NULL)
2232
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
2233
page_zip_fail(("page_zip_decompress_node_ptrs:"
2234
" %lu + %lu >= %lu, %lu\n",
2235
(ulong) page_zip_get_trailer_len(
2236
page_zip, dict_index_is_clust(index),
2238
(ulong) page_zip->m_end,
2239
(ulong) page_zip_get_size(page_zip),
2240
(ulong) dict_index_is_clust(index)));
2244
/* Restore the uncompressed columns in heap_no order. */
2245
storage = page_zip->data + page_zip_get_size(page_zip)
2246
- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2248
for (slot = 0; slot < n_dense; slot++) {
2249
rec_t* rec = recs[slot];
2251
offsets = rec_get_offsets(rec, index, offsets,
2252
ULINT_UNDEFINED, &heap);
2253
/* Non-leaf nodes should not have any externally
2255
ut_ad(!rec_offs_any_extern(offsets));
2256
storage -= REC_NODE_PTR_SIZE;
2258
memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
2259
storage, REC_NODE_PTR_SIZE);
2265
/**********************************************************************//**
2266
Decompress the records of a leaf node of a secondary index.
2267
@return TRUE on success, FALSE on failure */
2270
page_zip_decompress_sec(
2271
/*====================*/
2272
page_zip_des_t* page_zip, /*!< in/out: compressed page */
2273
z_stream* d_stream, /*!< in/out: compressed page stream */
2274
rec_t** recs, /*!< in: dense page directory
2275
sorted by address */
2276
ulint n_dense, /*!< in: size of recs[] */
2277
dict_index_t* index, /*!< in: the index of the page */
2278
ulint* offsets) /*!< in/out: temporary offsets */
2280
ulint heap_status = REC_STATUS_ORDINARY
2281
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2284
ut_a(!dict_index_is_clust(index));
2286
/* Subtract the space reserved for uncompressed data. */
2287
d_stream->avail_in -= n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2289
for (slot = 0; slot < n_dense; slot++) {
2290
rec_t* rec = recs[slot];
2292
/* Decompress everything up to this record. */
2293
d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2294
- d_stream->next_out;
2296
if (UNIV_LIKELY(d_stream->avail_out)) {
2297
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2299
/* Apparently, n_dense has grown
2300
since the time the page was last compressed. */
2304
if (!d_stream->avail_out) {
2309
page_zip_fail(("page_zip_decompress_sec:"
2310
" inflate(Z_SYNC_FLUSH)=%s\n",
2316
ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2318
/* Skip the REC_N_NEW_EXTRA_BYTES. */
2320
d_stream->next_out = rec;
2322
/* Set heap_no and the status bits. */
2323
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2324
heap_status += 1 << REC_HEAP_NO_SHIFT;
2327
/* Decompress the data of the last record and any trailing garbage,
2328
in case the last record was allocated from an originally longer space
2329
on the free list. */
2330
d_stream->avail_out = page_header_get_field(page_zip->data,
2332
- page_offset(d_stream->next_out);
2333
if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2334
- PAGE_ZIP_START - PAGE_DIR)) {
2336
page_zip_fail(("page_zip_decompress_sec:"
2337
" avail_out = %u\n",
2338
d_stream->avail_out));
2342
if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2343
page_zip_fail(("page_zip_decompress_sec:"
2344
" inflate(Z_FINISH)=%s\n",
2347
inflateEnd(d_stream);
2351
/* Note that d_stream->avail_out > 0 may hold here
2352
if the modification log is nonempty. */
2355
if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2360
page_t* page = page_align(d_stream->next_out);
2362
/* Clear the unused heap space on the uncompressed page. */
2363
memset(d_stream->next_out, 0,
2364
page_dir_get_nth_slot(page,
2365
page_dir_get_n_slots(page) - 1)
2366
- d_stream->next_out);
2370
page_zip->m_start = PAGE_DATA + d_stream->total_in;
2371
#endif /* UNIV_DEBUG */
2373
/* Apply the modification log. */
2375
const byte* mod_log_ptr;
2376
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2377
d_stream->avail_in + 1,
2379
ULINT_UNDEFINED, heap_status,
2382
if (UNIV_UNLIKELY(!mod_log_ptr)) {
2385
page_zip->m_end = mod_log_ptr - page_zip->data;
2386
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2389
if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE, NULL)
2390
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
2392
page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n",
2393
(ulong) page_zip_get_trailer_len(
2394
page_zip, FALSE, NULL),
2395
(ulong) page_zip->m_end,
2396
(ulong) page_zip_get_size(page_zip)));
2400
/* There are no uncompressed columns on leaf pages of
2401
secondary indexes. */
2406
/**********************************************************************//**
2407
Decompress a record of a leaf node of a clustered index that contains
2408
externally stored columns.
2409
@return TRUE on success */
2412
page_zip_decompress_clust_ext(
2413
/*==========================*/
2414
z_stream* d_stream, /*!< in/out: compressed page stream */
2415
rec_t* rec, /*!< in/out: record */
2416
const ulint* offsets, /*!< in: rec_get_offsets(rec) */
2417
ulint trx_id_col) /*!< in: position of of DB_TRX_ID */
2421
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
2425
if (UNIV_UNLIKELY(i == trx_id_col)) {
2426
/* Skip trx_id and roll_ptr */
2427
dst = rec_get_nth_field(rec, offsets, i, &len);
2428
if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
2429
+ DATA_ROLL_PTR_LEN)) {
2431
page_zip_fail(("page_zip_decompress_clust_ext:"
2432
" len[%lu] = %lu\n",
2433
(ulong) i, (ulong) len));
2437
if (rec_offs_nth_extern(offsets, i)) {
2439
page_zip_fail(("page_zip_decompress_clust_ext:"
2440
" DB_TRX_ID at %lu is ext\n",
2445
d_stream->avail_out = dst - d_stream->next_out;
2447
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2451
if (!d_stream->avail_out) {
2456
page_zip_fail(("page_zip_decompress_clust_ext:"
2457
" 1 inflate(Z_SYNC_FLUSH)=%s\n",
2462
ut_ad(d_stream->next_out == dst);
2464
/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
2465
avoid uninitialized bytes in case the record
2466
is affected by page_zip_apply_log(). */
2467
memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2469
d_stream->next_out += DATA_TRX_ID_LEN
2470
+ DATA_ROLL_PTR_LEN;
2471
} else if (rec_offs_nth_extern(offsets, i)) {
2472
dst = rec_get_nth_field(rec, offsets, i, &len);
2473
ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
2474
dst += len - BTR_EXTERN_FIELD_REF_SIZE;
2476
d_stream->avail_out = dst - d_stream->next_out;
2477
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2481
if (!d_stream->avail_out) {
2486
page_zip_fail(("page_zip_decompress_clust_ext:"
2487
" 2 inflate(Z_SYNC_FLUSH)=%s\n",
2492
ut_ad(d_stream->next_out == dst);
2494
/* Clear the BLOB pointer in case
2495
the record will be deleted and the
2496
space will not be reused. Note that
2497
the final initialization of the BLOB
2498
pointers (copying from "externs"
2499
or clearing) will have to take place
2500
only after the page modification log
2501
has been applied. Otherwise, we
2502
could end up with an uninitialized
2503
BLOB pointer when a record is deleted,
2504
reallocated and deleted. */
2505
memset(d_stream->next_out, 0,
2506
BTR_EXTERN_FIELD_REF_SIZE);
2508
+= BTR_EXTERN_FIELD_REF_SIZE;
2515
/**********************************************************************//**
2516
Compress the records of a leaf node of a clustered index.
2517
@return TRUE on success, FALSE on failure */
2520
page_zip_decompress_clust(
2521
/*======================*/
2522
page_zip_des_t* page_zip, /*!< in/out: compressed page */
2523
z_stream* d_stream, /*!< in/out: compressed page stream */
2524
rec_t** recs, /*!< in: dense page directory
2525
sorted by address */
2526
ulint n_dense, /*!< in: size of recs[] */
2527
dict_index_t* index, /*!< in: the index of the page */
2528
ulint trx_id_col, /*!< index of the trx_id column */
2529
ulint* offsets, /*!< in/out: temporary offsets */
2530
mem_heap_t* heap) /*!< in: temporary memory heap */
2534
ulint heap_status = REC_STATUS_ORDINARY
2535
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2536
const byte* storage;
2537
const byte* externs;
2539
ut_a(dict_index_is_clust(index));
2541
/* Subtract the space reserved for uncompressed data. */
2542
d_stream->avail_in -= n_dense * (PAGE_ZIP_DIR_SLOT_SIZE
2544
+ DATA_ROLL_PTR_LEN);
2546
/* Decompress the records in heap_no order. */
2547
for (slot = 0; slot < n_dense; slot++) {
2548
rec_t* rec = recs[slot];
2550
d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2551
- d_stream->next_out;
2553
ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
2554
- PAGE_ZIP_START - PAGE_DIR);
2555
err = inflate(d_stream, Z_SYNC_FLUSH);
2558
/* Apparently, n_dense has grown
2559
since the time the page was last compressed. */
2563
if (UNIV_LIKELY(!d_stream->avail_out)) {
2568
page_zip_fail(("page_zip_decompress_clust:"
2569
" 1 inflate(Z_SYNC_FLUSH)=%s\n",
2574
ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2575
/* Prepare to decompress the data bytes. */
2576
d_stream->next_out = rec;
2577
/* Set heap_no and the status bits. */
2578
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2579
heap_status += 1 << REC_HEAP_NO_SHIFT;
2581
/* Read the offsets. The status bits are needed here. */
2582
offsets = rec_get_offsets(rec, index, offsets,
2583
ULINT_UNDEFINED, &heap);
2585
/* This is a leaf page in a clustered index. */
2587
/* Check if there are any externally stored columns.
2588
For each externally stored column, restore the
2589
BTR_EXTERN_FIELD_REF separately. */
2591
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
2593
(!page_zip_decompress_clust_ext(
2594
d_stream, rec, offsets, trx_id_col))) {
2599
/* Skip trx_id and roll_ptr */
2601
byte* dst = rec_get_nth_field(rec, offsets,
2603
if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
2604
+ DATA_ROLL_PTR_LEN)) {
2606
page_zip_fail(("page_zip_decompress_clust:"
2607
" len = %lu\n", (ulong) len));
2611
d_stream->avail_out = dst - d_stream->next_out;
2613
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2617
if (!d_stream->avail_out) {
2622
page_zip_fail(("page_zip_decompress_clust:"
2623
" 2 inflate(Z_SYNC_FLUSH)=%s\n",
2628
ut_ad(d_stream->next_out == dst);
2630
/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
2631
avoid uninitialized bytes in case the record
2632
is affected by page_zip_apply_log(). */
2633
memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2635
d_stream->next_out += DATA_TRX_ID_LEN
2636
+ DATA_ROLL_PTR_LEN;
2639
/* Decompress the last bytes of the record. */
2640
d_stream->avail_out = rec_get_end(rec, offsets)
2641
- d_stream->next_out;
2643
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2647
if (!d_stream->avail_out) {
2652
page_zip_fail(("page_zip_decompress_clust:"
2653
" 3 inflate(Z_SYNC_FLUSH)=%s\n",
2659
/* Decompress any trailing garbage, in case the last record was
2660
allocated from an originally longer space on the free list. */
2661
d_stream->avail_out = page_header_get_field(page_zip->data,
2663
- page_offset(d_stream->next_out);
2664
if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2665
- PAGE_ZIP_START - PAGE_DIR)) {
2667
page_zip_fail(("page_zip_decompress_clust:"
2668
" avail_out = %u\n",
2669
d_stream->avail_out));
2673
if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2674
page_zip_fail(("page_zip_decompress_clust:"
2675
" inflate(Z_FINISH)=%s\n",
2678
inflateEnd(d_stream);
2682
/* Note that d_stream->avail_out > 0 may hold here
2683
if the modification log is nonempty. */
2686
if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2691
page_t* page = page_align(d_stream->next_out);
2693
/* Clear the unused heap space on the uncompressed page. */
2694
memset(d_stream->next_out, 0,
2695
page_dir_get_nth_slot(page,
2696
page_dir_get_n_slots(page) - 1)
2697
- d_stream->next_out);
2701
page_zip->m_start = PAGE_DATA + d_stream->total_in;
2702
#endif /* UNIV_DEBUG */
2704
/* Apply the modification log. */
2706
const byte* mod_log_ptr;
2707
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2708
d_stream->avail_in + 1,
2710
trx_id_col, heap_status,
2713
if (UNIV_UNLIKELY(!mod_log_ptr)) {
2716
page_zip->m_end = mod_log_ptr - page_zip->data;
2717
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2720
if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE, NULL)
2721
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
2723
page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n",
2724
(ulong) page_zip_get_trailer_len(
2725
page_zip, TRUE, NULL),
2726
(ulong) page_zip->m_end,
2727
(ulong) page_zip_get_size(page_zip)));
2731
storage = page_zip->data + page_zip_get_size(page_zip)
2732
- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2734
externs = storage - n_dense
2735
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2737
/* Restore the uncompressed columns in heap_no order. */
2739
for (slot = 0; slot < n_dense; slot++) {
2743
rec_t* rec = recs[slot];
2744
ibool exists = !page_zip_dir_find_free(
2745
page_zip, page_offset(rec));
2746
offsets = rec_get_offsets(rec, index, offsets,
2747
ULINT_UNDEFINED, &heap);
2749
dst = rec_get_nth_field(rec, offsets,
2751
ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2752
storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
2753
memcpy(dst, storage,
2754
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2756
/* Check if there are any externally stored
2757
columns in this record. For each externally
2758
stored column, restore or clear the
2759
BTR_EXTERN_FIELD_REF. */
2760
if (!rec_offs_any_extern(offsets)) {
2764
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
2765
if (!rec_offs_nth_extern(offsets, i)) {
2768
dst = rec_get_nth_field(rec, offsets, i, &len);
2770
if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) {
2771
page_zip_fail(("page_zip_decompress_clust:"
2777
dst += len - BTR_EXTERN_FIELD_REF_SIZE;
2779
if (UNIV_LIKELY(exists)) {
2781
restore the BLOB pointer */
2782
externs -= BTR_EXTERN_FIELD_REF_SIZE;
2785
(externs < page_zip->data
2786
+ page_zip->m_end)) {
2787
page_zip_fail(("page_zip_"
2788
"decompress_clust: "
2790
(const void*) externs,
2798
memcpy(dst, externs,
2799
BTR_EXTERN_FIELD_REF_SIZE);
2801
page_zip->n_blobs++;
2804
clear the BLOB pointer */
2806
BTR_EXTERN_FIELD_REF_SIZE);
2814
/**********************************************************************//**
2815
Decompress a page. This function should tolerate errors on the compressed
2816
page. Instead of letting assertions fail, it will return FALSE if an
2817
inconsistency is detected.
2818
@return TRUE on success, FALSE on failure */
2821
page_zip_decompress(
2822
/*================*/
2823
page_zip_des_t* page_zip,/*!< in: data, ssize;
2824
out: m_start, m_end, m_nonempty, n_blobs */
2825
page_t* page, /*!< out: uncompressed page, may be trashed */
2826
ibool all) /*!< in: TRUE=decompress the whole page;
2827
FALSE=verify but do not copy some
2828
page header fields that should not change
2829
after page creation */
2832
dict_index_t* index = NULL;
2833
rec_t** recs; /*!< dense page directory, sorted by address */
2834
ulint n_dense;/* number of user records on the page */
2835
ulint trx_id_col = ULINT_UNDEFINED;
2838
#ifndef UNIV_HOTBACKUP
2839
ullint usec = ut_time_us(NULL);
2840
#endif /* !UNIV_HOTBACKUP */
2842
ut_ad(page_zip_simple_validate(page_zip));
2843
UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
2844
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
2846
/* The dense directory excludes the infimum and supremum records. */
2847
n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW;
2848
if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
2849
>= page_zip_get_size(page_zip))) {
2850
page_zip_fail(("page_zip_decompress 1: %lu %lu\n",
2852
(ulong) page_zip_get_size(page_zip)));
2856
heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE);
2857
recs = mem_heap_alloc(heap, n_dense * (2 * sizeof *recs));
2860
/* Copy the page header. */
2861
memcpy(page, page_zip->data, PAGE_DATA);
2863
/* Check that the bytes that we skip are identical. */
2864
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
2865
ut_a(!memcmp(FIL_PAGE_TYPE + page,
2866
FIL_PAGE_TYPE + page_zip->data,
2867
PAGE_HEADER - FIL_PAGE_TYPE));
2868
ut_a(!memcmp(PAGE_HEADER + PAGE_LEVEL + page,
2869
PAGE_HEADER + PAGE_LEVEL + page_zip->data,
2870
PAGE_DATA - (PAGE_HEADER + PAGE_LEVEL)));
2871
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
2873
/* Copy the mutable parts of the page header. */
2874
memcpy(page, page_zip->data, FIL_PAGE_TYPE);
2875
memcpy(PAGE_HEADER + page, PAGE_HEADER + page_zip->data,
2876
PAGE_LEVEL - PAGE_N_DIR_SLOTS);
2878
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
2879
/* Check that the page headers match after copying. */
2880
ut_a(!memcmp(page, page_zip->data, PAGE_DATA));
2881
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
2884
#ifdef UNIV_ZIP_DEBUG
2885
/* Clear the uncompressed page, except the header. */
2886
memset(PAGE_DATA + page, 0x55, UNIV_PAGE_SIZE - PAGE_DATA);
2887
#endif /* UNIV_ZIP_DEBUG */
2888
UNIV_MEM_INVALID(PAGE_DATA + page, UNIV_PAGE_SIZE - PAGE_DATA);
2890
/* Copy the page directory. */
2891
if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
2892
recs + n_dense, n_dense))) {
2894
mem_heap_free(heap);
2898
/* Copy the infimum and supremum records. */
2899
memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
2900
infimum_extra, sizeof infimum_extra);
2901
if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
2902
rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
2905
rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
2906
page_zip_dir_get(page_zip, 0)
2907
& PAGE_ZIP_DIR_SLOT_MASK);
2909
memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data);
2910
memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
2911
supremum_extra_data, sizeof supremum_extra_data);
2913
page_zip_set_alloc(&d_stream, heap);
2915
if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
2920
d_stream.next_in = page_zip->data + PAGE_DATA;
2921
/* Subtract the space reserved for
2922
the page header and the end marker of the modification log. */
2923
d_stream.avail_in = page_zip_get_size(page_zip) - (PAGE_DATA + 1);
2925
d_stream.next_out = page + PAGE_ZIP_START;
2926
d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
2928
/* Decode the zlib header and the index information. */
2929
if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
2931
page_zip_fail(("page_zip_decompress:"
2932
" 1 inflate(Z_BLOCK)=%s\n", d_stream.msg));
2936
if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
2938
page_zip_fail(("page_zip_decompress:"
2939
" 2 inflate(Z_BLOCK)=%s\n", d_stream.msg));
2943
index = page_zip_fields_decode(
2944
page + PAGE_ZIP_START, d_stream.next_out,
2945
page_is_leaf(page) ? &trx_id_col : NULL);
2947
if (UNIV_UNLIKELY(!index)) {
2952
/* Decompress the user records. */
2953
page_zip->n_blobs = 0;
2954
d_stream.next_out = page + PAGE_ZIP_START;
2957
/* Pre-allocate the offsets for rec_get_offsets_reverse(). */
2958
ulint n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE
2959
+ dict_index_get_n_fields(index);
2960
offsets = mem_heap_alloc(heap, n * sizeof(ulint));
2964
/* Decompress the records in heap_no order. */
2965
if (!page_is_leaf(page)) {
2966
/* This is a node pointer page. */
2970
(!page_zip_decompress_node_ptrs(page_zip, &d_stream,
2971
recs, n_dense, index,
2976
info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL
2977
? REC_INFO_MIN_REC_FLAG : 0;
2979
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
2983
} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
2984
/* This is a leaf page in a secondary index. */
2985
if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream,
2991
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
2994
page_zip_fields_free(index);
2995
mem_heap_free(heap);
2999
/* This is a leaf page in a clustered index. */
3000
if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip,
3008
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
3014
ut_a(page_is_comp(page));
3015
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
3017
page_zip_fields_free(index);
3018
mem_heap_free(heap);
3019
#ifndef UNIV_HOTBACKUP
3021
page_zip_stat_t* zip_stat
3022
= &page_zip_stat[page_zip->ssize - 1];
3023
zip_stat->decompressed++;
3024
zip_stat->decompressed_usec += ut_time_us(NULL) - usec;
3026
#endif /* !UNIV_HOTBACKUP */
3028
/* Update the stat counter for LRU policy. */
3029
buf_LRU_stat_inc_unzip();
3034
#ifdef UNIV_ZIP_DEBUG
3035
/**********************************************************************//**
3036
Dump a block of memory on the standard error stream. */
3039
page_zip_hexdump_func(
3040
/*==================*/
3041
const char* name, /*!< in: name of the data structure */
3042
const void* buf, /*!< in: data */
3043
ulint size) /*!< in: length of the data, in bytes */
3045
const byte* s = buf;
3047
const ulint width = 32; /* bytes per line */
3049
fprintf(stderr, "%s:\n", name);
3051
for (addr = 0; addr < size; addr += width) {
3054
fprintf(stderr, "%04lx ", (ulong) addr);
3056
i = ut_min(width, size - addr);
3059
fprintf(stderr, "%02x", *s++);
3066
/** Dump a block of memory on the standard error stream.
3068
@param size in: length of the data, in bytes */
3069
#define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size)
3071
/** Flag: make page_zip_validate() compare page headers only */
3072
UNIV_INTERN ibool page_zip_validate_header_only = FALSE;
3074
/**********************************************************************//**
3075
Check that the compressed and decompressed pages match.
3076
@return TRUE if valid, FALSE if not */
3079
page_zip_validate_low(
3080
/*==================*/
3081
const page_zip_des_t* page_zip,/*!< in: compressed page */
3082
const page_t* page, /*!< in: uncompressed page */
3083
ibool sloppy) /*!< in: FALSE=strict,
3084
TRUE=ignore the MIN_REC_FLAG */
3086
page_zip_des_t temp_page_zip;
3087
byte* temp_page_buf;
3091
if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
3092
FIL_PAGE_LSN - FIL_PAGE_PREV)
3093
|| memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2)
3094
|| memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
3095
PAGE_DATA - FIL_PAGE_DATA)) {
3096
page_zip_fail(("page_zip_validate: page header\n"));
3097
page_zip_hexdump(page_zip, sizeof *page_zip);
3098
page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
3099
page_zip_hexdump(page, UNIV_PAGE_SIZE);
3103
ut_a(page_is_comp(page));
3105
if (page_zip_validate_header_only) {
3109
/* page_zip_decompress() expects the uncompressed page to be
3110
UNIV_PAGE_SIZE aligned. */
3111
temp_page_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
3112
temp_page = ut_align(temp_page_buf, UNIV_PAGE_SIZE);
3114
#ifdef UNIV_DEBUG_VALGRIND
3115
/* Get detailed information on the valid bits in case the
3116
UNIV_MEM_ASSERT_RW() checks fail. The v-bits of page[],
3117
page_zip->data[] or page_zip could be viewed at temp_page[] or
3118
temp_page_zip in a debugger when running valgrind --db-attach. */
3119
VALGRIND_GET_VBITS(page, temp_page, UNIV_PAGE_SIZE);
3120
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
3121
# if UNIV_WORD_SIZE == 4
3122
VALGRIND_GET_VBITS(page_zip, &temp_page_zip, sizeof temp_page_zip);
3123
/* On 32-bit systems, there is no padding in page_zip_des_t.
3124
On other systems, Valgrind could complain about uninitialized
3126
UNIV_MEM_ASSERT_RW(page_zip, sizeof *page_zip);
3128
VALGRIND_GET_VBITS(page_zip->data, temp_page,
3129
page_zip_get_size(page_zip));
3130
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3131
#endif /* UNIV_DEBUG_VALGRIND */
3133
temp_page_zip = *page_zip;
3134
valid = page_zip_decompress(&temp_page_zip, temp_page, TRUE);
3136
fputs("page_zip_validate(): failed to decompress\n", stderr);
3139
if (page_zip->n_blobs != temp_page_zip.n_blobs) {
3140
page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n",
3141
page_zip->n_blobs, temp_page_zip.n_blobs));
3145
if (page_zip->m_start != temp_page_zip.m_start) {
3146
page_zip_fail(("page_zip_validate: m_start: %u!=%u\n",
3147
page_zip->m_start, temp_page_zip.m_start));
3150
#endif /* UNIV_DEBUG */
3151
if (page_zip->m_end != temp_page_zip.m_end) {
3152
page_zip_fail(("page_zip_validate: m_end: %u!=%u\n",
3153
page_zip->m_end, temp_page_zip.m_end));
3156
if (page_zip->m_nonempty != temp_page_zip.m_nonempty) {
3157
page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n",
3158
page_zip->m_nonempty,
3159
temp_page_zip.m_nonempty));
3162
if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER,
3163
UNIV_PAGE_SIZE - PAGE_HEADER - FIL_PAGE_DATA_END)) {
3165
/* In crash recovery, the "minimum record" flag may be
3166
set incorrectly until the mini-transaction is
3167
committed. Let us tolerate that difference when we
3168
are performing a sloppy validation. */
3171
byte info_bits_diff;
3173
= rec_get_next_offs(page + PAGE_NEW_INFIMUM,
3175
ut_a(offset >= PAGE_NEW_SUPREMUM);
3176
offset -= 5 /* REC_NEW_INFO_BITS */;
3178
info_bits_diff = page[offset] ^ temp_page[offset];
3180
if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
3181
temp_page[offset] = page[offset];
3183
if (!memcmp(page + PAGE_HEADER,
3184
temp_page + PAGE_HEADER,
3185
UNIV_PAGE_SIZE - PAGE_HEADER
3186
- FIL_PAGE_DATA_END)) {
3188
/* Only the minimum record flag
3189
differed. Let us ignore it. */
3190
page_zip_fail(("page_zip_validate: "
3193
"%lu,%lu,0x%02lx)\n",
3194
page_get_space_id(page),
3195
page_get_page_no(page),
3196
(ulong) page[offset]));
3201
page_zip_fail(("page_zip_validate: content\n"));
3207
page_zip_hexdump(page_zip, sizeof *page_zip);
3208
page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
3209
page_zip_hexdump(page, UNIV_PAGE_SIZE);
3210
page_zip_hexdump(temp_page, UNIV_PAGE_SIZE);
3212
ut_free(temp_page_buf);
3216
/**********************************************************************//**
3217
Check that the compressed and decompressed pages match.
3218
@return TRUE if valid, FALSE if not */
3223
const page_zip_des_t* page_zip,/*!< in: compressed page */
3224
const page_t* page) /*!< in: uncompressed page */
3226
return(page_zip_validate_low(page_zip, page,
3227
recv_recovery_is_on()));
3229
#endif /* UNIV_ZIP_DEBUG */
3232
/**********************************************************************//**
3233
Assert that the compressed and decompressed page headers match.
3237
page_zip_header_cmp(
3238
/*================*/
3239
const page_zip_des_t* page_zip,/*!< in: compressed page */
3240
const byte* page) /*!< in: uncompressed page */
3242
ut_ad(!memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
3243
FIL_PAGE_LSN - FIL_PAGE_PREV));
3244
ut_ad(!memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE,
3246
ut_ad(!memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
3247
PAGE_DATA - FIL_PAGE_DATA));
3251
#endif /* UNIV_DEBUG */
3253
/**********************************************************************//**
3254
Write a record on the compressed page that contains externally stored
3255
columns. The data must already have been written to the uncompressed page.
3256
@return end of modification log */
3259
page_zip_write_rec_ext(
3260
/*===================*/
3261
page_zip_des_t* page_zip, /*!< in/out: compressed page */
3262
const page_t* page, /*!< in: page containing rec */
3263
const byte* rec, /*!< in: record being written */
3264
dict_index_t* index, /*!< in: record descriptor */
3265
const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
3266
ulint create, /*!< in: nonzero=insert, zero=update */
3267
ulint trx_id_col, /*!< in: position of DB_TRX_ID */
3268
ulint heap_no, /*!< in: heap number of rec */
3269
byte* storage, /*!< in: end of dense page directory */
3270
byte* data) /*!< in: end of modification log */
3272
const byte* start = rec;
3275
byte* externs = storage;
3276
ulint n_ext = rec_offs_n_extern(offsets);
3278
ut_ad(rec_offs_validate(rec, index, offsets));
3279
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3280
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3281
rec_offs_extra_size(offsets));
3283
externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3284
* (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW);
3286
/* Note that this will not take into account
3287
the BLOB columns of rec if create==TRUE. */
3288
ut_ad(data + rec_offs_data_size(offsets)
3289
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3290
- n_ext * BTR_EXTERN_FIELD_REF_SIZE
3291
< externs - BTR_EXTERN_FIELD_REF_SIZE * page_zip->n_blobs);
3294
ulint blob_no = page_zip_get_n_prev_extern(
3295
page_zip, rec, index);
3296
byte* ext_end = externs - page_zip->n_blobs
3297
* BTR_EXTERN_FIELD_REF_SIZE;
3298
ut_ad(blob_no <= page_zip->n_blobs);
3299
externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
3302
page_zip->n_blobs += n_ext;
3303
ASSERT_ZERO_BLOB(ext_end - n_ext
3304
* BTR_EXTERN_FIELD_REF_SIZE);
3305
memmove(ext_end - n_ext
3306
* BTR_EXTERN_FIELD_REF_SIZE,
3311
ut_a(blob_no + n_ext <= page_zip->n_blobs);
3314
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
3317
if (UNIV_UNLIKELY(i == trx_id_col)) {
3318
ut_ad(!rec_offs_nth_extern(offsets,
3320
ut_ad(!rec_offs_nth_extern(offsets,
3322
/* Locate trx_id and roll_ptr. */
3323
src = rec_get_nth_field(rec, offsets,
3325
ut_ad(len == DATA_TRX_ID_LEN);
3326
ut_ad(src + DATA_TRX_ID_LEN
3327
== rec_get_nth_field(
3330
ut_ad(len == DATA_ROLL_PTR_LEN);
3332
/* Log the preceding fields. */
3333
ASSERT_ZERO(data, src - start);
3334
memcpy(data, start, src - start);
3335
data += src - start;
3336
start = src + (DATA_TRX_ID_LEN
3337
+ DATA_ROLL_PTR_LEN);
3339
/* Store trx_id and roll_ptr. */
3340
memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3342
src, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3343
i++; /* skip also roll_ptr */
3344
} else if (rec_offs_nth_extern(offsets, i)) {
3345
src = rec_get_nth_field(rec, offsets,
3348
ut_ad(dict_index_is_clust(index));
3350
>= BTR_EXTERN_FIELD_REF_SIZE);
3351
src += len - BTR_EXTERN_FIELD_REF_SIZE;
3353
ASSERT_ZERO(data, src - start);
3354
memcpy(data, start, src - start);
3355
data += src - start;
3356
start = src + BTR_EXTERN_FIELD_REF_SIZE;
3358
/* Store the BLOB pointer. */
3359
externs -= BTR_EXTERN_FIELD_REF_SIZE;
3360
ut_ad(data < externs);
3361
memcpy(externs, src, BTR_EXTERN_FIELD_REF_SIZE);
3365
/* Log the last bytes of the record. */
3366
len = rec_offs_data_size(offsets) - (start - rec);
3368
ASSERT_ZERO(data, len);
3369
memcpy(data, start, len);
3375
/**********************************************************************//**
3376
Write an entire record on the compressed page. The data must already
3377
have been written to the uncompressed page. */
3382
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3383
const byte* rec, /*!< in: record being written */
3384
dict_index_t* index, /*!< in: the index the record belongs to */
3385
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
3386
ulint create) /*!< in: nonzero=insert, zero=update */
3394
ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3395
ut_ad(page_zip_simple_validate(page_zip));
3396
ut_ad(page_zip_get_size(page_zip)
3397
> PAGE_DATA + page_zip_dir_size(page_zip));
3398
ut_ad(rec_offs_comp(offsets));
3399
ut_ad(rec_offs_validate(rec, index, offsets));
3401
ut_ad(page_zip->m_start >= PAGE_DATA);
3403
page = page_align(rec);
3405
ut_ad(page_zip_header_cmp(page_zip, page));
3406
ut_ad(page_simple_validate_new((page_t*) page));
3408
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3409
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3410
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3411
rec_offs_extra_size(offsets));
3413
slot = page_zip_dir_find(page_zip, page_offset(rec));
3415
/* Copy the delete mark. */
3416
if (rec_get_deleted_flag(rec, TRUE)) {
3417
*slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8;
3419
*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
3422
ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
3423
ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + UNIV_PAGE_SIZE
3424
- PAGE_DIR - PAGE_DIR_SLOT_SIZE
3425
* page_dir_get_n_slots(page));
3427
heap_no = rec_get_heap_no_new(rec);
3428
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */
3429
ut_ad(heap_no < page_dir_get_n_heap(page));
3431
/* Append to the modification log. */
3432
data = page_zip->data + page_zip->m_end;
3435
/* Identify the record by writing its heap number - 1.
3436
0 is reserved to indicate the end of the modification log. */
3438
if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
3439
*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
3442
*data++ = (byte) ((heap_no - 1) << 1);
3446
const byte* start = rec - rec_offs_extra_size(offsets);
3447
const byte* b = rec - REC_N_NEW_EXTRA_BYTES;
3449
/* Write the extra bytes backwards, so that
3450
rec_offs_extra_size() can be easily computed in
3451
page_zip_apply_log() by invoking
3452
rec_get_offsets_reverse(). */
3454
while (b != start) {
3460
/* Write the data bytes. Store the uncompressed bytes separately. */
3461
storage = page_zip->data + page_zip_get_size(page_zip)
3462
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3463
* PAGE_ZIP_DIR_SLOT_SIZE;
3465
if (page_is_leaf(page)) {
3468
if (dict_index_is_clust(index)) {
3471
trx_id_col = dict_index_get_sys_col_pos(index,
3473
ut_ad(trx_id_col != ULINT_UNDEFINED);
3475
/* Store separately trx_id, roll_ptr and
3476
the BTR_EXTERN_FIELD_REF of each BLOB column. */
3477
if (rec_offs_any_extern(offsets)) {
3478
data = page_zip_write_rec_ext(
3480
rec, index, offsets, create,
3481
trx_id_col, heap_no, storage, data);
3483
/* Locate trx_id and roll_ptr. */
3485
= rec_get_nth_field(rec, offsets,
3487
ut_ad(len == DATA_TRX_ID_LEN);
3488
ut_ad(src + DATA_TRX_ID_LEN
3489
== rec_get_nth_field(
3491
trx_id_col + 1, &len));
3492
ut_ad(len == DATA_ROLL_PTR_LEN);
3494
/* Log the preceding fields. */
3495
ASSERT_ZERO(data, src - rec);
3496
memcpy(data, rec, src - rec);
3499
/* Store trx_id and roll_ptr. */
3501
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3504
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3506
src += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
3508
/* Log the last bytes of the record. */
3509
len = rec_offs_data_size(offsets)
3512
ASSERT_ZERO(data, len);
3513
memcpy(data, src, len);
3517
/* Leaf page of a secondary index:
3518
no externally stored columns */
3519
ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
3520
== ULINT_UNDEFINED);
3521
ut_ad(!rec_offs_any_extern(offsets));
3523
/* Log the entire record. */
3524
len = rec_offs_data_size(offsets);
3526
ASSERT_ZERO(data, len);
3527
memcpy(data, rec, len);
3531
/* This is a node pointer page. */
3534
/* Non-leaf nodes should not have any externally
3536
ut_ad(!rec_offs_any_extern(offsets));
3538
/* Copy the data bytes, except node_ptr. */
3539
len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE;
3540
ut_ad(data + len < storage - REC_NODE_PTR_SIZE
3541
* (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW));
3542
ASSERT_ZERO(data, len);
3543
memcpy(data, rec, len);
3546
/* Copy the node pointer to the uncompressed area. */
3547
memcpy(storage - REC_NODE_PTR_SIZE
3554
ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip));
3555
page_zip->m_end = data - page_zip->data;
3556
page_zip->m_nonempty = TRUE;
3558
#ifdef UNIV_ZIP_DEBUG
3559
ut_a(page_zip_validate(page_zip, page_align(rec)));
3560
#endif /* UNIV_ZIP_DEBUG */
3563
/***********************************************************//**
3564
Parses a log record of writing a BLOB pointer of a record.
3565
@return end of log record or NULL */
3568
page_zip_parse_write_blob_ptr(
3569
/*==========================*/
3570
byte* ptr, /*!< in: redo log buffer */
3571
byte* end_ptr,/*!< in: redo log buffer end */
3572
page_t* page, /*!< in/out: uncompressed page */
3573
page_zip_des_t* page_zip)/*!< in/out: compressed page */
3578
ut_ad(!page == !page_zip);
3581
(end_ptr < ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE))) {
3586
offset = mach_read_from_2(ptr);
3587
z_offset = mach_read_from_2(ptr + 2);
3589
if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3590
|| UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3591
|| UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3593
recv_sys->found_corrupt_log = TRUE;
3599
if (UNIV_UNLIKELY(!page_zip)
3600
|| UNIV_UNLIKELY(!page_is_leaf(page))) {
3605
#ifdef UNIV_ZIP_DEBUG
3606
ut_a(page_zip_validate(page_zip, page));
3607
#endif /* UNIV_ZIP_DEBUG */
3609
memcpy(page + offset,
3610
ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3611
memcpy(page_zip->data + z_offset,
3612
ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3614
#ifdef UNIV_ZIP_DEBUG
3615
ut_a(page_zip_validate(page_zip, page));
3616
#endif /* UNIV_ZIP_DEBUG */
3619
return(ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE));
3622
/**********************************************************************//**
3623
Write a BLOB pointer of a record on the leaf page of a clustered index.
3624
The information must already have been updated on the uncompressed page. */
3627
page_zip_write_blob_ptr(
3628
/*====================*/
3629
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3630
const byte* rec, /*!< in/out: record whose data is being
3632
dict_index_t* index, /*!< in: index of the page */
3633
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
3634
ulint n, /*!< in: column index */
3635
mtr_t* mtr) /*!< in: mini-transaction handle,
3636
or NULL if no logging is needed */
3640
const page_t* page = page_align(rec);
3644
ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3645
ut_ad(page_simple_validate_new((page_t*) page));
3646
ut_ad(page_zip_simple_validate(page_zip));
3647
ut_ad(page_zip_get_size(page_zip)
3648
> PAGE_DATA + page_zip_dir_size(page_zip));
3649
ut_ad(rec_offs_comp(offsets));
3650
ut_ad(rec_offs_validate(rec, NULL, offsets));
3651
ut_ad(rec_offs_any_extern(offsets));
3652
ut_ad(rec_offs_nth_extern(offsets, n));
3654
ut_ad(page_zip->m_start >= PAGE_DATA);
3655
ut_ad(page_zip_header_cmp(page_zip, page));
3657
ut_ad(page_is_leaf(page));
3658
ut_ad(dict_index_is_clust(index));
3660
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3661
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3662
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3663
rec_offs_extra_size(offsets));
3665
blob_no = page_zip_get_n_prev_extern(page_zip, rec, index)
3666
+ rec_get_n_extern_new(rec, index, n);
3667
ut_a(blob_no < page_zip->n_blobs);
3669
externs = page_zip->data + page_zip_get_size(page_zip)
3670
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3671
* (PAGE_ZIP_DIR_SLOT_SIZE
3672
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3674
field = rec_get_nth_field(rec, offsets, n, &len);
3676
externs -= (blob_no + 1) * BTR_EXTERN_FIELD_REF_SIZE;
3677
field += len - BTR_EXTERN_FIELD_REF_SIZE;
3679
memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE);
3681
#ifdef UNIV_ZIP_DEBUG
3682
ut_a(page_zip_validate(page_zip, page));
3683
#endif /* UNIV_ZIP_DEBUG */
3686
#ifndef UNIV_HOTBACKUP
3687
byte* log_ptr = mlog_open(
3688
mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE);
3689
if (UNIV_UNLIKELY(!log_ptr)) {
3693
log_ptr = mlog_write_initial_log_record_fast(
3694
(byte*) field, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr);
3695
mach_write_to_2(log_ptr, page_offset(field));
3697
mach_write_to_2(log_ptr, externs - page_zip->data);
3699
memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE);
3700
log_ptr += BTR_EXTERN_FIELD_REF_SIZE;
3701
mlog_close(mtr, log_ptr);
3702
#endif /* !UNIV_HOTBACKUP */
3706
/***********************************************************//**
3707
Parses a log record of writing the node pointer of a record.
3708
@return end of log record or NULL */
3711
page_zip_parse_write_node_ptr(
3712
/*==========================*/
3713
byte* ptr, /*!< in: redo log buffer */
3714
byte* end_ptr,/*!< in: redo log buffer end */
3715
page_t* page, /*!< in/out: uncompressed page */
3716
page_zip_des_t* page_zip)/*!< in/out: compressed page */
3721
ut_ad(!page == !page_zip);
3723
if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) {
3728
offset = mach_read_from_2(ptr);
3729
z_offset = mach_read_from_2(ptr + 2);
3731
if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3732
|| UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3733
|| UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3735
recv_sys->found_corrupt_log = TRUE;
3746
if (UNIV_UNLIKELY(!page_zip)
3747
|| UNIV_UNLIKELY(page_is_leaf(page))) {
3752
#ifdef UNIV_ZIP_DEBUG
3753
ut_a(page_zip_validate(page_zip, page));
3754
#endif /* UNIV_ZIP_DEBUG */
3756
field = page + offset;
3757
storage = page_zip->data + z_offset;
3759
storage_end = page_zip->data + page_zip_get_size(page_zip)
3760
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3761
* PAGE_ZIP_DIR_SLOT_SIZE;
3763
heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE;
3765
if (UNIV_UNLIKELY((storage_end - storage) % REC_NODE_PTR_SIZE)
3766
|| UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW)
3767
|| UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) {
3772
memcpy(field, ptr + 4, REC_NODE_PTR_SIZE);
3773
memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE);
3775
#ifdef UNIV_ZIP_DEBUG
3776
ut_a(page_zip_validate(page_zip, page));
3777
#endif /* UNIV_ZIP_DEBUG */
3780
return(ptr + (2 + 2 + REC_NODE_PTR_SIZE));
3783
/**********************************************************************//**
3784
Write the node pointer of a record on a non-leaf compressed page. */
3787
page_zip_write_node_ptr(
3788
/*====================*/
3789
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3790
byte* rec, /*!< in/out: record */
3791
ulint size, /*!< in: data size of rec */
3792
ulint ptr, /*!< in: node pointer */
3793
mtr_t* mtr) /*!< in: mini-transaction, or NULL */
3797
page_t* page = page_align(rec);
3799
ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3800
ut_ad(page_simple_validate_new(page));
3801
ut_ad(page_zip_simple_validate(page_zip));
3802
ut_ad(page_zip_get_size(page_zip)
3803
> PAGE_DATA + page_zip_dir_size(page_zip));
3804
ut_ad(page_rec_is_comp(rec));
3806
ut_ad(page_zip->m_start >= PAGE_DATA);
3807
ut_ad(page_zip_header_cmp(page_zip, page));
3809
ut_ad(!page_is_leaf(page));
3811
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3812
UNIV_MEM_ASSERT_RW(rec, size);
3814
storage = page_zip->data + page_zip_get_size(page_zip)
3815
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3816
* PAGE_ZIP_DIR_SLOT_SIZE
3817
- (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
3818
field = rec + size - REC_NODE_PTR_SIZE;
3820
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3821
ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE));
3822
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3823
#if REC_NODE_PTR_SIZE != 4
3824
# error "REC_NODE_PTR_SIZE != 4"
3826
mach_write_to_4(field, ptr);
3827
memcpy(storage, field, REC_NODE_PTR_SIZE);
3830
#ifndef UNIV_HOTBACKUP
3831
byte* log_ptr = mlog_open(mtr,
3832
11 + 2 + 2 + REC_NODE_PTR_SIZE);
3833
if (UNIV_UNLIKELY(!log_ptr)) {
3837
log_ptr = mlog_write_initial_log_record_fast(
3838
field, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr);
3839
mach_write_to_2(log_ptr, page_offset(field));
3841
mach_write_to_2(log_ptr, storage - page_zip->data);
3843
memcpy(log_ptr, field, REC_NODE_PTR_SIZE);
3844
log_ptr += REC_NODE_PTR_SIZE;
3845
mlog_close(mtr, log_ptr);
3846
#endif /* !UNIV_HOTBACKUP */
3850
/**********************************************************************//**
3851
Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
3854
page_zip_write_trx_id_and_roll_ptr(
3855
/*===============================*/
3856
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3857
byte* rec, /*!< in/out: record */
3858
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
3859
ulint trx_id_col,/*!< in: column number of TRX_ID in rec */
3860
trx_id_t trx_id, /*!< in: transaction identifier */
3861
roll_ptr_t roll_ptr)/*!< in: roll_ptr */
3865
page_t* page = page_align(rec);
3868
ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3869
ut_ad(page_simple_validate_new(page));
3870
ut_ad(page_zip_simple_validate(page_zip));
3871
ut_ad(page_zip_get_size(page_zip)
3872
> PAGE_DATA + page_zip_dir_size(page_zip));
3873
ut_ad(rec_offs_validate(rec, NULL, offsets));
3874
ut_ad(rec_offs_comp(offsets));
3876
ut_ad(page_zip->m_start >= PAGE_DATA);
3877
ut_ad(page_zip_header_cmp(page_zip, page));
3879
ut_ad(page_is_leaf(page));
3881
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3883
storage = page_zip->data + page_zip_get_size(page_zip)
3884
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3885
* PAGE_ZIP_DIR_SLOT_SIZE
3886
- (rec_get_heap_no_new(rec) - 1)
3887
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3889
#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
3890
# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
3892
field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
3893
ut_ad(len == DATA_TRX_ID_LEN);
3894
ut_ad(field + DATA_TRX_ID_LEN
3895
== rec_get_nth_field(rec, offsets, trx_id_col + 1, &len));
3896
ut_ad(len == DATA_ROLL_PTR_LEN);
3897
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3898
ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
3899
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3900
#if DATA_TRX_ID_LEN != 6
3901
# error "DATA_TRX_ID_LEN != 6"
3903
mach_write_to_6(field, trx_id);
3904
#if DATA_ROLL_PTR_LEN != 7
3905
# error "DATA_ROLL_PTR_LEN != 7"
3907
mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
3908
memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3910
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3911
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3912
rec_offs_extra_size(offsets));
3913
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3916
#ifdef UNIV_ZIP_DEBUG
3917
/** Set this variable in a debugger to disable page_zip_clear_rec().
3918
The only observable effect should be the compression ratio due to
3919
deleted records not being zeroed out. In rare cases, there can be
3920
page_zip_validate() failures on the node_ptr, trx_id and roll_ptr
3921
columns if the space is reallocated for a smaller record. */
3922
UNIV_INTERN ibool page_zip_clear_rec_disable;
3923
#endif /* UNIV_ZIP_DEBUG */
3925
/**********************************************************************//**
3926
Clear an area on the uncompressed and compressed page, if possible. */
3931
page_zip_des_t* page_zip,/*!< in/out: compressed page */
3932
byte* rec, /*!< in: record to clear */
3933
dict_index_t* index, /*!< in: index of rec */
3934
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
3937
page_t* page = page_align(rec);
3938
/* page_zip_validate() would fail here if a record
3939
containing externally stored columns is being deleted. */
3940
ut_ad(rec_offs_validate(rec, index, offsets));
3941
ut_ad(!page_zip_dir_find(page_zip, page_offset(rec)));
3942
ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec)));
3943
ut_ad(page_zip_header_cmp(page_zip, page));
3945
heap_no = rec_get_heap_no_new(rec);
3946
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
3948
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3949
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3950
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3951
rec_offs_extra_size(offsets));
3954
#ifdef UNIV_ZIP_DEBUG
3955
!page_zip_clear_rec_disable &&
3956
#endif /* UNIV_ZIP_DEBUG */
3958
+ 1 + ((heap_no - 1) >= 64)/* size of the log entry */
3959
+ page_zip_get_trailer_len(page_zip,
3960
dict_index_is_clust(index), NULL)
3961
< page_zip_get_size(page_zip)) {
3964
/* Clear only the data bytes, because the allocator and
3965
the decompressor depend on the extra bytes. */
3966
memset(rec, 0, rec_offs_data_size(offsets));
3968
if (!page_is_leaf(page)) {
3969
/* Clear node_ptr on the compressed page. */
3970
byte* storage = page_zip->data
3971
+ page_zip_get_size(page_zip)
3972
- (page_dir_get_n_heap(page)
3973
- PAGE_HEAP_NO_USER_LOW)
3974
* PAGE_ZIP_DIR_SLOT_SIZE;
3976
memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
3977
0, REC_NODE_PTR_SIZE);
3978
} else if (dict_index_is_clust(index)) {
3979
/* Clear trx_id and roll_ptr on the compressed page. */
3980
byte* storage = page_zip->data
3981
+ page_zip_get_size(page_zip)
3982
- (page_dir_get_n_heap(page)
3983
- PAGE_HEAP_NO_USER_LOW)
3984
* PAGE_ZIP_DIR_SLOT_SIZE;
3986
memset(storage - (heap_no - 1)
3987
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
3988
0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3991
/* Log that the data was zeroed out. */
3992
data = page_zip->data + page_zip->m_end;
3994
if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
3995
*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
3998
*data++ = (byte) ((heap_no - 1) << 1 | 1);
4000
ut_ad((ulint) (data - page_zip->data)
4001
< page_zip_get_size(page_zip));
4002
page_zip->m_end = data - page_zip->data;
4003
page_zip->m_nonempty = TRUE;
4004
} else if (page_is_leaf(page) && dict_index_is_clust(index)) {
4005
/* Do not clear the record, because there is not enough space
4006
to log the operation. */
4008
if (rec_offs_any_extern(offsets)) {
4011
for (i = rec_offs_n_fields(offsets); i--; ) {
4012
/* Clear all BLOB pointers in order to make
4013
page_zip_validate() pass. */
4014
if (rec_offs_nth_extern(offsets, i)) {
4016
byte* field = rec_get_nth_field(
4017
rec, offsets, i, &len);
4019
- BTR_EXTERN_FIELD_REF_SIZE,
4020
0, BTR_EXTERN_FIELD_REF_SIZE);
4026
#ifdef UNIV_ZIP_DEBUG
4027
ut_a(page_zip_validate(page_zip, page));
4028
#endif /* UNIV_ZIP_DEBUG */
4031
/**********************************************************************//**
4032
Write the "deleted" flag of a record on a compressed page. The flag must
4033
already have been written on the uncompressed page. */
4036
page_zip_rec_set_deleted(
4037
/*=====================*/
4038
page_zip_des_t* page_zip,/*!< in/out: compressed page */
4039
const byte* rec, /*!< in: record on the uncompressed page */
4040
ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */
4042
byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
4044
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4046
*slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8);
4048
*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
4050
#ifdef UNIV_ZIP_DEBUG
4051
ut_a(page_zip_validate(page_zip, page_align(rec)));
4052
#endif /* UNIV_ZIP_DEBUG */
4055
/**********************************************************************//**
4056
Write the "owned" flag of a record on a compressed page. The n_owned field
4057
must already have been written on the uncompressed page. */
4060
page_zip_rec_set_owned(
4061
/*===================*/
4062
page_zip_des_t* page_zip,/*!< in/out: compressed page */
4063
const byte* rec, /*!< in: record on the uncompressed page */
4064
ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
4066
byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
4068
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4070
*slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
4072
*slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8);
4076
/**********************************************************************//**
4077
Insert a record to the dense page directory. */
4080
page_zip_dir_insert(
4081
/*================*/
4082
page_zip_des_t* page_zip,/*!< in/out: compressed page */
4083
const byte* prev_rec,/*!< in: record after which to insert */
4084
const byte* free_rec,/*!< in: record from which rec was
4085
allocated, or NULL */
4086
byte* rec) /*!< in: record to insert */
4092
ut_ad(prev_rec != rec);
4093
ut_ad(page_rec_get_next((rec_t*) prev_rec) == rec);
4094
ut_ad(page_zip_simple_validate(page_zip));
4096
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4098
if (page_rec_is_infimum(prev_rec)) {
4099
/* Use the first slot. */
4100
slot_rec = page_zip->data + page_zip_get_size(page_zip);
4102
byte* end = page_zip->data + page_zip_get_size(page_zip);
4103
byte* start = end - page_zip_dir_user_size(page_zip);
4105
if (UNIV_LIKELY(!free_rec)) {
4106
/* PAGE_N_RECS was already incremented
4107
in page_cur_insert_rec_zip(), but the
4108
dense directory slot at that position
4109
contains garbage. Skip it. */
4110
start += PAGE_ZIP_DIR_SLOT_SIZE;
4113
slot_rec = page_zip_dir_find_low(start, end,
4114
page_offset(prev_rec));
4118
/* Read the old n_dense (n_heap may have been incremented). */
4119
n_dense = page_dir_get_n_heap(page_zip->data)
4120
- (PAGE_HEAP_NO_USER_LOW + 1);
4122
if (UNIV_LIKELY_NULL(free_rec)) {
4123
/* The record was allocated from the free list.
4124
Shift the dense directory only up to that slot.
4125
Note that in this case, n_dense is actually
4126
off by one, because page_cur_insert_rec_zip()
4127
did not increment n_heap. */
4128
ut_ad(rec_get_heap_no_new(rec) < n_dense + 1
4129
+ PAGE_HEAP_NO_USER_LOW);
4130
ut_ad(rec >= free_rec);
4131
slot_free = page_zip_dir_find(page_zip, page_offset(free_rec));
4133
slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
4135
/* The record was allocated from the heap.
4136
Shift the entire dense directory. */
4137
ut_ad(rec_get_heap_no_new(rec) == n_dense
4138
+ PAGE_HEAP_NO_USER_LOW);
4140
/* Shift to the end of the dense page directory. */
4141
slot_free = page_zip->data + page_zip_get_size(page_zip)
4142
- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
4145
/* Shift the dense directory to allocate place for rec. */
4146
memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free,
4147
slot_rec - slot_free);
4149
/* Write the entry for the inserted record.
4150
The "owned" and "deleted" flags must be zero. */
4151
mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, page_offset(rec));
4154
/**********************************************************************//**
4155
Shift the dense page directory and the array of BLOB pointers
4156
when a record is deleted. */
4159
page_zip_dir_delete(
4160
/*================*/
4161
page_zip_des_t* page_zip,/*!< in/out: compressed page */
4162
byte* rec, /*!< in: record to delete */
4163
dict_index_t* index, /*!< in: index of rec */
4164
const ulint* offsets,/*!< in: rec_get_offsets(rec) */
4165
const byte* free) /*!< in: previous start of the free list */
4170
page_t* page = page_align(rec);
4172
ut_ad(rec_offs_validate(rec, index, offsets));
4173
ut_ad(rec_offs_comp(offsets));
4175
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4176
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
4177
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
4178
rec_offs_extra_size(offsets));
4180
slot_rec = page_zip_dir_find(page_zip, page_offset(rec));
4184
/* This could not be done before page_zip_dir_find(). */
4185
page_header_set_field(page, page_zip, PAGE_N_RECS,
4186
(ulint)(page_get_n_recs(page) - 1));
4188
if (UNIV_UNLIKELY(!free)) {
4189
/* Make the last slot the start of the free list. */
4190
slot_free = page_zip->data + page_zip_get_size(page_zip)
4191
- PAGE_ZIP_DIR_SLOT_SIZE
4192
* (page_dir_get_n_heap(page_zip->data)
4193
- PAGE_HEAP_NO_USER_LOW);
4195
slot_free = page_zip_dir_find_free(page_zip,
4197
ut_a(slot_free < slot_rec);
4198
/* Grow the free list by one slot by moving the start. */
4199
slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
4202
if (UNIV_LIKELY(slot_rec > slot_free)) {
4203
memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
4205
slot_rec - slot_free);
4208
/* Write the entry for the deleted record.
4209
The "owned" and "deleted" flags will be cleared. */
4210
mach_write_to_2(slot_free, page_offset(rec));
4212
if (!page_is_leaf(page) || !dict_index_is_clust(index)) {
4213
ut_ad(!rec_offs_any_extern(offsets));
4217
n_ext = rec_offs_n_extern(offsets);
4218
if (UNIV_UNLIKELY(n_ext)) {
4219
/* Shift and zero fill the array of BLOB pointers. */
4224
blob_no = page_zip_get_n_prev_extern(page_zip, rec, index);
4225
ut_a(blob_no + n_ext <= page_zip->n_blobs);
4227
externs = page_zip->data + page_zip_get_size(page_zip)
4228
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
4229
* (PAGE_ZIP_DIR_SLOT_SIZE
4230
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4232
ext_end = externs - page_zip->n_blobs
4233
* BTR_EXTERN_FIELD_REF_SIZE;
4234
externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
4236
page_zip->n_blobs -= n_ext;
4237
/* Shift and zero fill the array. */
4238
memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end,
4239
(page_zip->n_blobs - blob_no)
4240
* BTR_EXTERN_FIELD_REF_SIZE);
4241
memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE);
4245
/* The compression algorithm expects info_bits and n_owned
4246
to be 0 for deleted records. */
4247
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
4249
page_zip_clear_rec(page_zip, rec, index, offsets);
4252
/**********************************************************************//**
4253
Add a slot to the dense page directory. */
4256
page_zip_dir_add_slot(
4257
/*==================*/
4258
page_zip_des_t* page_zip, /*!< in/out: compressed page */
4259
ulint is_clustered) /*!< in: nonzero for clustered index,
4266
ut_ad(page_is_comp(page_zip->data));
4267
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4269
/* Read the old n_dense (n_heap has already been incremented). */
4270
n_dense = page_dir_get_n_heap(page_zip->data)
4271
- (PAGE_HEAP_NO_USER_LOW + 1);
4273
dir = page_zip->data + page_zip_get_size(page_zip)
4274
- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
4276
if (!page_is_leaf(page_zip->data)) {
4277
ut_ad(!page_zip->n_blobs);
4278
stored = dir - n_dense * REC_NODE_PTR_SIZE;
4279
} else if (UNIV_UNLIKELY(is_clustered)) {
4280
/* Move the BLOB pointer array backwards to make space for the
4281
roll_ptr and trx_id columns and the dense directory slot. */
4284
stored = dir - n_dense
4285
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4287
- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4289
- (PAGE_ZIP_DIR_SLOT_SIZE
4290
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4291
PAGE_ZIP_DIR_SLOT_SIZE
4292
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4293
memmove(externs - (PAGE_ZIP_DIR_SLOT_SIZE
4294
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4295
externs, stored - externs);
4298
- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4299
ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE,
4300
PAGE_ZIP_DIR_SLOT_SIZE);
4303
/* Move the uncompressed area backwards to make space
4304
for one directory slot. */
4305
memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, dir - stored);
4308
/***********************************************************//**
4309
Parses a log record of writing to the header of a page.
4310
@return end of log record or NULL */
4313
page_zip_parse_write_header(
4314
/*========================*/
4315
byte* ptr, /*!< in: redo log buffer */
4316
byte* end_ptr,/*!< in: redo log buffer end */
4317
page_t* page, /*!< in/out: uncompressed page */
4318
page_zip_des_t* page_zip)/*!< in/out: compressed page */
4323
ut_ad(ptr && end_ptr);
4324
ut_ad(!page == !page_zip);
4326
if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) {
4331
offset = (ulint) *ptr++;
4332
len = (ulint) *ptr++;
4334
if (UNIV_UNLIKELY(!len) || UNIV_UNLIKELY(offset + len >= PAGE_DATA)) {
4336
recv_sys->found_corrupt_log = TRUE;
4341
if (UNIV_UNLIKELY(end_ptr < ptr + len)) {
4347
if (UNIV_UNLIKELY(!page_zip)) {
4351
#ifdef UNIV_ZIP_DEBUG
4352
ut_a(page_zip_validate(page_zip, page));
4353
#endif /* UNIV_ZIP_DEBUG */
4355
memcpy(page + offset, ptr, len);
4356
memcpy(page_zip->data + offset, ptr, len);
4358
#ifdef UNIV_ZIP_DEBUG
4359
ut_a(page_zip_validate(page_zip, page));
4360
#endif /* UNIV_ZIP_DEBUG */
4366
#ifndef UNIV_HOTBACKUP
4367
/**********************************************************************//**
4368
Write a log record of writing to the uncompressed header portion of a page. */
4371
page_zip_write_header_log(
4372
/*======================*/
4373
const byte* data, /*!< in: data on the uncompressed page */
4374
ulint length, /*!< in: length of the data */
4375
mtr_t* mtr) /*!< in: mini-transaction */
4377
byte* log_ptr = mlog_open(mtr, 11 + 1 + 1);
4378
ulint offset = page_offset(data);
4380
ut_ad(offset < PAGE_DATA);
4381
ut_ad(offset + length < PAGE_DATA);
4383
# error "PAGE_DATA > 255"
4385
ut_ad(length < 256);
4387
/* If no logging is requested, we may return now */
4388
if (UNIV_UNLIKELY(!log_ptr)) {
4393
log_ptr = mlog_write_initial_log_record_fast(
4394
(byte*) data, MLOG_ZIP_WRITE_HEADER, log_ptr, mtr);
4395
*log_ptr++ = (byte) offset;
4396
*log_ptr++ = (byte) length;
4397
mlog_close(mtr, log_ptr);
4399
mlog_catenate_string(mtr, data, length);
4401
#endif /* !UNIV_HOTBACKUP */
4403
/**********************************************************************//**
4404
Reorganize and compress a page. This is a low-level operation for
4405
compressed pages, to be used when page_zip_compress() fails.
4406
On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
4407
The function btr_page_reorganize() should be preferred whenever possible.
4408
IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
4409
non-clustered index, the caller must update the insert buffer free
4410
bits in the same mini-transaction in such a way that the modification
4411
will be redo-logged.
4412
@return TRUE on success, FALSE on failure; page_zip will be left
4413
intact on failure, but page will be overwritten. */
4416
page_zip_reorganize(
4417
/*================*/
4418
buf_block_t* block, /*!< in/out: page with compressed page;
4419
on the compressed page, in: size;
4421
m_start, m_end, m_nonempty */
4422
dict_index_t* index, /*!< in: index of the B-tree node */
4423
mtr_t* mtr) /*!< in: mini-transaction */
4425
buf_pool_t* buf_pool = buf_pool_from_block(block);
4426
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
4427
page_t* page = buf_block_get_frame(block);
4428
buf_block_t* temp_block;
4432
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4433
ut_ad(page_is_comp(page));
4434
ut_ad(!dict_index_is_ibuf(index));
4435
/* Note that page_zip_validate(page_zip, page) may fail here. */
4436
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
4437
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4439
/* Disable logging */
4440
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
4442
#ifndef UNIV_HOTBACKUP
4443
temp_block = buf_block_alloc(buf_pool, 0);
4444
btr_search_drop_page_hash_index(block);
4445
block->check_index_page_at_flush = TRUE;
4446
#else /* !UNIV_HOTBACKUP */
4447
ut_ad(block == back_block1);
4448
temp_block = back_block2;
4449
#endif /* !UNIV_HOTBACKUP */
4450
temp_page = temp_block->frame;
4452
/* Copy the old page to temporary space */
4453
buf_frame_copy(temp_page, page);
4455
/* Recreate the page: note that global data on page (possible
4456
segment headers, next page-field, etc.) is preserved intact */
4458
page_create(block, mtr, TRUE);
4460
/* Copy the records from the temporary space to the recreated page;
4461
do not copy the lock bits yet */
4463
page_copy_rec_list_end_no_locks(block, temp_block,
4464
page_get_infimum_rec(temp_page),
4467
if (!dict_index_is_clust(index) && page_is_leaf(temp_page)) {
4468
/* Copy max trx id to recreated page */
4469
trx_id_t max_trx_id = page_get_max_trx_id(temp_page);
4470
page_set_max_trx_id(block, NULL, max_trx_id, NULL);
4471
ut_ad(max_trx_id != 0);
4474
/* Restore logging. */
4475
mtr_set_log_mode(mtr, log_mode);
4477
if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
4479
#ifndef UNIV_HOTBACKUP
4480
buf_block_free(temp_block);
4481
#endif /* !UNIV_HOTBACKUP */
4485
lock_move_reorganize_page(block, temp_block);
4487
#ifndef UNIV_HOTBACKUP
4488
buf_block_free(temp_block);
4489
#endif /* !UNIV_HOTBACKUP */
4493
#ifndef UNIV_HOTBACKUP
4494
/**********************************************************************//**
4495
Copy the records of a page byte for byte. Do not copy the page header
4496
or trailer, except those B-tree header fields that are directly
4497
related to the storage of records. Also copy PAGE_MAX_TRX_ID.
4498
NOTE: The caller must update the lock table and the adaptive hash index. */
4503
page_zip_des_t* page_zip, /*!< out: copy of src_zip
4504
(n_blobs, m_start, m_end,
4505
m_nonempty, data[0..size-1]) */
4506
page_t* page, /*!< out: copy of src */
4507
const page_zip_des_t* src_zip, /*!< in: compressed page */
4508
const page_t* src, /*!< in: page */
4509
dict_index_t* index, /*!< in: index of the B-tree */
4510
mtr_t* mtr) /*!< in: mini-transaction */
4512
ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
4513
ut_ad(mtr_memo_contains_page(mtr, (page_t*) src, MTR_MEMO_PAGE_X_FIX));
4514
ut_ad(!dict_index_is_ibuf(index));
4515
#ifdef UNIV_ZIP_DEBUG
4516
/* The B-tree operations that call this function may set
4517
FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
4518
mismatch. A strict page_zip_validate() will be executed later
4519
during the B-tree operations. */
4520
ut_a(page_zip_validate_low(src_zip, src, TRUE));
4521
#endif /* UNIV_ZIP_DEBUG */
4522
ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
4523
if (UNIV_UNLIKELY(src_zip->n_blobs)) {
4524
ut_a(page_is_leaf(src));
4525
ut_a(dict_index_is_clust(index));
4528
/* The PAGE_MAX_TRX_ID must be set on leaf pages of secondary
4529
indexes. It does not matter on other pages. */
4530
ut_a(dict_index_is_clust(index) || !page_is_leaf(src)
4531
|| page_get_max_trx_id(src));
4533
UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
4534
UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip));
4535
UNIV_MEM_ASSERT_RW(src, UNIV_PAGE_SIZE);
4536
UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip));
4538
/* Copy those B-tree page header fields that are related to
4539
the records stored in the page. Also copy the field
4540
PAGE_MAX_TRX_ID. Skip the rest of the page header and
4541
trailer. On the compressed page, there is no trailer. */
4542
#if PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END
4543
# error "PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END"
4545
memcpy(PAGE_HEADER + page, PAGE_HEADER + src,
4546
PAGE_HEADER_PRIV_END);
4547
memcpy(PAGE_DATA + page, PAGE_DATA + src,
4548
UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
4549
memcpy(PAGE_HEADER + page_zip->data, PAGE_HEADER + src_zip->data,
4550
PAGE_HEADER_PRIV_END);
4551
memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data,
4552
page_zip_get_size(page_zip) - PAGE_DATA);
4554
/* Copy all fields of src_zip to page_zip, except the pointer
4555
to the compressed data page. */
4557
page_zip_t* data = page_zip->data;
4558
memcpy(page_zip, src_zip, sizeof *page_zip);
4559
page_zip->data = data;
4561
ut_ad(page_zip_get_trailer_len(page_zip,
4562
dict_index_is_clust(index), NULL)
4563
+ page_zip->m_end < page_zip_get_size(page_zip));
4565
if (!page_is_leaf(src)
4566
&& UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL)
4567
&& UNIV_LIKELY(mach_read_from_4(page
4568
+ FIL_PAGE_PREV) != FIL_NULL)) {
4569
/* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */
4570
ulint offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
4572
if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) {
4573
rec_t* rec = page + offs;
4574
ut_a(rec[-REC_N_NEW_EXTRA_BYTES]
4575
& REC_INFO_MIN_REC_FLAG);
4576
rec[-REC_N_NEW_EXTRA_BYTES] &= ~ REC_INFO_MIN_REC_FLAG;
4580
#ifdef UNIV_ZIP_DEBUG
4581
ut_a(page_zip_validate(page_zip, page));
4582
#endif /* UNIV_ZIP_DEBUG */
4584
page_zip_compress_write_log(page_zip, page, index, mtr);
4586
#endif /* !UNIV_HOTBACKUP */
4588
/**********************************************************************//**
4589
Parses a log record of compressing an index page.
4590
@return end of log record or NULL */
4593
page_zip_parse_compress(
4594
/*====================*/
4595
byte* ptr, /*!< in: buffer */
4596
byte* end_ptr,/*!< in: buffer end */
4597
page_t* page, /*!< out: uncompressed page */
4598
page_zip_des_t* page_zip)/*!< out: compressed page */
4603
ut_ad(ptr && end_ptr);
4604
ut_ad(!page == !page_zip);
4606
if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) {
4611
size = mach_read_from_2(ptr);
4613
trailer_size = mach_read_from_2(ptr);
4616
if (UNIV_UNLIKELY(ptr + 8 + size + trailer_size > end_ptr)) {
4622
if (UNIV_UNLIKELY(!page_zip)
4623
|| UNIV_UNLIKELY(page_zip_get_size(page_zip) < size)) {
4625
recv_sys->found_corrupt_log = TRUE;
4630
memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4);
4631
memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4);
4632
memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size);
4633
memset(page_zip->data + FIL_PAGE_TYPE + size, 0,
4634
page_zip_get_size(page_zip) - trailer_size
4635
- (FIL_PAGE_TYPE + size));
4636
memcpy(page_zip->data + page_zip_get_size(page_zip)
4637
- trailer_size, ptr + 8 + size, trailer_size);
4639
if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page,
4646
return(ptr + 8 + size + trailer_size);
4649
/**********************************************************************//**
4650
Calculate the compressed page checksum.
4651
@return page checksum */
4654
page_zip_calc_checksum(
4655
/*===================*/
4656
const void* data, /*!< in: compressed page */
4657
ulint size) /*!< in: size of compressed page */
4659
/* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
4660
and FIL_PAGE_FILE_FLUSH_LSN from the checksum. */
4662
const Bytef* s = data;
4665
ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
4667
adler = adler32(0L, s + FIL_PAGE_OFFSET,
4668
FIL_PAGE_LSN - FIL_PAGE_OFFSET);
4669
adler = adler32(adler, s + FIL_PAGE_TYPE, 2);
4670
adler = adler32(adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
4671
size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
4673
return((ulint) adler);