1
/******************************************************
2
Compressed page interface
6
Created June 2005 by Marko Makela
7
*******************************************************/
12
# include "page0zip.ic"
15
#include "page0page.h"
18
#include "dict0boot.h"
19
#include "dict0dict.h"
22
#include "page0types.h"
23
#include "lock0lock.h"
28
/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
29
UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
31
/* Please refer to ../include/page0zip.ic for a description of the
32
compressed page format. */
34
/* The infimum and supremum records are omitted from the compressed page.
35
On compress, we compare that the records are there, and on uncompress we
36
restore the records. */
37
static const byte infimum_extra[] = {
38
0x01, /* info_bits=0, n_owned=1 */
39
0x00, 0x02 /* heap_no=0, status=2 */
40
/* ?, ? */ /* next=(first user rec, or supremum) */
42
static const byte infimum_data[] = {
43
0x69, 0x6e, 0x66, 0x69,
44
0x6d, 0x75, 0x6d, 0x00 /* "infimum\0" */
46
static const byte supremum_extra_data[] = {
47
/* 0x0?, */ /* info_bits=0, n_owned=1..8 */
48
0x00, 0x0b, /* heap_no=1, status=3 */
49
0x00, 0x00, /* next=0 */
50
0x73, 0x75, 0x70, 0x72,
51
0x65, 0x6d, 0x75, 0x6d /* "supremum" */
54
/** Assert that a block of memory is filled with zero bytes.
55
Compare at most sizeof(field_ref_zero) bytes. */
56
#define ASSERT_ZERO(b, s) \
57
ut_ad(!memcmp(b, field_ref_zero, ut_min(s, sizeof field_ref_zero)))
58
/** Assert that a BLOB pointer is filled with zero bytes. */
59
#define ASSERT_ZERO_BLOB(b) \
60
ut_ad(!memcmp(b, field_ref_zero, sizeof field_ref_zero))
62
/* Enable some extra debugging output. This code can be enabled
63
independently of any UNIV_ debugging conditions. */
64
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
66
__attribute__((format (printf, 1, 2)))
67
/**************************************************************************
68
Report a failure to decompress or compress. */
73
/* out: number of characters printed */
74
const char* fmt, /* in: printf(3) format string */
75
...) /* in: arguments corresponding to fmt */
80
ut_print_timestamp(stderr);
81
fputs(" InnoDB: ", stderr);
83
res = vfprintf(stderr, fmt, ap);
88
# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
89
#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
90
# define page_zip_fail(fmt_args) /* empty */
91
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
93
/**************************************************************************
94
Determine the guaranteed free space on an empty page. */
99
/* out: minimum payload size on the page */
100
ulint n_fields, /* in: number of columns in the index */
101
ulint zip_size) /* in: compressed page size in bytes */
104
/* subtract the page header and the longest
105
uncompressed data needed for one record */
107
+ PAGE_ZIP_DIR_SLOT_SIZE
108
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
109
+ 1/* encoded heap_no==2 in page_zip_write_rec() */
110
+ 1/* end of modification log */
111
- REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
112
/* subtract the space for page_zip_fields_encode() */
113
- compressBound(2 * (n_fields + 1));
114
return(size > 0 ? (ulint) size : 0);
117
/*****************************************************************
118
Gets the size of the compressed page trailer (the dense page directory),
119
including deleted records (the free list). */
124
/* out: length of dense page
125
directory, in bytes */
126
const page_zip_des_t* page_zip) /* in: compressed page */
128
/* Exclude the page infimum and supremum from the record count. */
129
ulint size = PAGE_ZIP_DIR_SLOT_SIZE
130
* (page_dir_get_n_heap(page_zip->data)
131
- PAGE_HEAP_NO_USER_LOW);
135
/*****************************************************************
136
Gets the size of the compressed page trailer (the dense page directory),
137
only including user records (excluding the free list). */
140
page_zip_dir_user_size(
141
/*===================*/
142
/* out: length of dense page
143
directory comprising existing
145
const page_zip_des_t* page_zip) /* in: compressed page */
147
ulint size = PAGE_ZIP_DIR_SLOT_SIZE
148
* page_get_n_recs(page_zip->data);
149
ut_ad(size <= page_zip_dir_size(page_zip));
153
/*****************************************************************
154
Find the slot of the given record in the dense page directory. */
157
page_zip_dir_find_low(
158
/*==================*/
159
/* out: dense directory slot,
160
or NULL if record not found */
161
byte* slot, /* in: start of records */
162
byte* end, /* in: end of records */
163
ulint offset) /* in: offset of user record */
167
for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) {
168
if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK)
177
/*****************************************************************
178
Find the slot of the given non-free record in the dense page directory. */
183
/* out: dense directory slot,
184
or NULL if record not found */
185
page_zip_des_t* page_zip, /* in: compressed page */
186
ulint offset) /* in: offset of user record */
188
byte* end = page_zip->data + page_zip_get_size(page_zip);
190
ut_ad(page_zip_simple_validate(page_zip));
192
return(page_zip_dir_find_low(end - page_zip_dir_user_size(page_zip),
197
/*****************************************************************
198
Find the slot of the given free record in the dense page directory. */
201
page_zip_dir_find_free(
202
/*===================*/
203
/* out: dense directory slot,
204
or NULL if record not found */
205
page_zip_des_t* page_zip, /* in: compressed page */
206
ulint offset) /* in: offset of user record */
208
byte* end = page_zip->data + page_zip_get_size(page_zip);
210
ut_ad(page_zip_simple_validate(page_zip));
212
return(page_zip_dir_find_low(end - page_zip_dir_size(page_zip),
213
end - page_zip_dir_user_size(page_zip),
217
/*****************************************************************
218
Read a given slot in the dense page directory. */
223
/* out: record offset
224
on the uncompressed page,
226
PAGE_ZIP_DIR_SLOT_DEL or
227
PAGE_ZIP_DIR_SLOT_OWNED */
228
const page_zip_des_t* page_zip, /* in: compressed page */
229
ulint slot) /* in: slot
230
(0=first user record) */
232
ut_ad(page_zip_simple_validate(page_zip));
233
ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE);
234
return(mach_read_from_2(page_zip->data + page_zip_get_size(page_zip)
235
- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
238
/**************************************************************************
239
Write a log record of compressing an index page. */
242
page_zip_compress_write_log(
243
/*========================*/
244
const page_zip_des_t* page_zip,/* in: compressed page */
245
const page_t* page, /* in: uncompressed page */
246
dict_index_t* index, /* in: index of the B-tree node */
247
mtr_t* mtr) /* in: mini-transaction */
252
log_ptr = mlog_open(mtr, 11 + 2 + 2);
259
/* Read the number of user records. */
260
trailer_size = page_dir_get_n_heap(page_zip->data)
261
- PAGE_HEAP_NO_USER_LOW;
262
/* Multiply by the amount of uncompressed data stored per record */
263
if (!page_is_leaf(page)) {
264
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
265
} else if (dict_index_is_clust(index)) {
266
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
267
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
269
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
271
/* Add the space occupied by BLOB pointers. */
272
trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
273
ut_a(page_zip->m_end > PAGE_DATA);
274
#if FIL_PAGE_DATA > PAGE_DATA
275
# error "FIL_PAGE_DATA > PAGE_DATA"
277
ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));
279
log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
280
MLOG_ZIP_PAGE_COMPRESS,
282
mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE);
284
mach_write_to_2(log_ptr, trailer_size);
286
mlog_close(mtr, log_ptr);
288
/* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
289
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
290
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
291
/* Write most of the page header, the compressed stream and
292
the modification log. */
293
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
294
page_zip->m_end - FIL_PAGE_TYPE);
295
/* Write the uncompressed trailer of the compressed page. */
296
mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
297
- trailer_size, trailer_size);
300
/**********************************************************
301
Determine how many externally stored columns are contained
302
in existing records with smaller heap_no than rec. */
305
page_zip_get_n_prev_extern(
306
/*=======================*/
307
const page_zip_des_t* page_zip,/* in: dense page directory on
309
const rec_t* rec, /* in: compact physical record
310
on a B-tree leaf page */
311
dict_index_t* index) /* in: record descriptor */
313
const page_t* page = page_align(rec);
318
ulint n_recs = page_get_n_recs(page_zip->data);
320
ut_ad(page_is_leaf(page));
321
ut_ad(page_is_comp(page));
322
ut_ad(dict_table_is_comp(index->table));
323
ut_ad(dict_index_is_clust(index));
325
heap_no = rec_get_heap_no_new(rec);
326
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
327
left = heap_no - PAGE_HEAP_NO_USER_LOW;
328
if (UNIV_UNLIKELY(!left)) {
332
for (i = 0; i < n_recs; i++) {
333
const rec_t* r = page + (page_zip_dir_get(page_zip, i)
334
& PAGE_ZIP_DIR_SLOT_MASK);
336
if (rec_get_heap_no_new(r) < heap_no) {
337
n_ext += rec_get_n_extern_new(r, index,
348
/**************************************************************************
349
Encode the length of a fixed-length column. */
352
page_zip_fixed_field_encode(
353
/*========================*/
354
/* out: buf + length of encoded val */
355
byte* buf, /* in: pointer to buffer where to write */
356
ulint val) /* in: value to write */
360
if (UNIV_LIKELY(val < 126)) {
362
0 = nullable variable field of at most 255 bytes length;
363
1 = not null variable field of at most 255 bytes length;
364
126 = nullable variable field with maximum length >255;
365
127 = not null variable field with maximum length >255
369
*buf++ = (byte) (0x80 | val >> 8);
376
/**************************************************************************
377
Write the index information for the compressed page. */
380
page_zip_fields_encode(
381
/*===================*/
382
/* out: used size of buf */
383
ulint n, /* in: number of fields to compress */
384
dict_index_t* index, /* in: index comprising at least n fields */
385
ulint trx_id_pos,/* in: position of the trx_id column
386
in the index, or ULINT_UNDEFINED if
387
this is a non-leaf page */
388
byte* buf) /* out: buffer of (n + 1) * 2 bytes */
390
const byte* buf_start = buf;
393
ulint trx_id_col = 0;
394
/* sum of lengths of preceding non-nullable fixed fields, or 0 */
397
ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);
399
for (i = col = 0; i < n; i++) {
400
dict_field_t* field = dict_index_get_nth_field(index, i);
403
if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
404
val = 1; /* set the "not nullable" flag */
406
val = 0; /* nullable field */
409
if (!field->fixed_len) {
410
/* variable-length field */
411
const dict_col_t* column
412
= dict_field_get_col(field);
414
if (UNIV_UNLIKELY(column->len > 255)
415
|| UNIV_UNLIKELY(column->mtype == DATA_BLOB)) {
416
val |= 0x7e; /* max > 255 bytes */
420
/* write out the length of any
421
preceding non-nullable fields */
422
buf = page_zip_fixed_field_encode(
423
buf, fixed_sum << 1 | 1);
431
/* fixed-length non-nullable field */
433
if (fixed_sum && UNIV_UNLIKELY
434
(fixed_sum + field->fixed_len
435
> DICT_MAX_INDEX_COL_LEN)) {
436
/* Write out the length of the
437
preceding non-nullable fields,
438
to avoid exceeding the maximum
439
length of a fixed-length column. */
440
buf = page_zip_fixed_field_encode(
441
buf, fixed_sum << 1 | 1);
446
if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
448
/* Write out the length of any
449
preceding non-nullable fields,
450
and start a new trx_id column. */
451
buf = page_zip_fixed_field_encode(
452
buf, fixed_sum << 1 | 1);
457
fixed_sum = field->fixed_len;
460
fixed_sum += field->fixed_len;
463
/* fixed-length nullable field */
466
/* write out the length of any
467
preceding non-nullable fields */
468
buf = page_zip_fixed_field_encode(
469
buf, fixed_sum << 1 | 1);
474
buf = page_zip_fixed_field_encode(
475
buf, field->fixed_len << 1);
481
/* Write out the lengths of last fixed-length columns. */
482
buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
485
if (trx_id_pos != ULINT_UNDEFINED) {
486
/* Write out the position of the trx_id column */
489
/* Write out the number of nullable fields */
490
i = index->n_nullable;
496
*buf++ = (byte) (0x80 | i >> 8);
500
ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
501
return((ulint) (buf - buf_start));
504
/**************************************************************************
505
Populate the dense page directory from the sparse directory. */
510
const page_t* page, /* in: compact page */
511
byte* buf, /* in: pointer to dense page directory[-1];
512
out: dense directory on compressed page */
513
const rec_t** recs) /* in: pointer to an array of 0, or NULL;
514
out: dense page directory sorted by ascending
515
address (and heap_no) */
527
if (page_is_leaf(page)) {
528
status = REC_STATUS_ORDINARY;
530
status = REC_STATUS_NODE_PTR;
532
(mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) {
533
min_mark = REC_INFO_MIN_REC_FLAG;
537
n_heap = page_dir_get_n_heap(page);
539
/* Traverse the list of stored records in the collation order,
540
starting from the first user record. */
542
rec = page + PAGE_NEW_INFIMUM, TRUE;
548
offs = rec_get_next_offs(rec, TRUE);
549
if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
553
heap_no = rec_get_heap_no_new(rec);
554
ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
555
ut_a(heap_no < n_heap);
556
ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR);
557
ut_a(offs >= PAGE_ZIP_START);
558
#if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1)
559
# error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2"
561
#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1
562
# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1"
564
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
565
offs |= PAGE_ZIP_DIR_SLOT_OWNED;
568
info_bits = rec_get_info_bits(rec, TRUE);
569
if (UNIV_UNLIKELY(info_bits & REC_INFO_DELETED_FLAG)) {
570
info_bits &= ~REC_INFO_DELETED_FLAG;
571
offs |= PAGE_ZIP_DIR_SLOT_DEL;
573
ut_a(info_bits == min_mark);
574
/* Only the smallest user record can have
575
REC_INFO_MIN_REC_FLAG set. */
578
mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
580
if (UNIV_LIKELY_NULL(recs)) {
581
/* Ensure that each heap_no occurs at most once. */
582
ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
583
/* exclude infimum and supremum */
584
recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
587
ut_a(rec_get_status(rec) == status);
590
offs = page_header_get_field(page, PAGE_FREE);
592
/* Traverse the free list (of deleted records). */
594
ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
597
heap_no = rec_get_heap_no_new(rec);
598
ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
599
ut_a(heap_no < n_heap);
601
ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
602
ut_a(rec_get_status(rec) == status);
604
mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
606
if (UNIV_LIKELY_NULL(recs)) {
607
/* Ensure that each heap_no occurs at most once. */
608
ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
609
/* exclude infimum and supremum */
610
recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
613
offs = rec_get_next_offs(rec, TRUE);
616
/* Ensure that each heap no occurs at least once. */
617
ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap);
620
/**************************************************************************
621
Allocate memory for zlib. */
630
return(mem_heap_alloc(opaque, items * size));
633
/**************************************************************************
634
Deallocate memory for zlib. */
639
void* opaque __attribute__((unused)),
640
void* address __attribute__((unused)))
644
/**************************************************************************
645
Configure the zlib allocator to use the given memory heap. */
650
void* stream, /* in/out: zlib stream */
651
mem_heap_t* heap) /* in: memory heap to use */
653
z_stream* strm = stream;
655
strm->zalloc = page_zip_malloc;
656
strm->zfree = page_zip_free;
660
#if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
661
# define PAGE_ZIP_COMPRESS_DBG
664
#ifdef PAGE_ZIP_COMPRESS_DBG
665
/* Set this variable in a debugger to enable
666
excessive logging in page_zip_compress(). */
667
UNIV_INTERN ibool page_zip_compress_dbg;
668
/* Set this variable in a debugger to enable
669
binary logging of the data passed to deflate().
670
When this variable is nonzero, it will act
671
as a log file name generator. */
672
UNIV_INTERN unsigned page_zip_compress_log;
674
/**************************************************************************
675
Wrapper for deflate(). Log the operation if page_zip_compress_dbg is set. */
678
page_zip_compress_deflate(
679
/*======================*/
680
FILE* logfile,/* in: log file, or NULL */
681
z_streamp strm, /* in/out: compressed stream for deflate() */
682
int flush) /* in: deflate() flushing method */
685
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
686
ut_print_buf(stderr, strm->next_in, strm->avail_in);
688
if (UNIV_LIKELY_NULL(logfile)) {
689
fwrite(strm->next_in, 1, strm->avail_in, logfile);
691
status = deflate(strm, flush);
692
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
693
fprintf(stderr, " -> %d\n", status);
698
/* Redefine deflate(). */
700
# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
701
# define FILE_LOGFILE FILE* logfile,
702
# define LOGFILE logfile,
703
#else /* PAGE_ZIP_COMPRESS_DBG */
704
# define FILE_LOGFILE
706
#endif /* PAGE_ZIP_COMPRESS_DBG */
708
/**************************************************************************
709
Compress the records of a node pointer page. */
712
page_zip_compress_node_ptrs(
713
/*========================*/
714
/* out: Z_OK, or a zlib error code */
716
z_stream* c_stream, /* in/out: compressed page stream */
717
const rec_t** recs, /* in: dense page directory
719
ulint n_dense, /* in: size of recs[] */
720
dict_index_t* index, /* in: the index of the page */
721
byte* storage, /* in: end of dense page directory */
722
mem_heap_t* heap) /* in: temporary memory heap */
725
ulint* offsets = NULL;
728
const rec_t* rec = *recs++;
730
offsets = rec_get_offsets(rec, index, offsets,
731
ULINT_UNDEFINED, &heap);
732
/* Only leaf nodes may contain externally stored columns. */
733
ut_ad(!rec_offs_any_extern(offsets));
735
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
736
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
737
rec_offs_extra_size(offsets));
739
/* Compress the extra bytes. */
740
c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
743
if (c_stream->avail_in) {
744
err = deflate(c_stream, Z_NO_FLUSH);
745
if (UNIV_UNLIKELY(err != Z_OK)) {
749
ut_ad(!c_stream->avail_in);
751
/* Compress the data bytes, except node_ptr. */
752
c_stream->next_in = (byte*) rec;
753
c_stream->avail_in = rec_offs_data_size(offsets)
755
ut_ad(c_stream->avail_in);
757
err = deflate(c_stream, Z_NO_FLUSH);
758
if (UNIV_UNLIKELY(err != Z_OK)) {
762
ut_ad(!c_stream->avail_in);
764
memcpy(storage - REC_NODE_PTR_SIZE
765
* (rec_get_heap_no_new(rec) - 1),
766
c_stream->next_in, REC_NODE_PTR_SIZE);
767
c_stream->next_in += REC_NODE_PTR_SIZE;
773
/**************************************************************************
774
Compress the records of a leaf node of a secondary index. */
777
page_zip_compress_sec(
778
/*==================*/
779
/* out: Z_OK, or a zlib error code */
781
z_stream* c_stream, /* in/out: compressed page stream */
782
const rec_t** recs, /* in: dense page directory
784
ulint n_dense) /* in: size of recs[] */
791
const rec_t* rec = *recs++;
793
/* Compress everything up to this record. */
794
c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
797
if (UNIV_LIKELY(c_stream->avail_in)) {
798
UNIV_MEM_ASSERT_RW(c_stream->next_in,
800
err = deflate(c_stream, Z_NO_FLUSH);
801
if (UNIV_UNLIKELY(err != Z_OK)) {
806
ut_ad(!c_stream->avail_in);
807
ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
809
/* Skip the REC_N_NEW_EXTRA_BYTES. */
811
c_stream->next_in = (byte*) rec;
817
/**************************************************************************
818
Compress a record of a leaf node of a clustered index that contains
819
externally stored columns. */
822
page_zip_compress_clust_ext(
823
/*========================*/
824
/* out: Z_OK, or a zlib error code */
826
z_stream* c_stream, /* in/out: compressed page stream */
827
const rec_t* rec, /* in: record */
828
const ulint* offsets, /* in: rec_get_offsets(rec) */
829
ulint trx_id_col, /* in: position of DB_TRX_ID */
830
byte* deleted, /* in: dense directory entry pointing
831
to the head of the free list */
832
byte* storage, /* in: end of dense page directory */
833
byte** externs, /* in/out: pointer to the next
834
available BLOB pointer */
835
ulint* n_blobs) /* in/out: number of
836
externally stored columns */
841
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
842
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
843
rec_offs_extra_size(offsets));
845
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
849
if (UNIV_UNLIKELY(i == trx_id_col)) {
850
ut_ad(!rec_offs_nth_extern(offsets, i));
851
/* Store trx_id and roll_ptr
852
in uncompressed form. */
853
src = rec_get_nth_field(rec, offsets, i, &len);
854
ut_ad(src + DATA_TRX_ID_LEN
855
== rec_get_nth_field(rec, offsets,
857
ut_ad(len == DATA_ROLL_PTR_LEN);
859
/* Compress any preceding bytes. */
861
= src - c_stream->next_in;
863
if (c_stream->avail_in) {
864
err = deflate(c_stream, Z_NO_FLUSH);
865
if (UNIV_UNLIKELY(err != Z_OK)) {
871
ut_ad(!c_stream->avail_in);
872
ut_ad(c_stream->next_in == src);
875
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
876
* (rec_get_heap_no_new(rec) - 1),
878
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
881
+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
883
/* Skip also roll_ptr */
885
} else if (rec_offs_nth_extern(offsets, i)) {
886
src = rec_get_nth_field(rec, offsets, i, &len);
887
ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
888
src += len - BTR_EXTERN_FIELD_REF_SIZE;
890
c_stream->avail_in = src
892
if (UNIV_LIKELY(c_stream->avail_in)) {
893
err = deflate(c_stream, Z_NO_FLUSH);
894
if (UNIV_UNLIKELY(err != Z_OK)) {
900
ut_ad(!c_stream->avail_in);
901
ut_ad(c_stream->next_in == src);
903
/* Reserve space for the data at
904
the end of the space reserved for
905
the compressed data and the page
910
<= BTR_EXTERN_FIELD_REF_SIZE)) {
915
ut_ad(*externs == c_stream->next_out
916
+ c_stream->avail_out
917
+ 1/* end of modif. log */);
920
+= BTR_EXTERN_FIELD_REF_SIZE;
922
/* Skip deleted records. */
924
(page_zip_dir_find_low(
926
page_offset(rec)))) {
932
-= BTR_EXTERN_FIELD_REF_SIZE;
933
*externs -= BTR_EXTERN_FIELD_REF_SIZE;
935
/* Copy the BLOB pointer */
936
memcpy(*externs, c_stream->next_in
937
- BTR_EXTERN_FIELD_REF_SIZE,
938
BTR_EXTERN_FIELD_REF_SIZE);
945
/**************************************************************************
946
Compress the records of a leaf node of a clustered index. */
949
page_zip_compress_clust(
950
/*====================*/
951
/* out: Z_OK, or a zlib error code */
953
z_stream* c_stream, /* in/out: compressed page stream */
954
const rec_t** recs, /* in: dense page directory
956
ulint n_dense, /* in: size of recs[] */
957
dict_index_t* index, /* in: the index of the page */
958
ulint* n_blobs, /* in: 0; out: number of
959
externally stored columns */
960
ulint trx_id_col, /* index of the trx_id column */
961
byte* deleted, /* in: dense directory entry pointing
962
to the head of the free list */
963
byte* storage, /* in: end of dense page directory */
964
mem_heap_t* heap) /* in: temporary memory heap */
967
ulint* offsets = NULL;
968
/* BTR_EXTERN_FIELD_REF storage */
969
byte* externs = storage - n_dense
970
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
972
ut_ad(*n_blobs == 0);
975
const rec_t* rec = *recs++;
977
offsets = rec_get_offsets(rec, index, offsets,
978
ULINT_UNDEFINED, &heap);
979
ut_ad(rec_offs_n_fields(offsets)
980
== dict_index_get_n_fields(index));
981
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
982
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
983
rec_offs_extra_size(offsets));
985
/* Compress the extra bytes. */
986
c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
989
if (c_stream->avail_in) {
990
err = deflate(c_stream, Z_NO_FLUSH);
991
if (UNIV_UNLIKELY(err != Z_OK)) {
996
ut_ad(!c_stream->avail_in);
997
ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
999
/* Compress the data bytes. */
1001
c_stream->next_in = (byte*) rec;
1003
/* Check if there are any externally stored columns.
1004
For each externally stored column, store the
1005
BTR_EXTERN_FIELD_REF separately. */
1006
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
1007
ut_ad(dict_index_is_clust(index));
1009
err = page_zip_compress_clust_ext(
1011
c_stream, rec, offsets, trx_id_col,
1012
deleted, storage, &externs, n_blobs);
1014
if (UNIV_UNLIKELY(err != Z_OK)) {
1022
/* Store trx_id and roll_ptr in uncompressed form. */
1023
src = rec_get_nth_field(rec, offsets,
1025
ut_ad(src + DATA_TRX_ID_LEN
1026
== rec_get_nth_field(rec, offsets,
1027
trx_id_col + 1, &len));
1028
ut_ad(len == DATA_ROLL_PTR_LEN);
1029
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
1030
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
1031
rec_offs_extra_size(offsets));
1033
/* Compress any preceding bytes. */
1034
c_stream->avail_in = src - c_stream->next_in;
1036
if (c_stream->avail_in) {
1037
err = deflate(c_stream, Z_NO_FLUSH);
1038
if (UNIV_UNLIKELY(err != Z_OK)) {
1044
ut_ad(!c_stream->avail_in);
1045
ut_ad(c_stream->next_in == src);
1048
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
1049
* (rec_get_heap_no_new(rec) - 1),
1051
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
1054
+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1056
/* Skip also roll_ptr */
1057
ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets));
1060
/* Compress the last bytes of the record. */
1061
c_stream->avail_in = rec + rec_offs_data_size(offsets)
1062
- c_stream->next_in;
1064
if (c_stream->avail_in) {
1065
err = deflate(c_stream, Z_NO_FLUSH);
1066
if (UNIV_UNLIKELY(err != Z_OK)) {
1071
ut_ad(!c_stream->avail_in);
1072
} while (--n_dense);
1078
/**************************************************************************
1084
/* out: TRUE on success, FALSE on failure;
1085
page_zip will be left intact on failure. */
1086
page_zip_des_t* page_zip,/* in: size; out: data, n_blobs,
1087
m_start, m_end, m_nonempty */
1088
const page_t* page, /* in: uncompressed page */
1089
dict_index_t* index, /* in: index of the B-tree node */
1090
mtr_t* mtr) /* in: mini-transaction, or NULL */
1094
ulint n_fields;/* number of index fields needed */
1095
byte* fields; /* index field information */
1096
byte* buf; /* compressed payload of the page */
1097
byte* buf_end;/* end of buf */
1099
ulint slot_size;/* amount of uncompressed bytes per record */
1100
const rec_t** recs; /* dense page directory, sorted by address */
1103
ulint* offsets = NULL;
1105
byte* storage;/* storage of uncompressed columns */
1106
ullint usec = ut_time_us(NULL);
1107
#ifdef PAGE_ZIP_COMPRESS_DBG
1108
FILE* logfile = NULL;
1111
ut_a(page_is_comp(page));
1112
ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
1113
ut_ad(page_simple_validate_new((page_t*) page));
1114
ut_ad(page_zip_simple_validate(page_zip));
1116
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
1118
/* Check the data that will be omitted. */
1119
ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
1120
infimum_extra, sizeof infimum_extra));
1121
ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
1122
infimum_data, sizeof infimum_data));
1123
ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
1124
/* info_bits == 0, n_owned <= max */
1125
<= PAGE_DIR_SLOT_MAX_N_OWNED);
1126
ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
1127
supremum_extra_data, sizeof supremum_extra_data));
1129
if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
1130
ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
1131
== PAGE_NEW_SUPREMUM);
1134
if (page_is_leaf(page)) {
1135
n_fields = dict_index_get_n_fields(index);
1137
n_fields = dict_index_get_n_unique_in_tree(index);
1140
/* The dense directory excludes the infimum and supremum records. */
1141
n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
1142
#ifdef PAGE_ZIP_COMPRESS_DBG
1143
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
1144
fprintf(stderr, "compress %p %p %lu %lu %lu\n",
1145
(void*) page_zip, (void*) page,
1149
if (UNIV_UNLIKELY(page_zip_compress_log)) {
1150
/* Create a log file for every compression attempt. */
1151
char logfilename[9];
1152
ut_snprintf(logfilename, sizeof logfilename,
1153
"%08x", page_zip_compress_log++);
1154
logfile = fopen(logfilename, "wb");
1157
/* Write the uncompressed page to the log. */
1158
fwrite(page, 1, UNIV_PAGE_SIZE, logfile);
1159
/* Record the compressed size as zero.
1160
This will be overwritten at successful exit. */
1167
#endif /* PAGE_ZIP_COMPRESS_DBG */
1168
page_zip_stat[page_zip->ssize - 1].compressed++;
1170
if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
1171
>= page_zip_get_size(page_zip))) {
1176
heap = mem_heap_create(page_zip_get_size(page_zip)
1177
+ n_fields * (2 + sizeof *offsets)
1178
+ n_dense * ((sizeof *recs)
1179
- PAGE_ZIP_DIR_SLOT_SIZE)
1180
+ UNIV_PAGE_SIZE * 4
1181
+ (512 << MAX_MEM_LEVEL));
1183
recs = mem_heap_zalloc(heap, n_dense * sizeof *recs);
1185
fields = mem_heap_alloc(heap, (n_fields + 1) * 2);
1187
buf = mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA);
1188
buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA;
1190
/* Compress the data payload. */
1191
page_zip_set_alloc(&c_stream, heap);
1193
err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
1194
Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
1195
MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
1198
c_stream.next_out = buf;
1199
/* Subtract the space reserved for uncompressed data. */
1200
/* Page header and the end marker of the modification log */
1201
c_stream.avail_out = buf_end - buf - 1;
1202
/* Dense page directory and uncompressed columns, if any */
1203
if (page_is_leaf(page)) {
1204
if (dict_index_is_clust(index)) {
1205
trx_id_col = dict_index_get_sys_col_pos(
1206
index, DATA_TRX_ID);
1207
ut_ad(trx_id_col > 0);
1208
ut_ad(trx_id_col != ULINT_UNDEFINED);
1210
slot_size = PAGE_ZIP_DIR_SLOT_SIZE
1211
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1213
/* Signal the absence of trx_id
1214
in page_zip_fields_encode() */
1215
ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
1216
== ULINT_UNDEFINED);
1218
slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
1221
slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
1222
trx_id_col = ULINT_UNDEFINED;
1225
if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size
1226
+ 6/* sizeof(zlib header and footer) */)) {
1230
c_stream.avail_out -= n_dense * slot_size;
1231
c_stream.avail_in = page_zip_fields_encode(n_fields, index,
1232
trx_id_col, fields);
1233
c_stream.next_in = fields;
1234
if (UNIV_LIKELY(!trx_id_col)) {
1235
trx_id_col = ULINT_UNDEFINED;
1238
UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1239
err = deflate(&c_stream, Z_FULL_FLUSH);
1244
ut_ad(!c_stream.avail_in);
1246
page_zip_dir_encode(page, buf_end, recs);
1248
c_stream.next_in = (byte*) page + PAGE_ZIP_START;
1250
storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
1252
/* Compress the records in heap_no order. */
1253
if (UNIV_UNLIKELY(!n_dense)) {
1254
} else if (!page_is_leaf(page)) {
1255
/* This is a node pointer page. */
1256
err = page_zip_compress_node_ptrs(LOGFILE
1257
&c_stream, recs, n_dense,
1258
index, storage, heap);
1259
if (UNIV_UNLIKELY(err != Z_OK)) {
1262
} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
1263
/* This is a leaf page in a secondary index. */
1264
err = page_zip_compress_sec(LOGFILE
1265
&c_stream, recs, n_dense);
1266
if (UNIV_UNLIKELY(err != Z_OK)) {
1270
/* This is a leaf page in a clustered index. */
1271
err = page_zip_compress_clust(LOGFILE
1272
&c_stream, recs, n_dense,
1273
index, &n_blobs, trx_id_col,
1274
buf_end - PAGE_ZIP_DIR_SLOT_SIZE
1275
* page_get_n_recs(page),
1277
if (UNIV_UNLIKELY(err != Z_OK)) {
1282
/* Finish the compression. */
1283
ut_ad(!c_stream.avail_in);
1284
/* Compress any trailing garbage, in case the last record was
1285
allocated from an originally longer space on the free list,
1286
or the data of the last record from page_zip_compress_sec(). */
1288
= page_header_get_field(page, PAGE_HEAP_TOP)
1289
- (c_stream.next_in - page);
1290
ut_a(c_stream.avail_in <= UNIV_PAGE_SIZE - PAGE_ZIP_START - PAGE_DIR);
1292
UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1293
err = deflate(&c_stream, Z_FINISH);
1295
if (UNIV_UNLIKELY(err != Z_STREAM_END)) {
1297
deflateEnd(&c_stream);
1298
mem_heap_free(heap);
1300
#ifdef PAGE_ZIP_COMPRESS_DBG
1304
#endif /* PAGE_ZIP_COMPRESS_DBG */
1305
page_zip_stat[page_zip->ssize - 1].compressed_usec
1306
+= ut_time_us(NULL) - usec;
1310
err = deflateEnd(&c_stream);
1313
ut_ad(buf + c_stream.total_out == c_stream.next_out);
1314
ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out);
1316
/* Valgrind believes that zlib does not initialize some bits
1317
in the last 7 or 8 bytes of the stream. Make Valgrind happy. */
1318
UNIV_MEM_VALID(buf, c_stream.total_out);
1320
/* Zero out the area reserved for the modification log.
1321
Space for the end marker of the modification log is not
1322
included in avail_out. */
1323
memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */);
1327
#endif /* UNIV_DEBUG */
1328
page_zip->m_end = PAGE_DATA + c_stream.total_out;
1329
page_zip->m_nonempty = FALSE;
1330
page_zip->n_blobs = n_blobs;
1331
/* Copy those header fields that will not be written
1332
in buf_flush_init_for_writing() */
1333
memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
1334
FIL_PAGE_LSN - FIL_PAGE_PREV);
1335
memcpy(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2);
1336
memcpy(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
1337
PAGE_DATA - FIL_PAGE_DATA);
1338
/* Copy the rest of the compressed page */
1339
memcpy(page_zip->data + PAGE_DATA, buf,
1340
page_zip_get_size(page_zip) - PAGE_DATA);
1341
mem_heap_free(heap);
1342
#ifdef UNIV_ZIP_DEBUG
1343
ut_a(page_zip_validate(page_zip, page));
1344
#endif /* UNIV_ZIP_DEBUG */
1347
page_zip_compress_write_log(page_zip, page, index, mtr);
1350
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
1352
#ifdef PAGE_ZIP_COMPRESS_DBG
1354
/* Record the compressed size of the block. */
1356
mach_write_to_4(sz, c_stream.total_out);
1357
fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET);
1358
fwrite(sz, 1, sizeof sz, logfile);
1361
#endif /* PAGE_ZIP_COMPRESS_DBG */
1363
page_zip_stat_t* zip_stat
1364
= &page_zip_stat[page_zip->ssize - 1];
1365
zip_stat->compressed_ok++;
1366
zip_stat->compressed_usec += ut_time_us(NULL) - usec;
1372
/**************************************************************************
1373
Compare two page directory entries. */
1378
/* out: positive if rec1 > rec2 */
1379
const rec_t* rec1, /* in: rec1 */
1380
const rec_t* rec2) /* in: rec2 */
1382
return(rec1 > rec2);
1385
/**************************************************************************
1386
Sort the dense page directory by address (heap_no). */
1391
rec_t** arr, /* in/out: dense page directory */
1392
rec_t** aux_arr,/* in/out: work area */
1393
ulint low, /* in: lower bound of the sorting area, inclusive */
1394
ulint high) /* in: upper bound of the sorting area, exclusive */
1396
UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
1400
/**************************************************************************
1401
Deallocate the index information initialized by page_zip_fields_decode(). */
1404
page_zip_fields_free(
1405
/*=================*/
1406
dict_index_t* index) /* in: dummy index to be freed */
1409
dict_table_t* table = index->table;
1410
mem_heap_free(index->heap);
1411
mutex_free(&(table->autoinc_mutex));
1412
mem_heap_free(table->heap);
1416
/**************************************************************************
1417
Read the index information for the compressed page. */
1420
page_zip_fields_decode(
1421
/*===================*/
1422
/* out,own: dummy index describing the page,
1424
const byte* buf, /* in: index information */
1425
const byte* end, /* in: end of buf */
1426
ulint* trx_id_col)/* in: NULL for non-leaf pages;
1427
for leaf pages, pointer to where to store
1428
the position of the trx_id column */
1434
dict_table_t* table;
1435
dict_index_t* index;
1437
/* Determine the number of fields. */
1438
for (b = buf, n = 0; b < end; n++) {
1440
b++; /* skip the second byte */
1444
n--; /* n_nullable or trx_id */
1446
if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
1448
page_zip_fail(("page_zip_fields_decode: n = %lu\n",
1453
if (UNIV_UNLIKELY(b > end)) {
1455
page_zip_fail(("page_zip_fields_decode: %p > %p\n",
1456
(const void*) b, (const void*) end));
1460
table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n,
1462
index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
1463
DICT_HDR_SPACE, 0, n);
1464
index->table = table;
1466
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
1467
index->cached = TRUE;
1469
/* Initialize the fields. */
1470
for (b = buf, i = 0; i < n; i++) {
1476
if (UNIV_UNLIKELY(val & 0x80)) {
1477
/* fixed length > 62 bytes */
1478
val = (val & 0x7f) << 8 | *b++;
1480
mtype = DATA_FIXBINARY;
1481
} else if (UNIV_UNLIKELY(val >= 126)) {
1482
/* variable length with max > 255 bytes */
1484
mtype = DATA_BINARY;
1485
} else if (val <= 1) {
1486
/* variable length with max <= 255 bytes */
1488
mtype = DATA_BINARY;
1490
/* fixed length < 62 bytes */
1492
mtype = DATA_FIXBINARY;
1495
dict_mem_table_add_col(table, NULL, NULL, mtype,
1496
val & 1 ? DATA_NOT_NULL : 0, len);
1497
dict_index_add_col(index, table,
1498
dict_table_get_nth_col(table, i), 0);
1502
if (UNIV_UNLIKELY(val & 0x80)) {
1503
val = (val & 0x7f) << 8 | *b++;
1506
/* Decode the position of the trx_id column. */
1509
val = ULINT_UNDEFINED;
1510
} else if (UNIV_UNLIKELY(val >= n)) {
1511
page_zip_fields_free(index);
1514
index->type = DICT_CLUSTERED;
1519
/* Decode the number of nullable fields. */
1520
if (UNIV_UNLIKELY(index->n_nullable > val)) {
1521
page_zip_fields_free(index);
1524
index->n_nullable = val;
1533
/**************************************************************************
1534
Populate the sparse page directory from the dense directory. */
1537
page_zip_dir_decode(
1538
/*================*/
1539
/* out: TRUE on success,
1541
const page_zip_des_t* page_zip,/* in: dense page directory on
1543
page_t* page, /* in: compact page with valid header;
1544
out: trailer and sparse page directory
1546
rec_t** recs, /* out: dense page directory sorted by
1547
ascending address (and heap_no) */
1548
rec_t** recs_aux,/* in/out: scratch area */
1549
ulint n_dense)/* in: number of user records, and
1550
size of recs[] and recs_aux[] */
1556
n_recs = page_get_n_recs(page);
1558
if (UNIV_UNLIKELY(n_recs > n_dense)) {
1559
page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n",
1560
(ulong) n_recs, (ulong) n_dense));
1564
/* Traverse the list of stored records in the sorting order,
1565
starting from the first user record. */
1567
slot = page + (UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
1568
UNIV_PREFETCH_RW(slot);
1570
/* Zero out the page trailer. */
1571
memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);
1573
mach_write_to_2(slot, PAGE_NEW_INFIMUM);
1574
slot -= PAGE_DIR_SLOT_SIZE;
1575
UNIV_PREFETCH_RW(slot);
1577
/* Initialize the sparse directory and copy the dense directory. */
1578
for (i = 0; i < n_recs; i++) {
1579
ulint offs = page_zip_dir_get(page_zip, i);
1581
if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
1582
mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
1583
slot -= PAGE_DIR_SLOT_SIZE;
1584
UNIV_PREFETCH_RW(slot);
1587
if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
1588
< PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
1589
page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n",
1590
(unsigned) i, (unsigned) n_recs,
1595
recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
1598
mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
1600
const page_dir_slot_t* last_slot = page_dir_get_nth_slot(
1601
page, page_dir_get_n_slots(page) - 1);
1603
if (UNIV_UNLIKELY(slot != last_slot)) {
1604
page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
1606
(const void*) last_slot));
1611
/* Copy the rest of the dense directory. */
1612
for (; i < n_dense; i++) {
1613
ulint offs = page_zip_dir_get(page_zip, i);
1615
if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
1616
page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n",
1617
(unsigned) i, (unsigned) n_dense,
1622
recs[i] = page + offs;
1625
if (UNIV_LIKELY(n_dense > 1)) {
1626
page_zip_dir_sort(recs, recs_aux, 0, n_dense);
1631
/**************************************************************************
1632
Initialize the REC_N_NEW_EXTRA_BYTES of each record. */
1635
page_zip_set_extra_bytes(
1636
/*=====================*/
1637
/* out: TRUE on success,
1639
const page_zip_des_t* page_zip,/* in: compressed page */
1640
page_t* page, /* in/out: uncompressed page */
1641
ulint info_bits)/* in: REC_INFO_MIN_REC_FLAG or 0 */
1649
n = page_get_n_recs(page);
1650
rec = page + PAGE_NEW_INFIMUM;
1652
for (i = 0; i < n; i++) {
1653
offs = page_zip_dir_get(page_zip, i);
1655
if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_DEL)) {
1656
info_bits |= REC_INFO_DELETED_FLAG;
1658
if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
1659
info_bits |= n_owned;
1664
offs &= PAGE_ZIP_DIR_SLOT_MASK;
1665
if (UNIV_UNLIKELY(offs < PAGE_ZIP_START
1666
+ REC_N_NEW_EXTRA_BYTES)) {
1667
page_zip_fail(("page_zip_set_extra_bytes 1:"
1669
(unsigned) i, (unsigned) n,
1674
rec_set_next_offs_new(rec, offs);
1676
rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
1680
/* Set the next pointer of the last user record. */
1681
rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);
1683
/* Set n_owned of the supremum record. */
1684
page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;
1686
/* The dense directory excludes the infimum and supremum records. */
1687
n = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
1690
if (UNIV_LIKELY(i == n)) {
1694
page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n",
1695
(unsigned) i, (unsigned) n));
1699
offs = page_zip_dir_get(page_zip, i);
1701
/* Set the extra bytes of deleted records on the free list. */
1703
if (UNIV_UNLIKELY(!offs)
1704
|| UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
1706
page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n",
1712
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
1718
offs = page_zip_dir_get(page_zip, i);
1719
rec_set_next_offs_new(rec, offs);
1722
/* Terminate the free list. */
1723
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
1724
rec_set_next_offs_new(rec, 0);
1729
/**************************************************************************
1730
Apply the modification log to a record containing externally stored
1731
columns. Do not copy the fields that are stored separately. */
1734
page_zip_apply_log_ext(
1735
/*===================*/
1736
/* out: pointer to modification log,
1737
or NULL on failure */
1738
rec_t* rec, /* in/out: record */
1739
const ulint* offsets, /* in: rec_get_offsets(rec) */
1740
ulint trx_id_col, /* in: position of of DB_TRX_ID */
1741
const byte* data, /* in: modification log */
1742
const byte* end) /* in: end of modification log */
1746
byte* next_out = rec;
1748
/* Check if there are any externally stored columns.
1749
For each externally stored column, skip the
1750
BTR_EXTERN_FIELD_REF. */
1752
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
1755
if (UNIV_UNLIKELY(i == trx_id_col)) {
1756
/* Skip trx_id and roll_ptr */
1757
dst = rec_get_nth_field(rec, offsets,
1759
if (UNIV_UNLIKELY(dst - next_out >= end - data)
1761
(len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
1762
|| rec_offs_nth_extern(offsets, i)) {
1763
page_zip_fail(("page_zip_apply_log_ext:"
1765
" %p - %p >= %p - %p\n",
1768
(const void*) next_out,
1770
(const void*) data));
1774
memcpy(next_out, data, dst - next_out);
1775
data += dst - next_out;
1776
next_out = dst + (DATA_TRX_ID_LEN
1777
+ DATA_ROLL_PTR_LEN);
1778
} else if (rec_offs_nth_extern(offsets, i)) {
1779
dst = rec_get_nth_field(rec, offsets,
1782
>= BTR_EXTERN_FIELD_REF_SIZE);
1784
len += dst - next_out
1785
- BTR_EXTERN_FIELD_REF_SIZE;
1787
if (UNIV_UNLIKELY(data + len >= end)) {
1788
page_zip_fail(("page_zip_apply_log_ext: "
1789
"ext %p+%lu >= %p\n",
1792
(const void*) end));
1796
memcpy(next_out, data, len);
1799
+ BTR_EXTERN_FIELD_REF_SIZE;
1803
/* Copy the last bytes of the record. */
1804
len = rec_get_end(rec, offsets) - next_out;
1805
if (UNIV_UNLIKELY(data + len >= end)) {
1806
page_zip_fail(("page_zip_apply_log_ext: "
1807
"last %p+%lu >= %p\n",
1810
(const void*) end));
1813
memcpy(next_out, data, len);
1819
/**************************************************************************
1820
Apply the modification log to an uncompressed page.
1821
Do not copy the fields that are stored separately. */
1826
/* out: pointer to end of modification log,
1827
or NULL on failure */
1828
const byte* data, /* in: modification log */
1829
ulint size, /* in: maximum length of the log, in bytes */
1830
rec_t** recs, /* in: dense page directory,
1831
sorted by address (indexed by
1832
heap_no - PAGE_HEAP_NO_USER_LOW) */
1833
ulint n_dense,/* in: size of recs[] */
1834
ulint trx_id_col,/* in: column number of trx_id in the index,
1835
or ULINT_UNDEFINED if none */
1837
/* in: heap_no and status bits for
1838
the next record to uncompress */
1839
dict_index_t* index, /* in: index of the page */
1840
ulint* offsets)/* in/out: work area for
1841
rec_get_offsets_reverse() */
1843
const byte* const end = data + size;
1852
if (UNIV_UNLIKELY(!val)) {
1856
val = (val & 0x7f) << 8 | *data++;
1857
if (UNIV_UNLIKELY(!val)) {
1858
page_zip_fail(("page_zip_apply_log:"
1859
" invalid val %x%x\n",
1860
data[-2], data[-1]));
1864
if (UNIV_UNLIKELY(data >= end)) {
1865
page_zip_fail(("page_zip_apply_log: %p >= %p\n",
1867
(const void*) end));
1870
if (UNIV_UNLIKELY((val >> 1) > n_dense)) {
1871
page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n",
1872
(ulong) val, (ulong) n_dense));
1876
/* Determine the heap number and status bits of the record. */
1877
rec = recs[(val >> 1) - 1];
1879
hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT;
1880
hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);
1882
/* This may either be an old record that is being
1883
overwritten (updated in place, or allocated from
1884
the free list), or a new record, with the next
1885
available_heap_no. */
1886
if (UNIV_UNLIKELY(hs > heap_status)) {
1887
page_zip_fail(("page_zip_apply_log: %lu > %lu\n",
1888
(ulong) hs, (ulong) heap_status));
1890
} else if (hs == heap_status) {
1891
/* A new record was allocated from the heap. */
1892
if (UNIV_UNLIKELY(val & 1)) {
1893
/* Only existing records may be cleared. */
1894
page_zip_fail(("page_zip_apply_log:"
1895
" attempting to create"
1896
" deleted rec %lu\n",
1900
heap_status += 1 << REC_HEAP_NO_SHIFT;
1903
mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);
1906
/* Clear the data bytes of the record. */
1907
mem_heap_t* heap = NULL;
1909
offs = rec_get_offsets(rec, index, offsets,
1910
ULINT_UNDEFINED, &heap);
1911
memset(rec, 0, rec_offs_data_size(offs));
1913
if (UNIV_LIKELY_NULL(heap)) {
1914
mem_heap_free(heap);
1919
#if REC_STATUS_NODE_PTR != TRUE
1920
# error "REC_STATUS_NODE_PTR != TRUE"
1922
rec_get_offsets_reverse(data, index,
1923
hs & REC_STATUS_NODE_PTR,
1925
rec_offs_make_valid(rec, index, offsets);
1927
/* Copy the extra bytes (backwards). */
1929
byte* start = rec_get_start(rec, offsets);
1930
byte* b = rec - REC_N_NEW_EXTRA_BYTES;
1931
while (b != start) {
1936
/* Copy the data bytes. */
1937
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
1938
/* Non-leaf nodes should not contain any
1939
externally stored columns. */
1940
if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
1941
page_zip_fail(("page_zip_apply_log: "
1942
"%lu&REC_STATUS_NODE_PTR\n",
1947
data = page_zip_apply_log_ext(
1948
rec, offsets, trx_id_col, data, end);
1950
if (UNIV_UNLIKELY(!data)) {
1953
} else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
1954
len = rec_offs_data_size(offsets)
1955
- REC_NODE_PTR_SIZE;
1956
/* Copy the data bytes, except node_ptr. */
1957
if (UNIV_UNLIKELY(data + len >= end)) {
1958
page_zip_fail(("page_zip_apply_log: "
1959
"node_ptr %p+%lu >= %p\n",
1962
(const void*) end));
1965
memcpy(rec, data, len);
1967
} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
1968
len = rec_offs_data_size(offsets);
1970
/* Copy all data bytes of
1971
a record in a secondary index. */
1972
if (UNIV_UNLIKELY(data + len >= end)) {
1973
page_zip_fail(("page_zip_apply_log: "
1974
"sec %p+%lu >= %p\n",
1977
(const void*) end));
1981
memcpy(rec, data, len);
1984
/* Skip DB_TRX_ID and DB_ROLL_PTR. */
1985
ulint l = rec_get_nth_field_offs(offsets,
1989
if (UNIV_UNLIKELY(data + l >= end)
1990
|| UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
1991
+ DATA_ROLL_PTR_LEN))) {
1992
page_zip_fail(("page_zip_apply_log: "
1993
"trx_id %p+%lu >= %p\n",
1996
(const void*) end));
2000
/* Copy any preceding data bytes. */
2001
memcpy(rec, data, l);
2004
/* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
2005
b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2006
len = rec_get_end(rec, offsets) - b;
2007
if (UNIV_UNLIKELY(data + len >= end)) {
2008
page_zip_fail(("page_zip_apply_log: "
2009
"clust %p+%lu >= %p\n",
2012
(const void*) end));
2015
memcpy(b, data, len);
2021
/**************************************************************************
2022
Decompress the records of a node pointer page. */
2025
page_zip_decompress_node_ptrs(
2026
/*==========================*/
2027
/* out: TRUE on success,
2029
page_zip_des_t* page_zip, /* in/out: compressed page */
2030
z_stream* d_stream, /* in/out: compressed page stream */
2031
rec_t** recs, /* in: dense page directory
2032
sorted by address */
2033
ulint n_dense, /* in: size of recs[] */
2034
dict_index_t* index, /* in: the index of the page */
2035
ulint* offsets, /* in/out: temporary offsets */
2036
mem_heap_t* heap) /* in: temporary memory heap */
2038
ulint heap_status = REC_STATUS_NODE_PTR
2039
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2041
const byte* storage;
2043
/* Subtract the space reserved for uncompressed data. */
2044
d_stream->avail_in -= n_dense
2045
* (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE);
2047
/* Decompress the records in heap_no order. */
2048
for (slot = 0; slot < n_dense; slot++) {
2049
rec_t* rec = recs[slot];
2051
d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2052
- d_stream->next_out;
2054
ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
2055
- PAGE_ZIP_START - PAGE_DIR);
2056
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2058
/* Apparently, n_dense has grown
2059
since the time the page was last compressed. */
2063
if (!d_stream->avail_out) {
2068
page_zip_fail(("page_zip_decompress_node_ptrs:"
2069
" 1 inflate(Z_SYNC_FLUSH)=%s\n",
2074
ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2075
/* Prepare to decompress the data bytes. */
2076
d_stream->next_out = rec;
2077
/* Set heap_no and the status bits. */
2078
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2079
heap_status += 1 << REC_HEAP_NO_SHIFT;
2081
/* Read the offsets. The status bits are needed here. */
2082
offsets = rec_get_offsets(rec, index, offsets,
2083
ULINT_UNDEFINED, &heap);
2085
/* Non-leaf nodes should not have any externally
2087
ut_ad(!rec_offs_any_extern(offsets));
2089
/* Decompress the data bytes, except node_ptr. */
2090
d_stream->avail_out = rec_offs_data_size(offsets)
2091
- REC_NODE_PTR_SIZE;
2093
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2098
if (!d_stream->avail_out) {
2103
page_zip_fail(("page_zip_decompress_node_ptrs:"
2104
" 2 inflate(Z_SYNC_FLUSH)=%s\n",
2109
/* Clear the node pointer in case the record
2110
will be deleted and the space will be reallocated
2111
to a smaller record. */
2112
memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE);
2113
d_stream->next_out += REC_NODE_PTR_SIZE;
2115
ut_ad(d_stream->next_out == rec_get_end(rec, offsets));
2118
/* Decompress any trailing garbage, in case the last record was
2119
allocated from an originally longer space on the free list. */
2120
d_stream->avail_out = page_header_get_field(page_zip->data,
2122
- page_offset(d_stream->next_out);
2123
if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2124
- PAGE_ZIP_START - PAGE_DIR)) {
2126
page_zip_fail(("page_zip_decompress_node_ptrs:"
2127
" avail_out = %u\n",
2128
d_stream->avail_out));
2132
if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2133
page_zip_fail(("page_zip_decompress_node_ptrs:"
2134
" inflate(Z_FINISH)=%s\n",
2137
inflateEnd(d_stream);
2141
/* Note that d_stream->avail_out > 0 may hold here
2142
if the modification log is nonempty. */
2145
if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2150
page_t* page = page_align(d_stream->next_out);
2152
/* Clear the unused heap space on the uncompressed page. */
2153
memset(d_stream->next_out, 0,
2154
page_dir_get_nth_slot(page,
2155
page_dir_get_n_slots(page) - 1)
2156
- d_stream->next_out);
2160
page_zip->m_start = PAGE_DATA + d_stream->total_in;
2161
#endif /* UNIV_DEBUG */
2163
/* Apply the modification log. */
2165
const byte* mod_log_ptr;
2166
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2167
d_stream->avail_in + 1,
2169
ULINT_UNDEFINED, heap_status,
2172
if (UNIV_UNLIKELY(!mod_log_ptr)) {
2175
page_zip->m_end = mod_log_ptr - page_zip->data;
2176
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2180
(page_zip_get_trailer_len(page_zip,
2181
dict_index_is_clust(index), NULL)
2182
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
2183
page_zip_fail(("page_zip_decompress_node_ptrs:"
2184
" %lu + %lu >= %lu, %lu\n",
2185
(ulong) page_zip_get_trailer_len(
2186
page_zip, dict_index_is_clust(index),
2188
(ulong) page_zip->m_end,
2189
(ulong) page_zip_get_size(page_zip),
2190
(ulong) dict_index_is_clust(index)));
2194
/* Restore the uncompressed columns in heap_no order. */
2195
storage = page_zip->data + page_zip_get_size(page_zip)
2196
- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2198
for (slot = 0; slot < n_dense; slot++) {
2199
rec_t* rec = recs[slot];
2201
offsets = rec_get_offsets(rec, index, offsets,
2202
ULINT_UNDEFINED, &heap);
2203
/* Non-leaf nodes should not have any externally
2205
ut_ad(!rec_offs_any_extern(offsets));
2206
storage -= REC_NODE_PTR_SIZE;
2208
memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
2209
storage, REC_NODE_PTR_SIZE);
2215
/**************************************************************************
2216
Decompress the records of a leaf node of a secondary index. */
2219
page_zip_decompress_sec(
2220
/*====================*/
2221
/* out: TRUE on success,
2223
page_zip_des_t* page_zip, /* in/out: compressed page */
2224
z_stream* d_stream, /* in/out: compressed page stream */
2225
rec_t** recs, /* in: dense page directory
2226
sorted by address */
2227
ulint n_dense, /* in: size of recs[] */
2228
dict_index_t* index, /* in: the index of the page */
2229
ulint* offsets) /* in/out: temporary offsets */
2231
ulint heap_status = REC_STATUS_ORDINARY
2232
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2235
ut_a(!dict_index_is_clust(index));
2237
/* Subtract the space reserved for uncompressed data. */
2238
d_stream->avail_in -= n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2240
for (slot = 0; slot < n_dense; slot++) {
2241
rec_t* rec = recs[slot];
2243
/* Decompress everything up to this record. */
2244
d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2245
- d_stream->next_out;
2247
if (UNIV_LIKELY(d_stream->avail_out)) {
2248
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2250
/* Apparently, n_dense has grown
2251
since the time the page was last compressed. */
2255
if (!d_stream->avail_out) {
2260
page_zip_fail(("page_zip_decompress_sec:"
2261
" inflate(Z_SYNC_FLUSH)=%s\n",
2267
ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2269
/* Skip the REC_N_NEW_EXTRA_BYTES. */
2271
d_stream->next_out = rec;
2273
/* Set heap_no and the status bits. */
2274
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2275
heap_status += 1 << REC_HEAP_NO_SHIFT;
2278
/* Decompress the data of the last record and any trailing garbage,
2279
in case the last record was allocated from an originally longer space
2280
on the free list. */
2281
d_stream->avail_out = page_header_get_field(page_zip->data,
2283
- page_offset(d_stream->next_out);
2284
if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2285
- PAGE_ZIP_START - PAGE_DIR)) {
2287
page_zip_fail(("page_zip_decompress_sec:"
2288
" avail_out = %u\n",
2289
d_stream->avail_out));
2293
if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2294
page_zip_fail(("page_zip_decompress_sec:"
2295
" inflate(Z_FINISH)=%s\n",
2298
inflateEnd(d_stream);
2302
/* Note that d_stream->avail_out > 0 may hold here
2303
if the modification log is nonempty. */
2306
if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2311
page_t* page = page_align(d_stream->next_out);
2313
/* Clear the unused heap space on the uncompressed page. */
2314
memset(d_stream->next_out, 0,
2315
page_dir_get_nth_slot(page,
2316
page_dir_get_n_slots(page) - 1)
2317
- d_stream->next_out);
2321
page_zip->m_start = PAGE_DATA + d_stream->total_in;
2322
#endif /* UNIV_DEBUG */
2324
/* Apply the modification log. */
2326
const byte* mod_log_ptr;
2327
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2328
d_stream->avail_in + 1,
2330
ULINT_UNDEFINED, heap_status,
2333
if (UNIV_UNLIKELY(!mod_log_ptr)) {
2336
page_zip->m_end = mod_log_ptr - page_zip->data;
2337
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2340
if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE, NULL)
2341
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
2343
page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n",
2344
(ulong) page_zip_get_trailer_len(
2345
page_zip, FALSE, NULL),
2346
(ulong) page_zip->m_end,
2347
(ulong) page_zip_get_size(page_zip)));
2351
/* There are no uncompressed columns on leaf pages of
2352
secondary indexes. */
2357
/**************************************************************************
2358
Decompress a record of a leaf node of a clustered index that contains
2359
externally stored columns. */
2362
page_zip_decompress_clust_ext(
2363
/*==========================*/
2364
/* out: TRUE on success */
2365
z_stream* d_stream, /* in/out: compressed page stream */
2366
rec_t* rec, /* in/out: record */
2367
const ulint* offsets, /* in: rec_get_offsets(rec) */
2368
ulint trx_id_col) /* in: position of of DB_TRX_ID */
2372
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
2376
if (UNIV_UNLIKELY(i == trx_id_col)) {
2377
/* Skip trx_id and roll_ptr */
2378
dst = rec_get_nth_field(rec, offsets, i, &len);
2379
if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
2380
+ DATA_ROLL_PTR_LEN)) {
2382
page_zip_fail(("page_zip_decompress_clust_ext:"
2383
" len[%lu] = %lu\n",
2384
(ulong) i, (ulong) len));
2388
if (rec_offs_nth_extern(offsets, i)) {
2390
page_zip_fail(("page_zip_decompress_clust_ext:"
2391
" DB_TRX_ID at %lu is ext\n",
2396
d_stream->avail_out = dst - d_stream->next_out;
2398
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2402
if (!d_stream->avail_out) {
2407
page_zip_fail(("page_zip_decompress_clust_ext:"
2408
" 1 inflate(Z_SYNC_FLUSH)=%s\n",
2413
ut_ad(d_stream->next_out == dst);
2415
/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
2416
avoid uninitialized bytes in case the record
2417
is affected by page_zip_apply_log(). */
2418
memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2420
d_stream->next_out += DATA_TRX_ID_LEN
2421
+ DATA_ROLL_PTR_LEN;
2422
} else if (rec_offs_nth_extern(offsets, i)) {
2423
dst = rec_get_nth_field(rec, offsets, i, &len);
2424
ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
2425
dst += len - BTR_EXTERN_FIELD_REF_SIZE;
2427
d_stream->avail_out = dst - d_stream->next_out;
2428
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2432
if (!d_stream->avail_out) {
2437
page_zip_fail(("page_zip_decompress_clust_ext:"
2438
" 2 inflate(Z_SYNC_FLUSH)=%s\n",
2443
ut_ad(d_stream->next_out == dst);
2445
/* Clear the BLOB pointer in case
2446
the record will be deleted and the
2447
space will not be reused. Note that
2448
the final initialization of the BLOB
2449
pointers (copying from "externs"
2450
or clearing) will have to take place
2451
only after the page modification log
2452
has been applied. Otherwise, we
2453
could end up with an uninitialized
2454
BLOB pointer when a record is deleted,
2455
reallocated and deleted. */
2456
memset(d_stream->next_out, 0,
2457
BTR_EXTERN_FIELD_REF_SIZE);
2459
+= BTR_EXTERN_FIELD_REF_SIZE;
2466
/**************************************************************************
2467
Compress the records of a leaf node of a clustered index. */
2470
page_zip_decompress_clust(
2471
/*======================*/
2472
/* out: TRUE on success,
2474
page_zip_des_t* page_zip, /* in/out: compressed page */
2475
z_stream* d_stream, /* in/out: compressed page stream */
2476
rec_t** recs, /* in: dense page directory
2477
sorted by address */
2478
ulint n_dense, /* in: size of recs[] */
2479
dict_index_t* index, /* in: the index of the page */
2480
ulint trx_id_col, /* index of the trx_id column */
2481
ulint* offsets, /* in/out: temporary offsets */
2482
mem_heap_t* heap) /* in: temporary memory heap */
2486
ulint heap_status = REC_STATUS_ORDINARY
2487
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2488
const byte* storage;
2489
const byte* externs;
2491
ut_a(dict_index_is_clust(index));
2493
/* Subtract the space reserved for uncompressed data. */
2494
d_stream->avail_in -= n_dense * (PAGE_ZIP_DIR_SLOT_SIZE
2496
+ DATA_ROLL_PTR_LEN);
2498
/* Decompress the records in heap_no order. */
2499
for (slot = 0; slot < n_dense; slot++) {
2500
rec_t* rec = recs[slot];
2502
d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2503
- d_stream->next_out;
2505
ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
2506
- PAGE_ZIP_START - PAGE_DIR);
2507
err = inflate(d_stream, Z_SYNC_FLUSH);
2510
/* Apparently, n_dense has grown
2511
since the time the page was last compressed. */
2515
if (UNIV_LIKELY(!d_stream->avail_out)) {
2520
page_zip_fail(("page_zip_decompress_clust:"
2521
" 1 inflate(Z_SYNC_FLUSH)=%s\n",
2526
ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2527
/* Prepare to decompress the data bytes. */
2528
d_stream->next_out = rec;
2529
/* Set heap_no and the status bits. */
2530
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2531
heap_status += 1 << REC_HEAP_NO_SHIFT;
2533
/* Read the offsets. The status bits are needed here. */
2534
offsets = rec_get_offsets(rec, index, offsets,
2535
ULINT_UNDEFINED, &heap);
2537
/* This is a leaf page in a clustered index. */
2539
/* Check if there are any externally stored columns.
2540
For each externally stored column, restore the
2541
BTR_EXTERN_FIELD_REF separately. */
2543
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
2545
(!page_zip_decompress_clust_ext(
2546
d_stream, rec, offsets, trx_id_col))) {
2551
/* Skip trx_id and roll_ptr */
2553
byte* dst = rec_get_nth_field(rec, offsets,
2555
if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
2556
+ DATA_ROLL_PTR_LEN)) {
2558
page_zip_fail(("page_zip_decompress_clust:"
2559
" len = %lu\n", (ulong) len));
2563
d_stream->avail_out = dst - d_stream->next_out;
2565
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2569
if (!d_stream->avail_out) {
2574
page_zip_fail(("page_zip_decompress_clust:"
2575
" 2 inflate(Z_SYNC_FLUSH)=%s\n",
2580
ut_ad(d_stream->next_out == dst);
2582
/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
2583
avoid uninitialized bytes in case the record
2584
is affected by page_zip_apply_log(). */
2585
memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2587
d_stream->next_out += DATA_TRX_ID_LEN
2588
+ DATA_ROLL_PTR_LEN;
2591
/* Decompress the last bytes of the record. */
2592
d_stream->avail_out = rec_get_end(rec, offsets)
2593
- d_stream->next_out;
2595
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2599
if (!d_stream->avail_out) {
2604
page_zip_fail(("page_zip_decompress_clust:"
2605
" 3 inflate(Z_SYNC_FLUSH)=%s\n",
2611
/* Decompress any trailing garbage, in case the last record was
2612
allocated from an originally longer space on the free list. */
2613
d_stream->avail_out = page_header_get_field(page_zip->data,
2615
- page_offset(d_stream->next_out);
2616
if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2617
- PAGE_ZIP_START - PAGE_DIR)) {
2619
page_zip_fail(("page_zip_decompress_clust:"
2620
" avail_out = %u\n",
2621
d_stream->avail_out));
2625
if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2626
page_zip_fail(("page_zip_decompress_clust:"
2627
" inflate(Z_FINISH)=%s\n",
2630
inflateEnd(d_stream);
2634
/* Note that d_stream->avail_out > 0 may hold here
2635
if the modification log is nonempty. */
2638
if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2643
page_t* page = page_align(d_stream->next_out);
2645
/* Clear the unused heap space on the uncompressed page. */
2646
memset(d_stream->next_out, 0,
2647
page_dir_get_nth_slot(page,
2648
page_dir_get_n_slots(page) - 1)
2649
- d_stream->next_out);
2653
page_zip->m_start = PAGE_DATA + d_stream->total_in;
2654
#endif /* UNIV_DEBUG */
2656
/* Apply the modification log. */
2658
const byte* mod_log_ptr;
2659
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2660
d_stream->avail_in + 1,
2662
trx_id_col, heap_status,
2665
if (UNIV_UNLIKELY(!mod_log_ptr)) {
2668
page_zip->m_end = mod_log_ptr - page_zip->data;
2669
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2672
if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE, NULL)
2673
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
2675
page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n",
2676
(ulong) page_zip_get_trailer_len(
2677
page_zip, TRUE, NULL),
2678
(ulong) page_zip->m_end,
2679
(ulong) page_zip_get_size(page_zip)));
2683
storage = page_zip->data + page_zip_get_size(page_zip)
2684
- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2686
externs = storage - n_dense
2687
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2689
/* Restore the uncompressed columns in heap_no order. */
2691
for (slot = 0; slot < n_dense; slot++) {
2695
rec_t* rec = recs[slot];
2696
ibool exists = !page_zip_dir_find_free(
2697
page_zip, page_offset(rec));
2698
offsets = rec_get_offsets(rec, index, offsets,
2699
ULINT_UNDEFINED, &heap);
2701
dst = rec_get_nth_field(rec, offsets,
2703
ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2704
storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
2705
memcpy(dst, storage,
2706
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2708
/* Check if there are any externally stored
2709
columns in this record. For each externally
2710
stored column, restore or clear the
2711
BTR_EXTERN_FIELD_REF. */
2712
if (!rec_offs_any_extern(offsets)) {
2716
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
2717
if (!rec_offs_nth_extern(offsets, i)) {
2720
dst = rec_get_nth_field(rec, offsets, i, &len);
2722
if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) {
2723
page_zip_fail(("page_zip_decompress_clust:"
2729
dst += len - BTR_EXTERN_FIELD_REF_SIZE;
2731
if (UNIV_LIKELY(exists)) {
2733
restore the BLOB pointer */
2734
externs -= BTR_EXTERN_FIELD_REF_SIZE;
2737
(externs < page_zip->data
2738
+ page_zip->m_end)) {
2739
page_zip_fail(("page_zip_"
2740
"decompress_clust: "
2742
(const void*) externs,
2750
memcpy(dst, externs,
2751
BTR_EXTERN_FIELD_REF_SIZE);
2753
page_zip->n_blobs++;
2756
clear the BLOB pointer */
2758
BTR_EXTERN_FIELD_REF_SIZE);
2766
/**************************************************************************
2767
Decompress a page. This function should tolerate errors on the compressed
2768
page. Instead of letting assertions fail, it will return FALSE if an
2769
inconsistency is detected. */
2772
page_zip_decompress(
2773
/*================*/
2774
/* out: TRUE on success, FALSE on failure */
2775
page_zip_des_t* page_zip,/* in: data, ssize;
2776
out: m_start, m_end, m_nonempty, n_blobs */
2777
page_t* page) /* out: uncompressed page, may be trashed */
2780
dict_index_t* index = NULL;
2781
rec_t** recs; /* dense page directory, sorted by address */
2782
ulint n_dense;/* number of user records on the page */
2783
ulint trx_id_col = ULINT_UNDEFINED;
2786
ullint usec = ut_time_us(NULL);
2788
ut_ad(page_zip_simple_validate(page_zip));
2789
UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
2790
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
2792
/* The dense directory excludes the infimum and supremum records. */
2793
n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW;
2794
if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
2795
>= page_zip_get_size(page_zip))) {
2796
page_zip_fail(("page_zip_decompress 1: %lu %lu\n",
2798
(ulong) page_zip_get_size(page_zip)));
2802
heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE);
2803
recs = mem_heap_alloc(heap, n_dense * (2 * sizeof *recs));
2805
#ifdef UNIV_ZIP_DEBUG
2806
/* Clear the page. */
2807
memset(page, 0x55, UNIV_PAGE_SIZE);
2808
#endif /* UNIV_ZIP_DEBUG */
2809
UNIV_MEM_INVALID(page, UNIV_PAGE_SIZE);
2810
/* Copy the page header. */
2811
memcpy(page, page_zip->data, PAGE_DATA);
2813
/* Copy the page directory. */
2814
if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
2815
recs + n_dense, n_dense))) {
2817
mem_heap_free(heap);
2821
/* Copy the infimum and supremum records. */
2822
memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
2823
infimum_extra, sizeof infimum_extra);
2824
if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
2825
rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
2828
rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
2829
page_zip_dir_get(page_zip, 0)
2830
& PAGE_ZIP_DIR_SLOT_MASK);
2832
memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data);
2833
memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
2834
supremum_extra_data, sizeof supremum_extra_data);
2836
page_zip_set_alloc(&d_stream, heap);
2838
if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
2843
d_stream.next_in = page_zip->data + PAGE_DATA;
2844
/* Subtract the space reserved for
2845
the page header and the end marker of the modification log. */
2846
d_stream.avail_in = page_zip_get_size(page_zip) - (PAGE_DATA + 1);
2848
d_stream.next_out = page + PAGE_ZIP_START;
2849
d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
2851
/* Decode the zlib header and the index information. */
2852
if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
2854
page_zip_fail(("page_zip_decompress:"
2855
" 1 inflate(Z_BLOCK)=%s\n", d_stream.msg));
2859
if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
2861
page_zip_fail(("page_zip_decompress:"
2862
" 2 inflate(Z_BLOCK)=%s\n", d_stream.msg));
2866
index = page_zip_fields_decode(
2867
page + PAGE_ZIP_START, d_stream.next_out,
2868
page_is_leaf(page) ? &trx_id_col : NULL);
2870
if (UNIV_UNLIKELY(!index)) {
2875
/* Decompress the user records. */
2876
page_zip->n_blobs = 0;
2877
d_stream.next_out = page + PAGE_ZIP_START;
2880
/* Pre-allocate the offsets for rec_get_offsets_reverse(). */
2881
ulint n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE
2882
+ dict_index_get_n_fields(index);
2883
offsets = mem_heap_alloc(heap, n * sizeof(ulint));
2887
/* Decompress the records in heap_no order. */
2888
if (!page_is_leaf(page)) {
2889
/* This is a node pointer page. */
2893
(!page_zip_decompress_node_ptrs(page_zip, &d_stream,
2894
recs, n_dense, index,
2899
info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL
2900
? REC_INFO_MIN_REC_FLAG : 0;
2902
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
2906
} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
2907
/* This is a leaf page in a secondary index. */
2908
if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream,
2914
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
2917
page_zip_fields_free(index);
2918
mem_heap_free(heap);
2922
/* This is a leaf page in a clustered index. */
2923
if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip,
2931
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
2937
ut_a(page_is_comp(page));
2938
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
2940
page_zip_fields_free(index);
2941
mem_heap_free(heap);
2943
page_zip_stat_t* zip_stat
2944
= &page_zip_stat[page_zip->ssize - 1];
2945
zip_stat->decompressed++;
2946
zip_stat->decompressed_usec += ut_time_us(NULL) - usec;
2949
/* Update the stat counter for LRU policy. */
2950
buf_LRU_stat_inc_unzip();
2955
#ifdef UNIV_ZIP_DEBUG
/**************************************************************************
Dump a block of memory on the standard error stream, 32 bytes per line,
each line prefixed with its offset in hexadecimal.  Debug aid for
page_zip_validate_low() failures. */
static
void
page_zip_hexdump_func(
/*==================*/
	const char*	name,	/* in: name of the data structure */
	const void*	buf,	/* in: data */
	ulint		size)	/* in: length of the data, in bytes */
{
	const byte*	s	= buf;
	ulint		addr;
	const ulint	width	= 32; /* bytes per line */

	fprintf(stderr, "%s:\n", name);

	for (addr = 0; addr < size; addr += width) {
		ulint	i;

		fprintf(stderr, "%04lx ", (ulong) addr);

		/* The last line may be shorter than "width" bytes. */
		i = ut_min(width, size - addr);

		while (i--) {
			fprintf(stderr, "%02x", *s++);
		}

		putc('\n', stderr);
	}
}

/* Invoke page_zip_hexdump_func() with the variable name as the label. */
#define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size)
2989
/* Flag: make page_zip_validate() compare page headers only */
2990
UNIV_INTERN ibool page_zip_validate_header_only = FALSE;
2992
/**************************************************************************
2993
Check that the compressed and decompressed pages match. */
2996
page_zip_validate_low(
2997
/*==================*/
2998
/* out: TRUE if valid, FALSE if not */
2999
const page_zip_des_t* page_zip,/* in: compressed page */
3000
const page_t* page, /* in: uncompressed page */
3001
ibool sloppy) /* in: FALSE=strict,
3002
TRUE=ignore the MIN_REC_FLAG */
3004
page_zip_des_t temp_page_zip;
3005
byte* temp_page_buf;
3009
if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
3010
FIL_PAGE_LSN - FIL_PAGE_PREV)
3011
|| memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2)
3012
|| memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
3013
PAGE_DATA - FIL_PAGE_DATA)) {
3014
page_zip_fail(("page_zip_validate: page header\n"));
3015
page_zip_hexdump(page_zip, sizeof *page_zip);
3016
page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
3017
page_zip_hexdump(page, UNIV_PAGE_SIZE);
3021
ut_a(page_is_comp(page));
3023
if (page_zip_validate_header_only) {
3027
/* page_zip_decompress() expects the uncompressed page to be
3028
UNIV_PAGE_SIZE aligned. */
3029
temp_page_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
3030
temp_page = ut_align(temp_page_buf, UNIV_PAGE_SIZE);
3032
#ifdef UNIV_DEBUG_VALGRIND
3033
/* Get detailed information on the valid bits in case the
3034
UNIV_MEM_ASSERT_RW() checks fail. The v-bits of page[],
3035
page_zip->data[] or page_zip could be viewed at temp_page[] or
3036
temp_page_zip in a debugger when running valgrind --db-attach. */
3037
VALGRIND_GET_VBITS(page, temp_page, UNIV_PAGE_SIZE);
3038
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
3039
VALGRIND_GET_VBITS(page_zip, &temp_page_zip, sizeof temp_page_zip);
3040
UNIV_MEM_ASSERT_RW(page_zip, sizeof *page_zip);
3041
VALGRIND_GET_VBITS(page_zip->data, temp_page,
3042
page_zip_get_size(page_zip));
3043
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3044
#endif /* UNIV_DEBUG_VALGRIND */
3046
temp_page_zip = *page_zip;
3047
valid = page_zip_decompress(&temp_page_zip, temp_page);
3049
fputs("page_zip_validate(): failed to decompress\n", stderr);
3052
if (page_zip->n_blobs != temp_page_zip.n_blobs) {
3053
page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n",
3054
page_zip->n_blobs, temp_page_zip.n_blobs));
3058
if (page_zip->m_start != temp_page_zip.m_start) {
3059
page_zip_fail(("page_zip_validate: m_start: %u!=%u\n",
3060
page_zip->m_start, temp_page_zip.m_start));
3063
#endif /* UNIV_DEBUG */
3064
if (page_zip->m_end != temp_page_zip.m_end) {
3065
page_zip_fail(("page_zip_validate: m_end: %u!=%u\n",
3066
page_zip->m_end, temp_page_zip.m_end));
3069
if (page_zip->m_nonempty != temp_page_zip.m_nonempty) {
3070
page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n",
3071
page_zip->m_nonempty,
3072
temp_page_zip.m_nonempty));
3075
if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER,
3076
UNIV_PAGE_SIZE - PAGE_HEADER - FIL_PAGE_DATA_END)) {
3078
/* In crash recovery, the "minimum record" flag may be
3079
set incorrectly until the mini-transaction is
3080
committed. Let us tolerate that difference when we
3081
are performing a sloppy validation. */
3084
byte info_bits_diff;
3086
= rec_get_next_offs(page + PAGE_NEW_INFIMUM,
3088
ut_a(offset >= PAGE_NEW_SUPREMUM);
3089
offset -= 5 /* REC_NEW_INFO_BITS */;
3091
info_bits_diff = page[offset] ^ temp_page[offset];
3093
if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
3094
temp_page[offset] = page[offset];
3096
if (!memcmp(page + PAGE_HEADER,
3097
temp_page + PAGE_HEADER,
3098
UNIV_PAGE_SIZE - PAGE_HEADER
3099
- FIL_PAGE_DATA_END)) {
3101
/* Only the minimum record flag
3102
differed. Let us ignore it. */
3103
page_zip_fail(("page_zip_validate: "
3106
"%lu,%lu,0x%02lx)\n",
3107
page_get_space_id(page),
3108
page_get_page_no(page),
3109
(ulong) page[offset]));
3114
page_zip_fail(("page_zip_validate: content\n"));
3120
page_zip_hexdump(page_zip, sizeof *page_zip);
3121
page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
3122
page_zip_hexdump(page, UNIV_PAGE_SIZE);
3123
page_zip_hexdump(temp_page, UNIV_PAGE_SIZE);
3125
ut_free(temp_page_buf);
3129
/**************************************************************************
3130
Check that the compressed and decompressed pages match. */
3135
/* out: TRUE if valid, FALSE if not */
3136
const page_zip_des_t* page_zip,/* in: compressed page */
3137
const page_t* page) /* in: uncompressed page */
3139
return(page_zip_validate_low(page_zip, page,
3140
recv_recovery_is_on()));
3142
#endif /* UNIV_ZIP_DEBUG */
3147
page_zip_header_cmp(
3148
/*================*/
3150
const page_zip_des_t* page_zip,/* in: compressed page */
3151
const byte* page) /* in: uncompressed page */
3153
ut_ad(!memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
3154
FIL_PAGE_LSN - FIL_PAGE_PREV));
3155
ut_ad(!memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE,
3157
ut_ad(!memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
3158
PAGE_DATA - FIL_PAGE_DATA));
3162
#endif /* UNIV_DEBUG */
3164
/**************************************************************************
3165
Write a record on the compressed page that contains externally stored
3166
columns. The data must already have been written to the uncompressed page. */
3169
page_zip_write_rec_ext(
3170
/*===================*/
3171
/* out: end of modification log */
3172
page_zip_des_t* page_zip, /* in/out: compressed page */
3173
const page_t* page, /* in: page containing rec */
3174
const byte* rec, /* in: record being written */
3175
dict_index_t* index, /* in: record descriptor */
3176
const ulint* offsets, /* in: rec_get_offsets(rec, index) */
3177
ulint create, /* in: nonzero=insert, zero=update */
3178
ulint trx_id_col, /* in: position of DB_TRX_ID */
3179
ulint heap_no, /* in: heap number of rec */
3180
byte* storage, /* in: end of dense page directory */
3181
byte* data) /* in: end of modification log */
3183
const byte* start = rec;
3186
byte* externs = storage;
3187
ulint n_ext = rec_offs_n_extern(offsets);
3189
ut_ad(rec_offs_validate(rec, index, offsets));
3190
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3191
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3192
rec_offs_extra_size(offsets));
3194
externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3195
* (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW);
3197
/* Note that this will not take into account
3198
the BLOB columns of rec if create==TRUE. */
3199
ut_ad(data + rec_offs_data_size(offsets)
3200
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3201
- n_ext * BTR_EXTERN_FIELD_REF_SIZE
3202
< externs - BTR_EXTERN_FIELD_REF_SIZE * page_zip->n_blobs);
3205
ulint blob_no = page_zip_get_n_prev_extern(
3206
page_zip, rec, index);
3207
byte* ext_end = externs - page_zip->n_blobs
3208
* BTR_EXTERN_FIELD_REF_SIZE;
3209
ut_ad(blob_no <= page_zip->n_blobs);
3210
externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
3213
page_zip->n_blobs += n_ext;
3214
ASSERT_ZERO_BLOB(ext_end - n_ext
3215
* BTR_EXTERN_FIELD_REF_SIZE);
3216
memmove(ext_end - n_ext
3217
* BTR_EXTERN_FIELD_REF_SIZE,
3222
ut_a(blob_no + n_ext <= page_zip->n_blobs);
3225
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
3228
if (UNIV_UNLIKELY(i == trx_id_col)) {
3229
ut_ad(!rec_offs_nth_extern(offsets,
3231
ut_ad(!rec_offs_nth_extern(offsets,
3233
/* Locate trx_id and roll_ptr. */
3234
src = rec_get_nth_field(rec, offsets,
3236
ut_ad(len == DATA_TRX_ID_LEN);
3237
ut_ad(src + DATA_TRX_ID_LEN
3238
== rec_get_nth_field(
3241
ut_ad(len == DATA_ROLL_PTR_LEN);
3243
/* Log the preceding fields. */
3244
ASSERT_ZERO(data, src - start);
3245
memcpy(data, start, src - start);
3246
data += src - start;
3247
start = src + (DATA_TRX_ID_LEN
3248
+ DATA_ROLL_PTR_LEN);
3250
/* Store trx_id and roll_ptr. */
3251
memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3253
src, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3254
i++; /* skip also roll_ptr */
3255
} else if (rec_offs_nth_extern(offsets, i)) {
3256
src = rec_get_nth_field(rec, offsets,
3259
ut_ad(dict_index_is_clust(index));
3261
>= BTR_EXTERN_FIELD_REF_SIZE);
3262
src += len - BTR_EXTERN_FIELD_REF_SIZE;
3264
ASSERT_ZERO(data, src - start);
3265
memcpy(data, start, src - start);
3266
data += src - start;
3267
start = src + BTR_EXTERN_FIELD_REF_SIZE;
3269
/* Store the BLOB pointer. */
3270
externs -= BTR_EXTERN_FIELD_REF_SIZE;
3271
ut_ad(data < externs);
3272
memcpy(externs, src, BTR_EXTERN_FIELD_REF_SIZE);
3276
/* Log the last bytes of the record. */
3277
len = rec_offs_data_size(offsets) - (start - rec);
3279
ASSERT_ZERO(data, len);
3280
memcpy(data, start, len);
3286
/**************************************************************************
3287
Write an entire record on the compressed page. The data must already
3288
have been written to the uncompressed page. */
3293
page_zip_des_t* page_zip,/* in/out: compressed page */
3294
const byte* rec, /* in: record being written */
3295
dict_index_t* index, /* in: the index the record belongs to */
3296
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
3297
ulint create) /* in: nonzero=insert, zero=update */
3305
ut_ad(buf_frame_get_page_zip(rec) == page_zip);
3306
ut_ad(page_zip_simple_validate(page_zip));
3307
ut_ad(page_zip_get_size(page_zip)
3308
> PAGE_DATA + page_zip_dir_size(page_zip));
3309
ut_ad(rec_offs_comp(offsets));
3310
ut_ad(rec_offs_validate(rec, index, offsets));
3312
ut_ad(page_zip->m_start >= PAGE_DATA);
3314
page = page_align(rec);
3316
ut_ad(page_zip_header_cmp(page_zip, page));
3317
ut_ad(page_simple_validate_new((page_t*) page));
3319
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3320
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3321
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3322
rec_offs_extra_size(offsets));
3324
slot = page_zip_dir_find(page_zip, page_offset(rec));
3326
/* Copy the delete mark. */
3327
if (rec_get_deleted_flag(rec, TRUE)) {
3328
*slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8;
3330
*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
3333
ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
3334
ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + UNIV_PAGE_SIZE
3335
- PAGE_DIR - PAGE_DIR_SLOT_SIZE
3336
* page_dir_get_n_slots(page));
3338
heap_no = rec_get_heap_no_new(rec);
3339
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */
3340
ut_ad(heap_no < page_dir_get_n_heap(page));
3342
/* Append to the modification log. */
3343
data = page_zip->data + page_zip->m_end;
3346
/* Identify the record by writing its heap number - 1.
3347
0 is reserved to indicate the end of the modification log. */
3349
if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
3350
*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
3353
*data++ = (byte) ((heap_no - 1) << 1);
3357
const byte* start = rec - rec_offs_extra_size(offsets);
3358
const byte* b = rec - REC_N_NEW_EXTRA_BYTES;
3360
/* Write the extra bytes backwards, so that
3361
rec_offs_extra_size() can be easily computed in
3362
page_zip_apply_log() by invoking
3363
rec_get_offsets_reverse(). */
3365
while (b != start) {
3371
/* Write the data bytes. Store the uncompressed bytes separately. */
3372
storage = page_zip->data + page_zip_get_size(page_zip)
3373
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3374
* PAGE_ZIP_DIR_SLOT_SIZE;
3376
if (page_is_leaf(page)) {
3379
if (dict_index_is_clust(index)) {
3382
trx_id_col = dict_index_get_sys_col_pos(index,
3384
ut_ad(trx_id_col != ULINT_UNDEFINED);
3386
/* Store separately trx_id, roll_ptr and
3387
the BTR_EXTERN_FIELD_REF of each BLOB column. */
3388
if (rec_offs_any_extern(offsets)) {
3389
data = page_zip_write_rec_ext(
3391
rec, index, offsets, create,
3392
trx_id_col, heap_no, storage, data);
3394
/* Locate trx_id and roll_ptr. */
3396
= rec_get_nth_field(rec, offsets,
3398
ut_ad(len == DATA_TRX_ID_LEN);
3399
ut_ad(src + DATA_TRX_ID_LEN
3400
== rec_get_nth_field(
3402
trx_id_col + 1, &len));
3403
ut_ad(len == DATA_ROLL_PTR_LEN);
3405
/* Log the preceding fields. */
3406
ASSERT_ZERO(data, src - rec);
3407
memcpy(data, rec, src - rec);
3410
/* Store trx_id and roll_ptr. */
3412
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3415
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3417
src += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
3419
/* Log the last bytes of the record. */
3420
len = rec_offs_data_size(offsets)
3423
ASSERT_ZERO(data, len);
3424
memcpy(data, src, len);
3428
/* Leaf page of a secondary index:
3429
no externally stored columns */
3430
ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
3431
== ULINT_UNDEFINED);
3432
ut_ad(!rec_offs_any_extern(offsets));
3434
/* Log the entire record. */
3435
len = rec_offs_data_size(offsets);
3437
ASSERT_ZERO(data, len);
3438
memcpy(data, rec, len);
3442
/* This is a node pointer page. */
3445
/* Non-leaf nodes should not have any externally
3447
ut_ad(!rec_offs_any_extern(offsets));
3449
/* Copy the data bytes, except node_ptr. */
3450
len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE;
3451
ut_ad(data + len < storage - REC_NODE_PTR_SIZE
3452
* (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW));
3453
ASSERT_ZERO(data, len);
3454
memcpy(data, rec, len);
3457
/* Copy the node pointer to the uncompressed area. */
3458
memcpy(storage - REC_NODE_PTR_SIZE
3465
ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip));
3466
page_zip->m_end = data - page_zip->data;
3467
page_zip->m_nonempty = TRUE;
3469
#ifdef UNIV_ZIP_DEBUG
3470
ut_a(page_zip_validate(page_zip, page_align(rec)));
3471
#endif /* UNIV_ZIP_DEBUG */
3474
/***************************************************************
3475
Parses a log record of writing a BLOB pointer of a record. */
3478
page_zip_parse_write_blob_ptr(
3479
/*==========================*/
3480
/* out: end of log record or NULL */
3481
byte* ptr, /* in: redo log buffer */
3482
byte* end_ptr,/* in: redo log buffer end */
3483
page_t* page, /* in/out: uncompressed page */
3484
page_zip_des_t* page_zip)/* in/out: compressed page */
3489
ut_ad(!page == !page_zip);
3492
(end_ptr < ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE))) {
3497
offset = mach_read_from_2(ptr);
3498
z_offset = mach_read_from_2(ptr + 2);
3500
if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3501
|| UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3502
|| UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3504
recv_sys->found_corrupt_log = TRUE;
3510
if (UNIV_UNLIKELY(!page_zip)
3511
|| UNIV_UNLIKELY(!page_is_leaf(page))) {
3516
#ifdef UNIV_ZIP_DEBUG
3517
ut_a(page_zip_validate(page_zip, page));
3518
#endif /* UNIV_ZIP_DEBUG */
3520
memcpy(page + offset,
3521
ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3522
memcpy(page_zip->data + z_offset,
3523
ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3525
#ifdef UNIV_ZIP_DEBUG
3526
ut_a(page_zip_validate(page_zip, page));
3527
#endif /* UNIV_ZIP_DEBUG */
3530
return(ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE));
3533
/**************************************************************************
3534
Write a BLOB pointer of a record on the leaf page of a clustered index.
3535
The information must already have been updated on the uncompressed page. */
3538
page_zip_write_blob_ptr(
3539
/*====================*/
3540
page_zip_des_t* page_zip,/* in/out: compressed page */
3541
const byte* rec, /* in/out: record whose data is being
3543
dict_index_t* index, /* in: index of the page */
3544
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
3545
ulint n, /* in: column index */
3546
mtr_t* mtr) /* in: mini-transaction handle,
3547
or NULL if no logging is needed */
3551
const page_t* page = page_align(rec);
3555
ut_ad(buf_frame_get_page_zip(rec) == page_zip);
3556
ut_ad(page_simple_validate_new((page_t*) page));
3557
ut_ad(page_zip_simple_validate(page_zip));
3558
ut_ad(page_zip_get_size(page_zip)
3559
> PAGE_DATA + page_zip_dir_size(page_zip));
3560
ut_ad(rec_offs_comp(offsets));
3561
ut_ad(rec_offs_validate(rec, NULL, offsets));
3562
ut_ad(rec_offs_any_extern(offsets));
3563
ut_ad(rec_offs_nth_extern(offsets, n));
3565
ut_ad(page_zip->m_start >= PAGE_DATA);
3566
ut_ad(page_zip_header_cmp(page_zip, page));
3568
ut_ad(page_is_leaf(page));
3569
ut_ad(dict_index_is_clust(index));
3571
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3572
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3573
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3574
rec_offs_extra_size(offsets));
3576
blob_no = page_zip_get_n_prev_extern(page_zip, rec, index)
3577
+ rec_get_n_extern_new(rec, index, n);
3578
ut_a(blob_no < page_zip->n_blobs);
3580
externs = page_zip->data + page_zip_get_size(page_zip)
3581
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3582
* (PAGE_ZIP_DIR_SLOT_SIZE
3583
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3585
field = rec_get_nth_field(rec, offsets, n, &len);
3587
externs -= (blob_no + 1) * BTR_EXTERN_FIELD_REF_SIZE;
3588
field += len - BTR_EXTERN_FIELD_REF_SIZE;
3590
memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE);
3592
#ifdef UNIV_ZIP_DEBUG
3593
ut_a(page_zip_validate(page_zip, page));
3594
#endif /* UNIV_ZIP_DEBUG */
3597
byte* log_ptr = mlog_open(
3598
mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE);
3599
if (UNIV_UNLIKELY(!log_ptr)) {
3603
log_ptr = mlog_write_initial_log_record_fast(
3604
(byte*) field, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr);
3605
mach_write_to_2(log_ptr, page_offset(field));
3607
mach_write_to_2(log_ptr, externs - page_zip->data);
3609
memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE);
3610
log_ptr += BTR_EXTERN_FIELD_REF_SIZE;
3611
mlog_close(mtr, log_ptr);
3615
/***************************************************************
3616
Parses a log record of writing the node pointer of a record. */
3619
page_zip_parse_write_node_ptr(
3620
/*==========================*/
3621
/* out: end of log record or NULL */
3622
byte* ptr, /* in: redo log buffer */
3623
byte* end_ptr,/* in: redo log buffer end */
3624
page_t* page, /* in/out: uncompressed page */
3625
page_zip_des_t* page_zip)/* in/out: compressed page */
3630
ut_ad(!page == !page_zip);
3632
if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) {
3637
offset = mach_read_from_2(ptr);
3638
z_offset = mach_read_from_2(ptr + 2);
3640
if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3641
|| UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3642
|| UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3644
recv_sys->found_corrupt_log = TRUE;
3655
if (UNIV_UNLIKELY(!page_zip)
3656
|| UNIV_UNLIKELY(page_is_leaf(page))) {
3661
#ifdef UNIV_ZIP_DEBUG
3662
ut_a(page_zip_validate(page_zip, page));
3663
#endif /* UNIV_ZIP_DEBUG */
3665
field = page + offset;
3666
storage = page_zip->data + z_offset;
3668
storage_end = page_zip->data + page_zip_get_size(page_zip)
3669
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3670
* PAGE_ZIP_DIR_SLOT_SIZE;
3672
heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE;
3674
if (UNIV_UNLIKELY((storage_end - storage) % REC_NODE_PTR_SIZE)
3675
|| UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW)
3676
|| UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) {
3681
memcpy(field, ptr + 4, REC_NODE_PTR_SIZE);
3682
memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE);
3684
#ifdef UNIV_ZIP_DEBUG
3685
ut_a(page_zip_validate(page_zip, page));
3686
#endif /* UNIV_ZIP_DEBUG */
3689
return(ptr + (2 + 2 + REC_NODE_PTR_SIZE));
3692
/**************************************************************************
3693
Write the node pointer of a record on a non-leaf compressed page. */
3696
page_zip_write_node_ptr(
3697
/*====================*/
3698
page_zip_des_t* page_zip,/* in/out: compressed page */
3699
byte* rec, /* in/out: record */
3700
ulint size, /* in: data size of rec */
3701
ulint ptr, /* in: node pointer */
3702
mtr_t* mtr) /* in: mini-transaction, or NULL */
3706
page_t* page = page_align(rec);
3708
ut_ad(buf_frame_get_page_zip(rec) == page_zip);
3709
ut_ad(page_simple_validate_new(page));
3710
ut_ad(page_zip_simple_validate(page_zip));
3711
ut_ad(page_zip_get_size(page_zip)
3712
> PAGE_DATA + page_zip_dir_size(page_zip));
3713
ut_ad(page_rec_is_comp(rec));
3715
ut_ad(page_zip->m_start >= PAGE_DATA);
3716
ut_ad(page_zip_header_cmp(page_zip, page));
3718
ut_ad(!page_is_leaf(page));
3720
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3721
UNIV_MEM_ASSERT_RW(rec, size);
3723
storage = page_zip->data + page_zip_get_size(page_zip)
3724
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3725
* PAGE_ZIP_DIR_SLOT_SIZE
3726
- (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
3727
field = rec + size - REC_NODE_PTR_SIZE;
3729
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3730
ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE));
3731
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3732
#if REC_NODE_PTR_SIZE != 4
3733
# error "REC_NODE_PTR_SIZE != 4"
3735
mach_write_to_4(field, ptr);
3736
memcpy(storage, field, REC_NODE_PTR_SIZE);
3739
byte* log_ptr = mlog_open(mtr,
3740
11 + 2 + 2 + REC_NODE_PTR_SIZE);
3741
if (UNIV_UNLIKELY(!log_ptr)) {
3745
log_ptr = mlog_write_initial_log_record_fast(
3746
field, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr);
3747
mach_write_to_2(log_ptr, page_offset(field));
3749
mach_write_to_2(log_ptr, storage - page_zip->data);
3751
memcpy(log_ptr, field, REC_NODE_PTR_SIZE);
3752
log_ptr += REC_NODE_PTR_SIZE;
3753
mlog_close(mtr, log_ptr);
3757
/**************************************************************************
3758
Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
3761
page_zip_write_trx_id_and_roll_ptr(
3762
/*===============================*/
3763
page_zip_des_t* page_zip,/* in/out: compressed page */
3764
byte* rec, /* in/out: record */
3765
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
3766
ulint trx_id_col,/* in: column number of TRX_ID in rec */
3767
dulint trx_id, /* in: transaction identifier */
3768
dulint roll_ptr)/* in: roll_ptr */
3772
page_t* page = page_align(rec);
3775
ut_ad(buf_frame_get_page_zip(rec) == page_zip);
3776
ut_ad(page_simple_validate_new(page));
3777
ut_ad(page_zip_simple_validate(page_zip));
3778
ut_ad(page_zip_get_size(page_zip)
3779
> PAGE_DATA + page_zip_dir_size(page_zip));
3780
ut_ad(rec_offs_validate(rec, NULL, offsets));
3781
ut_ad(rec_offs_comp(offsets));
3783
ut_ad(page_zip->m_start >= PAGE_DATA);
3784
ut_ad(page_zip_header_cmp(page_zip, page));
3786
ut_ad(page_is_leaf(page));
3788
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3790
storage = page_zip->data + page_zip_get_size(page_zip)
3791
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3792
* PAGE_ZIP_DIR_SLOT_SIZE
3793
- (rec_get_heap_no_new(rec) - 1)
3794
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3796
#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
3797
# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
3799
field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
3800
ut_ad(len == DATA_TRX_ID_LEN);
3801
ut_ad(field + DATA_TRX_ID_LEN
3802
== rec_get_nth_field(rec, offsets, trx_id_col + 1, &len));
3803
ut_ad(len == DATA_ROLL_PTR_LEN);
3804
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3805
ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
3806
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3807
#if DATA_TRX_ID_LEN != 6
3808
# error "DATA_TRX_ID_LEN != 6"
3810
mach_write_to_6(field, trx_id);
3811
#if DATA_ROLL_PTR_LEN != 7
3812
# error "DATA_ROLL_PTR_LEN != 7"
3814
mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
3815
memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3817
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3818
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3819
rec_offs_extra_size(offsets));
3820
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3823
#ifdef UNIV_ZIP_DEBUG
/* Set this variable in a debugger to disable page_zip_clear_rec().
The only observable effect should be the compression ratio due to
deleted records not being zeroed out.  In rare cases, there can be
page_zip_validate() failures on the node_ptr, trx_id and roll_ptr
columns if the space is reallocated for a smaller record. */
UNIV_INTERN ibool	page_zip_clear_rec_disable;
#endif /* UNIV_ZIP_DEBUG */
3832
/**************************************************************************
3833
Clear an area on the uncompressed and compressed page, if possible. */
3838
page_zip_des_t* page_zip,/* in/out: compressed page */
3839
byte* rec, /* in: record to clear */
3840
dict_index_t* index, /* in: index of rec */
3841
const ulint* offsets)/* in: rec_get_offsets(rec, index) */
3844
page_t* page = page_align(rec);
3845
/* page_zip_validate() would fail here if a record
3846
containing externally stored columns is being deleted. */
3847
ut_ad(rec_offs_validate(rec, index, offsets));
3848
ut_ad(!page_zip_dir_find(page_zip, page_offset(rec)));
3849
ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec)));
3850
ut_ad(page_zip_header_cmp(page_zip, page));
3852
heap_no = rec_get_heap_no_new(rec);
3853
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
3855
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3856
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3857
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3858
rec_offs_extra_size(offsets));
3861
#ifdef UNIV_ZIP_DEBUG
3862
!page_zip_clear_rec_disable &&
3863
#endif /* UNIV_ZIP_DEBUG */
3865
+ 1 + ((heap_no - 1) >= 64)/* size of the log entry */
3866
+ page_zip_get_trailer_len(page_zip,
3867
dict_index_is_clust(index), NULL)
3868
< page_zip_get_size(page_zip)) {
3871
/* Clear only the data bytes, because the allocator and
3872
the decompressor depend on the extra bytes. */
3873
memset(rec, 0, rec_offs_data_size(offsets));
3875
if (!page_is_leaf(page)) {
3876
/* Clear node_ptr on the compressed page. */
3877
byte* storage = page_zip->data
3878
+ page_zip_get_size(page_zip)
3879
- (page_dir_get_n_heap(page)
3880
- PAGE_HEAP_NO_USER_LOW)
3881
* PAGE_ZIP_DIR_SLOT_SIZE;
3883
memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
3884
0, REC_NODE_PTR_SIZE);
3885
} else if (dict_index_is_clust(index)) {
3886
/* Clear trx_id and roll_ptr on the compressed page. */
3887
byte* storage = page_zip->data
3888
+ page_zip_get_size(page_zip)
3889
- (page_dir_get_n_heap(page)
3890
- PAGE_HEAP_NO_USER_LOW)
3891
* PAGE_ZIP_DIR_SLOT_SIZE;
3893
memset(storage - (heap_no - 1)
3894
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
3895
0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3898
/* Log that the data was zeroed out. */
3899
data = page_zip->data + page_zip->m_end;
3901
if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
3902
*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
3905
*data++ = (byte) ((heap_no - 1) << 1 | 1);
3907
ut_ad((ulint) (data - page_zip->data)
3908
< page_zip_get_size(page_zip));
3909
page_zip->m_end = data - page_zip->data;
3910
page_zip->m_nonempty = TRUE;
3911
} else if (page_is_leaf(page) && dict_index_is_clust(index)) {
3912
/* Do not clear the record, because there is not enough space
3913
to log the operation. */
3915
if (rec_offs_any_extern(offsets)) {
3918
for (i = rec_offs_n_fields(offsets); i--; ) {
3919
/* Clear all BLOB pointers in order to make
3920
page_zip_validate() pass. */
3921
if (rec_offs_nth_extern(offsets, i)) {
3923
byte* field = rec_get_nth_field(
3924
rec, offsets, i, &len);
3926
- BTR_EXTERN_FIELD_REF_SIZE,
3927
0, BTR_EXTERN_FIELD_REF_SIZE);
3933
#ifdef UNIV_ZIP_DEBUG
3934
ut_a(page_zip_validate(page_zip, page));
3935
#endif /* UNIV_ZIP_DEBUG */
3938
/**************************************************************************
3939
Write the "deleted" flag of a record on a compressed page. The flag must
3940
already have been written on the uncompressed page. */
3943
page_zip_rec_set_deleted(
3944
/*=====================*/
3945
page_zip_des_t* page_zip,/* in/out: compressed page */
3946
const byte* rec, /* in: record on the uncompressed page */
3947
ulint flag) /* in: the deleted flag (nonzero=TRUE) */
3949
byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
3951
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3953
*slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8);
3955
*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
3957
#ifdef UNIV_ZIP_DEBUG
3958
ut_a(page_zip_validate(page_zip, page_align(rec)));
3959
#endif /* UNIV_ZIP_DEBUG */
3962
/**************************************************************************
3963
Write the "owned" flag of a record on a compressed page. The n_owned field
3964
must already have been written on the uncompressed page. */
3967
page_zip_rec_set_owned(
3968
/*===================*/
3969
page_zip_des_t* page_zip,/* in/out: compressed page */
3970
const byte* rec, /* in: record on the uncompressed page */
3971
ulint flag) /* in: the owned flag (nonzero=TRUE) */
3973
byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
3975
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3977
*slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
3979
*slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8);
3983
/**************************************************************************
3984
Insert a record to the dense page directory. */
3987
page_zip_dir_insert(
3988
/*================*/
3989
page_zip_des_t* page_zip,/* in/out: compressed page */
3990
const byte* prev_rec,/* in: record after which to insert */
3991
const byte* free_rec,/* in: record from which rec was
3992
allocated, or NULL */
3993
byte* rec) /* in: record to insert */
3999
ut_ad(prev_rec != rec);
4000
ut_ad(page_rec_get_next((rec_t*) prev_rec) == rec);
4001
ut_ad(page_zip_simple_validate(page_zip));
4003
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4005
if (page_rec_is_infimum(prev_rec)) {
4006
/* Use the first slot. */
4007
slot_rec = page_zip->data + page_zip_get_size(page_zip);
4009
byte* end = page_zip->data + page_zip_get_size(page_zip);
4010
byte* start = end - page_zip_dir_user_size(page_zip);
4012
if (UNIV_LIKELY(!free_rec)) {
4013
/* PAGE_N_RECS was already incremented
4014
in page_cur_insert_rec_zip(), but the
4015
dense directory slot at that position
4016
contains garbage. Skip it. */
4017
start += PAGE_ZIP_DIR_SLOT_SIZE;
4020
slot_rec = page_zip_dir_find_low(start, end,
4021
page_offset(prev_rec));
4025
/* Read the old n_dense (n_heap may have been incremented). */
4026
n_dense = page_dir_get_n_heap(page_zip->data)
4027
- (PAGE_HEAP_NO_USER_LOW + 1);
4029
if (UNIV_LIKELY_NULL(free_rec)) {
4030
/* The record was allocated from the free list.
4031
Shift the dense directory only up to that slot.
4032
Note that in this case, n_dense is actually
4033
off by one, because page_cur_insert_rec_zip()
4034
did not increment n_heap. */
4035
ut_ad(rec_get_heap_no_new(rec) < n_dense + 1
4036
+ PAGE_HEAP_NO_USER_LOW);
4037
ut_ad(rec >= free_rec);
4038
slot_free = page_zip_dir_find(page_zip, page_offset(free_rec));
4040
slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
4042
/* The record was allocated from the heap.
4043
Shift the entire dense directory. */
4044
ut_ad(rec_get_heap_no_new(rec) == n_dense
4045
+ PAGE_HEAP_NO_USER_LOW);
4047
/* Shift to the end of the dense page directory. */
4048
slot_free = page_zip->data + page_zip_get_size(page_zip)
4049
- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
4052
/* Shift the dense directory to allocate place for rec. */
4053
memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free,
4054
slot_rec - slot_free);
4056
/* Write the entry for the inserted record.
4057
The "owned" and "deleted" flags must be zero. */
4058
mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, page_offset(rec));
4061
/**************************************************************************
4062
Shift the dense page directory and the array of BLOB pointers
4063
when a record is deleted. */
4066
page_zip_dir_delete(
4067
/*================*/
4068
page_zip_des_t* page_zip,/* in/out: compressed page */
4069
byte* rec, /* in: record to delete */
4070
dict_index_t* index, /* in: index of rec */
4071
const ulint* offsets,/* in: rec_get_offsets(rec) */
4072
const byte* free) /* in: previous start of the free list */
4077
page_t* page = page_align(rec);
4079
ut_ad(rec_offs_validate(rec, index, offsets));
4080
ut_ad(rec_offs_comp(offsets));
4082
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4083
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
4084
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
4085
rec_offs_extra_size(offsets));
4087
slot_rec = page_zip_dir_find(page_zip, page_offset(rec));
4091
/* This could not be done before page_zip_dir_find(). */
4092
page_header_set_field(page, page_zip, PAGE_N_RECS,
4093
(ulint)(page_get_n_recs(page) - 1));
4095
if (UNIV_UNLIKELY(!free)) {
4096
/* Make the last slot the start of the free list. */
4097
slot_free = page_zip->data + page_zip_get_size(page_zip)
4098
- PAGE_ZIP_DIR_SLOT_SIZE
4099
* (page_dir_get_n_heap(page_zip->data)
4100
- PAGE_HEAP_NO_USER_LOW);
4102
slot_free = page_zip_dir_find_free(page_zip,
4104
ut_a(slot_free < slot_rec);
4105
/* Grow the free list by one slot by moving the start. */
4106
slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
4109
if (UNIV_LIKELY(slot_rec > slot_free)) {
4110
memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
4112
slot_rec - slot_free);
4115
/* Write the entry for the deleted record.
4116
The "owned" and "deleted" flags will be cleared. */
4117
mach_write_to_2(slot_free, page_offset(rec));
4119
if (!page_is_leaf(page) || !dict_index_is_clust(index)) {
4120
ut_ad(!rec_offs_any_extern(offsets));
4124
n_ext = rec_offs_n_extern(offsets);
4125
if (UNIV_UNLIKELY(n_ext)) {
4126
/* Shift and zero fill the array of BLOB pointers. */
4131
blob_no = page_zip_get_n_prev_extern(page_zip, rec, index);
4132
ut_a(blob_no + n_ext <= page_zip->n_blobs);
4134
externs = page_zip->data + page_zip_get_size(page_zip)
4135
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
4136
* (PAGE_ZIP_DIR_SLOT_SIZE
4137
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4139
ext_end = externs - page_zip->n_blobs
4140
* BTR_EXTERN_FIELD_REF_SIZE;
4141
externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
4143
page_zip->n_blobs -= n_ext;
4144
/* Shift and zero fill the array. */
4145
memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end,
4146
(page_zip->n_blobs - blob_no)
4147
* BTR_EXTERN_FIELD_REF_SIZE);
4148
memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE);
4152
/* The compression algorithm expects info_bits and n_owned
4153
to be 0 for deleted records. */
4154
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
4156
page_zip_clear_rec(page_zip, rec, index, offsets);
4159
/**************************************************************************
4160
Add a slot to the dense page directory. */
4163
page_zip_dir_add_slot(
4164
/*==================*/
4165
page_zip_des_t* page_zip, /* in/out: compressed page */
4166
ulint is_clustered) /* in: nonzero for clustered index,
4173
ut_ad(page_is_comp(page_zip->data));
4174
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4176
/* Read the old n_dense (n_heap has already been incremented). */
4177
n_dense = page_dir_get_n_heap(page_zip->data)
4178
- (PAGE_HEAP_NO_USER_LOW + 1);
4180
dir = page_zip->data + page_zip_get_size(page_zip)
4181
- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
4183
if (!page_is_leaf(page_zip->data)) {
4184
ut_ad(!page_zip->n_blobs);
4185
stored = dir - n_dense * REC_NODE_PTR_SIZE;
4186
} else if (UNIV_UNLIKELY(is_clustered)) {
4187
/* Move the BLOB pointer array backwards to make space for the
4188
roll_ptr and trx_id columns and the dense directory slot. */
4191
stored = dir - n_dense
4192
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4194
- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4196
- (PAGE_ZIP_DIR_SLOT_SIZE
4197
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4198
PAGE_ZIP_DIR_SLOT_SIZE
4199
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4200
memmove(externs - (PAGE_ZIP_DIR_SLOT_SIZE
4201
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4202
externs, stored - externs);
4205
- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4206
ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE,
4207
PAGE_ZIP_DIR_SLOT_SIZE);
4210
/* Move the uncompressed area backwards to make space
4211
for one directory slot. */
4212
memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, dir - stored);
4215
/***************************************************************
4216
Parses a log record of writing to the header of a page. */
4219
page_zip_parse_write_header(
4220
/*========================*/
4221
/* out: end of log record or NULL */
4222
byte* ptr, /* in: redo log buffer */
4223
byte* end_ptr,/* in: redo log buffer end */
4224
page_t* page, /* in/out: uncompressed page */
4225
page_zip_des_t* page_zip)/* in/out: compressed page */
4230
ut_ad(ptr && end_ptr);
4231
ut_ad(!page == !page_zip);
4233
if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) {
4238
offset = (ulint) *ptr++;
4239
len = (ulint) *ptr++;
4241
if (UNIV_UNLIKELY(!len) || UNIV_UNLIKELY(offset + len >= PAGE_DATA)) {
4243
recv_sys->found_corrupt_log = TRUE;
4248
if (UNIV_UNLIKELY(end_ptr < ptr + len)) {
4254
if (UNIV_UNLIKELY(!page_zip)) {
4258
#ifdef UNIV_ZIP_DEBUG
4259
ut_a(page_zip_validate(page_zip, page));
4260
#endif /* UNIV_ZIP_DEBUG */
4262
memcpy(page + offset, ptr, len);
4263
memcpy(page_zip->data + offset, ptr, len);
4265
#ifdef UNIV_ZIP_DEBUG
4266
ut_a(page_zip_validate(page_zip, page));
4267
#endif /* UNIV_ZIP_DEBUG */
4273
/**************************************************************************
4274
Write a log record of writing to the uncompressed header portion of a page. */
4277
page_zip_write_header_log(
4278
/*======================*/
4279
const byte* data, /* in: data on the uncompressed page */
4280
ulint length, /* in: length of the data */
4281
mtr_t* mtr) /* in: mini-transaction */
4283
byte* log_ptr = mlog_open(mtr, 11 + 1 + 1);
4284
ulint offset = page_offset(data);
4286
ut_ad(offset < PAGE_DATA);
4287
ut_ad(offset + length < PAGE_DATA);
4289
# error "PAGE_DATA > 255"
4291
ut_ad(length < 256);
4293
/* If no logging is requested, we may return now */
4294
if (UNIV_UNLIKELY(!log_ptr)) {
4299
log_ptr = mlog_write_initial_log_record_fast(
4300
(byte*) data, MLOG_ZIP_WRITE_HEADER, log_ptr, mtr);
4301
*log_ptr++ = (byte) offset;
4302
*log_ptr++ = (byte) length;
4303
mlog_close(mtr, log_ptr);
4305
mlog_catenate_string(mtr, data, length);
4308
/**************************************************************************
4309
Reorganize and compress a page. This is a low-level operation for
4310
compressed pages, to be used when page_zip_compress() fails.
4311
On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
4312
The function btr_page_reorganize() should be preferred whenever possible.
4313
IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
4314
non-clustered index, the caller must update the insert buffer free
4315
bits in the same mini-transaction in such a way that the modification
4316
will be redo-logged. */
4319
page_zip_reorganize(
4320
/*================*/
4321
/* out: TRUE on success, FALSE on failure;
4322
page and page_zip will be left intact
4324
buf_block_t* block, /* in/out: page with compressed page;
4325
on the compressed page, in: size;
4327
m_start, m_end, m_nonempty */
4328
dict_index_t* index, /* in: index of the B-tree node */
4329
mtr_t* mtr) /* in: mini-transaction */
4331
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
4332
page_t* page = buf_block_get_frame(block);
4333
buf_block_t* temp_block;
4337
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4338
ut_ad(page_is_comp(page));
4339
/* Note that page_zip_validate(page_zip, page) may fail here. */
4340
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
4341
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4343
/* Disable logging */
4344
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
4346
temp_block = buf_block_alloc(0);
4347
temp_page = temp_block->frame;
4349
btr_search_drop_page_hash_index(block);
4351
/* Copy the old page to temporary space */
4352
buf_frame_copy(temp_page, page);
4354
/* Recreate the page: note that global data on page (possible
4355
segment headers, next page-field, etc.) is preserved intact */
4357
page_create(block, mtr, TRUE);
4358
block->check_index_page_at_flush = TRUE;
4360
/* Copy the records from the temporary space to the recreated page;
4361
do not copy the lock bits yet */
4363
page_copy_rec_list_end_no_locks(block, temp_block,
4364
page_get_infimum_rec(temp_page),
4366
/* Copy max trx id to recreated page */
4367
page_set_max_trx_id(block, NULL, page_get_max_trx_id(temp_page));
4369
/* Restore logging. */
4370
mtr_set_log_mode(mtr, log_mode);
4372
if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
4374
/* Restore the old page and exit. */
4375
buf_frame_copy(page, temp_page);
4377
buf_block_free(temp_block);
4381
lock_move_reorganize_page(block, temp_block);
4383
buf_block_free(temp_block);
4387
/**************************************************************************
4388
Copy the records of a page byte for byte. Do not copy the page header
4389
or trailer, except those B-tree header fields that are directly
4390
related to the storage of records. Also copy PAGE_MAX_TRX_ID.
4391
NOTE: The caller must update the lock table and the adaptive hash index. */
4396
page_zip_des_t* page_zip, /* out: copy of src_zip
4397
(n_blobs, m_start, m_end,
4398
m_nonempty, data[0..size-1]) */
4399
page_t* page, /* out: copy of src */
4400
const page_zip_des_t* src_zip, /* in: compressed page */
4401
const page_t* src, /* in: page */
4402
dict_index_t* index, /* in: index of the B-tree */
4403
mtr_t* mtr) /* in: mini-transaction */
4405
ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
4406
ut_ad(mtr_memo_contains_page(mtr, (page_t*) src, MTR_MEMO_PAGE_X_FIX));
4407
#ifdef UNIV_ZIP_DEBUG
4408
/* The B-tree operations that call this function may set
4409
FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
4410
mismatch. A strict page_zip_validate() will be executed later
4411
during the B-tree operations. */
4412
ut_a(page_zip_validate_low(src_zip, src, TRUE));
4413
#endif /* UNIV_ZIP_DEBUG */
4414
ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
4415
if (UNIV_UNLIKELY(src_zip->n_blobs)) {
4416
ut_a(page_is_leaf(src));
4417
ut_a(dict_index_is_clust(index));
4420
UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
4421
UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip));
4422
UNIV_MEM_ASSERT_RW(src, UNIV_PAGE_SIZE);
4423
UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip));
4425
/* Copy those B-tree page header fields that are related to
4426
the records stored in the page. Also copy the field
4427
PAGE_MAX_TRX_ID. Skip the rest of the page header and
4428
trailer. On the compressed page, there is no trailer. */
4429
#if PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END
4430
# error "PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END"
4432
memcpy(PAGE_HEADER + page, PAGE_HEADER + src,
4433
PAGE_HEADER_PRIV_END);
4434
memcpy(PAGE_DATA + page, PAGE_DATA + src,
4435
UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
4436
memcpy(PAGE_HEADER + page_zip->data, PAGE_HEADER + src_zip->data,
4437
PAGE_HEADER_PRIV_END);
4438
memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data,
4439
page_zip_get_size(page_zip) - PAGE_DATA);
4441
/* Copy all fields of src_zip to page_zip, except the pointer
4442
to the compressed data page. */
4444
page_zip_t* data = page_zip->data;
4445
memcpy(page_zip, src_zip, sizeof *page_zip);
4446
page_zip->data = data;
4448
ut_ad(page_zip_get_trailer_len(page_zip,
4449
dict_index_is_clust(index), NULL)
4450
+ page_zip->m_end < page_zip_get_size(page_zip));
4452
if (!page_is_leaf(src)
4453
&& UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL)
4454
&& UNIV_LIKELY(mach_read_from_4(page
4455
+ FIL_PAGE_PREV) != FIL_NULL)) {
4456
/* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */
4457
ulint offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
4459
if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) {
4460
rec_t* rec = page + offs;
4461
ut_a(rec[-REC_N_NEW_EXTRA_BYTES]
4462
& REC_INFO_MIN_REC_FLAG);
4463
rec[-REC_N_NEW_EXTRA_BYTES] &= ~ REC_INFO_MIN_REC_FLAG;
4467
#ifdef UNIV_ZIP_DEBUG
4468
ut_a(page_zip_validate(page_zip, page));
4469
#endif /* UNIV_ZIP_DEBUG */
4471
page_zip_compress_write_log(page_zip, page, index, mtr);
4474
/**************************************************************************
4475
Parses a log record of compressing an index page. */
4478
page_zip_parse_compress(
4479
/*====================*/
4480
/* out: end of log record or NULL */
4481
byte* ptr, /* in: buffer */
4482
byte* end_ptr,/* in: buffer end */
4483
page_t* page, /* out: uncompressed page */
4484
page_zip_des_t* page_zip)/* out: compressed page */
4489
ut_ad(ptr && end_ptr);
4490
ut_ad(!page == !page_zip);
4492
if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) {
4497
size = mach_read_from_2(ptr);
4499
trailer_size = mach_read_from_2(ptr);
4502
if (UNIV_UNLIKELY(ptr + 8 + size + trailer_size > end_ptr)) {
4508
if (UNIV_UNLIKELY(!page_zip)
4509
|| UNIV_UNLIKELY(page_zip_get_size(page_zip) < size)) {
4511
recv_sys->found_corrupt_log = TRUE;
4516
memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4);
4517
memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4);
4518
memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size);
4519
memset(page_zip->data + FIL_PAGE_TYPE + size, 0,
4520
page_zip_get_size(page_zip) - trailer_size
4521
- (FIL_PAGE_TYPE + size));
4522
memcpy(page_zip->data + page_zip_get_size(page_zip)
4523
- trailer_size, ptr + 8 + size, trailer_size);
4525
if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page))) {
4531
return(ptr + 8 + size + trailer_size);
4534
/**************************************************************************
4535
Calculate the compressed page checksum. */
4538
page_zip_calc_checksum(
4539
/*===================*/
4540
/* out: page checksum */
4541
const void* data, /* in: compressed page */
4542
ulint size) /* in: size of compressed page */
4544
/* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
4545
and FIL_PAGE_FILE_FLUSH_LSN from the checksum. */
4547
const Bytef* s = data;
4550
ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
4552
adler = adler32(0L, s + FIL_PAGE_OFFSET,
4553
FIL_PAGE_LSN - FIL_PAGE_OFFSET);
4554
adler = adler32(adler, s + FIL_PAGE_TYPE, 2);
4555
adler = adler32(adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
4556
size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
4558
return((ulint) adler);