/*****************************************************************************

Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA

*****************************************************************************/
19
/******************************************************
Compressed page interface

Created June 2005 by Marko Makela
*******************************************************/
28
# include "page0zip.ic"
31
#include "page0page.h"
34
#include "dict0boot.h"
35
#include "dict0dict.h"
38
#include "page0types.h"
39
#include "lock0lock.h"
44
/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
45
UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1];
47
/* Please refer to ../include/page0zip.ic for a description of the
compressed page format. */

/* The infimum and supremum records are omitted from the compressed page.
On compress, we compare that the records are there, and on uncompress we
restore the records. */
53
static const byte infimum_extra[] = {
54
0x01, /* info_bits=0, n_owned=1 */
55
0x00, 0x02 /* heap_no=0, status=2 */
56
/* ?, ? */ /* next=(first user rec, or supremum) */
58
static const byte infimum_data[] = {
59
0x69, 0x6e, 0x66, 0x69,
60
0x6d, 0x75, 0x6d, 0x00 /* "infimum\0" */
62
static const byte supremum_extra_data[] = {
63
/* 0x0?, */ /* info_bits=0, n_owned=1..8 */
64
0x00, 0x0b, /* heap_no=1, status=3 */
65
0x00, 0x00, /* next=0 */
66
0x73, 0x75, 0x70, 0x72,
67
0x65, 0x6d, 0x75, 0x6d /* "supremum" */
70
/** Assert that a block of memory is filled with zero bytes.
Compare at most sizeof(field_ref_zero) bytes. */
#define ASSERT_ZERO(b, s) \
	ut_ad(!memcmp(b, field_ref_zero, ut_min(s, sizeof field_ref_zero)))
74
/** Assert that a BLOB pointer is filled with zero bytes. */
#define ASSERT_ZERO_BLOB(b) \
	ut_ad(!memcmp(b, field_ref_zero, sizeof field_ref_zero))
78
/* Enable some extra debugging output. This code can be enabled
79
independently of any UNIV_ debugging conditions. */
80
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
82
__attribute__((format (printf, 1, 2)))
83
/**************************************************************************
84
Report a failure to decompress or compress. */
89
/* out: number of characters printed */
90
const char* fmt, /* in: printf(3) format string */
91
...) /* in: arguments corresponding to fmt */
96
ut_print_timestamp(stderr);
97
fputs(" InnoDB: ", stderr);
99
res = vfprintf(stderr, fmt, ap);
104
# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
105
#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
106
# define page_zip_fail(fmt_args) /* empty */
107
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
109
/**************************************************************************
110
Determine the guaranteed free space on an empty page. */
115
/* out: minimum payload size on the page */
116
ulint n_fields, /* in: number of columns in the index */
117
ulint zip_size) /* in: compressed page size in bytes */
120
/* subtract the page header and the longest
121
uncompressed data needed for one record */
123
+ PAGE_ZIP_DIR_SLOT_SIZE
124
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
125
+ 1/* encoded heap_no==2 in page_zip_write_rec() */
126
+ 1/* end of modification log */
127
- REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
128
/* subtract the space for page_zip_fields_encode() */
129
- compressBound(2 * (n_fields + 1));
130
return(size > 0 ? (ulint) size : 0);
133
/*****************************************************************
134
Gets the size of the compressed page trailer (the dense page directory),
135
including deleted records (the free list). */
140
/* out: length of dense page
141
directory, in bytes */
142
const page_zip_des_t* page_zip) /* in: compressed page */
144
/* Exclude the page infimum and supremum from the record count. */
145
ulint size = PAGE_ZIP_DIR_SLOT_SIZE
146
* (page_dir_get_n_heap(page_zip->data)
147
- PAGE_HEAP_NO_USER_LOW);
151
/*****************************************************************
152
Gets the size of the compressed page trailer (the dense page directory),
153
only including user records (excluding the free list). */
156
page_zip_dir_user_size(
157
/*===================*/
158
/* out: length of dense page
159
directory comprising existing
161
const page_zip_des_t* page_zip) /* in: compressed page */
163
ulint size = PAGE_ZIP_DIR_SLOT_SIZE
164
* page_get_n_recs(page_zip->data);
165
ut_ad(size <= page_zip_dir_size(page_zip));
169
/*****************************************************************
170
Find the slot of the given record in the dense page directory. */
173
page_zip_dir_find_low(
174
/*==================*/
175
/* out: dense directory slot,
176
or NULL if record not found */
177
byte* slot, /* in: start of records */
178
byte* end, /* in: end of records */
179
ulint offset) /* in: offset of user record */
183
for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) {
184
if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK)
193
/*****************************************************************
194
Find the slot of the given non-free record in the dense page directory. */
199
/* out: dense directory slot,
200
or NULL if record not found */
201
page_zip_des_t* page_zip, /* in: compressed page */
202
ulint offset) /* in: offset of user record */
204
byte* end = page_zip->data + page_zip_get_size(page_zip);
206
ut_ad(page_zip_simple_validate(page_zip));
208
return(page_zip_dir_find_low(end - page_zip_dir_user_size(page_zip),
213
/*****************************************************************
214
Find the slot of the given free record in the dense page directory. */
217
page_zip_dir_find_free(
218
/*===================*/
219
/* out: dense directory slot,
220
or NULL if record not found */
221
page_zip_des_t* page_zip, /* in: compressed page */
222
ulint offset) /* in: offset of user record */
224
byte* end = page_zip->data + page_zip_get_size(page_zip);
226
ut_ad(page_zip_simple_validate(page_zip));
228
return(page_zip_dir_find_low(end - page_zip_dir_size(page_zip),
229
end - page_zip_dir_user_size(page_zip),
233
/*****************************************************************
234
Read a given slot in the dense page directory. */
239
/* out: record offset
240
on the uncompressed page,
242
PAGE_ZIP_DIR_SLOT_DEL or
243
PAGE_ZIP_DIR_SLOT_OWNED */
244
const page_zip_des_t* page_zip, /* in: compressed page */
245
ulint slot) /* in: slot
246
(0=first user record) */
248
ut_ad(page_zip_simple_validate(page_zip));
249
ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE);
250
return(mach_read_from_2(page_zip->data + page_zip_get_size(page_zip)
251
- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
254
/**************************************************************************
255
Write a log record of compressing an index page. */
258
page_zip_compress_write_log(
259
/*========================*/
260
const page_zip_des_t* page_zip,/* in: compressed page */
261
const page_t* page, /* in: uncompressed page */
262
dict_index_t* index, /* in: index of the B-tree node */
263
mtr_t* mtr) /* in: mini-transaction */
268
log_ptr = mlog_open(mtr, 11 + 2 + 2);
275
/* Read the number of user records. */
276
trailer_size = page_dir_get_n_heap(page_zip->data)
277
- PAGE_HEAP_NO_USER_LOW;
278
/* Multiply by uncompressed of size stored per record */
279
if (!page_is_leaf(page)) {
280
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
281
} else if (dict_index_is_clust(index)) {
282
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
283
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
285
trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
287
/* Add the space occupied by BLOB pointers. */
288
trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
289
ut_a(page_zip->m_end > PAGE_DATA);
290
#if FIL_PAGE_DATA > PAGE_DATA
291
# error "FIL_PAGE_DATA > PAGE_DATA"
293
ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));
295
log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
296
MLOG_ZIP_PAGE_COMPRESS,
298
mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE);
300
mach_write_to_2(log_ptr, trailer_size);
302
mlog_close(mtr, log_ptr);
304
/* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
305
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
306
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
307
/* Write most of the page header, the compressed stream and
308
the modification log. */
309
mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
310
page_zip->m_end - FIL_PAGE_TYPE);
311
/* Write the uncompressed trailer of the compressed page. */
312
mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
313
- trailer_size, trailer_size);
316
/**********************************************************
317
Determine how many externally stored columns are contained
318
in existing records with smaller heap_no than rec. */
321
page_zip_get_n_prev_extern(
322
/*=======================*/
323
const page_zip_des_t* page_zip,/* in: dense page directory on
325
const rec_t* rec, /* in: compact physical record
326
on a B-tree leaf page */
327
dict_index_t* index) /* in: record descriptor */
329
const page_t* page = page_align(rec);
334
ulint n_recs = page_get_n_recs(page_zip->data);
336
ut_ad(page_is_leaf(page));
337
ut_ad(page_is_comp(page));
338
ut_ad(dict_table_is_comp(index->table));
339
ut_ad(dict_index_is_clust(index));
341
heap_no = rec_get_heap_no_new(rec);
342
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
343
left = heap_no - PAGE_HEAP_NO_USER_LOW;
344
if (UNIV_UNLIKELY(!left)) {
348
for (i = 0; i < n_recs; i++) {
349
const rec_t* r = page + (page_zip_dir_get(page_zip, i)
350
& PAGE_ZIP_DIR_SLOT_MASK);
352
if (rec_get_heap_no_new(r) < heap_no) {
353
n_ext += rec_get_n_extern_new(r, index,
364
/**************************************************************************
365
Encode the length of a fixed-length column. */
368
page_zip_fixed_field_encode(
369
/*========================*/
370
/* out: buf + length of encoded val */
371
byte* buf, /* in: pointer to buffer where to write */
372
ulint val) /* in: value to write */
376
if (UNIV_LIKELY(val < 126)) {
378
0 = nullable variable field of at most 255 bytes length;
379
1 = not null variable field of at most 255 bytes length;
380
126 = nullable variable field with maximum length >255;
381
127 = not null variable field with maximum length >255
385
*buf++ = (byte) (0x80 | val >> 8);
392
/**************************************************************************
393
Write the index information for the compressed page. */
396
page_zip_fields_encode(
397
/*===================*/
398
/* out: used size of buf */
399
ulint n, /* in: number of fields to compress */
400
dict_index_t* index, /* in: index comprising at least n fields */
401
ulint trx_id_pos,/* in: position of the trx_id column
402
in the index, or ULINT_UNDEFINED if
403
this is a non-leaf page */
404
byte* buf) /* out: buffer of (n + 1) * 2 bytes */
406
const byte* buf_start = buf;
409
ulint trx_id_col = 0;
410
/* sum of lengths of preceding non-nullable fixed fields, or 0 */
413
ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);
415
for (i = col = 0; i < n; i++) {
416
dict_field_t* field = dict_index_get_nth_field(index, i);
419
if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
420
val = 1; /* set the "not nullable" flag */
422
val = 0; /* nullable field */
425
if (!field->fixed_len) {
426
/* variable-length field */
427
const dict_col_t* column
428
= dict_field_get_col(field);
430
if (UNIV_UNLIKELY(column->len > 255)
431
|| UNIV_UNLIKELY(column->mtype == DATA_BLOB)) {
432
val |= 0x7e; /* max > 255 bytes */
436
/* write out the length of any
437
preceding non-nullable fields */
438
buf = page_zip_fixed_field_encode(
439
buf, fixed_sum << 1 | 1);
447
/* fixed-length non-nullable field */
449
if (fixed_sum && UNIV_UNLIKELY
450
(fixed_sum + field->fixed_len
451
> DICT_MAX_INDEX_COL_LEN)) {
452
/* Write out the length of the
453
preceding non-nullable fields,
454
to avoid exceeding the maximum
455
length of a fixed-length column. */
456
buf = page_zip_fixed_field_encode(
457
buf, fixed_sum << 1 | 1);
462
if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
464
/* Write out the length of any
465
preceding non-nullable fields,
466
and start a new trx_id column. */
467
buf = page_zip_fixed_field_encode(
468
buf, fixed_sum << 1 | 1);
473
fixed_sum = field->fixed_len;
476
fixed_sum += field->fixed_len;
479
/* fixed-length nullable field */
482
/* write out the length of any
483
preceding non-nullable fields */
484
buf = page_zip_fixed_field_encode(
485
buf, fixed_sum << 1 | 1);
490
buf = page_zip_fixed_field_encode(
491
buf, field->fixed_len << 1);
497
/* Write out the lengths of last fixed-length columns. */
498
buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
501
if (trx_id_pos != ULINT_UNDEFINED) {
502
/* Write out the position of the trx_id column */
505
/* Write out the number of nullable fields */
506
i = index->n_nullable;
512
*buf++ = (byte) (0x80 | i >> 8);
516
ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
517
return((ulint) (buf - buf_start));
520
/**************************************************************************
521
Populate the dense page directory from the sparse directory. */
526
const page_t* page, /* in: compact page */
527
byte* buf, /* in: pointer to dense page directory[-1];
528
out: dense directory on compressed page */
529
const rec_t** recs) /* in: pointer to an array of 0, or NULL;
530
out: dense page directory sorted by ascending
531
address (and heap_no) */
543
if (page_is_leaf(page)) {
544
status = REC_STATUS_ORDINARY;
546
status = REC_STATUS_NODE_PTR;
548
(mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) {
549
min_mark = REC_INFO_MIN_REC_FLAG;
553
n_heap = page_dir_get_n_heap(page);
555
/* Traverse the list of stored records in the collation order,
556
starting from the first user record. */
558
rec = page + PAGE_NEW_INFIMUM, TRUE;
564
offs = rec_get_next_offs(rec, TRUE);
565
if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
569
heap_no = rec_get_heap_no_new(rec);
570
ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
571
ut_a(heap_no < n_heap);
572
ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR);
573
ut_a(offs >= PAGE_ZIP_START);
574
#if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1)
575
# error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2"
577
#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1
578
# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE - 1"
580
if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
581
offs |= PAGE_ZIP_DIR_SLOT_OWNED;
584
info_bits = rec_get_info_bits(rec, TRUE);
585
if (UNIV_UNLIKELY(info_bits & REC_INFO_DELETED_FLAG)) {
586
info_bits &= ~REC_INFO_DELETED_FLAG;
587
offs |= PAGE_ZIP_DIR_SLOT_DEL;
589
ut_a(info_bits == min_mark);
590
/* Only the smallest user record can have
591
REC_INFO_MIN_REC_FLAG set. */
594
mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
596
if (UNIV_LIKELY_NULL(recs)) {
597
/* Ensure that each heap_no occurs at most once. */
598
ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
599
/* exclude infimum and supremum */
600
recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
603
ut_a(rec_get_status(rec) == status);
606
offs = page_header_get_field(page, PAGE_FREE);
608
/* Traverse the free list (of deleted records). */
610
ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
613
heap_no = rec_get_heap_no_new(rec);
614
ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
615
ut_a(heap_no < n_heap);
617
ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
618
ut_a(rec_get_status(rec) == status);
620
mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
622
if (UNIV_LIKELY_NULL(recs)) {
623
/* Ensure that each heap_no occurs at most once. */
624
ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
625
/* exclude infimum and supremum */
626
recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
629
offs = rec_get_next_offs(rec, TRUE);
632
/* Ensure that each heap no occurs at least once. */
633
ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap);
636
/**************************************************************************
637
Allocate memory for zlib. */
646
return(mem_heap_alloc(opaque, items * size));
649
/**************************************************************************
650
Deallocate memory for zlib. */
655
void* opaque __attribute__((unused)),
656
void* address __attribute__((unused)))
660
/**************************************************************************
661
Configure the zlib allocator to use the given memory heap. */
666
void* stream, /* in/out: zlib stream */
667
mem_heap_t* heap) /* in: memory heap to use */
669
z_stream* strm = stream;
671
strm->zalloc = page_zip_malloc;
672
strm->zfree = page_zip_free;
676
#if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
677
# define PAGE_ZIP_COMPRESS_DBG
680
#ifdef PAGE_ZIP_COMPRESS_DBG
681
/* Set this variable in a debugger to enable
682
excessive logging in page_zip_compress(). */
683
UNIV_INTERN ibool page_zip_compress_dbg;
684
/* Set this variable in a debugger to enable
685
binary logging of the data passed to deflate().
686
When this variable is nonzero, it will act
687
as a log file name generator. */
688
UNIV_INTERN unsigned page_zip_compress_log;
690
/**************************************************************************
691
Wrapper for deflate(). Log the operation if page_zip_compress_dbg is set. */
694
page_zip_compress_deflate(
695
/*======================*/
696
FILE* logfile,/* in: log file, or NULL */
697
z_streamp strm, /* in/out: compressed stream for deflate() */
698
int flush) /* in: deflate() flushing method */
701
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
702
ut_print_buf(stderr, strm->next_in, strm->avail_in);
704
if (UNIV_LIKELY_NULL(logfile)) {
705
fwrite(strm->next_in, 1, strm->avail_in, logfile);
707
status = deflate(strm, flush);
708
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
709
fprintf(stderr, " -> %d\n", status);
714
/* Redefine deflate(). */
716
# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
717
# define FILE_LOGFILE FILE* logfile,
718
# define LOGFILE logfile,
719
#else /* PAGE_ZIP_COMPRESS_DBG */
720
# define FILE_LOGFILE
722
#endif /* PAGE_ZIP_COMPRESS_DBG */
724
/**************************************************************************
725
Compress the records of a node pointer page. */
728
page_zip_compress_node_ptrs(
729
/*========================*/
730
/* out: Z_OK, or a zlib error code */
732
z_stream* c_stream, /* in/out: compressed page stream */
733
const rec_t** recs, /* in: dense page directory
735
ulint n_dense, /* in: size of recs[] */
736
dict_index_t* index, /* in: the index of the page */
737
byte* storage, /* in: end of dense page directory */
738
mem_heap_t* heap) /* in: temporary memory heap */
741
ulint* offsets = NULL;
744
const rec_t* rec = *recs++;
746
offsets = rec_get_offsets(rec, index, offsets,
747
ULINT_UNDEFINED, &heap);
748
/* Only leaf nodes may contain externally stored columns. */
749
ut_ad(!rec_offs_any_extern(offsets));
751
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
752
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
753
rec_offs_extra_size(offsets));
755
/* Compress the extra bytes. */
756
c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
759
if (c_stream->avail_in) {
760
err = deflate(c_stream, Z_NO_FLUSH);
761
if (UNIV_UNLIKELY(err != Z_OK)) {
765
ut_ad(!c_stream->avail_in);
767
/* Compress the data bytes, except node_ptr. */
768
c_stream->next_in = (byte*) rec;
769
c_stream->avail_in = rec_offs_data_size(offsets)
771
ut_ad(c_stream->avail_in);
773
err = deflate(c_stream, Z_NO_FLUSH);
774
if (UNIV_UNLIKELY(err != Z_OK)) {
778
ut_ad(!c_stream->avail_in);
780
memcpy(storage - REC_NODE_PTR_SIZE
781
* (rec_get_heap_no_new(rec) - 1),
782
c_stream->next_in, REC_NODE_PTR_SIZE);
783
c_stream->next_in += REC_NODE_PTR_SIZE;
789
/**************************************************************************
790
Compress the records of a leaf node of a secondary index. */
793
page_zip_compress_sec(
794
/*==================*/
795
/* out: Z_OK, or a zlib error code */
797
z_stream* c_stream, /* in/out: compressed page stream */
798
const rec_t** recs, /* in: dense page directory
800
ulint n_dense) /* in: size of recs[] */
807
const rec_t* rec = *recs++;
809
/* Compress everything up to this record. */
810
c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
813
if (UNIV_LIKELY(c_stream->avail_in)) {
814
UNIV_MEM_ASSERT_RW(c_stream->next_in,
816
err = deflate(c_stream, Z_NO_FLUSH);
817
if (UNIV_UNLIKELY(err != Z_OK)) {
822
ut_ad(!c_stream->avail_in);
823
ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
825
/* Skip the REC_N_NEW_EXTRA_BYTES. */
827
c_stream->next_in = (byte*) rec;
833
/**************************************************************************
834
Compress a record of a leaf node of a clustered index that contains
835
externally stored columns. */
838
page_zip_compress_clust_ext(
839
/*========================*/
840
/* out: Z_OK, or a zlib error code */
842
z_stream* c_stream, /* in/out: compressed page stream */
843
const rec_t* rec, /* in: record */
844
const ulint* offsets, /* in: rec_get_offsets(rec) */
845
ulint trx_id_col, /* in: position of of DB_TRX_ID */
846
byte* deleted, /* in: dense directory entry pointing
847
to the head of the free list */
848
byte* storage, /* in: end of dense page directory */
849
byte** externs, /* in/out: pointer to the next
850
available BLOB pointer */
851
ulint* n_blobs) /* in/out: number of
852
externally stored columns */
857
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
858
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
859
rec_offs_extra_size(offsets));
861
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
865
if (UNIV_UNLIKELY(i == trx_id_col)) {
866
ut_ad(!rec_offs_nth_extern(offsets, i));
867
/* Store trx_id and roll_ptr
868
in uncompressed form. */
869
src = rec_get_nth_field(rec, offsets, i, &len);
870
ut_ad(src + DATA_TRX_ID_LEN
871
== rec_get_nth_field(rec, offsets,
873
ut_ad(len == DATA_ROLL_PTR_LEN);
875
/* Compress any preceding bytes. */
877
= src - c_stream->next_in;
879
if (c_stream->avail_in) {
880
err = deflate(c_stream, Z_NO_FLUSH);
881
if (UNIV_UNLIKELY(err != Z_OK)) {
887
ut_ad(!c_stream->avail_in);
888
ut_ad(c_stream->next_in == src);
891
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
892
* (rec_get_heap_no_new(rec) - 1),
894
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
897
+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
899
/* Skip also roll_ptr */
901
} else if (rec_offs_nth_extern(offsets, i)) {
902
src = rec_get_nth_field(rec, offsets, i, &len);
903
ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
904
src += len - BTR_EXTERN_FIELD_REF_SIZE;
906
c_stream->avail_in = src
908
if (UNIV_LIKELY(c_stream->avail_in)) {
909
err = deflate(c_stream, Z_NO_FLUSH);
910
if (UNIV_UNLIKELY(err != Z_OK)) {
916
ut_ad(!c_stream->avail_in);
917
ut_ad(c_stream->next_in == src);
919
/* Reserve space for the data at
920
the end of the space reserved for
921
the compressed data and the page
926
<= BTR_EXTERN_FIELD_REF_SIZE)) {
931
ut_ad(*externs == c_stream->next_out
932
+ c_stream->avail_out
933
+ 1/* end of modif. log */);
936
+= BTR_EXTERN_FIELD_REF_SIZE;
938
/* Skip deleted records. */
940
(page_zip_dir_find_low(
942
page_offset(rec)))) {
948
-= BTR_EXTERN_FIELD_REF_SIZE;
949
*externs -= BTR_EXTERN_FIELD_REF_SIZE;
951
/* Copy the BLOB pointer */
952
memcpy(*externs, c_stream->next_in
953
- BTR_EXTERN_FIELD_REF_SIZE,
954
BTR_EXTERN_FIELD_REF_SIZE);
961
/**************************************************************************
962
Compress the records of a leaf node of a clustered index. */
965
page_zip_compress_clust(
966
/*====================*/
967
/* out: Z_OK, or a zlib error code */
969
z_stream* c_stream, /* in/out: compressed page stream */
970
const rec_t** recs, /* in: dense page directory
972
ulint n_dense, /* in: size of recs[] */
973
dict_index_t* index, /* in: the index of the page */
974
ulint* n_blobs, /* in: 0; out: number of
975
externally stored columns */
976
ulint trx_id_col, /* index of the trx_id column */
977
byte* deleted, /* in: dense directory entry pointing
978
to the head of the free list */
979
byte* storage, /* in: end of dense page directory */
980
mem_heap_t* heap) /* in: temporary memory heap */
983
ulint* offsets = NULL;
984
/* BTR_EXTERN_FIELD_REF storage */
985
byte* externs = storage - n_dense
986
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
988
ut_ad(*n_blobs == 0);
991
const rec_t* rec = *recs++;
993
offsets = rec_get_offsets(rec, index, offsets,
994
ULINT_UNDEFINED, &heap);
995
ut_ad(rec_offs_n_fields(offsets)
996
== dict_index_get_n_fields(index));
997
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
998
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
999
rec_offs_extra_size(offsets));
1001
/* Compress the extra bytes. */
1002
c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
1003
- c_stream->next_in;
1005
if (c_stream->avail_in) {
1006
err = deflate(c_stream, Z_NO_FLUSH);
1007
if (UNIV_UNLIKELY(err != Z_OK)) {
1012
ut_ad(!c_stream->avail_in);
1013
ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
1015
/* Compress the data bytes. */
1017
c_stream->next_in = (byte*) rec;
1019
/* Check if there are any externally stored columns.
1020
For each externally stored column, store the
1021
BTR_EXTERN_FIELD_REF separately. */
1022
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
1023
ut_ad(dict_index_is_clust(index));
1025
err = page_zip_compress_clust_ext(
1027
c_stream, rec, offsets, trx_id_col,
1028
deleted, storage, &externs, n_blobs);
1030
if (UNIV_UNLIKELY(err != Z_OK)) {
1038
/* Store trx_id and roll_ptr in uncompressed form. */
1039
src = rec_get_nth_field(rec, offsets,
1041
ut_ad(src + DATA_TRX_ID_LEN
1042
== rec_get_nth_field(rec, offsets,
1043
trx_id_col + 1, &len));
1044
ut_ad(len == DATA_ROLL_PTR_LEN);
1045
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
1046
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
1047
rec_offs_extra_size(offsets));
1049
/* Compress any preceding bytes. */
1050
c_stream->avail_in = src - c_stream->next_in;
1052
if (c_stream->avail_in) {
1053
err = deflate(c_stream, Z_NO_FLUSH);
1054
if (UNIV_UNLIKELY(err != Z_OK)) {
1060
ut_ad(!c_stream->avail_in);
1061
ut_ad(c_stream->next_in == src);
1064
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
1065
* (rec_get_heap_no_new(rec) - 1),
1067
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
1070
+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1072
/* Skip also roll_ptr */
1073
ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets));
1076
/* Compress the last bytes of the record. */
1077
c_stream->avail_in = rec + rec_offs_data_size(offsets)
1078
- c_stream->next_in;
1080
if (c_stream->avail_in) {
1081
err = deflate(c_stream, Z_NO_FLUSH);
1082
if (UNIV_UNLIKELY(err != Z_OK)) {
1087
ut_ad(!c_stream->avail_in);
1088
} while (--n_dense);
1094
/**************************************************************************
1100
/* out: TRUE on success, FALSE on failure;
1101
page_zip will be left intact on failure. */
1102
page_zip_des_t* page_zip,/* in: size; out: data, n_blobs,
1103
m_start, m_end, m_nonempty */
1104
const page_t* page, /* in: uncompressed page */
1105
dict_index_t* index, /* in: index of the B-tree node */
1106
mtr_t* mtr) /* in: mini-transaction, or NULL */
1110
ulint n_fields;/* number of index fields needed */
1111
byte* fields; /* index field information */
1112
byte* buf; /* compressed payload of the page */
1113
byte* buf_end;/* end of buf */
1115
ulint slot_size;/* amount of uncompressed bytes per record */
1116
const rec_t** recs; /* dense page directory, sorted by address */
1119
ulint* offsets = NULL;
1121
byte* storage;/* storage of uncompressed columns */
1122
ullint usec = ut_time_us(NULL);
1123
#ifdef PAGE_ZIP_COMPRESS_DBG
1124
FILE* logfile = NULL;
1127
ut_a(page_is_comp(page));
1128
ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
1129
ut_ad(page_simple_validate_new((page_t*) page));
1130
ut_ad(page_zip_simple_validate(page_zip));
1132
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
1134
/* Check the data that will be omitted. */
1135
ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
1136
infimum_extra, sizeof infimum_extra));
1137
ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
1138
infimum_data, sizeof infimum_data));
1139
ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
1140
/* info_bits == 0, n_owned <= max */
1141
<= PAGE_DIR_SLOT_MAX_N_OWNED);
1142
ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
1143
supremum_extra_data, sizeof supremum_extra_data));
1145
if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
1146
ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
1147
== PAGE_NEW_SUPREMUM);
1150
if (page_is_leaf(page)) {
1151
n_fields = dict_index_get_n_fields(index);
1153
n_fields = dict_index_get_n_unique_in_tree(index);
1156
/* The dense directory excludes the infimum and supremum records. */
1157
n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
1158
#ifdef PAGE_ZIP_COMPRESS_DBG
1159
if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
1160
fprintf(stderr, "compress %p %p %lu %lu %lu\n",
1161
(void*) page_zip, (void*) page,
1165
if (UNIV_UNLIKELY(page_zip_compress_log)) {
1166
/* Create a log file for every compression attempt. */
1167
char logfilename[9];
1168
ut_snprintf(logfilename, sizeof logfilename,
1169
"%08x", page_zip_compress_log++);
1170
logfile = fopen(logfilename, "wb");
1173
/* Write the uncompressed page to the log. */
1174
fwrite(page, 1, UNIV_PAGE_SIZE, logfile);
1175
/* Record the compressed size as zero.
1176
This will be overwritten at successful exit. */
1183
#endif /* PAGE_ZIP_COMPRESS_DBG */
1184
page_zip_stat[page_zip->ssize - 1].compressed++;
1186
if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
1187
>= page_zip_get_size(page_zip))) {
1192
heap = mem_heap_create(page_zip_get_size(page_zip)
1193
+ n_fields * (2 + sizeof *offsets)
1194
+ n_dense * ((sizeof *recs)
1195
- PAGE_ZIP_DIR_SLOT_SIZE)
1196
+ UNIV_PAGE_SIZE * 4
1197
+ (512 << MAX_MEM_LEVEL));
1199
recs = mem_heap_zalloc(heap, n_dense * sizeof *recs);
1201
fields = mem_heap_alloc(heap, (n_fields + 1) * 2);
1203
buf = mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA);
1204
buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA;
1206
/* Compress the data payload. */
1207
page_zip_set_alloc(&c_stream, heap);
1209
err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
1210
Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
1211
MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
1214
c_stream.next_out = buf;
1215
/* Subtract the space reserved for uncompressed data. */
1216
/* Page header and the end marker of the modification log */
1217
c_stream.avail_out = buf_end - buf - 1;
1218
/* Dense page directory and uncompressed columns, if any */
1219
if (page_is_leaf(page)) {
1220
if (dict_index_is_clust(index)) {
1221
trx_id_col = dict_index_get_sys_col_pos(
1222
index, DATA_TRX_ID);
1223
ut_ad(trx_id_col > 0);
1224
ut_ad(trx_id_col != ULINT_UNDEFINED);
1226
slot_size = PAGE_ZIP_DIR_SLOT_SIZE
1227
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1229
/* Signal the absence of trx_id
1230
in page_zip_fields_encode() */
1231
ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
1232
== ULINT_UNDEFINED);
1234
slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
1237
slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
1238
trx_id_col = ULINT_UNDEFINED;
1241
if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size
1242
+ 6/* sizeof(zlib header and footer) */)) {
1246
c_stream.avail_out -= n_dense * slot_size;
1247
c_stream.avail_in = page_zip_fields_encode(n_fields, index,
1248
trx_id_col, fields);
1249
c_stream.next_in = fields;
1250
if (UNIV_LIKELY(!trx_id_col)) {
1251
trx_id_col = ULINT_UNDEFINED;
1254
UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1255
err = deflate(&c_stream, Z_FULL_FLUSH);
1260
ut_ad(!c_stream.avail_in);
1262
page_zip_dir_encode(page, buf_end, recs);
1264
c_stream.next_in = (byte*) page + PAGE_ZIP_START;
1266
storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
1268
/* Compress the records in heap_no order. */
1269
if (UNIV_UNLIKELY(!n_dense)) {
1270
} else if (!page_is_leaf(page)) {
1271
/* This is a node pointer page. */
1272
err = page_zip_compress_node_ptrs(LOGFILE
1273
&c_stream, recs, n_dense,
1274
index, storage, heap);
1275
if (UNIV_UNLIKELY(err != Z_OK)) {
1278
} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
1279
/* This is a leaf page in a secondary index. */
1280
err = page_zip_compress_sec(LOGFILE
1281
&c_stream, recs, n_dense);
1282
if (UNIV_UNLIKELY(err != Z_OK)) {
1286
/* This is a leaf page in a clustered index. */
1287
err = page_zip_compress_clust(LOGFILE
1288
&c_stream, recs, n_dense,
1289
index, &n_blobs, trx_id_col,
1290
buf_end - PAGE_ZIP_DIR_SLOT_SIZE
1291
* page_get_n_recs(page),
1293
if (UNIV_UNLIKELY(err != Z_OK)) {
1298
/* Finish the compression. */
1299
ut_ad(!c_stream.avail_in);
1300
/* Compress any trailing garbage, in case the last record was
1301
allocated from an originally longer space on the free list,
1302
or the data of the last record from page_zip_compress_sec(). */
1304
= page_header_get_field(page, PAGE_HEAP_TOP)
1305
- (c_stream.next_in - page);
1306
ut_a(c_stream.avail_in <= UNIV_PAGE_SIZE - PAGE_ZIP_START - PAGE_DIR);
1308
UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1309
err = deflate(&c_stream, Z_FINISH);
1311
if (UNIV_UNLIKELY(err != Z_STREAM_END)) {
1313
deflateEnd(&c_stream);
1314
mem_heap_free(heap);
1316
#ifdef PAGE_ZIP_COMPRESS_DBG
1320
#endif /* PAGE_ZIP_COMPRESS_DBG */
1321
page_zip_stat[page_zip->ssize - 1].compressed_usec
1322
+= ut_time_us(NULL) - usec;
1326
err = deflateEnd(&c_stream);
1329
ut_ad(buf + c_stream.total_out == c_stream.next_out);
1330
ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out);
1332
/* Valgrind believes that zlib does not initialize some bits
1333
in the last 7 or 8 bytes of the stream. Make Valgrind happy. */
1334
UNIV_MEM_VALID(buf, c_stream.total_out);
1336
/* Zero out the area reserved for the modification log.
1337
Space for the end marker of the modification log is not
1338
included in avail_out. */
1339
memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */);
1343
#endif /* UNIV_DEBUG */
1344
page_zip->m_end = PAGE_DATA + c_stream.total_out;
1345
page_zip->m_nonempty = FALSE;
1346
page_zip->n_blobs = n_blobs;
1347
/* Copy those header fields that will not be written
1348
in buf_flush_init_for_writing() */
1349
memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
1350
FIL_PAGE_LSN - FIL_PAGE_PREV);
1351
memcpy(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2);
1352
memcpy(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
1353
PAGE_DATA - FIL_PAGE_DATA);
1354
/* Copy the rest of the compressed page */
1355
memcpy(page_zip->data + PAGE_DATA, buf,
1356
page_zip_get_size(page_zip) - PAGE_DATA);
1357
mem_heap_free(heap);
1358
#ifdef UNIV_ZIP_DEBUG
1359
ut_a(page_zip_validate(page_zip, page));
1360
#endif /* UNIV_ZIP_DEBUG */
1363
page_zip_compress_write_log(page_zip, page, index, mtr);
1366
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
1368
#ifdef PAGE_ZIP_COMPRESS_DBG
1370
/* Record the compressed size of the block. */
1372
mach_write_to_4(sz, c_stream.total_out);
1373
fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET);
1374
fwrite(sz, 1, sizeof sz, logfile);
1377
#endif /* PAGE_ZIP_COMPRESS_DBG */
1379
page_zip_stat_t* zip_stat
1380
= &page_zip_stat[page_zip->ssize - 1];
1381
zip_stat->compressed_ok++;
1382
zip_stat->compressed_usec += ut_time_us(NULL) - usec;
1388
/**************************************************************************
1389
Compare two page directory entries. */
1394
/* out: positive if rec1 > rec2 */
1395
const rec_t* rec1, /* in: rec1 */
1396
const rec_t* rec2) /* in: rec2 */
1398
return(rec1 > rec2);
1401
/**************************************************************************
1402
Sort the dense page directory by address (heap_no). */
1407
rec_t** arr, /* in/out: dense page directory */
1408
rec_t** aux_arr,/* in/out: work area */
1409
ulint low, /* in: lower bound of the sorting area, inclusive */
1410
ulint high) /* in: upper bound of the sorting area, exclusive */
1412
UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
1416
/**************************************************************************
1417
Deallocate the index information initialized by page_zip_fields_decode(). */
1420
page_zip_fields_free(
1421
/*=================*/
1422
dict_index_t* index) /* in: dummy index to be freed */
1425
dict_table_t* table = index->table;
1426
mem_heap_free(index->heap);
1427
mutex_free(&(table->autoinc_mutex));
1428
mem_heap_free(table->heap);
1432
/**************************************************************************
1433
Read the index information for the compressed page. */
1436
page_zip_fields_decode(
1437
/*===================*/
1438
/* out,own: dummy index describing the page,
1440
const byte* buf, /* in: index information */
1441
const byte* end, /* in: end of buf */
1442
ulint* trx_id_col)/* in: NULL for non-leaf pages;
1443
for leaf pages, pointer to where to store
1444
the position of the trx_id column */
1450
dict_table_t* table;
1451
dict_index_t* index;
1453
/* Determine the number of fields. */
1454
for (b = buf, n = 0; b < end; n++) {
1456
b++; /* skip the second byte */
1460
n--; /* n_nullable or trx_id */
1462
if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
1464
page_zip_fail(("page_zip_fields_decode: n = %lu\n",
1469
if (UNIV_UNLIKELY(b > end)) {
1471
page_zip_fail(("page_zip_fields_decode: %p > %p\n",
1472
(const void*) b, (const void*) end));
1476
table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n,
1478
index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
1479
DICT_HDR_SPACE, 0, n);
1480
index->table = table;
1482
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
1483
index->cached = TRUE;
1485
/* Initialize the fields. */
1486
for (b = buf, i = 0; i < n; i++) {
1492
if (UNIV_UNLIKELY(val & 0x80)) {
1493
/* fixed length > 62 bytes */
1494
val = (val & 0x7f) << 8 | *b++;
1496
mtype = DATA_FIXBINARY;
1497
} else if (UNIV_UNLIKELY(val >= 126)) {
1498
/* variable length with max > 255 bytes */
1500
mtype = DATA_BINARY;
1501
} else if (val <= 1) {
1502
/* variable length with max <= 255 bytes */
1504
mtype = DATA_BINARY;
1506
/* fixed length < 62 bytes */
1508
mtype = DATA_FIXBINARY;
1511
dict_mem_table_add_col(table, NULL, NULL, mtype,
1512
val & 1 ? DATA_NOT_NULL : 0, len);
1513
dict_index_add_col(index, table,
1514
dict_table_get_nth_col(table, i), 0);
1518
if (UNIV_UNLIKELY(val & 0x80)) {
1519
val = (val & 0x7f) << 8 | *b++;
1522
/* Decode the position of the trx_id column. */
1525
val = ULINT_UNDEFINED;
1526
} else if (UNIV_UNLIKELY(val >= n)) {
1527
page_zip_fields_free(index);
1530
index->type = DICT_CLUSTERED;
1535
/* Decode the number of nullable fields. */
1536
if (UNIV_UNLIKELY(index->n_nullable > val)) {
1537
page_zip_fields_free(index);
1540
index->n_nullable = val;
1549
/**************************************************************************
1550
Populate the sparse page directory from the dense directory. */
1553
page_zip_dir_decode(
1554
/*================*/
1555
/* out: TRUE on success,
1557
const page_zip_des_t* page_zip,/* in: dense page directory on
1559
page_t* page, /* in: compact page with valid header;
1560
out: trailer and sparse page directory
1562
rec_t** recs, /* out: dense page directory sorted by
1563
ascending address (and heap_no) */
1564
rec_t** recs_aux,/* in/out: scratch area */
1565
ulint n_dense)/* in: number of user records, and
1566
size of recs[] and recs_aux[] */
1572
n_recs = page_get_n_recs(page);
1574
if (UNIV_UNLIKELY(n_recs > n_dense)) {
1575
page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n",
1576
(ulong) n_recs, (ulong) n_dense));
1580
/* Traverse the list of stored records in the sorting order,
1581
starting from the first user record. */
1583
slot = page + (UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
1584
UNIV_PREFETCH_RW(slot);
1586
/* Zero out the page trailer. */
1587
memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);
1589
mach_write_to_2(slot, PAGE_NEW_INFIMUM);
1590
slot -= PAGE_DIR_SLOT_SIZE;
1591
UNIV_PREFETCH_RW(slot);
1593
/* Initialize the sparse directory and copy the dense directory. */
1594
for (i = 0; i < n_recs; i++) {
1595
ulint offs = page_zip_dir_get(page_zip, i);
1597
if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
1598
mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
1599
slot -= PAGE_DIR_SLOT_SIZE;
1600
UNIV_PREFETCH_RW(slot);
1603
if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
1604
< PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
1605
page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n",
1606
(unsigned) i, (unsigned) n_recs,
1611
recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
1614
mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
1616
const page_dir_slot_t* last_slot = page_dir_get_nth_slot(
1617
page, page_dir_get_n_slots(page) - 1);
1619
if (UNIV_UNLIKELY(slot != last_slot)) {
1620
page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
1622
(const void*) last_slot));
1627
/* Copy the rest of the dense directory. */
1628
for (; i < n_dense; i++) {
1629
ulint offs = page_zip_dir_get(page_zip, i);
1631
if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
1632
page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n",
1633
(unsigned) i, (unsigned) n_dense,
1638
recs[i] = page + offs;
1641
if (UNIV_LIKELY(n_dense > 1)) {
1642
page_zip_dir_sort(recs, recs_aux, 0, n_dense);
1647
/**************************************************************************
1648
Initialize the REC_N_NEW_EXTRA_BYTES of each record. */
1651
page_zip_set_extra_bytes(
1652
/*=====================*/
1653
/* out: TRUE on success,
1655
const page_zip_des_t* page_zip,/* in: compressed page */
1656
page_t* page, /* in/out: uncompressed page */
1657
ulint info_bits)/* in: REC_INFO_MIN_REC_FLAG or 0 */
1665
n = page_get_n_recs(page);
1666
rec = page + PAGE_NEW_INFIMUM;
1668
for (i = 0; i < n; i++) {
1669
offs = page_zip_dir_get(page_zip, i);
1671
if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_DEL)) {
1672
info_bits |= REC_INFO_DELETED_FLAG;
1674
if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
1675
info_bits |= n_owned;
1680
offs &= PAGE_ZIP_DIR_SLOT_MASK;
1681
if (UNIV_UNLIKELY(offs < PAGE_ZIP_START
1682
+ REC_N_NEW_EXTRA_BYTES)) {
1683
page_zip_fail(("page_zip_set_extra_bytes 1:"
1685
(unsigned) i, (unsigned) n,
1690
rec_set_next_offs_new(rec, offs);
1692
rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
1696
/* Set the next pointer of the last user record. */
1697
rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);
1699
/* Set n_owned of the supremum record. */
1700
page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;
1702
/* The dense directory excludes the infimum and supremum records. */
1703
n = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
1706
if (UNIV_LIKELY(i == n)) {
1710
page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n",
1711
(unsigned) i, (unsigned) n));
1715
offs = page_zip_dir_get(page_zip, i);
1717
/* Set the extra bytes of deleted records on the free list. */
1719
if (UNIV_UNLIKELY(!offs)
1720
|| UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
1722
page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n",
1728
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
1734
offs = page_zip_dir_get(page_zip, i);
1735
rec_set_next_offs_new(rec, offs);
1738
/* Terminate the free list. */
1739
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
1740
rec_set_next_offs_new(rec, 0);
1745
/**************************************************************************
1746
Apply the modification log to a record containing externally stored
1747
columns. Do not copy the fields that are stored separately. */
1750
page_zip_apply_log_ext(
1751
/*===================*/
1752
/* out: pointer to modification log,
1753
or NULL on failure */
1754
rec_t* rec, /* in/out: record */
1755
const ulint* offsets, /* in: rec_get_offsets(rec) */
1756
ulint trx_id_col, /* in: position of of DB_TRX_ID */
1757
const byte* data, /* in: modification log */
1758
const byte* end) /* in: end of modification log */
1762
byte* next_out = rec;
1764
/* Check if there are any externally stored columns.
1765
For each externally stored column, skip the
1766
BTR_EXTERN_FIELD_REF. */
1768
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
1771
if (UNIV_UNLIKELY(i == trx_id_col)) {
1772
/* Skip trx_id and roll_ptr */
1773
dst = rec_get_nth_field(rec, offsets,
1775
if (UNIV_UNLIKELY(dst - next_out >= end - data)
1777
(len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
1778
|| rec_offs_nth_extern(offsets, i)) {
1779
page_zip_fail(("page_zip_apply_log_ext:"
1781
" %p - %p >= %p - %p\n",
1784
(const void*) next_out,
1786
(const void*) data));
1790
memcpy(next_out, data, dst - next_out);
1791
data += dst - next_out;
1792
next_out = dst + (DATA_TRX_ID_LEN
1793
+ DATA_ROLL_PTR_LEN);
1794
} else if (rec_offs_nth_extern(offsets, i)) {
1795
dst = rec_get_nth_field(rec, offsets,
1798
>= BTR_EXTERN_FIELD_REF_SIZE);
1800
len += dst - next_out
1801
- BTR_EXTERN_FIELD_REF_SIZE;
1803
if (UNIV_UNLIKELY(data + len >= end)) {
1804
page_zip_fail(("page_zip_apply_log_ext: "
1805
"ext %p+%lu >= %p\n",
1808
(const void*) end));
1812
memcpy(next_out, data, len);
1815
+ BTR_EXTERN_FIELD_REF_SIZE;
1819
/* Copy the last bytes of the record. */
1820
len = rec_get_end(rec, offsets) - next_out;
1821
if (UNIV_UNLIKELY(data + len >= end)) {
1822
page_zip_fail(("page_zip_apply_log_ext: "
1823
"last %p+%lu >= %p\n",
1826
(const void*) end));
1829
memcpy(next_out, data, len);
1835
/**************************************************************************
1836
Apply the modification log to an uncompressed page.
1837
Do not copy the fields that are stored separately. */
1842
/* out: pointer to end of modification log,
1843
or NULL on failure */
1844
const byte* data, /* in: modification log */
1845
ulint size, /* in: maximum length of the log, in bytes */
1846
rec_t** recs, /* in: dense page directory,
1847
sorted by address (indexed by
1848
heap_no - PAGE_HEAP_NO_USER_LOW) */
1849
ulint n_dense,/* in: size of recs[] */
1850
ulint trx_id_col,/* in: column number of trx_id in the index,
1851
or ULINT_UNDEFINED if none */
1853
/* in: heap_no and status bits for
1854
the next record to uncompress */
1855
dict_index_t* index, /* in: index of the page */
1856
ulint* offsets)/* in/out: work area for
1857
rec_get_offsets_reverse() */
1859
const byte* const end = data + size;
1868
if (UNIV_UNLIKELY(!val)) {
1872
val = (val & 0x7f) << 8 | *data++;
1873
if (UNIV_UNLIKELY(!val)) {
1874
page_zip_fail(("page_zip_apply_log:"
1875
" invalid val %x%x\n",
1876
data[-2], data[-1]));
1880
if (UNIV_UNLIKELY(data >= end)) {
1881
page_zip_fail(("page_zip_apply_log: %p >= %p\n",
1883
(const void*) end));
1886
if (UNIV_UNLIKELY((val >> 1) > n_dense)) {
1887
page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n",
1888
(ulong) val, (ulong) n_dense));
1892
/* Determine the heap number and status bits of the record. */
1893
rec = recs[(val >> 1) - 1];
1895
hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT;
1896
hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);
1898
/* This may either be an old record that is being
1899
overwritten (updated in place, or allocated from
1900
the free list), or a new record, with the next
1901
available_heap_no. */
1902
if (UNIV_UNLIKELY(hs > heap_status)) {
1903
page_zip_fail(("page_zip_apply_log: %lu > %lu\n",
1904
(ulong) hs, (ulong) heap_status));
1906
} else if (hs == heap_status) {
1907
/* A new record was allocated from the heap. */
1908
if (UNIV_UNLIKELY(val & 1)) {
1909
/* Only existing records may be cleared. */
1910
page_zip_fail(("page_zip_apply_log:"
1911
" attempting to create"
1912
" deleted rec %lu\n",
1916
heap_status += 1 << REC_HEAP_NO_SHIFT;
1919
mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);
1922
/* Clear the data bytes of the record. */
1923
mem_heap_t* heap = NULL;
1925
offs = rec_get_offsets(rec, index, offsets,
1926
ULINT_UNDEFINED, &heap);
1927
memset(rec, 0, rec_offs_data_size(offs));
1929
if (UNIV_LIKELY_NULL(heap)) {
1930
mem_heap_free(heap);
1935
#if REC_STATUS_NODE_PTR != TRUE
1936
# error "REC_STATUS_NODE_PTR != TRUE"
1938
rec_get_offsets_reverse(data, index,
1939
hs & REC_STATUS_NODE_PTR,
1941
rec_offs_make_valid(rec, index, offsets);
1943
/* Copy the extra bytes (backwards). */
1945
byte* start = rec_get_start(rec, offsets);
1946
byte* b = rec - REC_N_NEW_EXTRA_BYTES;
1947
while (b != start) {
1952
/* Copy the data bytes. */
1953
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
1954
/* Non-leaf nodes should not contain any
1955
externally stored columns. */
1956
if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
1957
page_zip_fail(("page_zip_apply_log: "
1958
"%lu&REC_STATUS_NODE_PTR\n",
1963
data = page_zip_apply_log_ext(
1964
rec, offsets, trx_id_col, data, end);
1966
if (UNIV_UNLIKELY(!data)) {
1969
} else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
1970
len = rec_offs_data_size(offsets)
1971
- REC_NODE_PTR_SIZE;
1972
/* Copy the data bytes, except node_ptr. */
1973
if (UNIV_UNLIKELY(data + len >= end)) {
1974
page_zip_fail(("page_zip_apply_log: "
1975
"node_ptr %p+%lu >= %p\n",
1978
(const void*) end));
1981
memcpy(rec, data, len);
1983
} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
1984
len = rec_offs_data_size(offsets);
1986
/* Copy all data bytes of
1987
a record in a secondary index. */
1988
if (UNIV_UNLIKELY(data + len >= end)) {
1989
page_zip_fail(("page_zip_apply_log: "
1990
"sec %p+%lu >= %p\n",
1993
(const void*) end));
1997
memcpy(rec, data, len);
2000
/* Skip DB_TRX_ID and DB_ROLL_PTR. */
2001
ulint l = rec_get_nth_field_offs(offsets,
2005
if (UNIV_UNLIKELY(data + l >= end)
2006
|| UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
2007
+ DATA_ROLL_PTR_LEN))) {
2008
page_zip_fail(("page_zip_apply_log: "
2009
"trx_id %p+%lu >= %p\n",
2012
(const void*) end));
2016
/* Copy any preceding data bytes. */
2017
memcpy(rec, data, l);
2020
/* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
2021
b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2022
len = rec_get_end(rec, offsets) - b;
2023
if (UNIV_UNLIKELY(data + len >= end)) {
2024
page_zip_fail(("page_zip_apply_log: "
2025
"clust %p+%lu >= %p\n",
2028
(const void*) end));
2031
memcpy(b, data, len);
2037
/**************************************************************************
2038
Decompress the records of a node pointer page. */
2041
page_zip_decompress_node_ptrs(
2042
/*==========================*/
2043
/* out: TRUE on success,
2045
page_zip_des_t* page_zip, /* in/out: compressed page */
2046
z_stream* d_stream, /* in/out: compressed page stream */
2047
rec_t** recs, /* in: dense page directory
2048
sorted by address */
2049
ulint n_dense, /* in: size of recs[] */
2050
dict_index_t* index, /* in: the index of the page */
2051
ulint* offsets, /* in/out: temporary offsets */
2052
mem_heap_t* heap) /* in: temporary memory heap */
2054
ulint heap_status = REC_STATUS_NODE_PTR
2055
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2057
const byte* storage;
2059
/* Subtract the space reserved for uncompressed data. */
2060
d_stream->avail_in -= n_dense
2061
* (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE);
2063
/* Decompress the records in heap_no order. */
2064
for (slot = 0; slot < n_dense; slot++) {
2065
rec_t* rec = recs[slot];
2067
d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2068
- d_stream->next_out;
2070
ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
2071
- PAGE_ZIP_START - PAGE_DIR);
2072
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2074
/* Apparently, n_dense has grown
2075
since the time the page was last compressed. */
2079
if (!d_stream->avail_out) {
2084
page_zip_fail(("page_zip_decompress_node_ptrs:"
2085
" 1 inflate(Z_SYNC_FLUSH)=%s\n",
2090
ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2091
/* Prepare to decompress the data bytes. */
2092
d_stream->next_out = rec;
2093
/* Set heap_no and the status bits. */
2094
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2095
heap_status += 1 << REC_HEAP_NO_SHIFT;
2097
/* Read the offsets. The status bits are needed here. */
2098
offsets = rec_get_offsets(rec, index, offsets,
2099
ULINT_UNDEFINED, &heap);
2101
/* Non-leaf nodes should not have any externally
2103
ut_ad(!rec_offs_any_extern(offsets));
2105
/* Decompress the data bytes, except node_ptr. */
2106
d_stream->avail_out = rec_offs_data_size(offsets)
2107
- REC_NODE_PTR_SIZE;
2109
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2114
if (!d_stream->avail_out) {
2119
page_zip_fail(("page_zip_decompress_node_ptrs:"
2120
" 2 inflate(Z_SYNC_FLUSH)=%s\n",
2125
/* Clear the node pointer in case the record
2126
will be deleted and the space will be reallocated
2127
to a smaller record. */
2128
memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE);
2129
d_stream->next_out += REC_NODE_PTR_SIZE;
2131
ut_ad(d_stream->next_out == rec_get_end(rec, offsets));
2134
/* Decompress any trailing garbage, in case the last record was
2135
allocated from an originally longer space on the free list. */
2136
d_stream->avail_out = page_header_get_field(page_zip->data,
2138
- page_offset(d_stream->next_out);
2139
if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2140
- PAGE_ZIP_START - PAGE_DIR)) {
2142
page_zip_fail(("page_zip_decompress_node_ptrs:"
2143
" avail_out = %u\n",
2144
d_stream->avail_out));
2148
if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2149
page_zip_fail(("page_zip_decompress_node_ptrs:"
2150
" inflate(Z_FINISH)=%s\n",
2153
inflateEnd(d_stream);
2157
/* Note that d_stream->avail_out > 0 may hold here
2158
if the modification log is nonempty. */
2161
if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2166
page_t* page = page_align(d_stream->next_out);
2168
/* Clear the unused heap space on the uncompressed page. */
2169
memset(d_stream->next_out, 0,
2170
page_dir_get_nth_slot(page,
2171
page_dir_get_n_slots(page) - 1)
2172
- d_stream->next_out);
2176
page_zip->m_start = PAGE_DATA + d_stream->total_in;
2177
#endif /* UNIV_DEBUG */
2179
/* Apply the modification log. */
2181
const byte* mod_log_ptr;
2182
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2183
d_stream->avail_in + 1,
2185
ULINT_UNDEFINED, heap_status,
2188
if (UNIV_UNLIKELY(!mod_log_ptr)) {
2191
page_zip->m_end = mod_log_ptr - page_zip->data;
2192
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2196
(page_zip_get_trailer_len(page_zip,
2197
dict_index_is_clust(index), NULL)
2198
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
2199
page_zip_fail(("page_zip_decompress_node_ptrs:"
2200
" %lu + %lu >= %lu, %lu\n",
2201
(ulong) page_zip_get_trailer_len(
2202
page_zip, dict_index_is_clust(index),
2204
(ulong) page_zip->m_end,
2205
(ulong) page_zip_get_size(page_zip),
2206
(ulong) dict_index_is_clust(index)));
2210
/* Restore the uncompressed columns in heap_no order. */
2211
storage = page_zip->data + page_zip_get_size(page_zip)
2212
- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2214
for (slot = 0; slot < n_dense; slot++) {
2215
rec_t* rec = recs[slot];
2217
offsets = rec_get_offsets(rec, index, offsets,
2218
ULINT_UNDEFINED, &heap);
2219
/* Non-leaf nodes should not have any externally
2221
ut_ad(!rec_offs_any_extern(offsets));
2222
storage -= REC_NODE_PTR_SIZE;
2224
memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
2225
storage, REC_NODE_PTR_SIZE);
2231
/**************************************************************************
2232
Decompress the records of a leaf node of a secondary index. */
2235
page_zip_decompress_sec(
2236
/*====================*/
2237
/* out: TRUE on success,
2239
page_zip_des_t* page_zip, /* in/out: compressed page */
2240
z_stream* d_stream, /* in/out: compressed page stream */
2241
rec_t** recs, /* in: dense page directory
2242
sorted by address */
2243
ulint n_dense, /* in: size of recs[] */
2244
dict_index_t* index, /* in: the index of the page */
2245
ulint* offsets) /* in/out: temporary offsets */
2247
ulint heap_status = REC_STATUS_ORDINARY
2248
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2251
ut_a(!dict_index_is_clust(index));
2253
/* Subtract the space reserved for uncompressed data. */
2254
d_stream->avail_in -= n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2256
for (slot = 0; slot < n_dense; slot++) {
2257
rec_t* rec = recs[slot];
2259
/* Decompress everything up to this record. */
2260
d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2261
- d_stream->next_out;
2263
if (UNIV_LIKELY(d_stream->avail_out)) {
2264
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2266
/* Apparently, n_dense has grown
2267
since the time the page was last compressed. */
2271
if (!d_stream->avail_out) {
2276
page_zip_fail(("page_zip_decompress_sec:"
2277
" inflate(Z_SYNC_FLUSH)=%s\n",
2283
ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2285
/* Skip the REC_N_NEW_EXTRA_BYTES. */
2287
d_stream->next_out = rec;
2289
/* Set heap_no and the status bits. */
2290
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2291
heap_status += 1 << REC_HEAP_NO_SHIFT;
2294
/* Decompress the data of the last record and any trailing garbage,
2295
in case the last record was allocated from an originally longer space
2296
on the free list. */
2297
d_stream->avail_out = page_header_get_field(page_zip->data,
2299
- page_offset(d_stream->next_out);
2300
if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2301
- PAGE_ZIP_START - PAGE_DIR)) {
2303
page_zip_fail(("page_zip_decompress_sec:"
2304
" avail_out = %u\n",
2305
d_stream->avail_out));
2309
if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2310
page_zip_fail(("page_zip_decompress_sec:"
2311
" inflate(Z_FINISH)=%s\n",
2314
inflateEnd(d_stream);
2318
/* Note that d_stream->avail_out > 0 may hold here
2319
if the modification log is nonempty. */
2322
if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2327
page_t* page = page_align(d_stream->next_out);
2329
/* Clear the unused heap space on the uncompressed page. */
2330
memset(d_stream->next_out, 0,
2331
page_dir_get_nth_slot(page,
2332
page_dir_get_n_slots(page) - 1)
2333
- d_stream->next_out);
2337
page_zip->m_start = PAGE_DATA + d_stream->total_in;
2338
#endif /* UNIV_DEBUG */
2340
/* Apply the modification log. */
2342
const byte* mod_log_ptr;
2343
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2344
d_stream->avail_in + 1,
2346
ULINT_UNDEFINED, heap_status,
2349
if (UNIV_UNLIKELY(!mod_log_ptr)) {
2352
page_zip->m_end = mod_log_ptr - page_zip->data;
2353
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2356
if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE, NULL)
2357
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
2359
page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n",
2360
(ulong) page_zip_get_trailer_len(
2361
page_zip, FALSE, NULL),
2362
(ulong) page_zip->m_end,
2363
(ulong) page_zip_get_size(page_zip)));
2367
/* There are no uncompressed columns on leaf pages of
2368
secondary indexes. */
2373
/**************************************************************************
2374
Decompress a record of a leaf node of a clustered index that contains
2375
externally stored columns. */
2378
page_zip_decompress_clust_ext(
2379
/*==========================*/
2380
/* out: TRUE on success */
2381
z_stream* d_stream, /* in/out: compressed page stream */
2382
rec_t* rec, /* in/out: record */
2383
const ulint* offsets, /* in: rec_get_offsets(rec) */
2384
ulint trx_id_col) /* in: position of of DB_TRX_ID */
2388
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
2392
if (UNIV_UNLIKELY(i == trx_id_col)) {
2393
/* Skip trx_id and roll_ptr */
2394
dst = rec_get_nth_field(rec, offsets, i, &len);
2395
if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
2396
+ DATA_ROLL_PTR_LEN)) {
2398
page_zip_fail(("page_zip_decompress_clust_ext:"
2399
" len[%lu] = %lu\n",
2400
(ulong) i, (ulong) len));
2404
if (rec_offs_nth_extern(offsets, i)) {
2406
page_zip_fail(("page_zip_decompress_clust_ext:"
2407
" DB_TRX_ID at %lu is ext\n",
2412
d_stream->avail_out = dst - d_stream->next_out;
2414
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2418
if (!d_stream->avail_out) {
2423
page_zip_fail(("page_zip_decompress_clust_ext:"
2424
" 1 inflate(Z_SYNC_FLUSH)=%s\n",
2429
ut_ad(d_stream->next_out == dst);
2431
/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
2432
avoid uninitialized bytes in case the record
2433
is affected by page_zip_apply_log(). */
2434
memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2436
d_stream->next_out += DATA_TRX_ID_LEN
2437
+ DATA_ROLL_PTR_LEN;
2438
} else if (rec_offs_nth_extern(offsets, i)) {
2439
dst = rec_get_nth_field(rec, offsets, i, &len);
2440
ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
2441
dst += len - BTR_EXTERN_FIELD_REF_SIZE;
2443
d_stream->avail_out = dst - d_stream->next_out;
2444
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2448
if (!d_stream->avail_out) {
2453
page_zip_fail(("page_zip_decompress_clust_ext:"
2454
" 2 inflate(Z_SYNC_FLUSH)=%s\n",
2459
ut_ad(d_stream->next_out == dst);
2461
/* Clear the BLOB pointer in case
2462
the record will be deleted and the
2463
space will not be reused. Note that
2464
the final initialization of the BLOB
2465
pointers (copying from "externs"
2466
or clearing) will have to take place
2467
only after the page modification log
2468
has been applied. Otherwise, we
2469
could end up with an uninitialized
2470
BLOB pointer when a record is deleted,
2471
reallocated and deleted. */
2472
memset(d_stream->next_out, 0,
2473
BTR_EXTERN_FIELD_REF_SIZE);
2475
+= BTR_EXTERN_FIELD_REF_SIZE;
2482
/**************************************************************************
2483
Compress the records of a leaf node of a clustered index. */
2486
page_zip_decompress_clust(
2487
/*======================*/
2488
/* out: TRUE on success,
2490
page_zip_des_t* page_zip, /* in/out: compressed page */
2491
z_stream* d_stream, /* in/out: compressed page stream */
2492
rec_t** recs, /* in: dense page directory
2493
sorted by address */
2494
ulint n_dense, /* in: size of recs[] */
2495
dict_index_t* index, /* in: the index of the page */
2496
ulint trx_id_col, /* index of the trx_id column */
2497
ulint* offsets, /* in/out: temporary offsets */
2498
mem_heap_t* heap) /* in: temporary memory heap */
2502
ulint heap_status = REC_STATUS_ORDINARY
2503
| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
2504
const byte* storage;
2505
const byte* externs;
2507
ut_a(dict_index_is_clust(index));
2509
/* Subtract the space reserved for uncompressed data. */
2510
d_stream->avail_in -= n_dense * (PAGE_ZIP_DIR_SLOT_SIZE
2512
+ DATA_ROLL_PTR_LEN);
2514
/* Decompress the records in heap_no order. */
2515
for (slot = 0; slot < n_dense; slot++) {
2516
rec_t* rec = recs[slot];
2518
d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
2519
- d_stream->next_out;
2521
ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
2522
- PAGE_ZIP_START - PAGE_DIR);
2523
err = inflate(d_stream, Z_SYNC_FLUSH);
2526
/* Apparently, n_dense has grown
2527
since the time the page was last compressed. */
2531
if (UNIV_LIKELY(!d_stream->avail_out)) {
2536
page_zip_fail(("page_zip_decompress_clust:"
2537
" 1 inflate(Z_SYNC_FLUSH)=%s\n",
2542
ut_ad(d_stream->next_out == rec - REC_N_NEW_EXTRA_BYTES);
2543
/* Prepare to decompress the data bytes. */
2544
d_stream->next_out = rec;
2545
/* Set heap_no and the status bits. */
2546
mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2547
heap_status += 1 << REC_HEAP_NO_SHIFT;
2549
/* Read the offsets. The status bits are needed here. */
2550
offsets = rec_get_offsets(rec, index, offsets,
2551
ULINT_UNDEFINED, &heap);
2553
/* This is a leaf page in a clustered index. */
2555
/* Check if there are any externally stored columns.
2556
For each externally stored column, restore the
2557
BTR_EXTERN_FIELD_REF separately. */
2559
if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
2561
(!page_zip_decompress_clust_ext(
2562
d_stream, rec, offsets, trx_id_col))) {
2567
/* Skip trx_id and roll_ptr */
2569
byte* dst = rec_get_nth_field(rec, offsets,
2571
if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
2572
+ DATA_ROLL_PTR_LEN)) {
2574
page_zip_fail(("page_zip_decompress_clust:"
2575
" len = %lu\n", (ulong) len));
2579
d_stream->avail_out = dst - d_stream->next_out;
2581
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2585
if (!d_stream->avail_out) {
2590
page_zip_fail(("page_zip_decompress_clust:"
2591
" 2 inflate(Z_SYNC_FLUSH)=%s\n",
2596
ut_ad(d_stream->next_out == dst);
2598
/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
2599
avoid uninitialized bytes in case the record
2600
is affected by page_zip_apply_log(). */
2601
memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2603
d_stream->next_out += DATA_TRX_ID_LEN
2604
+ DATA_ROLL_PTR_LEN;
2607
/* Decompress the last bytes of the record. */
2608
d_stream->avail_out = rec_get_end(rec, offsets)
2609
- d_stream->next_out;
2611
switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2615
if (!d_stream->avail_out) {
2620
page_zip_fail(("page_zip_decompress_clust:"
2621
" 3 inflate(Z_SYNC_FLUSH)=%s\n",
2627
/* Decompress any trailing garbage, in case the last record was
2628
allocated from an originally longer space on the free list. */
2629
d_stream->avail_out = page_header_get_field(page_zip->data,
2631
- page_offset(d_stream->next_out);
2632
if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
2633
- PAGE_ZIP_START - PAGE_DIR)) {
2635
page_zip_fail(("page_zip_decompress_clust:"
2636
" avail_out = %u\n",
2637
d_stream->avail_out));
2641
if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
2642
page_zip_fail(("page_zip_decompress_clust:"
2643
" inflate(Z_FINISH)=%s\n",
2646
inflateEnd(d_stream);
2650
/* Note that d_stream->avail_out > 0 may hold here
2651
if the modification log is nonempty. */
2654
if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
2659
page_t* page = page_align(d_stream->next_out);
2661
/* Clear the unused heap space on the uncompressed page. */
2662
memset(d_stream->next_out, 0,
2663
page_dir_get_nth_slot(page,
2664
page_dir_get_n_slots(page) - 1)
2665
- d_stream->next_out);
2669
page_zip->m_start = PAGE_DATA + d_stream->total_in;
2670
#endif /* UNIV_DEBUG */
2672
/* Apply the modification log. */
2674
const byte* mod_log_ptr;
2675
mod_log_ptr = page_zip_apply_log(d_stream->next_in,
2676
d_stream->avail_in + 1,
2678
trx_id_col, heap_status,
2681
if (UNIV_UNLIKELY(!mod_log_ptr)) {
2684
page_zip->m_end = mod_log_ptr - page_zip->data;
2685
page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
2688
if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE, NULL)
2689
+ page_zip->m_end >= page_zip_get_size(page_zip))) {
2691
page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n",
2692
(ulong) page_zip_get_trailer_len(
2693
page_zip, TRUE, NULL),
2694
(ulong) page_zip->m_end,
2695
(ulong) page_zip_get_size(page_zip)));
2699
storage = page_zip->data + page_zip_get_size(page_zip)
2700
- n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
2702
externs = storage - n_dense
2703
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2705
/* Restore the uncompressed columns in heap_no order. */
2707
for (slot = 0; slot < n_dense; slot++) {
2711
rec_t* rec = recs[slot];
2712
ibool exists = !page_zip_dir_find_free(
2713
page_zip, page_offset(rec));
2714
offsets = rec_get_offsets(rec, index, offsets,
2715
ULINT_UNDEFINED, &heap);
2717
dst = rec_get_nth_field(rec, offsets,
2719
ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2720
storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
2721
memcpy(dst, storage,
2722
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2724
/* Check if there are any externally stored
2725
columns in this record. For each externally
2726
stored column, restore or clear the
2727
BTR_EXTERN_FIELD_REF. */
2728
if (!rec_offs_any_extern(offsets)) {
2732
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
2733
if (!rec_offs_nth_extern(offsets, i)) {
2736
dst = rec_get_nth_field(rec, offsets, i, &len);
2738
if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) {
2739
page_zip_fail(("page_zip_decompress_clust:"
2745
dst += len - BTR_EXTERN_FIELD_REF_SIZE;
2747
if (UNIV_LIKELY(exists)) {
2749
restore the BLOB pointer */
2750
externs -= BTR_EXTERN_FIELD_REF_SIZE;
2753
(externs < page_zip->data
2754
+ page_zip->m_end)) {
2755
page_zip_fail(("page_zip_"
2756
"decompress_clust: "
2758
(const void*) externs,
2766
memcpy(dst, externs,
2767
BTR_EXTERN_FIELD_REF_SIZE);
2769
page_zip->n_blobs++;
2772
clear the BLOB pointer */
2774
BTR_EXTERN_FIELD_REF_SIZE);
2782
/**************************************************************************
2783
Decompress a page. This function should tolerate errors on the compressed
2784
page. Instead of letting assertions fail, it will return FALSE if an
2785
inconsistency is detected. */
2788
page_zip_decompress(
2789
/*================*/
2790
/* out: TRUE on success, FALSE on failure */
2791
page_zip_des_t* page_zip,/* in: data, ssize;
2792
out: m_start, m_end, m_nonempty, n_blobs */
2793
page_t* page) /* out: uncompressed page, may be trashed */
2796
dict_index_t* index = NULL;
2797
rec_t** recs; /* dense page directory, sorted by address */
2798
ulint n_dense;/* number of user records on the page */
2799
ulint trx_id_col = ULINT_UNDEFINED;
2802
ullint usec = ut_time_us(NULL);
2804
ut_ad(page_zip_simple_validate(page_zip));
2805
UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
2806
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
2808
/* The dense directory excludes the infimum and supremum records. */
2809
n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW;
2810
if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
2811
>= page_zip_get_size(page_zip))) {
2812
page_zip_fail(("page_zip_decompress 1: %lu %lu\n",
2814
(ulong) page_zip_get_size(page_zip)));
2818
heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE);
2819
recs = mem_heap_alloc(heap, n_dense * (2 * sizeof *recs));
2821
#ifdef UNIV_ZIP_DEBUG
2822
/* Clear the page. */
2823
memset(page, 0x55, UNIV_PAGE_SIZE);
2824
#endif /* UNIV_ZIP_DEBUG */
2825
UNIV_MEM_INVALID(page, UNIV_PAGE_SIZE);
2826
/* Copy the page header. */
2827
memcpy(page, page_zip->data, PAGE_DATA);
2829
/* Copy the page directory. */
2830
if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
2831
recs + n_dense, n_dense))) {
2833
mem_heap_free(heap);
2837
/* Copy the infimum and supremum records. */
2838
memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
2839
infimum_extra, sizeof infimum_extra);
2840
if (UNIV_UNLIKELY(!page_get_n_recs(page))) {
2841
rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
2844
rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
2845
page_zip_dir_get(page_zip, 0)
2846
& PAGE_ZIP_DIR_SLOT_MASK);
2848
memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data);
2849
memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
2850
supremum_extra_data, sizeof supremum_extra_data);
2852
page_zip_set_alloc(&d_stream, heap);
2854
if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
2859
d_stream.next_in = page_zip->data + PAGE_DATA;
2860
/* Subtract the space reserved for
2861
the page header and the end marker of the modification log. */
2862
d_stream.avail_in = page_zip_get_size(page_zip) - (PAGE_DATA + 1);
2864
d_stream.next_out = page + PAGE_ZIP_START;
2865
d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
2867
/* Decode the zlib header and the index information. */
2868
if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
2870
page_zip_fail(("page_zip_decompress:"
2871
" 1 inflate(Z_BLOCK)=%s\n", d_stream.msg));
2875
if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
2877
page_zip_fail(("page_zip_decompress:"
2878
" 2 inflate(Z_BLOCK)=%s\n", d_stream.msg));
2882
index = page_zip_fields_decode(
2883
page + PAGE_ZIP_START, d_stream.next_out,
2884
page_is_leaf(page) ? &trx_id_col : NULL);
2886
if (UNIV_UNLIKELY(!index)) {
2891
/* Decompress the user records. */
2892
page_zip->n_blobs = 0;
2893
d_stream.next_out = page + PAGE_ZIP_START;
2896
/* Pre-allocate the offsets for rec_get_offsets_reverse(). */
2897
ulint n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE
2898
+ dict_index_get_n_fields(index);
2899
offsets = mem_heap_alloc(heap, n * sizeof(ulint));
2903
/* Decompress the records in heap_no order. */
2904
if (!page_is_leaf(page)) {
2905
/* This is a node pointer page. */
2909
(!page_zip_decompress_node_ptrs(page_zip, &d_stream,
2910
recs, n_dense, index,
2915
info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL
2916
? REC_INFO_MIN_REC_FLAG : 0;
2918
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
2922
} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
2923
/* This is a leaf page in a secondary index. */
2924
if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream,
2930
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
2933
page_zip_fields_free(index);
2934
mem_heap_free(heap);
2938
/* This is a leaf page in a clustered index. */
2939
if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip,
2947
if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
2953
ut_a(page_is_comp(page));
2954
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
2956
page_zip_fields_free(index);
2957
mem_heap_free(heap);
2959
page_zip_stat_t* zip_stat
2960
= &page_zip_stat[page_zip->ssize - 1];
2961
zip_stat->decompressed++;
2962
zip_stat->decompressed_usec += ut_time_us(NULL) - usec;
2965
/* Update the stat counter for LRU policy. */
2966
buf_LRU_stat_inc_unzip();
2971
#ifdef UNIV_ZIP_DEBUG
2972
/**************************************************************************
2973
Dump a block of memory on the standard error stream. */
2976
page_zip_hexdump_func(
2977
/*==================*/
2978
const char* name, /* in: name of the data structure */
2979
const void* buf, /* in: data */
2980
ulint size) /* in: length of the data, in bytes */
2982
const byte* s = buf;
2984
const ulint width = 32; /* bytes per line */
2986
fprintf(stderr, "%s:\n", name);
2988
for (addr = 0; addr < size; addr += width) {
2991
fprintf(stderr, "%04lx ", (ulong) addr);
2993
i = ut_min(width, size - addr);
2996
fprintf(stderr, "%02x", *s++);
3003
#define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size)
3005
/* Flag: make page_zip_validate() compare page headers only */
3006
UNIV_INTERN ibool page_zip_validate_header_only = FALSE;
3008
/**************************************************************************
3009
Check that the compressed and decompressed pages match. */
3012
page_zip_validate_low(
3013
/*==================*/
3014
/* out: TRUE if valid, FALSE if not */
3015
const page_zip_des_t* page_zip,/* in: compressed page */
3016
const page_t* page, /* in: uncompressed page */
3017
ibool sloppy) /* in: FALSE=strict,
3018
TRUE=ignore the MIN_REC_FLAG */
3020
page_zip_des_t temp_page_zip;
3021
byte* temp_page_buf;
3025
if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
3026
FIL_PAGE_LSN - FIL_PAGE_PREV)
3027
|| memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2)
3028
|| memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
3029
PAGE_DATA - FIL_PAGE_DATA)) {
3030
page_zip_fail(("page_zip_validate: page header\n"));
3031
page_zip_hexdump(page_zip, sizeof *page_zip);
3032
page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
3033
page_zip_hexdump(page, UNIV_PAGE_SIZE);
3037
ut_a(page_is_comp(page));
3039
if (page_zip_validate_header_only) {
3043
/* page_zip_decompress() expects the uncompressed page to be
3044
UNIV_PAGE_SIZE aligned. */
3045
temp_page_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
3046
temp_page = ut_align(temp_page_buf, UNIV_PAGE_SIZE);
3048
#ifdef UNIV_DEBUG_VALGRIND
3049
/* Get detailed information on the valid bits in case the
3050
UNIV_MEM_ASSERT_RW() checks fail. The v-bits of page[],
3051
page_zip->data[] or page_zip could be viewed at temp_page[] or
3052
temp_page_zip in a debugger when running valgrind --db-attach. */
3053
VALGRIND_GET_VBITS(page, temp_page, UNIV_PAGE_SIZE);
3054
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
3055
VALGRIND_GET_VBITS(page_zip, &temp_page_zip, sizeof temp_page_zip);
3056
UNIV_MEM_ASSERT_RW(page_zip, sizeof *page_zip);
3057
VALGRIND_GET_VBITS(page_zip->data, temp_page,
3058
page_zip_get_size(page_zip));
3059
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3060
#endif /* UNIV_DEBUG_VALGRIND */
3062
temp_page_zip = *page_zip;
3063
valid = page_zip_decompress(&temp_page_zip, temp_page);
3065
fputs("page_zip_validate(): failed to decompress\n", stderr);
3068
if (page_zip->n_blobs != temp_page_zip.n_blobs) {
3069
page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n",
3070
page_zip->n_blobs, temp_page_zip.n_blobs));
3074
if (page_zip->m_start != temp_page_zip.m_start) {
3075
page_zip_fail(("page_zip_validate: m_start: %u!=%u\n",
3076
page_zip->m_start, temp_page_zip.m_start));
3079
#endif /* UNIV_DEBUG */
3080
if (page_zip->m_end != temp_page_zip.m_end) {
3081
page_zip_fail(("page_zip_validate: m_end: %u!=%u\n",
3082
page_zip->m_end, temp_page_zip.m_end));
3085
if (page_zip->m_nonempty != temp_page_zip.m_nonempty) {
3086
page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n",
3087
page_zip->m_nonempty,
3088
temp_page_zip.m_nonempty));
3091
if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER,
3092
UNIV_PAGE_SIZE - PAGE_HEADER - FIL_PAGE_DATA_END)) {
3094
/* In crash recovery, the "minimum record" flag may be
3095
set incorrectly until the mini-transaction is
3096
committed. Let us tolerate that difference when we
3097
are performing a sloppy validation. */
3100
byte info_bits_diff;
3102
= rec_get_next_offs(page + PAGE_NEW_INFIMUM,
3104
ut_a(offset >= PAGE_NEW_SUPREMUM);
3105
offset -= 5 /* REC_NEW_INFO_BITS */;
3107
info_bits_diff = page[offset] ^ temp_page[offset];
3109
if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
3110
temp_page[offset] = page[offset];
3112
if (!memcmp(page + PAGE_HEADER,
3113
temp_page + PAGE_HEADER,
3114
UNIV_PAGE_SIZE - PAGE_HEADER
3115
- FIL_PAGE_DATA_END)) {
3117
/* Only the minimum record flag
3118
differed. Let us ignore it. */
3119
page_zip_fail(("page_zip_validate: "
3122
"%lu,%lu,0x%02lx)\n",
3123
page_get_space_id(page),
3124
page_get_page_no(page),
3125
(ulong) page[offset]));
3130
page_zip_fail(("page_zip_validate: content\n"));
3136
page_zip_hexdump(page_zip, sizeof *page_zip);
3137
page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
3138
page_zip_hexdump(page, UNIV_PAGE_SIZE);
3139
page_zip_hexdump(temp_page, UNIV_PAGE_SIZE);
3141
ut_free(temp_page_buf);
3145
/**************************************************************************
3146
Check that the compressed and decompressed pages match. */
3151
/* out: TRUE if valid, FALSE if not */
3152
const page_zip_des_t* page_zip,/* in: compressed page */
3153
const page_t* page) /* in: uncompressed page */
3155
return(page_zip_validate_low(page_zip, page,
3156
recv_recovery_is_on()));
3158
#endif /* UNIV_ZIP_DEBUG */
3163
page_zip_header_cmp(
3164
/*================*/
3166
const page_zip_des_t* page_zip,/* in: compressed page */
3167
const byte* page) /* in: uncompressed page */
3169
ut_ad(!memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
3170
FIL_PAGE_LSN - FIL_PAGE_PREV));
3171
ut_ad(!memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE,
3173
ut_ad(!memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
3174
PAGE_DATA - FIL_PAGE_DATA));
3178
#endif /* UNIV_DEBUG */
3180
/**************************************************************************
3181
Write a record on the compressed page that contains externally stored
3182
columns. The data must already have been written to the uncompressed page. */
3185
page_zip_write_rec_ext(
3186
/*===================*/
3187
/* out: end of modification log */
3188
page_zip_des_t* page_zip, /* in/out: compressed page */
3189
const page_t* page, /* in: page containing rec */
3190
const byte* rec, /* in: record being written */
3191
dict_index_t* index, /* in: record descriptor */
3192
const ulint* offsets, /* in: rec_get_offsets(rec, index) */
3193
ulint create, /* in: nonzero=insert, zero=update */
3194
ulint trx_id_col, /* in: position of DB_TRX_ID */
3195
ulint heap_no, /* in: heap number of rec */
3196
byte* storage, /* in: end of dense page directory */
3197
byte* data) /* in: end of modification log */
3199
const byte* start = rec;
3202
byte* externs = storage;
3203
ulint n_ext = rec_offs_n_extern(offsets);
3205
ut_ad(rec_offs_validate(rec, index, offsets));
3206
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3207
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3208
rec_offs_extra_size(offsets));
3210
externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3211
* (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW);
3213
/* Note that this will not take into account
3214
the BLOB columns of rec if create==TRUE. */
3215
ut_ad(data + rec_offs_data_size(offsets)
3216
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3217
- n_ext * BTR_EXTERN_FIELD_REF_SIZE
3218
< externs - BTR_EXTERN_FIELD_REF_SIZE * page_zip->n_blobs);
3221
ulint blob_no = page_zip_get_n_prev_extern(
3222
page_zip, rec, index);
3223
byte* ext_end = externs - page_zip->n_blobs
3224
* BTR_EXTERN_FIELD_REF_SIZE;
3225
ut_ad(blob_no <= page_zip->n_blobs);
3226
externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
3229
page_zip->n_blobs += n_ext;
3230
ASSERT_ZERO_BLOB(ext_end - n_ext
3231
* BTR_EXTERN_FIELD_REF_SIZE);
3232
memmove(ext_end - n_ext
3233
* BTR_EXTERN_FIELD_REF_SIZE,
3238
ut_a(blob_no + n_ext <= page_zip->n_blobs);
3241
for (i = 0; i < rec_offs_n_fields(offsets); i++) {
3244
if (UNIV_UNLIKELY(i == trx_id_col)) {
3245
ut_ad(!rec_offs_nth_extern(offsets,
3247
ut_ad(!rec_offs_nth_extern(offsets,
3249
/* Locate trx_id and roll_ptr. */
3250
src = rec_get_nth_field(rec, offsets,
3252
ut_ad(len == DATA_TRX_ID_LEN);
3253
ut_ad(src + DATA_TRX_ID_LEN
3254
== rec_get_nth_field(
3257
ut_ad(len == DATA_ROLL_PTR_LEN);
3259
/* Log the preceding fields. */
3260
ASSERT_ZERO(data, src - start);
3261
memcpy(data, start, src - start);
3262
data += src - start;
3263
start = src + (DATA_TRX_ID_LEN
3264
+ DATA_ROLL_PTR_LEN);
3266
/* Store trx_id and roll_ptr. */
3267
memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3269
src, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3270
i++; /* skip also roll_ptr */
3271
} else if (rec_offs_nth_extern(offsets, i)) {
3272
src = rec_get_nth_field(rec, offsets,
3275
ut_ad(dict_index_is_clust(index));
3277
>= BTR_EXTERN_FIELD_REF_SIZE);
3278
src += len - BTR_EXTERN_FIELD_REF_SIZE;
3280
ASSERT_ZERO(data, src - start);
3281
memcpy(data, start, src - start);
3282
data += src - start;
3283
start = src + BTR_EXTERN_FIELD_REF_SIZE;
3285
/* Store the BLOB pointer. */
3286
externs -= BTR_EXTERN_FIELD_REF_SIZE;
3287
ut_ad(data < externs);
3288
memcpy(externs, src, BTR_EXTERN_FIELD_REF_SIZE);
3292
/* Log the last bytes of the record. */
3293
len = rec_offs_data_size(offsets) - (start - rec);
3295
ASSERT_ZERO(data, len);
3296
memcpy(data, start, len);
3302
/**************************************************************************
3303
Write an entire record on the compressed page. The data must already
3304
have been written to the uncompressed page. */
3309
page_zip_des_t* page_zip,/* in/out: compressed page */
3310
const byte* rec, /* in: record being written */
3311
dict_index_t* index, /* in: the index the record belongs to */
3312
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
3313
ulint create) /* in: nonzero=insert, zero=update */
3321
ut_ad(buf_frame_get_page_zip(rec) == page_zip);
3322
ut_ad(page_zip_simple_validate(page_zip));
3323
ut_ad(page_zip_get_size(page_zip)
3324
> PAGE_DATA + page_zip_dir_size(page_zip));
3325
ut_ad(rec_offs_comp(offsets));
3326
ut_ad(rec_offs_validate(rec, index, offsets));
3328
ut_ad(page_zip->m_start >= PAGE_DATA);
3330
page = page_align(rec);
3332
ut_ad(page_zip_header_cmp(page_zip, page));
3333
ut_ad(page_simple_validate_new((page_t*) page));
3335
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3336
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3337
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3338
rec_offs_extra_size(offsets));
3340
slot = page_zip_dir_find(page_zip, page_offset(rec));
3342
/* Copy the delete mark. */
3343
if (rec_get_deleted_flag(rec, TRUE)) {
3344
*slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8;
3346
*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
3349
ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
3350
ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + UNIV_PAGE_SIZE
3351
- PAGE_DIR - PAGE_DIR_SLOT_SIZE
3352
* page_dir_get_n_slots(page));
3354
heap_no = rec_get_heap_no_new(rec);
3355
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */
3356
ut_ad(heap_no < page_dir_get_n_heap(page));
3358
/* Append to the modification log. */
3359
data = page_zip->data + page_zip->m_end;
3362
/* Identify the record by writing its heap number - 1.
3363
0 is reserved to indicate the end of the modification log. */
3365
if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
3366
*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
3369
*data++ = (byte) ((heap_no - 1) << 1);
3373
const byte* start = rec - rec_offs_extra_size(offsets);
3374
const byte* b = rec - REC_N_NEW_EXTRA_BYTES;
3376
/* Write the extra bytes backwards, so that
3377
rec_offs_extra_size() can be easily computed in
3378
page_zip_apply_log() by invoking
3379
rec_get_offsets_reverse(). */
3381
while (b != start) {
3387
/* Write the data bytes. Store the uncompressed bytes separately. */
3388
storage = page_zip->data + page_zip_get_size(page_zip)
3389
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3390
* PAGE_ZIP_DIR_SLOT_SIZE;
3392
if (page_is_leaf(page)) {
3395
if (dict_index_is_clust(index)) {
3398
trx_id_col = dict_index_get_sys_col_pos(index,
3400
ut_ad(trx_id_col != ULINT_UNDEFINED);
3402
/* Store separately trx_id, roll_ptr and
3403
the BTR_EXTERN_FIELD_REF of each BLOB column. */
3404
if (rec_offs_any_extern(offsets)) {
3405
data = page_zip_write_rec_ext(
3407
rec, index, offsets, create,
3408
trx_id_col, heap_no, storage, data);
3410
/* Locate trx_id and roll_ptr. */
3412
= rec_get_nth_field(rec, offsets,
3414
ut_ad(len == DATA_TRX_ID_LEN);
3415
ut_ad(src + DATA_TRX_ID_LEN
3416
== rec_get_nth_field(
3418
trx_id_col + 1, &len));
3419
ut_ad(len == DATA_ROLL_PTR_LEN);
3421
/* Log the preceding fields. */
3422
ASSERT_ZERO(data, src - rec);
3423
memcpy(data, rec, src - rec);
3426
/* Store trx_id and roll_ptr. */
3428
- (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3431
DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3433
src += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
3435
/* Log the last bytes of the record. */
3436
len = rec_offs_data_size(offsets)
3439
ASSERT_ZERO(data, len);
3440
memcpy(data, src, len);
3444
/* Leaf page of a secondary index:
3445
no externally stored columns */
3446
ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
3447
== ULINT_UNDEFINED);
3448
ut_ad(!rec_offs_any_extern(offsets));
3450
/* Log the entire record. */
3451
len = rec_offs_data_size(offsets);
3453
ASSERT_ZERO(data, len);
3454
memcpy(data, rec, len);
3458
/* This is a node pointer page. */
3461
/* Non-leaf nodes should not have any externally
3463
ut_ad(!rec_offs_any_extern(offsets));
3465
/* Copy the data bytes, except node_ptr. */
3466
len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE;
3467
ut_ad(data + len < storage - REC_NODE_PTR_SIZE
3468
* (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW));
3469
ASSERT_ZERO(data, len);
3470
memcpy(data, rec, len);
3473
/* Copy the node pointer to the uncompressed area. */
3474
memcpy(storage - REC_NODE_PTR_SIZE
3481
ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip));
3482
page_zip->m_end = data - page_zip->data;
3483
page_zip->m_nonempty = TRUE;
3485
#ifdef UNIV_ZIP_DEBUG
3486
ut_a(page_zip_validate(page_zip, page_align(rec)));
3487
#endif /* UNIV_ZIP_DEBUG */
3490
/***************************************************************
3491
Parses a log record of writing a BLOB pointer of a record. */
3494
page_zip_parse_write_blob_ptr(
3495
/*==========================*/
3496
/* out: end of log record or NULL */
3497
byte* ptr, /* in: redo log buffer */
3498
byte* end_ptr,/* in: redo log buffer end */
3499
page_t* page, /* in/out: uncompressed page */
3500
page_zip_des_t* page_zip)/* in/out: compressed page */
3505
ut_ad(!page == !page_zip);
3508
(end_ptr < ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE))) {
3513
offset = mach_read_from_2(ptr);
3514
z_offset = mach_read_from_2(ptr + 2);
3516
if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3517
|| UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3518
|| UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3520
recv_sys->found_corrupt_log = TRUE;
3526
if (UNIV_UNLIKELY(!page_zip)
3527
|| UNIV_UNLIKELY(!page_is_leaf(page))) {
3532
#ifdef UNIV_ZIP_DEBUG
3533
ut_a(page_zip_validate(page_zip, page));
3534
#endif /* UNIV_ZIP_DEBUG */
3536
memcpy(page + offset,
3537
ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3538
memcpy(page_zip->data + z_offset,
3539
ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3541
#ifdef UNIV_ZIP_DEBUG
3542
ut_a(page_zip_validate(page_zip, page));
3543
#endif /* UNIV_ZIP_DEBUG */
3546
return(ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE));
3549
/**************************************************************************
3550
Write a BLOB pointer of a record on the leaf page of a clustered index.
3551
The information must already have been updated on the uncompressed page. */
3554
page_zip_write_blob_ptr(
3555
/*====================*/
3556
page_zip_des_t* page_zip,/* in/out: compressed page */
3557
const byte* rec, /* in/out: record whose data is being
3559
dict_index_t* index, /* in: index of the page */
3560
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
3561
ulint n, /* in: column index */
3562
mtr_t* mtr) /* in: mini-transaction handle,
3563
or NULL if no logging is needed */
3567
const page_t* page = page_align(rec);
3571
ut_ad(buf_frame_get_page_zip(rec) == page_zip);
3572
ut_ad(page_simple_validate_new((page_t*) page));
3573
ut_ad(page_zip_simple_validate(page_zip));
3574
ut_ad(page_zip_get_size(page_zip)
3575
> PAGE_DATA + page_zip_dir_size(page_zip));
3576
ut_ad(rec_offs_comp(offsets));
3577
ut_ad(rec_offs_validate(rec, NULL, offsets));
3578
ut_ad(rec_offs_any_extern(offsets));
3579
ut_ad(rec_offs_nth_extern(offsets, n));
3581
ut_ad(page_zip->m_start >= PAGE_DATA);
3582
ut_ad(page_zip_header_cmp(page_zip, page));
3584
ut_ad(page_is_leaf(page));
3585
ut_ad(dict_index_is_clust(index));
3587
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3588
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3589
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3590
rec_offs_extra_size(offsets));
3592
blob_no = page_zip_get_n_prev_extern(page_zip, rec, index)
3593
+ rec_get_n_extern_new(rec, index, n);
3594
ut_a(blob_no < page_zip->n_blobs);
3596
externs = page_zip->data + page_zip_get_size(page_zip)
3597
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3598
* (PAGE_ZIP_DIR_SLOT_SIZE
3599
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3601
field = rec_get_nth_field(rec, offsets, n, &len);
3603
externs -= (blob_no + 1) * BTR_EXTERN_FIELD_REF_SIZE;
3604
field += len - BTR_EXTERN_FIELD_REF_SIZE;
3606
memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE);
3608
#ifdef UNIV_ZIP_DEBUG
3609
ut_a(page_zip_validate(page_zip, page));
3610
#endif /* UNIV_ZIP_DEBUG */
3613
byte* log_ptr = mlog_open(
3614
mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE);
3615
if (UNIV_UNLIKELY(!log_ptr)) {
3619
log_ptr = mlog_write_initial_log_record_fast(
3620
(byte*) field, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr);
3621
mach_write_to_2(log_ptr, page_offset(field));
3623
mach_write_to_2(log_ptr, externs - page_zip->data);
3625
memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE);
3626
log_ptr += BTR_EXTERN_FIELD_REF_SIZE;
3627
mlog_close(mtr, log_ptr);
3631
/***************************************************************
3632
Parses a log record of writing the node pointer of a record. */
3635
page_zip_parse_write_node_ptr(
3636
/*==========================*/
3637
/* out: end of log record or NULL */
3638
byte* ptr, /* in: redo log buffer */
3639
byte* end_ptr,/* in: redo log buffer end */
3640
page_t* page, /* in/out: uncompressed page */
3641
page_zip_des_t* page_zip)/* in/out: compressed page */
3646
ut_ad(!page == !page_zip);
3648
if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) {
3653
offset = mach_read_from_2(ptr);
3654
z_offset = mach_read_from_2(ptr + 2);
3656
if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3657
|| UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3658
|| UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3660
recv_sys->found_corrupt_log = TRUE;
3671
if (UNIV_UNLIKELY(!page_zip)
3672
|| UNIV_UNLIKELY(page_is_leaf(page))) {
3677
#ifdef UNIV_ZIP_DEBUG
3678
ut_a(page_zip_validate(page_zip, page));
3679
#endif /* UNIV_ZIP_DEBUG */
3681
field = page + offset;
3682
storage = page_zip->data + z_offset;
3684
storage_end = page_zip->data + page_zip_get_size(page_zip)
3685
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3686
* PAGE_ZIP_DIR_SLOT_SIZE;
3688
heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE;
3690
if (UNIV_UNLIKELY((storage_end - storage) % REC_NODE_PTR_SIZE)
3691
|| UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW)
3692
|| UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) {
3697
memcpy(field, ptr + 4, REC_NODE_PTR_SIZE);
3698
memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE);
3700
#ifdef UNIV_ZIP_DEBUG
3701
ut_a(page_zip_validate(page_zip, page));
3702
#endif /* UNIV_ZIP_DEBUG */
3705
return(ptr + (2 + 2 + REC_NODE_PTR_SIZE));
3708
/**************************************************************************
3709
Write the node pointer of a record on a non-leaf compressed page. */
3712
page_zip_write_node_ptr(
3713
/*====================*/
3714
page_zip_des_t* page_zip,/* in/out: compressed page */
3715
byte* rec, /* in/out: record */
3716
ulint size, /* in: data size of rec */
3717
ulint ptr, /* in: node pointer */
3718
mtr_t* mtr) /* in: mini-transaction, or NULL */
3722
page_t* page = page_align(rec);
3724
ut_ad(buf_frame_get_page_zip(rec) == page_zip);
3725
ut_ad(page_simple_validate_new(page));
3726
ut_ad(page_zip_simple_validate(page_zip));
3727
ut_ad(page_zip_get_size(page_zip)
3728
> PAGE_DATA + page_zip_dir_size(page_zip));
3729
ut_ad(page_rec_is_comp(rec));
3731
ut_ad(page_zip->m_start >= PAGE_DATA);
3732
ut_ad(page_zip_header_cmp(page_zip, page));
3734
ut_ad(!page_is_leaf(page));
3736
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3737
UNIV_MEM_ASSERT_RW(rec, size);
3739
storage = page_zip->data + page_zip_get_size(page_zip)
3740
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3741
* PAGE_ZIP_DIR_SLOT_SIZE
3742
- (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
3743
field = rec + size - REC_NODE_PTR_SIZE;
3745
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3746
ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE));
3747
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3748
#if REC_NODE_PTR_SIZE != 4
3749
# error "REC_NODE_PTR_SIZE != 4"
3751
mach_write_to_4(field, ptr);
3752
memcpy(storage, field, REC_NODE_PTR_SIZE);
3755
byte* log_ptr = mlog_open(mtr,
3756
11 + 2 + 2 + REC_NODE_PTR_SIZE);
3757
if (UNIV_UNLIKELY(!log_ptr)) {
3761
log_ptr = mlog_write_initial_log_record_fast(
3762
field, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr);
3763
mach_write_to_2(log_ptr, page_offset(field));
3765
mach_write_to_2(log_ptr, storage - page_zip->data);
3767
memcpy(log_ptr, field, REC_NODE_PTR_SIZE);
3768
log_ptr += REC_NODE_PTR_SIZE;
3769
mlog_close(mtr, log_ptr);
3773
/**************************************************************************
3774
Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
3777
page_zip_write_trx_id_and_roll_ptr(
3778
/*===============================*/
3779
page_zip_des_t* page_zip,/* in/out: compressed page */
3780
byte* rec, /* in/out: record */
3781
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
3782
ulint trx_id_col,/* in: column number of TRX_ID in rec */
3783
dulint trx_id, /* in: transaction identifier */
3784
dulint roll_ptr)/* in: roll_ptr */
3788
page_t* page = page_align(rec);
3791
ut_ad(buf_frame_get_page_zip(rec) == page_zip);
3792
ut_ad(page_simple_validate_new(page));
3793
ut_ad(page_zip_simple_validate(page_zip));
3794
ut_ad(page_zip_get_size(page_zip)
3795
> PAGE_DATA + page_zip_dir_size(page_zip));
3796
ut_ad(rec_offs_validate(rec, NULL, offsets));
3797
ut_ad(rec_offs_comp(offsets));
3799
ut_ad(page_zip->m_start >= PAGE_DATA);
3800
ut_ad(page_zip_header_cmp(page_zip, page));
3802
ut_ad(page_is_leaf(page));
3804
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3806
storage = page_zip->data + page_zip_get_size(page_zip)
3807
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3808
* PAGE_ZIP_DIR_SLOT_SIZE
3809
- (rec_get_heap_no_new(rec) - 1)
3810
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3812
#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
3813
# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
3815
field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
3816
ut_ad(len == DATA_TRX_ID_LEN);
3817
ut_ad(field + DATA_TRX_ID_LEN
3818
== rec_get_nth_field(rec, offsets, trx_id_col + 1, &len));
3819
ut_ad(len == DATA_ROLL_PTR_LEN);
3820
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
3821
ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
3822
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3823
#if DATA_TRX_ID_LEN != 6
3824
# error "DATA_TRX_ID_LEN != 6"
3826
mach_write_to_6(field, trx_id);
3827
#if DATA_ROLL_PTR_LEN != 7
3828
# error "DATA_ROLL_PTR_LEN != 7"
3830
mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
3831
memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3833
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3834
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3835
rec_offs_extra_size(offsets));
3836
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3839
#ifdef UNIV_ZIP_DEBUG
3840
/* Set this variable in a debugger to disable page_zip_clear_rec().
3841
The only observable effect should be the compression ratio due to
3842
deleted records not being zeroed out. In rare cases, there can be
3843
page_zip_validate() failures on the node_ptr, trx_id and roll_ptr
3844
columns if the space is reallocated for a smaller record. */
3845
UNIV_INTERN ibool page_zip_clear_rec_disable;
3846
#endif /* UNIV_ZIP_DEBUG */
3848
/**************************************************************************
3849
Clear an area on the uncompressed and compressed page, if possible. */
3854
page_zip_des_t* page_zip,/* in/out: compressed page */
3855
byte* rec, /* in: record to clear */
3856
dict_index_t* index, /* in: index of rec */
3857
const ulint* offsets)/* in: rec_get_offsets(rec, index) */
3860
page_t* page = page_align(rec);
3861
/* page_zip_validate() would fail here if a record
3862
containing externally stored columns is being deleted. */
3863
ut_ad(rec_offs_validate(rec, index, offsets));
3864
ut_ad(!page_zip_dir_find(page_zip, page_offset(rec)));
3865
ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec)));
3866
ut_ad(page_zip_header_cmp(page_zip, page));
3868
heap_no = rec_get_heap_no_new(rec);
3869
ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
3871
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3872
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3873
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3874
rec_offs_extra_size(offsets));
3877
#ifdef UNIV_ZIP_DEBUG
3878
!page_zip_clear_rec_disable &&
3879
#endif /* UNIV_ZIP_DEBUG */
3881
+ 1 + ((heap_no - 1) >= 64)/* size of the log entry */
3882
+ page_zip_get_trailer_len(page_zip,
3883
dict_index_is_clust(index), NULL)
3884
< page_zip_get_size(page_zip)) {
3887
/* Clear only the data bytes, because the allocator and
3888
the decompressor depend on the extra bytes. */
3889
memset(rec, 0, rec_offs_data_size(offsets));
3891
if (!page_is_leaf(page)) {
3892
/* Clear node_ptr on the compressed page. */
3893
byte* storage = page_zip->data
3894
+ page_zip_get_size(page_zip)
3895
- (page_dir_get_n_heap(page)
3896
- PAGE_HEAP_NO_USER_LOW)
3897
* PAGE_ZIP_DIR_SLOT_SIZE;
3899
memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
3900
0, REC_NODE_PTR_SIZE);
3901
} else if (dict_index_is_clust(index)) {
3902
/* Clear trx_id and roll_ptr on the compressed page. */
3903
byte* storage = page_zip->data
3904
+ page_zip_get_size(page_zip)
3905
- (page_dir_get_n_heap(page)
3906
- PAGE_HEAP_NO_USER_LOW)
3907
* PAGE_ZIP_DIR_SLOT_SIZE;
3909
memset(storage - (heap_no - 1)
3910
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
3911
0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3914
/* Log that the data was zeroed out. */
3915
data = page_zip->data + page_zip->m_end;
3917
if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
3918
*data++ = (byte) (0x80 | (heap_no - 1) >> 7);
3921
*data++ = (byte) ((heap_no - 1) << 1 | 1);
3923
ut_ad((ulint) (data - page_zip->data)
3924
< page_zip_get_size(page_zip));
3925
page_zip->m_end = data - page_zip->data;
3926
page_zip->m_nonempty = TRUE;
3927
} else if (page_is_leaf(page) && dict_index_is_clust(index)) {
3928
/* Do not clear the record, because there is not enough space
3929
to log the operation. */
3931
if (rec_offs_any_extern(offsets)) {
3934
for (i = rec_offs_n_fields(offsets); i--; ) {
3935
/* Clear all BLOB pointers in order to make
3936
page_zip_validate() pass. */
3937
if (rec_offs_nth_extern(offsets, i)) {
3939
byte* field = rec_get_nth_field(
3940
rec, offsets, i, &len);
3942
- BTR_EXTERN_FIELD_REF_SIZE,
3943
0, BTR_EXTERN_FIELD_REF_SIZE);
3949
#ifdef UNIV_ZIP_DEBUG
3950
ut_a(page_zip_validate(page_zip, page));
3951
#endif /* UNIV_ZIP_DEBUG */
3954
/**************************************************************************
3955
Write the "deleted" flag of a record on a compressed page. The flag must
3956
already have been written on the uncompressed page. */
3959
page_zip_rec_set_deleted(
3960
/*=====================*/
3961
page_zip_des_t* page_zip,/* in/out: compressed page */
3962
const byte* rec, /* in: record on the uncompressed page */
3963
ulint flag) /* in: the deleted flag (nonzero=TRUE) */
3965
byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
3967
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3969
*slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8);
3971
*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
3973
#ifdef UNIV_ZIP_DEBUG
3974
ut_a(page_zip_validate(page_zip, page_align(rec)));
3975
#endif /* UNIV_ZIP_DEBUG */
3978
/**************************************************************************
3979
Write the "owned" flag of a record on a compressed page. The n_owned field
3980
must already have been written on the uncompressed page. */
3983
page_zip_rec_set_owned(
3984
/*===================*/
3985
page_zip_des_t* page_zip,/* in/out: compressed page */
3986
const byte* rec, /* in: record on the uncompressed page */
3987
ulint flag) /* in: the owned flag (nonzero=TRUE) */
3989
byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
3991
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3993
*slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
3995
*slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8);
3999
/**************************************************************************
4000
Insert a record to the dense page directory. */
4003
page_zip_dir_insert(
4004
/*================*/
4005
page_zip_des_t* page_zip,/* in/out: compressed page */
4006
const byte* prev_rec,/* in: record after which to insert */
4007
const byte* free_rec,/* in: record from which rec was
4008
allocated, or NULL */
4009
byte* rec) /* in: record to insert */
4015
ut_ad(prev_rec != rec);
4016
ut_ad(page_rec_get_next((rec_t*) prev_rec) == rec);
4017
ut_ad(page_zip_simple_validate(page_zip));
4019
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4021
if (page_rec_is_infimum(prev_rec)) {
4022
/* Use the first slot. */
4023
slot_rec = page_zip->data + page_zip_get_size(page_zip);
4025
byte* end = page_zip->data + page_zip_get_size(page_zip);
4026
byte* start = end - page_zip_dir_user_size(page_zip);
4028
if (UNIV_LIKELY(!free_rec)) {
4029
/* PAGE_N_RECS was already incremented
4030
in page_cur_insert_rec_zip(), but the
4031
dense directory slot at that position
4032
contains garbage. Skip it. */
4033
start += PAGE_ZIP_DIR_SLOT_SIZE;
4036
slot_rec = page_zip_dir_find_low(start, end,
4037
page_offset(prev_rec));
4041
/* Read the old n_dense (n_heap may have been incremented). */
4042
n_dense = page_dir_get_n_heap(page_zip->data)
4043
- (PAGE_HEAP_NO_USER_LOW + 1);
4045
if (UNIV_LIKELY_NULL(free_rec)) {
4046
/* The record was allocated from the free list.
4047
Shift the dense directory only up to that slot.
4048
Note that in this case, n_dense is actually
4049
off by one, because page_cur_insert_rec_zip()
4050
did not increment n_heap. */
4051
ut_ad(rec_get_heap_no_new(rec) < n_dense + 1
4052
+ PAGE_HEAP_NO_USER_LOW);
4053
ut_ad(rec >= free_rec);
4054
slot_free = page_zip_dir_find(page_zip, page_offset(free_rec));
4056
slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
4058
/* The record was allocated from the heap.
4059
Shift the entire dense directory. */
4060
ut_ad(rec_get_heap_no_new(rec) == n_dense
4061
+ PAGE_HEAP_NO_USER_LOW);
4063
/* Shift to the end of the dense page directory. */
4064
slot_free = page_zip->data + page_zip_get_size(page_zip)
4065
- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
4068
/* Shift the dense directory to allocate place for rec. */
4069
memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free,
4070
slot_rec - slot_free);
4072
/* Write the entry for the inserted record.
4073
The "owned" and "deleted" flags must be zero. */
4074
mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, page_offset(rec));
4077
/**************************************************************************
4078
Shift the dense page directory and the array of BLOB pointers
4079
when a record is deleted. */
4082
page_zip_dir_delete(
4083
/*================*/
4084
page_zip_des_t* page_zip,/* in/out: compressed page */
4085
byte* rec, /* in: record to delete */
4086
dict_index_t* index, /* in: index of rec */
4087
const ulint* offsets,/* in: rec_get_offsets(rec) */
4088
const byte* free) /* in: previous start of the free list */
4093
page_t* page = page_align(rec);
4095
ut_ad(rec_offs_validate(rec, index, offsets));
4096
ut_ad(rec_offs_comp(offsets));
4098
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4099
UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
4100
UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
4101
rec_offs_extra_size(offsets));
4103
slot_rec = page_zip_dir_find(page_zip, page_offset(rec));
4107
/* This could not be done before page_zip_dir_find(). */
4108
page_header_set_field(page, page_zip, PAGE_N_RECS,
4109
(ulint)(page_get_n_recs(page) - 1));
4111
if (UNIV_UNLIKELY(!free)) {
4112
/* Make the last slot the start of the free list. */
4113
slot_free = page_zip->data + page_zip_get_size(page_zip)
4114
- PAGE_ZIP_DIR_SLOT_SIZE
4115
* (page_dir_get_n_heap(page_zip->data)
4116
- PAGE_HEAP_NO_USER_LOW);
4118
slot_free = page_zip_dir_find_free(page_zip,
4120
ut_a(slot_free < slot_rec);
4121
/* Grow the free list by one slot by moving the start. */
4122
slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
4125
if (UNIV_LIKELY(slot_rec > slot_free)) {
4126
memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
4128
slot_rec - slot_free);
4131
/* Write the entry for the deleted record.
4132
The "owned" and "deleted" flags will be cleared. */
4133
mach_write_to_2(slot_free, page_offset(rec));
4135
if (!page_is_leaf(page) || !dict_index_is_clust(index)) {
4136
ut_ad(!rec_offs_any_extern(offsets));
4140
n_ext = rec_offs_n_extern(offsets);
4141
if (UNIV_UNLIKELY(n_ext)) {
4142
/* Shift and zero fill the array of BLOB pointers. */
4147
blob_no = page_zip_get_n_prev_extern(page_zip, rec, index);
4148
ut_a(blob_no + n_ext <= page_zip->n_blobs);
4150
externs = page_zip->data + page_zip_get_size(page_zip)
4151
- (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
4152
* (PAGE_ZIP_DIR_SLOT_SIZE
4153
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4155
ext_end = externs - page_zip->n_blobs
4156
* BTR_EXTERN_FIELD_REF_SIZE;
4157
externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
4159
page_zip->n_blobs -= n_ext;
4160
/* Shift and zero fill the array. */
4161
memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end,
4162
(page_zip->n_blobs - blob_no)
4163
* BTR_EXTERN_FIELD_REF_SIZE);
4164
memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE);
4168
/* The compression algorithm expects info_bits and n_owned
4169
to be 0 for deleted records. */
4170
rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
4172
page_zip_clear_rec(page_zip, rec, index, offsets);
4175
/**************************************************************************
4176
Add a slot to the dense page directory. */
4179
page_zip_dir_add_slot(
4180
/*==================*/
4181
page_zip_des_t* page_zip, /* in/out: compressed page */
4182
ulint is_clustered) /* in: nonzero for clustered index,
4189
ut_ad(page_is_comp(page_zip->data));
4190
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4192
/* Read the old n_dense (n_heap has already been incremented). */
4193
n_dense = page_dir_get_n_heap(page_zip->data)
4194
- (PAGE_HEAP_NO_USER_LOW + 1);
4196
dir = page_zip->data + page_zip_get_size(page_zip)
4197
- PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
4199
if (!page_is_leaf(page_zip->data)) {
4200
ut_ad(!page_zip->n_blobs);
4201
stored = dir - n_dense * REC_NODE_PTR_SIZE;
4202
} else if (UNIV_UNLIKELY(is_clustered)) {
4203
/* Move the BLOB pointer array backwards to make space for the
4204
roll_ptr and trx_id columns and the dense directory slot. */
4207
stored = dir - n_dense
4208
* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4210
- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4212
- (PAGE_ZIP_DIR_SLOT_SIZE
4213
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4214
PAGE_ZIP_DIR_SLOT_SIZE
4215
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4216
memmove(externs - (PAGE_ZIP_DIR_SLOT_SIZE
4217
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4218
externs, stored - externs);
4221
- page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4222
ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE,
4223
PAGE_ZIP_DIR_SLOT_SIZE);
4226
/* Move the uncompressed area backwards to make space
4227
for one directory slot. */
4228
memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, dir - stored);
4231
/***************************************************************
4232
Parses a log record of writing to the header of a page. */
4235
page_zip_parse_write_header(
4236
/*========================*/
4237
/* out: end of log record or NULL */
4238
byte* ptr, /* in: redo log buffer */
4239
byte* end_ptr,/* in: redo log buffer end */
4240
page_t* page, /* in/out: uncompressed page */
4241
page_zip_des_t* page_zip)/* in/out: compressed page */
4246
ut_ad(ptr && end_ptr);
4247
ut_ad(!page == !page_zip);
4249
if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) {
4254
offset = (ulint) *ptr++;
4255
len = (ulint) *ptr++;
4257
if (UNIV_UNLIKELY(!len) || UNIV_UNLIKELY(offset + len >= PAGE_DATA)) {
4259
recv_sys->found_corrupt_log = TRUE;
4264
if (UNIV_UNLIKELY(end_ptr < ptr + len)) {
4270
if (UNIV_UNLIKELY(!page_zip)) {
4274
#ifdef UNIV_ZIP_DEBUG
4275
ut_a(page_zip_validate(page_zip, page));
4276
#endif /* UNIV_ZIP_DEBUG */
4278
memcpy(page + offset, ptr, len);
4279
memcpy(page_zip->data + offset, ptr, len);
4281
#ifdef UNIV_ZIP_DEBUG
4282
ut_a(page_zip_validate(page_zip, page));
4283
#endif /* UNIV_ZIP_DEBUG */
4289
/**************************************************************************
4290
Write a log record of writing to the uncompressed header portion of a page. */
4293
page_zip_write_header_log(
4294
/*======================*/
4295
const byte* data, /* in: data on the uncompressed page */
4296
ulint length, /* in: length of the data */
4297
mtr_t* mtr) /* in: mini-transaction */
4299
byte* log_ptr = mlog_open(mtr, 11 + 1 + 1);
4300
ulint offset = page_offset(data);
4302
ut_ad(offset < PAGE_DATA);
4303
ut_ad(offset + length < PAGE_DATA);
4305
# error "PAGE_DATA > 255"
4307
ut_ad(length < 256);
4309
/* If no logging is requested, we may return now */
4310
if (UNIV_UNLIKELY(!log_ptr)) {
4315
log_ptr = mlog_write_initial_log_record_fast(
4316
(byte*) data, MLOG_ZIP_WRITE_HEADER, log_ptr, mtr);
4317
*log_ptr++ = (byte) offset;
4318
*log_ptr++ = (byte) length;
4319
mlog_close(mtr, log_ptr);
4321
mlog_catenate_string(mtr, data, length);
4324
/**************************************************************************
4325
Reorganize and compress a page. This is a low-level operation for
4326
compressed pages, to be used when page_zip_compress() fails.
4327
On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
4328
The function btr_page_reorganize() should be preferred whenever possible.
4329
IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
4330
non-clustered index, the caller must update the insert buffer free
4331
bits in the same mini-transaction in such a way that the modification
4332
will be redo-logged. */
4335
page_zip_reorganize(
4336
/*================*/
4337
/* out: TRUE on success, FALSE on failure;
4338
page and page_zip will be left intact
4340
buf_block_t* block, /* in/out: page with compressed page;
4341
on the compressed page, in: size;
4343
m_start, m_end, m_nonempty */
4344
dict_index_t* index, /* in: index of the B-tree node */
4345
mtr_t* mtr) /* in: mini-transaction */
4347
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
4348
page_t* page = buf_block_get_frame(block);
4349
buf_block_t* temp_block;
4353
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4354
ut_ad(page_is_comp(page));
4355
/* Note that page_zip_validate(page_zip, page) may fail here. */
4356
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
4357
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4359
/* Disable logging */
4360
log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
4362
temp_block = buf_block_alloc(0);
4363
temp_page = temp_block->frame;
4365
btr_search_drop_page_hash_index(block);
4367
/* Copy the old page to temporary space */
4368
buf_frame_copy(temp_page, page);
4370
/* Recreate the page: note that global data on page (possible
4371
segment headers, next page-field, etc.) is preserved intact */
4373
page_create(block, mtr, TRUE);
4374
block->check_index_page_at_flush = TRUE;
4376
/* Copy the records from the temporary space to the recreated page;
4377
do not copy the lock bits yet */
4379
page_copy_rec_list_end_no_locks(block, temp_block,
4380
page_get_infimum_rec(temp_page),
4382
/* Copy max trx id to recreated page */
4383
page_set_max_trx_id(block, NULL, page_get_max_trx_id(temp_page));
4385
/* Restore logging. */
4386
mtr_set_log_mode(mtr, log_mode);
4388
if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
4390
/* Restore the old page and exit. */
4391
buf_frame_copy(page, temp_page);
4393
buf_block_free(temp_block);
4397
lock_move_reorganize_page(block, temp_block);
4399
buf_block_free(temp_block);
4403
/**************************************************************************
4404
Copy the records of a page byte for byte. Do not copy the page header
4405
or trailer, except those B-tree header fields that are directly
4406
related to the storage of records. Also copy PAGE_MAX_TRX_ID.
4407
NOTE: The caller must update the lock table and the adaptive hash index. */
4412
page_zip_des_t* page_zip, /* out: copy of src_zip
4413
(n_blobs, m_start, m_end,
4414
m_nonempty, data[0..size-1]) */
4415
page_t* page, /* out: copy of src */
4416
const page_zip_des_t* src_zip, /* in: compressed page */
4417
const page_t* src, /* in: page */
4418
dict_index_t* index, /* in: index of the B-tree */
4419
mtr_t* mtr) /* in: mini-transaction */
4421
ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
4422
ut_ad(mtr_memo_contains_page(mtr, (page_t*) src, MTR_MEMO_PAGE_X_FIX));
4423
#ifdef UNIV_ZIP_DEBUG
4424
/* The B-tree operations that call this function may set
4425
FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
4426
mismatch. A strict page_zip_validate() will be executed later
4427
during the B-tree operations. */
4428
ut_a(page_zip_validate_low(src_zip, src, TRUE));
4429
#endif /* UNIV_ZIP_DEBUG */
4430
ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
4431
if (UNIV_UNLIKELY(src_zip->n_blobs)) {
4432
ut_a(page_is_leaf(src));
4433
ut_a(dict_index_is_clust(index));
4436
UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
4437
UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip));
4438
UNIV_MEM_ASSERT_RW(src, UNIV_PAGE_SIZE);
4439
UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip));
4441
/* Copy those B-tree page header fields that are related to
4442
the records stored in the page. Also copy the field
4443
PAGE_MAX_TRX_ID. Skip the rest of the page header and
4444
trailer. On the compressed page, there is no trailer. */
4445
#if PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END
4446
# error "PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END"
4448
memcpy(PAGE_HEADER + page, PAGE_HEADER + src,
4449
PAGE_HEADER_PRIV_END);
4450
memcpy(PAGE_DATA + page, PAGE_DATA + src,
4451
UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
4452
memcpy(PAGE_HEADER + page_zip->data, PAGE_HEADER + src_zip->data,
4453
PAGE_HEADER_PRIV_END);
4454
memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data,
4455
page_zip_get_size(page_zip) - PAGE_DATA);
4457
/* Copy all fields of src_zip to page_zip, except the pointer
4458
to the compressed data page. */
4460
page_zip_t* data = page_zip->data;
4461
memcpy(page_zip, src_zip, sizeof *page_zip);
4462
page_zip->data = data;
4464
ut_ad(page_zip_get_trailer_len(page_zip,
4465
dict_index_is_clust(index), NULL)
4466
+ page_zip->m_end < page_zip_get_size(page_zip));
4468
if (!page_is_leaf(src)
4469
&& UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL)
4470
&& UNIV_LIKELY(mach_read_from_4(page
4471
+ FIL_PAGE_PREV) != FIL_NULL)) {
4472
/* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */
4473
ulint offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
4475
if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) {
4476
rec_t* rec = page + offs;
4477
ut_a(rec[-REC_N_NEW_EXTRA_BYTES]
4478
& REC_INFO_MIN_REC_FLAG);
4479
rec[-REC_N_NEW_EXTRA_BYTES] &= ~ REC_INFO_MIN_REC_FLAG;
4483
#ifdef UNIV_ZIP_DEBUG
4484
ut_a(page_zip_validate(page_zip, page));
4485
#endif /* UNIV_ZIP_DEBUG */
4487
page_zip_compress_write_log(page_zip, page, index, mtr);
4490
/**************************************************************************
4491
Parses a log record of compressing an index page. */
4494
page_zip_parse_compress(
4495
/*====================*/
4496
/* out: end of log record or NULL */
4497
byte* ptr, /* in: buffer */
4498
byte* end_ptr,/* in: buffer end */
4499
page_t* page, /* out: uncompressed page */
4500
page_zip_des_t* page_zip)/* out: compressed page */
4505
ut_ad(ptr && end_ptr);
4506
ut_ad(!page == !page_zip);
4508
if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) {
4513
size = mach_read_from_2(ptr);
4515
trailer_size = mach_read_from_2(ptr);
4518
if (UNIV_UNLIKELY(ptr + 8 + size + trailer_size > end_ptr)) {
4524
if (UNIV_UNLIKELY(!page_zip)
4525
|| UNIV_UNLIKELY(page_zip_get_size(page_zip) < size)) {
4527
recv_sys->found_corrupt_log = TRUE;
4532
memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4);
4533
memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4);
4534
memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size);
4535
memset(page_zip->data + FIL_PAGE_TYPE + size, 0,
4536
page_zip_get_size(page_zip) - trailer_size
4537
- (FIL_PAGE_TYPE + size));
4538
memcpy(page_zip->data + page_zip_get_size(page_zip)
4539
- trailer_size, ptr + 8 + size, trailer_size);
4541
if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page))) {
4547
return(ptr + 8 + size + trailer_size);
4550
/**************************************************************************
4551
Calculate the compressed page checksum. */
4554
page_zip_calc_checksum(
4555
/*===================*/
4556
/* out: page checksum */
4557
const void* data, /* in: compressed page */
4558
ulint size) /* in: size of compressed page */
4560
/* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
4561
and FIL_PAGE_FILE_FLUSH_LSN from the checksum. */
4563
const Bytef* s = data;
4566
ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
4568
adler = adler32(0L, s + FIL_PAGE_OFFSET,
4569
FIL_PAGE_LSN - FIL_PAGE_OFFSET);
4570
adler = adler32(adler, s + FIL_PAGE_TYPE, 2);
4571
adler = adler32(adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
4572
size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
4574
return((ulint) adler);