1
/******************************************************
2
Compressed page interface
6
Created June 2005 by Marko Makela
7
*******************************************************/
9
#ifdef UNIV_MATERIALIZE
15
#include "page0page.h"
17
/* The format of compressed pages is as follows.
19
The header and trailer of the uncompressed pages, excluding the page
20
directory in the trailer, are copied as is to the header and trailer
21
of the compressed page.
23
At the end of the compressed page, there is a dense page directory
24
pointing to every user record contained on the page, including deleted
25
records on the free list. The dense directory is indexed in the
26
collation order, i.e., in the order in which the record list is
27
linked on the uncompressed page. The infimum and supremum records are
28
excluded. The two most significant bits of the entries are allocated
29
for the delete-mark and an n_owned flag indicating the last record in
30
a chain of records pointed to from the sparse page directory on the
uncompressed page.
33
The data between PAGE_ZIP_START and the last page directory entry will
34
be written in compressed format, starting at offset PAGE_DATA.
35
Infimum and supremum records are not stored. We exclude the
36
REC_N_NEW_EXTRA_BYTES in every record header. These can be recovered
37
from the dense page directory stored at the end of the compressed
page.
40
The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and
41
roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of
42
externally stored columns are stored separately, in ascending order of
43
heap_no and column index, starting backwards from the dense page
directory.
46
The compressed data stream may be followed by a modification log
47
covering the compressed portion of the page, as follows.
49
MODIFICATION LOG ENTRY FORMAT
51
- (heap_no - 1) << 1 (1..2 bytes)
52
- extra bytes backwards
55
- (heap_no - 1) << 1 | 1 (1..2 bytes)
57
The integer values are stored in a variable-length format:
59
- 1xxxxxxx xxxxxxxx: 0..32767
61
The end of the modification log is marked by a 0 byte.
63
In summary, the compressed page looks like this:
65
(1) Uncompressed page header (PAGE_DATA bytes)
66
(2) Compressed index information
67
(3) Compressed page data
68
(4) Page modification log (page_zip->m_start..page_zip->m_end)
69
(5) Empty zero-filled space
70
(6) BLOB pointers (on leaf pages)
71
- BTR_EXTERN_FIELD_REF_SIZE for each externally stored column
72
- in descending collation order
73
(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes,
75
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes
76
- REC_NODE_PTR_SIZE for non-leaf pages
78
(8) dense page directory, stored backwards
79
- n_dense = n_heap - 2
80
- existing records in ascending collation order
81
- deleted records (free list) in link order
*/
84
/* Start offset of the area that will be compressed */
85
#define PAGE_ZIP_START PAGE_NEW_SUPREMUM_END
86
/* Size of a compressed page directory entry */
87
#define PAGE_ZIP_DIR_SLOT_SIZE 2
88
/* Mask of record offsets (the low 14 bits of a directory entry) */
89
#define PAGE_ZIP_DIR_SLOT_MASK 0x3fff
91
/* Flag bit of a directory entry: n_owned marker of the record */
#define PAGE_ZIP_DIR_SLOT_OWNED 0x4000
93
/* Flag bit of a directory entry: the record is delete-marked */
#define PAGE_ZIP_DIR_SLOT_DEL 0x8000
95
/**************************************************************************
96
Determine the size of a compressed page in bytes. The size is derived
from the shift count stored in page_zip->ssize. */
101
/* out: size in bytes */
102
const page_zip_des_t* page_zip) /* in: compressed page */
106
/* A zero ssize is treated as a special case. */
if (UNIV_UNLIKELY(!page_zip->ssize)) {
110
/* size = (PAGE_ZIP_MIN_SIZE / 2) * 2^ssize */
size = (PAGE_ZIP_MIN_SIZE >> 1) << page_zip->ssize;
112
/* The result must lie between the smallest compressed page size
and the uncompressed page size. */
ut_ad(size >= PAGE_ZIP_MIN_SIZE);
113
ut_ad(size <= UNIV_PAGE_SIZE);
117
/**************************************************************************
118
Set the size of a compressed page in bytes. The size is not stored
directly; it is converted to a shift count kept in page_zip->ssize. */
123
page_zip_des_t* page_zip, /* in/out: compressed page */
124
ulint size) /* in: size in bytes */
129
/* Only power-of-two sizes are expected (checked with ut_is_2pow). */
ut_ad(ut_is_2pow(size));
131
/* Find the smallest ssize >= 1 for which (512 << ssize) >= size.
NOTE(review): the literal 512 presumably equals PAGE_ZIP_MIN_SIZE >> 1,
which the size getter uses for the inverse computation -- confirm. */
for (ssize = 1; size > (ulint) (512 << ssize); ssize++) {};
133
page_zip->ssize = ssize;
138
/* Round trip: the getter must reproduce the requested size. */
ut_ad(page_zip_get_size(page_zip) == size);
141
/**************************************************************************
142
Determine if a record is so big that it needs to be stored externally.
The decision depends on the record format (comp) and, when zip_size is
nonzero, on the compressed page size. */
145
page_zip_rec_needs_ext(
146
/*===================*/
147
/* out: FALSE if the entire record
148
can be stored locally on the page */
149
ulint rec_size, /* in: length of the record in bytes */
150
ulint comp, /* in: nonzero=compact format */
151
ulint zip_size) /* in: compressed page size in bytes, or 0 */
153
/* The record must be longer than the fixed extra bytes of its format.
The conditional expression is parenthesized on purpose: '>' binds
tighter than '?:', so without the parentheses the assertion would be
evaluated as (rec_size > comp) ? ... : ..., i.e. it would merely test
one of the two (presumably nonzero) constants and could never fail. */
ut_ad(rec_size > (comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES));
154
ut_ad(ut_is_2pow(zip_size));
156
#if UNIV_PAGE_SIZE > REC_MAX_DATA_SIZE
157
if (UNIV_UNLIKELY(rec_size >= REC_MAX_DATA_SIZE)) {
162
if (UNIV_UNLIKELY(!comp)) {
164
/* Old-style (non-compact) format: compare against half of the free
space of an empty page of that format. */
return(rec_size >= page_get_free_space_of_empty((ulint)FALSE) / 2);
167
/* If zip_size != 0, the record should fit on the compressed page.
168
If not, the right-hand-side of the comparison will wrap around
169
and the condition will not hold. Thus, we do not need to test
170
for zip_size != 0. We subtract the size of the page header and
171
assume that compressing the index information takes 50 bytes. */
172
if (rec_size >= zip_size - (PAGE_DATA + 50)) {
176
/* Compact format: compare against half of the free space of an empty
compact-format page. */
return(rec_size >= page_get_free_space_of_empty(TRUE) / 2);
180
/**************************************************************************
181
Validate a compressed page descriptor. Only the descriptor fields are
checked with debug assertions; the page contents are not inspected. */
184
page_zip_simple_validate(
185
/*=====================*/
186
/* out: TRUE if ok */
187
const page_zip_des_t* page_zip)/* in: compressed page descriptor */
190
/* A compressed page buffer must be attached to the descriptor. */
ut_ad(page_zip->data);
191
/* The size shift count must be within the supported range. */
ut_ad(page_zip->ssize < PAGE_ZIP_NUM_SSIZE);
192
/* The page must be larger than the page header plus one dense
directory slot. */
ut_ad(page_zip_get_size(page_zip)
193
> PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE);
194
/* The modification log bounds must be ordered and must lie within
the page. */
ut_ad(page_zip->m_start <= page_zip->m_end);
195
ut_ad(page_zip->m_end < page_zip_get_size(page_zip));
196
/* The BLOB pointers alone must not be able to fill the whole page. */
ut_ad(page_zip->n_blobs
197
< page_zip_get_size(page_zip) / BTR_EXTERN_FIELD_REF_SIZE);
200
#endif /* UNIV_DEBUG */
202
/**************************************************************************
203
Determine the length of the page trailer. The per-record size of the
uncompressed trailer data is also returned through entry_size. */
206
page_zip_get_trailer_len(
207
/*=====================*/
208
/* out: length of the page trailer,
209
in bytes, not including the terminating
210
zero byte of the modification log */
211
const page_zip_des_t* page_zip,/* in: compressed page */
212
ibool is_clust,/* in: TRUE if clustered index */
213
ulint* entry_size)/* out: size of the uncompressed
214
portion of a user record */
216
ulint uncompressed_size;
218
ut_ad(page_zip_simple_validate(page_zip));
219
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
221
/* Non-leaf page: BLOB pointers are not expected (asserted below). */
if (UNIV_UNLIKELY(!page_is_leaf(page_zip->data))) {
222
uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
224
ut_ad(!page_zip->n_blobs);
225
} else if (UNIV_UNLIKELY(is_clust)) {
226
/* Leaf page of a clustered index: besides the dense directory slot,
trx_id and roll_ptr are kept uncompressed for every record. */
uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
227
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
229
/* Remaining case (presumably a leaf page of a secondary index --
confirm): only the dense directory slot is needed per record, and
BLOB pointers are not expected. */
uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE;
230
ut_ad(!page_zip->n_blobs);
234
/* Report the per-record size to the caller. */
*entry_size = uncompressed_size;
237
/* n_heap - 2 is the number of dense directory entries: the infimum
and supremum records are excluded. */
return((page_dir_get_n_heap(page_zip->data) - 2)
239
+ page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE);
242
/**************************************************************************
243
Determine how big a record can be inserted without recompressing the page. */
246
page_zip_max_ins_size(
247
/*==================*/
248
/* out: a positive number
249
indicating the maximum size of
250
a record whose insertion is
251
guaranteed to succeed, or
253
const page_zip_des_t* page_zip,/* in: compressed page */
254
ibool is_clust)/* in: TRUE if clustered index */
256
ulint uncompressed_size;
259
trailer_len = page_zip_get_trailer_len(page_zip, is_clust,
262
/* When a record is created, a pointer may be added to
264
Likewise, space for the columns that will not be
265
compressed will be allocated from the page trailer.
266
Also the BLOB pointers will be allocated from there, but
267
we may as well count them in the length of the record. */
269
trailer_len += uncompressed_size;
271
/* What remains is the page size minus the trailer and minus the space
used up to the end of the modification log (m_end). The last term
accounts for the fixed extra bytes of the record header and for up to
2 bytes of encoded heap_no. The arithmetic is done in the signed type
lint, presumably so that the result can be zero or negative when there
is no room -- confirm. */
return((lint) page_zip_get_size(page_zip)
272
- trailer_len - page_zip->m_end
273
- (REC_N_NEW_EXTRA_BYTES - 2));
276
/**************************************************************************
277
Determine if enough space is available in the modification log for
a record of the given length. */
282
/* out: TRUE if enough space
284
const page_zip_des_t* page_zip,/* in: compressed page */
285
ibool is_clust,/* in: TRUE if clustered index */
286
ulint length, /* in: combined size of the record */
287
ulint create) /* in: nonzero=add the record to
290
ulint uncompressed_size;
293
ut_ad(length > REC_N_NEW_EXTRA_BYTES);
295
trailer_len = page_zip_get_trailer_len(page_zip, is_clust,
298
/* Subtract the fixed extra bytes and add the maximum
299
space needed for identifying the record (encoded heap_no). */
300
length -= REC_N_NEW_EXTRA_BYTES - 2;
302
if (UNIV_UNLIKELY(create)) {
303
/* When a record is created, a pointer may be added to
305
Likewise, space for the columns that will not be
306
compressed will be allocated from the page trailer.
307
Also the BLOB pointers will be allocated from there, but
308
we may as well count them in the length of the record. */
310
trailer_len += uncompressed_size;
313
/* The record fits if the total space requirement stays strictly
below the compressed page size. */
return(UNIV_LIKELY(length
316
< page_zip_get_size(page_zip)));
319
/**************************************************************************
320
Initialize a compressed page descriptor by filling the whole structure
with zero bytes. */
325
page_zip_des_t* page_zip) /* in/out: compressed page
328
memset(page_zip, 0, sizeof *page_zip);
331
/**************************************************************************
332
Write a log record of writing to the uncompressed header portion of a page.
This is only a declaration; page_zip_write_header() calls this function
when it has been given a mini-transaction. */
335
page_zip_write_header_log(
336
/*======================*/
337
const byte* data,/* in: data on the uncompressed page */
338
ulint length, /* in: length of the data */
339
mtr_t* mtr); /* in: mini-transaction */
341
/**************************************************************************
341
Write data to the uncompressed header portion of a page. The data must
342
already have been written to the uncompressed page.
343
However, the data portion of the uncompressed page may differ from
344
the compressed page when a record is being inserted in
345
page_cur_insert_rec_zip().
The bytes are copied to the same offset on the compressed page and,
if a mini-transaction is given, the write is also logged. */
349
page_zip_write_header(
350
/*==================*/
351
page_zip_des_t* page_zip,/* in/out: compressed page */
352
const byte* str, /* in: address on the uncompressed page */
353
ulint length, /* in: length of the data */
354
mtr_t* mtr) /* in: mini-transaction, or NULL */
358
/* str must point into the page frame that this descriptor belongs to. */
ut_ad(buf_frame_get_page_zip(str) == page_zip);
359
ut_ad(page_zip_simple_validate(page_zip));
360
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
362
/* Offset of str within its page (presumably; per page_offset()). */
pos = page_offset(str);
364
/* The write must start inside the page header.
NOTE(review): only the start offset is asserted; pos + length is not
checked against PAGE_DATA here. */
ut_ad(pos < PAGE_DATA);
366
/* Mirror the bytes at the same offset of the compressed page. */
memcpy(page_zip->data + pos, str, length);
368
/* The following would fail in page_cur_insert_rec_zip(). */
369
/* ut_ad(page_zip_validate(page_zip, str - pos)); */
371
/* Log the write only when a mini-transaction was passed. */
if (UNIV_LIKELY_NULL(mtr)) {
372
page_zip_write_header_log(str, length, mtr);
376
#ifdef UNIV_MATERIALIZE
378
# define UNIV_INLINE UNIV_INLINE_ORIGINAL