641.2.2
by Monty Taylor
InnoDB Plugin 1.0.3 |
1 |
/*****************************************************************************
|
2 |
||
3 |
Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
|
|
4 |
||
5 |
This program is free software; you can redistribute it and/or modify it under
|
|
6 |
the terms of the GNU General Public License as published by the Free Software
|
|
7 |
Foundation; version 2 of the License.
|
|
8 |
||
9 |
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
10 |
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
11 |
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
12 |
||
13 |
You should have received a copy of the GNU General Public License along with
|
|
14 |
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
|
15 |
Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
*****************************************************************************/
|
|
18 |
||
641.1.2
by Monty Taylor
Imported 1.0.1 with clean - with no changes. |
19 |
/******************************************************
|
20 |
Compressed page interface
|
|
21 |
||
22 |
Created June 2005 by Marko Makela
|
|
23 |
*******************************************************/
|
|
24 |
||
25 |
#ifdef UNIV_MATERIALIZE |
|
26 |
# undef UNIV_INLINE |
|
27 |
# define UNIV_INLINE |
|
28 |
#endif
|
|
29 |
||
30 |
#include "page0zip.h" |
|
31 |
#include "page0page.h" |
|
32 |
||
33 |
/* The format of compressed pages is as follows.
|
|
34 |
||
35 |
The header and trailer of the uncompressed pages, excluding the page
|
|
36 |
directory in the trailer, are copied as is to the header and trailer
|
|
37 |
of the compressed page.
|
|
38 |
||
39 |
At the end of the compressed page, there is a dense page directory
|
|
40 |
pointing to every user record contained on the page, including deleted
|
|
41 |
records on the free list. The dense directory is indexed in the
|
|
42 |
collation order, i.e., in the order in which the record list is
|
|
43 |
linked on the uncompressed page. The infimum and supremum records are
|
|
44 |
excluded. The two most significant bits of the entries are allocated
|
|
45 |
for the delete-mark and an n_owned flag indicating the last record in
|
|
46 |
a chain of records pointed to from the sparse page directory on the
|
|
47 |
uncompressed page.
|
|
48 |
||
49 |
The data between PAGE_ZIP_START and the last page directory entry will
|
|
50 |
be written in compressed format, starting at offset PAGE_DATA.
|
|
51 |
Infimum and supremum records are not stored. We exclude the
|
|
52 |
REC_N_NEW_EXTRA_BYTES in every record header. These can be recovered
|
|
53 |
from the dense page directory stored at the end of the compressed
|
|
54 |
page.
|
|
55 |
||
56 |
The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and
|
|
57 |
roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of
|
|
58 |
externally stored columns are stored separately, in ascending order of
|
|
59 |
heap_no and column index, starting backwards from the dense page
|
|
60 |
directory.
|
|
61 |
||
62 |
The compressed data stream may be followed by a modification log
|
|
63 |
covering the compressed portion of the page, as follows.
|
|
64 |
||
65 |
MODIFICATION LOG ENTRY FORMAT
|
|
66 |
- write record:
|
|
67 |
- (heap_no - 1) << 1 (1..2 bytes)
|
|
68 |
- extra bytes backwards
|
|
69 |
- data bytes
|
|
70 |
- clear record:
|
|
71 |
- (heap_no - 1) << 1 | 1 (1..2 bytes)
|
|
72 |
||
73 |
The integer values are stored in a variable-length format:
|
|
74 |
- 0xxxxxxx: 0..127
|
|
75 |
- 1xxxxxxx xxxxxxxx: 0..32767
|
|
76 |
||
77 |
The end of the modification log is marked by a 0 byte.
|
|
78 |
||
79 |
In summary, the compressed page looks like this:
|
|
80 |
||
81 |
(1) Uncompressed page header (PAGE_DATA bytes)
|
|
82 |
(2) Compressed index information
|
|
83 |
(3) Compressed page data
|
|
84 |
(4) Page modification log (page_zip->m_start..page_zip->m_end)
|
|
85 |
(5) Empty zero-filled space
|
|
86 |
(6) BLOB pointers (on leaf pages)
|
|
87 |
- BTR_EXTERN_FIELD_REF_SIZE for each externally stored column
|
|
88 |
- in descending collation order
|
|
89 |
(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes,
|
|
90 |
- indexed by heap_no
|
|
91 |
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes
|
|
92 |
- REC_NODE_PTR_SIZE for non-leaf pages
|
|
93 |
- 0 otherwise
|
|
94 |
(8) dense page directory, stored backwards
|
|
95 |
- n_dense = n_heap - 2
|
|
96 |
- existing records in ascending collation order
|
|
97 |
- deleted records (free list) in link order
|
|
98 |
*/
|
|
99 |
||
100 |
/* Start offset of the area that will be compressed */
|
|
101 |
#define PAGE_ZIP_START PAGE_NEW_SUPREMUM_END |
|
102 |
/* Size of a compressed page directory entry */
|
|
103 |
#define PAGE_ZIP_DIR_SLOT_SIZE 2 |
|
104 |
/* Mask of record offsets */
|
|
105 |
#define PAGE_ZIP_DIR_SLOT_MASK 0x3fff |
|
106 |
/* 'owned' flag */
|
|
107 |
#define PAGE_ZIP_DIR_SLOT_OWNED 0x4000 |
|
108 |
/* 'deleted' flag */
|
|
109 |
#define PAGE_ZIP_DIR_SLOT_DEL 0x8000 |
|
110 |
||
111 |
/**************************************************************************
|
|
112 |
Determine the size of a compressed page in bytes. */
|
|
113 |
UNIV_INLINE |
|
114 |
ulint |
|
115 |
page_zip_get_size(
|
|
116 |
/*==============*/
|
|
117 |
/* out: size in bytes */ |
|
118 |
const page_zip_des_t* page_zip) /* in: compressed page */ |
|
119 |
{
|
|
120 |
ulint size; |
|
121 |
||
122 |
if (UNIV_UNLIKELY(!page_zip->ssize)) { |
|
123 |
return(0); |
|
124 |
} |
|
125 |
||
126 |
size = (PAGE_ZIP_MIN_SIZE >> 1) << page_zip->ssize; |
|
127 |
||
128 |
ut_ad(size >= PAGE_ZIP_MIN_SIZE); |
|
129 |
ut_ad(size <= UNIV_PAGE_SIZE); |
|
130 |
||
131 |
return(size); |
|
132 |
}
|
|
133 |
/**************************************************************************
|
|
134 |
Set the size of a compressed page in bytes. */
|
|
135 |
UNIV_INLINE |
|
136 |
void |
|
137 |
page_zip_set_size(
|
|
138 |
/*==============*/
|
|
139 |
page_zip_des_t* page_zip, /* in/out: compressed page */ |
|
140 |
ulint size) /* in: size in bytes */ |
|
141 |
{
|
|
142 |
if (size) { |
|
143 |
int ssize; |
|
144 |
||
145 |
ut_ad(ut_is_2pow(size)); |
|
146 |
||
641.2.2
by Monty Taylor
InnoDB Plugin 1.0.3 |
147 |
for (ssize = 1; size > (ulint) (512 << ssize); ssize++) { |
148 |
} |
|
641.1.2
by Monty Taylor
Imported 1.0.1 with clean - with no changes. |
149 |
|
150 |
page_zip->ssize = ssize; |
|
151 |
} else { |
|
152 |
page_zip->ssize = 0; |
|
153 |
} |
|
154 |
||
155 |
ut_ad(page_zip_get_size(page_zip) == size); |
|
156 |
}
|
|
157 |
||
158 |
/**************************************************************************
|
|
159 |
Determine if a record is so big that it needs to be stored externally. */
|
|
160 |
UNIV_INLINE |
|
161 |
ibool |
|
162 |
page_zip_rec_needs_ext(
|
|
163 |
/*===================*/
|
|
164 |
/* out: FALSE if the entire record |
|
165 |
can be stored locally on the page */
|
|
166 |
ulint rec_size, /* in: length of the record in bytes */ |
|
167 |
ulint comp, /* in: nonzero=compact format */ |
|
641.2.1
by Monty Taylor
InnoDB Plugin 1.0.2 |
168 |
ulint n_fields, /* in: number of fields in the record; |
169 |
ignored if zip_size == 0 */
|
|
641.1.2
by Monty Taylor
Imported 1.0.1 with clean - with no changes. |
170 |
ulint zip_size) /* in: compressed page size in bytes, or 0 */ |
171 |
{
|
|
172 |
ut_ad(rec_size > comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES); |
|
173 |
ut_ad(ut_is_2pow(zip_size)); |
|
641.2.1
by Monty Taylor
InnoDB Plugin 1.0.2 |
174 |
ut_ad(comp || !zip_size); |
641.1.2
by Monty Taylor
Imported 1.0.1 with clean - with no changes. |
175 |
|
176 |
#if UNIV_PAGE_SIZE > REC_MAX_DATA_SIZE |
|
177 |
if (UNIV_UNLIKELY(rec_size >= REC_MAX_DATA_SIZE)) { |
|
178 |
return(TRUE); |
|
179 |
} |
|
180 |
#endif
|
|
181 |
||
641.2.1
by Monty Taylor
InnoDB Plugin 1.0.2 |
182 |
if (UNIV_UNLIKELY(zip_size)) { |
183 |
ut_ad(comp); |
|
184 |
/* On a compressed page, there is a two-byte entry in |
|
185 |
the dense page directory for every record. But there
|
|
186 |
is no record header. There should be enough room for
|
|
187 |
one record on an empty leaf page. Subtract 1 byte for
|
|
188 |
the encoded heap number. Check also the available space
|
|
189 |
on the uncompressed page. */
|
|
190 |
return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2) |
|
191 |
>= (page_zip_empty_size(n_fields, zip_size) - 1) |
|
192 |
|| rec_size >= page_get_free_space_of_empty(TRUE) / 2); |
|
193 |
} |
|
194 |
||
195 |
return(rec_size >= page_get_free_space_of_empty(comp) / 2); |
|
641.1.2
by Monty Taylor
Imported 1.0.1 with clean - with no changes. |
196 |
}
|
197 |
||
198 |
#ifdef UNIV_DEBUG |
|
199 |
/**************************************************************************
|
|
200 |
Validate a compressed page descriptor. */
|
|
201 |
UNIV_INLINE |
|
202 |
ibool |
|
203 |
page_zip_simple_validate(
|
|
204 |
/*=====================*/
|
|
205 |
/* out: TRUE if ok */ |
|
206 |
const page_zip_des_t* page_zip)/* in: compressed page descriptor */ |
|
207 |
{
|
|
208 |
ut_ad(page_zip); |
|
209 |
ut_ad(page_zip->data); |
|
210 |
ut_ad(page_zip->ssize < PAGE_ZIP_NUM_SSIZE); |
|
211 |
ut_ad(page_zip_get_size(page_zip) |
|
212 |
> PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE); |
|
213 |
ut_ad(page_zip->m_start <= page_zip->m_end); |
|
214 |
ut_ad(page_zip->m_end < page_zip_get_size(page_zip)); |
|
215 |
ut_ad(page_zip->n_blobs |
|
216 |
< page_zip_get_size(page_zip) / BTR_EXTERN_FIELD_REF_SIZE); |
|
217 |
return(TRUE); |
|
218 |
}
|
|
219 |
#endif /* UNIV_DEBUG */ |
|
220 |
||
221 |
/**************************************************************************
|
|
222 |
Determine if the length of the page trailer. */
|
|
223 |
UNIV_INLINE |
|
224 |
ibool |
|
225 |
page_zip_get_trailer_len(
|
|
226 |
/*=====================*/
|
|
227 |
/* out: length of the page trailer, |
|
228 |
in bytes, not including the terminating
|
|
229 |
zero byte of the modification log */
|
|
230 |
const page_zip_des_t* page_zip,/* in: compressed page */ |
|
231 |
ibool is_clust,/* in: TRUE if clustered index */ |
|
232 |
ulint* entry_size)/* out: size of the uncompressed |
|
233 |
portion of a user record */
|
|
234 |
{
|
|
235 |
ulint uncompressed_size; |
|
236 |
||
237 |
ut_ad(page_zip_simple_validate(page_zip)); |
|
238 |
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); |
|
239 |
||
240 |
if (UNIV_UNLIKELY(!page_is_leaf(page_zip->data))) { |
|
241 |
uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE |
|
242 |
+ REC_NODE_PTR_SIZE; |
|
243 |
ut_ad(!page_zip->n_blobs); |
|
244 |
} else if (UNIV_UNLIKELY(is_clust)) { |
|
245 |
uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE |
|
246 |
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; |
|
247 |
} else { |
|
248 |
uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE; |
|
249 |
ut_ad(!page_zip->n_blobs); |
|
250 |
} |
|
251 |
||
252 |
if (entry_size) { |
|
253 |
*entry_size = uncompressed_size; |
|
254 |
} |
|
255 |
||
256 |
return((page_dir_get_n_heap(page_zip->data) - 2) |
|
257 |
* uncompressed_size |
|
258 |
+ page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE); |
|
259 |
}
|
|
260 |
||
261 |
/**************************************************************************
|
|
262 |
Determine how big record can be inserted without recompressing the page. */
|
|
263 |
UNIV_INLINE |
|
264 |
lint |
|
265 |
page_zip_max_ins_size(
|
|
266 |
/*==================*/
|
|
267 |
/* out: a positive number |
|
268 |
indicating the maximum size of
|
|
269 |
a record whose insertion is
|
|
270 |
guaranteed to succeed, or
|
|
271 |
zero or negative */
|
|
272 |
const page_zip_des_t* page_zip,/* in: compressed page */ |
|
273 |
ibool is_clust)/* in: TRUE if clustered index */ |
|
274 |
{
|
|
275 |
ulint uncompressed_size; |
|
276 |
ulint trailer_len; |
|
277 |
||
278 |
trailer_len = page_zip_get_trailer_len(page_zip, is_clust, |
|
279 |
&uncompressed_size); |
|
280 |
||
281 |
/* When a record is created, a pointer may be added to |
|
282 |
the dense directory.
|
|
283 |
Likewise, space for the columns that will not be
|
|
284 |
compressed will be allocated from the page trailer.
|
|
285 |
Also the BLOB pointers will be allocated from there, but
|
|
286 |
we may as well count them in the length of the record. */
|
|
287 |
||
288 |
trailer_len += uncompressed_size; |
|
289 |
||
290 |
return((lint) page_zip_get_size(page_zip) |
|
291 |
- trailer_len - page_zip->m_end |
|
292 |
- (REC_N_NEW_EXTRA_BYTES - 2)); |
|
293 |
}
|
|
294 |
||
295 |
/**************************************************************************
|
|
296 |
Determine if enough space is available in the modification log. */
|
|
297 |
UNIV_INLINE |
|
298 |
ibool |
|
299 |
page_zip_available(
|
|
300 |
/*===============*/
|
|
301 |
/* out: TRUE if enough space |
|
302 |
is available */
|
|
303 |
const page_zip_des_t* page_zip,/* in: compressed page */ |
|
304 |
ibool is_clust,/* in: TRUE if clustered index */ |
|
305 |
ulint length, /* in: combined size of the record */ |
|
306 |
ulint create) /* in: nonzero=add the record to |
|
307 |
the heap */
|
|
308 |
{
|
|
309 |
ulint uncompressed_size; |
|
310 |
ulint trailer_len; |
|
311 |
||
312 |
ut_ad(length > REC_N_NEW_EXTRA_BYTES); |
|
313 |
||
314 |
trailer_len = page_zip_get_trailer_len(page_zip, is_clust, |
|
315 |
&uncompressed_size); |
|
316 |
||
317 |
/* Subtract the fixed extra bytes and add the maximum |
|
318 |
space needed for identifying the record (encoded heap_no). */
|
|
319 |
length -= REC_N_NEW_EXTRA_BYTES - 2; |
|
320 |
||
321 |
if (UNIV_UNLIKELY(create)) { |
|
322 |
/* When a record is created, a pointer may be added to |
|
323 |
the dense directory.
|
|
324 |
Likewise, space for the columns that will not be
|
|
325 |
compressed will be allocated from the page trailer.
|
|
326 |
Also the BLOB pointers will be allocated from there, but
|
|
327 |
we may as well count them in the length of the record. */
|
|
328 |
||
329 |
trailer_len += uncompressed_size; |
|
330 |
} |
|
331 |
||
332 |
return(UNIV_LIKELY(length |
|
333 |
+ trailer_len |
|
334 |
+ page_zip->m_end |
|
335 |
< page_zip_get_size(page_zip))); |
|
336 |
}
|
|
337 |
||
338 |
/**************************************************************************
|
|
339 |
Initialize a compressed page descriptor. */
|
|
340 |
UNIV_INLINE |
|
341 |
void |
|
342 |
page_zip_des_init(
|
|
343 |
/*==============*/
|
|
344 |
page_zip_des_t* page_zip) /* in/out: compressed page |
|
345 |
descriptor */
|
|
346 |
{
|
|
347 |
memset(page_zip, 0, sizeof *page_zip); |
|
348 |
}
|
|
349 |
||
350 |
/**************************************************************************
|
|
351 |
Write a log record of writing to the uncompressed header portion of a page. */
|
|
352 |
UNIV_INTERN |
|
353 |
void |
|
354 |
page_zip_write_header_log(
|
|
355 |
/*======================*/
|
|
356 |
const byte* data,/* in: data on the uncompressed page */ |
|
357 |
ulint length, /* in: length of the data */ |
|
358 |
mtr_t* mtr); /* in: mini-transaction */ |
|
359 |
||
360 |
/**************************************************************************
|
|
361 |
Write data to the uncompressed header portion of a page. The data must
|
|
362 |
already have been written to the uncompressed page.
|
|
363 |
However, the data portion of the uncompressed page may differ from
|
|
364 |
the compressed page when a record is being inserted in
|
|
365 |
page_cur_insert_rec_zip(). */
|
|
366 |
UNIV_INLINE |
|
367 |
void |
|
368 |
page_zip_write_header(
|
|
369 |
/*==================*/
|
|
370 |
page_zip_des_t* page_zip,/* in/out: compressed page */ |
|
371 |
const byte* str, /* in: address on the uncompressed page */ |
|
372 |
ulint length, /* in: length of the data */ |
|
373 |
mtr_t* mtr) /* in: mini-transaction, or NULL */ |
|
374 |
{
|
|
375 |
ulint pos; |
|
376 |
||
377 |
ut_ad(buf_frame_get_page_zip(str) == page_zip); |
|
378 |
ut_ad(page_zip_simple_validate(page_zip)); |
|
379 |
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); |
|
380 |
||
381 |
pos = page_offset(str); |
|
382 |
||
383 |
ut_ad(pos < PAGE_DATA); |
|
384 |
||
385 |
memcpy(page_zip->data + pos, str, length); |
|
386 |
||
387 |
/* The following would fail in page_cur_insert_rec_zip(). */ |
|
388 |
/* ut_ad(page_zip_validate(page_zip, str - pos)); */ |
|
389 |
||
390 |
if (UNIV_LIKELY_NULL(mtr)) { |
|
391 |
page_zip_write_header_log(str, length, mtr); |
|
392 |
} |
|
393 |
}
|
|
394 |
||
395 |
#ifdef UNIV_MATERIALIZE |
|
396 |
# undef UNIV_INLINE |
|
397 |
# define UNIV_INLINE UNIV_INLINE_ORIGINAL |
|
398 |
#endif
|