~drizzle-trunk/drizzle/development

641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1
/*****************************************************************************
2
3
Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
4
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
8
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15
Place, Suite 330, Boston, MA 02111-1307 USA
16
17
*****************************************************************************/
18
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
19
/******************************************************
20
Compressed page interface
21
22
Created June 2005 by Marko Makela
23
*******************************************************/
24
25
#ifdef UNIV_MATERIALIZE
26
# undef UNIV_INLINE
27
# define UNIV_INLINE
28
#endif
29
30
#include "page0zip.h"
31
#include "page0page.h"
32
33
/* The format of compressed pages is as follows.

The header and trailer of the uncompressed pages, excluding the page
directory in the trailer, are copied as is to the header and trailer
of the compressed page.

At the end of the compressed page, there is a dense page directory
pointing to every user record contained on the page, including deleted
records on the free list.  The dense directory is indexed in the
collation order, i.e., in the order in which the record list is
linked on the uncompressed page.  The infimum and supremum records are
excluded.  The two most significant bits of the entries are allocated
for the delete-mark and an n_owned flag indicating the last record in
a chain of records pointed to from the sparse page directory on the
uncompressed page.

The data between PAGE_ZIP_START and the last page directory entry will
be written in compressed format, starting at offset PAGE_DATA.
Infimum and supremum records are not stored.  We exclude the
REC_N_NEW_EXTRA_BYTES in every record header.  These can be recovered
from the dense page directory stored at the end of the compressed
page.

The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and
roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of
externally stored columns are stored separately, in ascending order of
heap_no and column index, starting backwards from the dense page
directory.

The compressed data stream may be followed by a modification log
covering the compressed portion of the page, as follows.

MODIFICATION LOG ENTRY FORMAT
- write record:
  - (heap_no - 1) << 1 (1..2 bytes)
  - extra bytes backwards
  - data bytes
- clear record:
  - (heap_no - 1) << 1 | 1 (1..2 bytes)

The integer values are stored in a variable-length format:
- 0xxxxxxx: 0..127
- 1xxxxxxx xxxxxxxx: 0..32767

The end of the modification log is marked by a 0 byte.

In summary, the compressed page looks like this:

(1) Uncompressed page header (PAGE_DATA bytes)
(2) Compressed index information
(3) Compressed page data
(4) Page modification log (page_zip->m_start..page_zip->m_end)
(5) Empty zero-filled space
(6) BLOB pointers (on leaf pages)
  - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column
  - in descending collation order
(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes,
  - indexed by heap_no
  - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes
  - REC_NODE_PTR_SIZE for non-leaf pages
  - 0 otherwise
(8) dense page directory, stored backwards
  - n_dense = n_heap - 2
  - existing records in ascending collation order
  - deleted records (free list) in link order
*/
99
100
/* Offset at which the compressed area of a page begins */
#define PAGE_ZIP_START		PAGE_NEW_SUPREMUM_END
/* Size, in bytes, of one entry in the compressed page directory */
#define PAGE_ZIP_DIR_SLOT_SIZE	2
/* Bits of a directory entry that hold the record offset */
#define PAGE_ZIP_DIR_SLOT_MASK	0x3fff
/* Bit of a directory entry that holds the 'owned' flag */
#define PAGE_ZIP_DIR_SLOT_OWNED	0x4000
/* Bit of a directory entry that holds the 'deleted' flag */
#define PAGE_ZIP_DIR_SLOT_DEL	0x8000
110
111
/**************************************************************************
112
Determine the size of a compressed page in bytes. */
113
UNIV_INLINE
114
ulint
115
page_zip_get_size(
116
/*==============*/
117
						/* out: size in bytes */
118
	const page_zip_des_t*	page_zip)	/* in: compressed page */
119
{
120
	ulint	size;
121
122
	if (UNIV_UNLIKELY(!page_zip->ssize)) {
123
		return(0);
124
	}
125
126
	size = (PAGE_ZIP_MIN_SIZE >> 1) << page_zip->ssize;
127
128
	ut_ad(size >= PAGE_ZIP_MIN_SIZE);
129
	ut_ad(size <= UNIV_PAGE_SIZE);
130
131
	return(size);
132
}
133
/**************************************************************************
134
Set the size of a compressed page in bytes. */
135
UNIV_INLINE
136
void
137
page_zip_set_size(
138
/*==============*/
139
	page_zip_des_t*	page_zip,	/* in/out: compressed page */
140
	ulint		size)		/* in: size in bytes */
141
{
142
	if (size) {
143
		int	ssize;
144
145
		ut_ad(ut_is_2pow(size));
146
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
147
		for (ssize = 1; size > (ulint) (512 << ssize); ssize++) {
148
		}
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
149
150
		page_zip->ssize = ssize;
151
	} else {
152
		page_zip->ssize = 0;
153
	}
154
155
	ut_ad(page_zip_get_size(page_zip) == size);
156
}
157
158
/**************************************************************************
159
Determine if a record is so big that it needs to be stored externally. */
160
UNIV_INLINE
161
ibool
162
page_zip_rec_needs_ext(
163
/*===================*/
164
				/* out: FALSE if the entire record
165
				can be stored locally on the page */
166
	ulint	rec_size,	/* in: length of the record in bytes */
167
	ulint	comp,		/* in: nonzero=compact format */
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
168
	ulint	n_fields,	/* in: number of fields in the record;
169
				ignored if zip_size == 0 */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
170
	ulint	zip_size)	/* in: compressed page size in bytes, or 0 */
171
{
172
	ut_ad(rec_size > comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES);
173
	ut_ad(ut_is_2pow(zip_size));
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
174
	ut_ad(comp || !zip_size);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
175
176
#if UNIV_PAGE_SIZE > REC_MAX_DATA_SIZE
177
	if (UNIV_UNLIKELY(rec_size >= REC_MAX_DATA_SIZE)) {
178
		return(TRUE);
179
	}
180
#endif
181
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
182
	if (UNIV_UNLIKELY(zip_size)) {
183
		ut_ad(comp);
184
		/* On a compressed page, there is a two-byte entry in
185
		the dense page directory for every record.  But there
186
		is no record header.  There should be enough room for
187
		one record on an empty leaf page.  Subtract 1 byte for
188
		the encoded heap number.  Check also the available space
189
		on the uncompressed page. */
190
		return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2)
191
		       >= (page_zip_empty_size(n_fields, zip_size) - 1)
192
		       || rec_size >= page_get_free_space_of_empty(TRUE) / 2);
193
	}
194
195
	return(rec_size >= page_get_free_space_of_empty(comp) / 2);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
196
}
197
198
#ifdef UNIV_DEBUG
/**************************************************************************
Run basic sanity checks, in the form of debug assertions, on a compressed
page descriptor. */
UNIV_INLINE
ibool
page_zip_simple_validate(
/*=====================*/
					/* out: TRUE if ok */
	const page_zip_des_t*	page_zip)/* in: compressed page descriptor */
{
	ulint	zip_size;

	ut_ad(page_zip);
	ut_ad(page_zip->data);
	ut_ad(page_zip->ssize < PAGE_ZIP_NUM_SSIZE);

	zip_size = page_zip_get_size(page_zip);

	/* The page must be larger than the header plus one dense
	directory slot. */
	ut_ad(zip_size > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE);

	/* The modification log bounds must be ordered and must lie
	within the page. */
	ut_ad(page_zip->m_end >= page_zip->m_start);
	ut_ad(zip_size > page_zip->m_end);

	/* The BLOB pointers must fit within the page. */
	ut_ad(zip_size / BTR_EXTERN_FIELD_REF_SIZE > page_zip->n_blobs);

	return(TRUE);
}
#endif /* UNIV_DEBUG */
220
221
/**************************************************************************
222
Determine if the length of the page trailer. */
223
UNIV_INLINE
224
ibool
225
page_zip_get_trailer_len(
226
/*=====================*/
227
					/* out: length of the page trailer,
228
					in bytes, not including the terminating
229
					zero byte of the modification log */
230
	const page_zip_des_t*	page_zip,/* in: compressed page */
231
	ibool			is_clust,/* in: TRUE if clustered index */
232
	ulint*			entry_size)/* out: size of the uncompressed
233
					portion of a user record */
234
{
235
	ulint	uncompressed_size;
236
237
	ut_ad(page_zip_simple_validate(page_zip));
238
	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
239
240
	if (UNIV_UNLIKELY(!page_is_leaf(page_zip->data))) {
241
		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
242
			+ REC_NODE_PTR_SIZE;
243
		ut_ad(!page_zip->n_blobs);
244
	} else if (UNIV_UNLIKELY(is_clust)) {
245
		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
246
			+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
247
	} else {
248
		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE;
249
		ut_ad(!page_zip->n_blobs);
250
	}
251
252
	if (entry_size) {
253
		*entry_size = uncompressed_size;
254
	}
255
256
	return((page_dir_get_n_heap(page_zip->data) - 2)
257
	       * uncompressed_size
258
	       + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE);
259
}
260
261
/**************************************************************************
262
Determine how big record can be inserted without recompressing the page. */
263
UNIV_INLINE
264
lint
265
page_zip_max_ins_size(
266
/*==================*/
267
					/* out: a positive number
268
					indicating the maximum size of
269
					a record whose insertion is
270
					guaranteed to succeed, or
271
					zero or negative */
272
	const page_zip_des_t*	page_zip,/* in: compressed page */
273
	ibool			is_clust)/* in: TRUE if clustered index */
274
{
275
	ulint	uncompressed_size;
276
	ulint	trailer_len;
277
278
	trailer_len = page_zip_get_trailer_len(page_zip, is_clust,
279
					       &uncompressed_size);
280
281
	/* When a record is created, a pointer may be added to
282
	the dense directory.
283
	Likewise, space for the columns that will not be
284
	compressed will be allocated from the page trailer.
285
	Also the BLOB pointers will be allocated from there, but
286
	we may as well count them in the length of the record. */
287
288
	trailer_len += uncompressed_size;
289
290
	return((lint) page_zip_get_size(page_zip)
291
	       - trailer_len - page_zip->m_end
292
	       - (REC_N_NEW_EXTRA_BYTES - 2));
293
}
294
295
/**************************************************************************
296
Determine if enough space is available in the modification log. */
297
UNIV_INLINE
298
ibool
299
page_zip_available(
300
/*===============*/
301
					/* out: TRUE if enough space
302
					is available */
303
	const page_zip_des_t*	page_zip,/* in: compressed page */
304
	ibool			is_clust,/* in: TRUE if clustered index */
305
	ulint			length,	/* in: combined size of the record */
306
	ulint			create)	/* in: nonzero=add the record to
307
					the heap */
308
{
309
	ulint	uncompressed_size;
310
	ulint	trailer_len;
311
312
	ut_ad(length > REC_N_NEW_EXTRA_BYTES);
313
314
	trailer_len = page_zip_get_trailer_len(page_zip, is_clust,
315
					       &uncompressed_size);
316
317
	/* Subtract the fixed extra bytes and add the maximum
318
	space needed for identifying the record (encoded heap_no). */
319
	length -= REC_N_NEW_EXTRA_BYTES - 2;
320
321
	if (UNIV_UNLIKELY(create)) {
322
		/* When a record is created, a pointer may be added to
323
		the dense directory.
324
		Likewise, space for the columns that will not be
325
		compressed will be allocated from the page trailer.
326
		Also the BLOB pointers will be allocated from there, but
327
		we may as well count them in the length of the record. */
328
329
		trailer_len += uncompressed_size;
330
	}
331
332
	return(UNIV_LIKELY(length
333
			   + trailer_len
334
			   + page_zip->m_end
335
			   < page_zip_get_size(page_zip)));
336
}
337
338
/**************************************************************************
339
Initialize a compressed page descriptor. */
340
UNIV_INLINE
341
void
342
page_zip_des_init(
343
/*==============*/
344
	page_zip_des_t*	page_zip)	/* in/out: compressed page
345
					descriptor */
346
{
347
	memset(page_zip, 0, sizeof *page_zip);
348
}
349
350
/**************************************************************************
351
Write a log record of writing to the uncompressed header portion of a page. */
352
UNIV_INTERN
353
void
354
page_zip_write_header_log(
355
/*======================*/
356
	const byte*	data,/* in: data on the uncompressed page */
357
	ulint		length,	/* in: length of the data */
358
	mtr_t*		mtr);	/* in: mini-transaction */
359
360
/**************************************************************************
361
Write data to the uncompressed header portion of a page.  The data must
362
already have been written to the uncompressed page.
363
However, the data portion of the uncompressed page may differ from
364
the compressed page when a record is being inserted in
365
page_cur_insert_rec_zip(). */
366
UNIV_INLINE
367
void
368
page_zip_write_header(
369
/*==================*/
370
	page_zip_des_t*	page_zip,/* in/out: compressed page */
371
	const byte*	str,	/* in: address on the uncompressed page */
372
	ulint		length,	/* in: length of the data */
373
	mtr_t*		mtr)	/* in: mini-transaction, or NULL */
374
{
375
	ulint	pos;
376
377
	ut_ad(buf_frame_get_page_zip(str) == page_zip);
378
	ut_ad(page_zip_simple_validate(page_zip));
379
	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
380
381
	pos = page_offset(str);
382
383
	ut_ad(pos < PAGE_DATA);
384
385
	memcpy(page_zip->data + pos, str, length);
386
387
	/* The following would fail in page_cur_insert_rec_zip(). */
388
	/* ut_ad(page_zip_validate(page_zip, str - pos)); */
389
390
	if (UNIV_LIKELY_NULL(mtr)) {
391
		page_zip_write_header_log(str, length, mtr);
392
	}
393
}
394
395
#ifdef UNIV_MATERIALIZE
396
# undef UNIV_INLINE
397
# define UNIV_INLINE	UNIV_INLINE_ORIGINAL
398
#endif