~drizzle-trunk/drizzle/development

641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1
/*****************************************************************************

Copyright (C) 1994, 2010, Innobase Oy. All Rights Reserved.
Copyright (C) 2008, Google Inc.

Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/
25
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
26
/**************************************************//**
@file btr/btr0cur.c
The index tree cursor

All changes that row operations make to a B-tree or the records
there must go through this module! Undo log records are written here
of every modify or insert of a clustered index record.

			NOTE!!!
To make sure we do not run out of disk space during a pessimistic
insert or update, we have to reserve 2 x the height of the index tree
many pages in the tablespace before we start the operation, because
if leaf splitting has been started, it is difficult to undo, except
by crashing the database and doing a roll-forward.

Created 10/16/1994 Heikki Tuuri
*******************************************************/
43
44
#include "btr0cur.h"
45
46
#ifdef UNIV_NONINL
47
#include "btr0cur.ic"
48
#endif
49
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
50
#include "row0upd.h"
51
#ifndef UNIV_HOTBACKUP
52
#include "mtr0log.h"
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
53
#include "page0page.h"
54
#include "page0zip.h"
55
#include "rem0rec.h"
56
#include "rem0cmp.h"
57
#include "buf0lru.h"
58
#include "btr0btr.h"
59
#include "btr0sea.h"
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
60
#include "row0purge.h"
61
#include "row0upd.h"
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
62
#include "trx0rec.h"
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
63
#include "trx0roll.h" /* trx_is_recv() */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
64
#include "que0que.h"
65
#include "row0row.h"
66
#include "srv0srv.h"
67
#include "ibuf0ibuf.h"
68
#include "lock0lock.h"
69
#include "zlib.h"
70
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
71
/** Buffered B-tree operation types, introduced as part of delete buffering.
The enumerators are numbered consecutively starting from BTR_NO_OP == 0. */
typedef enum btr_op_enum {
	BTR_NO_OP = 0,			/*!< Not buffered */
	BTR_INSERT_OP,			/*!< Insert, do not ignore UNIQUE */
	BTR_INSERT_IGNORE_UNIQUE_OP,	/*!< Insert, ignoring UNIQUE */
	BTR_DELETE_OP,			/*!< Purge a delete-marked record */
	BTR_DELMARK_OP			/*!< Mark a record for deletion */
} btr_op_t;
79
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
80
#ifdef UNIV_DEBUG
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
81
/** If the following is set to TRUE, this module prints a lot of
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
82
trace information of individual record operations */
83
UNIV_INTERN ibool	btr_cur_print_record_ops = FALSE;
84
#endif /* UNIV_DEBUG */
85
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
86
/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
87
UNIV_INTERN ulint	btr_cur_n_non_sea	= 0;
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
88
/** Number of successful adaptive hash index lookups in
89
btr_cur_search_to_nth_level(). */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
90
UNIV_INTERN ulint	btr_cur_n_sea		= 0;
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
91
/** Old value of btr_cur_n_non_sea.  Copied by
92
srv_refresh_innodb_monitor_stats().  Referenced by
93
srv_printf_innodb_monitor(). */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
94
UNIV_INTERN ulint	btr_cur_n_non_sea_old	= 0;
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
95
/** Old value of btr_cur_n_sea.  Copied by
96
srv_refresh_innodb_monitor_stats().  Referenced by
97
srv_printf_innodb_monitor(). */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
98
UNIV_INTERN ulint	btr_cur_n_sea_old	= 0;
99
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
100
/** In the optimistic insert, if the insert does not fit, but this much space
can be released by page reorganize, then it is reorganized */
#define BTR_CUR_PAGE_REORGANIZE_LIMIT	(UNIV_PAGE_SIZE / 32)

/** The structure of a BLOB part header */
/* @{ */
/*--------------------------------------*/
#define BTR_BLOB_HDR_PART_LEN		0	/*!< BLOB part len on this
						page */
#define BTR_BLOB_HDR_NEXT_PAGE_NO	4	/*!< next BLOB part page no,
						FIL_NULL if none */
/*--------------------------------------*/
#define BTR_BLOB_HDR_SIZE		8	/*!< Size of a BLOB
						part header, in bytes */
/* @} */
115
#endif /* !UNIV_HOTBACKUP */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
116
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
117
/** A BLOB field reference full of zero, for use in assertions and tests.
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
118
Initially, BLOB field references are set to zero, in
119
dtuple_convert_big_rec(). */
2023.3.27 by Monty Taylor
Updated for OSX.
120
const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE]= {0};
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
121
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
122
#ifndef UNIV_HOTBACKUP
123
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
124
Marks all extern fields in a record as owned by the record. This function
125
should be called if the delete mark of a record is removed: a not delete
126
marked record always owns all its extern fields. */
127
static
128
void
129
btr_cur_unmark_extern_fields(
130
/*=========================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
131
	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose uncompressed
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
132
				part will be updated, or NULL */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
133
	rec_t*		rec,	/*!< in/out: record in a clustered index */
134
	dict_index_t*	index,	/*!< in: index of the page */
135
	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
136
	mtr_t*		mtr);	/*!< in: mtr, or NULL if not logged */
137
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
138
Adds path information to the cursor for the current page, for which
139
the binary search has been performed. */
140
static
141
void
142
btr_cur_add_path_info(
143
/*==================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
144
	btr_cur_t*	cursor,		/*!< in: cursor positioned on a page */
145
	ulint		height,		/*!< in: height of the page in tree;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
146
					0 means leaf node */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
147
	ulint		root_height);	/*!< in: root node height in tree */
148
/***********************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
149
Frees the externally stored fields for a record, if the field is mentioned
150
in the update vector. */
151
static
152
void
153
btr_rec_free_updated_extern_fields(
154
/*===============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
155
	dict_index_t*	index,	/*!< in: index of rec; the index tree MUST be
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
156
				X-latched */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
157
	rec_t*		rec,	/*!< in: record */
158
	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
159
				part will be updated, or NULL */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
160
	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
161
	const upd_t*	update,	/*!< in: update vector */
162
	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
163
	mtr_t*		mtr);	/*!< in: mini-transaction handle which contains
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
164
				an X-latch to record page and to the tree */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
165
/***********************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
166
Frees the externally stored fields for a record. */
167
static
168
void
169
btr_rec_free_externally_stored_fields(
170
/*==================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
171
	dict_index_t*	index,	/*!< in: index of the data, the index
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
172
				tree MUST be X-latched */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
173
	rec_t*		rec,	/*!< in: record */
174
	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
175
	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
176
				part will be updated, or NULL */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
177
	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
178
	mtr_t*		mtr);	/*!< in: mini-transaction handle which contains
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
179
				an X-latch to record page and to the index
180
				tree */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
181
/***********************************************************//**
182
Gets the externally stored size of a record, in units of a database page.
183
@return	externally stored part, in units of a database page */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
184
static
185
ulint
186
btr_rec_get_externally_stored_len(
187
/*==============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
188
	rec_t*		rec,	/*!< in: record */
189
	const ulint*	offsets);/*!< in: array returned by rec_get_offsets() */
190
#endif /* !UNIV_HOTBACKUP */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
191
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
192
/******************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
193
The following function is used to set the deleted bit of a record. */
194
UNIV_INLINE
195
void
196
btr_rec_set_deleted_flag(
197
/*=====================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
198
	rec_t*		rec,	/*!< in/out: physical record */
199
	page_zip_des_t*	page_zip,/*!< in/out: compressed page (or NULL) */
200
	ulint		flag)	/*!< in: nonzero if delete marked */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
201
{
202
	if (page_rec_is_comp(rec)) {
203
		rec_set_deleted_flag_new(rec, page_zip, flag);
204
	} else {
205
		ut_ad(!page_zip);
206
		rec_set_deleted_flag_old(rec, flag);
207
	}
208
}
209
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
210
#ifndef UNIV_HOTBACKUP
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
211
/*==================== B-TREE SEARCH =========================*/
212
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
213
/********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
214
Latches the leaf page or pages requested. */
215
static
216
void
217
btr_cur_latch_leaves(
218
/*=================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
219
	page_t*		page,		/*!< in: leaf page where the search
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
220
					converged */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
221
	ulint		space,		/*!< in: space id */
222
	ulint		zip_size,	/*!< in: compressed page size in bytes
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
223
					or 0 for uncompressed pages */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
224
	ulint		page_no,	/*!< in: page number of the leaf */
225
	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
226
	btr_cur_t*	cursor,		/*!< in: cursor */
227
	mtr_t*		mtr)		/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
228
{
229
	ulint		mode;
230
	ulint		left_page_no;
231
	ulint		right_page_no;
232
	buf_block_t*	get_block;
233
234
	ut_ad(page && mtr);
235
236
	switch (latch_mode) {
237
	case BTR_SEARCH_LEAF:
238
	case BTR_MODIFY_LEAF:
239
		mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
240
		get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
241
#ifdef UNIV_BTR_DEBUG
242
		ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
243
#endif /* UNIV_BTR_DEBUG */
244
		get_block->check_index_page_at_flush = TRUE;
245
		return;
246
	case BTR_MODIFY_TREE:
247
		/* x-latch also brothers from left to right */
248
		left_page_no = btr_page_get_prev(page, mtr);
249
250
		if (left_page_no != FIL_NULL) {
251
			get_block = btr_block_get(space, zip_size,
252
						  left_page_no,
253
						  RW_X_LATCH, mtr);
254
#ifdef UNIV_BTR_DEBUG
255
			ut_a(page_is_comp(get_block->frame)
256
			     == page_is_comp(page));
257
			ut_a(btr_page_get_next(get_block->frame, mtr)
258
			     == page_get_page_no(page));
259
#endif /* UNIV_BTR_DEBUG */
260
			get_block->check_index_page_at_flush = TRUE;
261
		}
262
263
		get_block = btr_block_get(space, zip_size, page_no,
264
					  RW_X_LATCH, mtr);
265
#ifdef UNIV_BTR_DEBUG
266
		ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
267
#endif /* UNIV_BTR_DEBUG */
268
		get_block->check_index_page_at_flush = TRUE;
269
270
		right_page_no = btr_page_get_next(page, mtr);
271
272
		if (right_page_no != FIL_NULL) {
273
			get_block = btr_block_get(space, zip_size,
274
						  right_page_no,
275
						  RW_X_LATCH, mtr);
276
#ifdef UNIV_BTR_DEBUG
277
			ut_a(page_is_comp(get_block->frame)
278
			     == page_is_comp(page));
279
			ut_a(btr_page_get_prev(get_block->frame, mtr)
280
			     == page_get_page_no(page));
281
#endif /* UNIV_BTR_DEBUG */
282
			get_block->check_index_page_at_flush = TRUE;
283
		}
284
285
		return;
286
287
	case BTR_SEARCH_PREV:
288
	case BTR_MODIFY_PREV:
289
		mode = latch_mode == BTR_SEARCH_PREV ? RW_S_LATCH : RW_X_LATCH;
290
		/* latch also left brother */
291
		left_page_no = btr_page_get_prev(page, mtr);
292
293
		if (left_page_no != FIL_NULL) {
294
			get_block = btr_block_get(space, zip_size,
295
						  left_page_no, mode, mtr);
296
			cursor->left_block = get_block;
297
#ifdef UNIV_BTR_DEBUG
298
			ut_a(page_is_comp(get_block->frame)
299
			     == page_is_comp(page));
300
			ut_a(btr_page_get_next(get_block->frame, mtr)
301
			     == page_get_page_no(page));
302
#endif /* UNIV_BTR_DEBUG */
303
			get_block->check_index_page_at_flush = TRUE;
304
		}
305
306
		get_block = btr_block_get(space, zip_size, page_no, mode, mtr);
307
#ifdef UNIV_BTR_DEBUG
308
		ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
309
#endif /* UNIV_BTR_DEBUG */
310
		get_block->check_index_page_at_flush = TRUE;
311
		return;
312
	}
313
314
	ut_error;
315
}
316
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
317
/********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
318
Searches an index tree and positions a tree cursor on a given level.
319
NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
320
to node pointer page number fields on the upper levels of the tree!
321
Note that if mode is PAGE_CUR_LE, which is used in inserts, then
322
cursor->up_match and cursor->low_match both will have sensible values.
323
If mode is PAGE_CUR_GE, then up_match will a have a sensible value.
324
325
If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the
326
search tuple should be performed in the B-tree. InnoDB does an insert
327
immediately after the cursor. Thus, the cursor may end up on a user record,
328
or on a page infimum record. */
329
UNIV_INTERN
330
void
331
btr_cur_search_to_nth_level(
332
/*========================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
333
	dict_index_t*	index,	/*!< in: index */
334
	ulint		level,	/*!< in: the tree level of search */
335
	const dtuple_t*	tuple,	/*!< in: data tuple; NOTE: n_fields_cmp in
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
336
				tuple must be set so that it cannot get
337
				compared to the node ptr page number field! */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
338
	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
339
				Inserts should always be made using
340
				PAGE_CUR_LE to search the position! */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
341
	ulint		latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
342
				at most one of BTR_INSERT, BTR_DELETE_MARK,
343
				BTR_DELETE, or BTR_ESTIMATE;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
344
				cursor->left_block is used to store a pointer
345
				to the left neighbor page, in the cases
346
				BTR_SEARCH_PREV and BTR_MODIFY_PREV;
347
				NOTE that if has_search_latch
348
				is != 0, we maybe do not have a latch set
349
				on the cursor page, we assume
350
				the caller uses his search latch
351
				to protect the record! */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
352
	btr_cur_t*	cursor, /*!< in/out: tree cursor; the cursor page is
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
353
				s- or x-latched, but see also above! */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
354
	ulint		has_search_latch,/*!< in: info on the latch mode the
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
355
				caller currently has on btr_search_latch:
356
				RW_S_LATCH, or 0 */
1819.5.187 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6559 from MySQL InnoDB
357
	const char*	file,	/*!< in: file name */
358
	ulint		line,	/*!< in: line where called */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
359
	mtr_t*		mtr)	/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
360
{
361
	page_t*		page;
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
362
	buf_block_t*	block;
363
	ulint		space;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
364
	buf_block_t*	guess;
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
365
	ulint		height;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
366
	ulint		page_no;
367
	ulint		up_match;
368
	ulint		up_bytes;
369
	ulint		low_match;
370
	ulint		low_bytes;
371
	ulint		savepoint;
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
372
	ulint		rw_latch;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
373
	ulint		page_mode;
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
374
	ulint		buf_mode;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
375
	ulint		estimate;
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
376
	ulint		zip_size;
377
	page_cur_t*	page_cursor;
378
	btr_op_t	btr_op;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
379
	ulint		root_height = 0; /* remove warning */
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
380
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
381
#ifdef BTR_CUR_ADAPT
382
	btr_search_t*	info;
383
#endif
384
	mem_heap_t*	heap		= NULL;
385
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
386
	ulint*		offsets		= offsets_;
387
	rec_offs_init(offsets_);
388
	/* Currently, PAGE_CUR_LE is the only search mode used for searches
389
	ending to upper levels */
390
391
	ut_ad(level == 0 || mode == PAGE_CUR_LE);
392
	ut_ad(dict_index_check_search_tuple(index, tuple));
393
	ut_ad(!dict_index_is_ibuf(index) || ibuf_inside());
394
	ut_ad(dtuple_check_typed(tuple));
395
396
#ifdef UNIV_DEBUG
397
	cursor->up_match = ULINT_UNDEFINED;
398
	cursor->low_match = ULINT_UNDEFINED;
399
#endif
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
400
401
	/* These flags are mutually exclusive, they are lumped together
402
	with the latch mode for historical reasons. It's possible for
403
	none of the flags to be set. */
404
	switch (UNIV_EXPECT(latch_mode
405
			    & (BTR_INSERT | BTR_DELETE | BTR_DELETE_MARK),
406
			    0)) {
407
	case 0:
408
		btr_op = BTR_NO_OP;
409
		break;
410
	case BTR_INSERT:
411
		btr_op = (latch_mode & BTR_IGNORE_SEC_UNIQUE)
412
			? BTR_INSERT_IGNORE_UNIQUE_OP
413
			: BTR_INSERT_OP;
414
		break;
415
	case BTR_DELETE:
416
		btr_op = BTR_DELETE_OP;
417
		ut_a(cursor->purge_node);
418
		break;
419
	case BTR_DELETE_MARK:
420
		btr_op = BTR_DELMARK_OP;
421
		break;
422
	default:
423
		/* only one of BTR_INSERT, BTR_DELETE, BTR_DELETE_MARK
424
		should be specified at a time */
425
		ut_error;
426
	}
427
428
	/* Operations on the insert buffer tree cannot be buffered. */
429
	ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index));
430
	/* Operations on the clustered index cannot be buffered. */
431
	ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index));
432
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
433
	estimate = latch_mode & BTR_ESTIMATE;
434
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
435
	/* Turn the flags unrelated to the latch mode off. */
436
	latch_mode &= ~(BTR_INSERT
437
			| BTR_DELETE_MARK
438
			| BTR_DELETE
439
			| BTR_ESTIMATE
440
			| BTR_IGNORE_SEC_UNIQUE);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
441
442
	cursor->flag = BTR_CUR_BINARY;
443
	cursor->index = index;
444
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
445
	cursor->ibuf_cnt = ULINT_UNDEFINED;
446
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
447
#ifndef BTR_CUR_ADAPT
448
	guess = NULL;
449
#else
450
	info = btr_search_get_info(index);
451
452
	guess = info->root_guess;
453
454
#ifdef BTR_CUR_HASH_ADAPT
455
456
#ifdef UNIV_SEARCH_PERF_STAT
457
	info->n_searches++;
458
#endif
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
459
	if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
460
	    && latch_mode <= BTR_MODIFY_LEAF
461
	    && info->last_hash_succ
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
462
	    && !estimate
463
#ifdef PAGE_CUR_LE_OR_EXTENDS
464
	    && mode != PAGE_CUR_LE_OR_EXTENDS
465
#endif /* PAGE_CUR_LE_OR_EXTENDS */
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
466
	    /* If !has_search_latch, we do a dirty read of
467
	    btr_search_enabled below, and btr_search_guess_on_hash()
468
	    will have to check it again. */
469
	    && UNIV_LIKELY(btr_search_enabled)
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
470
	    && btr_search_guess_on_hash(index, info, tuple, mode,
471
					latch_mode, cursor,
472
					has_search_latch, mtr)) {
473
474
		/* Search using the hash index succeeded */
475
476
		ut_ad(cursor->up_match != ULINT_UNDEFINED
477
		      || mode != PAGE_CUR_GE);
478
		ut_ad(cursor->up_match != ULINT_UNDEFINED
479
		      || mode != PAGE_CUR_LE);
480
		ut_ad(cursor->low_match != ULINT_UNDEFINED
481
		      || mode != PAGE_CUR_LE);
482
		btr_cur_n_sea++;
483
484
		return;
485
	}
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
486
#endif /* BTR_CUR_HASH_ADAPT */
487
#endif /* BTR_CUR_ADAPT */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
488
	btr_cur_n_non_sea++;
489
490
	/* If the hash search did not succeed, do binary search down the
491
	tree */
492
493
	if (has_search_latch) {
494
		/* Release possible search latch to obey latching order */
495
		rw_lock_s_unlock(&btr_search_latch);
496
	}
497
498
	/* Store the position of the tree latch we push to mtr so that we
499
	know how to release it when we have latched leaf node(s) */
500
501
	savepoint = mtr_set_savepoint(mtr);
502
503
	if (latch_mode == BTR_MODIFY_TREE) {
504
		mtr_x_lock(dict_index_get_lock(index), mtr);
505
506
	} else if (latch_mode == BTR_CONT_MODIFY_TREE) {
507
		/* Do nothing */
508
		ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
509
					MTR_MEMO_X_LOCK));
510
	} else {
511
		mtr_s_lock(dict_index_get_lock(index), mtr);
512
	}
513
514
	page_cursor = btr_cur_get_page_cur(cursor);
515
516
	space = dict_index_get_space(index);
517
	page_no = dict_index_get_page(index);
518
519
	up_match = 0;
520
	up_bytes = 0;
521
	low_match = 0;
522
	low_bytes = 0;
523
524
	height = ULINT_UNDEFINED;
525
526
	/* We use these modified search modes on non-leaf levels of the
527
	B-tree. These let us end up in the right B-tree leaf. In that leaf
528
	we use the original search mode. */
529
530
	switch (mode) {
531
	case PAGE_CUR_GE:
532
		page_mode = PAGE_CUR_L;
533
		break;
534
	case PAGE_CUR_G:
535
		page_mode = PAGE_CUR_LE;
536
		break;
537
	default:
538
#ifdef PAGE_CUR_LE_OR_EXTENDS
539
		ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
540
		      || mode == PAGE_CUR_LE_OR_EXTENDS);
541
#else /* PAGE_CUR_LE_OR_EXTENDS */
542
		ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE);
543
#endif /* PAGE_CUR_LE_OR_EXTENDS */
544
		page_mode = mode;
545
		break;
546
	}
547
548
	/* Loop and search until we arrive at the desired level */
549
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
550
search_loop:
551
	buf_mode = BUF_GET;
552
	rw_latch = RW_NO_LATCH;
553
554
	if (height != 0) {
555
		/* We are about to fetch the root or a non-leaf page. */
556
	} else if (latch_mode <= BTR_MODIFY_LEAF) {
557
		rw_latch = latch_mode;
558
559
		if (btr_op != BTR_NO_OP
560
		    && ibuf_should_try(index, btr_op != BTR_INSERT_OP)) {
561
562
			/* Try to buffer the operation if the leaf
563
			page is not in the buffer pool. */
564
565
			buf_mode = btr_op == BTR_DELETE_OP
566
				? BUF_GET_IF_IN_POOL_OR_WATCH
567
				: BUF_GET_IF_IN_POOL;
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
568
		}
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
569
	}
570
571
	zip_size = dict_table_zip_size(index->table);
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
572
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
573
retry_page_get:
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
574
	block = buf_page_get_gen(
575
		space, zip_size, page_no, rw_latch, guess, buf_mode,
576
		file, line, mtr);
577
578
	if (block == NULL) {
579
		/* This must be a search to perform an insert/delete
580
		mark/ delete; try using the insert/delete buffer */
581
582
		ut_ad(height == 0);
583
		ut_ad(cursor->thr);
584
585
		switch (btr_op) {
586
		case BTR_INSERT_OP:
587
		case BTR_INSERT_IGNORE_UNIQUE_OP:
588
			ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
589
590
			if (ibuf_insert(IBUF_OP_INSERT, tuple, index,
591
					space, zip_size, page_no,
592
					cursor->thr)) {
593
594
				cursor->flag = BTR_CUR_INSERT_TO_IBUF;
595
596
				goto func_exit;
597
			}
598
			break;
599
600
		case BTR_DELMARK_OP:
601
			ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
602
603
			if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple,
604
					index, space, zip_size,
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
605
					page_no, cursor->thr)) {
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
606
607
				cursor->flag = BTR_CUR_DEL_MARK_IBUF;
608
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
609
				goto func_exit;
610
			}
611
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
612
			break;
613
614
		case BTR_DELETE_OP:
615
			ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
616
617
			if (!row_purge_poss_sec(cursor->purge_node,
618
						index, tuple)) {
619
620
				/* The record cannot be purged yet. */
621
				cursor->flag = BTR_CUR_DELETE_REF;
622
			} else if (ibuf_insert(IBUF_OP_DELETE, tuple,
623
					       index, space, zip_size,
624
					       page_no,
625
					       cursor->thr)) {
626
627
				/* The purge was buffered. */
628
				cursor->flag = BTR_CUR_DELETE_IBUF;
629
			} else {
630
				/* The purge could not be buffered. */
631
				buf_pool_watch_unset(space, page_no);
632
				break;
633
			}
634
635
			buf_pool_watch_unset(space, page_no);
636
			goto func_exit;
637
638
		default:
639
			ut_error;
640
		}
641
642
		/* Insert to the insert/delete buffer did not succeed, we
643
		must read the page from disk. */
644
645
		buf_mode = BUF_GET;
646
647
		goto retry_page_get;
648
	}
649
650
	block->check_index_page_at_flush = TRUE;
651
	page = buf_block_get_frame(block);
652
653
	if (rw_latch != RW_NO_LATCH) {
654
#ifdef UNIV_ZIP_DEBUG
655
		const page_zip_des_t*	page_zip
656
			= buf_block_get_page_zip(block);
657
		ut_a(!page_zip || page_zip_validate(page_zip, page));
658
#endif /* UNIV_ZIP_DEBUG */
659
660
		buf_block_dbg_add_level(block, SYNC_TREE_NODE);
661
	}
662
1819.9.31 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100623110659-pk5bqnmo0j7hj6md from MySQL InnoDB
663
	ut_ad(index->id == btr_page_get_index_id(page));
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
664
665
	if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
666
		/* We are in the root node */
667
668
		height = btr_page_get_level(page, mtr);
669
		root_height = height;
670
		cursor->tree_height = root_height + 1;
671
672
#ifdef BTR_CUR_ADAPT
673
		if (block != guess) {
674
			info->root_guess = block;
675
		}
676
#endif
677
	}
678
679
	if (height == 0) {
680
		if (rw_latch == RW_NO_LATCH) {
681
682
			btr_cur_latch_leaves(
683
				page, space, zip_size, page_no, latch_mode,
684
				cursor, mtr);
685
		}
686
687
		if (latch_mode != BTR_MODIFY_TREE
688
		    && latch_mode != BTR_CONT_MODIFY_TREE) {
689
690
			/* Release the tree s-latch */
691
692
			mtr_release_s_latch_at_savepoint(
693
				mtr, savepoint, dict_index_get_lock(index));
694
		}
695
696
		page_mode = mode;
697
	}
698
699
	page_cur_search_with_match(
700
		block, index, tuple, page_mode, &up_match, &up_bytes,
701
		&low_match, &low_bytes, page_cursor);
702
703
	if (estimate) {
704
		btr_cur_add_path_info(cursor, height, root_height);
705
	}
706
707
	/* If this is the desired level, leave the loop */
708
709
	ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor),
710
					   mtr));
711
712
	if (level != height) {
713
714
		const rec_t*	node_ptr;
715
		ut_ad(height > 0);
716
717
		height--;
718
		guess = NULL;
719
720
		node_ptr = page_cur_get_rec(page_cursor);
721
722
		offsets = rec_get_offsets(
723
			node_ptr, index, offsets, ULINT_UNDEFINED, &heap);
724
725
		/* Go to the child node */
726
		page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
727
728
		if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) {
729
			/* We're doing a search on an ibuf tree and we're one
730
			level above the leaf page. */
731
732
			ulint	is_min_rec;
733
734
			ut_ad(level == 0);
735
736
			is_min_rec = rec_get_info_bits(node_ptr, 0)
737
				& REC_INFO_MIN_REC_FLAG;
738
739
			if (!is_min_rec) {
740
				cursor->ibuf_cnt
741
					= ibuf_rec_get_counter(node_ptr);
742
743
				ut_a(cursor->ibuf_cnt <= 0xFFFF
744
				     || cursor->ibuf_cnt == ULINT_UNDEFINED);
745
			}
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
746
747
			buf_mode = BUF_GET;
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
748
			rw_latch = RW_NO_LATCH;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
749
			goto retry_page_get;
750
		}
751
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
752
		goto search_loop;
753
	}
754
755
	if (level != 0) {
756
		/* x-latch the page */
757
		page = btr_page_get(
758
			space, zip_size, page_no, RW_X_LATCH, mtr);
759
760
		ut_a((ibool)!!page_is_comp(page)
761
		     == dict_table_is_comp(index->table));
762
	} else {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
763
		cursor->low_match = low_match;
764
		cursor->low_bytes = low_bytes;
765
		cursor->up_match = up_match;
766
		cursor->up_bytes = up_bytes;
767
768
#ifdef BTR_CUR_ADAPT
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
769
		/* We do a dirty read of btr_search_enabled here.  We
770
		will properly check btr_search_enabled again in
771
		btr_search_build_page_hash_index() before building a
772
		page hash index, while holding btr_search_latch. */
773
		if (UNIV_LIKELY(btr_search_enabled)) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
774
775
			btr_search_info_update(index, cursor);
776
		}
777
#endif
778
		ut_ad(cursor->up_match != ULINT_UNDEFINED
779
		      || mode != PAGE_CUR_GE);
780
		ut_ad(cursor->up_match != ULINT_UNDEFINED
781
		      || mode != PAGE_CUR_LE);
782
		ut_ad(cursor->low_match != ULINT_UNDEFINED
783
		      || mode != PAGE_CUR_LE);
784
	}
785
786
func_exit:
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
787
788
	if (UNIV_LIKELY_NULL(heap)) {
789
		mem_heap_free(heap);
790
	}
791
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
792
	if (has_search_latch) {
793
794
		rw_lock_s_lock(&btr_search_latch);
795
	}
796
}
797
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
798
/*****************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
799
Opens a cursor at either end of an index. */
800
UNIV_INTERN
801
void
1819.5.187 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6559 from MySQL InnoDB
802
btr_cur_open_at_index_side_func(
803
/*============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
804
	ibool		from_left,	/*!< in: TRUE if open to the low end,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
805
					FALSE if to the high end */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
806
	dict_index_t*	index,		/*!< in: index */
807
	ulint		latch_mode,	/*!< in: latch mode */
808
	btr_cur_t*	cursor,		/*!< in: cursor */
1819.5.187 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6559 from MySQL InnoDB
809
	const char*	file,		/*!< in: file name */
810
	ulint		line,		/*!< in: line where called */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
811
	mtr_t*		mtr)		/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
812
{
813
	page_cur_t*	page_cursor;
814
	ulint		page_no;
815
	ulint		space;
816
	ulint		zip_size;
817
	ulint		height;
818
	ulint		root_height = 0; /* remove warning */
819
	rec_t*		node_ptr;
820
	ulint		estimate;
821
	ulint		savepoint;
822
	mem_heap_t*	heap		= NULL;
823
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
824
	ulint*		offsets		= offsets_;
825
	rec_offs_init(offsets_);
826
827
	estimate = latch_mode & BTR_ESTIMATE;
828
	latch_mode = latch_mode & ~BTR_ESTIMATE;
829
830
	/* Store the position of the tree latch we push to mtr so that we
831
	know how to release it when we have latched the leaf node */
832
833
	savepoint = mtr_set_savepoint(mtr);
834
835
	if (latch_mode == BTR_MODIFY_TREE) {
836
		mtr_x_lock(dict_index_get_lock(index), mtr);
837
	} else {
838
		mtr_s_lock(dict_index_get_lock(index), mtr);
839
	}
840
841
	page_cursor = btr_cur_get_page_cur(cursor);
842
	cursor->index = index;
843
844
	space = dict_index_get_space(index);
845
	zip_size = dict_table_zip_size(index->table);
846
	page_no = dict_index_get_page(index);
847
848
	height = ULINT_UNDEFINED;
849
850
	for (;;) {
851
		buf_block_t*	block;
852
		page_t*		page;
853
		block = buf_page_get_gen(space, zip_size, page_no,
854
					 RW_NO_LATCH, NULL, BUF_GET,
1819.5.187 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6559 from MySQL InnoDB
855
					 file, line, mtr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
856
		page = buf_block_get_frame(block);
1819.9.31 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100623110659-pk5bqnmo0j7hj6md from MySQL InnoDB
857
		ut_ad(index->id == btr_page_get_index_id(page));
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
858
859
		block->check_index_page_at_flush = TRUE;
860
861
		if (height == ULINT_UNDEFINED) {
862
			/* We are in the root node */
863
864
			height = btr_page_get_level(page, mtr);
865
			root_height = height;
866
		}
867
868
		if (height == 0) {
869
			btr_cur_latch_leaves(page, space, zip_size, page_no,
870
					     latch_mode, cursor, mtr);
871
872
			/* In versions <= 3.23.52 we had forgotten to
873
			release the tree latch here. If in an index scan
874
			we had to scan far to find a record visible to the
875
			current transaction, that could starve others
876
			waiting for the tree latch. */
877
878
			if ((latch_mode != BTR_MODIFY_TREE)
879
			    && (latch_mode != BTR_CONT_MODIFY_TREE)) {
880
881
				/* Release the tree s-latch */
882
883
				mtr_release_s_latch_at_savepoint(
884
					mtr, savepoint,
885
					dict_index_get_lock(index));
886
			}
887
		}
888
889
		if (from_left) {
890
			page_cur_set_before_first(block, page_cursor);
891
		} else {
892
			page_cur_set_after_last(block, page_cursor);
893
		}
894
895
		if (height == 0) {
896
			if (estimate) {
897
				btr_cur_add_path_info(cursor, height,
898
						      root_height);
899
			}
900
901
			break;
902
		}
903
904
		ut_ad(height > 0);
905
906
		if (from_left) {
907
			page_cur_move_to_next(page_cursor);
908
		} else {
909
			page_cur_move_to_prev(page_cursor);
910
		}
911
912
		if (estimate) {
913
			btr_cur_add_path_info(cursor, height, root_height);
914
		}
915
916
		height--;
917
918
		node_ptr = page_cur_get_rec(page_cursor);
919
		offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
920
					  ULINT_UNDEFINED, &heap);
921
		/* Go to the child node */
922
		page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
923
	}
924
925
	if (UNIV_LIKELY_NULL(heap)) {
926
		mem_heap_free(heap);
927
	}
928
}
929
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
930
/**********************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
931
Positions a cursor at a randomly chosen position within a B-tree. */
932
UNIV_INTERN
933
void
1819.5.187 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6559 from MySQL InnoDB
934
btr_cur_open_at_rnd_pos_func(
935
/*=========================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
936
	dict_index_t*	index,		/*!< in: index */
937
	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
938
	btr_cur_t*	cursor,		/*!< in/out: B-tree cursor */
1819.5.187 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6559 from MySQL InnoDB
939
	const char*	file,		/*!< in: file name */
940
	ulint		line,		/*!< in: line where called */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
941
	mtr_t*		mtr)		/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
942
{
943
	page_cur_t*	page_cursor;
944
	ulint		page_no;
945
	ulint		space;
946
	ulint		zip_size;
947
	ulint		height;
948
	rec_t*		node_ptr;
949
	mem_heap_t*	heap		= NULL;
950
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
951
	ulint*		offsets		= offsets_;
952
	rec_offs_init(offsets_);
953
954
	if (latch_mode == BTR_MODIFY_TREE) {
955
		mtr_x_lock(dict_index_get_lock(index), mtr);
956
	} else {
957
		mtr_s_lock(dict_index_get_lock(index), mtr);
958
	}
959
960
	page_cursor = btr_cur_get_page_cur(cursor);
961
	cursor->index = index;
962
963
	space = dict_index_get_space(index);
964
	zip_size = dict_table_zip_size(index->table);
965
	page_no = dict_index_get_page(index);
966
967
	height = ULINT_UNDEFINED;
968
969
	for (;;) {
970
		buf_block_t*	block;
971
		page_t*		page;
972
973
		block = buf_page_get_gen(space, zip_size, page_no,
974
					 RW_NO_LATCH, NULL, BUF_GET,
1819.5.187 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6559 from MySQL InnoDB
975
					 file, line, mtr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
976
		page = buf_block_get_frame(block);
1819.9.31 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100623110659-pk5bqnmo0j7hj6md from MySQL InnoDB
977
		ut_ad(index->id == btr_page_get_index_id(page));
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
978
979
		if (height == ULINT_UNDEFINED) {
980
			/* We are in the root node */
981
982
			height = btr_page_get_level(page, mtr);
983
		}
984
985
		if (height == 0) {
986
			btr_cur_latch_leaves(page, space, zip_size, page_no,
987
					     latch_mode, cursor, mtr);
988
		}
989
990
		page_cur_open_on_rnd_user_rec(block, page_cursor);
991
992
		if (height == 0) {
993
994
			break;
995
		}
996
997
		ut_ad(height > 0);
998
999
		height--;
1000
1001
		node_ptr = page_cur_get_rec(page_cursor);
1002
		offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
1003
					  ULINT_UNDEFINED, &heap);
1004
		/* Go to the child node */
1005
		page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
1006
	}
1007
1008
	if (UNIV_LIKELY_NULL(heap)) {
1009
		mem_heap_free(heap);
1010
	}
1011
}
1012
1013
/*==================== B-TREE INSERT =========================*/
1014
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1015
/*************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1016
Inserts a record if there is enough space, or if enough space can
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
1017
be freed by reorganizing. Differs from btr_cur_optimistic_insert because
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1018
no heuristics is applied to whether it pays to use CPU time for
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1019
reorganizing the page or not.
1020
@return	pointer to inserted record if succeed, else NULL */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1021
static
1022
rec_t*
1023
btr_cur_insert_if_possible(
1024
/*=======================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1025
	btr_cur_t*	cursor,	/*!< in: cursor on page after which to insert;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1026
				cursor stays valid */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1027
	const dtuple_t*	tuple,	/*!< in: tuple to insert; the size info need not
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1028
				have been stored to tuple */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1029
	ulint		n_ext,	/*!< in: number of externally stored columns */
1030
	mtr_t*		mtr)	/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1031
{
1032
	page_cur_t*	page_cursor;
1033
	buf_block_t*	block;
1034
	rec_t*		rec;
1035
1036
	ut_ad(dtuple_check_typed(tuple));
1037
1038
	block = btr_cur_get_block(cursor);
1039
1040
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1041
	page_cursor = btr_cur_get_page_cur(cursor);
1042
1043
	/* Now, try the insert */
1044
	rec = page_cur_tuple_insert(page_cursor, tuple,
1045
				    cursor->index, n_ext, mtr);
1046
1047
	if (UNIV_UNLIKELY(!rec)) {
1048
		/* If record did not fit, reorganize */
1049
1050
		if (btr_page_reorganize(block, cursor->index, mtr)) {
1051
1052
			page_cur_search(block, cursor->index, tuple,
1053
					PAGE_CUR_LE, page_cursor);
1054
1055
			rec = page_cur_tuple_insert(page_cursor, tuple,
1056
						    cursor->index, n_ext, mtr);
1057
		}
1058
	}
1059
1060
	return(rec);
1061
}
1062
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1063
/*************************************************************//**
1064
For an insert, checks the locks and does the undo logging if desired.
1065
@return	DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1066
UNIV_INLINE
1067
ulint
1068
btr_cur_ins_lock_and_undo(
1069
/*======================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1070
	ulint		flags,	/*!< in: undo logging and locking flags: if
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1071
				not zero, the parameters index and thr
1072
				should be specified */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1073
	btr_cur_t*	cursor,	/*!< in: cursor on page after which to insert */
1819.9.197 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20101103091611-a3x9p0yivkvu5u9i from MySQL InnoDB
1074
	dtuple_t*	entry,	/*!< in/out: entry to insert */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1075
	que_thr_t*	thr,	/*!< in: query thread or NULL */
1076
	mtr_t*		mtr,	/*!< in/out: mini-transaction */
1077
	ibool*		inherit)/*!< out: TRUE if the inserted new record maybe
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1078
				should inherit LOCK_GAP type locks from the
1079
				successor record */
1080
{
1081
	dict_index_t*	index;
1082
	ulint		err;
1083
	rec_t*		rec;
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1084
	roll_ptr_t	roll_ptr;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1085
1086
	/* Check if we have to wait for a lock: enqueue an explicit lock
1087
	request if yes */
1088
1089
	rec = btr_cur_get_rec(cursor);
1090
	index = cursor->index;
1091
1092
	err = lock_rec_insert_check_and_lock(flags, rec,
1093
					     btr_cur_get_block(cursor),
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1094
					     index, thr, mtr, inherit);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1095
1096
	if (err != DB_SUCCESS) {
1097
1098
		return(err);
1099
	}
1100
1101
	if (dict_index_is_clust(index) && !dict_index_is_ibuf(index)) {
1102
1103
		err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP,
1104
						    thr, index, entry,
1105
						    NULL, 0, NULL,
1106
						    &roll_ptr);
1107
		if (err != DB_SUCCESS) {
1108
1109
			return(err);
1110
		}
1111
1112
		/* Now we can fill in the roll ptr field in entry */
1113
1114
		if (!(flags & BTR_KEEP_SYS_FLAG)) {
1115
1116
			row_upd_index_entry_sys_field(entry, index,
1117
						      DATA_ROLL_PTR, roll_ptr);
1118
		}
1119
	}
1120
1121
	return(DB_SUCCESS);
1122
}
1123
1124
#ifdef UNIV_DEBUG
/*************************************************************//**
Report information about a transaction: prints to stderr one line that
consists of the transaction id, the operation text and the index name. */
static
void
btr_cur_trx_report(
/*===============*/
	trx_t*			trx,	/*!< in: transaction */
	const dict_index_t*	index,	/*!< in: index */
	const char*		op)	/*!< in: operation */
{
	FILE*	out = stderr;

	fprintf(out, "Trx with id " TRX_ID_FMT " going to ",
		(ullint) trx->id);
	fputs(op, out);
	dict_index_name_print(out, trx, index);
	fputc('\n', out);
}
#endif /* UNIV_DEBUG */
1142
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1143
/*************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1144
Tries to perform an insert to a page in an index tree, next to cursor.
1145
It is assumed that mtr holds an x-latch on the page. The operation does
1146
not succeed if there is too little space on the page. If there is just
1147
one record on the page, the insert will always succeed; this is to
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1148
prevent trying to split a page with just one record.
1149
@return	DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1150
UNIV_INTERN
1151
ulint
1152
btr_cur_optimistic_insert(
1153
/*======================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1154
	ulint		flags,	/*!< in: undo logging and locking flags: if not
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1155
				zero, the parameters index and thr should be
1156
				specified */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1157
	btr_cur_t*	cursor,	/*!< in: cursor on page after which to insert;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1158
				cursor stays valid */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1159
	dtuple_t*	entry,	/*!< in/out: entry to insert */
1160
	rec_t**		rec,	/*!< out: pointer to inserted record if
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1161
				succeed */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1162
	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1163
				be stored externally by the caller, or
1164
				NULL */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1165
	ulint		n_ext,	/*!< in: number of externally stored columns */
1166
	que_thr_t*	thr,	/*!< in: query thread or NULL */
1167
	mtr_t*		mtr)	/*!< in: mtr; if this function returns
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1168
				DB_SUCCESS on a leaf page of a secondary
1169
				index in a compressed tablespace, the
1170
				mtr must be committed before latching
1171
				any further pages */
1172
{
1173
	big_rec_t*	big_rec_vec	= NULL;
1174
	dict_index_t*	index;
1175
	page_cur_t*	page_cursor;
1176
	buf_block_t*	block;
1177
	page_t*		page;
1178
	ulint		max_size;
1179
	rec_t*		dummy_rec;
1180
	ibool		leaf;
1181
	ibool		reorg;
1182
	ibool		inherit;
1183
	ulint		zip_size;
1184
	ulint		rec_size;
1185
	ulint		err;
1186
1187
	*big_rec = NULL;
1188
1189
	block = btr_cur_get_block(cursor);
1190
	page = buf_block_get_frame(block);
1191
	index = cursor->index;
1192
	zip_size = buf_block_get_zip_size(block);
1193
#ifdef UNIV_DEBUG_VALGRIND
1194
	if (zip_size) {
1195
		UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
1196
		UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
1197
	}
1198
#endif /* UNIV_DEBUG_VALGRIND */
1199
1200
	if (!dtuple_check_typed_no_assert(entry)) {
1201
		fputs("InnoDB: Error in a tuple to insert into ", stderr);
1202
		dict_index_name_print(stderr, thr_get_trx(thr), index);
1203
	}
1204
#ifdef UNIV_DEBUG
1205
	if (btr_cur_print_record_ops && thr) {
1206
		btr_cur_trx_report(thr_get_trx(thr), index, "insert into ");
1207
		dtuple_print(stderr, entry);
1208
	}
1209
#endif /* UNIV_DEBUG */
1210
1211
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1212
	max_size = page_get_max_insert_size_after_reorganize(page, 1);
1213
	leaf = page_is_leaf(page);
1214
1215
	/* Calculate the record size when entry is converted to a record */
1216
	rec_size = rec_get_converted_size(index, entry, n_ext);
1217
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
1218
	if (page_zip_rec_needs_ext(rec_size, page_is_comp(page),
1219
				   dtuple_get_n_fields(entry), zip_size)) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1220
1221
		/* The record is so big that we have to store some fields
1222
		externally on separate database pages */
1223
		big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
1224
1225
		if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
1226
1227
			return(DB_TOO_BIG_RECORD);
1228
		}
1229
1230
		rec_size = rec_get_converted_size(index, entry, n_ext);
1231
	}
1232
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
1233
	if (UNIV_UNLIKELY(zip_size)) {
1234
		/* Estimate the free space of an empty compressed page.
1235
		Subtract one byte for the encoded heap_no in the
1236
		modification log. */
1237
		ulint	free_space_zip = page_zip_empty_size(
1238
			cursor->index->n_fields, zip_size) - 1;
1239
		ulint	n_uniq = dict_index_get_n_unique_in_tree(index);
1240
1241
		ut_ad(dict_table_is_comp(index->table));
1242
1243
		/* There should be enough room for two node pointer
1244
		records on an empty non-leaf page.  This prevents
1245
		infinite page splits. */
1246
1247
		if (UNIV_LIKELY(entry->n_fields >= n_uniq)
1248
		    && UNIV_UNLIKELY(REC_NODE_PTR_SIZE
1249
				     + rec_get_converted_size_comp_prefix(
1250
					     index, entry->fields, n_uniq,
1251
					     NULL)
1252
				     /* On a compressed page, there is
1253
				     a two-byte entry in the dense
1254
				     page directory for every record.
1255
				     But there is no record header. */
1256
				     - (REC_N_NEW_EXTRA_BYTES - 2)
1257
				     > free_space_zip / 2)) {
1258
1259
			if (big_rec_vec) {
1260
				dtuple_convert_back_big_rec(
1261
					index, entry, big_rec_vec);
1262
			}
1263
1264
			return(DB_TOO_BIG_RECORD);
1265
		}
1266
	}
1267
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1268
	/* If there have been many consecutive inserts, and we are on the leaf
1269
	level, check if we have to split the page to reserve enough free space
1270
	for future updates of records. */
1271
1272
	if (dict_index_is_clust(index)
1273
	    && (page_get_n_recs(page) >= 2)
1274
	    && UNIV_LIKELY(leaf)
1275
	    && (dict_index_get_space_reserve() + rec_size > max_size)
1276
	    && (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
1277
		|| btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
1278
fail:
1279
		err = DB_FAIL;
1280
fail_err:
1281
1282
		if (big_rec_vec) {
1283
			dtuple_convert_back_big_rec(index, entry, big_rec_vec);
1284
		}
1285
1286
		return(err);
1287
	}
1288
1289
	if (UNIV_UNLIKELY(max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT
1819.7.69 by Vasil Dimov, Stewart Smith
Merge Revision revid:vasil.dimov@oracle.com-20100422112442-enb8mpw7jla5uvaz from MySQL InnoDB
1290
			  || max_size < rec_size)
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1291
	    && UNIV_LIKELY(page_get_n_recs(page) > 1)
1292
	    && page_get_max_insert_size(page, 1) < rec_size) {
1293
1294
		goto fail;
1295
	}
1296
1297
	/* Check locks and write to the undo log, if specified */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1298
	err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
1299
					thr, mtr, &inherit);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1300
1301
	if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
1302
1303
		goto fail_err;
1304
	}
1305
1306
	page_cursor = btr_cur_get_page_cur(cursor);
1307
1308
	/* Now, try the insert */
1309
1310
	{
1311
		const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);
1312
		*rec = page_cur_tuple_insert(page_cursor, entry, index,
1313
					     n_ext, mtr);
1314
		reorg = page_cursor_rec != page_cur_get_rec(page_cursor);
1315
1316
		if (UNIV_UNLIKELY(reorg)) {
1317
			ut_a(zip_size);
1318
			ut_a(*rec);
1319
		}
1320
	}
1321
1322
	if (UNIV_UNLIKELY(!*rec) && UNIV_LIKELY(!reorg)) {
1323
		/* If the record did not fit, reorganize */
1324
		if (UNIV_UNLIKELY(!btr_page_reorganize(block, index, mtr))) {
1325
			ut_a(zip_size);
1326
1327
			goto fail;
1328
		}
1329
1330
		ut_ad(zip_size
1331
		      || page_get_max_insert_size(page, 1) == max_size);
1332
1333
		reorg = TRUE;
1334
1335
		page_cur_search(block, index, entry, PAGE_CUR_LE, page_cursor);
1336
1337
		*rec = page_cur_tuple_insert(page_cursor, entry, index,
1338
					     n_ext, mtr);
1339
1340
		if (UNIV_UNLIKELY(!*rec)) {
1341
			if (UNIV_LIKELY(zip_size != 0)) {
1342
1343
				goto fail;
1344
			}
1345
1346
			fputs("InnoDB: Error: cannot insert tuple ", stderr);
1347
			dtuple_print(stderr, entry);
1348
			fputs(" into ", stderr);
1349
			dict_index_name_print(stderr, thr_get_trx(thr), index);
1350
			fprintf(stderr, "\nInnoDB: max insert size %lu\n",
1351
				(ulong) max_size);
1352
			ut_error;
1353
		}
1354
	}
1355
1356
#ifdef BTR_CUR_HASH_ADAPT
1357
	if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) {
1358
		btr_search_update_hash_node_on_insert(cursor);
1359
	} else {
1360
		btr_search_update_hash_on_insert(cursor);
1361
	}
1362
#endif
1363
1364
	if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) {
1365
1366
		lock_update_insert(block, *rec);
1367
	}
1368
1369
#if 0
1370
	fprintf(stderr, "Insert into page %lu, max ins size %lu,"
1371
		" rec %lu ind type %lu\n",
1372
		buf_block_get_page_no(block), max_size,
1373
		rec_size + PAGE_DIR_SLOT_SIZE, index->type);
1374
#endif
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1375
	if (leaf && !dict_index_is_clust(index)) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1376
		/* Update the free bits of the B-tree page in the
1377
		insert buffer bitmap. */
1378
1379
		/* The free bits in the insert buffer bitmap must
1380
		never exceed the free space on a page.  It is safe to
1381
		decrement or reset the bits in the bitmap in a
1382
		mini-transaction that is committed before the
1383
		mini-transaction that affects the free space. */
1384
1385
		/* It is unsafe to increment the bits in a separately
1386
		committed mini-transaction, because in crash recovery,
1387
		the free bits could momentarily be set too high. */
1388
1389
		if (zip_size) {
1390
			/* Update the bits in the same mini-transaction. */
1391
			ibuf_update_free_bits_zip(block, mtr);
1392
		} else {
1393
			/* Decrement the bits in a separate
1394
			mini-transaction. */
1395
			ibuf_update_free_bits_if_full(
1396
				block, max_size,
1397
				rec_size + PAGE_DIR_SLOT_SIZE);
1398
		}
1399
	}
1400
1401
	*big_rec = big_rec_vec;
1402
1403
	return(DB_SUCCESS);
1404
}
1405
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1406
/*************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1407
Performs an insert on a page of an index tree. It is assumed that mtr
1408
holds an x-latch on the tree and on the cursor page. If the insert is
1409
made on the leaf level, to avoid deadlocks, mtr must also own x-latches
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1410
to brothers of page, if those brothers exist.
1411
@return	DB_SUCCESS or error number */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1412
UNIV_INTERN
1413
ulint
1414
btr_cur_pessimistic_insert(
1415
/*=======================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1416
	ulint		flags,	/*!< in: undo logging and locking flags: if not
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1417
				zero, the parameter thr should be
1418
				specified; if no undo logging is specified,
1419
				then the caller must have reserved enough
1420
				free extents in the file space so that the
1421
				insertion will certainly succeed */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1422
	btr_cur_t*	cursor,	/*!< in: cursor after which to insert;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1423
				cursor stays valid */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1424
	dtuple_t*	entry,	/*!< in/out: entry to insert */
1425
	rec_t**		rec,	/*!< out: pointer to inserted record if
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1426
				succeed */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1427
	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1428
				be stored externally by the caller, or
1429
				NULL */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1430
	ulint		n_ext,	/*!< in: number of externally stored columns */
1431
	que_thr_t*	thr,	/*!< in: query thread or NULL */
1432
	mtr_t*		mtr)	/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1433
{
1434
	dict_index_t*	index		= cursor->index;
1435
	ulint		zip_size	= dict_table_zip_size(index->table);
1436
	big_rec_t*	big_rec_vec	= NULL;
1437
	mem_heap_t*	heap		= NULL;
1438
	ulint		err;
1439
	ibool		dummy_inh;
1440
	ibool		success;
1441
	ulint		n_extents	= 0;
1442
	ulint		n_reserved;
1443
1444
	ut_ad(dtuple_check_typed(entry));
1445
1446
	*big_rec = NULL;
1447
1448
	ut_ad(mtr_memo_contains(mtr,
1449
				dict_index_get_lock(btr_cur_get_index(cursor)),
1450
				MTR_MEMO_X_LOCK));
1451
	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
1452
				MTR_MEMO_PAGE_X_FIX));
1453
1454
	/* Try first an optimistic insert; reset the cursor flag: we do not
1455
	assume anything of how it was positioned */
1456
1457
	cursor->flag = BTR_CUR_BINARY;
1458
1459
	err = btr_cur_optimistic_insert(flags, cursor, entry, rec,
1460
					big_rec, n_ext, thr, mtr);
1461
	if (err != DB_FAIL) {
1462
1463
		return(err);
1464
	}
1465
1466
	/* Retry with a pessimistic insert. Check locks and write to undo log,
1467
	if specified */
1468
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1469
	err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
1470
					thr, mtr, &dummy_inh);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1471
1472
	if (err != DB_SUCCESS) {
1473
1474
		return(err);
1475
	}
1476
1477
	if (!(flags & BTR_NO_UNDO_LOG_FLAG)) {
1478
		/* First reserve enough free space for the file segments
1479
		of the index tree, so that the insert will not fail because
1480
		of lack of space */
1481
1482
		n_extents = cursor->tree_height / 16 + 3;
1483
1484
		success = fsp_reserve_free_extents(&n_reserved, index->space,
1485
						   n_extents, FSP_NORMAL, mtr);
1486
		if (!success) {
1487
			return(DB_OUT_OF_FILE_SPACE);
1488
		}
1489
	}
1490
1491
	if (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, n_ext),
1492
				   dict_table_is_comp(index->table),
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
1493
				   dict_index_get_n_fields(index),
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1494
				   zip_size)) {
1495
		/* The record is so big that we have to store some fields
1496
		externally on separate database pages */
1497
1498
		if (UNIV_LIKELY_NULL(big_rec_vec)) {
1499
			/* This should never happen, but we handle
1500
			the situation in a robust manner. */
1501
			ut_ad(0);
1502
			dtuple_convert_back_big_rec(index, entry, big_rec_vec);
1503
		}
1504
1505
		big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
1506
1507
		if (big_rec_vec == NULL) {
1508
1509
			if (n_extents > 0) {
1510
				fil_space_release_free_extents(index->space,
1511
							       n_reserved);
1512
			}
1513
			return(DB_TOO_BIG_RECORD);
1514
		}
1515
	}
1516
1517
	if (dict_index_get_page(index)
1518
	    == buf_block_get_page_no(btr_cur_get_block(cursor))) {
1519
1520
		/* The page is the root page */
1521
		*rec = btr_root_raise_and_insert(cursor, entry, n_ext, mtr);
1522
	} else {
1523
		*rec = btr_page_split_and_insert(cursor, entry, n_ext, mtr);
1524
	}
1525
1526
	if (UNIV_LIKELY_NULL(heap)) {
1527
		mem_heap_free(heap);
1528
	}
1529
1530
	ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec);
1531
1532
#ifdef BTR_CUR_ADAPT
1533
	btr_search_update_hash_on_insert(cursor);
1534
#endif
1535
	if (!(flags & BTR_NO_LOCKING_FLAG)) {
1536
1537
		lock_update_insert(btr_cur_get_block(cursor), *rec);
1538
	}
1539
1540
	if (n_extents > 0) {
1541
		fil_space_release_free_extents(index->space, n_reserved);
1542
	}
1543
1544
	*big_rec = big_rec_vec;
1545
1546
	return(DB_SUCCESS);
1547
}
1548
1549
/*==================== B-TREE UPDATE =========================*/
1550
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1551
/*************************************************************//**
1552
For an update, checks the locks and does the undo logging.
1553
@return	DB_SUCCESS, DB_WAIT_LOCK, or error number */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1554
UNIV_INLINE
1555
ulint
1556
btr_cur_upd_lock_and_undo(
1557
/*======================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1558
	ulint		flags,	/*!< in: undo logging and locking flags */
1559
	btr_cur_t*	cursor,	/*!< in: cursor on record to update */
1560
	const upd_t*	update,	/*!< in: update vector */
1561
	ulint		cmpl_info,/*!< in: compiler info on secondary index
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1562
				updates */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1563
	que_thr_t*	thr,	/*!< in: query thread */
1564
	mtr_t*		mtr,	/*!< in/out: mini-transaction */
1565
	roll_ptr_t*	roll_ptr)/*!< out: roll pointer */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1566
{
1567
	dict_index_t*	index;
1568
	rec_t*		rec;
1569
	ulint		err;
1570
1571
	ut_ad(cursor && update && thr && roll_ptr);
1572
1573
	rec = btr_cur_get_rec(cursor);
1574
	index = cursor->index;
1575
1576
	if (!dict_index_is_clust(index)) {
1577
		/* We do undo logging only when we update a clustered index
1578
		record */
1579
		return(lock_sec_rec_modify_check_and_lock(
1580
			       flags, btr_cur_get_block(cursor), rec,
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1581
			       index, thr, mtr));
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1582
	}
1583
1584
	/* Check if we have to wait for a lock: enqueue an explicit lock
1585
	request if yes */
1586
1587
	err = DB_SUCCESS;
1588
1589
	if (!(flags & BTR_NO_LOCKING_FLAG)) {
1590
		mem_heap_t*	heap		= NULL;
1591
		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
1592
		rec_offs_init(offsets_);
1593
1594
		err = lock_clust_rec_modify_check_and_lock(
1595
			flags, btr_cur_get_block(cursor), rec, index,
1596
			rec_get_offsets(rec, index, offsets_,
1597
					ULINT_UNDEFINED, &heap), thr);
1598
		if (UNIV_LIKELY_NULL(heap)) {
1599
			mem_heap_free(heap);
1600
		}
1601
		if (err != DB_SUCCESS) {
1602
1603
			return(err);
1604
		}
1605
	}
1606
1607
	/* Append the info about the update in the undo log */
1608
1609
	err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
1610
					    index, NULL, update,
1611
					    cmpl_info, rec, roll_ptr);
1612
	return(err);
1613
}
1614
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1615
/***********************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1616
Writes a redo log record of updating a record in-place. */
1617
UNIV_INLINE
1618
void
1619
btr_cur_update_in_place_log(
1620
/*========================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1621
	ulint		flags,		/*!< in: flags */
1622
	rec_t*		rec,		/*!< in: record */
1623
	dict_index_t*	index,		/*!< in: index where cursor positioned */
1624
	const upd_t*	update,		/*!< in: update vector */
1625
	trx_t*		trx,		/*!< in: transaction */
1626
	roll_ptr_t	roll_ptr,	/*!< in: roll ptr */
1627
	mtr_t*		mtr)		/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1628
{
1629
	byte*	log_ptr;
1630
	page_t*	page	= page_align(rec);
1631
	ut_ad(flags < 256);
1632
	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
1633
1634
	log_ptr = mlog_open_and_write_index(mtr, rec, index, page_is_comp(page)
1635
					    ? MLOG_COMP_REC_UPDATE_IN_PLACE
1636
					    : MLOG_REC_UPDATE_IN_PLACE,
1637
					    1 + DATA_ROLL_PTR_LEN + 14 + 2
1638
					    + MLOG_BUF_MARGIN);
1639
1640
	if (!log_ptr) {
1641
		/* Logging in mtr is switched off during crash recovery */
1642
		return;
1643
	}
1644
1645
	/* The code below assumes index is a clustered index: change index to
1646
	the clustered index if we are updating a secondary index record (or we
1647
	could as well skip writing the sys col values to the log in this case
1648
	because they are not needed for a secondary index record update) */
1649
1650
	index = dict_table_get_first_index(index->table);
1651
1652
	mach_write_to_1(log_ptr, flags);
1653
	log_ptr++;
1654
1655
	log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
1656
						mtr);
1657
	mach_write_to_2(log_ptr, page_offset(rec));
1658
	log_ptr += 2;
1659
1660
	row_upd_index_write_log(update, log_ptr, mtr);
1661
}
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1662
#endif /* UNIV_HOTBACKUP */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1663
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1664
/***********************************************************//**
1665
Parses a redo log record of updating a record in-place.
1666
@return	end of log record or NULL */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1667
UNIV_INTERN
1668
byte*
1669
btr_cur_parse_update_in_place(
1670
/*==========================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1671
	byte*		ptr,	/*!< in: buffer */
1672
	byte*		end_ptr,/*!< in: buffer end */
1673
	page_t*		page,	/*!< in/out: page or NULL */
1674
	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
1675
	dict_index_t*	index)	/*!< in: index corresponding to page */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1676
{
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1677
	ulint		flags;
1678
	rec_t*		rec;
1679
	upd_t*		update;
1680
	ulint		pos;
1681
	trx_id_t	trx_id;
1682
	roll_ptr_t	roll_ptr;
1683
	ulint		rec_offset;
1684
	mem_heap_t*	heap;
1685
	ulint*		offsets;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1686
1687
	if (end_ptr < ptr + 1) {
1688
1689
		return(NULL);
1690
	}
1691
1692
	flags = mach_read_from_1(ptr);
1693
	ptr++;
1694
1695
	ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr);
1696
1697
	if (ptr == NULL) {
1698
1699
		return(NULL);
1700
	}
1701
1702
	if (end_ptr < ptr + 2) {
1703
1704
		return(NULL);
1705
	}
1706
1707
	rec_offset = mach_read_from_2(ptr);
1708
	ptr += 2;
1709
1710
	ut_a(rec_offset <= UNIV_PAGE_SIZE);
1711
1712
	heap = mem_heap_create(256);
1713
1714
	ptr = row_upd_index_parse(ptr, end_ptr, heap, &update);
1715
1716
	if (!ptr || !page) {
1717
1718
		goto func_exit;
1719
	}
1720
1721
	ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
1722
	rec = page + rec_offset;
1723
1724
	/* We do not need to reserve btr_search_latch, as the page is only
1725
	being recovered, and there cannot be a hash index to it. */
1726
1727
	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
1728
1729
	if (!(flags & BTR_KEEP_SYS_FLAG)) {
1730
		row_upd_rec_sys_fields_in_recovery(rec, page_zip, offsets,
1731
						   pos, trx_id, roll_ptr);
1732
	}
1733
1734
	row_upd_rec_in_place(rec, index, offsets, update, page_zip);
1735
1736
func_exit:
1737
	mem_heap_free(heap);
1738
1739
	return(ptr);
1740
}
1741
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1742
#ifndef UNIV_HOTBACKUP
1743
/*************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1744
See if there is enough place in the page modification log to log
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1745
an update-in-place.
1746
@return	TRUE if enough place */
1819.9.169 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20101019060415-bj3u6ewk022mk4nr from MySQL InnoDB
1747
UNIV_INTERN
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1748
ibool
1749
btr_cur_update_alloc_zip(
1750
/*=====================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1751
	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
1752
	buf_block_t*	block,	/*!< in/out: buffer page */
1753
	dict_index_t*	index,	/*!< in: the index corresponding to the block */
1754
	ulint		length,	/*!< in: size needed */
1755
	ibool		create,	/*!< in: TRUE=delete-and-insert,
1756
				FALSE=update-in-place */
1757
	mtr_t*		mtr)	/*!< in: mini-transaction */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1758
{
1759
	ut_a(page_zip == buf_block_get_page_zip(block));
1760
	ut_ad(page_zip);
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1761
	ut_ad(!dict_index_is_ibuf(index));
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1762
1763
	if (page_zip_available(page_zip, dict_index_is_clust(index),
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1764
			       length, create)) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1765
		return(TRUE);
1766
	}
1767
1768
	if (!page_zip->m_nonempty) {
1769
		/* The page has been freshly compressed, so
1770
		recompressing it will not help. */
1771
		return(FALSE);
1772
	}
1773
1774
	if (!page_zip_compress(page_zip, buf_block_get_frame(block),
1775
			       index, mtr)) {
1776
		/* Unable to compress the page */
1777
		return(FALSE);
1778
	}
1779
1780
	/* After recompressing a page, we must make sure that the free
1781
	bits in the insert buffer bitmap will not exceed the free
1782
	space on the page.  Because this function will not attempt
1783
	recompression unless page_zip_available() fails above, it is
1784
	safe to reset the free bits if page_zip_available() fails
1785
	again, below.  The free bits can safely be reset in a separate
1786
	mini-transaction.  If page_zip_available() succeeds below, we
1787
	can be sure that the page_zip_compress() above did not reduce
1788
	the free space available on the page. */
1789
1790
	if (!page_zip_available(page_zip, dict_index_is_clust(index),
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1791
				length, create)) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1792
		/* Out of space: reset the free bits. */
1793
		if (!dict_index_is_clust(index)
1794
		    && page_is_leaf(buf_block_get_frame(block))) {
1795
			ibuf_reset_free_bits(block);
1796
		}
1797
		return(FALSE);
1798
	}
1799
1800
	return(TRUE);
1801
}
1802
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1803
/*************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1804
Updates a record when the update causes no size changes in its fields.
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1805
We assume here that the ordering fields of the record do not change.
1806
@return	DB_SUCCESS or error number */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1807
UNIV_INTERN
1808
ulint
1809
btr_cur_update_in_place(
1810
/*====================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1811
	ulint		flags,	/*!< in: undo logging and locking flags */
1812
	btr_cur_t*	cursor,	/*!< in: cursor on the record to update;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1813
				cursor stays valid and positioned on the
1814
				same record */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1815
	const upd_t*	update,	/*!< in: update vector */
1816
	ulint		cmpl_info,/*!< in: compiler info on secondary index
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1817
				updates */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1818
	que_thr_t*	thr,	/*!< in: query thread */
1819
	mtr_t*		mtr)	/*!< in: mtr; must be committed before
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1820
				latching any further pages */
1821
{
1822
	dict_index_t*	index;
1823
	buf_block_t*	block;
1824
	page_zip_des_t*	page_zip;
1825
	ulint		err;
1826
	rec_t*		rec;
1819.9.31 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100623110659-pk5bqnmo0j7hj6md from MySQL InnoDB
1827
	roll_ptr_t	roll_ptr	= 0;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1828
	trx_t*		trx;
1829
	ulint		was_delete_marked;
1830
	mem_heap_t*	heap		= NULL;
1831
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
1832
	ulint*		offsets		= offsets_;
1833
	rec_offs_init(offsets_);
1834
1835
	rec = btr_cur_get_rec(cursor);
1836
	index = cursor->index;
1837
	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1838
	/* The insert buffer tree should never be updated in place. */
1839
	ut_ad(!dict_index_is_ibuf(index));
1840
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1841
	trx = thr_get_trx(thr);
1842
	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
1843
#ifdef UNIV_DEBUG
1844
	if (btr_cur_print_record_ops && thr) {
1845
		btr_cur_trx_report(trx, index, "update ");
1846
		rec_print_new(stderr, rec, offsets);
1847
	}
1848
#endif /* UNIV_DEBUG */
1849
1850
	block = btr_cur_get_block(cursor);
1851
	page_zip = buf_block_get_page_zip(block);
1852
1853
	/* Check that enough space is available on the compressed page. */
1854
	if (UNIV_LIKELY_NULL(page_zip)
1855
	    && !btr_cur_update_alloc_zip(page_zip, block, index,
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1856
					 rec_offs_size(offsets), FALSE, mtr)) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1857
		return(DB_ZIP_OVERFLOW);
1858
	}
1859
1860
	/* Do lock checking and undo logging */
1861
	err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1862
					thr, mtr, &roll_ptr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1863
	if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
1864
1865
		if (UNIV_LIKELY_NULL(heap)) {
1866
			mem_heap_free(heap);
1867
		}
1868
		return(err);
1869
	}
1870
1871
	if (block->is_hashed) {
1872
		/* The function row_upd_changes_ord_field_binary works only
1873
		if the update vector was built for a clustered index, we must
1874
		NOT call it if index is secondary */
1875
1876
		if (!dict_index_is_clust(index)
1877
		    || row_upd_changes_ord_field_binary(NULL, index, update)) {
1878
1879
			/* Remove possible hash index pointer to this record */
1880
			btr_search_update_hash_on_delete(cursor);
1881
		}
1882
1883
		rw_lock_x_lock(&btr_search_latch);
1884
	}
1885
1886
	if (!(flags & BTR_KEEP_SYS_FLAG)) {
1887
		row_upd_rec_sys_fields(rec, NULL,
1888
				       index, offsets, trx, roll_ptr);
1889
	}
1890
1891
	was_delete_marked = rec_get_deleted_flag(
1892
		rec, page_is_comp(buf_block_get_frame(block)));
1893
1894
	row_upd_rec_in_place(rec, index, offsets, update, page_zip);
1895
1896
	if (block->is_hashed) {
1897
		rw_lock_x_unlock(&btr_search_latch);
1898
	}
1899
1900
	if (page_zip && !dict_index_is_clust(index)
1901
	    && page_is_leaf(buf_block_get_frame(block))) {
1902
		/* Update the free bits in the insert buffer. */
1903
		ibuf_update_free_bits_zip(block, mtr);
1904
	}
1905
1906
	btr_cur_update_in_place_log(flags, rec, index, update,
1907
				    trx, roll_ptr, mtr);
1908
1909
	if (was_delete_marked
1910
	    && !rec_get_deleted_flag(rec, page_is_comp(
1911
					     buf_block_get_frame(block)))) {
1912
		/* The new updated record owns its possible externally
1913
		stored fields */
1914
1915
		btr_cur_unmark_extern_fields(page_zip,
1916
					     rec, index, offsets, mtr);
1917
	}
1918
1919
	if (UNIV_LIKELY_NULL(heap)) {
1920
		mem_heap_free(heap);
1921
	}
1922
	return(DB_SUCCESS);
1923
}
1924
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1925
/*************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1926
Tries to update a record on a page in an index tree. It is assumed that mtr
1927
holds an x-latch on the page. The operation does not succeed if there is too
1928
little space on the page or if the update would result in too empty a page,
1929
so that tree compression is recommended. We assume here that the ordering
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1930
fields of the record do not change.
1931
@return DB_SUCCESS, or DB_OVERFLOW if the updated record does not fit,
1932
DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
1933
there is not enough space left on the compressed page */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1934
UNIV_INTERN
1935
ulint
1936
btr_cur_optimistic_update(
1937
/*======================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1938
	ulint		flags,	/*!< in: undo logging and locking flags */
1939
	btr_cur_t*	cursor,	/*!< in: cursor on the record to update;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1940
				cursor stays valid and positioned on the
1941
				same record */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1942
	const upd_t*	update,	/*!< in: update vector; this must also
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1943
				contain trx id and roll ptr fields */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1944
	ulint		cmpl_info,/*!< in: compiler info on secondary index
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1945
				updates */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1946
	que_thr_t*	thr,	/*!< in: query thread */
1947
	mtr_t*		mtr)	/*!< in: mtr; must be committed before
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1948
				latching any further pages */
1949
{
1950
	dict_index_t*	index;
1951
	page_cur_t*	page_cursor;
1952
	ulint		err;
1953
	buf_block_t*	block;
1954
	page_t*		page;
1955
	page_zip_des_t*	page_zip;
1956
	rec_t*		rec;
1957
	ulint		max_size;
1958
	ulint		new_rec_size;
1959
	ulint		old_rec_size;
1960
	dtuple_t*	new_entry;
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
1961
	roll_ptr_t	roll_ptr;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1962
	trx_t*		trx;
1963
	mem_heap_t*	heap;
1964
	ulint		i;
1965
	ulint		n_ext;
1966
	ulint*		offsets;
1967
1968
	block = btr_cur_get_block(cursor);
1969
	page = buf_block_get_frame(block);
1819.9.117 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100914183037-22cokxcg6i3gbr4v from MySQL InnoDB
1970
	rec = btr_cur_get_rec(cursor);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1971
	index = cursor->index;
1972
	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
1973
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
1974
	/* The insert buffer tree should never be updated in place. */
1975
	ut_ad(!dict_index_is_ibuf(index));
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
1976
1977
	heap = mem_heap_create(1024);
1978
	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
1979
1980
#ifdef UNIV_DEBUG
1981
	if (btr_cur_print_record_ops && thr) {
1982
		btr_cur_trx_report(thr_get_trx(thr), index, "update ");
1983
		rec_print_new(stderr, rec, offsets);
1984
	}
1985
#endif /* UNIV_DEBUG */
1986
1987
	if (!row_upd_changes_field_size_or_external(index, offsets, update)) {
1988
1989
		/* The simplest and the most common case: the update does not
1990
		change the size of any field and none of the updated fields is
1991
		externally stored in rec or update, and there is enough space
1992
		on the compressed page to log the update. */
1993
1994
		mem_heap_free(heap);
1995
		return(btr_cur_update_in_place(flags, cursor, update,
1996
					       cmpl_info, thr, mtr));
1997
	}
1998
1999
	if (rec_offs_any_extern(offsets)) {
2000
any_extern:
2001
		/* Externally stored fields are treated in pessimistic
2002
		update */
2003
2004
		mem_heap_free(heap);
2005
		return(DB_OVERFLOW);
2006
	}
2007
2008
	for (i = 0; i < upd_get_n_fields(update); i++) {
2009
		if (dfield_is_ext(&upd_get_nth_field(update, i)->new_val)) {
2010
2011
			goto any_extern;
2012
		}
2013
	}
2014
2015
	page_cursor = btr_cur_get_page_cur(cursor);
2016
2017
	new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
2018
					   &n_ext, heap);
2019
	/* We checked above that there are no externally stored fields. */
2020
	ut_a(!n_ext);
2021
2022
	/* The page containing the clustered index record
2023
	corresponding to new_entry is latched in mtr.
2024
	Thus the following call is safe. */
2025
	row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
2026
						     FALSE, heap);
2027
	old_rec_size = rec_offs_size(offsets);
2028
	new_rec_size = rec_get_converted_size(index, new_entry, 0);
2029
2030
	page_zip = buf_block_get_page_zip(block);
2031
#ifdef UNIV_ZIP_DEBUG
2032
	ut_a(!page_zip || page_zip_validate(page_zip, page));
2033
#endif /* UNIV_ZIP_DEBUG */
2034
2035
	if (UNIV_LIKELY_NULL(page_zip)
2036
	    && !btr_cur_update_alloc_zip(page_zip, block, index,
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2037
					 new_rec_size, TRUE, mtr)) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2038
		err = DB_ZIP_OVERFLOW;
2039
		goto err_exit;
2040
	}
2041
2042
	if (UNIV_UNLIKELY(new_rec_size
2043
			  >= (page_get_free_space_of_empty(page_is_comp(page))
2044
			      / 2))) {
2045
2046
		err = DB_OVERFLOW;
2047
		goto err_exit;
2048
	}
2049
2050
	if (UNIV_UNLIKELY(page_get_data_size(page)
2051
			  - old_rec_size + new_rec_size
2052
			  < BTR_CUR_PAGE_COMPRESS_LIMIT)) {
2053
2054
		/* The page would become too empty */
2055
2056
		err = DB_UNDERFLOW;
2057
		goto err_exit;
2058
	}
2059
2060
	max_size = old_rec_size
2061
		+ page_get_max_insert_size_after_reorganize(page, 1);
2062
2063
	if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
2064
	       && (max_size >= new_rec_size))
2065
	      || (page_get_n_recs(page) <= 1))) {
2066
2067
		/* There was not enough space, or it did not pay to
2068
		reorganize: for simplicity, we decide what to do assuming a
2069
		reorganization is needed, though it might not be necessary */
2070
2071
		err = DB_OVERFLOW;
2072
		goto err_exit;
2073
	}
2074
2075
	/* Do lock checking and undo logging */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2076
	err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
2077
					thr, mtr, &roll_ptr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2078
	if (err != DB_SUCCESS) {
1819.7.154 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100601120933-r8fnhig5m5qnnohz from MySQL InnoDB
2079
2080
		goto err_exit;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2081
	}
2082
2083
	/* Ok, we may do the replacement. Store on the page infimum the
2084
	explicit locks on rec, before deleting rec (see the comment in
2085
	btr_cur_pessimistic_update). */
2086
2087
	lock_rec_store_on_page_infimum(block, rec);
2088
2089
	btr_search_update_hash_on_delete(cursor);
2090
2091
	/* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
2092
	invokes rec_offs_make_valid() to point to the copied record that
2093
	the fields of new_entry point to.  We have to undo it here. */
2094
	ut_ad(rec_offs_validate(NULL, index, offsets));
2095
	rec_offs_make_valid(page_cur_get_rec(page_cursor), index, offsets);
2096
2097
	page_cur_delete_rec(page_cursor, index, offsets, mtr);
2098
2099
	page_cur_move_to_prev(page_cursor);
2100
2101
	trx = thr_get_trx(thr);
2102
2103
	if (!(flags & BTR_KEEP_SYS_FLAG)) {
2104
		row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
2105
					      roll_ptr);
2106
		row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
2107
					      trx->id);
2108
	}
2109
2110
	/* There are no externally stored columns in new_entry */
2111
	rec = btr_cur_insert_if_possible(cursor, new_entry, 0/*n_ext*/, mtr);
2112
	ut_a(rec); /* <- We calculated above the insert would fit */
2113
2114
	if (page_zip && !dict_index_is_clust(index)
2115
	    && page_is_leaf(page)) {
2116
		/* Update the free bits in the insert buffer. */
2117
		ibuf_update_free_bits_zip(block, mtr);
2118
	}
2119
2120
	/* Restore the old explicit lock state on the record */
2121
2122
	lock_rec_restore_from_page_infimum(block, rec, block);
2123
2124
	page_cur_move_to_next(page_cursor);
2125
1819.7.154 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100601120933-r8fnhig5m5qnnohz from MySQL InnoDB
2126
	err = DB_SUCCESS;
2127
err_exit:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2128
	mem_heap_free(heap);
1819.7.154 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20100601120933-r8fnhig5m5qnnohz from MySQL InnoDB
2129
	return(err);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2130
}
2131
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2132
/*************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2133
If, in a split, a new supremum record was created as the predecessor of the
2134
updated record, the supremum record must inherit exactly the locks on the
2135
updated record. In the split it may have inherited locks from the successor
2136
of the updated record, which is not correct. This function restores the
2137
right locks for the new supremum. */
2138
static
2139
void
2140
btr_cur_pess_upd_restore_supremum(
2141
/*==============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2142
	buf_block_t*	block,	/*!< in: buffer block of rec */
2143
	const rec_t*	rec,	/*!< in: updated record */
2144
	mtr_t*		mtr)	/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2145
{
2146
	page_t*		page;
2147
	buf_block_t*	prev_block;
2148
	ulint		space;
2149
	ulint		zip_size;
2150
	ulint		prev_page_no;
2151
2152
	page = buf_block_get_frame(block);
2153
2154
	if (page_rec_get_next(page_get_infimum_rec(page)) != rec) {
2155
		/* Updated record is not the first user record on its page */
2156
2157
		return;
2158
	}
2159
2160
	space = buf_block_get_space(block);
2161
	zip_size = buf_block_get_zip_size(block);
2162
	prev_page_no = btr_page_get_prev(page, mtr);
2163
2164
	ut_ad(prev_page_no != FIL_NULL);
2165
	prev_block = buf_page_get_with_no_latch(space, zip_size,
2166
						prev_page_no, mtr);
2167
#ifdef UNIV_BTR_DEBUG
2168
	ut_a(btr_page_get_next(prev_block->frame, mtr)
2169
	     == page_get_page_no(page));
2170
#endif /* UNIV_BTR_DEBUG */
2171
2172
	/* We must already have an x-latch on prev_block! */
2173
	ut_ad(mtr_memo_contains(mtr, prev_block, MTR_MEMO_PAGE_X_FIX));
2174
2175
	lock_rec_reset_and_inherit_gap_locks(prev_block, block,
2176
					     PAGE_HEAP_NO_SUPREMUM,
2177
					     page_rec_get_heap_no(rec));
2178
}
2179
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2180
/*************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2181
Performs an update of a record on a page of a tree. It is assumed
2182
that mtr holds an x-latch on the tree and on the cursor page. If the
2183
update is made on the leaf level, to avoid deadlocks, mtr must also
2184
own x-latches to brothers of page, if those brothers exist. We assume
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2185
here that the ordering fields of the record do not change.
2186
@return	DB_SUCCESS or error code */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2187
UNIV_INTERN
2188
ulint
2189
btr_cur_pessimistic_update(
2190
/*=======================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2191
	ulint		flags,	/*!< in: undo logging, locking, and rollback
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2192
				flags */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2193
	btr_cur_t*	cursor,	/*!< in: cursor on the record to update */
2194
	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
2195
	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2196
				be stored externally by the caller, or NULL */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2197
	const upd_t*	update,	/*!< in: update vector; this is allowed also
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2198
				contain trx id and roll ptr fields, but
2199
				the values in update vector have no effect */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2200
	ulint		cmpl_info,/*!< in: compiler info on secondary index
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2201
				updates */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2202
	que_thr_t*	thr,	/*!< in: query thread */
2203
	mtr_t*		mtr)	/*!< in: mtr; must be committed before
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2204
				latching any further pages */
2205
{
2206
	big_rec_t*	big_rec_vec	= NULL;
2207
	big_rec_t*	dummy_big_rec;
2208
	dict_index_t*	index;
2209
	buf_block_t*	block;
2210
	page_t*		page;
2211
	page_zip_des_t*	page_zip;
2212
	rec_t*		rec;
2213
	page_cur_t*	page_cursor;
2214
	dtuple_t*	new_entry;
2215
	ulint		err;
2216
	ulint		optim_err;
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2217
	roll_ptr_t	roll_ptr;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2218
	trx_t*		trx;
2219
	ibool		was_first;
2220
	ulint		n_extents	= 0;
2221
	ulint		n_reserved;
2222
	ulint		n_ext;
2223
	ulint*		offsets		= NULL;
2224
2225
	*big_rec = NULL;
2226
2227
	block = btr_cur_get_block(cursor);
2228
	page = buf_block_get_frame(block);
2229
	page_zip = buf_block_get_page_zip(block);
2230
	rec = btr_cur_get_rec(cursor);
2231
	index = cursor->index;
2232
2233
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
2234
				MTR_MEMO_X_LOCK));
2235
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2236
#ifdef UNIV_ZIP_DEBUG
2237
	ut_a(!page_zip || page_zip_validate(page_zip, page));
2238
#endif /* UNIV_ZIP_DEBUG */
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
2239
	/* The insert buffer tree should never be updated in place. */
2240
	ut_ad(!dict_index_is_ibuf(index));
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2241
2242
	optim_err = btr_cur_optimistic_update(flags, cursor, update,
2243
					      cmpl_info, thr, mtr);
2244
2245
	switch (optim_err) {
2246
	case DB_UNDERFLOW:
2247
	case DB_OVERFLOW:
2248
	case DB_ZIP_OVERFLOW:
2249
		break;
2250
	default:
2251
		return(optim_err);
2252
	}
2253
2254
	/* Do lock checking and undo logging */
2255
	err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2256
					thr, mtr, &roll_ptr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2257
	if (err != DB_SUCCESS) {
2258
2259
		return(err);
2260
	}
2261
2262
	if (optim_err == DB_OVERFLOW) {
2263
		ulint	reserve_flag;
2264
2265
		/* First reserve enough free space for the file segments
2266
		of the index tree, so that the update will not fail because
2267
		of lack of space */
2268
2269
		n_extents = cursor->tree_height / 16 + 3;
2270
2271
		if (flags & BTR_NO_UNDO_LOG_FLAG) {
2272
			reserve_flag = FSP_CLEANING;
2273
		} else {
2274
			reserve_flag = FSP_NORMAL;
2275
		}
2276
2277
		if (!fsp_reserve_free_extents(&n_reserved, index->space,
2278
					      n_extents, reserve_flag, mtr)) {
2279
			return(DB_OUT_OF_FILE_SPACE);
2280
		}
2281
	}
2282
2283
	if (!*heap) {
2284
		*heap = mem_heap_create(1024);
2285
	}
2286
	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, heap);
2287
2288
	trx = thr_get_trx(thr);
2289
2290
	new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
2291
					   &n_ext, *heap);
2292
	/* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
2293
	invokes rec_offs_make_valid() to point to the copied record that
2294
	the fields of new_entry point to.  We have to undo it here. */
2295
	ut_ad(rec_offs_validate(NULL, index, offsets));
2296
	rec_offs_make_valid(rec, index, offsets);
2297
2298
	/* The page containing the clustered index record
2299
	corresponding to new_entry is latched in mtr.  If the
2300
	clustered index record is delete-marked, then its externally
2301
	stored fields cannot have been purged yet, because then the
2302
	purge would also have removed the clustered index record
2303
	itself.  Thus the following call is safe. */
2304
	row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
2305
						     FALSE, *heap);
2306
	if (!(flags & BTR_KEEP_SYS_FLAG)) {
2307
		row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
2308
					      roll_ptr);
2309
		row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
2310
					      trx->id);
2311
	}
2312
2313
	if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(offsets)) {
2314
		/* We are in a transaction rollback undoing a row
2315
		update: we must free possible externally stored fields
2316
		which got new values in the update, if they are not
2317
		inherited values. They can be inherited if we have
2318
		updated the primary key to another value, and then
2319
		update it back again. */
2320
2321
		ut_ad(big_rec_vec == NULL);
2322
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
2323
		btr_rec_free_updated_extern_fields(
2324
			index, rec, page_zip, offsets, update,
2325
			trx_is_recv(trx) ? RB_RECOVERY : RB_NORMAL, mtr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2326
	}
2327
2328
	/* We have to set appropriate extern storage bits in the new
2329
	record to be inserted: we have to remember which fields were such */
2330
2331
	ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
2332
	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap);
2333
	n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
2334
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
2335
	if (UNIV_LIKELY_NULL(page_zip)) {
2336
		ut_ad(page_is_comp(page));
2337
		if (page_zip_rec_needs_ext(
2338
			    rec_get_converted_size(index, new_entry, n_ext),
2339
			    TRUE,
2340
			    dict_index_get_n_fields(index),
2341
			    page_zip_get_size(page_zip))) {
2342
2343
			goto make_external;
2344
		}
2345
	} else if (page_zip_rec_needs_ext(
2346
			   rec_get_converted_size(index, new_entry, n_ext),
2347
			   page_is_comp(page), 0, 0)) {
2348
make_external:
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2349
		big_rec_vec = dtuple_convert_big_rec(index, new_entry, &n_ext);
2350
		if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
2351
2352
			err = DB_TOO_BIG_RECORD;
2353
			goto return_after_reservations;
2354
		}
2355
	}
2356
2357
	/* Store state of explicit locks on rec on the page infimum record,
2358
	before deleting rec. The page infimum acts as a dummy carrier of the
2359
	locks, taking care also of lock releases, before we can move the locks
2360
	back on the actual record. There is a special case: if we are
2361
	inserting on the root page and the insert causes a call of
2362
	btr_root_raise_and_insert. Therefore we cannot in the lock system
2363
	delete the lock structs set on the root page even if the root
2364
	page carries just node pointers. */
2365
2366
	lock_rec_store_on_page_infimum(block, rec);
2367
2368
	btr_search_update_hash_on_delete(cursor);
2369
2370
#ifdef UNIV_ZIP_DEBUG
2371
	ut_a(!page_zip || page_zip_validate(page_zip, page));
2372
#endif /* UNIV_ZIP_DEBUG */
2373
	page_cursor = btr_cur_get_page_cur(cursor);
2374
2375
	page_cur_delete_rec(page_cursor, index, offsets, mtr);
2376
2377
	page_cur_move_to_prev(page_cursor);
2378
2379
	rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);
2380
2381
	if (rec) {
2382
		lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
2383
						   rec, block);
2384
2385
		offsets = rec_get_offsets(rec, index, offsets,
2386
					  ULINT_UNDEFINED, heap);
2387
2388
		if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
2389
			/* The new inserted record owns its possible externally
2390
			stored fields */
2391
			btr_cur_unmark_extern_fields(page_zip,
2392
						     rec, index, offsets, mtr);
2393
		}
2394
2395
		btr_cur_compress_if_useful(cursor, mtr);
2396
2397
		if (page_zip && !dict_index_is_clust(index)
2398
		    && page_is_leaf(page)) {
2399
			/* Update the free bits in the insert buffer. */
2400
			ibuf_update_free_bits_zip(block, mtr);
2401
		}
2402
2403
		err = DB_SUCCESS;
2404
		goto return_after_reservations;
2405
	} else {
2406
		ut_a(optim_err != DB_UNDERFLOW);
2407
2408
		/* Out of space: reset the free bits. */
2409
		if (!dict_index_is_clust(index)
2410
		    && page_is_leaf(page)) {
2411
			ibuf_reset_free_bits(block);
2412
		}
2413
	}
2414
2415
	/* Was the record to be updated positioned as the first user
2416
	record on its page? */
2417
	was_first = page_cur_is_before_first(page_cursor);
2418
2419
	/* The first parameter means that no lock checking and undo logging
2420
	is made in the insert */
2421
2422
	err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG
2423
					 | BTR_NO_LOCKING_FLAG
2424
					 | BTR_KEEP_SYS_FLAG,
2425
					 cursor, new_entry, &rec,
2426
					 &dummy_big_rec, n_ext, NULL, mtr);
2427
	ut_a(rec);
2428
	ut_a(err == DB_SUCCESS);
2429
	ut_a(dummy_big_rec == NULL);
2430
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2431
	if (dict_index_is_sec_or_ibuf(index)) {
2432
		/* Update PAGE_MAX_TRX_ID in the index page header.
2433
		It was not updated by btr_cur_pessimistic_insert()
2434
		because of BTR_NO_LOCKING_FLAG. */
2435
		buf_block_t*	rec_block;
2436
2437
		rec_block = btr_cur_get_block(cursor);
2438
2439
		page_update_max_trx_id(rec_block,
2440
				       buf_block_get_page_zip(rec_block),
2441
				       trx->id, mtr);
2442
	}
2443
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2444
	if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
2445
		/* The new inserted record owns its possible externally
2446
		stored fields */
2447
		buf_block_t*	rec_block = btr_cur_get_block(cursor);
2448
2449
#ifdef UNIV_ZIP_DEBUG
2450
		ut_a(!page_zip || page_zip_validate(page_zip, page));
2451
		page = buf_block_get_frame(rec_block);
2452
#endif /* UNIV_ZIP_DEBUG */
2453
		page_zip = buf_block_get_page_zip(rec_block);
2454
2455
		offsets = rec_get_offsets(rec, index, offsets,
2456
					  ULINT_UNDEFINED, heap);
2457
		btr_cur_unmark_extern_fields(page_zip,
2458
					     rec, index, offsets, mtr);
2459
	}
2460
2461
	lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
2462
					   rec, block);
2463
2464
	/* If necessary, restore also the correct lock state for a new,
2465
	preceding supremum record created in a page split. While the old
2466
	record was nonexistent, the supremum might have inherited its locks
2467
	from a wrong record. */
2468
2469
	if (!was_first) {
2470
		btr_cur_pess_upd_restore_supremum(btr_cur_get_block(cursor),
2471
						  rec, mtr);
2472
	}
2473
2474
return_after_reservations:
2475
#ifdef UNIV_ZIP_DEBUG
2476
	ut_a(!page_zip || page_zip_validate(page_zip, page));
2477
#endif /* UNIV_ZIP_DEBUG */
2478
2479
	if (n_extents > 0) {
2480
		fil_space_release_free_extents(index->space, n_reserved);
2481
	}
2482
2483
	*big_rec = big_rec_vec;
2484
2485
	return(err);
2486
}
2487
2488
/*==================== B-TREE DELETE MARK AND UNMARK ===============*/
2489
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2490
/****************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2491
Writes the redo log record for delete marking or unmarking of an index
2492
record. */
2493
UNIV_INLINE
2494
void
2495
btr_cur_del_mark_set_clust_rec_log(
2496
/*===============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2497
	ulint		flags,	/*!< in: flags */
2498
	rec_t*		rec,	/*!< in: record */
2499
	dict_index_t*	index,	/*!< in: index of the record */
2500
	ibool		val,	/*!< in: value to set */
2501
	trx_t*		trx,	/*!< in: deleting transaction */
2502
	roll_ptr_t	roll_ptr,/*!< in: roll ptr to the undo log record */
2503
	mtr_t*		mtr)	/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2504
{
2505
	byte*	log_ptr;
2506
	ut_ad(flags < 256);
2507
	ut_ad(val <= 1);
2508
2509
	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
2510
2511
	log_ptr = mlog_open_and_write_index(mtr, rec, index,
2512
					    page_rec_is_comp(rec)
2513
					    ? MLOG_COMP_REC_CLUST_DELETE_MARK
2514
					    : MLOG_REC_CLUST_DELETE_MARK,
2515
					    1 + 1 + DATA_ROLL_PTR_LEN
2516
					    + 14 + 2);
2517
2518
	if (!log_ptr) {
2519
		/* Logging in mtr is switched off during crash recovery */
2520
		return;
2521
	}
2522
2523
	mach_write_to_1(log_ptr, flags);
2524
	log_ptr++;
2525
	mach_write_to_1(log_ptr, val);
2526
	log_ptr++;
2527
2528
	log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
2529
						mtr);
2530
	mach_write_to_2(log_ptr, page_offset(rec));
2531
	log_ptr += 2;
2532
2533
	mlog_close(mtr, log_ptr);
2534
}
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2535
#endif /* !UNIV_HOTBACKUP */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2536
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2537
/****************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2538
Parses the redo log record for delete marking or unmarking of a clustered
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2539
index record.
2540
@return	end of log record or NULL */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2541
UNIV_INTERN
2542
byte*
2543
btr_cur_parse_del_mark_set_clust_rec(
2544
/*=================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2545
	byte*		ptr,	/*!< in: buffer */
2546
	byte*		end_ptr,/*!< in: buffer end */
2547
	page_t*		page,	/*!< in/out: page or NULL */
2548
	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
2549
	dict_index_t*	index)	/*!< in: index corresponding to page */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2550
{
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2551
	ulint		flags;
2552
	ulint		val;
2553
	ulint		pos;
2554
	trx_id_t	trx_id;
2555
	roll_ptr_t	roll_ptr;
2556
	ulint		offset;
2557
	rec_t*		rec;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2558
2559
	ut_ad(!page
2560
	      || !!page_is_comp(page) == dict_table_is_comp(index->table));
2561
2562
	if (end_ptr < ptr + 2) {
2563
2564
		return(NULL);
2565
	}
2566
2567
	flags = mach_read_from_1(ptr);
2568
	ptr++;
2569
	val = mach_read_from_1(ptr);
2570
	ptr++;
2571
2572
	ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr);
2573
2574
	if (ptr == NULL) {
2575
2576
		return(NULL);
2577
	}
2578
2579
	if (end_ptr < ptr + 2) {
2580
2581
		return(NULL);
2582
	}
2583
2584
	offset = mach_read_from_2(ptr);
2585
	ptr += 2;
2586
2587
	ut_a(offset <= UNIV_PAGE_SIZE);
2588
2589
	if (page) {
2590
		rec = page + offset;
2591
2592
		/* We do not need to reserve btr_search_latch, as the page
2593
		is only being recovered, and there cannot be a hash index to
2594
		it. */
2595
2596
		btr_rec_set_deleted_flag(rec, page_zip, val);
2597
2598
		if (!(flags & BTR_KEEP_SYS_FLAG)) {
2599
			mem_heap_t*	heap		= NULL;
2600
			ulint		offsets_[REC_OFFS_NORMAL_SIZE];
2601
			rec_offs_init(offsets_);
2602
2603
			row_upd_rec_sys_fields_in_recovery(
2604
				rec, page_zip,
2605
				rec_get_offsets(rec, index, offsets_,
2606
						ULINT_UNDEFINED, &heap),
2607
				pos, trx_id, roll_ptr);
2608
			if (UNIV_LIKELY_NULL(heap)) {
2609
				mem_heap_free(heap);
2610
			}
2611
		}
2612
	}
2613
2614
	return(ptr);
2615
}
2616
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2617
#ifndef UNIV_HOTBACKUP
2618
/***********************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2619
Marks a clustered index record deleted. Writes an undo log record to
2620
undo log on this delete marking. Writes in the trx id field the id
2621
of the deleting transaction, and in the roll ptr field pointer to the
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2622
undo log record created.
2623
@return	DB_SUCCESS, DB_LOCK_WAIT, or error number */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2624
UNIV_INTERN
2625
ulint
2626
btr_cur_del_mark_set_clust_rec(
2627
/*===========================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2628
	ulint		flags,	/*!< in: undo logging and locking flags */
2629
	btr_cur_t*	cursor,	/*!< in: cursor */
2630
	ibool		val,	/*!< in: value to set */
2631
	que_thr_t*	thr,	/*!< in: query thread */
2632
	mtr_t*		mtr)	/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2633
{
2634
	dict_index_t*	index;
2635
	buf_block_t*	block;
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2636
	roll_ptr_t	roll_ptr;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2637
	ulint		err;
2638
	rec_t*		rec;
2639
	page_zip_des_t*	page_zip;
2640
	trx_t*		trx;
2641
	mem_heap_t*	heap		= NULL;
2642
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
2643
	ulint*		offsets		= offsets_;
2644
	rec_offs_init(offsets_);
2645
2646
	rec = btr_cur_get_rec(cursor);
2647
	index = cursor->index;
2648
	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
2649
	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
2650
2651
#ifdef UNIV_DEBUG
2652
	if (btr_cur_print_record_ops && thr) {
2653
		btr_cur_trx_report(thr_get_trx(thr), index, "del mark ");
2654
		rec_print_new(stderr, rec, offsets);
2655
	}
2656
#endif /* UNIV_DEBUG */
2657
2658
	ut_ad(dict_index_is_clust(index));
2659
	ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
2660
2661
	err = lock_clust_rec_modify_check_and_lock(flags,
2662
						   btr_cur_get_block(cursor),
2663
						   rec, index, offsets, thr);
2664
2665
	if (err != DB_SUCCESS) {
2666
2667
		goto func_exit;
2668
	}
2669
2670
	err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
2671
					    index, NULL, NULL, 0, rec,
2672
					    &roll_ptr);
2673
	if (err != DB_SUCCESS) {
2674
2675
		goto func_exit;
2676
	}
2677
2678
	block = btr_cur_get_block(cursor);
2679
2680
	if (block->is_hashed) {
2681
		rw_lock_x_lock(&btr_search_latch);
2682
	}
2683
2684
	page_zip = buf_block_get_page_zip(block);
2685
2686
	btr_rec_set_deleted_flag(rec, page_zip, val);
2687
2688
	trx = thr_get_trx(thr);
2689
2690
	if (!(flags & BTR_KEEP_SYS_FLAG)) {
2691
		row_upd_rec_sys_fields(rec, page_zip,
2692
				       index, offsets, trx, roll_ptr);
2693
	}
2694
2695
	if (block->is_hashed) {
2696
		rw_lock_x_unlock(&btr_search_latch);
2697
	}
2698
2699
	btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
2700
					   roll_ptr, mtr);
2701
2702
func_exit:
2703
	if (UNIV_LIKELY_NULL(heap)) {
2704
		mem_heap_free(heap);
2705
	}
2706
	return(err);
2707
}
2708
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2709
/****************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2710
Writes the redo log record for a delete mark setting of a secondary
2711
index record. */
2712
UNIV_INLINE
2713
void
2714
btr_cur_del_mark_set_sec_rec_log(
2715
/*=============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2716
	rec_t*		rec,	/*!< in: record */
2717
	ibool		val,	/*!< in: value to set */
2718
	mtr_t*		mtr)	/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2719
{
2720
	byte*	log_ptr;
2721
	ut_ad(val <= 1);
2722
2723
	log_ptr = mlog_open(mtr, 11 + 1 + 2);
2724
2725
	if (!log_ptr) {
2726
		/* Logging in mtr is switched off during crash recovery:
2727
		in that case mlog_open returns NULL */
2728
		return;
2729
	}
2730
2731
	log_ptr = mlog_write_initial_log_record_fast(
2732
		rec, MLOG_REC_SEC_DELETE_MARK, log_ptr, mtr);
2733
	mach_write_to_1(log_ptr, val);
2734
	log_ptr++;
2735
2736
	mach_write_to_2(log_ptr, page_offset(rec));
2737
	log_ptr += 2;
2738
2739
	mlog_close(mtr, log_ptr);
2740
}
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2741
#endif /* !UNIV_HOTBACKUP */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2742
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2743
/****************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2744
Parses the redo log record for delete marking or unmarking of a secondary
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2745
index record.
2746
@return	end of log record or NULL */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2747
UNIV_INTERN
2748
byte*
2749
btr_cur_parse_del_mark_set_sec_rec(
2750
/*===============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2751
	byte*		ptr,	/*!< in: buffer */
2752
	byte*		end_ptr,/*!< in: buffer end */
2753
	page_t*		page,	/*!< in/out: page or NULL */
2754
	page_zip_des_t*	page_zip)/*!< in/out: compressed page, or NULL */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2755
{
2756
	ulint	val;
2757
	ulint	offset;
2758
	rec_t*	rec;
2759
2760
	if (end_ptr < ptr + 3) {
2761
2762
		return(NULL);
2763
	}
2764
2765
	val = mach_read_from_1(ptr);
2766
	ptr++;
2767
2768
	offset = mach_read_from_2(ptr);
2769
	ptr += 2;
2770
2771
	ut_a(offset <= UNIV_PAGE_SIZE);
2772
2773
	if (page) {
2774
		rec = page + offset;
2775
2776
		/* We do not need to reserve btr_search_latch, as the page
2777
		is only being recovered, and there cannot be a hash index to
2778
		it. */
2779
2780
		btr_rec_set_deleted_flag(rec, page_zip, val);
2781
	}
2782
2783
	return(ptr);
2784
}
2785
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2786
#ifndef UNIV_HOTBACKUP
2787
/***********************************************************//**
2788
Sets a secondary index record delete mark to TRUE or FALSE.
2789
@return	DB_SUCCESS, DB_LOCK_WAIT, or error number */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2790
UNIV_INTERN
2791
ulint
2792
btr_cur_del_mark_set_sec_rec(
2793
/*=========================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2794
	ulint		flags,	/*!< in: locking flag */
2795
	btr_cur_t*	cursor,	/*!< in: cursor */
2796
	ibool		val,	/*!< in: value to set */
2797
	que_thr_t*	thr,	/*!< in: query thread */
2798
	mtr_t*		mtr)	/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2799
{
2800
	buf_block_t*	block;
2801
	rec_t*		rec;
2802
	ulint		err;
2803
2804
	block = btr_cur_get_block(cursor);
2805
	rec = btr_cur_get_rec(cursor);
2806
2807
#ifdef UNIV_DEBUG
2808
	if (btr_cur_print_record_ops && thr) {
2809
		btr_cur_trx_report(thr_get_trx(thr), cursor->index,
2810
				   "del mark ");
2811
		rec_print(stderr, rec, cursor->index);
2812
	}
2813
#endif /* UNIV_DEBUG */
2814
2815
	err = lock_sec_rec_modify_check_and_lock(flags,
2816
						 btr_cur_get_block(cursor),
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2817
						 rec, cursor->index, thr, mtr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2818
	if (err != DB_SUCCESS) {
2819
2820
		return(err);
2821
	}
2822
2823
	ut_ad(!!page_rec_is_comp(rec)
2824
	      == dict_table_is_comp(cursor->index->table));
2825
2826
	if (block->is_hashed) {
2827
		rw_lock_x_lock(&btr_search_latch);
2828
	}
2829
2830
	btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block), val);
2831
2832
	if (block->is_hashed) {
2833
		rw_lock_x_unlock(&btr_search_latch);
2834
	}
2835
2836
	btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
2837
2838
	return(DB_SUCCESS);
2839
}
2840
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2841
/***********************************************************//**
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
2842
Sets a secondary index record's delete mark to the given value. This
2843
function is only used by the insert buffer merge mechanism. */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2844
UNIV_INTERN
2845
void
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
2846
btr_cur_set_deleted_flag_for_ibuf(
2847
/*==============================*/
2848
	rec_t*		rec,		/*!< in/out: record */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2849
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2850
					corresponding to rec, or NULL
2851
					when the tablespace is
2852
					uncompressed */
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
2853
	ibool		val,		/*!< in: value to set */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2854
	mtr_t*		mtr)		/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2855
{
2856
	/* We do not need to reserve btr_search_latch, as the page has just
2857
	been read to the buffer pool and there cannot be a hash index to it. */
2858
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
2859
	btr_rec_set_deleted_flag(rec, page_zip, val);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2860
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
2861
	btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2862
}
2863
2864
/*==================== B-TREE RECORD REMOVE =========================*/
2865
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2866
/*************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2867
Tries to compress a page of the tree if it seems useful. It is assumed
2868
that mtr holds an x-latch on the tree and on the cursor page. To avoid
2869
deadlocks, mtr must also own x-latches to brothers of page, if those
2870
brothers exist. NOTE: it is assumed that the caller has reserved enough
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2871
free extents so that the compression will always succeed if done!
2872
@return	TRUE if compression occurred */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2873
UNIV_INTERN
2874
ibool
2875
btr_cur_compress_if_useful(
2876
/*=======================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2877
	btr_cur_t*	cursor,	/*!< in: cursor on the page to compress;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2878
				cursor does not stay valid if compression
2879
				occurs */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2880
	mtr_t*		mtr)	/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2881
{
2882
	ut_ad(mtr_memo_contains(mtr,
2883
				dict_index_get_lock(btr_cur_get_index(cursor)),
2884
				MTR_MEMO_X_LOCK));
2885
	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
2886
				MTR_MEMO_PAGE_X_FIX));
2887
2888
	return(btr_cur_compress_recommendation(cursor, mtr)
2889
	       && btr_compress(cursor, mtr));
2890
}
2891
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2892
/*******************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2893
Removes the record on which the tree cursor is positioned on a leaf page.
2894
It is assumed that the mtr has an x-latch on the page where the cursor is
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2895
positioned, but no latch on the whole tree.
2896
@return	TRUE if success, i.e., the page did not become too empty */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2897
UNIV_INTERN
2898
ibool
2899
btr_cur_optimistic_delete(
2900
/*======================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2901
	btr_cur_t*	cursor,	/*!< in: cursor on leaf page, on the record to
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2902
				delete; cursor stays valid: if deletion
2903
				succeeds, on function exit it points to the
2904
				successor of the deleted record */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2905
	mtr_t*		mtr)	/*!< in: mtr; if this function returns
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
2906
				TRUE on a leaf page of a secondary
2907
				index, the mtr must be committed
2908
				before latching any further pages */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2909
{
2910
	buf_block_t*	block;
2911
	rec_t*		rec;
2912
	mem_heap_t*	heap		= NULL;
2913
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
2914
	ulint*		offsets		= offsets_;
2915
	ibool		no_compress_needed;
2916
	rec_offs_init(offsets_);
2917
2918
	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
2919
				MTR_MEMO_PAGE_X_FIX));
2920
	/* This is intended only for leaf page deletions */
2921
2922
	block = btr_cur_get_block(cursor);
2923
2924
	ut_ad(page_is_leaf(buf_block_get_frame(block)));
2925
2926
	rec = btr_cur_get_rec(cursor);
2927
	offsets = rec_get_offsets(rec, cursor->index, offsets,
2928
				  ULINT_UNDEFINED, &heap);
2929
2930
	no_compress_needed = !rec_offs_any_extern(offsets)
2931
		&& btr_cur_can_delete_without_compress(
2932
			cursor, rec_offs_size(offsets), mtr);
2933
2934
	if (no_compress_needed) {
2935
2936
		page_t*		page	= buf_block_get_frame(block);
2937
		page_zip_des_t*	page_zip= buf_block_get_page_zip(block);
2938
		ulint		max_ins	= 0;
2939
2940
		lock_update_delete(block, rec);
2941
2942
		btr_search_update_hash_on_delete(cursor);
2943
2944
		if (!page_zip) {
2945
			max_ins = page_get_max_insert_size_after_reorganize(
2946
				page, 1);
2947
		}
2948
#ifdef UNIV_ZIP_DEBUG
2949
		ut_a(!page_zip || page_zip_validate(page_zip, page));
2950
#endif /* UNIV_ZIP_DEBUG */
2951
		page_cur_delete_rec(btr_cur_get_page_cur(cursor),
2952
				    cursor->index, offsets, mtr);
2953
#ifdef UNIV_ZIP_DEBUG
2954
		ut_a(!page_zip || page_zip_validate(page_zip, page));
2955
#endif /* UNIV_ZIP_DEBUG */
2956
2957
		if (dict_index_is_clust(cursor->index)
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
2958
		    || dict_index_is_ibuf(cursor->index)
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2959
		    || !page_is_leaf(page)) {
2960
			/* The insert buffer does not handle
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
2961
			inserts to clustered indexes, to
2962
			non-leaf pages of secondary index B-trees,
2963
			or to the insert buffer. */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2964
		} else if (page_zip) {
2965
			ibuf_update_free_bits_zip(block, mtr);
2966
		} else {
2967
			ibuf_update_free_bits_low(block, max_ins, mtr);
2968
		}
2969
	}
2970
2971
	if (UNIV_LIKELY_NULL(heap)) {
2972
		mem_heap_free(heap);
2973
	}
2974
2975
	return(no_compress_needed);
2976
}
2977
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2978
/*************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2979
Removes the record on which the tree cursor is positioned. Tries
2980
to compress the page if its fillfactor drops below a threshold
2981
or if it is the only page on the level. It is assumed that mtr holds
2982
an x-latch on the tree and on the cursor page. To avoid deadlocks,
2983
mtr must also own x-latches to brothers of page, if those brothers
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2984
exist.
2985
@return	TRUE if compression occurred */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2986
UNIV_INTERN
2987
ibool
2988
btr_cur_pessimistic_delete(
2989
/*=======================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2990
	ulint*		err,	/*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2991
				the latter may occur because we may have
2992
				to update node pointers on upper levels,
2993
				and in the case of variable length keys
2994
				these may actually grow in size */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2995
	ibool		has_reserved_extents, /*!< in: TRUE if the
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
2996
				caller has already reserved enough free
2997
				extents so that he knows that the operation
2998
				will succeed */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
2999
	btr_cur_t*	cursor,	/*!< in: cursor on the record to delete;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3000
				if compression does not occur, the cursor
3001
				stays valid: it points to successor of
3002
				deleted record on function exit */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3003
	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
3004
	mtr_t*		mtr)	/*!< in: mtr */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3005
{
3006
	buf_block_t*	block;
3007
	page_t*		page;
3008
	page_zip_des_t*	page_zip;
3009
	dict_index_t*	index;
3010
	rec_t*		rec;
3011
	dtuple_t*	node_ptr;
3012
	ulint		n_extents	= 0;
3013
	ulint		n_reserved;
3014
	ibool		success;
3015
	ibool		ret		= FALSE;
3016
	ulint		level;
3017
	mem_heap_t*	heap;
3018
	ulint*		offsets;
3019
3020
	block = btr_cur_get_block(cursor);
3021
	page = buf_block_get_frame(block);
3022
	index = btr_cur_get_index(cursor);
3023
3024
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
3025
				MTR_MEMO_X_LOCK));
3026
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3027
	if (!has_reserved_extents) {
3028
		/* First reserve enough free space for the file segments
3029
		of the index tree, so that the node pointer updates will
3030
		not fail because of lack of space */
3031
3032
		n_extents = cursor->tree_height / 32 + 1;
3033
3034
		success = fsp_reserve_free_extents(&n_reserved,
3035
						   index->space,
3036
						   n_extents,
3037
						   FSP_CLEANING, mtr);
3038
		if (!success) {
3039
			*err = DB_OUT_OF_FILE_SPACE;
3040
3041
			return(FALSE);
3042
		}
3043
	}
3044
3045
	heap = mem_heap_create(1024);
3046
	rec = btr_cur_get_rec(cursor);
3047
	page_zip = buf_block_get_page_zip(block);
3048
#ifdef UNIV_ZIP_DEBUG
3049
	ut_a(!page_zip || page_zip_validate(page_zip, page));
3050
#endif /* UNIV_ZIP_DEBUG */
3051
3052
	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
3053
3054
	if (rec_offs_any_extern(offsets)) {
3055
		btr_rec_free_externally_stored_fields(index,
3056
						      rec, offsets, page_zip,
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
3057
						      rb_ctx, mtr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3058
#ifdef UNIV_ZIP_DEBUG
3059
		ut_a(!page_zip || page_zip_validate(page_zip, page));
3060
#endif /* UNIV_ZIP_DEBUG */
3061
	}
3062
3063
	if (UNIV_UNLIKELY(page_get_n_recs(page) < 2)
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
3064
	    && UNIV_UNLIKELY(dict_index_get_page(index)
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3065
			     != buf_block_get_page_no(block))) {
3066
3067
		/* If there is only one record, drop the whole page in
3068
		btr_discard_page, if this is not the root page */
3069
3070
		btr_discard_page(cursor, mtr);
3071
3072
		*err = DB_SUCCESS;
3073
		ret = TRUE;
3074
3075
		goto return_after_reservations;
3076
	}
3077
3078
	lock_update_delete(block, rec);
3079
	level = btr_page_get_level(page, mtr);
3080
3081
	if (level > 0
3082
	    && UNIV_UNLIKELY(rec == page_rec_get_next(
3083
				     page_get_infimum_rec(page)))) {
3084
3085
		rec_t*	next_rec = page_rec_get_next(rec);
3086
3087
		if (btr_page_get_prev(page, mtr) == FIL_NULL) {
3088
3089
			/* If we delete the leftmost node pointer on a
3090
			non-leaf level, we must mark the new leftmost node
3091
			pointer as the predefined minimum record */
3092
3093
			/* This will make page_zip_validate() fail until
3094
			page_cur_delete_rec() completes.  This is harmless,
3095
			because everything will take place within a single
3096
			mini-transaction and because writing to the redo log
3097
			is an atomic operation (performed by mtr_commit()). */
3098
			btr_set_min_rec_mark(next_rec, mtr);
3099
		} else {
3100
			/* Otherwise, if we delete the leftmost node pointer
3101
			on a page, we have to change the father node pointer
3102
			so that it is equal to the new leftmost node pointer
3103
			on the page */
3104
3105
			btr_node_ptr_delete(index, block, mtr);
3106
3107
			node_ptr = dict_index_build_node_ptr(
3108
				index, next_rec, buf_block_get_page_no(block),
3109
				heap, level);
3110
3111
			btr_insert_on_non_leaf_level(index,
3112
						     level + 1, node_ptr, mtr);
3113
		}
3114
	}
3115
3116
	btr_search_update_hash_on_delete(cursor);
3117
3118
	page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr);
3119
#ifdef UNIV_ZIP_DEBUG
3120
	ut_a(!page_zip || page_zip_validate(page_zip, page));
3121
#endif /* UNIV_ZIP_DEBUG */
3122
3123
	ut_ad(btr_check_node_ptr(index, block, mtr));
3124
3125
	*err = DB_SUCCESS;
3126
3127
return_after_reservations:
3128
	mem_heap_free(heap);
3129
3130
	if (ret == FALSE) {
3131
		ret = btr_cur_compress_if_useful(cursor, mtr);
3132
	}
3133
3134
	if (n_extents > 0) {
3135
		fil_space_release_free_extents(index->space, n_reserved);
3136
	}
3137
3138
	return(ret);
3139
}
3140
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3141
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3142
Adds path information to the cursor for the current page, for which
3143
the binary search has been performed. */
3144
static
3145
void
3146
btr_cur_add_path_info(
3147
/*==================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3148
	btr_cur_t*	cursor,		/*!< in: cursor positioned on a page */
3149
	ulint		height,		/*!< in: height of the page in tree;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3150
					0 means leaf node */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3151
	ulint		root_height)	/*!< in: root node height in tree */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3152
{
3153
	btr_path_t*	slot;
3154
	rec_t*		rec;
1819.9.58 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a from MySQL InnoDB
3155
	page_t*		page;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3156
3157
	ut_a(cursor->path_arr);
3158
3159
	if (root_height >= BTR_PATH_ARRAY_N_SLOTS - 1) {
3160
		/* Do nothing; return empty path */
3161
3162
		slot = cursor->path_arr;
3163
		slot->nth_rec = ULINT_UNDEFINED;
3164
3165
		return;
3166
	}
3167
3168
	if (height == 0) {
3169
		/* Mark end of slots for path */
3170
		slot = cursor->path_arr + root_height + 1;
3171
		slot->nth_rec = ULINT_UNDEFINED;
3172
	}
3173
3174
	rec = btr_cur_get_rec(cursor);
3175
3176
	slot = cursor->path_arr + (root_height - height);
3177
1819.9.58 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a from MySQL InnoDB
3178
	page = page_align(rec);
3179
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3180
	slot->nth_rec = page_rec_get_n_recs_before(rec);
1819.9.58 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a from MySQL InnoDB
3181
	slot->n_recs = page_get_n_recs(page);
3182
	slot->page_no = page_get_page_no(page);
3183
	slot->page_level = btr_page_get_level_low(page);
3184
}
3185
3186
/*******************************************************************//**
3187
Estimate the number of rows between slot1 and slot2 for any level on a
3188
B-tree. This function starts from slot1->page and reads a few pages to
3189
the right, counting their records. If we reach slot2->page quickly then
3190
we know exactly how many records there are between slot1 and slot2 and
3191
we set is_n_rows_exact to TRUE. If we cannot reach slot2->page quickly
3192
then we calculate the average number of records in the pages scanned
3193
so far and assume that all pages that we did not scan up to slot2->page
3194
contain the same number of records, then we multiply that average to
3195
the number of pages between slot1->page and slot2->page (which is
3196
n_rows_on_prev_level). In this case we set is_n_rows_exact to FALSE.
3197
@return	number of rows (exact or estimated) */
3198
static
3199
ib_int64_t
3200
btr_estimate_n_rows_in_range_on_level(
3201
/*==================================*/
3202
	dict_index_t*	index,			/*!< in: index */
3203
	btr_path_t*	slot1,			/*!< in: left border */
3204
	btr_path_t*	slot2,			/*!< in: right border */
3205
	ib_int64_t	n_rows_on_prev_level,	/*!< in: number of rows
3206
						on the previous level for the
3207
						same descend paths; used to
3208
						determine the numbe of pages
3209
						on this level */
3210
	ibool*		is_n_rows_exact)	/*!< out: TRUE if the returned
3211
						value is exact i.e. not an
3212
						estimation */
3213
{
3214
	ulint		space;
3215
	ib_int64_t	n_rows;
3216
	ulint		n_pages_read;
3217
	ulint		page_no;
3218
	ulint		zip_size;
3219
	ulint		level;
3220
3221
	space = dict_index_get_space(index);
3222
3223
	n_rows = 0;
3224
	n_pages_read = 0;
3225
3226
	/* Assume by default that we will scan all pages between
3227
	slot1->page_no and slot2->page_no */
3228
	*is_n_rows_exact = TRUE;
3229
3230
	/* add records from slot1->page_no which are to the right of
3231
	the record which serves as a left border of the range, if any */
3232
	if (slot1->nth_rec < slot1->n_recs) {
3233
		n_rows += slot1->n_recs - slot1->nth_rec;
3234
	}
3235
3236
	/* add records from slot2->page_no which are to the left of
3237
	the record which servers as a right border of the range, if any */
3238
	if (slot2->nth_rec > 1) {
3239
		n_rows += slot2->nth_rec - 1;
3240
	}
3241
3242
	/* count the records in the pages between slot1->page_no and
3243
	slot2->page_no (non inclusive), if any */
3244
3245
	zip_size = fil_space_get_zip_size(space);
3246
3247
	/* Do not read more than this number of pages in order not to hurt
3248
	performance with this code which is just an estimation. If we read
3249
	this many pages before reaching slot2->page_no then we estimate the
3250
	average from the pages scanned so far */
1819.9.191 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20101101084928-ablf6soqj81v0epg from MySQL InnoDB
3251
#	define N_PAGES_READ_LIMIT	10
1819.9.58 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a from MySQL InnoDB
3252
3253
	page_no = slot1->page_no;
3254
	level = slot1->page_level;
3255
3256
	do {
3257
		mtr_t		mtr;
3258
		page_t*		page;
3259
		buf_block_t*	block;
3260
3261
		mtr_start(&mtr);
3262
3263
		/* fetch the page */
3264
		block = buf_page_get(space, zip_size, page_no, RW_S_LATCH,
3265
				     &mtr);
3266
3267
		page = buf_block_get_frame(block);
3268
3269
		/* It is possible that the tree has been reorganized in the
3270
		meantime and this is a different page. If this happens the
3271
		calculated estimate will be bogus, which is not fatal as
3272
		this is only an estimate. We are sure that a page with
3273
		page_no exists because InnoDB never frees pages, only
3274
		reuses them. */
3275
		if (fil_page_get_type(page) != FIL_PAGE_INDEX
3276
		    || btr_page_get_index_id(page) != index->id
3277
		    || btr_page_get_level_low(page) != level) {
3278
3279
			/* The page got reused for something else */
1819.9.190 by Marko Mäkelä
Merge Revision revid:marko.makela@oracle.com-20101101081833-em4abq3hfkijwz6f from MySQL InnoDB
3280
			mtr_commit(&mtr);
1819.9.58 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a from MySQL InnoDB
3281
			goto inexact;
3282
		}
3283
3284
		n_pages_read++;
3285
3286
		if (page_no != slot1->page_no) {
3287
			/* Do not count the records on slot1->page_no,
3288
			we already counted them before this loop. */
3289
			n_rows += page_get_n_recs(page);
3290
		}
3291
3292
		page_no = btr_page_get_next(page, &mtr);
3293
3294
		mtr_commit(&mtr);
3295
3296
		if (n_pages_read == N_PAGES_READ_LIMIT
3297
		    || page_no == FIL_NULL) {
3298
			/* Either we read too many pages or
3299
			we reached the end of the level without passing
3300
			through slot2->page_no, the tree must have changed
3301
			in the meantime */
3302
			goto inexact;
3303
		}
3304
3305
	} while (page_no != slot2->page_no);
3306
3307
	return(n_rows);
3308
3309
inexact:
3310
3311
	*is_n_rows_exact = FALSE;
3312
3313
	/* We did interrupt before reaching slot2->page */
3314
3315
	if (n_pages_read > 0) {
3316
		/* The number of pages on this level is
3317
		n_rows_on_prev_level, multiply it by the
3318
		average number of recs per page so far */
3319
		n_rows = n_rows_on_prev_level
3320
			* n_rows / n_pages_read;
3321
	} else {
3322
		/* The tree changed before we could even
3323
		start with slot1->page_no */
3324
		n_rows = 10;
3325
	}
3326
3327
	return(n_rows);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3328
}
3329
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3330
/*******************************************************************//**
3331
Estimates the number of rows in a given index range.
3332
@return	estimated number of rows */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3333
UNIV_INTERN
3334
ib_int64_t
3335
btr_estimate_n_rows_in_range(
3336
/*=========================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3337
	dict_index_t*	index,	/*!< in: index */
3338
	const dtuple_t*	tuple1,	/*!< in: range start, may also be empty tuple */
3339
	ulint		mode1,	/*!< in: search mode for range start */
3340
	const dtuple_t*	tuple2,	/*!< in: range end, may also be empty tuple */
3341
	ulint		mode2)	/*!< in: search mode for range end */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3342
{
3343
	btr_path_t	path1[BTR_PATH_ARRAY_N_SLOTS];
3344
	btr_path_t	path2[BTR_PATH_ARRAY_N_SLOTS];
3345
	btr_cur_t	cursor;
3346
	btr_path_t*	slot1;
3347
	btr_path_t*	slot2;
3348
	ibool		diverged;
3349
	ibool		diverged_lot;
3350
	ulint		divergence_level;
3351
	ib_int64_t	n_rows;
1819.9.58 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a from MySQL InnoDB
3352
	ibool		is_n_rows_exact;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3353
	ulint		i;
3354
	mtr_t		mtr;
3355
3356
	mtr_start(&mtr);
3357
3358
	cursor.path_arr = path1;
3359
3360
	if (dtuple_get_n_fields(tuple1) > 0) {
3361
3362
		btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
3363
					    BTR_SEARCH_LEAF | BTR_ESTIMATE,
1819.5.187 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6559 from MySQL InnoDB
3364
					    &cursor, 0,
3365
					    __FILE__, __LINE__, &mtr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3366
	} else {
3367
		btr_cur_open_at_index_side(TRUE, index,
3368
					   BTR_SEARCH_LEAF | BTR_ESTIMATE,
3369
					   &cursor, &mtr);
3370
	}
3371
3372
	mtr_commit(&mtr);
3373
3374
	mtr_start(&mtr);
3375
3376
	cursor.path_arr = path2;
3377
3378
	if (dtuple_get_n_fields(tuple2) > 0) {
3379
3380
		btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
3381
					    BTR_SEARCH_LEAF | BTR_ESTIMATE,
1819.5.187 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6559 from MySQL InnoDB
3382
					    &cursor, 0,
3383
					    __FILE__, __LINE__, &mtr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3384
	} else {
3385
		btr_cur_open_at_index_side(FALSE, index,
3386
					   BTR_SEARCH_LEAF | BTR_ESTIMATE,
3387
					   &cursor, &mtr);
3388
	}
3389
3390
	mtr_commit(&mtr);
3391
3392
	/* We have the path information for the range in path1 and path2 */
3393
3394
	n_rows = 1;
1819.9.58 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a from MySQL InnoDB
3395
	is_n_rows_exact = TRUE;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3396
	diverged = FALSE;	    /* This becomes true when the path is not
3397
				    the same any more */
3398
	diverged_lot = FALSE;	    /* This becomes true when the paths are
3399
				    not the same or adjacent any more */
3400
	divergence_level = 1000000; /* This is the level where paths diverged
3401
				    a lot */
3402
	for (i = 0; ; i++) {
3403
		ut_ad(i < BTR_PATH_ARRAY_N_SLOTS);
3404
3405
		slot1 = path1 + i;
3406
		slot2 = path2 + i;
3407
3408
		if (slot1->nth_rec == ULINT_UNDEFINED
3409
		    || slot2->nth_rec == ULINT_UNDEFINED) {
3410
1819.9.58 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a from MySQL InnoDB
3411
			if (i > divergence_level + 1 && !is_n_rows_exact) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3412
				/* In trees whose height is > 1 our algorithm
3413
				tends to underestimate: multiply the estimate
3414
				by 2: */
3415
3416
				n_rows = n_rows * 2;
3417
			}
3418
3419
			/* Do not estimate the number of rows in the range
3420
			to over 1 / 2 of the estimated rows in the whole
3421
			table */
3422
1819.9.58 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a from MySQL InnoDB
3423
			if (n_rows > index->table->stat_n_rows / 2
3424
			    && !is_n_rows_exact) {
3425
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3426
				n_rows = index->table->stat_n_rows / 2;
3427
3428
				/* If there are just 0 or 1 rows in the table,
3429
				then we estimate all rows are in the range */
3430
3431
				if (n_rows == 0) {
3432
					n_rows = index->table->stat_n_rows;
3433
				}
3434
			}
3435
3436
			return(n_rows);
3437
		}
3438
3439
		if (!diverged && slot1->nth_rec != slot2->nth_rec) {
3440
3441
			diverged = TRUE;
3442
3443
			if (slot1->nth_rec < slot2->nth_rec) {
3444
				n_rows = slot2->nth_rec - slot1->nth_rec;
3445
3446
				if (n_rows > 1) {
3447
					diverged_lot = TRUE;
3448
					divergence_level = i;
3449
				}
3450
			} else {
1819.9.58 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a from MySQL InnoDB
3451
				/* It is possible that
3452
				slot1->nth_rec >= slot2->nth_rec
3453
				if, for example, we have a single page
3454
				tree which contains (inf, 5, 6, supr)
3455
				and we select where x > 20 and x < 30;
3456
				in this case slot1->nth_rec will point
3457
				to the supr record and slot2->nth_rec
3458
				will point to 6 */
3459
				n_rows = 0;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3460
			}
3461
3462
		} else if (diverged && !diverged_lot) {
3463
3464
			if (slot1->nth_rec < slot1->n_recs
3465
			    || slot2->nth_rec > 1) {
3466
3467
				diverged_lot = TRUE;
3468
				divergence_level = i;
3469
3470
				n_rows = 0;
3471
3472
				if (slot1->nth_rec < slot1->n_recs) {
3473
					n_rows += slot1->n_recs
3474
						- slot1->nth_rec;
3475
				}
3476
3477
				if (slot2->nth_rec > 1) {
3478
					n_rows += slot2->nth_rec - 1;
3479
				}
3480
			}
3481
		} else if (diverged_lot) {
3482
1819.9.58 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100816142329-yimenbuktd416z1a from MySQL InnoDB
3483
			n_rows = btr_estimate_n_rows_in_range_on_level(
3484
				index, slot1, slot2, n_rows,
3485
				&is_n_rows_exact);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3486
		}
3487
	}
3488
}
3489
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3490
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3491
Estimates the number of different key values in a given index, for
3492
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
3493
The estimates are stored in the array index->stat_n_diff_key_vals. */
3494
UNIV_INTERN
3495
void
3496
btr_estimate_number_of_different_key_vals(
3497
/*======================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3498
	dict_index_t*	index)	/*!< in: index */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3499
{
3500
	btr_cur_t	cursor;
3501
	page_t*		page;
3502
	rec_t*		rec;
3503
	ulint		n_cols;
3504
	ulint		matched_fields;
3505
	ulint		matched_bytes;
3506
	ib_int64_t*	n_diff;
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
3507
	ullint		n_sample_pages; /* number of pages to sample */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3508
	ulint		not_empty_flag	= 0;
3509
	ulint		total_external_size = 0;
3510
	ulint		i;
3511
	ulint		j;
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
3512
	ullint		add_on;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3513
	mtr_t		mtr;
3514
	mem_heap_t*	heap		= NULL;
3515
	ulint		offsets_rec_[REC_OFFS_NORMAL_SIZE];
3516
	ulint		offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
3517
	ulint*		offsets_rec	= offsets_rec_;
3518
	ulint*		offsets_next_rec= offsets_next_rec_;
3519
	rec_offs_init(offsets_rec_);
3520
	rec_offs_init(offsets_next_rec_);
3521
3522
	n_cols = dict_index_get_n_unique(index);
3523
1992.6.2 by Monty Taylor
Cleaned up for additional gcc 4.5 warnings.
3524
	n_diff = (ib_int64_t *)mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3525
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
3526
	/* It makes no sense to test more pages than are contained
3527
	in the index, thus we lower the number if it is too high */
3528
	if (srv_stats_sample_pages > index->stat_index_size) {
3529
		if (index->stat_index_size > 0) {
3530
			n_sample_pages = index->stat_index_size;
3531
		} else {
3532
			n_sample_pages = 1;
3533
		}
3534
	} else {
3535
		n_sample_pages = srv_stats_sample_pages;
3536
	}
3537
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3538
	/* We sample some pages in the index to get an estimate */
3539
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
3540
	for (i = 0; i < n_sample_pages; i++) {
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3541
		rec_t*	supremum;
3542
		mtr_start(&mtr);
3543
3544
		btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
3545
3546
		/* Count the number of different key values for each prefix of
3547
		the key on this index page. If the prefix does not determine
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
3548
		the index record uniquely in the B-tree, then we subtract one
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3549
		because otherwise our algorithm would give a wrong estimate
3550
		for an index where there is just one key value. */
3551
3552
		page = btr_cur_get_page(&cursor);
3553
3554
		supremum = page_get_supremum_rec(page);
3555
		rec = page_rec_get_next(page_get_infimum_rec(page));
3556
3557
		if (rec != supremum) {
3558
			not_empty_flag = 1;
3559
			offsets_rec = rec_get_offsets(rec, index, offsets_rec,
3560
						      ULINT_UNDEFINED, &heap);
3561
		}
3562
3563
		while (rec != supremum) {
3564
			rec_t*	next_rec = page_rec_get_next(rec);
3565
			if (next_rec == supremum) {
3566
				break;
3567
			}
3568
3569
			matched_fields = 0;
3570
			matched_bytes = 0;
3571
			offsets_next_rec = rec_get_offsets(next_rec, index,
3572
							   offsets_next_rec,
3573
							   n_cols, &heap);
3574
3575
			cmp_rec_rec_with_match(rec, next_rec,
3576
					       offsets_rec, offsets_next_rec,
3577
					       index, &matched_fields,
3578
					       &matched_bytes);
3579
3580
			for (j = matched_fields + 1; j <= n_cols; j++) {
3581
				/* We add one if this index record has
3582
				a different prefix from the previous */
3583
3584
				n_diff[j]++;
3585
			}
3586
3587
			total_external_size
3588
				+= btr_rec_get_externally_stored_len(
3589
					rec, offsets_rec);
3590
3591
			rec = next_rec;
3592
			/* Initialize offsets_rec for the next round
3593
			and assign the old offsets_rec buffer to
3594
			offsets_next_rec. */
3595
			{
3596
				ulint*	offsets_tmp = offsets_rec;
3597
				offsets_rec = offsets_next_rec;
3598
				offsets_next_rec = offsets_tmp;
3599
			}
3600
		}
3601
3602
3603
		if (n_cols == dict_index_get_n_unique_in_tree(index)) {
3604
3605
			/* If there is more than one leaf page in the tree,
3606
			we add one because we know that the first record
3607
			on the page certainly had a different prefix than the
3608
			last record on the previous index page in the
3609
			alphabetical order. Before this fix, if there was
3610
			just one big record on each clustered index page, the
3611
			algorithm grossly underestimated the number of rows
3612
			in the table. */
3613
3614
			if (btr_page_get_prev(page, &mtr) != FIL_NULL
3615
			    || btr_page_get_next(page, &mtr) != FIL_NULL) {
3616
3617
				n_diff[n_cols]++;
3618
			}
3619
		}
3620
3621
		offsets_rec = rec_get_offsets(rec, index, offsets_rec,
3622
					      ULINT_UNDEFINED, &heap);
3623
		total_external_size += btr_rec_get_externally_stored_len(
3624
			rec, offsets_rec);
3625
		mtr_commit(&mtr);
3626
	}
3627
3628
	/* If we saw k borders between different key values on
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
3629
	n_sample_pages leaf pages, we can estimate how many
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3630
	there will be in index->stat_n_leaf_pages */
3631
3632
	/* We must take into account that our sample actually represents
3633
	also the pages used for external storage of fields (those pages are
3634
	included in index->stat_n_leaf_pages) */
3635
3636
	for (j = 0; j <= n_cols; j++) {
3637
		index->stat_n_diff_key_vals[j]
3638
			= ((n_diff[j]
3639
			    * (ib_int64_t)index->stat_n_leaf_pages
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
3640
			    + n_sample_pages - 1
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3641
			    + total_external_size
3642
			    + not_empty_flag)
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
3643
			   / (n_sample_pages
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3644
			      + total_external_size));
3645
3646
		/* If the tree is small, smaller than
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
3647
		10 * n_sample_pages + total_external_size, then
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3648
		the above estimate is ok. For bigger trees it is common that we
3649
		do not see any borders between key values in the few pages
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
3650
		we pick. But still there may be n_sample_pages
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3651
		different key values, or even more. Let us try to approximate
3652
		that: */
3653
3654
		add_on = index->stat_n_leaf_pages
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
3655
			/ (10 * (n_sample_pages
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3656
				 + total_external_size));
3657
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
3658
		if (add_on > n_sample_pages) {
3659
			add_on = n_sample_pages;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3660
		}
3661
3662
		index->stat_n_diff_key_vals[j] += add_on;
3663
	}
3664
3665
	mem_free(n_diff);
3666
	if (UNIV_LIKELY_NULL(heap)) {
3667
		mem_heap_free(heap);
3668
	}
3669
}
3670
3671
/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
3672
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3673
/***********************************************************//**
3674
Gets the externally stored size of a record, in units of a database page.
3675
@return	externally stored part, in units of a database page */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3676
static
3677
ulint
3678
btr_rec_get_externally_stored_len(
3679
/*==============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3680
	rec_t*		rec,	/*!< in: record */
3681
	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3682
{
3683
	ulint	n_fields;
3684
	byte*	data;
3685
	ulint	local_len;
3686
	ulint	extern_len;
3687
	ulint	total_extern_len = 0;
3688
	ulint	i;
3689
3690
	ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
3691
	n_fields = rec_offs_n_fields(offsets);
3692
3693
	for (i = 0; i < n_fields; i++) {
3694
		if (rec_offs_nth_extern(offsets, i)) {
3695
3696
			data = rec_get_nth_field(rec, offsets, i, &local_len);
3697
3698
			local_len -= BTR_EXTERN_FIELD_REF_SIZE;
3699
3700
			extern_len = mach_read_from_4(data + local_len
3701
						      + BTR_EXTERN_LEN + 4);
3702
3703
			total_extern_len += ut_calc_align(extern_len,
3704
							  UNIV_PAGE_SIZE);
3705
		}
3706
	}
3707
3708
	return(total_extern_len / UNIV_PAGE_SIZE);
3709
}
3710
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3711
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3712
Sets the ownership bit of an externally stored field in a record. */
3713
static
3714
void
3715
btr_cur_set_ownership_of_extern_field(
3716
/*==================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3717
	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose uncompressed
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3718
				part will be updated, or NULL */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3719
	rec_t*		rec,	/*!< in/out: clustered index record */
3720
	dict_index_t*	index,	/*!< in: index of the page */
3721
	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
3722
	ulint		i,	/*!< in: field number */
3723
	ibool		val,	/*!< in: value to set */
3724
	mtr_t*		mtr)	/*!< in: mtr, or NULL if not logged */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3725
{
3726
	byte*	data;
3727
	ulint	local_len;
3728
	ulint	byte_val;
3729
3730
	data = rec_get_nth_field(rec, offsets, i, &local_len);
3731
3732
	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
3733
3734
	local_len -= BTR_EXTERN_FIELD_REF_SIZE;
3735
3736
	byte_val = mach_read_from_1(data + local_len + BTR_EXTERN_LEN);
3737
3738
	if (val) {
3739
		byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG);
3740
	} else {
3741
		byte_val = byte_val | BTR_EXTERN_OWNER_FLAG;
3742
	}
3743
3744
	if (UNIV_LIKELY_NULL(page_zip)) {
3745
		mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
3746
		page_zip_write_blob_ptr(page_zip, rec, index, offsets, i, mtr);
3747
	} else if (UNIV_LIKELY(mtr != NULL)) {
3748
3749
		mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val,
3750
				 MLOG_1BYTE, mtr);
3751
	} else {
3752
		mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val);
3753
	}
3754
}
3755
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3756
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3757
Marks not updated extern fields as not-owned by this record. The ownership
3758
is transferred to the updated record which is inserted elsewhere in the
3759
index tree. In purge only the owner of externally stored field is allowed
1819.9.89 by Sunny Bains, Stewart Smith
Merge Revision revid:sunny.bains@oracle.com-20100805091817-nsk8q8dzz47puv9p from MySQL InnoDB
3760
to free the field.
3761
@return TRUE if BLOB ownership was transferred */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3762
UNIV_INTERN
1819.9.89 by Sunny Bains, Stewart Smith
Merge Revision revid:sunny.bains@oracle.com-20100805091817-nsk8q8dzz47puv9p from MySQL InnoDB
3763
ibool
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3764
btr_cur_mark_extern_inherited_fields(
3765
/*=================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3766
	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose uncompressed
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3767
				part will be updated, or NULL */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3768
	rec_t*		rec,	/*!< in/out: record in a clustered index */
3769
	dict_index_t*	index,	/*!< in: index of the page */
3770
	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
3771
	const upd_t*	update,	/*!< in: update vector */
3772
	mtr_t*		mtr)	/*!< in: mtr, or NULL if not logged */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3773
{
3774
	ulint	n;
3775
	ulint	j;
3776
	ulint	i;
1819.9.89 by Sunny Bains, Stewart Smith
Merge Revision revid:sunny.bains@oracle.com-20100805091817-nsk8q8dzz47puv9p from MySQL InnoDB
3777
	ibool	change_ownership = FALSE;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3778
3779
	ut_ad(rec_offs_validate(rec, NULL, offsets));
3780
	ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
3781
3782
	if (!rec_offs_any_extern(offsets)) {
3783
1819.9.89 by Sunny Bains, Stewart Smith
Merge Revision revid:sunny.bains@oracle.com-20100805091817-nsk8q8dzz47puv9p from MySQL InnoDB
3784
		return(FALSE);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3785
	}
3786
3787
	n = rec_offs_n_fields(offsets);
3788
3789
	for (i = 0; i < n; i++) {
3790
		if (rec_offs_nth_extern(offsets, i)) {
3791
3792
			/* Check it is not in updated fields */
3793
3794
			if (update) {
3795
				for (j = 0; j < upd_get_n_fields(update);
3796
				     j++) {
3797
					if (upd_get_nth_field(update, j)
3798
					    ->field_no == i) {
3799
3800
						goto updated;
3801
					}
3802
				}
3803
			}
3804
3805
			btr_cur_set_ownership_of_extern_field(
3806
				page_zip, rec, index, offsets, i, FALSE, mtr);
1819.9.89 by Sunny Bains, Stewart Smith
Merge Revision revid:sunny.bains@oracle.com-20100805091817-nsk8q8dzz47puv9p from MySQL InnoDB
3807
3808
			change_ownership = TRUE;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3809
updated:
3810
			;
3811
		}
3812
	}
1819.9.89 by Sunny Bains, Stewart Smith
Merge Revision revid:sunny.bains@oracle.com-20100805091817-nsk8q8dzz47puv9p from MySQL InnoDB
3813
3814
	return(change_ownership);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3815
}
3816
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3817
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3818
The complement of the previous function: in an update entry may inherit
3819
some externally stored fields from a record. We must mark them as inherited
3820
in entry, so that they are not freed in a rollback. */
3821
UNIV_INTERN
3822
void
3823
btr_cur_mark_dtuple_inherited_extern(
3824
/*=================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3825
	dtuple_t*	entry,		/*!< in/out: updated entry to be
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3826
					inserted to clustered index */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3827
	const upd_t*	update)		/*!< in: update vector */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3828
{
3829
	ulint		i;
3830
3831
	for (i = 0; i < dtuple_get_n_fields(entry); i++) {
3832
3833
		dfield_t*	dfield = dtuple_get_nth_field(entry, i);
3834
		byte*		data;
3835
		ulint		len;
3836
		ulint		j;
3837
3838
		if (!dfield_is_ext(dfield)) {
3839
			continue;
3840
		}
3841
3842
		/* Check if it is in updated fields */
3843
3844
		for (j = 0; j < upd_get_n_fields(update); j++) {
3845
			if (upd_get_nth_field(update, j)->field_no == i) {
3846
3847
				goto is_updated;
3848
			}
3849
		}
3850
1992.6.2 by Monty Taylor
Cleaned up for additional gcc 4.5 warnings.
3851
		data = (unsigned char *)dfield_get_data(dfield);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3852
		len = dfield_get_len(dfield);
3853
		data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
3854
			|= BTR_EXTERN_INHERITED_FLAG;
3855
3856
is_updated:
3857
		;
3858
	}
3859
}
3860
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3861
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3862
Marks all extern fields in a record as owned by the record. This function
3863
should be called if the delete mark of a record is removed: a not delete
3864
marked record always owns all its extern fields. */
3865
static
3866
void
3867
btr_cur_unmark_extern_fields(
3868
/*=========================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3869
	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose uncompressed
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3870
				part will be updated, or NULL */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3871
	rec_t*		rec,	/*!< in/out: record in a clustered index */
3872
	dict_index_t*	index,	/*!< in: index of the page */
3873
	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
3874
	mtr_t*		mtr)	/*!< in: mtr, or NULL if not logged */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3875
{
3876
	ulint	n;
3877
	ulint	i;
3878
3879
	ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
3880
	n = rec_offs_n_fields(offsets);
3881
3882
	if (!rec_offs_any_extern(offsets)) {
3883
3884
		return;
3885
	}
3886
3887
	for (i = 0; i < n; i++) {
3888
		if (rec_offs_nth_extern(offsets, i)) {
3889
3890
			btr_cur_set_ownership_of_extern_field(
3891
				page_zip, rec, index, offsets, i, TRUE, mtr);
3892
		}
3893
	}
3894
}
3895
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3896
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3897
Marks all extern fields in a dtuple as owned by the record. */
3898
UNIV_INTERN
3899
void
3900
btr_cur_unmark_dtuple_extern_fields(
3901
/*================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3902
	dtuple_t*	entry)		/*!< in/out: clustered index entry */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3903
{
3904
	ulint	i;
3905
3906
	for (i = 0; i < dtuple_get_n_fields(entry); i++) {
3907
		dfield_t* dfield = dtuple_get_nth_field(entry, i);
3908
3909
		if (dfield_is_ext(dfield)) {
1992.6.2 by Monty Taylor
Cleaned up for additional gcc 4.5 warnings.
3910
			byte*	data = (unsigned char *)dfield_get_data(dfield);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3911
			ulint	len = dfield_get_len(dfield);
3912
3913
			data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
3914
				&= ~BTR_EXTERN_OWNER_FLAG;
3915
		}
3916
	}
3917
}
3918
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3919
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3920
Flags the data tuple fields that are marked as extern storage in the
3921
update vector.  We use this function to remember which fields we must
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3922
mark as extern storage in a record inserted for an update.
3923
@return	number of flagged external columns */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3924
UNIV_INTERN
3925
ulint
3926
btr_push_update_extern_fields(
3927
/*==========================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3928
	dtuple_t*	tuple,	/*!< in/out: data tuple */
3929
	const upd_t*	update,	/*!< in: update vector */
3930
	mem_heap_t*	heap)	/*!< in: memory heap */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3931
{
3932
	ulint			n_pushed	= 0;
3933
	ulint			n;
3934
	const upd_field_t*	uf;
3935
3936
	ut_ad(tuple);
3937
	ut_ad(update);
3938
3939
	uf = update->fields;
3940
	n = upd_get_n_fields(update);
3941
3942
	for (; n--; uf++) {
3943
		if (dfield_is_ext(&uf->new_val)) {
3944
			dfield_t*	field
3945
				= dtuple_get_nth_field(tuple, uf->field_no);
3946
3947
			if (!dfield_is_ext(field)) {
3948
				dfield_set_ext(field);
3949
				n_pushed++;
3950
			}
3951
3952
			switch (uf->orig_len) {
3953
				byte*	data;
3954
				ulint	len;
3955
				byte*	buf;
3956
			case 0:
3957
				break;
3958
			case BTR_EXTERN_FIELD_REF_SIZE:
3959
				/* Restore the original locally stored
3960
				part of the column.  In the undo log,
3961
				InnoDB writes a longer prefix of externally
3962
				stored columns, so that column prefixes
3963
				in secondary indexes can be reconstructed. */
3964
				dfield_set_data(field, (byte*) dfield_get_data(field)
3965
						+ dfield_get_len(field)
3966
						- BTR_EXTERN_FIELD_REF_SIZE,
3967
						BTR_EXTERN_FIELD_REF_SIZE);
3968
				dfield_set_ext(field);
3969
				break;
3970
			default:
3971
				/* Reconstruct the original locally
3972
				stored part of the column.  The data
3973
				will have to be copied. */
3974
				ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);
3975
1992.6.2 by Monty Taylor
Cleaned up for additional gcc 4.5 warnings.
3976
				data = (unsigned char *)dfield_get_data(field);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3977
				len = dfield_get_len(field);
3978
1992.6.2 by Monty Taylor
Cleaned up for additional gcc 4.5 warnings.
3979
				buf = (unsigned char *)mem_heap_alloc(heap, uf->orig_len);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
3980
				/* Copy the locally stored prefix. */
3981
				memcpy(buf, data,
3982
				       uf->orig_len
3983
				       - BTR_EXTERN_FIELD_REF_SIZE);
3984
				/* Copy the BLOB pointer. */
3985
				memcpy(buf + uf->orig_len
3986
				       - BTR_EXTERN_FIELD_REF_SIZE,
3987
				       data + len - BTR_EXTERN_FIELD_REF_SIZE,
3988
				       BTR_EXTERN_FIELD_REF_SIZE);
3989
3990
				dfield_set_data(field, buf, uf->orig_len);
3991
				dfield_set_ext(field);
3992
			}
3993
		}
3994
	}
3995
3996
	return(n_pushed);
3997
}
3998
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
3999
/*******************************************************************//**
4000
Returns the length of a BLOB part stored on the header page.
4001
@return	part length */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4002
static
4003
ulint
4004
btr_blob_get_part_len(
4005
/*==================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4006
	const byte*	blob_header)	/*!< in: blob header */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4007
{
4008
	return(mach_read_from_4(blob_header + BTR_BLOB_HDR_PART_LEN));
4009
}
4010
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4011
/*******************************************************************//**
4012
Returns the page number where the next BLOB part is stored.
4013
@return	page number or FIL_NULL if no more pages */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4014
static
4015
ulint
4016
btr_blob_get_next_page_no(
4017
/*======================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4018
	const byte*	blob_header)	/*!< in: blob header */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4019
{
4020
	return(mach_read_from_4(blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO));
4021
}
4022
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4023
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4024
Deallocate a buffer block that was reserved for a BLOB part. */
4025
static
4026
void
4027
btr_blob_free(
4028
/*==========*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4029
	buf_block_t*	block,	/*!< in: buffer block */
4030
	ibool		all,	/*!< in: TRUE=remove also the compressed page
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4031
				if there is one */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4032
	mtr_t*		mtr)	/*!< in: mini-transaction to commit */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4033
{
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
4034
	buf_pool_t*	buf_pool = buf_pool_from_block(block);
4035
	ulint		space	= buf_block_get_space(block);
4036
	ulint		page_no	= buf_block_get_page_no(block);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4037
4038
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4039
4040
	mtr_commit(mtr);
4041
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
4042
	buf_pool_mutex_enter(buf_pool);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4043
	mutex_enter(&block->mutex);
4044
4045
	/* Only free the block if it is still allocated to
4046
	the same file page. */
4047
4048
	if (buf_block_get_state(block)
4049
	    == BUF_BLOCK_FILE_PAGE
4050
	    && buf_block_get_space(block) == space
4051
	    && buf_block_get_page_no(block) == page_no) {
4052
4053
		if (buf_LRU_free_block(&block->page, all, NULL)
4054
		    != BUF_LRU_FREED
4055
		    && all && block->page.zip.data) {
4056
			/* Attempt to deallocate the uncompressed page
4057
			if the whole block cannot be deallocted. */
4058
4059
			buf_LRU_free_block(&block->page, FALSE, NULL);
4060
		}
4061
	}
4062
1819.7.68 by Stewart Smith
Merge initial InnoDB+ import.
4063
	buf_pool_mutex_exit(buf_pool);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4064
	mutex_exit(&block->mutex);
4065
}
4066
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4067
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4068
Stores the fields in big_rec_vec to the tablespace and puts pointers to
4069
them in rec.  The extern flags in rec will have to be set beforehand.
4070
The fields are stored on pages allocated from leaf node
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4071
file segment of the index tree.
1819.9.197 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20101103091611-a3x9p0yivkvu5u9i from MySQL InnoDB
4072
@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4073
UNIV_INTERN
4074
ulint
4075
btr_store_big_rec_extern_fields(
4076
/*============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4077
	dict_index_t*	index,		/*!< in: index of rec; the index tree
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4078
					MUST be X-latched */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4079
	buf_block_t*	rec_block,	/*!< in/out: block containing rec */
4080
	rec_t*		rec,		/*!< in/out: record */
4081
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4082
					the "external storage" flags in offsets
4083
					will not correspond to rec when
4084
					this function returns */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4085
	big_rec_t*	big_rec_vec,	/*!< in: vector containing fields
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4086
					to be stored externally */
2023.3.29 by Monty Taylor
Added back INNOBASE_SKIP_WARNINGS for solaris. Also dealt with unused params.
4087
	mtr_t*		/*local_mtr __attribute__((unused))*/) /*!< in: mtr
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4088
					containing the latch to rec and to the
4089
					tree */
4090
{
4091
	ulint	rec_page_no;
4092
	byte*	field_ref;
4093
	ulint	extern_len;
4094
	ulint	store_len;
4095
	ulint	page_no;
4096
	ulint	space_id;
4097
	ulint	zip_size;
4098
	ulint	prev_page_no;
4099
	ulint	hint_page_no;
4100
	ulint	i;
4101
	mtr_t	mtr;
4102
	mem_heap_t* heap = NULL;
4103
	page_zip_des_t*	page_zip;
4104
	z_stream c_stream;
4105
4106
	ut_ad(rec_offs_validate(rec, index, offsets));
4107
	ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
4108
				MTR_MEMO_X_LOCK));
4109
	ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
4110
	ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
4111
	ut_a(dict_index_is_clust(index));
4112
4113
	page_zip = buf_block_get_page_zip(rec_block);
4114
	ut_a(dict_table_zip_size(index->table)
4115
	     == buf_block_get_zip_size(rec_block));
4116
4117
	space_id = buf_block_get_space(rec_block);
4118
	zip_size = buf_block_get_zip_size(rec_block);
4119
	rec_page_no = buf_block_get_page_no(rec_block);
4120
	ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
4121
4122
	if (UNIV_LIKELY_NULL(page_zip)) {
4123
		int	err;
4124
4125
		/* Zlib deflate needs 128 kilobytes for the default
4126
		window size, plus 512 << memLevel, plus a few
4127
		kilobytes for small objects.  We use reduced memLevel
4128
		to limit the memory consumption, and preallocate the
4129
		heap, hoping to avoid memory fragmentation. */
4130
		heap = mem_heap_create(250000);
4131
		page_zip_set_alloc(&c_stream, heap);
4132
4133
		err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
4134
				   Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
4135
		ut_a(err == Z_OK);
4136
	}
4137
4138
	/* We have to create a file segment to the tablespace
4139
	for each field and put the pointer to the field in rec */
4140
4141
	for (i = 0; i < big_rec_vec->n_fields; i++) {
4142
		ut_ad(rec_offs_nth_extern(offsets,
4143
					  big_rec_vec->fields[i].field_no));
4144
		{
4145
			ulint	local_len;
4146
			field_ref = rec_get_nth_field(
4147
				rec, offsets, big_rec_vec->fields[i].field_no,
4148
				&local_len);
4149
			ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
4150
			local_len -= BTR_EXTERN_FIELD_REF_SIZE;
4151
			field_ref += local_len;
4152
		}
4153
		extern_len = big_rec_vec->fields[i].len;
1819.7.128 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100518135254-pb5whzztc5vphjd4 from MySQL InnoDB
4154
		UNIV_MEM_ASSERT_RW(big_rec_vec->fields[i].data,
4155
				   extern_len);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4156
4157
		ut_a(extern_len > 0);
4158
4159
		prev_page_no = FIL_NULL;
4160
4161
		if (UNIV_LIKELY_NULL(page_zip)) {
4162
			int	err = deflateReset(&c_stream);
4163
			ut_a(err == Z_OK);
4164
1992.6.2 by Monty Taylor
Cleaned up for additional gcc 4.5 warnings.
4165
			c_stream.next_in = (Bytef *) big_rec_vec->fields[i].data;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4166
			c_stream.avail_in = extern_len;
4167
		}
4168
4169
		for (;;) {
4170
			buf_block_t*	block;
4171
			page_t*		page;
4172
4173
			mtr_start(&mtr);
4174
4175
			if (prev_page_no == FIL_NULL) {
4176
				hint_page_no = 1 + rec_page_no;
4177
			} else {
4178
				hint_page_no = prev_page_no + 1;
4179
			}
4180
4181
			block = btr_page_alloc(index, hint_page_no,
4182
					       FSP_NO_DIR, 0, &mtr);
4183
			if (UNIV_UNLIKELY(block == NULL)) {
4184
4185
				mtr_commit(&mtr);
4186
4187
				if (UNIV_LIKELY_NULL(page_zip)) {
4188
					deflateEnd(&c_stream);
4189
					mem_heap_free(heap);
4190
				}
4191
4192
				return(DB_OUT_OF_FILE_SPACE);
4193
			}
4194
4195
			page_no = buf_block_get_page_no(block);
4196
			page = buf_block_get_frame(block);
4197
4198
			if (prev_page_no != FIL_NULL) {
4199
				buf_block_t*	prev_block;
4200
				page_t*		prev_page;
4201
4202
				prev_block = buf_page_get(space_id, zip_size,
4203
							  prev_page_no,
4204
							  RW_X_LATCH, &mtr);
4205
				buf_block_dbg_add_level(prev_block,
4206
							SYNC_EXTERN_STORAGE);
4207
				prev_page = buf_block_get_frame(prev_block);
4208
4209
				if (UNIV_LIKELY_NULL(page_zip)) {
4210
					mlog_write_ulint(
4211
						prev_page + FIL_PAGE_NEXT,
4212
						page_no, MLOG_4BYTES, &mtr);
4213
					memcpy(buf_block_get_page_zip(
4214
						       prev_block)
4215
					       ->data + FIL_PAGE_NEXT,
4216
					       prev_page + FIL_PAGE_NEXT, 4);
4217
				} else {
4218
					mlog_write_ulint(
4219
						prev_page + FIL_PAGE_DATA
4220
						+ BTR_BLOB_HDR_NEXT_PAGE_NO,
4221
						page_no, MLOG_4BYTES, &mtr);
4222
				}
4223
4224
			}
4225
4226
			if (UNIV_LIKELY_NULL(page_zip)) {
4227
				int		err;
4228
				page_zip_des_t*	blob_page_zip;
4229
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4230
				/* Write FIL_PAGE_TYPE to the redo log
4231
				separately, before logging any other
4232
				changes to the page, so that the debug
4233
				assertions in
4234
				recv_parse_or_apply_log_rec_body() can
4235
				be made simpler.  Before InnoDB Plugin
4236
				1.0.4, the initialization of
4237
				FIL_PAGE_TYPE was logged as part of
4238
				the mlog_log_string() below. */
4239
4240
				mlog_write_ulint(page + FIL_PAGE_TYPE,
4241
						 prev_page_no == FIL_NULL
4242
						 ? FIL_PAGE_TYPE_ZBLOB
4243
						 : FIL_PAGE_TYPE_ZBLOB2,
4244
						 MLOG_2BYTES, &mtr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4245
4246
				c_stream.next_out = page
4247
					+ FIL_PAGE_DATA;
4248
				c_stream.avail_out
4249
					= page_zip_get_size(page_zip)
4250
					- FIL_PAGE_DATA;
4251
4252
				err = deflate(&c_stream, Z_FINISH);
4253
				ut_a(err == Z_OK || err == Z_STREAM_END);
4254
				ut_a(err == Z_STREAM_END
4255
				     || c_stream.avail_out == 0);
4256
4257
				/* Write the "next BLOB page" pointer */
4258
				mlog_write_ulint(page + FIL_PAGE_NEXT,
4259
						 FIL_NULL, MLOG_4BYTES, &mtr);
4260
				/* Initialize the unused "prev page" pointer */
4261
				mlog_write_ulint(page + FIL_PAGE_PREV,
4262
						 FIL_NULL, MLOG_4BYTES, &mtr);
4263
				/* Write a back pointer to the record
4264
				into the otherwise unused area.  This
4265
				information could be useful in
4266
				debugging.  Later, we might want to
4267
				implement the possibility to relocate
4268
				BLOB pages.  Then, we would need to be
4269
				able to adjust the BLOB pointer in the
4270
				record.  We do not store the heap
4271
				number of the record, because it can
4272
				change in page_zip_reorganize() or
4273
				btr_page_reorganize().  However, also
4274
				the page number of the record may
4275
				change when B-tree nodes are split or
4276
				merged. */
4277
				mlog_write_ulint(page
4278
						 + FIL_PAGE_FILE_FLUSH_LSN,
4279
						 space_id,
4280
						 MLOG_4BYTES, &mtr);
4281
				mlog_write_ulint(page
4282
						 + FIL_PAGE_FILE_FLUSH_LSN + 4,
4283
						 rec_page_no,
4284
						 MLOG_4BYTES, &mtr);
4285
4286
				/* Zero out the unused part of the page. */
4287
				memset(page + page_zip_get_size(page_zip)
4288
				       - c_stream.avail_out,
4289
				       0, c_stream.avail_out);
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4290
				mlog_log_string(page + FIL_PAGE_FILE_FLUSH_LSN,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4291
						page_zip_get_size(page_zip)
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4292
						- FIL_PAGE_FILE_FLUSH_LSN,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4293
						&mtr);
4294
				/* Copy the page to compressed storage,
4295
				because it will be flushed to disk
4296
				from there. */
4297
				blob_page_zip = buf_block_get_page_zip(block);
4298
				ut_ad(blob_page_zip);
4299
				ut_ad(page_zip_get_size(blob_page_zip)
4300
				      == page_zip_get_size(page_zip));
4301
				memcpy(blob_page_zip->data, page,
4302
				       page_zip_get_size(page_zip));
4303
4304
				if (err == Z_OK && prev_page_no != FIL_NULL) {
4305
4306
					goto next_zip_page;
4307
				}
4308
4309
				rec_block = buf_page_get(space_id, zip_size,
4310
							 rec_page_no,
4311
							 RW_X_LATCH, &mtr);
4312
				buf_block_dbg_add_level(rec_block,
4313
							SYNC_NO_ORDER_CHECK);
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
4314
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4315
				if (err == Z_STREAM_END) {
4316
					mach_write_to_4(field_ref
4317
							+ BTR_EXTERN_LEN, 0);
4318
					mach_write_to_4(field_ref
4319
							+ BTR_EXTERN_LEN + 4,
4320
							c_stream.total_in);
4321
				} else {
4322
					memset(field_ref + BTR_EXTERN_LEN,
4323
					       0, 8);
4324
				}
4325
4326
				if (prev_page_no == FIL_NULL) {
4327
					mach_write_to_4(field_ref
4328
							+ BTR_EXTERN_SPACE_ID,
4329
							space_id);
4330
4331
					mach_write_to_4(field_ref
4332
							+ BTR_EXTERN_PAGE_NO,
4333
							page_no);
4334
4335
					mach_write_to_4(field_ref
4336
							+ BTR_EXTERN_OFFSET,
4337
							FIL_PAGE_NEXT);
4338
				}
4339
4340
				page_zip_write_blob_ptr(
4341
					page_zip, rec, index, offsets,
4342
					big_rec_vec->fields[i].field_no, &mtr);
4343
4344
next_zip_page:
4345
				prev_page_no = page_no;
4346
4347
				/* Commit mtr and release the
4348
				uncompressed page frame to save memory. */
4349
				btr_blob_free(block, FALSE, &mtr);
4350
4351
				if (err == Z_STREAM_END) {
4352
					break;
4353
				}
4354
			} else {
4355
				mlog_write_ulint(page + FIL_PAGE_TYPE,
4356
						 FIL_PAGE_TYPE_BLOB,
4357
						 MLOG_2BYTES, &mtr);
4358
4359
				if (extern_len > (UNIV_PAGE_SIZE
4360
						  - FIL_PAGE_DATA
4361
						  - BTR_BLOB_HDR_SIZE
4362
						  - FIL_PAGE_DATA_END)) {
4363
					store_len = UNIV_PAGE_SIZE
4364
						- FIL_PAGE_DATA
4365
						- BTR_BLOB_HDR_SIZE
4366
						- FIL_PAGE_DATA_END;
4367
				} else {
4368
					store_len = extern_len;
4369
				}
4370
4371
				mlog_write_string(page + FIL_PAGE_DATA
4372
						  + BTR_BLOB_HDR_SIZE,
4373
						  (const byte*)
4374
						  big_rec_vec->fields[i].data
4375
						  + big_rec_vec->fields[i].len
4376
						  - extern_len,
4377
						  store_len, &mtr);
4378
				mlog_write_ulint(page + FIL_PAGE_DATA
4379
						 + BTR_BLOB_HDR_PART_LEN,
4380
						 store_len, MLOG_4BYTES, &mtr);
4381
				mlog_write_ulint(page + FIL_PAGE_DATA
4382
						 + BTR_BLOB_HDR_NEXT_PAGE_NO,
4383
						 FIL_NULL, MLOG_4BYTES, &mtr);
4384
4385
				extern_len -= store_len;
4386
4387
				rec_block = buf_page_get(space_id, zip_size,
4388
							 rec_page_no,
4389
							 RW_X_LATCH, &mtr);
4390
				buf_block_dbg_add_level(rec_block,
4391
							SYNC_NO_ORDER_CHECK);
4392
4393
				mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
4394
						 MLOG_4BYTES, &mtr);
4395
				mlog_write_ulint(field_ref
4396
						 + BTR_EXTERN_LEN + 4,
4397
						 big_rec_vec->fields[i].len
4398
						 - extern_len,
4399
						 MLOG_4BYTES, &mtr);
4400
4401
				if (prev_page_no == FIL_NULL) {
4402
					mlog_write_ulint(field_ref
4403
							 + BTR_EXTERN_SPACE_ID,
4404
							 space_id,
4405
							 MLOG_4BYTES, &mtr);
4406
4407
					mlog_write_ulint(field_ref
4408
							 + BTR_EXTERN_PAGE_NO,
4409
							 page_no,
4410
							 MLOG_4BYTES, &mtr);
4411
4412
					mlog_write_ulint(field_ref
4413
							 + BTR_EXTERN_OFFSET,
4414
							 FIL_PAGE_DATA,
4415
							 MLOG_4BYTES, &mtr);
4416
				}
4417
4418
				prev_page_no = page_no;
4419
4420
				mtr_commit(&mtr);
4421
4422
				if (extern_len == 0) {
4423
					break;
4424
				}
4425
			}
4426
		}
4427
	}
4428
4429
	if (UNIV_LIKELY_NULL(page_zip)) {
4430
		deflateEnd(&c_stream);
4431
		mem_heap_free(heap);
4432
	}
4433
4434
	return(DB_SUCCESS);
4435
}
4436
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4437
/*******************************************************************//**
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
4438
Check the FIL_PAGE_TYPE on an uncompressed BLOB page. */
4439
static
4440
void
4441
btr_check_blob_fil_page_type(
4442
/*=========================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4443
	ulint		space_id,	/*!< in: space id */
4444
	ulint		page_no,	/*!< in: page number */
4445
	const page_t*	page,		/*!< in: page */
4446
	ibool		read)		/*!< in: TRUE=read, FALSE=purge */
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
4447
{
4448
	ulint	type = fil_page_get_type(page);
4449
4450
	ut_a(space_id == page_get_space_id(page));
4451
	ut_a(page_no == page_get_page_no(page));
4452
4453
	if (UNIV_UNLIKELY(type != FIL_PAGE_TYPE_BLOB)) {
4454
		ulint	flags = fil_space_get_flags(space_id);
4455
4456
		if (UNIV_LIKELY
4457
		    ((flags & DICT_TF_FORMAT_MASK) == DICT_TF_FORMAT_51)) {
4458
			/* Old versions of InnoDB did not initialize
4459
			FIL_PAGE_TYPE on BLOB pages.  Do not print
4460
			anything about the type mismatch when reading
4461
			a BLOB page that is in Antelope format.*/
4462
			return;
4463
		}
4464
4465
		ut_print_timestamp(stderr);
4466
		fprintf(stderr,
4467
			"  InnoDB: FIL_PAGE_TYPE=%lu"
4468
			" on BLOB %s space %lu page %lu flags %lx\n",
4469
			(ulong) type, read ? "read" : "purge",
4470
			(ulong) space_id, (ulong) page_no, (ulong) flags);
4471
		ut_error;
4472
	}
4473
}
4474
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4475
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4476
Frees the space in an externally stored field to the file space
4477
management if the field in data is owned by the externally stored field,
4478
in a rollback we may have the additional condition that the field must
4479
not be inherited. */
4480
UNIV_INTERN
4481
void
4482
btr_free_externally_stored_field(
4483
/*=============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4484
	dict_index_t*	index,		/*!< in: index of the data, the index
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4485
					tree MUST be X-latched; if the tree
4486
					height is 1, then also the root page
4487
					must be X-latched! (this is relevant
4488
					in the case this function is called
4489
					from purge where 'data' is located on
4490
					an undo log page, not an index
4491
					page) */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4492
	byte*		field_ref,	/*!< in/out: field reference */
4493
	const rec_t*	rec,		/*!< in: record containing field_ref, for
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4494
					page_zip_write_blob_ptr(), or NULL */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4495
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index),
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4496
					or NULL */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4497
	page_zip_des_t*	page_zip,	/*!< in: compressed page corresponding
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4498
					to rec, or NULL if rec == NULL */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4499
	ulint		i,		/*!< in: field number of field_ref;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4500
					ignored if rec == NULL */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4501
	enum trx_rb_ctx	rb_ctx,		/*!< in: rollback context */
2023.3.29 by Monty Taylor
Added back INNOBASE_SKIP_WARNINGS for solaris. Also dealt with unused params.
4502
	mtr_t*		/*local_mtr __attribute__((unused))*/) /*!< in: mtr
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4503
					containing the latch to data and an
4504
					X-latch to the index tree */
4505
{
4506
	page_t*		page;
4507
	ulint		space_id;
4508
	ulint		rec_zip_size = dict_table_zip_size(index->table);
4509
	ulint		ext_zip_size;
4510
	ulint		page_no;
4511
	ulint		next_page_no;
4512
	mtr_t		mtr;
4513
#ifdef UNIV_DEBUG
4514
	ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
4515
				MTR_MEMO_X_LOCK));
4516
	ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
4517
				     MTR_MEMO_PAGE_X_FIX));
4518
	ut_ad(!rec || rec_offs_validate(rec, index, offsets));
4519
4520
	if (rec) {
4521
		ulint	local_len;
4522
		const byte*	f = rec_get_nth_field(rec, offsets,
4523
						      i, &local_len);
4524
		ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
4525
		local_len -= BTR_EXTERN_FIELD_REF_SIZE;
4526
		f += local_len;
4527
		ut_ad(f == field_ref);
4528
	}
4529
#endif /* UNIV_DEBUG */
4530
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
4531
	if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero,
4532
				  BTR_EXTERN_FIELD_REF_SIZE))) {
4533
		/* In the rollback of uncommitted transactions, we may
4534
		encounter a clustered index record whose BLOBs have
4535
		not been written.  There is nothing to free then. */
1819.5.205 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6673 from MySQL InnoDB
4536
		ut_a(rb_ctx == RB_RECOVERY || rb_ctx == RB_RECOVERY_PURGE_REC);
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
4537
		return;
4538
	}
4539
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4540
	space_id = mach_read_from_4(field_ref + BTR_EXTERN_SPACE_ID);
4541
4542
	if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) {
4543
		ext_zip_size = fil_space_get_zip_size(space_id);
4544
		/* This must be an undo log record in the system tablespace,
4545
		that is, in row_purge_upd_exist_or_extern().
4546
		Currently, externally stored records are stored in the
4547
		same tablespace as the referring records. */
4548
		ut_ad(!page_get_space_id(page_align(field_ref)));
4549
		ut_ad(!rec);
4550
		ut_ad(!page_zip);
4551
	} else {
4552
		ext_zip_size = rec_zip_size;
4553
	}
4554
4555
	if (!rec) {
4556
		/* This is a call from row_purge_upd_exist_or_extern(). */
4557
		ut_ad(!page_zip);
4558
		rec_zip_size = 0;
4559
	}
4560
4561
	for (;;) {
1819.9.118 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100914183302-h2bb9l76w41n3uhh from MySQL InnoDB
4562
#ifdef UNIV_SYNC_DEBUG
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4563
		buf_block_t*	rec_block;
1819.9.118 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100914183302-h2bb9l76w41n3uhh from MySQL InnoDB
4564
#endif /* UNIV_SYNC_DEBUG */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4565
		buf_block_t*	ext_block;
4566
4567
		mtr_start(&mtr);
4568
1819.9.118 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100914183302-h2bb9l76w41n3uhh from MySQL InnoDB
4569
#ifdef UNIV_SYNC_DEBUG
4570
		rec_block =
4571
#endif /* UNIV_SYNC_DEBUG */
4572
			buf_page_get(page_get_space_id(
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4573
						 page_align(field_ref)),
4574
					 rec_zip_size,
4575
					 page_get_page_no(
4576
						 page_align(field_ref)),
4577
					 RW_X_LATCH, &mtr);
4578
		buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK);
4579
		page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
4580
4581
		if (/* There is no external storage data */
4582
		    page_no == FIL_NULL
4583
		    /* This field does not own the externally stored field */
4584
		    || (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
4585
			& BTR_EXTERN_OWNER_FLAG)
4586
		    /* Rollback and inherited field */
1819.5.205 by marko
Merge Revision revid:svn-v4:16c675df-0fcb-4bc9-8058-dcc011a37293:branches/zip:6673 from MySQL InnoDB
4587
		    || ((rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY)
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4588
			&& (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
4589
			    & BTR_EXTERN_INHERITED_FLAG))) {
4590
4591
			/* Do not free */
4592
			mtr_commit(&mtr);
4593
4594
			return;
4595
		}
4596
4597
		ext_block = buf_page_get(space_id, ext_zip_size, page_no,
4598
					 RW_X_LATCH, &mtr);
4599
		buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE);
4600
		page = buf_block_get_frame(ext_block);
4601
4602
		if (ext_zip_size) {
4603
			/* Note that page_zip will be NULL
4604
			in row_purge_upd_exist_or_extern(). */
4605
			switch (fil_page_get_type(page)) {
4606
			case FIL_PAGE_TYPE_ZBLOB:
4607
			case FIL_PAGE_TYPE_ZBLOB2:
4608
				break;
4609
			default:
4610
				ut_error;
4611
			}
4612
			next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
4613
4614
			btr_page_free_low(index, ext_block, 0, &mtr);
4615
4616
			if (UNIV_LIKELY(page_zip != NULL)) {
4617
				mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO,
4618
						next_page_no);
4619
				mach_write_to_4(field_ref + BTR_EXTERN_LEN + 4,
4620
						0);
4621
				page_zip_write_blob_ptr(page_zip, rec, index,
4622
							offsets, i, &mtr);
4623
			} else {
4624
				mlog_write_ulint(field_ref
4625
						 + BTR_EXTERN_PAGE_NO,
4626
						 next_page_no,
4627
						 MLOG_4BYTES, &mtr);
4628
				mlog_write_ulint(field_ref
4629
						 + BTR_EXTERN_LEN + 4, 0,
4630
						 MLOG_4BYTES, &mtr);
4631
			}
4632
		} else {
4633
			ut_a(!page_zip);
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
4634
			btr_check_blob_fil_page_type(space_id, page_no, page,
4635
						     FALSE);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4636
4637
			next_page_no = mach_read_from_4(
4638
				page + FIL_PAGE_DATA
4639
				+ BTR_BLOB_HDR_NEXT_PAGE_NO);
4640
4641
			/* We must supply the page level (= 0) as an argument
4642
			because we did not store it on the page (we save the
4643
			space overhead from an index page header). */
4644
4645
			btr_page_free_low(index, ext_block, 0, &mtr);
4646
4647
			mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
4648
					 next_page_no,
4649
					 MLOG_4BYTES, &mtr);
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
4650
			/* Zero out the BLOB length.  If the server
4651
			crashes during the execution of this function,
4652
			trx_rollback_or_clean_all_recovered() could
4653
			dereference the half-deleted BLOB, fetching a
4654
			wrong prefix for the BLOB. */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4655
			mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4,
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
4656
					 0,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4657
					 MLOG_4BYTES, &mtr);
4658
		}
4659
4660
		/* Commit mtr and release the BLOB block to save memory. */
4661
		btr_blob_free(ext_block, TRUE, &mtr);
4662
	}
4663
}
4664
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4665
/***********************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4666
Frees the externally stored fields for a record. */
4667
static
4668
void
4669
btr_rec_free_externally_stored_fields(
4670
/*==================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4671
	dict_index_t*	index,	/*!< in: index of the data, the index
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4672
				tree MUST be X-latched */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4673
	rec_t*		rec,	/*!< in/out: record */
4674
	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
4675
	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4676
				part will be updated, or NULL */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4677
	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
4678
	mtr_t*		mtr)	/*!< in: mini-transaction handle which contains
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4679
				an X-latch to record page and to the index
4680
				tree */
4681
{
4682
	ulint	n_fields;
4683
	ulint	i;
4684
4685
	ut_ad(rec_offs_validate(rec, index, offsets));
4686
	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
4687
	/* Free possible externally stored fields in the record */
4688
4689
	ut_ad(dict_table_is_comp(index->table) == !!rec_offs_comp(offsets));
4690
	n_fields = rec_offs_n_fields(offsets);
4691
4692
	for (i = 0; i < n_fields; i++) {
4693
		if (rec_offs_nth_extern(offsets, i)) {
4694
			ulint	len;
4695
			byte*	data
4696
				= rec_get_nth_field(rec, offsets, i, &len);
4697
			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
4698
4699
			btr_free_externally_stored_field(
4700
				index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
4701
				rec, offsets, page_zip, i, rb_ctx, mtr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4702
		}
4703
	}
4704
}
4705
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4706
/***********************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4707
Frees the externally stored fields for a record, if the field is mentioned
4708
in the update vector. */
4709
static
4710
void
4711
btr_rec_free_updated_extern_fields(
4712
/*===============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4713
	dict_index_t*	index,	/*!< in: index of rec; the index tree MUST be
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4714
				X-latched */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4715
	rec_t*		rec,	/*!< in/out: record */
4716
	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4717
				part will be updated, or NULL */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4718
	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
4719
	const upd_t*	update,	/*!< in: update vector */
4720
	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
4721
	mtr_t*		mtr)	/*!< in: mini-transaction handle which contains
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4722
				an X-latch to record page and to the tree */
4723
{
4724
	ulint	n_fields;
4725
	ulint	i;
4726
4727
	ut_ad(rec_offs_validate(rec, index, offsets));
4728
	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
4729
4730
	/* Free possible externally stored fields in the record */
4731
4732
	n_fields = upd_get_n_fields(update);
4733
4734
	for (i = 0; i < n_fields; i++) {
4735
		const upd_field_t* ufield = upd_get_nth_field(update, i);
4736
4737
		if (rec_offs_nth_extern(offsets, ufield->field_no)) {
4738
			ulint	len;
4739
			byte*	data = rec_get_nth_field(
4740
				rec, offsets, ufield->field_no, &len);
4741
			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
4742
4743
			btr_free_externally_stored_field(
4744
				index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
4745
				rec, offsets, page_zip,
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
4746
				ufield->field_no, rb_ctx, mtr);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4747
		}
4748
	}
4749
}
4750
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4751
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4752
Copies the prefix of an uncompressed BLOB.  The clustered index record
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4753
that points to this BLOB must be protected by a lock or a page latch.
4754
@return	number of bytes written to buf */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4755
static
4756
ulint
4757
btr_copy_blob_prefix(
4758
/*=================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4759
	byte*		buf,	/*!< out: the externally stored part of
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4760
				the field, or a prefix of it */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4761
	ulint		len,	/*!< in: length of buf, in bytes */
4762
	ulint		space_id,/*!< in: space id of the BLOB pages */
4763
	ulint		page_no,/*!< in: page number of the first BLOB page */
4764
	ulint		offset)	/*!< in: offset on the first BLOB page */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4765
{
4766
	ulint	copied_len	= 0;
4767
4768
	for (;;) {
4769
		mtr_t		mtr;
4770
		buf_block_t*	block;
4771
		const page_t*	page;
4772
		const byte*	blob_header;
4773
		ulint		part_len;
4774
		ulint		copy_len;
4775
4776
		mtr_start(&mtr);
4777
4778
		block = buf_page_get(space_id, 0, page_no, RW_S_LATCH, &mtr);
4779
		buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE);
4780
		page = buf_block_get_frame(block);
4781
641.2.2 by Monty Taylor
InnoDB Plugin 1.0.3
4782
		btr_check_blob_fil_page_type(space_id, page_no, page, TRUE);
4783
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4784
		blob_header = page + offset;
4785
		part_len = btr_blob_get_part_len(blob_header);
4786
		copy_len = ut_min(part_len, len - copied_len);
4787
4788
		memcpy(buf + copied_len,
4789
		       blob_header + BTR_BLOB_HDR_SIZE, copy_len);
4790
		copied_len += copy_len;
4791
4792
		page_no = btr_blob_get_next_page_no(blob_header);
4793
4794
		mtr_commit(&mtr);
4795
4796
		if (page_no == FIL_NULL || copy_len != part_len) {
1819.7.128 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100518135254-pb5whzztc5vphjd4 from MySQL InnoDB
4797
			UNIV_MEM_ASSERT_RW(buf, copied_len);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4798
			return(copied_len);
4799
		}
4800
4801
		/* On other BLOB pages except the first the BLOB header
4802
		always is at the page data start: */
4803
4804
		offset = FIL_PAGE_DATA;
4805
4806
		ut_ad(copied_len <= len);
4807
	}
4808
}
4809
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4810
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4811
Copies the prefix of a compressed BLOB.  The clustered index record
4812
that points to this BLOB must be protected by a lock or a page latch. */
4813
static
4814
void
4815
btr_copy_zblob_prefix(
4816
/*==================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4817
	z_stream*	d_stream,/*!< in/out: the decompressing stream */
4818
	ulint		zip_size,/*!< in: compressed BLOB page size */
4819
	ulint		space_id,/*!< in: space id of the BLOB pages */
4820
	ulint		page_no,/*!< in: page number of the first BLOB page */
4821
	ulint		offset)	/*!< in: offset on the first BLOB page */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4822
{
4823
	ulint	page_type = FIL_PAGE_TYPE_ZBLOB;
4824
4825
	ut_ad(ut_is_2pow(zip_size));
4826
	ut_ad(zip_size >= PAGE_ZIP_MIN_SIZE);
4827
	ut_ad(zip_size <= UNIV_PAGE_SIZE);
4828
	ut_ad(space_id);
4829
4830
	for (;;) {
4831
		buf_page_t*	bpage;
4832
		int		err;
4833
		ulint		next_page_no;
4834
4835
		/* There is no latch on bpage directly.  Instead,
4836
		bpage is protected by the B-tree page latch that
4837
		is being held on the clustered index record, or,
4838
		in row_merge_copy_blobs(), by an exclusive table lock. */
4839
		bpage = buf_page_get_zip(space_id, zip_size, page_no);
4840
4841
		if (UNIV_UNLIKELY(!bpage)) {
4842
			ut_print_timestamp(stderr);
4843
			fprintf(stderr,
4844
				"  InnoDB: Cannot load"
4845
				" compressed BLOB"
4846
				" page %lu space %lu\n",
4847
				(ulong) page_no, (ulong) space_id);
4848
			return;
4849
		}
4850
4851
		if (UNIV_UNLIKELY
4852
		    (fil_page_get_type(bpage->zip.data) != page_type)) {
4853
			ut_print_timestamp(stderr);
4854
			fprintf(stderr,
4855
				"  InnoDB: Unexpected type %lu of"
4856
				" compressed BLOB"
4857
				" page %lu space %lu\n",
4858
				(ulong) fil_page_get_type(bpage->zip.data),
4859
				(ulong) page_no, (ulong) space_id);
4860
			goto end_of_blob;
4861
		}
4862
4863
		next_page_no = mach_read_from_4(bpage->zip.data + offset);
4864
4865
		if (UNIV_LIKELY(offset == FIL_PAGE_NEXT)) {
4866
			/* When the BLOB begins at page header,
4867
			the compressed data payload does not
4868
			immediately follow the next page pointer. */
4869
			offset = FIL_PAGE_DATA;
4870
		} else {
4871
			offset += 4;
4872
		}
4873
4874
		d_stream->next_in = bpage->zip.data + offset;
4875
		d_stream->avail_in = zip_size - offset;
4876
4877
		err = inflate(d_stream, Z_NO_FLUSH);
4878
		switch (err) {
4879
		case Z_OK:
4880
			if (!d_stream->avail_out) {
4881
				goto end_of_blob;
4882
			}
4883
			break;
4884
		case Z_STREAM_END:
4885
			if (next_page_no == FIL_NULL) {
4886
				goto end_of_blob;
4887
			}
4888
			/* fall through */
4889
		default:
4890
inflate_error:
4891
			ut_print_timestamp(stderr);
4892
			fprintf(stderr,
4893
				"  InnoDB: inflate() of"
4894
				" compressed BLOB"
4895
				" page %lu space %lu returned %d (%s)\n",
4896
				(ulong) page_no, (ulong) space_id,
4897
				err, d_stream->msg);
4898
		case Z_BUF_ERROR:
4899
			goto end_of_blob;
4900
		}
4901
4902
		if (next_page_no == FIL_NULL) {
4903
			if (!d_stream->avail_in) {
4904
				ut_print_timestamp(stderr);
4905
				fprintf(stderr,
4906
					"  InnoDB: unexpected end of"
4907
					" compressed BLOB"
4908
					" page %lu space %lu\n",
4909
					(ulong) page_no,
4910
					(ulong) space_id);
4911
			} else {
4912
				err = inflate(d_stream, Z_FINISH);
4913
				switch (err) {
4914
				case Z_STREAM_END:
4915
				case Z_BUF_ERROR:
4916
					break;
4917
				default:
4918
					goto inflate_error;
4919
				}
4920
			}
4921
4922
end_of_blob:
4923
			buf_page_release_zip(bpage);
4924
			return;
4925
		}
4926
4927
		buf_page_release_zip(bpage);
4928
4929
		/* On other BLOB pages except the first
4930
		the BLOB header always is at the page header: */
4931
4932
		page_no = next_page_no;
4933
		offset = FIL_PAGE_NEXT;
4934
		page_type = FIL_PAGE_TYPE_ZBLOB2;
4935
	}
4936
}
4937
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4938
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4939
Copies the prefix of an externally stored field of a record.  The
4940
clustered index record that points to this BLOB must be protected by a
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4941
lock or a page latch.
4942
@return	number of bytes written to buf */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4943
static
4944
ulint
4945
btr_copy_externally_stored_field_prefix_low(
4946
/*========================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4947
	byte*		buf,	/*!< out: the externally stored part of
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4948
				the field, or a prefix of it */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4949
	ulint		len,	/*!< in: length of buf, in bytes */
4950
	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4951
				zero for uncompressed BLOBs */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4952
	ulint		space_id,/*!< in: space id of the first BLOB page */
4953
	ulint		page_no,/*!< in: page number of the first BLOB page */
4954
	ulint		offset)	/*!< in: offset on the first BLOB page */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4955
{
4956
	if (UNIV_UNLIKELY(len == 0)) {
4957
		return(0);
4958
	}
4959
4960
	if (UNIV_UNLIKELY(zip_size)) {
4961
		int		err;
4962
		z_stream	d_stream;
4963
		mem_heap_t*	heap;
4964
4965
		/* Zlib inflate needs 32 kilobytes for the default
4966
		window size, plus a few kilobytes for small objects. */
4967
		heap = mem_heap_create(40000);
4968
		page_zip_set_alloc(&d_stream, heap);
4969
4970
		err = inflateInit(&d_stream);
4971
		ut_a(err == Z_OK);
4972
4973
		d_stream.next_out = buf;
4974
		d_stream.avail_out = len;
4975
		d_stream.avail_in = 0;
4976
4977
		btr_copy_zblob_prefix(&d_stream, zip_size,
4978
				      space_id, page_no, offset);
4979
		inflateEnd(&d_stream);
4980
		mem_heap_free(heap);
1819.7.128 by Vasil Dimov
Merge Revision revid:vasil.dimov@oracle.com-20100518135254-pb5whzztc5vphjd4 from MySQL InnoDB
4981
		UNIV_MEM_ASSERT_RW(buf, d_stream.total_out);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4982
		return(d_stream.total_out);
4983
	} else {
4984
		return(btr_copy_blob_prefix(buf, len, space_id,
4985
					    page_no, offset));
4986
	}
4987
}
4988
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4989
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4990
Copies the prefix of an externally stored field of a record.  The
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4991
clustered index record must be protected by a lock or a page latch.
4992
@return the length of the copied field, or 0 if the column was being
4993
or has been deleted */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
4994
UNIV_INTERN
4995
ulint
4996
btr_copy_externally_stored_field_prefix(
4997
/*====================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
4998
	byte*		buf,	/*!< out: the field, or a prefix of it */
4999
	ulint		len,	/*!< in: length of buf, in bytes */
5000
	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
5001
				zero for uncompressed BLOBs */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
5002
	const byte*	data,	/*!< in: 'internally' stored part of the
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
5003
				field containing also the reference to
5004
				the external part; must be protected by
5005
				a lock or a page latch */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
5006
	ulint		local_len)/*!< in: length of data, in bytes */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
5007
{
5008
	ulint	space_id;
5009
	ulint	page_no;
5010
	ulint	offset;
5011
5012
	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
5013
5014
	local_len -= BTR_EXTERN_FIELD_REF_SIZE;
5015
5016
	if (UNIV_UNLIKELY(local_len >= len)) {
5017
		memcpy(buf, data, len);
5018
		return(len);
5019
	}
5020
5021
	memcpy(buf, data, local_len);
5022
	data += local_len;
5023
5024
	ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));
5025
641.2.1 by Monty Taylor
InnoDB Plugin 1.0.2
5026
	if (!mach_read_from_4(data + BTR_EXTERN_LEN + 4)) {
5027
		/* The externally stored part of the column has been
5028
		(partially) deleted.  Signal the half-deleted BLOB
5029
		to the caller. */
5030
5031
		return(0);
5032
	}
5033
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
5034
	space_id = mach_read_from_4(data + BTR_EXTERN_SPACE_ID);
5035
5036
	page_no = mach_read_from_4(data + BTR_EXTERN_PAGE_NO);
5037
5038
	offset = mach_read_from_4(data + BTR_EXTERN_OFFSET);
5039
5040
	return(local_len
5041
	       + btr_copy_externally_stored_field_prefix_low(buf + local_len,
5042
							     len - local_len,
5043
							     zip_size,
5044
							     space_id, page_no,
5045
							     offset));
5046
}
5047
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
5048
/*******************************************************************//**
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
5049
Copies an externally stored field of a record to mem heap.  The
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
5050
clustered index record must be protected by a lock or a page latch.
5051
@return	the whole field copied to heap */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
5052
static
5053
byte*
5054
btr_copy_externally_stored_field(
5055
/*=============================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
5056
	ulint*		len,	/*!< out: length of the whole field */
5057
	const byte*	data,	/*!< in: 'internally' stored part of the
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
5058
				field containing also the reference to
5059
				the external part; must be protected by
5060
				a lock or a page latch */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
5061
	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
5062
				zero for uncompressed BLOBs */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
5063
	ulint		local_len,/*!< in: length of data */
5064
	mem_heap_t*	heap)	/*!< in: mem heap */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
5065
{
5066
	ulint	space_id;
5067
	ulint	page_no;
5068
	ulint	offset;
5069
	ulint	extern_len;
5070
	byte*	buf;
5071
5072
	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
5073
5074
	local_len -= BTR_EXTERN_FIELD_REF_SIZE;
5075
5076
	space_id = mach_read_from_4(data + local_len + BTR_EXTERN_SPACE_ID);
5077
5078
	page_no = mach_read_from_4(data + local_len + BTR_EXTERN_PAGE_NO);
5079
5080
	offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET);
5081
5082
	/* Currently a BLOB cannot be bigger than 4 GB; we
5083
	leave the 4 upper bytes in the length field unused */
5084
5085
	extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4);
5086
1992.6.2 by Monty Taylor
Cleaned up for additional gcc 4.5 warnings.
5087
	buf = (unsigned char *)mem_heap_alloc(heap, local_len + extern_len);
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
5088
5089
	memcpy(buf, data, local_len);
5090
	*len = local_len
5091
		+ btr_copy_externally_stored_field_prefix_low(buf + local_len,
5092
							      extern_len,
5093
							      zip_size,
5094
							      space_id,
5095
							      page_no, offset);
5096
5097
	return(buf);
5098
}
5099
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
5100
/*******************************************************************//**
5101
Copies an externally stored field of a record to mem heap.
1819.9.37 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629131907-epjs6h2rv457h7qv from MySQL InnoDB
5102
@return	the field copied to heap, or NULL if the field is incomplete */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
5103
UNIV_INTERN
5104
byte*
5105
btr_rec_copy_externally_stored_field(
5106
/*=================================*/
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
5107
	const rec_t*	rec,	/*!< in: record in a clustered index;
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
5108
				must be protected by a lock or a page latch */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
5109
	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
5110
	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
5111
				zero for uncompressed BLOBs */
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
5112
	ulint		no,	/*!< in: field number */
5113
	ulint*		len,	/*!< out: length of the field */
5114
	mem_heap_t*	heap)	/*!< in: mem heap */
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
5115
{
5116
	ulint		local_len;
5117
	const byte*	data;
5118
5119
	ut_a(rec_offs_nth_extern(offsets, no));
5120
5121
	/* An externally stored field can contain some initial
5122
	data from the field, and in the last 20 bytes it has the
5123
	space id, page number, and offset where the rest of the
5124
	field data is stored, and the data length in addition to
5125
	the data stored locally. We may need to store some data
5126
	locally to get the local record length above the 128 byte
5127
	limit so that field offsets are stored in two bytes, and
5128
	the extern bit is available in those two bytes. */
5129
5130
	data = rec_get_nth_field(rec, offsets, no, &local_len);
5131
1819.9.37 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629131907-epjs6h2rv457h7qv from MySQL InnoDB
5132
	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
5133
5134
	if (UNIV_UNLIKELY
5135
	    (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE,
5136
		     field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
1819.9.40 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100630095241-bietgukdy2g6k7x4 from MySQL InnoDB
5137
		/* The externally stored field was not written yet.
5138
		This record should only be seen by
1819.9.37 by Marko Mäkelä, Stewart Smith
Merge Revision revid:marko.makela@oracle.com-20100629131907-epjs6h2rv457h7qv from MySQL InnoDB
5139
		recv_recovery_rollback_active() or any
5140
		TRX_ISO_READ_UNCOMMITTED transactions. */
5141
		return(NULL);
5142
	}
5143
641.1.2 by Monty Taylor
Imported 1.0.1 with clean - with no changes.
5144
	return(btr_copy_externally_stored_field(len, data,
5145
						zip_size, local_len, heap));
5146
}
641.2.3 by Monty Taylor
InnoDB Plugin 1.0.4
5147
#endif /* !UNIV_HOTBACKUP */