~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/******************************************************
Insert into a table

(c) 1996 Innobase Oy

Created 4/20/1996 Heikki Tuuri
*******************************************************/
8
9
#include "row0ins.h"
10
11
#ifdef UNIV_NONINL
12
#include "row0ins.ic"
13
#endif
14
15
#include "dict0dict.h"
16
#include "dict0boot.h"
17
#include "trx0undo.h"
18
#include "btr0btr.h"
19
#include "btr0cur.h"
20
#include "mach0data.h"
21
#include "que0que.h"
22
#include "row0upd.h"
23
#include "row0sel.h"
24
#include "row0row.h"
25
#include "rem0cmp.h"
26
#include "lock0lock.h"
27
#include "log0log.h"
28
#include "eval0eval.h"
29
#include "data0data.h"
30
#include "usr0sess.h"
31
#include "buf0lru.h"
32
33
#define	ROW_INS_PREV	1
34
#define	ROW_INS_NEXT	2
35
36
37
/*********************************************************************
38
This prototype is copied from /mysql/sql/ha_innodb.cc.
39
Invalidates the MySQL query cache for the table.
40
NOTE that the exact prototype of this function has to be in
41
/innobase/row/row0ins.c! */
42
extern
43
void
44
innobase_invalidate_query_cache(
45
/*============================*/
46
	trx_t*	trx,		/* in: transaction which modifies the table */
47
	char*	full_name,	/* in: concatenation of database name, null
48
				char '\0', table name, null char'\0';
49
				NOTE that in Windows this is always
50
				in LOWER CASE! */
51
	ulint	full_name_len);	/* in: full name length where also the null
52
				chars count */
53
54
/*************************************************************************
55
Creates an insert node struct. */
56
57
ins_node_t*
58
ins_node_create(
59
/*============*/
60
					/* out, own: insert node struct */
61
	ulint		ins_type,	/* in: INS_VALUES, ... */
62
	dict_table_t*	table,		/* in: table where to insert */
63
	mem_heap_t*	heap)		/* in: mem heap where created */
64
{
65
	ins_node_t*	node;
66
67
	node = mem_heap_alloc(heap, sizeof(ins_node_t));
68
69
	node->common.type = QUE_NODE_INSERT;
70
71
	node->ins_type = ins_type;
72
73
	node->state = INS_NODE_SET_IX_LOCK;
74
	node->table = table;
75
	node->index = NULL;
76
	node->entry = NULL;
77
78
	node->select = NULL;
79
80
	node->trx_id = ut_dulint_zero;
81
82
	node->entry_sys_heap = mem_heap_create(128);
83
84
	node->magic_n = INS_NODE_MAGIC_N;
85
86
	return(node);
87
}
88
89
/***************************************************************
90
Creates an entry template for each index of a table. */
91
static
92
void
93
ins_node_create_entry_list(
94
/*=======================*/
95
	ins_node_t*	node)	/* in: row insert node */
96
{
97
	dict_index_t*	index;
98
	dtuple_t*	entry;
99
100
	ut_ad(node->entry_sys_heap);
101
102
	UT_LIST_INIT(node->entry_list);
103
104
	index = dict_table_get_first_index(node->table);
105
106
	while (index != NULL) {
107
		entry = row_build_index_entry(node->row, index,
108
					      node->entry_sys_heap);
109
		UT_LIST_ADD_LAST(tuple_list, node->entry_list, entry);
110
111
		index = dict_table_get_next_index(index);
112
	}
113
}
114
115
/*********************************************************************
116
Adds system field buffers to a row. */
117
static
118
void
119
row_ins_alloc_sys_fields(
120
/*=====================*/
121
	ins_node_t*	node)	/* in: insert node */
122
{
123
	dtuple_t*		row;
124
	dict_table_t*		table;
125
	mem_heap_t*		heap;
126
	const dict_col_t*	col;
127
	dfield_t*		dfield;
128
	byte*			ptr;
129
130
	row = node->row;
131
	table = node->table;
132
	heap = node->entry_sys_heap;
133
134
	ut_ad(row && table && heap);
135
	ut_ad(dtuple_get_n_fields(row) == dict_table_get_n_cols(table));
136
137
	/* 1. Allocate buffer for row id */
138
139
	col = dict_table_get_sys_col(table, DATA_ROW_ID);
140
141
	dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
142
143
	ptr = mem_heap_alloc(heap, DATA_ROW_ID_LEN);
144
145
	dfield_set_data(dfield, ptr, DATA_ROW_ID_LEN);
146
147
	node->row_id_buf = ptr;
148
149
	/* 3. Allocate buffer for trx id */
150
151
	col = dict_table_get_sys_col(table, DATA_TRX_ID);
152
153
	dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
154
	ptr = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
155
156
	dfield_set_data(dfield, ptr, DATA_TRX_ID_LEN);
157
158
	node->trx_id_buf = ptr;
159
160
	/* 4. Allocate buffer for roll ptr */
161
162
	col = dict_table_get_sys_col(table, DATA_ROLL_PTR);
163
164
	dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
165
	ptr = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
166
167
	dfield_set_data(dfield, ptr, DATA_ROLL_PTR_LEN);
168
}
169
170
/*************************************************************************
171
Sets a new row to insert for an INS_DIRECT node. This function is only used
172
if we have constructed the row separately, which is a rare case; this
173
function is quite slow. */
174
175
void
176
ins_node_set_new_row(
177
/*=================*/
178
	ins_node_t*	node,	/* in: insert node */
179
	dtuple_t*	row)	/* in: new row (or first row) for the node */
180
{
181
	node->state = INS_NODE_SET_IX_LOCK;
182
	node->index = NULL;
183
	node->entry = NULL;
184
185
	node->row = row;
186
187
	mem_heap_empty(node->entry_sys_heap);
188
189
	/* Create templates for index entries */
190
191
	ins_node_create_entry_list(node);
192
193
	/* Allocate from entry_sys_heap buffers for sys fields */
194
195
	row_ins_alloc_sys_fields(node);
196
197
	/* As we allocated a new trx id buf, the trx id should be written
198
	there again: */
199
200
	node->trx_id = ut_dulint_zero;
201
}
202
203
/***********************************************************************
204
Does an insert operation by updating a delete-marked existing record
205
in the index. This situation can occur if the delete-marked record is
206
kept in the index for consistent reads. */
207
static
208
ulint
209
row_ins_sec_index_entry_by_modify(
210
/*==============================*/
211
				/* out: DB_SUCCESS or error code */
212
	ulint		mode,	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
213
				depending on whether mtr holds just a leaf
214
				latch or also a tree latch */
215
	btr_cur_t*	cursor,	/* in: B-tree cursor */
216
	dtuple_t*	entry,	/* in: index entry to insert */
217
	que_thr_t*	thr,	/* in: query thread */
218
	mtr_t*		mtr)	/* in: mtr */
219
{
220
	big_rec_t*	dummy_big_rec;
221
	mem_heap_t*	heap;
222
	upd_t*		update;
223
	rec_t*		rec;
224
	ulint		err;
225
226
	rec = btr_cur_get_rec(cursor);
227
228
	ut_ad((cursor->index->type & DICT_CLUSTERED) == 0);
229
	ut_ad(rec_get_deleted_flag(rec,
230
				   dict_table_is_comp(cursor->index->table)));
231
232
	/* We know that in the alphabetical ordering, entry and rec are
233
	identified. But in their binary form there may be differences if
234
	there are char fields in them. Therefore we have to calculate the
235
	difference. */
236
237
	heap = mem_heap_create(1024);
238
239
	update = row_upd_build_sec_rec_difference_binary(
240
		cursor->index, entry, rec, thr_get_trx(thr), heap);
241
	if (mode == BTR_MODIFY_LEAF) {
242
		/* Try an optimistic updating of the record, keeping changes
243
		within the page */
244
245
		err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG, cursor,
246
						update, 0, thr, mtr);
247
		if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
248
			err = DB_FAIL;
249
		}
250
	} else {
251
		ut_a(mode == BTR_MODIFY_TREE);
252
		if (buf_LRU_buf_pool_running_out()) {
253
254
			err = DB_LOCK_TABLE_FULL;
255
256
			goto func_exit;
257
		}
258
259
		err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG, cursor,
260
						 &dummy_big_rec, update,
261
						 0, thr, mtr);
262
	}
263
func_exit:
264
	mem_heap_free(heap);
265
266
	return(err);
267
}
268
269
/***********************************************************************
270
Does an insert operation by delete unmarking and updating a delete marked
271
existing record in the index. This situation can occur if the delete marked
272
record is kept in the index for consistent reads. */
273
static
274
ulint
275
row_ins_clust_index_entry_by_modify(
276
/*================================*/
277
				/* out: DB_SUCCESS, DB_FAIL, or error code */
278
	ulint		mode,	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
279
				depending on whether mtr holds just a leaf
280
				latch or also a tree latch */
281
	btr_cur_t*	cursor,	/* in: B-tree cursor */
282
	big_rec_t**	big_rec,/* out: possible big rec vector of fields
283
				which have to be stored externally by the
284
				caller */
285
	dtuple_t*	entry,	/* in: index entry to insert */
286
	ulint*		ext_vec,/* in: array containing field numbers of
287
				externally stored fields in entry, or NULL */
288
	ulint		n_ext_vec,/* in: number of fields in ext_vec */
289
	que_thr_t*	thr,	/* in: query thread */
290
	mtr_t*		mtr)	/* in: mtr */
291
{
292
	mem_heap_t*	heap;
293
	rec_t*		rec;
294
	upd_t*		update;
295
	ulint		err;
296
297
	ut_ad(cursor->index->type & DICT_CLUSTERED);
298
299
	*big_rec = NULL;
300
301
	rec = btr_cur_get_rec(cursor);
302
303
	ut_ad(rec_get_deleted_flag(rec,
304
				   dict_table_is_comp(cursor->index->table)));
305
306
	heap = mem_heap_create(1024);
307
308
	/* Build an update vector containing all the fields to be modified;
309
	NOTE that this vector may NOT contain system columns trx_id or
310
	roll_ptr */
311
312
	update = row_upd_build_difference_binary(cursor->index, entry, ext_vec,
313
						 n_ext_vec, rec,
314
						 thr_get_trx(thr), heap);
315
	if (mode == BTR_MODIFY_LEAF) {
316
		/* Try optimistic updating of the record, keeping changes
317
		within the page */
318
319
		err = btr_cur_optimistic_update(0, cursor, update, 0, thr,
320
						mtr);
321
		if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
322
			err = DB_FAIL;
323
		}
324
	} else {
325
		ut_a(mode == BTR_MODIFY_TREE);
326
		if (buf_LRU_buf_pool_running_out()) {
327
328
			err = DB_LOCK_TABLE_FULL;
329
330
			goto func_exit;
331
		}
332
		err = btr_cur_pessimistic_update(0, cursor, big_rec, update,
333
						 0, thr, mtr);
334
	}
335
func_exit:
336
	mem_heap_free(heap);
337
338
	return(err);
339
}
340
341
/*************************************************************************
342
Returns TRUE if in a cascaded update/delete an ancestor node of node
343
updates (not DELETE, but UPDATE) table. */
344
static
345
ibool
346
row_ins_cascade_ancestor_updates_table(
347
/*===================================*/
348
				/* out: TRUE if an ancestor updates table */
349
	que_node_t*	node,	/* in: node in a query graph */
350
	dict_table_t*	table)	/* in: table */
351
{
352
	que_node_t*	parent;
353
	upd_node_t*	upd_node;
354
355
	parent = que_node_get_parent(node);
356
357
	while (que_node_get_type(parent) == QUE_NODE_UPDATE) {
358
359
		upd_node = parent;
360
361
		if (upd_node->table == table && upd_node->is_delete == FALSE) {
362
363
			return(TRUE);
364
		}
365
366
		parent = que_node_get_parent(parent);
367
368
		ut_a(parent);
369
	}
370
371
	return(FALSE);
372
}
373
374
/*************************************************************************
375
Returns the number of ancestor UPDATE or DELETE nodes of a
376
cascaded update/delete node. */
377
static
378
ulint
379
row_ins_cascade_n_ancestors(
380
/*========================*/
381
				/* out: number of ancestors */
382
	que_node_t*	node)	/* in: node in a query graph */
383
{
384
	que_node_t*	parent;
385
	ulint		n_ancestors = 0;
386
387
	parent = que_node_get_parent(node);
388
389
	while (que_node_get_type(parent) == QUE_NODE_UPDATE) {
390
		n_ancestors++;
391
392
		parent = que_node_get_parent(parent);
393
394
		ut_a(parent);
395
	}
396
397
	return(n_ancestors);
398
}
399
400
/**********************************************************************
401
Calculates the update vector node->cascade->update for a child table in
402
a cascaded update. */
403
static
404
ulint
405
row_ins_cascade_calc_update_vec(
406
/*============================*/
407
					/* out: number of fields in the
408
					calculated update vector; the value
409
					can also be 0 if no foreign key
410
					fields changed; the returned value
411
					is ULINT_UNDEFINED if the column
412
					type in the child table is too short
413
					to fit the new value in the parent
414
					table: that means the update fails */
415
	upd_node_t*	node,		/* in: update node of the parent
416
					table */
417
	dict_foreign_t*	foreign,	/* in: foreign key constraint whose
418
					type is != 0 */
419
	mem_heap_t*	heap)		/* in: memory heap to use as
420
					temporary storage */
421
{
422
	upd_node_t*	cascade		= node->cascade_node;
423
	dict_table_t*	table		= foreign->foreign_table;
424
	dict_index_t*	index		= foreign->foreign_index;
425
	upd_t*		update;
426
	upd_field_t*	ufield;
427
	dict_table_t*	parent_table;
428
	dict_index_t*	parent_index;
429
	upd_t*		parent_update;
430
	upd_field_t*	parent_ufield;
431
	ulint		n_fields_updated;
432
	ulint		parent_field_no;
433
	ulint		i;
434
	ulint		j;
435
436
	ut_a(node);
437
	ut_a(foreign);
438
	ut_a(cascade);
439
	ut_a(table);
440
	ut_a(index);
441
442
	/* Calculate the appropriate update vector which will set the fields
443
	in the child index record to the same value (possibly padded with
444
	spaces if the column is a fixed length CHAR or FIXBINARY column) as
445
	the referenced index record will get in the update. */
446
447
	parent_table = node->table;
448
	ut_a(parent_table == foreign->referenced_table);
449
	parent_index = foreign->referenced_index;
450
	parent_update = node->update;
451
452
	update = cascade->update;
453
454
	update->info_bits = 0;
455
	update->n_fields = foreign->n_fields;
456
457
	n_fields_updated = 0;
458
459
	for (i = 0; i < foreign->n_fields; i++) {
460
461
		parent_field_no = dict_table_get_nth_col_pos(
462
			parent_table,
463
			dict_index_get_nth_col_no(parent_index, i));
464
465
		for (j = 0; j < parent_update->n_fields; j++) {
466
			parent_ufield = parent_update->fields + j;
467
468
			if (parent_ufield->field_no == parent_field_no) {
469
470
				ulint			min_size;
471
				const dict_col_t*	col;
472
473
				col = dict_index_get_nth_col(index, i);
474
475
				/* A field in the parent index record is
476
				updated. Let us make the update vector
477
				field for the child table. */
478
479
				ufield = update->fields + n_fields_updated;
480
481
				ufield->field_no
482
					= dict_table_get_nth_col_pos(
483
					table, dict_col_get_no(col));
484
				ufield->exp = NULL;
485
486
				ufield->new_val = parent_ufield->new_val;
487
488
				/* Do not allow a NOT NULL column to be
489
				updated as NULL */
490
491
				if (ufield->new_val.len == UNIV_SQL_NULL
492
				    && (col->prtype & DATA_NOT_NULL)) {
493
494
					return(ULINT_UNDEFINED);
495
				}
496
497
				/* If the new value would not fit in the
498
				column, do not allow the update */
499
500
				if (ufield->new_val.len != UNIV_SQL_NULL
501
				    && dtype_get_at_most_n_mbchars(
502
					col->prtype,
503
					col->mbminlen, col->mbmaxlen,
504
					col->len,
505
					ufield->new_val.len,
506
					ufield->new_val.data)
507
				    < ufield->new_val.len) {
508
509
					return(ULINT_UNDEFINED);
510
				}
511
512
				/* If the parent column type has a different
513
				length than the child column type, we may
514
				need to pad with spaces the new value of the
515
				child column */
516
517
				min_size = dict_col_get_min_size(col);
518
519
				if (min_size
520
				    && ufield->new_val.len != UNIV_SQL_NULL
521
				    && ufield->new_val.len < min_size) {
522
523
					char*		pad_start;
524
					const char*	pad_end;
525
					ufield->new_val.data = mem_heap_alloc(
526
						heap, min_size);
527
					pad_start = ((char*) ufield
528
						     ->new_val.data)
529
						+ ufield->new_val.len;
530
					pad_end = ((char*) ufield
531
						   ->new_val.data)
532
						+ min_size;
533
					ufield->new_val.len = min_size;
534
					ut_memcpy(ufield->new_val.data,
535
						  parent_ufield->new_val.data,
536
						  parent_ufield->new_val.len);
537
538
					switch (UNIV_EXPECT(col->mbminlen,1)) {
539
					default:
540
						ut_error;
541
					case 1:
542
						if (UNIV_UNLIKELY
543
						    (dtype_get_charset_coll(
544
							    col->prtype)
545
						     == DATA_MYSQL_BINARY_CHARSET_COLL)) {
546
							/* Do not pad BINARY
547
							columns. */
548
							return(ULINT_UNDEFINED);
549
						}
550
551
						/* space=0x20 */
552
						memset(pad_start, 0x20,
553
						       pad_end - pad_start);
554
						break;
555
					case 2:
556
						/* space=0x0020 */
557
						ut_a(!(ufield->new_val.len
558
						       % 2));
559
						ut_a(!(min_size % 2));
560
						do {
561
							*pad_start++ = 0x00;
562
							*pad_start++ = 0x20;
563
						} while (pad_start < pad_end);
564
						break;
565
					}
566
				}
567
568
				ufield->extern_storage = FALSE;
569
570
				n_fields_updated++;
571
			}
572
		}
573
	}
574
575
	update->n_fields = n_fields_updated;
576
577
	return(n_fields_updated);
578
}
579
580
/*************************************************************************
581
Set detailed error message associated with foreign key errors for
582
the given transaction. */
583
static
584
void
585
row_ins_set_detailed(
586
/*=================*/
587
	trx_t*		trx,		/* in: transaction */
588
	dict_foreign_t*	foreign)	/* in: foreign key constraint */
589
{
590
	mutex_enter(&srv_misc_tmpfile_mutex);
591
	rewind(srv_misc_tmpfile);
592
593
	if (os_file_set_eof(srv_misc_tmpfile)) {
594
		ut_print_name(srv_misc_tmpfile, trx, TRUE,
595
			      foreign->foreign_table_name);
596
		dict_print_info_on_foreign_key_in_create_format(
597
			srv_misc_tmpfile, trx, foreign, FALSE);
598
		trx_set_detailed_error_from_file(trx, srv_misc_tmpfile);
599
	} else {
600
		trx_set_detailed_error(trx, "temp file operation failed");
601
	}
602
603
	mutex_exit(&srv_misc_tmpfile_mutex);
604
}
605
606
/*************************************************************************
607
Reports a foreign key error associated with an update or a delete of a
608
parent table index entry. */
609
static
610
void
611
row_ins_foreign_report_err(
612
/*=======================*/
613
	const char*	errstr,		/* in: error string from the viewpoint
614
					of the parent table */
615
	que_thr_t*	thr,		/* in: query thread whose run_node
616
					is an update node */
617
	dict_foreign_t*	foreign,	/* in: foreign key constraint */
618
	rec_t*		rec,		/* in: a matching index record in the
619
					child table */
620
	dtuple_t*	entry)		/* in: index entry in the parent
621
					table */
622
{
623
	FILE*	ef	= dict_foreign_err_file;
624
	trx_t*	trx	= thr_get_trx(thr);
625
626
	row_ins_set_detailed(trx, foreign);
627
628
	mutex_enter(&dict_foreign_err_mutex);
629
	rewind(ef);
630
	ut_print_timestamp(ef);
631
	fputs(" Transaction:\n", ef);
632
	trx_print(ef, trx, 600);
633
634
	fputs("Foreign key constraint fails for table ", ef);
635
	ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
636
	fputs(":\n", ef);
637
	dict_print_info_on_foreign_key_in_create_format(ef, trx, foreign,
638
							TRUE);
639
	putc('\n', ef);
640
	fputs(errstr, ef);
641
	fputs(" in parent table, in index ", ef);
642
	ut_print_name(ef, trx, FALSE, foreign->referenced_index->name);
643
	if (entry) {
644
		fputs(" tuple:\n", ef);
645
		dtuple_print(ef, entry);
646
	}
647
	fputs("\nBut in child table ", ef);
648
	ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
649
	fputs(", in index ", ef);
650
	ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
651
	if (rec) {
652
		fputs(", there is a record:\n", ef);
653
		rec_print(ef, rec, foreign->foreign_index);
654
	} else {
655
		fputs(", the record is not available\n", ef);
656
	}
657
	putc('\n', ef);
658
659
	mutex_exit(&dict_foreign_err_mutex);
660
}
661
662
/*************************************************************************
663
Reports a foreign key error to dict_foreign_err_file when we are trying
664
to add an index entry to a child table. Note that the adding may be the result
665
of an update, too. */
666
static
667
void
668
row_ins_foreign_report_add_err(
669
/*===========================*/
670
	trx_t*		trx,		/* in: transaction */
671
	dict_foreign_t*	foreign,	/* in: foreign key constraint */
672
	rec_t*		rec,		/* in: a record in the parent table:
673
					it does not match entry because we
674
					have an error! */
675
	dtuple_t*	entry)		/* in: index entry to insert in the
676
					child table */
677
{
678
	FILE*	ef	= dict_foreign_err_file;
679
680
	row_ins_set_detailed(trx, foreign);
681
682
	mutex_enter(&dict_foreign_err_mutex);
683
	rewind(ef);
684
	ut_print_timestamp(ef);
685
	fputs(" Transaction:\n", ef);
686
	trx_print(ef, trx, 600);
687
	fputs("Foreign key constraint fails for table ", ef);
688
	ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
689
	fputs(":\n", ef);
690
	dict_print_info_on_foreign_key_in_create_format(ef, trx, foreign,
691
							TRUE);
692
	fputs("\nTrying to add in child table, in index ", ef);
693
	ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
694
	if (entry) {
695
		fputs(" tuple:\n", ef);
696
		dtuple_print(ef, entry);
697
	}
698
	fputs("\nBut in parent table ", ef);
699
	ut_print_name(ef, trx, TRUE, foreign->referenced_table_name);
700
	fputs(", in index ", ef);
701
	ut_print_name(ef, trx, FALSE, foreign->referenced_index->name);
702
	fputs(",\nthe closest match we can find is record:\n", ef);
703
	if (rec && page_rec_is_supremum(rec)) {
704
		/* If the cursor ended on a supremum record, it is better
705
		to report the previous record in the error message, so that
706
		the user gets a more descriptive error message. */
707
		rec = page_rec_get_prev(rec);
708
	}
709
710
	if (rec) {
711
		rec_print(ef, rec, foreign->referenced_index);
712
	}
713
	putc('\n', ef);
714
715
	mutex_exit(&dict_foreign_err_mutex);
716
}
717
718
/*************************************************************************
719
Invalidate the query cache for the given table. */
720
static
721
void
722
row_ins_invalidate_query_cache(
723
/*===========================*/
724
	que_thr_t*	thr,		/* in: query thread whose run_node
725
					is an update node */
726
	const char*	name)		/* in: table name prefixed with
727
					database name and a '/' character */
728
{
729
	char*	buf;
730
	char*	ptr;
731
	ulint	len = strlen(name) + 1;
732
733
	buf = mem_strdupl(name, len);
734
735
	ptr = strchr(buf, '/');
736
	ut_a(ptr);
737
	*ptr = '\0';
738
739
	/* We call a function in ha_innodb.cc */
740
#ifndef UNIV_HOTBACKUP
741
	innobase_invalidate_query_cache(thr_get_trx(thr), buf, len);
742
#endif
743
	mem_free(buf);
744
}
745
746
/*************************************************************************
747
Perform referential actions or checks when a parent row is deleted or updated
748
and the constraint had an ON DELETE or ON UPDATE condition which was not
749
RESTRICT. */
750
static
751
ulint
752
row_ins_foreign_check_on_constraint(
753
/*================================*/
754
					/* out: DB_SUCCESS, DB_LOCK_WAIT,
755
					or error code */
756
	que_thr_t*	thr,		/* in: query thread whose run_node
757
					is an update node */
758
	dict_foreign_t*	foreign,	/* in: foreign key constraint whose
759
					type is != 0 */
760
	btr_pcur_t*	pcur,		/* in: cursor placed on a matching
761
					index record in the child table */
762
	dtuple_t*	entry,		/* in: index entry in the parent
763
					table */
764
	mtr_t*		mtr)		/* in: mtr holding the latch of pcur
765
					page */
766
{
767
	upd_node_t*	node;
768
	upd_node_t*	cascade;
769
	dict_table_t*	table		= foreign->foreign_table;
770
	dict_index_t*	index;
771
	dict_index_t*	clust_index;
772
	dtuple_t*	ref;
773
	mem_heap_t*	upd_vec_heap	= NULL;
774
	rec_t*		rec;
775
	rec_t*		clust_rec;
776
	upd_t*		update;
777
	ulint		n_to_update;
778
	ulint		err;
779
	ulint		i;
780
	trx_t*		trx;
781
	mem_heap_t*	tmp_heap	= NULL;
782
783
	ut_a(thr);
784
	ut_a(foreign);
785
	ut_a(pcur);
786
	ut_a(mtr);
787
788
	trx = thr_get_trx(thr);
789
790
	/* Since we are going to delete or update a row, we have to invalidate
791
	the MySQL query cache for table. A deadlock of threads is not possible
792
	here because the caller of this function does not hold any latches with
793
	the sync0sync.h rank above the kernel mutex. The query cache mutex has
794
	a rank just above the kernel mutex. */
795
796
	row_ins_invalidate_query_cache(thr, table->name);
797
798
	node = thr->run_node;
799
800
	if (node->is_delete && 0 == (foreign->type
801
				     & (DICT_FOREIGN_ON_DELETE_CASCADE
802
					| DICT_FOREIGN_ON_DELETE_SET_NULL))) {
803
804
		row_ins_foreign_report_err("Trying to delete",
805
					   thr, foreign,
806
					   btr_pcur_get_rec(pcur), entry);
807
808
		return(DB_ROW_IS_REFERENCED);
809
	}
810
811
	if (!node->is_delete && 0 == (foreign->type
812
				      & (DICT_FOREIGN_ON_UPDATE_CASCADE
813
					 | DICT_FOREIGN_ON_UPDATE_SET_NULL))) {
814
815
		/* This is an UPDATE */
816
817
		row_ins_foreign_report_err("Trying to update",
818
					   thr, foreign,
819
					   btr_pcur_get_rec(pcur), entry);
820
821
		return(DB_ROW_IS_REFERENCED);
822
	}
823
824
	if (node->cascade_node == NULL) {
825
		/* Extend our query graph by creating a child to current
826
		update node. The child is used in the cascade or set null
827
		operation. */
828
829
		node->cascade_heap = mem_heap_create(128);
830
		node->cascade_node = row_create_update_node_for_mysql(
831
			table, node->cascade_heap);
832
		que_node_set_parent(node->cascade_node, node);
833
	}
834
835
	/* Initialize cascade_node to do the operation we want. Note that we
836
	use the SAME cascade node to do all foreign key operations of the
837
	SQL DELETE: the table of the cascade node may change if there are
838
	several child tables to the table where the delete is done! */
839
840
	cascade = node->cascade_node;
841
842
	cascade->table = table;
843
844
	cascade->foreign = foreign;
845
846
	if (node->is_delete
847
	    && (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE)) {
848
		cascade->is_delete = TRUE;
849
	} else {
850
		cascade->is_delete = FALSE;
851
852
		if (foreign->n_fields > cascade->update_n_fields) {
853
			/* We have to make the update vector longer */
854
855
			cascade->update = upd_create(foreign->n_fields,
856
						     node->cascade_heap);
857
			cascade->update_n_fields = foreign->n_fields;
858
		}
859
	}
860
861
	/* We do not allow cyclic cascaded updating (DELETE is allowed,
862
	but not UPDATE) of the same table, as this can lead to an infinite
863
	cycle. Check that we are not updating the same table which is
864
	already being modified in this cascade chain. We have to check
865
	this also because the modification of the indexes of a 'parent'
866
	table may still be incomplete, and we must avoid seeing the indexes
867
	of the parent table in an inconsistent state! */
868
869
	if (!cascade->is_delete
870
	    && row_ins_cascade_ancestor_updates_table(cascade, table)) {
871
872
		/* We do not know if this would break foreign key
873
		constraints, but play safe and return an error */
874
875
		err = DB_ROW_IS_REFERENCED;
876
877
		row_ins_foreign_report_err(
878
			"Trying an update, possibly causing a cyclic"
879
			" cascaded update\n"
880
			"in the child table,", thr, foreign,
881
			btr_pcur_get_rec(pcur), entry);
882
883
		goto nonstandard_exit_func;
884
	}
885
886
	if (row_ins_cascade_n_ancestors(cascade) >= 15) {
887
		err = DB_ROW_IS_REFERENCED;
888
889
		row_ins_foreign_report_err(
890
			"Trying a too deep cascaded delete or update\n",
891
			thr, foreign, btr_pcur_get_rec(pcur), entry);
892
893
		goto nonstandard_exit_func;
894
	}
895
896
	index = btr_pcur_get_btr_cur(pcur)->index;
897
898
	ut_a(index == foreign->foreign_index);
899
900
	rec = btr_pcur_get_rec(pcur);
901
902
	if (index->type & DICT_CLUSTERED) {
903
		/* pcur is already positioned in the clustered index of
904
		the child table */
905
906
		clust_index = index;
907
		clust_rec = rec;
908
	} else {
909
		/* We have to look for the record in the clustered index
910
		in the child table */
911
912
		clust_index = dict_table_get_first_index(table);
913
914
		tmp_heap = mem_heap_create(256);
915
916
		ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec,
917
					tmp_heap);
918
		btr_pcur_open_with_no_init(clust_index, ref,
919
					   PAGE_CUR_LE, BTR_SEARCH_LEAF,
920
					   cascade->pcur, 0, mtr);
921
922
		clust_rec = btr_pcur_get_rec(cascade->pcur);
923
924
		if (!page_rec_is_user_rec(clust_rec)
925
		    || btr_pcur_get_low_match(cascade->pcur)
926
		    < dict_index_get_n_unique(clust_index)) {
927
928
			fputs("InnoDB: error in cascade of a foreign key op\n"
929
			      "InnoDB: ", stderr);
930
			dict_index_name_print(stderr, trx, index);
931
932
			fputs("\n"
933
			      "InnoDB: record ", stderr);
934
			rec_print(stderr, rec, index);
935
			fputs("\n"
936
			      "InnoDB: clustered record ", stderr);
937
			rec_print(stderr, clust_rec, clust_index);
938
			fputs("\n"
939
			      "InnoDB: Submit a detailed bug report to"
940
			      " http://bugs.mysql.com\n", stderr);
941
942
			err = DB_SUCCESS;
943
944
			goto nonstandard_exit_func;
945
		}
946
	}
947
948
	/* Set an X-lock on the row to delete or update in the child table */
949
950
	err = lock_table(0, table, LOCK_IX, thr);
951
952
	if (err == DB_SUCCESS) {
953
		/* Here it suffices to use a LOCK_REC_NOT_GAP type lock;
954
		we already have a normal shared lock on the appropriate
955
		gap if the search criterion was not unique */
956
957
		err = lock_clust_rec_read_check_and_lock_alt(
958
			0, clust_rec, clust_index, LOCK_X, LOCK_REC_NOT_GAP,
959
			thr);
960
	}
961
962
	if (err != DB_SUCCESS) {
963
964
		goto nonstandard_exit_func;
965
	}
966
967
	if (rec_get_deleted_flag(clust_rec, dict_table_is_comp(table))) {
968
		/* This can happen if there is a circular reference of
969
		rows such that cascading delete comes to delete a row
970
		already in the process of being delete marked */
971
		err = DB_SUCCESS;
972
973
		goto nonstandard_exit_func;
974
	}
975
976
	if ((node->is_delete
977
	     && (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL))
978
	    || (!node->is_delete
979
		&& (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL))) {
980
981
		/* Build the appropriate update vector which sets
982
		foreign->n_fields first fields in rec to SQL NULL */
983
984
		update = cascade->update;
985
986
		update->info_bits = 0;
987
		update->n_fields = foreign->n_fields;
988
989
		for (i = 0; i < foreign->n_fields; i++) {
990
			(update->fields + i)->field_no
991
				= dict_table_get_nth_col_pos(
992
					table,
993
					dict_index_get_nth_col_no(index, i));
994
			(update->fields + i)->exp = NULL;
995
			(update->fields + i)->new_val.len = UNIV_SQL_NULL;
996
			(update->fields + i)->new_val.data = NULL;
997
			(update->fields + i)->extern_storage = FALSE;
998
		}
999
	}
1000
1001
	if (!node->is_delete
1002
	    && (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)) {
1003
1004
		/* Build the appropriate update vector which sets changing
1005
		foreign->n_fields first fields in rec to new values */
1006
1007
		upd_vec_heap = mem_heap_create(256);
1008
1009
		n_to_update = row_ins_cascade_calc_update_vec(node, foreign,
1010
							      upd_vec_heap);
1011
		if (n_to_update == ULINT_UNDEFINED) {
1012
			err = DB_ROW_IS_REFERENCED;
1013
1014
			row_ins_foreign_report_err(
1015
				"Trying a cascaded update where the"
1016
				" updated value in the child\n"
1017
				"table would not fit in the length"
1018
				" of the column, or the value would\n"
1019
				"be NULL and the column is"
1020
				" declared as not NULL in the child table,",
1021
				thr, foreign, btr_pcur_get_rec(pcur), entry);
1022
1023
			goto nonstandard_exit_func;
1024
		}
1025
1026
		if (cascade->update->n_fields == 0) {
1027
1028
			/* The update does not change any columns referred
1029
			to in this foreign key constraint: no need to do
1030
			anything */
1031
1032
			err = DB_SUCCESS;
1033
1034
			goto nonstandard_exit_func;
1035
		}
1036
	}
1037
1038
	/* Store pcur position and initialize or store the cascade node
1039
	pcur stored position */
1040
1041
	btr_pcur_store_position(pcur, mtr);
1042
1043
	if (index == clust_index) {
1044
		btr_pcur_copy_stored_position(cascade->pcur, pcur);
1045
	} else {
1046
		btr_pcur_store_position(cascade->pcur, mtr);
1047
	}
1048
1049
	mtr_commit(mtr);
1050
1051
	ut_a(cascade->pcur->rel_pos == BTR_PCUR_ON);
1052
1053
	cascade->state = UPD_NODE_UPDATE_CLUSTERED;
1054
1055
	err = row_update_cascade_for_mysql(thr, cascade,
1056
					   foreign->foreign_table);
1057
1058
	if (foreign->foreign_table->n_foreign_key_checks_running == 0) {
1059
		fprintf(stderr,
1060
			"InnoDB: error: table %s has the counter 0"
1061
			" though there is\n"
1062
			"InnoDB: a FOREIGN KEY check running on it.\n",
1063
			foreign->foreign_table->name);
1064
	}
1065
1066
	/* Release the data dictionary latch for a while, so that we do not
1067
	starve other threads from doing CREATE TABLE etc. if we have a huge
1068
	cascaded operation running. The counter n_foreign_key_checks_running
1069
	will prevent other users from dropping or ALTERing the table when we
1070
	release the latch. */
1071
1072
	row_mysql_unfreeze_data_dictionary(thr_get_trx(thr));
1073
	row_mysql_freeze_data_dictionary(thr_get_trx(thr));
1074
1075
	mtr_start(mtr);
1076
1077
	/* Restore pcur position */
1078
1079
	btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
1080
1081
	if (tmp_heap) {
1082
		mem_heap_free(tmp_heap);
1083
	}
1084
1085
	if (upd_vec_heap) {
1086
		mem_heap_free(upd_vec_heap);
1087
	}
1088
1089
	return(err);
1090
1091
nonstandard_exit_func:
1092
	if (tmp_heap) {
1093
		mem_heap_free(tmp_heap);
1094
	}
1095
1096
	if (upd_vec_heap) {
1097
		mem_heap_free(upd_vec_heap);
1098
	}
1099
1100
	btr_pcur_store_position(pcur, mtr);
1101
1102
	mtr_commit(mtr);
1103
	mtr_start(mtr);
1104
1105
	btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
1106
1107
	return(err);
1108
}
1109
1110
/*************************************************************************
1111
Sets a shared lock on a record. Used in locking possible duplicate key
1112
records and also in checking foreign key constraints. */
1113
static
1114
ulint
1115
row_ins_set_shared_rec_lock(
1116
/*========================*/
1117
				/* out: DB_SUCCESS or error code */
1118
	ulint		type,	/* in: LOCK_ORDINARY, LOCK_GAP, or
1119
				LOCK_REC_NOT_GAP type lock */
1120
	rec_t*		rec,	/* in: record */
1121
	dict_index_t*	index,	/* in: index */
1122
	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
1123
	que_thr_t*	thr)	/* in: query thread */
1124
{
1125
	ulint	err;
1126
1127
	ut_ad(rec_offs_validate(rec, index, offsets));
1128
1129
	if (index->type & DICT_CLUSTERED) {
1130
		err = lock_clust_rec_read_check_and_lock(
1131
			0, rec, index, offsets, LOCK_S, type, thr);
1132
	} else {
1133
		err = lock_sec_rec_read_check_and_lock(
1134
			0, rec, index, offsets, LOCK_S, type, thr);
1135
	}
1136
1137
	return(err);
1138
}
1139
1140
#ifndef UNIV_HOTBACKUP
1141
/*************************************************************************
1142
Sets a exclusive lock on a record. Used in locking possible duplicate key
1143
records */
1144
static
1145
ulint
1146
row_ins_set_exclusive_rec_lock(
1147
/*===========================*/
1148
				/* out: DB_SUCCESS or error code */
1149
	ulint		type,	/* in: LOCK_ORDINARY, LOCK_GAP, or
1150
				LOCK_REC_NOT_GAP type lock */
1151
	rec_t*		rec,	/* in: record */
1152
	dict_index_t*	index,	/* in: index */
1153
	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
1154
	que_thr_t*	thr)	/* in: query thread */
1155
{
1156
	ulint	err;
1157
1158
	ut_ad(rec_offs_validate(rec, index, offsets));
1159
1160
	if (index->type & DICT_CLUSTERED) {
1161
		err = lock_clust_rec_read_check_and_lock(
1162
			0, rec, index, offsets, LOCK_X, type, thr);
1163
	} else {
1164
		err = lock_sec_rec_read_check_and_lock(
1165
			0, rec, index, offsets, LOCK_X, type, thr);
1166
	}
1167
1168
	return(err);
1169
}
1170
#endif /* !UNIV_HOTBACKUP */
1171
1172
/*******************************************************************
1173
Checks if foreign key constraint fails for an index entry. Sets shared locks
1174
which lock either the success or the failure of the constraint. NOTE that
1175
the caller must have a shared latch on dict_operation_lock. */
1176
1177
ulint
1178
row_ins_check_foreign_constraint(
1179
/*=============================*/
1180
				/* out: DB_SUCCESS,
1181
				DB_NO_REFERENCED_ROW,
1182
				or DB_ROW_IS_REFERENCED */
1183
	ibool		check_ref,/* in: TRUE if we want to check that
1184
				the referenced table is ok, FALSE if we
1185
				want to to check the foreign key table */
1186
	dict_foreign_t*	foreign,/* in: foreign constraint; NOTE that the
1187
				tables mentioned in it must be in the
1188
				dictionary cache if they exist at all */
1189
	dict_table_t*	table,	/* in: if check_ref is TRUE, then the foreign
1190
				table, else the referenced table */
1191
	dtuple_t*	entry,	/* in: index entry for index */
1192
	que_thr_t*	thr)	/* in: query thread */
1193
{
1194
	upd_node_t*	upd_node;
1195
	dict_table_t*	check_table;
1196
	dict_index_t*	check_index;
1197
	ulint		n_fields_cmp;
1198
	rec_t*		rec;
1199
	btr_pcur_t	pcur;
1200
	ibool		moved;
1201
	int		cmp;
1202
	ulint		err;
1203
	ulint		i;
1204
	mtr_t		mtr;
1205
	trx_t*		trx		= thr_get_trx(thr);
1206
	mem_heap_t*	heap		= NULL;
1207
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
1208
	ulint*		offsets		= offsets_;
1209
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
1210
1211
run_again:
1212
#ifdef UNIV_SYNC_DEBUG
1213
	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
1214
#endif /* UNIV_SYNC_DEBUG */
1215
1216
	err = DB_SUCCESS;
1217
1218
	if (trx->check_foreigns == FALSE) {
1219
		/* The user has suppressed foreign key checks currently for
1220
		this session */
1221
		goto exit_func;
1222
	}
1223
1224
	/* If any of the foreign key fields in entry is SQL NULL, we
1225
	suppress the foreign key check: this is compatible with Oracle,
1226
	for example */
1227
1228
	for (i = 0; i < foreign->n_fields; i++) {
1229
		if (UNIV_SQL_NULL == dfield_get_len(
1230
			    dtuple_get_nth_field(entry, i))) {
1231
1232
			goto exit_func;
1233
		}
1234
	}
1235
1236
	if (que_node_get_type(thr->run_node) == QUE_NODE_UPDATE) {
1237
		upd_node = thr->run_node;
1238
1239
		if (!(upd_node->is_delete) && upd_node->foreign == foreign) {
1240
			/* If a cascaded update is done as defined by a
1241
			foreign key constraint, do not check that
1242
			constraint for the child row. In ON UPDATE CASCADE
1243
			the update of the parent row is only half done when
1244
			we come here: if we would check the constraint here
1245
			for the child row it would fail.
1246
1247
			A QUESTION remains: if in the child table there are
1248
			several constraints which refer to the same parent
1249
			table, we should merge all updates to the child as
1250
			one update? And the updates can be contradictory!
1251
			Currently we just perform the update associated
1252
			with each foreign key constraint, one after
1253
			another, and the user has problems predicting in
1254
			which order they are performed. */
1255
1256
			goto exit_func;
1257
		}
1258
	}
1259
1260
	if (check_ref) {
1261
		check_table = foreign->referenced_table;
1262
		check_index = foreign->referenced_index;
1263
	} else {
1264
		check_table = foreign->foreign_table;
1265
		check_index = foreign->foreign_index;
1266
	}
1267
1268
	if (check_table == NULL || check_table->ibd_file_missing) {
1269
		if (check_ref) {
1270
			FILE*	ef = dict_foreign_err_file;
1271
1272
			row_ins_set_detailed(trx, foreign);
1273
1274
			mutex_enter(&dict_foreign_err_mutex);
1275
			rewind(ef);
1276
			ut_print_timestamp(ef);
1277
			fputs(" Transaction:\n", ef);
1278
			trx_print(ef, trx, 600);
1279
			fputs("Foreign key constraint fails for table ", ef);
1280
			ut_print_name(ef, trx, TRUE,
1281
				      foreign->foreign_table_name);
1282
			fputs(":\n", ef);
1283
			dict_print_info_on_foreign_key_in_create_format(
1284
				ef, trx, foreign, TRUE);
1285
			fputs("\nTrying to add to index ", ef);
1286
			ut_print_name(ef, trx, FALSE,
1287
				      foreign->foreign_index->name);
1288
			fputs(" tuple:\n", ef);
1289
			dtuple_print(ef, entry);
1290
			fputs("\nBut the parent table ", ef);
1291
			ut_print_name(ef, trx, TRUE,
1292
				      foreign->referenced_table_name);
1293
			fputs("\nor its .ibd file does"
1294
			      " not currently exist!\n", ef);
1295
			mutex_exit(&dict_foreign_err_mutex);
1296
1297
			err = DB_NO_REFERENCED_ROW;
1298
		}
1299
1300
		goto exit_func;
1301
	}
1302
1303
	ut_a(check_table);
1304
	ut_a(check_index);
1305
1306
	if (check_table != table) {
1307
		/* We already have a LOCK_IX on table, but not necessarily
1308
		on check_table */
1309
1310
		err = lock_table(0, check_table, LOCK_IS, thr);
1311
1312
		if (err != DB_SUCCESS) {
1313
1314
			goto do_possible_lock_wait;
1315
		}
1316
	}
1317
1318
	mtr_start(&mtr);
1319
1320
	/* Store old value on n_fields_cmp */
1321
1322
	n_fields_cmp = dtuple_get_n_fields_cmp(entry);
1323
1324
	dtuple_set_n_fields_cmp(entry, foreign->n_fields);
1325
1326
	btr_pcur_open(check_index, entry, PAGE_CUR_GE,
1327
		      BTR_SEARCH_LEAF, &pcur, &mtr);
1328
1329
	/* Scan index records and check if there is a matching record */
1330
1331
	for (;;) {
1332
		rec = btr_pcur_get_rec(&pcur);
1333
1334
		if (page_rec_is_infimum(rec)) {
1335
1336
			goto next_rec;
1337
		}
1338
1339
		offsets = rec_get_offsets(rec, check_index,
1340
					  offsets, ULINT_UNDEFINED, &heap);
1341
1342
		if (page_rec_is_supremum(rec)) {
1343
1344
			err = row_ins_set_shared_rec_lock(
1345
				LOCK_ORDINARY, rec, check_index, offsets, thr);
1346
			if (err != DB_SUCCESS) {
1347
1348
				break;
1349
			}
1350
1351
			goto next_rec;
1352
		}
1353
1354
		cmp = cmp_dtuple_rec(entry, rec, offsets);
1355
1356
		if (cmp == 0) {
1357
			if (rec_get_deleted_flag(rec,
1358
						 rec_offs_comp(offsets))) {
1359
				err = row_ins_set_shared_rec_lock(
1360
					LOCK_ORDINARY, rec, check_index,
1361
					offsets, thr);
1362
				if (err != DB_SUCCESS) {
1363
1364
					break;
1365
				}
1366
			} else {
1367
				/* Found a matching record. Lock only
1368
				a record because we can allow inserts
1369
				into gaps */
1370
1371
				err = row_ins_set_shared_rec_lock(
1372
					LOCK_REC_NOT_GAP, rec, check_index,
1373
					offsets, thr);
1374
1375
				if (err != DB_SUCCESS) {
1376
1377
					break;
1378
				}
1379
1380
				if (check_ref) {
1381
					err = DB_SUCCESS;
1382
1383
					break;
1384
				} else if (foreign->type != 0) {
1385
					/* There is an ON UPDATE or ON DELETE
1386
					condition: check them in a separate
1387
					function */
1388
1389
					err = row_ins_foreign_check_on_constraint(
1390
						thr, foreign, &pcur, entry,
1391
						&mtr);
1392
					if (err != DB_SUCCESS) {
1393
						/* Since reporting a plain
1394
						"duplicate key" error
1395
						message to the user in
1396
						cases where a long CASCADE
1397
						operation would lead to a
1398
						duplicate key in some
1399
						other table is very
1400
						confusing, map duplicate
1401
						key errors resulting from
1402
						FK constraints to a
1403
						separate error code. */
1404
1405
						if (err == DB_DUPLICATE_KEY) {
1406
							err = DB_FOREIGN_DUPLICATE_KEY;
1407
						}
1408
1409
						break;
1410
					}
1411
				} else {
1412
					row_ins_foreign_report_err(
1413
						"Trying to delete or update",
1414
						thr, foreign, rec, entry);
1415
1416
					err = DB_ROW_IS_REFERENCED;
1417
					break;
1418
				}
1419
			}
1420
		}
1421
1422
		if (cmp < 0) {
1423
			err = row_ins_set_shared_rec_lock(
1424
				LOCK_GAP, rec, check_index, offsets, thr);
1425
			if (err != DB_SUCCESS) {
1426
1427
				break;
1428
			}
1429
1430
			if (check_ref) {
1431
				err = DB_NO_REFERENCED_ROW;
1432
				row_ins_foreign_report_add_err(
1433
					trx, foreign, rec, entry);
1434
			} else {
1435
				err = DB_SUCCESS;
1436
			}
1437
1438
			break;
1439
		}
1440
1441
		ut_a(cmp == 0);
1442
next_rec:
1443
		moved = btr_pcur_move_to_next(&pcur, &mtr);
1444
1445
		if (!moved) {
1446
			if (check_ref) {
1447
				rec = btr_pcur_get_rec(&pcur);
1448
				row_ins_foreign_report_add_err(
1449
					trx, foreign, rec, entry);
1450
				err = DB_NO_REFERENCED_ROW;
1451
			} else {
1452
				err = DB_SUCCESS;
1453
			}
1454
1455
			break;
1456
		}
1457
	}
1458
1459
	btr_pcur_close(&pcur);
1460
1461
	mtr_commit(&mtr);
1462
1463
	/* Restore old value */
1464
	dtuple_set_n_fields_cmp(entry, n_fields_cmp);
1465
1466
do_possible_lock_wait:
1467
	if (err == DB_LOCK_WAIT) {
1468
		trx->error_state = err;
1469
1470
		que_thr_stop_for_mysql(thr);
1471
1472
		srv_suspend_mysql_thread(thr);
1473
1474
		if (trx->error_state == DB_SUCCESS) {
1475
1476
			goto run_again;
1477
		}
1478
1479
		err = trx->error_state;
1480
	}
1481
1482
exit_func:
1483
	if (UNIV_LIKELY_NULL(heap)) {
1484
		mem_heap_free(heap);
1485
	}
1486
	return(err);
1487
}
1488
1489
/*******************************************************************
1490
Checks if foreign key constraints fail for an index entry. If index
1491
is not mentioned in any constraint, this function does nothing,
1492
Otherwise does searches to the indexes of referenced tables and
1493
sets shared locks which lock either the success or the failure of
1494
a constraint. */
1495
static
1496
ulint
1497
row_ins_check_foreign_constraints(
1498
/*==============================*/
1499
				/* out: DB_SUCCESS or error code */
1500
	dict_table_t*	table,	/* in: table */
1501
	dict_index_t*	index,	/* in: index */
1502
	dtuple_t*	entry,	/* in: index entry for index */
1503
	que_thr_t*	thr)	/* in: query thread */
1504
{
1505
	dict_foreign_t*	foreign;
1506
	ulint		err;
1507
	trx_t*		trx;
1508
	ibool		got_s_lock	= FALSE;
1509
1510
	trx = thr_get_trx(thr);
1511
1512
	foreign = UT_LIST_GET_FIRST(table->foreign_list);
1513
1514
	while (foreign) {
1515
		if (foreign->foreign_index == index) {
1516
1517
			if (foreign->referenced_table == NULL) {
1518
				dict_table_get(foreign->referenced_table_name,
1519
					       FALSE);
1520
			}
1521
1522
			if (0 == trx->dict_operation_lock_mode) {
1523
				got_s_lock = TRUE;
1524
1525
				row_mysql_freeze_data_dictionary(trx);
1526
			}
1527
1528
			if (foreign->referenced_table) {
1529
				mutex_enter(&(dict_sys->mutex));
1530
1531
				(foreign->referenced_table
1532
				 ->n_foreign_key_checks_running)++;
1533
1534
				mutex_exit(&(dict_sys->mutex));
1535
			}
1536
1537
			/* NOTE that if the thread ends up waiting for a lock
1538
			we will release dict_operation_lock temporarily!
1539
			But the counter on the table protects the referenced
1540
			table from being dropped while the check is running. */
1541
1542
			err = row_ins_check_foreign_constraint(
1543
				TRUE, foreign, table, entry, thr);
1544
1545
			if (foreign->referenced_table) {
1546
				mutex_enter(&(dict_sys->mutex));
1547
1548
				ut_a(foreign->referenced_table
1549
				     ->n_foreign_key_checks_running > 0);
1550
				(foreign->referenced_table
1551
				 ->n_foreign_key_checks_running)--;
1552
1553
				mutex_exit(&(dict_sys->mutex));
1554
			}
1555
1556
			if (got_s_lock) {
1557
				row_mysql_unfreeze_data_dictionary(trx);
1558
			}
1559
1560
			if (err != DB_SUCCESS) {
1561
				return(err);
1562
			}
1563
		}
1564
1565
		foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
1566
	}
1567
1568
	return(DB_SUCCESS);
1569
}
1570
1571
#ifndef UNIV_HOTBACKUP
1572
/*******************************************************************
1573
Checks if a unique key violation to rec would occur at the index entry
1574
insert. */
1575
static
1576
ibool
1577
row_ins_dupl_error_with_rec(
1578
/*========================*/
1579
				/* out: TRUE if error */
1580
	rec_t*		rec,	/* in: user record; NOTE that we assume
1581
				that the caller already has a record lock on
1582
				the record! */
1583
	dtuple_t*	entry,	/* in: entry to insert */
1584
	dict_index_t*	index,	/* in: index */
1585
	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
1586
{
1587
	ulint	matched_fields;
1588
	ulint	matched_bytes;
1589
	ulint	n_unique;
1590
	ulint	i;
1591
1592
	ut_ad(rec_offs_validate(rec, index, offsets));
1593
1594
	n_unique = dict_index_get_n_unique(index);
1595
1596
	matched_fields = 0;
1597
	matched_bytes = 0;
1598
1599
	cmp_dtuple_rec_with_match(entry, rec, offsets,
1600
				  &matched_fields, &matched_bytes);
1601
1602
	if (matched_fields < n_unique) {
1603
1604
		return(FALSE);
1605
	}
1606
1607
	/* In a unique secondary index we allow equal key values if they
1608
	contain SQL NULLs */
1609
1610
	if (!(index->type & DICT_CLUSTERED)) {
1611
1612
		for (i = 0; i < n_unique; i++) {
1613
			if (UNIV_SQL_NULL == dfield_get_len(
1614
				    dtuple_get_nth_field(entry, i))) {
1615
1616
				return(FALSE);
1617
			}
1618
		}
1619
	}
1620
1621
	return(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
1622
}
1623
#endif /* !UNIV_HOTBACKUP */
1624
1625
/*******************************************************************
1626
Scans a unique non-clustered index at a given index entry to determine
1627
whether a uniqueness violation has occurred for the key value of the entry.
1628
Set shared locks on possible duplicate records. */
1629
static
1630
ulint
1631
row_ins_scan_sec_index_for_duplicate(
1632
/*=================================*/
1633
				/* out: DB_SUCCESS, DB_DUPLICATE_KEY, or
1634
				DB_LOCK_WAIT */
1635
	dict_index_t*	index,	/* in: non-clustered unique index */
1636
	dtuple_t*	entry,	/* in: index entry */
1637
	que_thr_t*	thr)	/* in: query thread */
1638
{
1639
#ifndef UNIV_HOTBACKUP
1640
	ulint		n_unique;
1641
	ulint		i;
1642
	int		cmp;
1643
	ulint		n_fields_cmp;
1644
	rec_t*		rec;
1645
	btr_pcur_t	pcur;
1646
	ulint		err		= DB_SUCCESS;
1647
	ibool		moved;
1648
	unsigned	allow_duplicates;
1649
	mtr_t		mtr;
1650
	mem_heap_t*	heap		= NULL;
1651
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
1652
	ulint*		offsets		= offsets_;
1653
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
1654
1655
	n_unique = dict_index_get_n_unique(index);
1656
1657
	/* If the secondary index is unique, but one of the fields in the
1658
	n_unique first fields is NULL, a unique key violation cannot occur,
1659
	since we define NULL != NULL in this case */
1660
1661
	for (i = 0; i < n_unique; i++) {
1662
		if (UNIV_SQL_NULL == dfield_get_len(
1663
			    dtuple_get_nth_field(entry, i))) {
1664
1665
			return(DB_SUCCESS);
1666
		}
1667
	}
1668
1669
	mtr_start(&mtr);
1670
1671
	/* Store old value on n_fields_cmp */
1672
1673
	n_fields_cmp = dtuple_get_n_fields_cmp(entry);
1674
1675
	dtuple_set_n_fields_cmp(entry, dict_index_get_n_unique(index));
1676
1677
	btr_pcur_open(index, entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr);
1678
1679
	allow_duplicates = thr_get_trx(thr)->duplicates & TRX_DUP_IGNORE;
1680
1681
	/* Scan index records and check if there is a duplicate */
1682
1683
	for (;;) {
1684
		rec = btr_pcur_get_rec(&pcur);
1685
1686
		if (page_rec_is_infimum(rec)) {
1687
1688
			goto next_rec;
1689
		}
1690
1691
		offsets = rec_get_offsets(rec, index, offsets,
1692
					  ULINT_UNDEFINED, &heap);
1693
1694
		if (allow_duplicates) {
1695
1696
			/* If the SQL-query will update or replace
1697
			duplicate key we will take X-lock for
1698
			duplicates ( REPLACE, LOAD DATAFILE REPLACE,
1699
			INSERT ON DUPLICATE KEY UPDATE). */
1700
1701
			err = row_ins_set_exclusive_rec_lock(
1702
				LOCK_ORDINARY, rec, index, offsets, thr);
1703
		} else {
1704
1705
			err = row_ins_set_shared_rec_lock(
1706
				LOCK_ORDINARY, rec, index, offsets, thr);
1707
		}
1708
1709
		if (err != DB_SUCCESS) {
1710
1711
			break;
1712
		}
1713
1714
		if (page_rec_is_supremum(rec)) {
1715
1716
			goto next_rec;
1717
		}
1718
1719
		cmp = cmp_dtuple_rec(entry, rec, offsets);
1720
1721
		if (cmp == 0) {
1722
			if (row_ins_dupl_error_with_rec(rec, entry,
1723
							index, offsets)) {
1724
				err = DB_DUPLICATE_KEY;
1725
1726
				thr_get_trx(thr)->error_info = index;
1727
1728
				break;
1729
			}
1730
		}
1731
1732
		if (cmp < 0) {
1733
			break;
1734
		}
1735
1736
		ut_a(cmp == 0);
1737
next_rec:
1738
		moved = btr_pcur_move_to_next(&pcur, &mtr);
1739
1740
		if (!moved) {
1741
			break;
1742
		}
1743
	}
1744
1745
	if (UNIV_LIKELY_NULL(heap)) {
1746
		mem_heap_free(heap);
1747
	}
1748
	mtr_commit(&mtr);
1749
1750
	/* Restore old value */
1751
	dtuple_set_n_fields_cmp(entry, n_fields_cmp);
1752
1753
	return(err);
1754
#else /* UNIV_HOTBACKUP */
1755
	/* This function depends on MySQL code that is not included in
1756
	InnoDB Hot Backup builds.  Besides, this function should never
1757
	be called in InnoDB Hot Backup. */
1758
	ut_error;
1759
	return(DB_FAIL);
1760
#endif /* UNIV_HOTBACKUP */
1761
}
1762
1763
/*******************************************************************
1764
Checks if a unique key violation error would occur at an index entry
1765
insert. Sets shared locks on possible duplicate records. Works only
1766
for a clustered index! */
1767
static
1768
ulint
1769
row_ins_duplicate_error_in_clust(
1770
/*=============================*/
1771
				/* out: DB_SUCCESS if no error,
1772
				DB_DUPLICATE_KEY if error, DB_LOCK_WAIT if we
1773
				have to wait for a lock on a possible
1774
				duplicate record */
1775
	btr_cur_t*	cursor,	/* in: B-tree cursor */
1776
	dtuple_t*	entry,	/* in: entry to insert */
1777
	que_thr_t*	thr,	/* in: query thread */
1778
	mtr_t*		mtr)	/* in: mtr */
1779
{
1780
#ifndef UNIV_HOTBACKUP
1781
	ulint	err;
1782
	rec_t*	rec;
1783
	ulint	n_unique;
1784
	trx_t*	trx		= thr_get_trx(thr);
1785
	mem_heap_t*heap		= NULL;
1786
	ulint	offsets_[REC_OFFS_NORMAL_SIZE];
1787
	ulint*	offsets		= offsets_;
1788
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
1789
1790
	UT_NOT_USED(mtr);
1791
1792
	ut_a(cursor->index->type & DICT_CLUSTERED);
1793
	ut_ad(cursor->index->type & DICT_UNIQUE);
1794
1795
	/* NOTE: For unique non-clustered indexes there may be any number
1796
	of delete marked records with the same value for the non-clustered
1797
	index key (remember multiversioning), and which differ only in
1798
	the row refererence part of the index record, containing the
1799
	clustered index key fields. For such a secondary index record,
1800
	to avoid race condition, we must FIRST do the insertion and after
1801
	that check that the uniqueness condition is not breached! */
1802
1803
	/* NOTE: A problem is that in the B-tree node pointers on an
1804
	upper level may match more to the entry than the actual existing
1805
	user records on the leaf level. So, even if low_match would suggest
1806
	that a duplicate key violation may occur, this may not be the case. */
1807
1808
	n_unique = dict_index_get_n_unique(cursor->index);
1809
1810
	if (cursor->low_match >= n_unique) {
1811
1812
		rec = btr_cur_get_rec(cursor);
1813
1814
		if (!page_rec_is_infimum(rec)) {
1815
			offsets = rec_get_offsets(rec, cursor->index, offsets,
1816
						  ULINT_UNDEFINED, &heap);
1817
1818
			/* We set a lock on the possible duplicate: this
1819
			is needed in logical logging of MySQL to make
1820
			sure that in roll-forward we get the same duplicate
1821
			errors as in original execution */
1822
1823
			if (trx->duplicates & TRX_DUP_IGNORE) {
1824
1825
				/* If the SQL-query will update or replace
1826
				duplicate key we will take X-lock for
1827
				duplicates ( REPLACE, LOAD DATAFILE REPLACE,
1828
				INSERT ON DUPLICATE KEY UPDATE). */
1829
1830
				err = row_ins_set_exclusive_rec_lock(
1831
					LOCK_REC_NOT_GAP, rec,
1832
					cursor->index, offsets, thr);
1833
			} else {
1834
1835
				err = row_ins_set_shared_rec_lock(
1836
					LOCK_REC_NOT_GAP, rec,
1837
					cursor->index, offsets, thr);
1838
			}
1839
1840
			if (err != DB_SUCCESS) {
1841
				goto func_exit;
1842
			}
1843
1844
			if (row_ins_dupl_error_with_rec(
1845
				    rec, entry, cursor->index, offsets)) {
1846
				trx->error_info = cursor->index;
1847
				err = DB_DUPLICATE_KEY;
1848
				goto func_exit;
1849
			}
1850
		}
1851
	}
1852
1853
	if (cursor->up_match >= n_unique) {
1854
1855
		rec = page_rec_get_next(btr_cur_get_rec(cursor));
1856
1857
		if (!page_rec_is_supremum(rec)) {
1858
			offsets = rec_get_offsets(rec, cursor->index, offsets,
1859
						  ULINT_UNDEFINED, &heap);
1860
1861
			if (trx->duplicates & TRX_DUP_IGNORE) {
1862
1863
				/* If the SQL-query will update or replace
1864
				duplicate key we will take X-lock for
1865
				duplicates ( REPLACE, LOAD DATAFILE REPLACE,
1866
				INSERT ON DUPLICATE KEY UPDATE). */
1867
1868
				err = row_ins_set_exclusive_rec_lock(
1869
					LOCK_REC_NOT_GAP, rec,
1870
					cursor->index, offsets, thr);
1871
			} else {
1872
1873
				err = row_ins_set_shared_rec_lock(
1874
					LOCK_REC_NOT_GAP, rec,
1875
					cursor->index, offsets, thr);
1876
			}
1877
1878
			if (err != DB_SUCCESS) {
1879
				goto func_exit;
1880
			}
1881
1882
			if (row_ins_dupl_error_with_rec(
1883
				    rec, entry, cursor->index, offsets)) {
1884
				trx->error_info = cursor->index;
1885
				err = DB_DUPLICATE_KEY;
1886
				goto func_exit;
1887
			}
1888
		}
1889
1890
		ut_a(!(cursor->index->type & DICT_CLUSTERED));
1891
		/* This should never happen */
1892
	}
1893
1894
	err = DB_SUCCESS;
1895
func_exit:
1896
	if (UNIV_LIKELY_NULL(heap)) {
1897
		mem_heap_free(heap);
1898
	}
1899
	return(err);
1900
#else /* UNIV_HOTBACKUP */
1901
	/* This function depends on MySQL code that is not included in
1902
	InnoDB Hot Backup builds.  Besides, this function should never
1903
	be called in InnoDB Hot Backup. */
1904
	ut_error;
1905
	return(DB_FAIL);
1906
#endif /* UNIV_HOTBACKUP */
1907
}
1908
1909
/*******************************************************************
1910
Checks if an index entry has long enough common prefix with an existing
1911
record so that the intended insert of the entry must be changed to a modify of
1912
the existing record. In the case of a clustered index, the prefix must be
1913
n_unique fields long, and in the case of a secondary index, all fields must be
1914
equal. */
1915
UNIV_INLINE
1916
ulint
1917
row_ins_must_modify(
1918
/*================*/
1919
				/* out: 0 if no update, ROW_INS_PREV if
1920
				previous should be updated; currently we
1921
				do the search so that only the low_match
1922
				record can match enough to the search tuple,
1923
				not the next record */
1924
	btr_cur_t*	cursor)	/* in: B-tree cursor */
1925
{
1926
	ulint	enough_match;
1927
	rec_t*	rec;
1928
1929
	/* NOTE: (compare to the note in row_ins_duplicate_error) Because node
1930
	pointers on upper levels of the B-tree may match more to entry than
1931
	to actual user records on the leaf level, we have to check if the
1932
	candidate record is actually a user record. In a clustered index
1933
	node pointers contain index->n_unique first fields, and in the case
1934
	of a secondary index, all fields of the index. */
1935
1936
	enough_match = dict_index_get_n_unique_in_tree(cursor->index);
1937
1938
	if (cursor->low_match >= enough_match) {
1939
1940
		rec = btr_cur_get_rec(cursor);
1941
1942
		if (!page_rec_is_infimum(rec)) {
1943
1944
			return(ROW_INS_PREV);
1945
		}
1946
	}
1947
1948
	return(0);
1949
}
1950
1951
/*******************************************************************
1952
Tries to insert an index entry to an index. If the index is clustered
1953
and a record with the same unique key is found, the other record is
1954
necessarily marked deleted by a committed transaction, or a unique key
1955
violation error occurs. The delete marked record is then updated to an
1956
existing record, and we must write an undo log record on the delete
1957
marked record. If the index is secondary, and a record with exactly the
1958
same fields is found, the other record is necessarily marked deleted.
1959
It is then unmarked. Otherwise, the entry is just inserted to the index. */
1960
1961
ulint
1962
row_ins_index_entry_low(
1963
/*====================*/
1964
				/* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL
1965
				if pessimistic retry needed, or error code */
1966
	ulint		mode,	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
1967
				depending on whether we wish optimistic or
1968
				pessimistic descent down the index tree */
1969
	dict_index_t*	index,	/* in: index */
1970
	dtuple_t*	entry,	/* in: index entry to insert */
1971
	ulint*		ext_vec,/* in: array containing field numbers of
1972
				externally stored fields in entry, or NULL */
1973
	ulint		n_ext_vec,/* in: number of fields in ext_vec */
1974
	que_thr_t*	thr)	/* in: query thread */
1975
{
1976
	btr_cur_t	cursor;
1977
	ulint		ignore_sec_unique	= 0;
1978
	ulint		modify = 0; /* remove warning */
1979
	rec_t*		insert_rec;
1980
	rec_t*		rec;
1981
	ulint		err;
1982
	ulint		n_unique;
1983
	big_rec_t*	big_rec			= NULL;
1984
	mtr_t		mtr;
1985
	mem_heap_t*	heap			= NULL;
1986
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
1987
	ulint*		offsets			= offsets_;
1988
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
1989
1990
	log_free_check();
1991
1992
	mtr_start(&mtr);
1993
1994
	cursor.thr = thr;
1995
1996
	/* Note that we use PAGE_CUR_LE as the search mode, because then
1997
	the function will return in both low_match and up_match of the
1998
	cursor sensible values */
1999
2000
	if (!(thr_get_trx(thr)->check_unique_secondary)) {
2001
		ignore_sec_unique = BTR_IGNORE_SEC_UNIQUE;
2002
	}
2003
2004
	btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
2005
				    mode | BTR_INSERT | ignore_sec_unique,
2006
				    &cursor, 0, &mtr);
2007
2008
	if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
2009
		/* The insertion was made to the insert buffer already during
2010
		the search: we are done */
2011
2012
		err = DB_SUCCESS;
2013
2014
		goto function_exit;
2015
	}
2016
2017
#ifdef UNIV_DEBUG
2018
	{
2019
		page_t*	page = btr_cur_get_page(&cursor);
2020
		rec_t*	first_rec = page_rec_get_next(
2021
			page_get_infimum_rec(page));
2022
2023
		if (UNIV_LIKELY(first_rec != page_get_supremum_rec(page))) {
2024
			ut_a(rec_get_n_fields(first_rec, index)
2025
			     == dtuple_get_n_fields(entry));
2026
		}
2027
	}
2028
#endif
2029
2030
	n_unique = dict_index_get_n_unique(index);
2031
2032
	if (index->type & DICT_UNIQUE && (cursor.up_match >= n_unique
2033
					  || cursor.low_match >= n_unique)) {
2034
2035
		if (index->type & DICT_CLUSTERED) {
2036
			/* Note that the following may return also
2037
			DB_LOCK_WAIT */
2038
2039
			err = row_ins_duplicate_error_in_clust(
2040
				&cursor, entry, thr, &mtr);
2041
			if (err != DB_SUCCESS) {
2042
2043
				goto function_exit;
2044
			}
2045
		} else {
2046
			mtr_commit(&mtr);
2047
			err = row_ins_scan_sec_index_for_duplicate(
2048
				index, entry, thr);
2049
			mtr_start(&mtr);
2050
2051
			if (err != DB_SUCCESS) {
2052
2053
				goto function_exit;
2054
			}
2055
2056
			/* We did not find a duplicate and we have now
2057
			locked with s-locks the necessary records to
2058
			prevent any insertion of a duplicate by another
2059
			transaction. Let us now reposition the cursor and
2060
			continue the insertion. */
2061
2062
			btr_cur_search_to_nth_level(index, 0, entry,
2063
						    PAGE_CUR_LE,
2064
						    mode | BTR_INSERT,
2065
						    &cursor, 0, &mtr);
2066
		}
2067
	}
2068
2069
	modify = row_ins_must_modify(&cursor);
2070
2071
	if (modify != 0) {
2072
		/* There is already an index entry with a long enough common
2073
		prefix, we must convert the insert into a modify of an
2074
		existing record */
2075
2076
		if (modify == ROW_INS_NEXT) {
2077
			rec = page_rec_get_next(btr_cur_get_rec(&cursor));
2078
2079
			btr_cur_position(index, rec, &cursor);
2080
		}
2081
2082
		if (index->type & DICT_CLUSTERED) {
2083
			err = row_ins_clust_index_entry_by_modify(
2084
				mode, &cursor, &big_rec, entry,
2085
				ext_vec, n_ext_vec, thr, &mtr);
2086
		} else {
2087
			err = row_ins_sec_index_entry_by_modify(
2088
				mode, &cursor, entry, thr, &mtr);
2089
		}
2090
2091
	} else {
2092
		if (mode == BTR_MODIFY_LEAF) {
2093
			err = btr_cur_optimistic_insert(
2094
				0, &cursor, entry, &insert_rec, &big_rec,
2095
				thr, &mtr);
2096
		} else {
2097
			ut_a(mode == BTR_MODIFY_TREE);
2098
			if (buf_LRU_buf_pool_running_out()) {
2099
2100
				err = DB_LOCK_TABLE_FULL;
2101
2102
				goto function_exit;
2103
			}
2104
			err = btr_cur_pessimistic_insert(
2105
				0, &cursor, entry, &insert_rec, &big_rec,
2106
				thr, &mtr);
2107
		}
2108
2109
		if (err == DB_SUCCESS) {
2110
			if (ext_vec) {
2111
				rec_set_field_extern_bits(insert_rec, index,
2112
							  ext_vec, n_ext_vec,
2113
							  &mtr);
2114
			}
2115
		}
2116
	}
2117
2118
function_exit:
2119
	mtr_commit(&mtr);
2120
2121
	if (big_rec) {
2122
		rec_t*		rec;
2123
		mtr_start(&mtr);
2124
2125
		btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
2126
					    BTR_MODIFY_TREE, &cursor, 0, &mtr);
2127
		rec = btr_cur_get_rec(&cursor);
2128
		offsets = rec_get_offsets(rec, index, offsets,
2129
					  ULINT_UNDEFINED, &heap);
2130
2131
		err = btr_store_big_rec_extern_fields(index, rec,
2132
						      offsets, big_rec, &mtr);
2133
2134
		if (modify) {
2135
			dtuple_big_rec_free(big_rec);
2136
		} else {
2137
			dtuple_convert_back_big_rec(index, entry, big_rec);
2138
		}
2139
2140
		mtr_commit(&mtr);
2141
	}
2142
2143
	if (UNIV_LIKELY_NULL(heap)) {
2144
		mem_heap_free(heap);
2145
	}
2146
	return(err);
2147
}
2148
2149
/*******************************************************************
2150
Inserts an index entry to index. Tries first optimistic, then pessimistic
2151
descent down the tree. If the entry matches enough to a delete marked record,
2152
performs the insert by updating or delete unmarking the delete marked
2153
record. */
2154
2155
ulint
2156
row_ins_index_entry(
2157
/*================*/
2158
				/* out: DB_SUCCESS, DB_LOCK_WAIT,
2159
				DB_DUPLICATE_KEY, or some other error code */
2160
	dict_index_t*	index,	/* in: index */
2161
	dtuple_t*	entry,	/* in: index entry to insert */
2162
	ulint*		ext_vec,/* in: array containing field numbers of
2163
				externally stored fields in entry, or NULL */
2164
	ulint		n_ext_vec,/* in: number of fields in ext_vec */
2165
	que_thr_t*	thr)	/* in: query thread */
2166
{
2167
	ulint	err;
2168
2169
	if (UT_LIST_GET_FIRST(index->table->foreign_list)) {
2170
		err = row_ins_check_foreign_constraints(index->table, index,
2171
							entry, thr);
2172
		if (err != DB_SUCCESS) {
2173
2174
			return(err);
2175
		}
2176
	}
2177
2178
	/* Try first optimistic descent to the B-tree */
2179
2180
	err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry,
2181
				      ext_vec, n_ext_vec, thr);
2182
	if (err != DB_FAIL) {
2183
2184
		return(err);
2185
	}
2186
2187
	/* Try then pessimistic descent to the B-tree */
2188
2189
	err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry,
2190
				      ext_vec, n_ext_vec, thr);
2191
	return(err);
2192
}
2193
2194
/***************************************************************
2195
Sets the values of the dtuple fields in entry from the values of appropriate
2196
columns in row. */
2197
static
2198
void
2199
row_ins_index_entry_set_vals(
2200
/*=========================*/
2201
	dict_index_t*	index,	/* in: index */
2202
	dtuple_t*	entry,	/* in: index entry to make */
2203
	dtuple_t*	row)	/* in: row */
2204
{
2205
	dict_field_t*	ind_field;
2206
	dfield_t*	field;
2207
	dfield_t*	row_field;
2208
	ulint		n_fields;
2209
	ulint		i;
2210
2211
	ut_ad(entry && row);
2212
2213
	n_fields = dtuple_get_n_fields(entry);
2214
2215
	for (i = 0; i < n_fields; i++) {
2216
		field = dtuple_get_nth_field(entry, i);
2217
		ind_field = dict_index_get_nth_field(index, i);
2218
2219
		row_field = dtuple_get_nth_field(row, ind_field->col->ind);
2220
2221
		/* Check column prefix indexes */
2222
		if (ind_field->prefix_len > 0
2223
		    && dfield_get_len(row_field) != UNIV_SQL_NULL) {
2224
2225
			const	dict_col_t*	col
2226
				= dict_field_get_col(ind_field);
2227
2228
			field->len = dtype_get_at_most_n_mbchars(
2229
				col->prtype, col->mbminlen, col->mbmaxlen,
2230
				ind_field->prefix_len,
2231
				row_field->len, row_field->data);
2232
		} else {
2233
			field->len = row_field->len;
2234
		}
2235
2236
		field->data = row_field->data;
2237
	}
2238
}
2239
2240
/***************************************************************
2241
Inserts a single index entry to the table. */
2242
static
2243
ulint
2244
row_ins_index_entry_step(
2245
/*=====================*/
2246
				/* out: DB_SUCCESS if operation successfully
2247
				completed, else error code or DB_LOCK_WAIT */
2248
	ins_node_t*	node,	/* in: row insert node */
2249
	que_thr_t*	thr)	/* in: query thread */
2250
{
2251
	ulint	err;
2252
2253
	ut_ad(dtuple_check_typed(node->row));
2254
2255
	row_ins_index_entry_set_vals(node->index, node->entry, node->row);
2256
2257
	ut_ad(dtuple_check_typed(node->entry));
2258
2259
	err = row_ins_index_entry(node->index, node->entry, NULL, 0, thr);
2260
2261
	return(err);
2262
}
2263
2264
/***************************************************************
2265
Allocates a row id for row and inits the node->index field. */
2266
UNIV_INLINE
2267
void
2268
row_ins_alloc_row_id_step(
2269
/*======================*/
2270
	ins_node_t*	node)	/* in: row insert node */
2271
{
2272
	dulint	row_id;
2273
2274
	ut_ad(node->state == INS_NODE_ALLOC_ROW_ID);
2275
2276
	if (dict_table_get_first_index(node->table)->type & DICT_UNIQUE) {
2277
2278
		/* No row id is stored if the clustered index is unique */
2279
2280
		return;
2281
	}
2282
2283
	/* Fill in row id value to row */
2284
2285
	row_id = dict_sys_get_new_row_id();
2286
2287
	dict_sys_write_row_id(node->row_id_buf, row_id);
2288
}
2289
2290
/***************************************************************
2291
Gets a row to insert from the values list. */
2292
UNIV_INLINE
2293
void
2294
row_ins_get_row_from_values(
2295
/*========================*/
2296
	ins_node_t*	node)	/* in: row insert node */
2297
{
2298
	que_node_t*	list_node;
2299
	dfield_t*	dfield;
2300
	dtuple_t*	row;
2301
	ulint		i;
2302
2303
	/* The field values are copied in the buffers of the select node and
2304
	it is safe to use them until we fetch from select again: therefore
2305
	we can just copy the pointers */
2306
2307
	row = node->row;
2308
2309
	i = 0;
2310
	list_node = node->values_list;
2311
2312
	while (list_node) {
2313
		eval_exp(list_node);
2314
2315
		dfield = dtuple_get_nth_field(row, i);
2316
		dfield_copy_data(dfield, que_node_get_val(list_node));
2317
2318
		i++;
2319
		list_node = que_node_get_next(list_node);
2320
	}
2321
}
2322
2323
/***************************************************************
2324
Gets a row to insert from the select list. */
2325
UNIV_INLINE
2326
void
2327
row_ins_get_row_from_select(
2328
/*========================*/
2329
	ins_node_t*	node)	/* in: row insert node */
2330
{
2331
	que_node_t*	list_node;
2332
	dfield_t*	dfield;
2333
	dtuple_t*	row;
2334
	ulint		i;
2335
2336
	/* The field values are copied in the buffers of the select node and
2337
	it is safe to use them until we fetch from select again: therefore
2338
	we can just copy the pointers */
2339
2340
	row = node->row;
2341
2342
	i = 0;
2343
	list_node = node->select->select_list;
2344
2345
	while (list_node) {
2346
		dfield = dtuple_get_nth_field(row, i);
2347
		dfield_copy_data(dfield, que_node_get_val(list_node));
2348
2349
		i++;
2350
		list_node = que_node_get_next(list_node);
2351
	}
2352
}
2353
2354
/***************************************************************
2355
Inserts a row to a table. */
2356
2357
ulint
2358
row_ins(
2359
/*====*/
2360
				/* out: DB_SUCCESS if operation successfully
2361
				completed, else error code or DB_LOCK_WAIT */
2362
	ins_node_t*	node,	/* in: row insert node */
2363
	que_thr_t*	thr)	/* in: query thread */
2364
{
2365
	ulint	err;
2366
2367
	ut_ad(node && thr);
2368
2369
	if (node->state == INS_NODE_ALLOC_ROW_ID) {
2370
2371
		row_ins_alloc_row_id_step(node);
2372
2373
		node->index = dict_table_get_first_index(node->table);
2374
		node->entry = UT_LIST_GET_FIRST(node->entry_list);
2375
2376
		if (node->ins_type == INS_SEARCHED) {
2377
2378
			row_ins_get_row_from_select(node);
2379
2380
		} else if (node->ins_type == INS_VALUES) {
2381
2382
			row_ins_get_row_from_values(node);
2383
		}
2384
2385
		node->state = INS_NODE_INSERT_ENTRIES;
2386
	}
2387
2388
	ut_ad(node->state == INS_NODE_INSERT_ENTRIES);
2389
2390
	while (node->index != NULL) {
2391
		err = row_ins_index_entry_step(node, thr);
2392
2393
		if (err != DB_SUCCESS) {
2394
2395
			return(err);
2396
		}
2397
2398
		node->index = dict_table_get_next_index(node->index);
2399
		node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
2400
	}
2401
2402
	ut_ad(node->entry == NULL);
2403
2404
	node->state = INS_NODE_ALLOC_ROW_ID;
2405
2406
	return(DB_SUCCESS);
2407
}
2408
2409
/***************************************************************
2410
Inserts a row to a table. This is a high-level function used in SQL execution
2411
graphs. */
2412
2413
que_thr_t*
2414
row_ins_step(
2415
/*=========*/
2416
				/* out: query thread to run next or NULL */
2417
	que_thr_t*	thr)	/* in: query thread */
2418
{
2419
	ins_node_t*	node;
2420
	que_node_t*	parent;
2421
	sel_node_t*	sel_node;
2422
	trx_t*		trx;
2423
	ulint		err;
2424
2425
	ut_ad(thr);
2426
2427
	trx = thr_get_trx(thr);
2428
2429
	trx_start_if_not_started(trx);
2430
2431
	node = thr->run_node;
2432
2433
	ut_ad(que_node_get_type(node) == QUE_NODE_INSERT);
2434
2435
	parent = que_node_get_parent(node);
2436
	sel_node = node->select;
2437
2438
	if (thr->prev_node == parent) {
2439
		node->state = INS_NODE_SET_IX_LOCK;
2440
	}
2441
2442
	/* If this is the first time this node is executed (or when
2443
	execution resumes after wait for the table IX lock), set an
2444
	IX lock on the table and reset the possible select node. MySQL's
2445
	partitioned table code may also call an insert within the same
2446
	SQL statement AFTER it has used this table handle to do a search.
2447
	This happens, for example, when a row update moves it to another
2448
	partition. In that case, we have already set the IX lock on the
2449
	table during the search operation, and there is no need to set
2450
	it again here. But we must write trx->id to node->trx_id_buf. */
2451
2452
	trx_write_trx_id(node->trx_id_buf, trx->id);
2453
2454
	if (node->state == INS_NODE_SET_IX_LOCK) {
2455
2456
		/* It may be that the current session has not yet started
2457
		its transaction, or it has been committed: */
2458
2459
		if (UT_DULINT_EQ(trx->id, node->trx_id)) {
2460
			/* No need to do IX-locking */
2461
2462
			goto same_trx;
2463
		}
2464
2465
		err = lock_table(0, node->table, LOCK_IX, thr);
2466
2467
		if (err != DB_SUCCESS) {
2468
2469
			goto error_handling;
2470
		}
2471
2472
		node->trx_id = trx->id;
2473
same_trx:
2474
		node->state = INS_NODE_ALLOC_ROW_ID;
2475
2476
		if (node->ins_type == INS_SEARCHED) {
2477
			/* Reset the cursor */
2478
			sel_node->state = SEL_NODE_OPEN;
2479
2480
			/* Fetch a row to insert */
2481
2482
			thr->run_node = sel_node;
2483
2484
			return(thr);
2485
		}
2486
	}
2487
2488
	if ((node->ins_type == INS_SEARCHED)
2489
	    && (sel_node->state != SEL_NODE_FETCH)) {
2490
2491
		ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
2492
2493
		/* No more rows to insert */
2494
		thr->run_node = parent;
2495
2496
		return(thr);
2497
	}
2498
2499
	/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
2500
2501
	err = row_ins(node, thr);
2502
2503
error_handling:
2504
	trx->error_state = err;
2505
2506
	if (err != DB_SUCCESS) {
2507
		/* err == DB_LOCK_WAIT or SQL error detected */
2508
		return(NULL);
2509
	}
2510
2511
	/* DO THE TRIGGER ACTIONS HERE */
2512
2513
	if (node->ins_type == INS_SEARCHED) {
2514
		/* Fetch a row to insert */
2515
2516
		thr->run_node = sel_node;
2517
	} else {
2518
		thr->run_node = que_node_get_parent(node);
2519
	}
2520
2521
	return(thr);
2522
}