/******************************************************
The transaction lock system

Created 5/7/1996 Heikki Tuuri
*******************************************************/

#define LOCK_MODULE_IMPLEMENTATION

#include "lock0lock.h"
#include "lock0priv.h"

#include "lock0lock.ic"
#include "lock0priv.ic"

#include "trx0purge.h"

/* 2 function prototypes copied from ha_innodb.cc: */

/*****************************************************************
If you want to print a thd that is not associated with the current thread,
you must call this function before reserving the InnoDB kernel_mutex, to
protect MySQL from setting thd->query NULL. If you print a thd of the current
thread, we know that MySQL cannot modify thd->query, and it is not necessary
to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
the kernel_mutex.
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
void
innobase_mysql_prepare_print_arbitrary_thd(void);
/*============================================*/

/*****************************************************************
Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
void
innobase_mysql_end_print_arbitrary_thd(void);
/*========================================*/

/* Restricts the length of search we will do in the waits-for
graph of transactions */
#define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000

/* Restricts the recursion depth of the search we will do in the waits-for
graph of transactions */
#define LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK 200

/* When releasing transaction locks, this specifies how often we release
the kernel mutex for a moment to give also others access to it */

#define LOCK_RELEASE_KERNEL_INTERVAL	1000

/* Safety margin when creating a new record lock: this many extra records
can be inserted to the page without need to create a lock with a bigger
bitmap */

#define LOCK_PAGE_BITMAP_MARGIN		64

/* An explicit record lock affects both the record and the gap before it.
An implicit x-lock does not affect the gap, it only locks the index
record from read or update.

If a transaction has modified or inserted an index record, then
it owns an implicit x-lock on the record. On a secondary index record,
a transaction has an implicit x-lock also if it has modified the
clustered index record, the max trx id of the page where the secondary
index record resides is >= trx id of the transaction (or database recovery
is running), and there are no explicit non-gap lock requests on the
secondary index record.
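
/* A condensed restatement (added, not in the original) of the secondary
index condition above, in pseudocode form; see
lock_sec_rec_some_has_impl_off_kernel() below for the real check:

	trx has an implicit x-lock on a sec index rec IF
		trx modified or inserted the clustered index record
		AND (page_get_max_trx_id(sec page) >= trx id
		     OR recv_recovery_is_on())
		AND there are no explicit non-gap lock requests
		    on the sec index rec */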

This complicated definition for a secondary index comes from the
implementation: we want to be able to determine if a secondary index
record has an implicit x-lock, just by looking at the present clustered
index record, not at the historical versions of the record. The
complicated definition can be explained to the user so that there is
nondeterminism in the access path when a query is answered: we may,
or may not, access the clustered index record and thus may, or may not,
bump into an x-lock set there.

Different transactions can have conflicting locks set on the gap at the
same time. The locks on the gap are purely inhibitive: an insert cannot
be made, or a select cursor may have to wait if a different transaction
has a conflicting lock on the gap. An x-lock on the gap does not give
the right to insert into the gap.

An explicit lock can be placed on a user record or the supremum record of
a page. The locks on the supremum record are always thought to be of the gap
type, though the gap bit is not set. When we perform an update of a record
where the size of the record changes, we may temporarily store its explicit
locks on the infimum record of the page, though the infimum otherwise never
carries locks.

A waiting record lock can also be of the gap type. A waiting lock request
can be granted when there is no conflicting mode lock request by another
transaction ahead of it in the explicit lock queue.

In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP.
It only locks the record it is placed on, not the gap before the record.
This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation
level.

-------------------------------------------------------------------------
RULE 1: If there is an implicit x-lock on a record, and there are non-gap
lock requests waiting in the queue, then the transaction holding the implicit
x-lock also has an explicit non-gap record x-lock. Therefore, as locks are
released, we can grant locks to waiting lock requests purely by looking at
the explicit lock requests in the queue.

RULE 3: Different transactions cannot have conflicting granted non-gap locks
on a record at the same time. However, they can have conflicting granted gap
locks.

RULE 4: If there is a waiting lock request in a queue, no lock request,
gap or not, can be inserted ahead of it in the queue. In record deletes
and page splits new gap type locks can be created by the database manager
for a transaction, and without rule 4, the waits-for graph of transactions
might become cyclic without the database noticing it, as the deadlock check
is only performed when a transaction itself requests a lock!
-------------------------------------------------------------------------

An insert is allowed to a gap if there are no explicit lock requests by
other transactions on the next record. It does not matter if these lock
requests are granted or waiting, gap bit set or not, with the exception
that a gap type request set by another transaction to wait for
its turn to do an insert is ignored. On the other hand, an
implicit x-lock by another transaction does not prevent an insert, which
allows for more concurrency when using an Oracle-style sequence number
generator for the primary key with many transactions doing inserts
concurrently.

A modify of a record is allowed if the transaction has an x-lock on the
record, or if other transactions do not have any non-gap lock requests on the
record.

A read of a single user record with a cursor is allowed if the transaction
has a non-gap explicit, or an implicit lock on the record, or if the other
transactions have no x-lock requests on the record. At a page supremum a
read is always allowed.

In summary, an implicit lock is seen as a granted x-lock only on the
record, not on the gap. An explicit lock with no gap bit set is a lock
both on the record and the gap. If the gap bit is set, the lock is only
on the gap. Different transactions cannot own conflicting locks on the
record at the same time, but they may own conflicting locks on the gap.
Granted locks on a record give an access right to the record, but gap type
locks just inhibit operations.
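
A minimal illustrative sketch (added, not part of the original file): how
the gap flags in a record lock's type_mode map to what is protected, per
the summary above. "LOCK_ORDINARY" is the conventional name for neither
gap flag being set.

	static void
	lock_scope_example(
		ulint	type_mode,	/* in: type_mode of an explicit
					record lock */
		ibool*	locks_rec,	/* out: TRUE if the record itself
					is locked */
		ibool*	locks_gap)	/* out: TRUE if the gap before the
					record is locked */
	{
		*locks_rec = !(type_mode & LOCK_GAP);
		*locks_gap = !(type_mode & LOCK_REC_NOT_GAP);
	}

An implicit x-lock behaves like LOCK_REC_NOT_GAP here: record only,
never the gap.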

NOTE: Finding out if some transaction has an implicit x-lock on a secondary
index record can be cumbersome. We may have to look at previous versions of
the corresponding clustered index record to find out if a delete marked
secondary index record was delete marked by an active transaction, not by
a committed one.

FACT A: If a transaction has inserted a row, it can delete it any time
without need to wait for locks.

PROOF: The transaction has an implicit x-lock on every index record inserted
for the row, and can thus modify each record without the need to wait. Q.E.D.

FACT B: If a transaction has read some result set with a cursor, it can read
it again, and retrieves the same result set, if it has not modified the
result set in the meantime. Hence, there is no phantom problem. If the
biggest record, in the alphabetical order, touched by the cursor is removed,
a lock wait may occur, otherwise not.

PROOF: When a read cursor proceeds, it sets an s-lock on each user record
it passes, and a gap type s-lock on each page supremum. The cursor must
wait until it has these locks granted. Then no other transaction can
have a granted x-lock on any of the user records, and therefore cannot
modify the user records. Neither can any other transaction insert into
the gaps which were passed over by the cursor. Page splits and merges,
and removal of obsolete versions of records do not affect this, because
when a user record or a page supremum is removed, the next record inherits
its locks as gap type locks, and therefore blocks inserts to the same gap.
Also, if a page supremum is inserted, it inherits its locks from the successor
record. When the cursor is positioned again at the start of the result set,
the records it will touch on its course are either records it touched
during the last pass or newly inserted page supremums. It can immediately
access all these records, and when it arrives at the biggest record, it
notices that the result set is complete. If the biggest record was removed,
a lock wait can occur because the next record only inherits a gap type lock,
and a wait may be needed. Q.E.D. */

/* If an index record should be changed or a new one inserted, we must check
the lock on the record or the next. When a read cursor starts reading,
we will set a record level s-lock on each record it passes, except on the
initial record on which the cursor is positioned before we start to fetch
records. Our index tree search has the convention that the B-tree
cursor is positioned BEFORE the first possibly matching record in
the search. Optimizations are possible here: if the record is searched
on an equality condition to a unique key, we could actually set a special
lock on the record, a lock which would not prevent any insert before
this record. In next-key locking, an x-lock set on a record also
prevents inserts just before that record.

There are special infimum and supremum records on each page.
A supremum record can be locked by a read cursor. This record cannot be
updated but the lock prevents insert of a user record to the end of
the page.

Next-key locks will prevent the phantom problem where new rows
could appear to SELECT result sets after the select operation has been
performed. Prevention of phantoms ensures the serializability of
transactions.

What should we check if an insert of a new record is wanted?
Only the lock on the next record on the same page, because also the
supremum record can carry a lock. An s-lock prevents insertion, but
what about an x-lock? If it was set by a searched update, then there
is implicitly an s-lock, too, and the insert should be prevented.
What if our transaction owns an x-lock to the next record, but there is
a waiting s-lock request on the next record? If this s-lock was placed
by a read cursor moving in the ascending order in the index, we cannot
do the insert immediately, because when we finally commit our transaction,
the read cursor should see also the newly inserted record. So we should
move the read cursor backward from the next record for it to pass over
the newly inserted record. This move backward may be too cumbersome to
implement. If we in this situation just enqueue a second x-lock request
for our transaction on the next record, then the deadlock mechanism
notices a deadlock between our transaction and the s-lock request
transaction. This seems to be an ok solution.

We could have the convention that granted explicit record locks
lock the corresponding records from changing, and also lock the gaps
before them from inserting. A waiting explicit lock request locks the gap
before it from inserting. Implicit record x-locks, which we derive from the
transaction id in the clustered index record, only lock the record itself
from modification, not the gap before it from inserting.

How should we store update locks? If the search is done by a unique
key, we could just modify the record trx id. Otherwise, we could put a record
x-lock on the record. If the update changes ordering fields of the
clustered index record, the inserted new record needs no record lock in
the lock table, the trx id is enough. The same holds for a secondary index
record. Searched delete is similar to update.

What about waiting lock requests? If a transaction is waiting to make an
update to a record which another modified, how does the other transaction
know to send the end-lock-wait signal to the waiting transaction? If we have
the convention that a transaction may wait for just one lock at a time, how
do we preserve it when a lock wait ends?

Checking the trx id label of a secondary index record. In the case of a
modification, not an insert, is this necessary? A secondary index record
is modified only by setting or resetting its deleted flag. A secondary index
record contains fields to uniquely determine the corresponding clustered
index record. A secondary index record is therefore only modified if we
also modify the clustered index record, and the trx id checking is done
on the clustered index record, before we come to modify the secondary index
record. So, in the case of delete marking or unmarking a secondary index
record, we do not have to care about trx ids, only the locks in the lock
table must be checked. In the case of a select from a secondary index, the
trx id is relevant, and in this case we may have to search the clustered
index record.

PROBLEM: How to update record locks when page is split or merged, or
--------------------------------------------------------------------
a record is deleted or updated?
If the size of fields in a record changes, we perform the update by
a delete followed by an insert. How can we retain the locks set or
waiting on the record? Because a record lock is indexed in the bitmap
by the heap number of the record, when we remove the record from the
record list, it is possible still to keep the lock bits. If the page
is reorganized, we could make a table of old and new heap numbers,
and permute the bitmaps in the locks accordingly. We can add to the
table a row telling where the updated record ended. If the update does
not require a reorganization of the page, we can simply move the lock
bits for the updated record to the position determined by its new heap
number (we may have to allocate a new lock, if we run out of the bitmap
in the lock).
A more complicated case is the one where the reinsertion of the
updated record is done pessimistically, because the structure of the
tree may change.

PROBLEM: If a supremum record is removed in a page merge, or a record
---------------------------------------------------------------------
removed in a purge, what to do to the waiting lock requests? In a split to
the right, we just move the lock requests to the new supremum. If a record
is removed, we could move the waiting lock request to its inheritor, the
next record in the index. But, the next record may already have lock
requests on its own queue. A new deadlock check should be made then. Maybe
it is easier just to release the waiting transactions. They can then enqueue
new lock requests on appropriate records.

PROBLEM: When a record is inserted, what locks should it inherit from the
-------------------------------------------------------------------------
upper neighbor? An insert of a new supremum record in a page split is
always possible, but an insert of a new user record requires that the upper
neighbor does not have any lock requests by other transactions, granted or
waiting, in its lock queue. Solution: We can copy the locks as gap type
locks, so that also the waiting locks are transformed to granted gap type
locks on the inserted record. */

/* LOCK COMPATIBILITY MATRIX
 *    IS IX S  X  AI
 * IS +  +  +  -  +
 * IX +  +  -  -  +
 * S  +  -  +  -  -
 * X  -  -  -  -  -
 * AI +  +  -  -  -
 *
 * Note that for rows, InnoDB only acquires S or X locks.
 * For tables, InnoDB normally acquires IS or IX locks.
 * S or X table locks are only acquired for LOCK TABLES.
 * Auto-increment (AI) locks are needed because of
 * statement-level MySQL binlog.
 * See also lock_mode_compatible().
 */
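
/* An illustrative sketch (added, not part of the original file): the
matrix above expressed as a lookup table. It assumes, for illustration
only, that the mode values LOCK_IS, LOCK_IX, LOCK_S, LOCK_X and
LOCK_AUTO_INC could be mapped to the row/column order 0..4 below; treat
it as a pseudocode restatement of lock_mode_compatible(). */
#if 0
static const ibool lock_compat_matrix[5][5] = {
	/*	     IS	    IX	   S	  X	 AI */
	/* IS */   {TRUE,  TRUE,  TRUE,  FALSE, TRUE },
	/* IX */   {TRUE,  TRUE,  FALSE, FALSE, TRUE },
	/* S  */   {TRUE,  FALSE, TRUE,  FALSE, FALSE},
	/* X  */   {FALSE, FALSE, FALSE, FALSE, FALSE},
	/* AI */   {TRUE,  TRUE,  FALSE, FALSE, FALSE}
};
#endif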

#ifdef UNIV_DEBUG
ibool	lock_print_waits	= FALSE;
#endif /* UNIV_DEBUG */

/* The lock system */
lock_sys_t*	lock_sys	= NULL;

/* We store info on the latest deadlock error to this buffer. InnoDB
Monitor will then fetch it and print */
ibool	lock_deadlock_found = FALSE;
FILE*	lock_latest_err_file;

/* Flags for recursive deadlock search */
#define LOCK_VICTIM_IS_START	1
#define LOCK_VICTIM_IS_OTHER	2

/************************************************************************
Checks if a lock request results in a deadlock. */
static
ibool
lock_deadlock_occurs(
/*=================*/
			/* out: TRUE if a deadlock was detected and we
			chose trx as a victim; FALSE if no deadlock, or
			there was a deadlock, but we chose other
			transaction(s) as victim(s) */
	lock_t*	lock,	/* in: lock the transaction is requesting */
	trx_t*	trx);	/* in: transaction */
/************************************************************************
Looks recursively for a deadlock. */
static
ulint
lock_deadlock_recursive(
/*====================*/
				/* out: 0 if no deadlock found,
				LOCK_VICTIM_IS_START if there was a deadlock
				and we chose 'start' as the victim,
				LOCK_VICTIM_IS_OTHER if a deadlock
				was found and we chose some other trx as a
				victim: we must do the search again in this
				last case because there may be another
				deadlock! */
	trx_t*	start,		/* in: recursion starting point */
	trx_t*	trx,		/* in: a transaction waiting for a lock */
	lock_t*	wait_lock,	/* in: the lock trx is waiting to be granted */
	ulint*	cost,		/* in/out: number of calculation steps thus
				far: if this exceeds LOCK_MAX_N_STEPS_...
				we return LOCK_VICTIM_IS_START */
	ulint	depth);		/* in: recursion depth: if this exceeds
				LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
				return LOCK_VICTIM_IS_START */
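
/* A minimal illustrative sketch (added, not the actual implementation):
the shape of the cost- and depth-bounded depth-first search over the
waits-for graph that the prototype above describes. The helpers
trx_next_blocking_lock() and lock_owner() are hypothetical names invented
for this sketch only. */
#if 0
static ulint
deadlock_search_sketch(trx_t* start, trx_t* trx, ulint* cost, ulint depth)
{
	lock_t*	lock;

	if (++*cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK
	    || depth > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK) {

		/* The search got too expensive: play safe and choose
		the requesting transaction as the victim */
		return(LOCK_VICTIM_IS_START);
	}

	/* Walk every lock that trx's wait_lock has to wait for; if the
	owner is 'start' we have closed a cycle, otherwise recurse into
	the owner if it is itself waiting */
	for (lock = trx_next_blocking_lock(trx, NULL);
	     lock != NULL;
	     lock = trx_next_blocking_lock(trx, lock)) {

		trx_t*	owner = lock_owner(lock);

		if (owner == start) {

			return(LOCK_VICTIM_IS_START);
		}

		if (owner->que_state == TRX_QUE_LOCK_WAIT) {
			ulint	ret = deadlock_search_sketch(
				start, owner, cost, depth + 1);

			if (ret != 0) {

				return(ret);
			}
		}
	}

	return(0);
}
#endif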

/*************************************************************************
Gets the nth bit of a record lock. */
UNIV_INLINE
ibool
lock_rec_get_nth_bit(
/*=================*/
			/* out: TRUE if bit set */
	lock_t*	lock,	/* in: record lock */
	ulint	i)	/* in: index of the bit */
{
	ulint	byte_index;
	ulint	bit_index;
	ulint	b;

	ut_ad(lock);
	ut_ad(lock_get_type(lock) == LOCK_REC);

	if (i >= lock->un_member.rec_lock.n_bits) {

		return(FALSE);
	}

	byte_index = i / 8;
	bit_index = i % 8;

	b = (ulint)*((byte*)lock + sizeof(lock_t) + byte_index);

	return(ut_bit_get_nth(b, bit_index));
}
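
/* Worked example (added, not in the original): the lock bitmap is stored
immediately after the lock_t struct, one bit per heap number. Heap number
i = 19, for instance, lives in byte i / 8 = 2 after sizeof(lock_t), at
bit i % 8 = 3 within that byte. */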

/*************************************************************************/

#define lock_mutex_enter_kernel()	mutex_enter(&kernel_mutex)
#define lock_mutex_exit_kernel()	mutex_exit(&kernel_mutex)

/*************************************************************************
Checks that a transaction id is sensible, i.e., not in the future. */

ibool
lock_check_trx_id_sanity(
/*=====================*/
					/* out: TRUE if ok */
	dulint		trx_id,		/* in: trx id */
	rec_t*		rec,		/* in: user record */
	dict_index_t*	index,		/* in: index */
	const ulint*	offsets,	/* in: rec_get_offsets(rec, index) */
	ibool		has_kernel_mutex)/* in: TRUE if the caller owns the
					kernel mutex */
{
	ibool	is_ok	= TRUE;

	ut_ad(rec_offs_validate(rec, index, offsets));

	if (!has_kernel_mutex) {
		mutex_enter(&kernel_mutex);
	}

	/* A sanity check: the trx_id in rec must be smaller than the global
	trx id counter */

	if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) {
		ut_print_timestamp(stderr);
		fputs("  InnoDB: Error: transaction id associated"
		      " with record\n", stderr);
		rec_print_new(stderr, rec, offsets);
		fputs("InnoDB: in ", stderr);
		dict_index_name_print(stderr, NULL, index);
		fprintf(stderr, "\n"
			"InnoDB: is %lu %lu which is higher than the"
			" global trx id counter %lu %lu!\n"
			"InnoDB: The table is corrupt. You have to do"
			" dump + drop + reimport.\n",
			(ulong) ut_dulint_get_high(trx_id),
			(ulong) ut_dulint_get_low(trx_id),
			(ulong) ut_dulint_get_high(trx_sys->max_trx_id),
			(ulong) ut_dulint_get_low(trx_sys->max_trx_id));

		is_ok = FALSE;
	}

	if (!has_kernel_mutex) {
		mutex_exit(&kernel_mutex);
	}

	return(is_ok);
}

/*************************************************************************
Checks that a record is seen in a consistent read. */

ibool
lock_clust_rec_cons_read_sees(
/*==========================*/
				/* out: TRUE if sees, or FALSE if an earlier
				version of the record should be retrieved */
	rec_t*		rec,	/* in: user record which should be read or
				passed over by a read cursor */
	dict_index_t*	index,	/* in: clustered index */
	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
	read_view_t*	view)	/* in: consistent read view */
{
	dulint	trx_id;

	ut_ad(index->type & DICT_CLUSTERED);
	ut_ad(page_rec_is_user_rec(rec));
	ut_ad(rec_offs_validate(rec, index, offsets));

	/* NOTE that we call this function while holding the search
	system latch. To obey the latching order we must NOT reserve the
	kernel mutex here! */

	trx_id = row_get_rec_trx_id(rec, index, offsets);

	return(read_view_sees_trx_id(view, trx_id));
}

/*************************************************************************
Checks that a non-clustered index record is seen in a consistent read. */

ibool
lock_sec_rec_cons_read_sees(
/*========================*/
				/* out: TRUE if certainly sees, or FALSE if an
				earlier version of the clustered index record
				might be needed: NOTE that a non-clustered
				index page contains so little information on
				its modifications that also in the case FALSE,
				the present version of rec may be the right,
				but we must check this from the clustered
				index record */
	rec_t*		rec,	/* in: user record which should be read or
				passed over by a read cursor */
	dict_index_t*	index,	/* in: non-clustered index */
	read_view_t*	view)	/* in: consistent read view */
{
	dulint	max_trx_id;

	ut_ad(!(index->type & DICT_CLUSTERED));
	ut_ad(page_rec_is_user_rec(rec));

	/* NOTE that we might call this function while holding the search
	system latch. To obey the latching order we must NOT reserve the
	kernel mutex here! */

	if (recv_recovery_is_on()) {

		return(FALSE);
	}

	max_trx_id = page_get_max_trx_id(buf_frame_align(rec));

	if (ut_dulint_cmp(max_trx_id, view->up_limit_id) >= 0) {

		return(FALSE);
	}

	return(TRUE);
}

/*************************************************************************
Creates the lock system at database start. */

void
lock_sys_create(
/*============*/
	ulint	n_cells)	/* in: number of slots in lock hash table */
{
	lock_sys = mem_alloc(sizeof(lock_sys_t));

	lock_sys->rec_hash = hash_create(n_cells);

	/* hash_create_mutexes(lock_sys->rec_hash, 2, SYNC_REC_LOCK); */

	lock_latest_err_file = os_file_create_tmpfile();
	ut_a(lock_latest_err_file);
}

/*************************************************************************
Gets the size of a lock struct. */

ulint
lock_get_size(void)
/*===============*/
			/* out: size in bytes */
{
	return((ulint)sizeof(lock_t));
}

/*************************************************************************
Gets the mode of a lock. */
UNIV_INLINE
ulint
lock_get_mode(
/*==========*/
				/* out: mode */
	const lock_t*	lock)	/* in: lock */
{
	ut_ad(lock);

	return(lock->type_mode & LOCK_MODE_MASK);
}

/*************************************************************************
Gets the wait flag of a lock. */
UNIV_INLINE
ibool
lock_get_wait(
/*==========*/
			/* out: TRUE if waiting */
	lock_t*	lock)	/* in: lock */
{
	ut_ad(lock);

	if (lock->type_mode & LOCK_WAIT) {

		return(TRUE);
	}

	return(FALSE);
}

/*************************************************************************
Gets the source table of an ALTER TABLE transaction. The table must be
covered by an IX or IS table lock. */

dict_table_t*
lock_get_src_table(
/*===============*/
				/* out: the source table of transaction,
				if it is covered by an IX or IS table lock;
				dest if there is no source table, and
				NULL if the transaction is locking more than
				two tables or an inconsistency is found */
	trx_t*		trx,	/* in: transaction */
	dict_table_t*	dest,	/* in: destination of ALTER TABLE */
	ulint*		mode)	/* out: lock mode of the source table */
{
	dict_table_t*	src;
	lock_t*		lock;

	src = NULL;
	*mode = LOCK_NONE;

	for (lock = UT_LIST_GET_FIRST(trx->trx_locks);
	     lock;
	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
		lock_table_t*	tab_lock;
		ulint		lock_mode;

		if (!(lock_get_type(lock) & LOCK_TABLE)) {
			/* We are only interested in table locks. */
			continue;
		}

		tab_lock = &lock->un_member.tab_lock;

		if (dest == tab_lock->table) {
			/* We are not interested in the destination table. */
			continue;
		} else if (!src) {
			/* This presumably is the source table. */
			src = tab_lock->table;
			if (UT_LIST_GET_LEN(src->locks) != 1
			    || UT_LIST_GET_FIRST(src->locks) != lock) {
				/* We only support the case when
				there is only one lock on this table. */
				return(NULL);
			}
		} else if (src != tab_lock->table) {
			/* The transaction is locking more than
			two tables (src and dest): abort */
			return(NULL);
		}

		/* Check that the source table is locked by
		LOCK_IX or LOCK_IS. */
		lock_mode = lock_get_mode(lock);
		if (lock_mode == LOCK_IX || lock_mode == LOCK_IS) {
			if (*mode != LOCK_NONE && *mode != lock_mode) {
				/* There are multiple locks on src. */
				return(NULL);
			}
			*mode = lock_mode;
		}
	}

	if (!src) {
		/* No source table lock found: flag the situation to caller */
		src = dest;
	}

	return(src);
}

/*************************************************************************
Determine if the given table is exclusively "owned" by the given
transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
on the table. */

ibool
lock_is_table_exclusive(
/*====================*/
				/* out: TRUE if table is only locked by trx,
				with LOCK_IX, and possibly LOCK_AUTO_INC */
	dict_table_t*	table,	/* in: table */
	trx_t*		trx)	/* in: transaction */
{
	lock_t*	lock;
	ibool	ok	= FALSE;

	ut_ad(table && trx);

	for (lock = UT_LIST_GET_FIRST(table->locks);
	     lock;
	     lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) {
		if (lock->trx != trx) {
			/* A lock on the table is held
			by some other transaction. */
			return(FALSE);
		}

		if (!(lock_get_type(lock) & LOCK_TABLE)) {
			/* We are interested in table locks only. */
			continue;
		}

		switch (lock_get_mode(lock)) {
		case LOCK_IX:
			ok = TRUE;
			break;
		case LOCK_AUTO_INC:
			/* It is allowed for trx to hold an
			auto_increment lock. */
			break;
		default:
			/* Other table locks than LOCK_IX are not allowed. */
			return(FALSE);
		}
	}

	return(ok);
}

/*************************************************************************
Sets the wait flag of a lock and the back pointer in trx to lock. */
UNIV_INLINE
void
lock_set_lock_and_trx_wait(
/*=======================*/
	lock_t*	lock,	/* in: lock */
	trx_t*	trx)	/* in: trx */
{
	ut_ad(lock);
	ut_ad(trx->wait_lock == NULL);

	trx->wait_lock = lock;
	lock->type_mode = lock->type_mode | LOCK_WAIT;
}

/**************************************************************************
The back pointer to a waiting lock request in the transaction is set to NULL
and the wait bit in lock type_mode is reset. */
UNIV_INLINE
void
lock_reset_lock_and_trx_wait(
/*=========================*/
	lock_t*	lock)	/* in: record lock */
{
	ut_ad((lock->trx)->wait_lock == lock);
	ut_ad(lock_get_wait(lock));

	/* Reset the back pointer in trx to this waiting lock request */

	(lock->trx)->wait_lock = NULL;
	lock->type_mode = lock->type_mode & ~LOCK_WAIT;
}

/*************************************************************************
Gets the gap flag of a record lock. */
UNIV_INLINE
ibool
lock_rec_get_gap(
/*=============*/
			/* out: TRUE if gap flag set */
	lock_t*	lock)	/* in: record lock */
{
	ut_ad(lock);
	ut_ad(lock_get_type(lock) == LOCK_REC);

	if (lock->type_mode & LOCK_GAP) {

		return(TRUE);
	}

	return(FALSE);
}

/*************************************************************************
Gets the LOCK_REC_NOT_GAP flag of a record lock. */
UNIV_INLINE
ibool
lock_rec_get_rec_not_gap(
/*=====================*/
			/* out: TRUE if LOCK_REC_NOT_GAP flag set */
	lock_t*	lock)	/* in: record lock */
{
	ut_ad(lock);
	ut_ad(lock_get_type(lock) == LOCK_REC);

	if (lock->type_mode & LOCK_REC_NOT_GAP) {

		return(TRUE);
	}

	return(FALSE);
}

/*************************************************************************
Gets the waiting insert flag of a record lock. */
UNIV_INLINE
ibool
lock_rec_get_insert_intention(
/*==========================*/
			/* out: TRUE if insert intention flag set */
	lock_t*	lock)	/* in: record lock */
{
	ut_ad(lock);
	ut_ad(lock_get_type(lock) == LOCK_REC);

	if (lock->type_mode & LOCK_INSERT_INTENTION) {

		return(TRUE);
	}

	return(FALSE);
}

/*************************************************************************
Calculates if lock mode 1 is stronger or equal to lock mode 2. */
UNIV_INLINE
ibool
lock_mode_stronger_or_eq(
/*=====================*/
			/* out: TRUE if mode1 stronger or equal to mode2 */
	ulint	mode1,	/* in: lock mode */
	ulint	mode2)	/* in: lock mode */
{
	ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
	      || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
	ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
	      || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);

	if (mode1 == LOCK_X) {

		return(TRUE);

	} else if (mode1 == LOCK_AUTO_INC && mode2 == LOCK_AUTO_INC) {

		return(TRUE);

	} else if (mode1 == LOCK_S
		   && (mode2 == LOCK_S || mode2 == LOCK_IS)) {
		return(TRUE);

	} else if (mode1 == LOCK_IS && mode2 == LOCK_IS) {

		return(TRUE);

	} else if (mode1 == LOCK_IX && (mode2 == LOCK_IX
					|| mode2 == LOCK_IS)) {
		return(TRUE);
	}

	return(FALSE);
}

/*************************************************************************
Calculates if lock mode 1 is compatible with lock mode 2. */
UNIV_INLINE
ibool
lock_mode_compatible(
/*=================*/
			/* out: TRUE if mode1 compatible with mode2 */
	ulint	mode1,	/* in: lock mode */
	ulint	mode2)	/* in: lock mode */
{
	ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
	      || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
	ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
	      || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);

	if (mode1 == LOCK_S && (mode2 == LOCK_IS || mode2 == LOCK_S)) {

		return(TRUE);

	} else if (mode1 == LOCK_X) {

		return(FALSE);

	} else if (mode1 == LOCK_AUTO_INC && (mode2 == LOCK_IS
					      || mode2 == LOCK_IX)) {
		return(TRUE);

	} else if (mode1 == LOCK_IS && (mode2 == LOCK_IS
					|| mode2 == LOCK_IX
					|| mode2 == LOCK_AUTO_INC
					|| mode2 == LOCK_S)) {
		return(TRUE);

	} else if (mode1 == LOCK_IX && (mode2 == LOCK_IS
					|| mode2 == LOCK_AUTO_INC
					|| mode2 == LOCK_IX)) {
		return(TRUE);
	}

	return(FALSE);
}

/*************************************************************************
Checks if a lock request for a new lock has to wait for request lock2. */
UNIV_INLINE
ibool
lock_rec_has_to_wait(
/*=================*/
			/* out: TRUE if new lock has to wait for lock2 to be
			removed */
	trx_t*	trx,	/* in: trx of new lock */
	ulint	type_mode,/* in: precise mode of the new lock to set:
			LOCK_S or LOCK_X, possibly ORed to
			LOCK_GAP or LOCK_REC_NOT_GAP, LOCK_INSERT_INTENTION */
	lock_t*	lock2,	/* in: another record lock; NOTE that it is assumed
			that this has a lock bit set on the same record as
			in the new lock we are setting */
	ibool	lock_is_on_supremum) /* in: TRUE if we are setting the lock
			on the 'supremum' record of an index
			page: we know then that the lock request
			is really for a 'gap' type lock */
{
	ut_ad(trx && lock2);
	ut_ad(lock_get_type(lock2) == LOCK_REC);

	if (trx != lock2->trx
	    && !lock_mode_compatible(LOCK_MODE_MASK & type_mode,
				     lock_get_mode(lock2))) {

		/* We have somewhat complex rules when gap type record locks
		cause waits */

		if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
		    && !(type_mode & LOCK_INSERT_INTENTION)) {

			/* Gap type locks without LOCK_INSERT_INTENTION flag
			do not need to wait for anything. This is because
			different users can have conflicting lock types
			on gaps. */

			return(FALSE);
		}

		if (!(type_mode & LOCK_INSERT_INTENTION)
		    && lock_rec_get_gap(lock2)) {

			/* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP)
			does not need to wait for a gap type lock */

			return(FALSE);
		}

		if ((type_mode & LOCK_GAP)
		    && lock_rec_get_rec_not_gap(lock2)) {

			/* Lock on gap does not need to wait for
			a LOCK_REC_NOT_GAP type lock */

			return(FALSE);
		}

		if (lock_rec_get_insert_intention(lock2)) {

			/* No lock request needs to wait for an insert
			intention lock to be removed. This is ok since our
			rules allow conflicting locks on gaps. This eliminates
			a spurious deadlock caused by a next-key lock waiting
			for an insert intention lock; when the insert
			intention lock was granted, the insert deadlocked on
			the waiting next-key lock.

			Also, insert intention locks do not disturb each
			other. */

			return(FALSE);
		}

		return(TRUE);
	}

	return(FALSE);
}
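
/* Worked example (added, not in the original): suppose transaction A
holds a granted next-key S-lock (LOCK_ORDINARY) on a record, and
transaction B requests LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION on the
same record in order to insert into the gap before it. S and X are
incompatible and the transactions differ, and none of the gap-related
exceptions above applies to B's insert intention request, so
lock_rec_has_to_wait() returns TRUE and B waits. If B instead requested
a plain LOCK_X | LOCK_GAP (no insert intention), the first exception
would apply and B would not have to wait. */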

/*************************************************************************
Checks if a lock request lock1 has to wait for request lock2. */

ibool
lock_has_to_wait(
/*=============*/
			/* out: TRUE if lock1 has to wait for lock2 to be
			removed */
	lock_t*	lock1,	/* in: waiting lock */
	lock_t*	lock2)	/* in: another lock; NOTE that it is assumed that this
			has a lock bit set on the same record as in lock1 if
			the locks are record locks */
{
	ut_ad(lock1 && lock2);

	if (lock1->trx != lock2->trx
	    && !lock_mode_compatible(lock_get_mode(lock1),
				     lock_get_mode(lock2))) {
		if (lock_get_type(lock1) == LOCK_REC) {
			ut_ad(lock_get_type(lock2) == LOCK_REC);

			/* If this lock request is for a supremum record
			then the second bit on the lock bitmap is set */

			return(lock_rec_has_to_wait(lock1->trx,
						    lock1->type_mode, lock2,
						    lock_rec_get_nth_bit(
							    lock1, 1)));
		}

		return(TRUE);
	}

	return(FALSE);
}

/*============== RECORD LOCK BASIC FUNCTIONS ============================*/

/*************************************************************************
Gets the number of bits in a record lock bitmap. */
UNIV_INLINE
ulint
lock_rec_get_n_bits(
/*================*/
			/* out: number of bits */
	lock_t*	lock)	/* in: record lock */
{
	return(lock->un_member.rec_lock.n_bits);
}

/**************************************************************************
Sets the nth bit of a record lock to TRUE. */
UNIV_INLINE
void
lock_rec_set_nth_bit(
/*=================*/
	lock_t*	lock,	/* in: record lock */
	ulint	i)	/* in: index of the bit */
{
	ulint	byte_index;
	ulint	bit_index;
	byte*	ptr;
	ulint	b;

	ut_ad(lock);
	ut_ad(lock_get_type(lock) == LOCK_REC);
	ut_ad(i < lock->un_member.rec_lock.n_bits);

	byte_index = i / 8;
	bit_index = i % 8;

	ptr = (byte*)lock + sizeof(lock_t) + byte_index;

	b = (ulint)*ptr;
	b = ut_bit_set_nth(b, bit_index, TRUE);
	*ptr = (byte)b;
}

/**************************************************************************
Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
if none found. */

ulint
lock_rec_find_set_bit(
/*==================*/
			/* out: bit index == heap number of the record, or
			ULINT_UNDEFINED if none found */
	lock_t*	lock)	/* in: record lock with at least one bit set */
{
	ulint	i;

	for (i = 0; i < lock_rec_get_n_bits(lock); i++) {

		if (lock_rec_get_nth_bit(lock, i)) {

			return(i);
		}
	}

	return(ULINT_UNDEFINED);
}

/**************************************************************************
Resets the nth bit of a record lock. */
UNIV_INLINE
void
lock_rec_reset_nth_bit(
/*===================*/
	lock_t*	lock,	/* in: record lock */
	ulint	i)	/* in: index of the bit which must be set to TRUE
			when this function is called */
{
	ulint	byte_index;
	ulint	bit_index;
	byte*	ptr;
	ulint	b;

	ut_ad(lock);
	ut_ad(lock_get_type(lock) == LOCK_REC);
	ut_ad(i < lock->un_member.rec_lock.n_bits);

	byte_index = i / 8;
	bit_index = i % 8;

	ptr = (byte*)lock + sizeof(lock_t) + byte_index;

	b = (ulint)*ptr;
	b = ut_bit_set_nth(b, bit_index, FALSE);
	*ptr = (byte)b;
}

/*************************************************************************
Gets the first or next record lock on a page. */
UNIV_INLINE
lock_t*
lock_rec_get_next_on_page(
/*======================*/
			/* out: next lock, NULL if none exists */
	lock_t*	lock)	/* in: a record lock */
{
	ulint	space;
	ulint	page_no;

	ut_ad(mutex_own(&kernel_mutex));
	ut_ad(lock_get_type(lock) == LOCK_REC);

	space = lock->un_member.rec_lock.space;
	page_no = lock->un_member.rec_lock.page_no;

	for (;;) {
		lock = HASH_GET_NEXT(hash, lock);

		if (!lock) {

			return(NULL);
		}

		if ((lock->un_member.rec_lock.space == space)
		    && (lock->un_member.rec_lock.page_no == page_no)) {

			break;
		}
	}

	return(lock);
}

/*************************************************************************
Gets the first record lock on a page, where the page is identified by its
file address. */
UNIV_INLINE
lock_t*
lock_rec_get_first_on_page_addr(
/*============================*/
			/* out: first lock, NULL if none exists */
	ulint	space,	/* in: space */
	ulint	page_no)/* in: page number */
{
	lock_t*	lock;

	ut_ad(mutex_own(&kernel_mutex));

	lock = HASH_GET_FIRST(lock_sys->rec_hash,
			      lock_rec_hash(space, page_no));
	while (lock) {
		if ((lock->un_member.rec_lock.space == space)
		    && (lock->un_member.rec_lock.page_no == page_no)) {

			break;
		}

		lock = HASH_GET_NEXT(hash, lock);
	}

	return(lock);
}

/*************************************************************************
Returns TRUE if there are explicit record locks on a page. */

ibool
lock_rec_expl_exist_on_page(
/*========================*/
			/* out: TRUE if there are explicit record locks on
			the page */
	ulint	space,	/* in: space id */
	ulint	page_no)/* in: page number */
{
	ibool	ret;

	mutex_enter(&kernel_mutex);

	if (lock_rec_get_first_on_page_addr(space, page_no)) {
		ret = TRUE;
	} else {
		ret = FALSE;
	}

	mutex_exit(&kernel_mutex);

	return(ret);
}

/*************************************************************************
Gets the first record lock on a page, where the page is identified by a
pointer to it. */
UNIV_INLINE
lock_t*
lock_rec_get_first_on_page(
/*=======================*/
			/* out: first lock, NULL if none exists */
	byte*	ptr)	/* in: pointer to somewhere on the page */
{
	ulint	hash;
	lock_t*	lock;
	ulint	space;
	ulint	page_no;

	ut_ad(mutex_own(&kernel_mutex));

	hash = buf_frame_get_lock_hash_val(ptr);

	lock = HASH_GET_FIRST(lock_sys->rec_hash, hash);

	while (lock) {
		space = buf_frame_get_space_id(ptr);
		page_no = buf_frame_get_page_no(ptr);

		if ((lock->un_member.rec_lock.space == space)
		    && (lock->un_member.rec_lock.page_no == page_no)) {

			break;
		}

		lock = HASH_GET_NEXT(hash, lock);
	}

	return(lock);
}

/*************************************************************************
Gets the next explicit lock request on a record. */
UNIV_INLINE
lock_t*
lock_rec_get_next(
/*==============*/
			/* out: next lock, NULL if none exists */
	rec_t*	rec,	/* in: record on a page */
	lock_t*	lock)	/* in: lock */
{
	ut_ad(mutex_own(&kernel_mutex));
	ut_ad(lock_get_type(lock) == LOCK_REC);

	if (page_rec_is_comp(rec)) {
		do {
			lock = lock_rec_get_next_on_page(lock);
		} while (lock && !lock_rec_get_nth_bit(
				 lock, rec_get_heap_no(rec, TRUE)));
	} else {
		do {
			lock = lock_rec_get_next_on_page(lock);
		} while (lock && !lock_rec_get_nth_bit(
				 lock, rec_get_heap_no(rec, FALSE)));
	}

	return(lock);
}

/*************************************************************************
Gets the first explicit lock request on a record. */
UNIV_INLINE
lock_t*
lock_rec_get_first(
/*===============*/
			/* out: first lock, NULL if none exists */
	rec_t*	rec)	/* in: record on a page */
{
	lock_t*	lock;

	ut_ad(mutex_own(&kernel_mutex));

	lock = lock_rec_get_first_on_page(rec);
	if (UNIV_LIKELY_NULL(lock)) {
		ulint	heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));

		while (lock && !lock_rec_get_nth_bit(lock, heap_no)) {
			lock = lock_rec_get_next_on_page(lock);
		}
	}

	return(lock);
}

/*************************************************************************
Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
pointer in the transaction! This function is used in lock object creation
and resetting. */
static
void
lock_rec_bitmap_reset(
/*==================*/
	lock_t*	lock)	/* in: record lock */
{
	byte*	ptr;
	ulint	n_bytes;
	ulint	i;

	ut_ad(lock_get_type(lock) == LOCK_REC);

	/* Reset to zero the bitmap which resides immediately after the lock
	struct */

	ptr = (byte*)lock + sizeof(lock_t);

	n_bytes = lock_rec_get_n_bits(lock) / 8;

	ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);

	for (i = 0; i < n_bytes; i++) {

		*ptr = 0;

		ptr++;
	}
}

/*************************************************************************
Copies a record lock to heap. */
static
lock_t*
lock_rec_copy(
/*==========*/
				/* out: copy of lock */
	lock_t*		lock,	/* in: record lock */
	mem_heap_t*	heap)	/* in: memory heap */
{
	lock_t*	dupl_lock;
	ulint	size;

	ut_ad(lock_get_type(lock) == LOCK_REC);

	size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;

	dupl_lock = mem_heap_alloc(heap, size);

	ut_memcpy(dupl_lock, lock, size);

	return(dupl_lock);
}

/*************************************************************************
Gets the previous record lock set on a record. */

lock_t*
lock_rec_get_prev(
/*==============*/
			/* out: previous lock on the same record, NULL if
			none exists */
	lock_t*	in_lock,/* in: record lock */
	ulint	heap_no)/* in: heap number of the record */
{
	lock_t*	lock;
	ulint	space;
	ulint	page_no;
	lock_t*	found_lock	= NULL;

	ut_ad(mutex_own(&kernel_mutex));
	ut_ad(lock_get_type(in_lock) == LOCK_REC);

	space = in_lock->un_member.rec_lock.space;
	page_no = in_lock->un_member.rec_lock.page_no;

	lock = lock_rec_get_first_on_page_addr(space, page_no);

	for (;;) {
		ut_ad(lock);

		if (lock == in_lock) {

			return(found_lock);
		}

		if (lock_rec_get_nth_bit(lock, heap_no)) {

			found_lock = lock;
		}

		lock = lock_rec_get_next_on_page(lock);
	}
}

/*============= FUNCTIONS FOR ANALYZING TABLE LOCK QUEUE ================*/

/*************************************************************************
Checks if a transaction has the specified table lock, or stronger. */
UNIV_INLINE
lock_t*
lock_table_has(
/*===========*/
				/* out: lock or NULL */
	trx_t*		trx,	/* in: transaction */
	dict_table_t*	table,	/* in: table */
	ulint		mode)	/* in: lock mode */
{
	lock_t*	lock;

	ut_ad(mutex_own(&kernel_mutex));

	/* Look for stronger locks the same trx already has on the table */

	lock = UT_LIST_GET_LAST(table->locks);

	while (lock != NULL) {

		if (lock->trx == trx
		    && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {

			/* The same trx already has locked the table in
			a mode stronger or equal to the mode given */

			ut_ad(!lock_get_wait(lock));

			return(lock);
		}

		lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
	}

	return(NULL);
}

/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/

/*************************************************************************
Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
to precise_mode. */
UNIV_INLINE
lock_t*
lock_rec_has_expl(
/*==============*/
			/* out: lock or NULL */
	ulint	precise_mode,/* in: LOCK_S or LOCK_X possibly ORed to
			LOCK_GAP or LOCK_REC_NOT_GAP,
			for a supremum record we regard this always a gap
			type request */
	rec_t*	rec,	/* in: record */
	trx_t*	trx)	/* in: transaction */
{
	lock_t*	lock;

	ut_ad(mutex_own(&kernel_mutex));
	ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
	      || (precise_mode & LOCK_MODE_MASK) == LOCK_X);
	ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));

	lock = lock_rec_get_first(rec);

	while (lock) {
		if (lock->trx == trx
		    && lock_mode_stronger_or_eq(lock_get_mode(lock),
						precise_mode & LOCK_MODE_MASK)
		    && !lock_get_wait(lock)
		    && (!lock_rec_get_rec_not_gap(lock)
			|| (precise_mode & LOCK_REC_NOT_GAP)
			|| page_rec_is_supremum(rec))
		    && (!lock_rec_get_gap(lock)
			|| (precise_mode & LOCK_GAP)
			|| page_rec_is_supremum(rec))
		    && (!lock_rec_get_insert_intention(lock))) {

			return(lock);
		}

		lock = lock_rec_get_next(rec, lock);
	}

	return(NULL);
}

#ifndef UNIV_HOTBACKUP
/*************************************************************************
Checks if some other transaction has a lock request in the queue. */
static
lock_t*
lock_rec_other_has_expl_req(
/*========================*/
			/* out: lock or NULL */
	ulint	mode,	/* in: LOCK_S or LOCK_X */
	ulint	gap,	/* in: LOCK_GAP if also gap locks are taken
			into account, or 0 if not */
	ulint	wait,	/* in: LOCK_WAIT if also waiting locks are
			taken into account, or 0 if not */
	rec_t*	rec,	/* in: record to look at */
	trx_t*	trx)	/* in: transaction, or NULL if requests by all
			transactions are taken into account */
{
	lock_t*	lock;

	ut_ad(mutex_own(&kernel_mutex));
	ut_ad(mode == LOCK_X || mode == LOCK_S);
	ut_ad(gap == 0 || gap == LOCK_GAP);
	ut_ad(wait == 0 || wait == LOCK_WAIT);

	lock = lock_rec_get_first(rec);

	while (lock) {
		if (lock->trx != trx
		    && (gap
			|| !(lock_rec_get_gap(lock)
			     || page_rec_is_supremum(rec)))
		    && (wait || !lock_get_wait(lock))
		    && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {

			return(lock);
		}

		lock = lock_rec_get_next(rec, lock);
	}

	return(NULL);
}
#endif /* !UNIV_HOTBACKUP */

/*************************************************************************
Checks if some other transaction has a conflicting explicit lock request
in the queue, so that we have to wait. */
static
lock_t*
lock_rec_other_has_conflicting(
/*===========================*/
			/* out: lock or NULL */
	ulint	mode,	/* in: LOCK_S or LOCK_X,
			possibly ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
			LOCK_INSERT_INTENTION */
	rec_t*	rec,	/* in: record to look at */
	trx_t*	trx)	/* in: our transaction */
{
	lock_t*	lock;

	ut_ad(mutex_own(&kernel_mutex));

	lock = lock_rec_get_first(rec);

	while (lock) {
		if (lock_rec_has_to_wait(trx, mode, lock,
					 page_rec_is_supremum(rec))) {

			return(lock);
		}

		lock = lock_rec_get_next(rec, lock);
	}

	return(NULL);
}

/*************************************************************************
Looks for a suitable type record lock struct by the same trx on the same page.
This can be used to save space when a new record lock should be set on a page:
no new struct is needed, if a suitable old one is found. */
UNIV_INLINE
lock_t*
lock_rec_find_similar_on_page(
/*==========================*/
				/* out: lock or NULL */
	ulint	type_mode,	/* in: lock type_mode field */
	rec_t*	rec,		/* in: record */
	trx_t*	trx)		/* in: transaction */
{
	lock_t*	lock;
	ulint	heap_no;

	ut_ad(mutex_own(&kernel_mutex));

	heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
	lock = lock_rec_get_first_on_page(rec);

	while (lock != NULL) {
		if (lock->trx == trx
		    && lock->type_mode == type_mode
		    && lock_rec_get_n_bits(lock) > heap_no) {

			return(lock);
		}

		lock = lock_rec_get_next_on_page(lock);
	}

	return(NULL);
}

/*************************************************************************
Checks if some transaction has an implicit x-lock on a record in a secondary
index. */

trx_t*
lock_sec_rec_some_has_impl_off_kernel(
/*==================================*/
				/* out: transaction which has the x-lock, or
				NULL */
	rec_t*		rec,	/* in: user record */
	dict_index_t*	index,	/* in: secondary index */
	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
{
	page_t*	page;

	ut_ad(mutex_own(&kernel_mutex));
	ut_ad(!(index->type & DICT_CLUSTERED));
	ut_ad(page_rec_is_user_rec(rec));
	ut_ad(rec_offs_validate(rec, index, offsets));

	page = buf_frame_align(rec);

	/* Some transaction may have an implicit x-lock on the record only
	if the max trx id for the page >= min trx id for the trx list, or
	database recovery is running. We do not write the changes of a page
	max trx id to the log, and therefore during recovery, this value
	for a page may be incorrect. */

	if (!(ut_dulint_cmp(page_get_max_trx_id(page),
			    trx_list_get_min_trx_id()) >= 0)
	    && !recv_recovery_is_on()) {

		return(NULL);
	}

	/* Ok, in this case it is possible that some transaction has an
	implicit x-lock. We have to look in the clustered index. */

	if (!lock_check_trx_id_sanity(page_get_max_trx_id(page),
				      rec, index, offsets, TRUE)) {
		buf_page_print(page);

		/* The page is corrupt: try to avoid a crash by returning
		NULL */
		return(NULL);
	}

	return(row_vers_impl_x_locked_off_kernel(rec, index, offsets));
}

/*************************************************************************
Return approximate number of record locks (bits set in the bitmap) for
this transaction. Since delete-marked records may be removed, the
record count will not be precise. */

ulint
lock_number_of_rows_locked(
/*=======================*/
	trx_t*	trx)	/* in: transaction */
{
	lock_t*	lock;
	ulint	n_records	= 0;
	ulint	n_bits;
	ulint	n_bit;

	lock = UT_LIST_GET_FIRST(trx->trx_locks);

	while (lock) {
		if (lock_get_type(lock) == LOCK_REC) {
			n_bits = lock_rec_get_n_bits(lock);

			for (n_bit = 0; n_bit < n_bits; n_bit++) {
				if (lock_rec_get_nth_bit(lock, n_bit)) {
					n_records++;
				}
			}
		}

		lock = UT_LIST_GET_NEXT(trx_locks, lock);
	}

	return(n_records);
}

/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/

/*************************************************************************
Creates a new record lock and inserts it to the lock queue. Does NOT check
for deadlocks or lock compatibility! */
static
lock_t*
lock_rec_create(
/*============*/
				/* out: created lock */
	ulint		type_mode,/* in: lock mode and wait flag, type is
				ignored and replaced by LOCK_REC */
	rec_t*		rec,	/* in: record on page */
	dict_index_t*	index,	/* in: index of record */
	trx_t*		trx)	/* in: transaction */
{
	page_t*	page;
	lock_t*	lock;
	ulint	page_no;
	ulint	heap_no;
	ulint	space;
	ulint	n_bits;
	ulint	n_bytes;

	ut_ad(mutex_own(&kernel_mutex));

	page = buf_frame_align(rec);
	space = buf_frame_get_space_id(page);
	page_no = buf_frame_get_page_no(page);
	heap_no = rec_get_heap_no(rec, page_is_comp(page));

	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));

	/* If rec is the supremum record, then we reset the gap and
	LOCK_REC_NOT_GAP bits, as all locks on the supremum are
	automatically of the gap type */

	if (rec == page_get_supremum_rec(page)) {
		ut_ad(!(type_mode & LOCK_REC_NOT_GAP));

		type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
	}

	/* Make lock bitmap bigger by a safety margin */
	n_bits = page_dir_get_n_heap(page) + LOCK_PAGE_BITMAP_MARGIN;
	n_bytes = 1 + n_bits / 8;

	lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t) + n_bytes);

	UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, lock);

	lock->trx = trx;

	lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC;
	lock->index = index;

	lock->un_member.rec_lock.space = space;
	lock->un_member.rec_lock.page_no = page_no;
	lock->un_member.rec_lock.n_bits = n_bytes * 8;
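
	/* Worked example (added, not in the original): with, say, 100
	records in the page heap, n_bits = 100 + LOCK_PAGE_BITMAP_MARGIN
	= 164 and n_bytes = 1 + 164 / 8 = 21, so the lock stores
	n_bits = 21 * 8 = 168 usable bits: up to 68 more records can be
	inserted into the page before a lock with a bigger bitmap is
	needed. */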
1730
/* Reset to zero the bitmap which resides immediately after the
1733
lock_rec_bitmap_reset(lock);
1735
/* Set the bit corresponding to rec */
1736
lock_rec_set_nth_bit(lock, heap_no);
1738
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
1739
lock_rec_fold(space, page_no), lock);
1740
if (type_mode & LOCK_WAIT) {
1742
lock_set_lock_and_trx_wait(lock, trx);
1748
/*************************************************************************
1749
Enqueues a waiting request for a lock which cannot be granted immediately.
1750
Checks for deadlocks. */
1753
lock_rec_enqueue_waiting(
1754
/*=====================*/
1755
/* out: DB_LOCK_WAIT, DB_DEADLOCK, or
1756
DB_QUE_THR_SUSPENDED, or DB_SUCCESS;
1757
DB_SUCCESS means that there was a deadlock,
1758
but another transaction was chosen as a
1759
victim, and we got the lock immediately:
1760
no need to wait then */
1761
ulint type_mode,/* in: lock mode this transaction is
1762
requesting: LOCK_S or LOCK_X, possibly ORed
1763
with LOCK_GAP or LOCK_REC_NOT_GAP, ORed
1764
with LOCK_INSERT_INTENTION if this waiting
1765
lock request is set when performing an
1766
insert of an index record */
1767
rec_t* rec, /* in: record */
1768
dict_index_t* index, /* in: index of record */
1769
que_thr_t* thr) /* in: query thread */
1774
ut_ad(mutex_own(&kernel_mutex));
1776
/* Test if there already is some other reason to suspend thread:
1777
we do not enqueue a lock request if the query thread should be
1780
if (que_thr_stop(thr)) {
1784
return(DB_QUE_THR_SUSPENDED);
1787
trx = thr_get_trx(thr);
1789
if (trx->dict_operation) {
1790
ut_print_timestamp(stderr);
1791
fputs(" InnoDB: Error: a record lock wait happens"
1792
" in a dictionary operation!\n"
1793
"InnoDB: Table name ", stderr);
1794
ut_print_name(stderr, trx, TRUE, index->table_name);
1796
"InnoDB: Submit a detailed bug report"
1797
" to http://bugs.mysql.com\n",
1801
/* Enqueue the lock request that will wait to be granted */
1802
lock = lock_rec_create(type_mode | LOCK_WAIT, rec, index, trx);
1804
/* Check if a deadlock occurs: if yes, remove the lock request and
1805
return an error code */
1807
if (lock_deadlock_occurs(lock, trx)) {
1809
lock_reset_lock_and_trx_wait(lock);
1810
lock_rec_reset_nth_bit(lock, rec_get_heap_no(
1811
rec, page_rec_is_comp(rec)));
1813
return(DB_DEADLOCK);
1816
/* If there was a deadlock but we chose another transaction as a
1817
victim, it is possible that we already have the lock now granted! */
1819
if (trx->wait_lock == NULL) {
1824
trx->que_state = TRX_QUE_LOCK_WAIT;
1825
trx->was_chosen_as_deadlock_victim = FALSE;
1826
trx->wait_started = time(NULL);
1828
ut_a(que_thr_stop(thr));
1831
if (lock_print_waits) {
1832
fprintf(stderr, "Lock wait for trx %lu in index ",
1833
(ulong) ut_dulint_get_low(trx->id));
1834
ut_print_name(stderr, trx, FALSE, index->name);
1836
#endif /* UNIV_DEBUG */
1838
return(DB_LOCK_WAIT);
1841
/*************************************************************************
1842
Adds a record lock request in the record queue. The request is normally
1843
added as the last in the queue, but if there are no waiting lock requests
1844
on the record, and the request to be added is not a waiting request, we
1845
can reuse a suitable record lock object already existing on the same page,
1846
just setting the appropriate bit in its bitmap. This is a low-level function
1847
which does NOT check for deadlocks or lock compatibility! */
1850
lock_rec_add_to_queue(
1851
/*==================*/
1852
/* out: lock where the bit was set */
1853
ulint type_mode,/* in: lock mode, wait, gap etc. flags;
1854
type is ignored and replaced by LOCK_REC */
1855
rec_t* rec, /* in: record on page */
1856
dict_index_t* index, /* in: index of record */
1857
trx_t* trx) /* in: transaction */
1860
lock_t* similar_lock = NULL;
1862
ibool somebody_waits = FALSE;
1864
ut_ad(mutex_own(&kernel_mutex));
1865
ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
1866
|| ((type_mode & LOCK_MODE_MASK) != LOCK_S)
1867
|| !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT,
1869
ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP))
1870
|| ((type_mode & LOCK_MODE_MASK) != LOCK_X)
1871
|| !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
1874
type_mode = type_mode | LOCK_REC;
1876
/* If rec is the supremum record, then we can reset the gap bit, as
1877
all locks on the supremum are automatically of the gap type, and we
1878
try to avoid unnecessary memory consumption of a new record lock
1879
struct for a gap type lock */
1881
if (page_rec_is_supremum(rec)) {
1882
ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
1884
/* There should never be LOCK_REC_NOT_GAP on a supremum
1885
record, but let us play safe */
1887
type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
1890
/* Look for a waiting lock request on the same record or on a gap */
1892
heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
1893
lock = lock_rec_get_first_on_page(rec);
1895
while (lock != NULL) {
1896
if (lock_get_wait(lock)
1897
&& (lock_rec_get_nth_bit(lock, heap_no))) {
1899
somebody_waits = TRUE;
1902
lock = lock_rec_get_next_on_page(lock);
1905
/* Look for a similar record lock on the same page: if one is found
1906
and there are no waiting lock requests, we can just set the bit */
1908
similar_lock = lock_rec_find_similar_on_page(type_mode, rec, trx);
1910
if (similar_lock && !somebody_waits && !(type_mode & LOCK_WAIT)) {
1912
lock_rec_set_nth_bit(similar_lock, heap_no);
1914
return(similar_lock);
1917
return(lock_rec_create(type_mode, rec, index, trx));
1920
/*************************************************************************
1921
This is a fast routine for locking a record in the most common cases:
1922
there are no explicit locks on the page, or there is just one lock, owned
1923
by this transaction, and of the right type_mode. This is a low-level function
1924
which does NOT look at implicit locks! Checks lock compatibility within
1925
explicit locks. This function sets a normal next-key lock, or in the case of
1926
a page supremum record, a gap type lock. */
1931
/* out: TRUE if locking succeeded */
1932
ibool impl, /* in: if TRUE, no lock is set if no wait
1933
is necessary: we assume that the caller will
1934
set an implicit lock */
1935
ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly
1936
ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
1937
rec_t* rec, /* in: record */
1938
dict_index_t* index, /* in: index of record */
1939
que_thr_t* thr) /* in: query thread */
1945
ut_ad(mutex_own(&kernel_mutex));
1946
ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
1947
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
1948
ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
1949
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
1950
ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
1951
|| (LOCK_MODE_MASK & mode) == LOCK_X);
1952
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
1953
|| mode - (LOCK_MODE_MASK & mode) == 0
1954
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
1956
heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
1958
lock = lock_rec_get_first_on_page(rec);
1960
trx = thr_get_trx(thr);
1964
lock_rec_create(mode, rec, index, trx);
1966
if (srv_locks_unsafe_for_binlog
1967
|| trx->isolation_level
1968
== TRX_ISO_READ_COMMITTED) {
1969
trx_register_new_rec_lock(trx, index);
1976
if (lock_rec_get_next_on_page(lock)) {
1981
if (lock->trx != trx
1982
|| lock->type_mode != (mode | LOCK_REC)
1983
|| lock_rec_get_n_bits(lock) <= heap_no) {
1989
/* If the nth bit of the record lock is already set then we
1990
do not set a new lock bit, otherwise we do set */
1992
if (!lock_rec_get_nth_bit(lock, heap_no)) {
1993
lock_rec_set_nth_bit(lock, heap_no);
1994
if (srv_locks_unsafe_for_binlog
1995
|| trx->isolation_level
1996
== TRX_ISO_READ_COMMITTED) {
1997
trx_register_new_rec_lock(trx, index);
/*************************************************************************
This is the general, and slower, routine for locking a record. This is a
low-level function which does NOT look at implicit locks! Checks lock
compatibility within explicit locks. This function sets a normal next-key
lock, or in the case of a page supremum record, a gap type lock. */
/* out: DB_SUCCESS, DB_LOCK_WAIT, or error code */
ibool impl, /* in: if TRUE, no lock is set if no wait is
necessary: we assume that the caller will set an implicit lock */
ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly
ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
rec_t* rec, /* in: record */
dict_index_t* index, /* in: index of record */
que_thr_t* thr) /* in: query thread */
ut_ad(mutex_own(&kernel_mutex));
ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
|| (LOCK_MODE_MASK & mode) == LOCK_X);
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
trx = thr_get_trx(thr);
if (lock_rec_has_expl(mode, rec, trx)) {
/* The trx already has a strong enough lock on rec: do nothing */
} else if (lock_rec_other_has_conflicting(mode, rec, trx)) {
/* If another transaction has a non-gap conflicting request in
the queue, as this transaction does not have a lock strong
enough already granted on the record, we have to wait. */
err = lock_rec_enqueue_waiting(mode, rec, index, thr);
if (srv_locks_unsafe_for_binlog
|| trx->isolation_level == TRX_ISO_READ_COMMITTED) {
trx_register_new_rec_lock(trx, index);
/* Set the requested lock on the record */
lock_rec_add_to_queue(LOCK_REC | mode, rec, index,
if (srv_locks_unsafe_for_binlog
|| trx->isolation_level
== TRX_ISO_READ_COMMITTED) {
trx_register_new_rec_lock(trx, index);
/*************************************************************************
Tries to lock the specified record in the mode requested. If not immediately
possible, enqueues a waiting lock request. This is a low-level function
which does NOT look at implicit locks! Checks lock compatibility within
explicit locks. This function sets a normal next-key lock, or in the case
of a page supremum record, a gap type lock. */
/* out: DB_SUCCESS, DB_LOCK_WAIT, or error code */
ibool impl, /* in: if TRUE, no lock is set if no wait is
necessary: we assume that the caller will set an implicit lock */
ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly
ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */
rec_t* rec, /* in: record */
dict_index_t* index, /* in: index of record */
que_thr_t* thr) /* in: query thread */
ut_ad(mutex_own(&kernel_mutex));
ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
|| (LOCK_MODE_MASK & mode) == LOCK_X);
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0);
if (lock_rec_lock_fast(impl, mode, rec, index, thr)) {
/* We try a simplified and faster subroutine for the most
common cases */
err = lock_rec_lock_slow(impl, mode, rec, index, thr);
/*************************************************************************
Checks if a waiting record lock request still has to wait in a queue. */
lock_rec_has_to_wait_in_queue(
/*==========================*/
/* out: TRUE if still has to wait */
lock_t* wait_lock) /* in: waiting record lock */
ut_ad(mutex_own(&kernel_mutex));
ut_ad(lock_get_wait(wait_lock));
ut_ad(lock_get_type(wait_lock) == LOCK_REC);
space = wait_lock->un_member.rec_lock.space;
page_no = wait_lock->un_member.rec_lock.page_no;
heap_no = lock_rec_find_set_bit(wait_lock);
lock = lock_rec_get_first_on_page_addr(space, page_no);
while (lock != wait_lock) {
if (lock_rec_get_nth_bit(lock, heap_no)
&& lock_has_to_wait(wait_lock, lock)) {
lock = lock_rec_get_next_on_page(lock);
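/* Queue semantics sketch (illustration only, excluded from the
build): a waiting request is blocked only by conflicting locks AHEAD
of it in the hash chain, which is why the scan above walks from the
head of the page's lock list and stops when it reaches wait_lock
itself. The types below are simplified stand-ins, not the real
InnoDB ones. */
#if 0
struct qlock {
	struct qlock*	next;		/* next lock on the same page */
	int		conflicts;	/* 1 if incompatible with waiter */
};

static int
still_has_to_wait(
	const struct qlock*	head,	   /* in: first lock on the page */
	const struct qlock*	wait_lock) /* in: the waiting request */
{
	const struct qlock*	lock;

	for (lock = head; lock != wait_lock; lock = lock->next) {
		if (lock->conflicts) {

			return(1);	/* a lock ahead still blocks us */
		}
	}

	return(0);
}
#endif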
/*****************************************************************
Grants a lock to a waiting lock request and releases the waiting
transaction. */
lock_t* lock) /* in: waiting lock request */
ut_ad(mutex_own(&kernel_mutex));
lock_reset_lock_and_trx_wait(lock);
if (lock_get_mode(lock) == LOCK_AUTO_INC) {
if (lock->trx->auto_inc_lock != NULL) {
"InnoDB: Error: trx already had"
" an AUTO-INC lock!\n");
/* Store pointer to lock to trx so that we know to
release it at the end of the SQL statement */
lock->trx->auto_inc_lock = lock;
if (lock_print_waits) {
fprintf(stderr, "Lock wait for trx %lu ends\n",
(ulong) ut_dulint_get_low(lock->trx->id));
#endif /* UNIV_DEBUG */
/* If we are resolving a deadlock by choosing another transaction
as a victim, then our original transaction may not be in the
TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait. */
if (lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
trx_end_lock_wait(lock->trx);
/*****************************************************************
Cancels a waiting record lock request and releases the waiting transaction
that requested it. NOTE: does NOT check if waiting lock requests behind this
one can now be granted! */
lock_t* lock) /* in: waiting record lock request */
ut_ad(mutex_own(&kernel_mutex));
ut_ad(lock_get_type(lock) == LOCK_REC);
/* Reset the bit (there can be only one set bit) in the lock bitmap */
lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
/* Reset the wait flag and the back pointer to lock in trx */
lock_reset_lock_and_trx_wait(lock);
/* The following function releases the trx from lock wait */
trx_end_lock_wait(lock->trx);
/*****************************************************************
Removes a record lock request, waiting or granted, from the queue and
grants locks to other transactions in the queue if they now are entitled
to a lock. NOTE: all record locks contained in in_lock are removed. */
lock_rec_dequeue_from_page(
/*=======================*/
lock_t* in_lock)/* in: record lock object: all record locks which
are contained in this lock object are removed;
transactions waiting behind will get their lock
requests granted, if they are now qualified to it */
ut_ad(mutex_own(&kernel_mutex));
ut_ad(lock_get_type(in_lock) == LOCK_REC);
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), in_lock);
UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock);
/* Check if waiting locks in the queue can now be granted: grant
locks if there are no conflicting locks ahead. */
lock = lock_rec_get_first_on_page_addr(space, page_no);
while (lock != NULL) {
if (lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
/* Grant the lock */
lock = lock_rec_get_next_on_page(lock);
/*****************************************************************
Removes a record lock request, waiting or granted, from the queue. */
lock_t* in_lock)/* in: record lock object: all record locks which
are contained in this lock object are removed */
ut_ad(mutex_own(&kernel_mutex));
ut_ad(lock_get_type(in_lock) == LOCK_REC);
space = in_lock->un_member.rec_lock.space;
page_no = in_lock->un_member.rec_lock.page_no;
HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), in_lock);
UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock);
/*****************************************************************
Removes record lock objects set on an index page which is discarded. This
function does not move locks, or check for waiting locks, therefore the
lock bitmaps must already be reset when this function is called. */
lock_rec_free_all_from_discard_page(
/*================================*/
page_t* page) /* in: page to be discarded */
ut_ad(mutex_own(&kernel_mutex));
space = buf_frame_get_space_id(page);
page_no = buf_frame_get_page_no(page);
lock = lock_rec_get_first_on_page_addr(space, page_no);
while (lock != NULL) {
ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
ut_ad(!lock_get_wait(lock));
next_lock = lock_rec_get_next_on_page(lock);
lock_rec_discard(lock);
/*============= RECORD LOCK MOVING AND INHERITING ===================*/
/*****************************************************************
Resets the lock bits for a single record. Releases transactions waiting for
lock requests here. */
lock_rec_reset_and_release_wait(
/*============================*/
rec_t* rec) /* in: record whose lock bits should be reset */
ut_ad(mutex_own(&kernel_mutex));
heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
lock = lock_rec_get_first(rec);
while (lock != NULL) {
if (lock_get_wait(lock)) {
lock_rec_cancel(lock);
lock_rec_reset_nth_bit(lock, heap_no);
lock = lock_rec_get_next(rec, lock);
/*****************************************************************
Makes a record inherit the locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of
the other record. Also waiting lock requests on rec are inherited as
GRANTED gap locks. */
lock_rec_inherit_to_gap(
/*====================*/
rec_t* heir, /* in: record which inherits */
rec_t* rec) /* in: record from which inherited; does NOT reset
the locks on this record */
ut_ad(mutex_own(&kernel_mutex));
lock = lock_rec_get_first(rec);
/* If srv_locks_unsafe_for_binlog is TRUE or the session is using
READ COMMITTED isolation level, we do not want locks set
by an UPDATE or a DELETE to be inherited as gap type locks. But we
DO want S-locks set by a consistency constraint to be inherited also. */
while (lock != NULL) {
if (!lock_rec_get_insert_intention(lock)
&& !((srv_locks_unsafe_for_binlog
|| lock->trx->isolation_level
== TRX_ISO_READ_COMMITTED)
&& lock_get_mode(lock) == LOCK_X)) {
lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock)
heir, lock->index, lock->trx);
lock = lock_rec_get_next(rec, lock);
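/* Inheritance filter sketch (illustration only, excluded from the
build): a lock is inherited to the gap unless it is an
insert-intention lock, or it is an X-lock taken under READ COMMITTED
or locks-unsafe-for-binlog semantics. The flags below are simplified
stand-ins for the real type_mode bits. */
#if 0
struct ilock {
	int	insert_intention;	/* LOCK_INSERT_INTENTION set? */
	int	is_x;			/* mode is LOCK_X? */
	int	read_committed;		/* trx runs at READ COMMITTED? */
};

static int
inherits_to_gap(const struct ilock* lock, int unsafe_for_binlog)
{
	if (lock->insert_intention) {

		return(0);	/* never inherit insert intention */
	}

	if ((unsafe_for_binlog || lock->read_committed) && lock->is_x) {

		return(0);	/* do not inherit UPDATE/DELETE X-locks */
	}

	return(1);
}
#endif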
/*****************************************************************
Makes a record inherit the gap locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of the
other record. Also waiting lock requests are inherited as GRANTED gap locks. */
lock_rec_inherit_to_gap_if_gap_lock(
/*================================*/
rec_t* heir, /* in: record which inherits */
rec_t* rec) /* in: record from which inherited; does NOT reset
the locks on this record */
ut_ad(mutex_own(&kernel_mutex));
lock = lock_rec_get_first(rec);
while (lock != NULL) {
if (!lock_rec_get_insert_intention(lock)
&& (page_rec_is_supremum(rec)
|| !lock_rec_get_rec_not_gap(lock))) {
lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock)
heir, lock->index, lock->trx);
lock = lock_rec_get_next(rec, lock);
/*****************************************************************
Moves the locks of a record to another record and resets the lock bits of
the donating record. */
rec_t* receiver, /* in: record which gets locks; this record
must have no lock requests on it! */
rec_t* donator, /* in: record which gives locks */
ulint comp) /* in: nonzero=compact page format */
ut_ad(mutex_own(&kernel_mutex));
heap_no = rec_get_heap_no(donator, comp);
lock = lock_rec_get_first(donator);
ut_ad(lock_rec_get_first(receiver) == NULL);
while (lock != NULL) {
type_mode = lock->type_mode;
lock_rec_reset_nth_bit(lock, heap_no);
if (lock_get_wait(lock)) {
lock_reset_lock_and_trx_wait(lock);
/* Note that we FIRST reset the bit, and then set the lock:
the function works also if donator == receiver */
lock_rec_add_to_queue(type_mode, receiver, lock->index,
lock = lock_rec_get_next(donator, lock);
ut_ad(lock_rec_get_first(donator) == NULL);
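/* Why "FIRST reset, THEN set" makes the move safe even when
donator == receiver: resetting bit h and then setting bit h on the
same bitmap leaves the lock in place, whereas the opposite order
could clear a bit that was just granted. A minimal demonstration
(illustration only, excluded from the build): */
#if 0
#include <assert.h>

int
main(void)
{
	unsigned char	bitmap = 0x04;	/* bit 2 set: lock on heap no 2 */

	/* reset, then set the same bit: the lock survives */
	bitmap &= (unsigned char) ~0x04;
	bitmap |= 0x04;
	assert(bitmap == 0x04);
	return(0);
}
#endif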
/*****************************************************************
Updates the lock table when we have reorganized a page. NOTE: we copy
also the locks set on the infimum of the page; the infimum may carry
locks if an update of a record is occurring on the page, and its locks
were temporarily stored on the infimum. */
lock_move_reorganize_page(
/*======================*/
page_t* page, /* in: old index page, now reorganized */
page_t* old_page) /* in: copy of the old, not reorganized page */
UT_LIST_BASE_NODE_T(lock_t) old_locks;
mem_heap_t* heap = NULL;
lock_mutex_enter_kernel();
lock = lock_rec_get_first_on_page(page);
lock_mutex_exit_kernel();
heap = mem_heap_create(256);
/* Copy first all the locks on the page to heap and reset the
bitmaps in the original locks; chain the copies of the locks
using the trx_locks field in them. */
UT_LIST_INIT(old_locks);
while (lock != NULL) {
/* Make a copy of the lock */
old_lock = lock_rec_copy(lock, heap);
UT_LIST_ADD_LAST(trx_locks, old_locks, old_lock);
/* Reset bitmap of lock */
lock_rec_bitmap_reset(lock);
if (lock_get_wait(lock)) {
lock_reset_lock_and_trx_wait(lock);
lock = lock_rec_get_next_on_page(lock);
sup = page_get_supremum_rec(page);
lock = UT_LIST_GET_FIRST(old_locks);
comp = page_is_comp(page);
ut_ad(comp == page_is_comp(old_page));
/* NOTE: we copy also the locks set on the infimum and
supremum of the page; the infimum may carry locks if an
update of a record is occurring on the page, and its locks
were temporarily stored on the infimum */
page_cur_set_before_first(page, &cur1);
page_cur_set_before_first(old_page, &cur2);
/* Set locks according to old locks */
ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
page_cur_get_rec(&cur2),
rec_get_data_size_old(
old_heap_no = rec_get_heap_no(page_cur_get_rec(&cur2),
if (lock_rec_get_nth_bit(lock, old_heap_no)) {
/* NOTE that the old lock bitmap could be too
small for the new heap number! */
lock_rec_add_to_queue(lock->type_mode,
page_cur_get_rec(&cur1),
lock->index, lock->trx);
/* if ((page_cur_get_rec(&cur1) == sup)
&& lock_get_wait(lock)) {
"---\n--\n!!!Lock reorg: supr type %lu\n",
} */
if (page_cur_get_rec(&cur1) == sup) {
page_cur_move_to_next(&cur1);
page_cur_move_to_next(&cur2);
/* Remember that we chained old locks on the trx_locks field */
lock = UT_LIST_GET_NEXT(trx_locks, lock);
lock_mutex_exit_kernel();
mem_heap_free(heap);
ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page),
buf_frame_get_page_no(page)));
/*****************************************************************
Moves the explicit locks on user records to another page if a record
list end is moved to another page. */
lock_move_rec_list_end(
/*===================*/
page_t* new_page, /* in: index page to move to */
page_t* page, /* in: index page */
rec_t* rec) /* in: record on page: this is the
first record moved */
ut_ad(page == buf_frame_align(rec));
lock_mutex_enter_kernel();
/* Note: when we move locks from record to record, waiting locks
and possible granted gap type locks behind them are enqueued in
the original order, because new elements are inserted at the end
of the hash chain, and lock_rec_add_to_queue
does not reuse locks if there are waiters in the queue. */
sup = page_get_supremum_rec(page);
lock = lock_rec_get_first_on_page(page);
comp = page_is_comp(page);
while (lock != NULL) {
page_cur_position(rec, &cur1);
if (page_cur_is_before_first(&cur1)) {
page_cur_move_to_next(&cur1);
page_cur_set_before_first(new_page, &cur2);
page_cur_move_to_next(&cur2);
/* Copy lock requests on user records to new page and
reset the lock bits on the old */
while (page_cur_get_rec(&cur1) != sup) {
ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
page_cur_get_rec(&cur2),
rec_get_data_size_old(
heap_no = rec_get_heap_no(page_cur_get_rec(&cur1),
if (lock_rec_get_nth_bit(lock, heap_no)) {
type_mode = lock->type_mode;
lock_rec_reset_nth_bit(lock, heap_no);
if (lock_get_wait(lock)) {
lock_reset_lock_and_trx_wait(lock);
lock_rec_add_to_queue(type_mode,
page_cur_get_rec(&cur2),
lock->index, lock->trx);
page_cur_move_to_next(&cur1);
page_cur_move_to_next(&cur2);
lock = lock_rec_get_next_on_page(lock);
lock_mutex_exit_kernel();
ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page),
buf_frame_get_page_no(page)));
ut_ad(lock_rec_validate_page(buf_frame_get_space_id(new_page),
buf_frame_get_page_no(new_page)));
/*****************************************************************
Moves the explicit locks on user records to another page if a record
list start is moved to another page. */
lock_move_rec_list_start(
/*=====================*/
page_t* new_page, /* in: index page to move to */
page_t* page, /* in: index page */
rec_t* rec, /* in: record on page: this is the
first record NOT copied */
rec_t* old_end) /* in: old previous-to-last record on
new_page before the records were copied */
lock_mutex_enter_kernel();
lock = lock_rec_get_first_on_page(page);
comp = page_is_comp(page);
ut_ad(comp == page_is_comp(new_page));
ut_ad(page == buf_frame_align(rec));
while (lock != NULL) {
page_cur_set_before_first(page, &cur1);
page_cur_move_to_next(&cur1);
page_cur_position(old_end, &cur2);
page_cur_move_to_next(&cur2);
/* Copy lock requests on user records to new page and
reset the lock bits on the old */
while (page_cur_get_rec(&cur1) != rec) {
ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
page_cur_get_rec(&cur2),
rec_get_data_size_old(
heap_no = rec_get_heap_no(page_cur_get_rec(&cur1),
if (lock_rec_get_nth_bit(lock, heap_no)) {
type_mode = lock->type_mode;
lock_rec_reset_nth_bit(lock, heap_no);
if (lock_get_wait(lock)) {
lock_reset_lock_and_trx_wait(lock);
lock_rec_add_to_queue(type_mode,
page_cur_get_rec(&cur2),
lock->index, lock->trx);
page_cur_move_to_next(&cur1);
page_cur_move_to_next(&cur2);
lock = lock_rec_get_next_on_page(lock);
lock_mutex_exit_kernel();
ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page),
buf_frame_get_page_no(page)));
ut_ad(lock_rec_validate_page(buf_frame_get_space_id(new_page),
buf_frame_get_page_no(new_page)));
/*****************************************************************
Updates the lock table when a page is split to the right. */
lock_update_split_right(
/*====================*/
page_t* right_page, /* in: right page */
page_t* left_page) /* in: left page */
lock_mutex_enter_kernel();
comp = page_is_comp(left_page);
ut_ad(comp == page_is_comp(right_page));
/* Move the locks on the supremum of the left page to the supremum
of the right page */
lock_rec_move(page_get_supremum_rec(right_page),
page_get_supremum_rec(left_page), comp);
/* Inherit the locks to the supremum of left page from the successor
of the infimum on right page */
lock_rec_inherit_to_gap(page_get_supremum_rec(left_page),
page_get_infimum_rec(right_page)));
lock_mutex_exit_kernel();
/*****************************************************************
Updates the lock table when a page is merged to the right. */
lock_update_merge_right(
/*====================*/
rec_t* orig_succ, /* in: original successor of infimum
on the right page before merge */
page_t* left_page) /* in: merged index page which will be
discarded */
lock_mutex_enter_kernel();
/* Inherit the locks from the supremum of the left page to the
original successor of infimum on the right page, to which the left
page was merged */
lock_rec_inherit_to_gap(orig_succ, page_get_supremum_rec(left_page));
/* Reset the locks on the supremum of the left page, releasing
waiting transactions */
lock_rec_reset_and_release_wait(page_get_supremum_rec(left_page));
lock_rec_free_all_from_discard_page(left_page);
lock_mutex_exit_kernel();
/*****************************************************************
Updates the lock table when the root page is copied to another in
btr_root_raise_and_insert. Note that we leave lock structs on the
root page, even though they do not make sense on other than leaf
pages: the reason is that in a pessimistic update the infimum record
of the root page will act as a dummy carrier of the locks of the record
to be updated. */
lock_update_root_raise(
/*===================*/
page_t* new_page, /* in: index page to which copied */
page_t* root) /* in: root page */
lock_mutex_enter_kernel();
comp = page_is_comp(root);
ut_ad(comp == page_is_comp(new_page));
/* Move the locks on the supremum of the root to the supremum
of the new page */
lock_rec_move(page_get_supremum_rec(new_page),
page_get_supremum_rec(root), comp);
lock_mutex_exit_kernel();
/*****************************************************************
Updates the lock table when a page is copied to another and the original page
is removed from the chain of leaf pages, except if the page is the root! */
lock_update_copy_and_discard(
/*=========================*/
page_t* new_page, /* in: index page to which copied */
page_t* page) /* in: index page; NOT the root! */
lock_mutex_enter_kernel();
comp = page_is_comp(page);
ut_ad(comp == page_is_comp(new_page));
/* Move the locks on the supremum of the old page to the supremum
of the new page */
lock_rec_move(page_get_supremum_rec(new_page),
page_get_supremum_rec(page), comp);
lock_rec_free_all_from_discard_page(page);
lock_mutex_exit_kernel();
/*****************************************************************
Updates the lock table when a page is split to the left. */
lock_update_split_left(
/*===================*/
page_t* right_page, /* in: right page */
page_t* left_page) /* in: left page */
lock_mutex_enter_kernel();
/* Inherit the locks to the supremum of the left page from the
successor of the infimum on the right page */
lock_rec_inherit_to_gap(page_get_supremum_rec(left_page),
page_get_infimum_rec(right_page)));
lock_mutex_exit_kernel();
/*****************************************************************
Updates the lock table when a page is merged to the left. */
lock_update_merge_left(
/*===================*/
page_t* left_page, /* in: left page to which merged */
rec_t* orig_pred, /* in: original predecessor of supremum
on the left page before merge */
page_t* right_page) /* in: merged index page which will be
discarded */
rec_t* left_next_rec;
rec_t* left_supremum;
lock_mutex_enter_kernel();
comp = page_is_comp(left_page);
ut_ad(comp == page_is_comp(right_page));
ut_ad(left_page == buf_frame_align(orig_pred));
left_next_rec = page_rec_get_next(orig_pred);
left_supremum = page_get_supremum_rec(left_page);
if (UNIV_LIKELY(left_next_rec != left_supremum)) {
/* Inherit the locks on the supremum of the left page to the
first record which was moved from the right page */
lock_rec_inherit_to_gap(left_next_rec, left_supremum);
/* Reset the locks on the supremum of the left page,
releasing waiting transactions */
lock_rec_reset_and_release_wait(left_supremum);
/* Move the locks from the supremum of right page to the supremum
of the left page */
lock_rec_move(left_supremum, page_get_supremum_rec(right_page), comp);
lock_rec_free_all_from_discard_page(right_page);
lock_mutex_exit_kernel();
/*****************************************************************
Resets the original locks on heir and replaces them with gap type locks
inherited from rec. */
lock_rec_reset_and_inherit_gap_locks(
/*=================================*/
rec_t* heir, /* in: heir record */
rec_t* rec) /* in: record */
mutex_enter(&kernel_mutex);
lock_rec_reset_and_release_wait(heir);
lock_rec_inherit_to_gap(heir, rec);
mutex_exit(&kernel_mutex);
/*****************************************************************
Updates the lock table when a page is discarded. */
lock_update_discard(
/*================*/
rec_t* heir, /* in: record which will inherit the locks */
page_t* page) /* in: index page which will be discarded */
lock_mutex_enter_kernel();
if (NULL == lock_rec_get_first_on_page(page)) {
/* No locks exist on page, nothing to do */
lock_mutex_exit_kernel();
/* Inherit all the locks on the page to the record and reset all
the locks on the page */
rec = page_get_infimum_rec(page);
lock_rec_inherit_to_gap(heir, rec);
/* Reset the locks on rec, releasing waiting transactions */
lock_rec_reset_and_release_wait(rec);
if (page_rec_is_supremum(rec)) {
rec = page_rec_get_next(rec);
lock_rec_free_all_from_discard_page(page);
lock_mutex_exit_kernel();
/*****************************************************************
Updates the lock table when a new user record is inserted. */
rec_t* rec) /* in: the inserted record */
lock_mutex_enter_kernel();
/* Inherit the gap-locking locks for rec, in gap mode, from the next
record */
lock_rec_inherit_to_gap_if_gap_lock(rec, page_rec_get_next(rec));
lock_mutex_exit_kernel();
/*****************************************************************
Updates the lock table when a record is removed. */
rec_t* rec) /* in: the record to be removed */
lock_mutex_enter_kernel();
/* Let the next record inherit the locks from rec, in gap mode */
lock_rec_inherit_to_gap(page_rec_get_next(rec), rec);
/* Reset the lock bits on rec and release waiting transactions */
lock_rec_reset_and_release_wait(rec);
lock_mutex_exit_kernel();
/*************************************************************************
Stores on the page infimum record the explicit locks of another record.
This function is used to store the lock state of a record when it is
updated and the size of the record changes in the update. The record
is moved in such an update, perhaps to another page. The infimum record
acts as a dummy carrier record, taking care of lock releases while the
actual record is being moved. */
lock_rec_store_on_page_infimum(
/*===========================*/
page_t* page, /* in: page containing the record */
rec_t* rec) /* in: record whose lock state is stored
on the infimum record of the same page; lock
bits are reset on the record */
ut_ad(page == buf_frame_align(rec));
lock_mutex_enter_kernel();
lock_rec_move(page_get_infimum_rec(page), rec, page_is_comp(page));
lock_mutex_exit_kernel();
/*************************************************************************
Restores the state of explicit lock requests on a single record, where the
state was stored on the infimum of the page. */
lock_rec_restore_from_page_infimum(
/*===============================*/
rec_t* rec, /* in: record whose lock state is restored */
page_t* page) /* in: page (rec is not necessarily on this page)
whose infimum stored the lock state; lock bits are
reset on the infimum */
lock_mutex_enter_kernel();
comp = page_is_comp(page);
ut_ad(!comp == !page_rec_is_comp(rec));
lock_rec_move(rec, page_get_infimum_rec(page), comp);
lock_mutex_exit_kernel();
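/* Typical usage of the store/restore pair above, as an illustrative
fragment (excluded from the build); the pessimistic update path in
the B-tree code is the kind of caller meant here, and new_rec is a
hypothetical name for the reinserted record: */
#if 0
	lock_rec_store_on_page_infimum(page, rec);

	/* ... delete rec and reinsert it, possibly with a new size
	and possibly on another page ... */

	lock_rec_restore_from_page_infimum(new_rec, page);
#endif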
/*=========== DEADLOCK CHECKING ======================================*/
/************************************************************************
Checks if a lock request results in a deadlock. */
lock_deadlock_occurs(
/*=================*/
/* out: TRUE if a deadlock was detected and we
chose trx as a victim; FALSE if no deadlock, or
there was a deadlock, but we chose other
transaction(s) as victim(s) */
lock_t* lock, /* in: lock the transaction is requesting */
trx_t* trx) /* in: transaction */
dict_table_t* table;
dict_index_t* index;
ut_ad(mutex_own(&kernel_mutex));
/* We check that adding this trx to the waits-for graph
does not produce a cycle. First mark all active transactions
with 0. */
mark_trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
mark_trx->deadlock_mark = 0;
mark_trx = UT_LIST_GET_NEXT(trx_list, mark_trx);
ret = lock_deadlock_recursive(trx, trx, lock, &cost, 0);
if (ret == LOCK_VICTIM_IS_OTHER) {
/* We chose some other trx as a victim: retry if there still
is a deadlock */
if (ret == LOCK_VICTIM_IS_START) {
if (lock_get_type(lock) & LOCK_TABLE) {
table = lock->un_member.tab_lock.table;
index = lock->index;
table = index->table;
lock_deadlock_found = TRUE;
fputs("*** WE ROLL BACK TRANSACTION (2)\n",
lock_latest_err_file);
/************************************************************************
Looks recursively for a deadlock. */
lock_deadlock_recursive(
/*====================*/
/* out: 0 if no deadlock found,
LOCK_VICTIM_IS_START if there was a deadlock
and we chose 'start' as the victim,
LOCK_VICTIM_IS_OTHER if a deadlock
was found and we chose some other trx as a
victim: we must do the search again in this
last case because there may be another
deadlock! */
trx_t* start, /* in: recursion starting point */
trx_t* trx, /* in: a transaction waiting for a lock */
lock_t* wait_lock, /* in: the lock trx is waiting to be granted */
ulint* cost, /* in/out: number of calculation steps thus
far: if this exceeds LOCK_MAX_N_STEPS_...
we return LOCK_VICTIM_IS_START */
ulint depth) /* in: recursion depth: if this exceeds
LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
return LOCK_VICTIM_IS_START */
ulint bit_no = ULINT_UNDEFINED;
ut_ad(mutex_own(&kernel_mutex));
if (trx->deadlock_mark == 1) {
/* We have already exhaustively searched the subtree starting
from this trx. */
if (lock_get_type(wait_lock) == LOCK_REC) {
bit_no = lock_rec_find_set_bit(wait_lock);
ut_a(bit_no != ULINT_UNDEFINED);
/* Look at the locks ahead of wait_lock in the lock queue */
if (lock_get_type(lock) & LOCK_TABLE) {
lock = UT_LIST_GET_PREV(un_member.tab_lock.locks,
ut_ad(lock_get_type(lock) == LOCK_REC);
ut_a(bit_no != ULINT_UNDEFINED);
lock = lock_rec_get_prev(lock, bit_no);
/* We can mark this subtree as searched */
trx->deadlock_mark = 1;
if (lock_has_to_wait(wait_lock, lock)) {
= depth > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK
|| *cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK;
lock_trx = lock->trx;
if (lock_trx == start || too_far) {
/* We came back to the recursion starting
point: a deadlock detected; or we have
searched the waits-for graph too long */
FILE* ef = lock_latest_err_file;
ut_print_timestamp(ef);
fputs("\n*** (1) TRANSACTION:\n", ef);
trx_print(ef, wait_lock->trx, 3000);
fputs("*** (1) WAITING FOR THIS LOCK"
" TO BE GRANTED:\n", ef);
if (lock_get_type(wait_lock) == LOCK_REC) {
lock_rec_print(ef, wait_lock);
lock_table_print(ef, wait_lock);
fputs("*** (2) TRANSACTION:\n", ef);
trx_print(ef, lock->trx, 3000);
fputs("*** (2) HOLDS THE LOCK(S):\n", ef);
if (lock_get_type(lock) == LOCK_REC) {
lock_rec_print(ef, lock);
lock_table_print(ef, lock);
fputs("*** (2) WAITING FOR THIS LOCK"
" TO BE GRANTED:\n", ef);
if (lock_get_type(start->wait_lock)
lock_rec_print(ef, start->wait_lock);
lock_table_print(ef, start->wait_lock);
if (lock_print_waits) {
fputs("Deadlock detected"
" or too long search\n",
#endif /* UNIV_DEBUG */
fputs("TOO DEEP OR LONG SEARCH"
" IN THE LOCK TABLE"
" WAITS-FOR GRAPH\n", ef);
return(LOCK_VICTIM_IS_START);
if (trx_weight_cmp(wait_lock->trx,
/* Our recursion starting point
transaction is 'smaller', let us
choose 'start' as the victim and roll
it back. */
return(LOCK_VICTIM_IS_START);
lock_deadlock_found = TRUE;
/* Let us choose the transaction of wait_lock
as a victim to try to avoid deadlocking our
recursion starting point transaction */
fputs("*** WE ROLL BACK TRANSACTION (1)\n",
wait_lock->trx->was_chosen_as_deadlock_victim
lock_cancel_waiting_and_release(wait_lock);
/* Since trx and wait_lock are no longer
in the waits-for graph, we can return FALSE;
note that our selective algorithm can choose
several transactions as victims, but still
we may end up rolling back also the recursion
starting point transaction! */
return(LOCK_VICTIM_IS_OTHER);
if (lock_trx->que_state == TRX_QUE_LOCK_WAIT) {
/* Another trx ahead has requested lock in an
incompatible mode, and is itself waiting for
a lock. */
ret = lock_deadlock_recursive(
lock_trx->wait_lock, cost, depth + 1);
}/* end of the 'for (;;)'-loop */
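/* The recursion above is a depth-first search of the waits-for graph
with memoization (deadlock_mark) plus two safety valves: a step
budget (*cost) and a depth cap. A stripped-down model of the same
search, as a standalone sketch (excluded from the build): plain
arrays stand in for the trx list, and each transaction is simplified
to at most one outgoing wait edge, whereas the real search branches
over every conflicting lock ahead in the queue. The caller fills
waits_for[] and zeroes mark[] before the search. */
#if 0
#define N_TRX	8

static int	waits_for[N_TRX];	/* -1, or index of blocking trx */
static int	mark[N_TRX];		/* 1 = subtree already searched */

/* Returns 1 if following wait edges from t reaches start (a cycle),
or if the search runs too long or too deep. */
static int
dfs_finds_cycle(int start, int t, unsigned* cost, unsigned depth)
{
	int	next;

	if (mark[t]) {

		return(0);	/* subtree already searched exhaustively */
	}

	if (++*cost > 1000000 || depth > 200) {

		return(1);	/* search too long: treated like a cycle */
	}

	next = waits_for[t];

	if (next == -1) {
		mark[t] = 1;	/* no blocker: mark subtree as searched */

		return(0);
	}

	if (next == start) {

		return(1);	/* reached the starting trx: deadlock */
	}

	return(dfs_finds_cycle(start, next, cost, depth + 1));
}
#endif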
/*========================= TABLE LOCKS ==============================*/
/*************************************************************************
Creates a table lock object and adds it as the last in the lock queue
of the table. Does NOT check for deadlocks or lock compatibility. */
/* out, own: new lock object */
dict_table_t* table, /* in: database table in dictionary cache */
ulint type_mode,/* in: lock mode possibly ORed with
LOCK_WAIT */
trx_t* trx) /* in: trx */
ut_ad(table && trx);
ut_ad(mutex_own(&kernel_mutex));
if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) {
++table->n_waiting_or_granted_auto_inc_locks;
if (type_mode == LOCK_AUTO_INC) {
/* Only one trx can have the lock on the table
at a time: we may use the memory preallocated
to the table object */
lock = table->auto_inc_lock;
ut_a(trx->auto_inc_lock == NULL);
trx->auto_inc_lock = lock;
lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t));
UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, lock);
lock->type_mode = type_mode | LOCK_TABLE;
lock->un_member.tab_lock.table = table;
UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock);
if (type_mode & LOCK_WAIT) {
lock_set_lock_and_trx_wait(lock, trx);
/*****************************************************************
Removes a table lock request from the queue and the trx list of locks;
this is a low-level function which does NOT check if waiting requests
can now be granted. */
lock_table_remove_low(
/*==================*/
lock_t* lock) /* in: table lock */
dict_table_t* table;
ut_ad(mutex_own(&kernel_mutex));
table = lock->un_member.tab_lock.table;
if (lock == trx->auto_inc_lock) {
trx->auto_inc_lock = NULL;
ut_a(table->n_waiting_or_granted_auto_inc_locks > 0);
--table->n_waiting_or_granted_auto_inc_locks;
UT_LIST_REMOVE(trx_locks, trx->trx_locks, lock);
UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock);
/*************************************************************************
Enqueues a waiting request for a table lock which cannot be granted
immediately. Checks for deadlocks. */
lock_table_enqueue_waiting(
/*=======================*/
/* out: DB_LOCK_WAIT, DB_DEADLOCK, or
DB_QUE_THR_SUSPENDED, or DB_SUCCESS;
DB_SUCCESS means that there was a deadlock,
but another transaction was chosen as a
victim, and we got the lock immediately:
no need to wait then */
ulint mode, /* in: lock mode this transaction is
requesting */
dict_table_t* table, /* in: table */
que_thr_t* thr) /* in: query thread */
ut_ad(mutex_own(&kernel_mutex));
/* Test if there already is some other reason to suspend thread:
we do not enqueue a lock request if the query thread should be
stopped anyway */
if (que_thr_stop(thr)) {
return(DB_QUE_THR_SUSPENDED);
trx = thr_get_trx(thr);
if (trx->dict_operation) {
ut_print_timestamp(stderr);
fputs(" InnoDB: Error: a table lock wait happens"
" in a dictionary operation!\n"
"InnoDB: Table name ", stderr);
ut_print_name(stderr, trx, TRUE, table->name);
"InnoDB: Submit a detailed bug report"
" to http://bugs.mysql.com\n",
/* Enqueue the lock request that will wait to be granted */
lock = lock_table_create(table, mode | LOCK_WAIT, trx);
/* Check if a deadlock occurs: if yes, remove the lock request and
return an error code */
if (lock_deadlock_occurs(lock, trx)) {
lock_reset_lock_and_trx_wait(lock);
lock_table_remove_low(lock);
return(DB_DEADLOCK);
if (trx->wait_lock == NULL) {
/* Deadlock resolution chose another transaction as a victim,
and we accidentally got our lock granted! */
trx->que_state = TRX_QUE_LOCK_WAIT;
trx->was_chosen_as_deadlock_victim = FALSE;
trx->wait_started = time(NULL);
ut_a(que_thr_stop(thr));
return(DB_LOCK_WAIT);
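/* Caller-side view of the return codes documented above, as an
illustrative fragment (excluded from the build): DB_SUCCESS from this
routine means the deadlock check chose some other transaction as the
victim and our lock was granted while we were enqueuing, so there is
nothing to wait for. */
#if 0
	err = lock_table_enqueue_waiting(mode | flags, table, thr);

	switch (err) {
	case DB_LOCK_WAIT:		/* suspend this query thread */
		break;
	case DB_DEADLOCK:		/* roll back this transaction */
		break;
	case DB_QUE_THR_SUSPENDED:	/* thread was stopping anyway */
		break;
	case DB_SUCCESS:		/* granted after victim selection:
					no wait is needed */
		break;
	}
#endif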
/*************************************************************************
Checks if other transactions have an incompatible mode lock request in
the lock queue. */
lock_table_other_has_incompatible(
/*==============================*/
trx_t* trx, /* in: transaction, or NULL if all
transactions should be included */
ulint wait, /* in: LOCK_WAIT if also waiting locks are
taken into account, or 0 if not */
dict_table_t* table, /* in: table */
ulint mode) /* in: lock mode */
ut_ad(mutex_own(&kernel_mutex));
lock = UT_LIST_GET_LAST(table->locks);
while (lock != NULL) {
if ((lock->trx != trx)
&& (!lock_mode_compatible(lock_get_mode(lock), mode))
&& (wait || !(lock_get_wait(lock)))) {
lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
/*************************************************************************
Locks the specified database table in the mode given. If the lock cannot
be granted immediately, the query thread is put to wait. */
/* out: DB_SUCCESS, DB_LOCK_WAIT,
DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
does nothing */
dict_table_t* table, /* in: database table in dictionary cache */
ulint mode, /* in: lock mode */
que_thr_t* thr) /* in: query thread */
ut_ad(table && thr);
if (flags & BTR_NO_LOCKING_FLAG) {
trx = thr_get_trx(thr);
lock_mutex_enter_kernel();
/* Look for stronger locks the same trx already has on the table */
if (lock_table_has(trx, table, mode)) {
lock_mutex_exit_kernel();
/* We have to check if the new lock is compatible with any locks
other transactions have in the table lock queue. */
if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) {
/* Another trx has a request on the table in an incompatible
mode: this trx may have to wait */
err = lock_table_enqueue_waiting(mode | flags, table, thr);
lock_mutex_exit_kernel();
lock_table_create(table, mode | flags, trx);
ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
lock_mutex_exit_kernel();
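/* Typical use of lock_table(), as an illustrative fragment (excluded
from the build; the row operation code paths are the usual callers):
take an intention lock on the table before setting record locks on
its rows. */
#if 0
	err = lock_table(0, table, LOCK_IX, thr);

	if (err != DB_SUCCESS) {

		return(err);	/* DB_LOCK_WAIT, DB_DEADLOCK, ... */
	}
#endif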
/*************************************************************************
Checks if there are any locks set on the table. */
/* out: TRUE if there are lock(s) */
dict_table_t* table) /* in: database table in dictionary cache */
lock_mutex_enter_kernel();
if (UT_LIST_GET_LAST(table->locks)) {
lock_mutex_exit_kernel();
/*************************************************************************
Checks if a waiting table lock request still has to wait in a queue. */
lock_table_has_to_wait_in_queue(
/*============================*/
/* out: TRUE if still has to wait */
lock_t* wait_lock) /* in: waiting table lock */
dict_table_t* table;
ut_ad(lock_get_wait(wait_lock));
table = wait_lock->un_member.tab_lock.table;
lock = UT_LIST_GET_FIRST(table->locks);
while (lock != wait_lock) {
if (lock_has_to_wait(wait_lock, lock)) {
lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
/*****************************************************************
Removes a table lock request, waiting or granted, from the queue and grants
locks to other transactions in the queue, if they now are entitled to a
lock. */
lock_t* in_lock)/* in: table lock object; transactions waiting
behind will get their lock requests granted, if
they are now qualified to it */
ut_ad(mutex_own(&kernel_mutex));
ut_a(lock_get_type(in_lock) == LOCK_TABLE);
lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
lock_table_remove_low(in_lock);
/* Check if waiting locks in the queue can now be granted: grant
locks if there are no conflicting locks ahead. */
while (lock != NULL) {
if (lock_get_wait(lock)
&& !lock_table_has_to_wait_in_queue(lock)) {
/* Grant the lock */
lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
/*=========================== LOCK RELEASE ==============================*/
/*****************************************************************
Removes a granted record lock of a transaction from the queue and grants
locks to other transactions waiting in the queue if they now are entitled
to a lock. */
trx_t* trx, /* in: transaction that has set a record
lock */
rec_t* rec, /* in: record */
ulint lock_mode) /* in: LOCK_S or LOCK_X */
lock_t* release_lock = NULL;
mutex_enter(&kernel_mutex);
heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec));
lock = lock_rec_get_first(rec);
/* Find the last lock with the same lock_mode and transaction
on the record. */
while (lock != NULL) {
if (lock->trx == trx && lock_get_mode(lock) == lock_mode) {
release_lock = lock;
ut_a(!lock_get_wait(lock));
lock = lock_rec_get_next(rec, lock);
/* If a record lock is found, release the record lock */
if (UNIV_LIKELY(release_lock != NULL)) {
lock_rec_reset_nth_bit(release_lock, heap_no);
mutex_exit(&kernel_mutex);
ut_print_timestamp(stderr);
" InnoDB: Error: unlock row could not"
" find a %lu mode lock on the record\n",
/* Check if we can now grant waiting lock requests */
lock = lock_rec_get_first(rec);
while (lock != NULL) {
if (lock_get_wait(lock)
&& !lock_rec_has_to_wait_in_queue(lock)) {
/* Grant the lock */
lock = lock_rec_get_next(rec, lock);
mutex_exit(&kernel_mutex);
/*************************************************************************
Releases a table lock.
Releases possible other transactions waiting for this lock. */
lock_t* lock) /* in: lock */
mutex_enter(&kernel_mutex);
lock_table_dequeue(lock);
mutex_exit(&kernel_mutex);
/*************************************************************************
Releases an auto-inc lock a transaction possibly has on a table.
Releases possible other transactions waiting for this lock. */
lock_table_unlock_auto_inc(
/*=======================*/
trx_t* trx) /* in: transaction */
if (trx->auto_inc_lock) {
mutex_enter(&kernel_mutex);
lock_table_dequeue(trx->auto_inc_lock);
mutex_exit(&kernel_mutex);
/*************************************************************************
Releases transaction locks, and releases possible other transactions waiting
because of these locks. */
lock_release_off_kernel(
/*====================*/
trx_t* trx) /* in: transaction */
dict_table_t* table;
ut_ad(mutex_own(&kernel_mutex));
lock = UT_LIST_GET_LAST(trx->trx_locks);
while (lock != NULL) {
if (lock_get_type(lock) == LOCK_REC) {
lock_rec_dequeue_from_page(lock);
ut_ad(lock_get_type(lock) & LOCK_TABLE);
if (lock_get_mode(lock) != LOCK_IS
&& 0 != ut_dulint_cmp(trx->undo_no,
/* The trx may have modified the table. We
block the use of the MySQL query cache for
all currently active transactions. */
table = lock->un_member.tab_lock.table;
table->query_cache_inv_trx_id
= trx_sys->max_trx_id;
lock_table_dequeue(lock);
if (count == LOCK_RELEASE_KERNEL_INTERVAL) {
/* Release the kernel mutex for a while, so that we
do not monopolize it */
lock_mutex_exit_kernel();
lock_mutex_enter_kernel();
lock = UT_LIST_GET_LAST(trx->trx_locks);
mem_heap_empty(trx->lock_heap);
ut_a(trx->auto_inc_lock == NULL);
/*************************************************************************
Cancels a waiting lock request and releases possible other transactions
waiting behind it. */
lock_cancel_waiting_and_release(
/*============================*/
lock_t* lock) /* in: waiting lock request */
ut_ad(mutex_own(&kernel_mutex));
if (lock_get_type(lock) == LOCK_REC) {
lock_rec_dequeue_from_page(lock);
ut_ad(lock_get_type(lock) & LOCK_TABLE);
lock_table_dequeue(lock);
/* Reset the wait flag and the back pointer to lock in trx */
lock_reset_lock_and_trx_wait(lock);
/* The following function releases the trx from lock wait */
trx_end_lock_wait(lock->trx);
/*************************************************************************
Resets all record and table locks of a transaction on a table to be dropped.
No lock is allowed to be a wait lock. */
lock_reset_all_on_table_for_trx(
/*============================*/
dict_table_t* table, /* in: table to be dropped */
trx_t* trx) /* in: a transaction */
ut_ad(mutex_own(&kernel_mutex));
lock = UT_LIST_GET_LAST(trx->trx_locks);
while (lock != NULL) {
prev_lock = UT_LIST_GET_PREV(trx_locks, lock);
if (lock_get_type(lock) == LOCK_REC
&& lock->index->table == table) {
ut_a(!lock_get_wait(lock));
lock_rec_discard(lock);
} else if (lock_get_type(lock) & LOCK_TABLE
&& lock->un_member.tab_lock.table == table) {
ut_a(!lock_get_wait(lock));
lock_table_remove_low(lock);
/*************************************************************************
Resets all locks, both table and record locks, on a table to be dropped.
No lock is allowed to be a wait lock. */
lock_reset_all_on_table(
/*====================*/
dict_table_t* table) /* in: table to be dropped */
mutex_enter(&kernel_mutex);
lock = UT_LIST_GET_FIRST(table->locks);
ut_a(!lock_get_wait(lock));
lock_reset_all_on_table_for_trx(table, lock->trx);
lock = UT_LIST_GET_FIRST(table->locks);
mutex_exit(&kernel_mutex);
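/* Note the restart pattern above: after each call that may remove
several locks, the loop re-reads the head of table->locks instead of
following a saved next-pointer, which could by then be dangling. The
same shape in miniature, as a standalone sketch (excluded from the
build): */
#if 0
struct tnode {
	struct tnode*	next;	/* next element in the list */
};

static struct tnode*	list_head;

/* Stand-in for a call that may remove several nodes */
static void
remove_matching(struct tnode* n)
{
	list_head = n->next;
}

static void
drain_list(void)
{
	struct tnode*	n;

	while ((n = list_head) != NULL) {
		remove_matching(n);	/* invalidates saved iterators */
		/* re-read list_head instead of following n->next */
	}
}
#endif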
/*===================== VALIDATION AND DEBUGGING ====================*/
/*************************************************************************
Prints info of a table lock. */
FILE* file, /* in: file where to print */
lock_t* lock) /* in: table type lock */
ut_ad(mutex_own(&kernel_mutex));
ut_a(lock_get_type(lock) == LOCK_TABLE);
fputs("TABLE LOCK table ", file);
ut_print_name(file, lock->trx, TRUE,
lock->un_member.tab_lock.table->name);
fprintf(file, " trx id %lu %lu",
(ulong) (lock->trx)->id.high, (ulong) (lock->trx)->id.low);
if (lock_get_mode(lock) == LOCK_S) {
fputs(" lock mode S", file);
} else if (lock_get_mode(lock) == LOCK_X) {
fputs(" lock mode X", file);
} else if (lock_get_mode(lock) == LOCK_IS) {
fputs(" lock mode IS", file);
} else if (lock_get_mode(lock) == LOCK_IX) {
fputs(" lock mode IX", file);
} else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
fputs(" lock mode AUTO-INC", file);
fprintf(file, " unknown lock mode %lu",
(ulong) lock_get_mode(lock));
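/* The if/else chain above could equally be table-driven; an
illustrative alternative (excluded from the build) that makes no
assumption about the numeric values of the mode constants: */
#if 0
	static const struct {
		ulint		mode;
		const char*	name;
	} mode_names[] = {
		{LOCK_S, " lock mode S"},
		{LOCK_X, " lock mode X"},
		{LOCK_IS, " lock mode IS"},
		{LOCK_IX, " lock mode IX"},
		{LOCK_AUTO_INC, " lock mode AUTO-INC"},
	};
	ulint	i;

	for (i = 0; i < sizeof(mode_names) / sizeof(*mode_names); i++) {
		if (lock_get_mode(lock) == mode_names[i].mode) {
			fputs(mode_names[i].name, file);
			break;
		}
	}
#endif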
if (lock_get_wait(lock)) {
fputs(" waiting", file);
/*************************************************************************
Prints info of a record lock. */
FILE* file, /* in: file where to print */
lock_t* lock) /* in: record type lock */
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
*offsets_ = (sizeof offsets_) / sizeof *offsets_;
ut_ad(mutex_own(&kernel_mutex));
ut_a(lock_get_type(lock) == LOCK_REC);
space = lock->un_member.rec_lock.space;
page_no = lock->un_member.rec_lock.page_no;
fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu ",
(ulong) space, (ulong) page_no,
(ulong) lock_rec_get_n_bits(lock));
dict_index_name_print(file, lock->trx, lock->index);
fprintf(file, " trx id %lu %lu",
(ulong) (lock->trx)->id.high,
(ulong) (lock->trx)->id.low);
if (lock_get_mode(lock) == LOCK_S) {
fputs(" lock mode S", file);
} else if (lock_get_mode(lock) == LOCK_X) {
fputs(" lock mode X", file);
if (lock_rec_get_gap(lock)) {
fputs(" locks gap before rec", file);
if (lock_rec_get_rec_not_gap(lock)) {
fputs(" locks rec but not gap", file);
if (lock_rec_get_insert_intention(lock)) {
fputs(" insert intention", file);
if (lock_get_wait(lock)) {
fputs(" waiting", file);
/* If the page is not in the buffer pool, we cannot load it
because we have the kernel mutex and ibuf operations would
break the latching order */
page = buf_page_get_gen(space, page_no, RW_NO_LATCH,
NULL, BUF_GET_IF_IN_POOL,
__FILE__, __LINE__, &mtr);
page = buf_page_get_nowait(space, page_no, RW_S_LATCH, &mtr);
/* Let us try to get an X-latch. If the current thread
is holding an X-latch on the page, we cannot get an
S-latch. */
page = buf_page_get_nowait(space, page_no, RW_X_LATCH,
#ifdef UNIV_SYNC_DEBUG
buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
if (lock_rec_get_nth_bit(lock, i)) {
fprintf(file, "Record lock, heap no %lu ", (ulong) i);
= page_find_rec_with_heap_no(page, i);
offsets = rec_get_offsets(
rec, lock->index, offsets,
ULINT_UNDEFINED, &heap);
rec_print_new(file, rec, offsets);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
#ifndef UNIV_HOTBACKUP
/*************************************************************************
Calculates the number of record lock structs in the record lock hash table. */
lock_get_n_rec_locks(void)
/*======================*/
ut_ad(mutex_own(&kernel_mutex));
for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
lock = HASH_GET_FIRST(lock_sys->rec_hash, i);
lock = HASH_GET_NEXT(hash, lock);
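/* The counting loop above is a plain traversal of every chain in the
record lock hash table. The same shape in miniature, as a standalone
sketch (excluded from the build; plain C, not the InnoDB hash API): */
#if 0
#include <stddef.h>

struct node {
	struct node*	next;	/* next element in the same chain */
};

static unsigned long
count_all(struct node** cells, size_t n_cells)
{
	unsigned long	n = 0;
	size_t		i;
	struct node*	p;

	for (i = 0; i < n_cells; i++) {
		for (p = cells[i]; p != NULL; p = p->next) {
			n++;
		}
	}

	return(n);
}
#endif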
/*************************************************************************
Prints info of locks for all transactions. */
lock_print_info_summary(
/*====================*/
FILE* file) /* in: file where to print */
/* We must protect the MySQL thd->query field with a MySQL mutex, and
because the MySQL mutex must be reserved before the kernel_mutex of
InnoDB, we call innobase_mysql_prepare_print_arbitrary_thd() here. */
innobase_mysql_prepare_print_arbitrary_thd();
lock_mutex_enter_kernel();
if (lock_deadlock_found) {
fputs("------------------------\n"
"LATEST DETECTED DEADLOCK\n"
"------------------------\n", file);
ut_copy_file(file, lock_latest_err_file);
fputs("------------\n"
"------------\n", file);
fprintf(file, "Trx id counter %lu %lu\n",
(ulong) ut_dulint_get_high(trx_sys->max_trx_id),
(ulong) ut_dulint_get_low(trx_sys->max_trx_id));
"Purge done for trx's n:o < %lu %lu undo n:o < %lu %lu\n",
(ulong) ut_dulint_get_high(purge_sys->purge_trx_no),
(ulong) ut_dulint_get_low(purge_sys->purge_trx_no),
(ulong) ut_dulint_get_high(purge_sys->purge_undo_no),
(ulong) ut_dulint_get_low(purge_sys->purge_undo_no));
"History list length %lu\n",
(ulong) trx_sys->rseg_history_len);
"Total number of lock structs in row lock hash table %lu\n",
(ulong) lock_get_n_rec_locks());
/*************************************************************************
Prints info of locks for each transaction. */
lock_print_info_all_transactions(
/*=============================*/
FILE* file) /* in: file where to print */
ibool load_page_first = TRUE;
fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
/* First print info on non-active transactions */
trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
if (trx->conc_state == TRX_NOT_STARTED) {
trx_print(file, trx, 600);
trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
/* Since we temporarily release the kernel mutex when
reading a database page below, the variable trx may be
obsolete now and we must loop through the trx list to
get probably the same trx, or some other trx. */
while (trx && (i < nth_trx)) {
trx = UT_LIST_GET_NEXT(trx_list, trx);
lock_mutex_exit_kernel();
innobase_mysql_end_print_arbitrary_thd();
ut_ad(lock_validate());
if (nth_lock == 0) {
trx_print(file, trx, 600);
if (trx->read_view) {
"Trx read view will not see trx with"
" id >= %lu %lu, sees < %lu %lu\n",
(ulong) ut_dulint_get_high(
trx->read_view->low_limit_id),
(ulong) ut_dulint_get_low(
trx->read_view->low_limit_id),
(ulong) ut_dulint_get_high(
trx->read_view->up_limit_id),
(ulong) ut_dulint_get_low(
trx->read_view->up_limit_id));
if (trx->que_state == TRX_QUE_LOCK_WAIT) {
"------- TRX HAS BEEN WAITING %lu SEC"
" FOR THIS LOCK TO BE GRANTED:\n",
(ulong) difftime(time(NULL),
trx->wait_started));
if (lock_get_type(trx->wait_lock) == LOCK_REC) {
lock_rec_print(file, trx->wait_lock);
lock_table_print(file, trx->wait_lock);
fputs("------------------\n", file);
if (!srv_print_innodb_lock_monitor) {
/* See the note about the trx loop above for why we loop here:
lock may be an obsolete pointer now. */
lock = UT_LIST_GET_FIRST(trx->trx_locks);
while (lock && (i < nth_lock)) {
lock = UT_LIST_GET_NEXT(trx_locks, lock);
if (lock_get_type(lock) == LOCK_REC) {
space = lock->un_member.rec_lock.space;
page_no = lock->un_member.rec_lock.page_no;
if (load_page_first) {
lock_mutex_exit_kernel();
innobase_mysql_end_print_arbitrary_thd();
page = buf_page_get_with_no_latch(
space, page_no, &mtr);
load_page_first = FALSE;
innobase_mysql_prepare_print_arbitrary_thd();
lock_mutex_enter_kernel();
lock_rec_print(file, lock);
ut_ad(lock_get_type(lock) & LOCK_TABLE);
lock_table_print(file, lock);
load_page_first = TRUE;
if (nth_lock >= 10) {
fputs("10 LOCKS PRINTED FOR THIS TRX:"
" SUPPRESSING FURTHER PRINTS\n",
4380
/*************************************************************************
4381
Validates the lock queue on a table. */
4384
lock_table_queue_validate(
4385
/*======================*/
4386
/* out: TRUE if ok */
4387
dict_table_t* table) /* in: table */
4391
ut_ad(mutex_own(&kernel_mutex));
4393
lock = UT_LIST_GET_FIRST(table->locks);
4396
ut_a(((lock->trx)->conc_state == TRX_ACTIVE)
4397
|| ((lock->trx)->conc_state == TRX_PREPARED)
4398
|| ((lock->trx)->conc_state == TRX_COMMITTED_IN_MEMORY));
4400
if (!lock_get_wait(lock)) {
4402
ut_a(!lock_table_other_has_incompatible(
4403
lock->trx, 0, table,
4404
lock_get_mode(lock)));
4407
ut_a(lock_table_has_to_wait_in_queue(lock));
4410
lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
4416
/*************************************************************************
Validates the lock queue on a single record. */

ibool
lock_rec_queue_validate(
/*====================*/
				/* out: TRUE if ok */
	rec_t*		rec,	/* in: record to look at */
	dict_index_t*	index,	/* in: index, or NULL if not known */
	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
{
	trx_t*	impl_trx;
	lock_t*	lock;
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));

	lock_mutex_enter_kernel();

	if (!page_rec_is_user_rec(rec)) {

		lock = lock_rec_get_first(rec);

		while (lock) {
			switch (lock->trx->conc_state) {
			case TRX_ACTIVE:
			case TRX_PREPARED:
			case TRX_COMMITTED_IN_MEMORY:
				break;
			default:
				ut_error;
			}

			ut_a(trx_in_trx_list(lock->trx));

			if (lock_get_wait(lock)) {
				ut_a(lock_rec_has_to_wait_in_queue(lock));
			}

			if (index) {
				ut_a(lock->index == index);
			}

			lock = lock_rec_get_next(rec, lock);
		}

		lock_mutex_exit_kernel();

		return(TRUE);
	}
	if (index && (index->type & DICT_CLUSTERED)) {

		impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);

		if (impl_trx && lock_rec_other_has_expl_req(
			    LOCK_S, 0, LOCK_WAIT, rec, impl_trx)) {

			ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
					       rec, impl_trx));
		}
	}
	if (index && !(index->type & DICT_CLUSTERED)) {

		/* The kernel mutex may get released temporarily in the
		next function call: we have to release the lock table
		mutex to obey the latching order */

		impl_trx = lock_sec_rec_some_has_impl_off_kernel(
			rec, index, offsets);

		if (impl_trx && lock_rec_other_has_expl_req(
			    LOCK_S, 0, LOCK_WAIT, rec, impl_trx)) {

			ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
					       rec, impl_trx));
		}
	}
	lock = lock_rec_get_first(rec);

	while (lock) {
		ut_a(lock->trx->conc_state == TRX_ACTIVE
		     || lock->trx->conc_state == TRX_PREPARED
		     || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY);
		ut_a(trx_in_trx_list(lock->trx));

		if (index) {
			ut_a(lock->index == index);
		}

		if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {

			ulint	mode;

			if (lock_get_mode(lock) == LOCK_S) {
				mode = LOCK_X;
			} else {
				mode = LOCK_S;
			}
			ut_a(!lock_rec_other_has_expl_req(
				     mode, 0, 0, rec, lock->trx));

		} else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {

			ut_a(lock_rec_has_to_wait_in_queue(lock));
		}

		lock = lock_rec_get_next(rec, lock);
	}

	lock_mutex_exit_kernel();

	return(TRUE);
}
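/* Usage sketch (illustrative, not part of the original source): a caller
that does not have the offsets array at hand computes it first, exactly
as the UNIV_DEBUG blocks further below in this file do:

	mem_heap_t*	heap	= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	const ulint*	offsets;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	offsets = rec_get_offsets(rec, index, offsets_,
				  ULINT_UNDEFINED, &heap);
	ut_ad(lock_rec_queue_validate(rec, index, offsets));
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
*/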
/*************************************************************************
Validates the record lock queues on a page. */

ibool
lock_rec_validate_page(
/*===================*/
			/* out: TRUE if ok */
	ulint	space,	/* in: space id */
	ulint	page_no)/* in: page number */
{
	dict_index_t*	index;
	page_t*		page;
	lock_t*		lock;
	rec_t*		rec;
	ulint		nth_lock	= 0;
	ulint		nth_bit		= 0;
	ulint		i;
	mtr_t		mtr;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
	ut_ad(!mutex_own(&kernel_mutex));

	mtr_start(&mtr);

	page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
	buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */

	lock_mutex_enter_kernel();
loop:
	lock = lock_rec_get_first_on_page_addr(space, page_no);

	if (!lock) {
		goto function_exit;
	}
	for (i = 0; i < nth_lock; i++) {

		lock = lock_rec_get_next_on_page(lock);

		if (!lock) {
			goto function_exit;
		}
	}

	ut_a(trx_in_trx_list(lock->trx));
	ut_a(lock->trx->conc_state == TRX_ACTIVE
	     || lock->trx->conc_state == TRX_PREPARED
	     || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY);
	for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {

		if (i == 1 || lock_rec_get_nth_bit(lock, i)) {

			index = lock->index;
			rec = page_find_rec_with_heap_no(page, i);
			offsets = rec_get_offsets(rec, index, offsets,
						  ULINT_UNDEFINED, &heap);

			fprintf(stderr,
				"Validating %lu %lu\n",
				(ulong) space, (ulong) page_no);

			lock_mutex_exit_kernel();

			lock_rec_queue_validate(rec, index, offsets);

			lock_mutex_enter_kernel();

			nth_bit = i + 1;

			goto loop;
		}
	}

	nth_bit = 0;
	nth_lock++;

	goto loop;
function_exit:
	lock_mutex_exit_kernel();

	mtr_commit(&mtr);

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	return(TRUE);
}
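/* NOTE (illustrative sketch, not part of the original source): the
function above obeys the latching order by taking the page x-latch
through a mini-transaction BEFORE entering the kernel mutex, and by
dropping the kernel mutex around each recursive queue validation:

	mtr_start(&mtr);
	page = buf_page_get(space, page_no, RW_X_LATCH, &mtr);

	lock_mutex_enter_kernel();
	...
	lock_mutex_exit_kernel();

	mtr_commit(&mtr);
*/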
/*************************************************************************
Validates the lock system. */

ibool
lock_validate(void)
/*===============*/
			/* out: TRUE if ok */
{
	lock_t*	lock;
	trx_t*	trx;
	dulint	limit;
	ulint	space;
	ulint	page_no;
	ulint	i;
	lock_mutex_enter_kernel();

	trx = UT_LIST_GET_FIRST(trx_sys->trx_list);

	while (trx) {
		lock = UT_LIST_GET_FIRST(trx->trx_locks);

		while (lock) {
			if (lock_get_type(lock) & LOCK_TABLE) {

				lock_table_queue_validate(
					lock->un_member.tab_lock.table);
			}

			lock = UT_LIST_GET_NEXT(trx_locks, lock);
		}

		trx = UT_LIST_GET_NEXT(trx_list, trx);
	}
	for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {

		limit = ut_dulint_zero;

		for (;;) {
			lock = HASH_GET_FIRST(lock_sys->rec_hash, i);

			while (lock) {
				ut_a(trx_in_trx_list(lock->trx));

				space = lock->un_member.rec_lock.space;
				page_no = lock->un_member.rec_lock.page_no;

				if (ut_dulint_cmp(
					    ut_dulint_create(space, page_no),
					    limit) >= 0) {
					break;
				}

				lock = HASH_GET_NEXT(hash, lock);
			}

			if (!lock) {

				break;
			}

			lock_mutex_exit_kernel();

			lock_rec_validate_page(space, page_no);

			lock_mutex_enter_kernel();

			limit = ut_dulint_create(space, page_no + 1);
		}
	}

	lock_mutex_exit_kernel();

	return(TRUE);
}
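/* Usage sketch (illustrative, not part of the original source):
lock_validate() is intended for debug assertions made while the kernel
mutex is NOT held, as in the monitor exit path above:

	ut_ad(lock_validate());
*/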
#endif /* !UNIV_HOTBACKUP */

/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
/*************************************************************************
Checks if locks of other transactions prevent an immediate insert of
a record. If they do, first tests if the query thread should anyway
be suspended for some reason; if not, then puts the transaction and
the query thread to the lock wait state and inserts a waiting request
for a gap x-lock to the lock queue. */

ulint
lock_rec_insert_check_and_lock(
/*===========================*/
				/* out: DB_SUCCESS, DB_LOCK_WAIT,
				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
				does nothing */
	rec_t*		rec,	/* in: record after which to insert */
	dict_index_t*	index,	/* in: index */
	que_thr_t*	thr,	/* in: query thread */
	ibool*		inherit)/* out: set to TRUE if the new inserted
				record maybe should inherit LOCK_GAP type
				locks from the successor record */
{
	rec_t*	next_rec;
	trx_t*	trx;
	lock_t*	lock;
	ulint	err;
	if (flags & BTR_NO_LOCKING_FLAG) {

		return(DB_SUCCESS);
	}

	trx = thr_get_trx(thr);
	next_rec = page_rec_get_next(rec);

	*inherit = FALSE;

	lock_mutex_enter_kernel();

	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
	lock = lock_rec_get_first(next_rec);

	if (lock == NULL) {
		/* We optimize CPU time usage in the simplest case */

		lock_mutex_exit_kernel();

		if (!(index->type & DICT_CLUSTERED)) {

			/* Update the page max trx id field */
			page_update_max_trx_id(buf_frame_align(rec),
					       thr_get_trx(thr)->id);
		}

		return(DB_SUCCESS);
	}

	*inherit = TRUE;
	/* If another transaction has an explicit lock request which locks
	the gap, waiting or granted, on the successor, the insert has to wait.

	An exception is the case where the lock by another transaction
	is a gap type lock which it placed to wait for its turn to insert. We
	do not consider that kind of a lock conflicting with our insert. This
	eliminates an unnecessary deadlock which resulted when 2 transactions
	had to wait for their insert. Both had waiting gap type lock requests
	on the successor, which produced an unnecessary deadlock. */
	if (lock_rec_other_has_conflicting(
		    LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION, next_rec,
		    trx)) {

		/* Note that we may get DB_SUCCESS also here! */
		err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
					       | LOCK_INSERT_INTENTION,
					       next_rec, index, thr);
	} else {
		err = DB_SUCCESS;
	}

	lock_mutex_exit_kernel();
	if (!(index->type & DICT_CLUSTERED) && (err == DB_SUCCESS)) {

		/* Update the page max trx id field */
		page_update_max_trx_id(buf_frame_align(rec),
				       thr_get_trx(thr)->id);
	}
#ifdef UNIV_DEBUG
	{
		mem_heap_t*	heap		= NULL;
		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
		const ulint*	offsets;
		*offsets_ = (sizeof offsets_) / sizeof *offsets_;

		offsets = rec_get_offsets(next_rec, index, offsets_,
					  ULINT_UNDEFINED, &heap);
		ut_ad(lock_rec_queue_validate(next_rec, index, offsets));
		if (UNIV_LIKELY_NULL(heap)) {
			mem_heap_free(heap);
		}
	}
#endif /* UNIV_DEBUG */

	return(err);
}
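/* NOTE (illustrative sketch, not part of the original source): the lock
mode requested above for a blocked insert combines three flags:

	ulint	mode = LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION;

LOCK_GAP restricts the request to the gap before the successor record,
and LOCK_INSERT_INTENTION marks the request as a waiting insert, so that
two inserts waiting on the same gap do not regard each other as
conflicting. */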
/*************************************************************************
If a transaction has an implicit x-lock on a record, but no explicit x-lock
set on the record, sets one for it. NOTE that in the case of a secondary
index, the kernel mutex may get temporarily released. */
static
void
lock_rec_convert_impl_to_expl(
/*==========================*/
	rec_t*		rec,	/* in: user record on page */
	dict_index_t*	index,	/* in: index of record */
	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
{
	trx_t*	impl_trx;
	ut_ad(mutex_own(&kernel_mutex));
	ut_ad(page_rec_is_user_rec(rec));
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));

	if (index->type & DICT_CLUSTERED) {
		impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);
	} else {
		impl_trx = lock_sec_rec_some_has_impl_off_kernel(
			rec, index, offsets);
	}

	if (impl_trx) {
		/* If the transaction has no explicit x-lock set on the
		record, set one for it */

		if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec,
				       impl_trx)) {

			lock_rec_add_to_queue(
				LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP,
				rec, index, impl_trx);
		}
	}
}
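/* Usage sketch (illustrative, not part of the original source): the row
operation functions below all follow the same pattern under the kernel
mutex: first make a possible implicit x-lock explicit, then request the
actual lock:

	lock_mutex_enter_kernel();

	lock_rec_convert_impl_to_expl(rec, index, offsets);

	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
			    rec, index, thr);

	lock_mutex_exit_kernel();
*/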
/*************************************************************************
Checks if locks of other transactions prevent an immediate modify (update,
delete mark, or delete unmark) of a clustered index record. If they do,
first tests if the query thread should anyway be suspended for some
reason; if not, then puts the transaction and the query thread to the
lock wait state and inserts a waiting request for a record x-lock to the
lock queue. */

ulint
lock_clust_rec_modify_check_and_lock(
/*=================================*/
				/* out: DB_SUCCESS, DB_LOCK_WAIT,
				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
				does nothing */
	rec_t*		rec,	/* in: record which should be modified */
	dict_index_t*	index,	/* in: clustered index */
	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
	que_thr_t*	thr)	/* in: query thread */
{
	ulint	err;
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(index->type & DICT_CLUSTERED);

	if (flags & BTR_NO_LOCKING_FLAG) {

		return(DB_SUCCESS);
	}

	lock_mutex_enter_kernel();

	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));

	/* If a transaction has no explicit x-lock set on the record, set one
	for it */

	lock_rec_convert_impl_to_expl(rec, index, offsets);

	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, rec, index, thr);

	lock_mutex_exit_kernel();

	ut_ad(lock_rec_queue_validate(rec, index, offsets));

	return(err);
}
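/* Usage sketch (illustrative, not part of the original source): a caller
in an update path propagates the return code and suspends the query
thread only on DB_LOCK_WAIT:

	err = lock_clust_rec_modify_check_and_lock(0, rec, index,
						   offsets, thr);
	if (err == DB_LOCK_WAIT) {
		... suspend the query thread, retry after the wait ...
	}
*/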
/*************************************************************************
Checks if locks of other transactions prevent an immediate modify (delete
mark or delete unmark) of a secondary index record. */

ulint
lock_sec_rec_modify_check_and_lock(
/*===============================*/
				/* out: DB_SUCCESS, DB_LOCK_WAIT,
				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
				does nothing */
	rec_t*		rec,	/* in: record which should be modified;
				NOTE: as this is a secondary index, we
				always have to modify the clustered index
				record first: see the comment below */
	dict_index_t*	index,	/* in: secondary index */
	que_thr_t*	thr)	/* in: query thread */
{
	ulint	err;
	if (flags & BTR_NO_LOCKING_FLAG) {

		return(DB_SUCCESS);
	}

	ut_ad(!(index->type & DICT_CLUSTERED));

	/* Another transaction cannot have an implicit lock on the record,
	because when we come here, we already have modified the clustered
	index record, and this would not have been possible if another active
	transaction had modified this secondary index record. */

	lock_mutex_enter_kernel();

	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));

	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, rec, index, thr);

	lock_mutex_exit_kernel();
#ifdef UNIV_DEBUG
	{
		mem_heap_t*	heap		= NULL;
		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
		const ulint*	offsets;
		*offsets_ = (sizeof offsets_) / sizeof *offsets_;

		offsets = rec_get_offsets(rec, index, offsets_,
					  ULINT_UNDEFINED, &heap);
		ut_ad(lock_rec_queue_validate(rec, index, offsets));
		if (UNIV_LIKELY_NULL(heap)) {
			mem_heap_free(heap);
		}
	}
#endif /* UNIV_DEBUG */
	if (err == DB_SUCCESS) {
		/* Update the page max trx id field */

		page_update_max_trx_id(buf_frame_align(rec),
				       thr_get_trx(thr)->id);
	}

	return(err);
}
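/* NOTE (illustrative sketch, not part of the original source): because
the clustered index record is always modified first, a delete-mark of a
secondary index record requests its locks in this order; clust_rec and
sec_rec are hypothetical names:

	err = lock_clust_rec_modify_check_and_lock(
		flags, clust_rec, clust_index, offsets, thr);

	if (err == DB_SUCCESS) {
		err = lock_sec_rec_modify_check_and_lock(
			flags, sec_rec, sec_index, thr);
	}
*/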
/*************************************************************************
Like the counterpart for a clustered index below, but now we read a
secondary index record. */

ulint
lock_sec_rec_read_check_and_lock(
/*=============================*/
				/* out: DB_SUCCESS, DB_LOCK_WAIT,
				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
				does nothing */
	rec_t*		rec,	/* in: user record or page supremum record
				which should be read or passed over by a read
				cursor */
	dict_index_t*	index,	/* in: secondary index */
	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
	ulint		mode,	/* in: mode of the lock which the read cursor
				should set on records: LOCK_S or LOCK_X; the
				latter is possible in SELECT FOR UPDATE */
	ulint		gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
				LOCK_REC_NOT_GAP */
	que_thr_t*	thr)	/* in: query thread */
{
	ulint	err;
	ut_ad(!(index->type & DICT_CLUSTERED));
	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
	ut_ad(rec_offs_validate(rec, index, offsets));

	if (flags & BTR_NO_LOCKING_FLAG) {

		return(DB_SUCCESS);
	}

	lock_mutex_enter_kernel();

	ut_ad(mode != LOCK_X
	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
	ut_ad(mode != LOCK_S
	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));

	/* Some transaction may have an implicit x-lock on the record only
	if the max trx id for the page >= min trx id for the trx list, or
	database recovery is running. */

	if (((ut_dulint_cmp(page_get_max_trx_id(buf_frame_align(rec)),
			    trx_list_get_min_trx_id()) >= 0)
	     || recv_recovery_is_on())
	    && !page_rec_is_supremum(rec)) {

		lock_rec_convert_impl_to_expl(rec, index, offsets);
	}

	err = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr);

	lock_mutex_exit_kernel();

	ut_ad(lock_rec_queue_validate(rec, index, offsets));

	return(err);
}
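/* NOTE (illustrative sketch, not part of the original source): the max
trx id test above is a cheap filter; in essence:

	if ((ut_dulint_cmp(page_get_max_trx_id(page),
			   trx_list_get_min_trx_id()) < 0)
	    && !recv_recovery_is_on()) {

		... every transaction which has modified this page has
		already committed: no record on the page can carry an
		implicit x-lock, and the conversion is skipped ...
	}

where page is a hypothetical name for the frame containing rec. */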
/*************************************************************************
Checks if locks of other transactions prevent an immediate read, or passing
over by a read cursor, of a clustered index record. If they do, first tests
if the query thread should anyway be suspended for some reason; if not, then
puts the transaction and the query thread to the lock wait state and inserts a
waiting request for a record lock to the lock queue. Sets the requested mode
lock on the record. */

ulint
lock_clust_rec_read_check_and_lock(
/*===============================*/
				/* out: DB_SUCCESS, DB_LOCK_WAIT,
				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
				does nothing */
	rec_t*		rec,	/* in: user record or page supremum record
				which should be read or passed over by a read
				cursor */
	dict_index_t*	index,	/* in: clustered index */
	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
	ulint		mode,	/* in: mode of the lock which the read cursor
				should set on records: LOCK_S or LOCK_X; the
				latter is possible in SELECT FOR UPDATE */
	ulint		gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
				LOCK_REC_NOT_GAP */
	que_thr_t*	thr)	/* in: query thread */
{
	ulint	err;
	ut_ad(index->type & DICT_CLUSTERED);
	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
	ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
	      || gap_mode == LOCK_REC_NOT_GAP);
	ut_ad(rec_offs_validate(rec, index, offsets));

	if (flags & BTR_NO_LOCKING_FLAG) {

		return(DB_SUCCESS);
	}

	lock_mutex_enter_kernel();

	ut_ad(mode != LOCK_X
	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
	ut_ad(mode != LOCK_S
	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));

	if (!page_rec_is_supremum(rec)) {

		lock_rec_convert_impl_to_expl(rec, index, offsets);
	}

	err = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr);

	lock_mutex_exit_kernel();

	ut_ad(lock_rec_queue_validate(rec, index, offsets));

	return(err);
}
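/* Usage sketch (illustrative, not part of the original source): a read
cursor in SELECT ... FOR UPDATE would request, per clustered index
record:

	err = lock_clust_rec_read_check_and_lock(
		0, rec, index, offsets, LOCK_X, LOCK_ORDINARY, thr);
*/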
/*************************************************************************
Checks if locks of other transactions prevent an immediate read, or passing
over by a read cursor, of a clustered index record. If they do, first tests
if the query thread should anyway be suspended for some reason; if not, then
puts the transaction and the query thread to the lock wait state and inserts a
waiting request for a record lock to the lock queue. Sets the requested mode
lock on the record. This is an alternative version of
lock_clust_rec_read_check_and_lock() that does not require the parameter
"offsets". */

ulint
lock_clust_rec_read_check_and_lock_alt(
/*===================================*/
				/* out: DB_SUCCESS, DB_LOCK_WAIT,
				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
				does nothing */
	rec_t*		rec,	/* in: user record or page supremum record
				which should be read or passed over by a read
				cursor */
	dict_index_t*	index,	/* in: clustered index */
	ulint		mode,	/* in: mode of the lock which the read cursor
				should set on records: LOCK_S or LOCK_X; the
				latter is possible in SELECT FOR UPDATE */
	ulint		gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
				LOCK_REC_NOT_GAP */
	que_thr_t*	thr)	/* in: query thread */
{
	mem_heap_t*	tmp_heap	= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	ulint		ret;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	offsets = rec_get_offsets(rec, index, offsets,
				  ULINT_UNDEFINED, &tmp_heap);
	ret = lock_clust_rec_read_check_and_lock(flags, rec, index,
						 offsets, mode, gap_mode, thr);
	if (UNIV_LIKELY_NULL(tmp_heap)) {
		mem_heap_free(tmp_heap);
	}
	return(ret);
}
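/* Usage sketch (illustrative, not part of the original source): the _alt
variant trades an extra rec_get_offsets() call for caller convenience, so

	err = lock_clust_rec_read_check_and_lock_alt(
		0, rec, index, LOCK_S, LOCK_ORDINARY, thr);

is equivalent to computing offsets first and calling
lock_clust_rec_read_check_and_lock() directly. */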