1
/******************************************************
2
Mutex, the basic synchronization primitive
6
Created 9/5/1995 Heikki Tuuri
7
*******************************************************/
11
#include "sync0sync.ic"
17
#include "buf0types.h"
20
REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX
21
============================================
23
Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc
24
takes 3 microseconds (us) for a lock-unlock pair and Windows NT on a 1995
25
Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to
26
implement our own efficient spin lock mutex. Future operating systems may
27
provide efficient spin locks, but we cannot count on that.
29
Another reason for implementing a spin lock is that on multiprocessor systems
30
it can be more efficient for a processor to run a loop waiting for the
31
semaphore to be released than to switch to a different thread. A thread switch
32
takes 25 us on both platforms mentioned above. See Gray and Reuter's book
33
Transaction processing for background.
35
How long should the spin loop last before suspending the thread? On a
36
uniprocessor, spinning does not help at all, because if the thread owning the
37
mutex is not executing, it cannot be released. Spinning actually wastes
40
On a multiprocessor, we do not know if the thread owning the mutex is
41
executing or not. Thus it would make sense to spin as long as the operation
42
guarded by the mutex would typically last assuming that the thread is
43
executing. If the mutex is not released by that time, we may assume that the
44
thread owning the mutex is not executing and suspend the waiting thread.
46
A typical operation (where no i/o is involved) guarded by a mutex or a read-write
47
lock may last 1 - 20 us on the current Pentium platform. The longest
48
operations are the binary searches on an index node.
50
We conclude that the best choice is to set the spin time at 20 us. Then the
51
system should work well on a multiprocessor. On a uniprocessor we have to
52
make sure that thread switches due to mutex collisions are not frequent,
53
i.e., they do not happen every 100 us or so, because that wastes too much
54
resources. If the thread switches are not frequent, the 20 us wasted in spin
57
Empirical studies on the effect of spin time should be done for different
61
IMPLEMENTATION OF THE MUTEX
62
===========================
64
For background, see Curt Schimmel's book on Unix implementation on modern
65
architectures. The key points in the implementation are atomicity and
66
serialization of memory accesses. The test-and-set instruction (XCHG in
67
Pentium) must be atomic. As new processors may have weak memory models, also
68
serialization of memory references may be necessary. The successor of Pentium,
69
P6, has at least one mode where the memory model is weak. As far as we know,
70
in Pentium all memory accesses are serialized in the program order and we do
71
not have to worry about the memory model. On other processors there are
72
special machine instructions called a fence, memory barrier, or storage
73
barrier (STBAR in Sparc), which can be used to serialize the memory accesses
74
to happen in program order relative to the fence instruction.
76
Leslie Lamport has devised a "bakery algorithm" to implement a mutex without
77
the atomic test-and-set, but his algorithm should be modified for weak memory
78
models. We do not use Lamport's algorithm, because we guess it is slower than
79
the atomic test-and-set.
81
Our mutex implementation works as follows: After that we perform the atomic
82
test-and-set instruction on the memory word. If the test returns zero, we
83
know we got the lock first. If the test returns not zero, some other thread
84
was quicker and got the lock: then we spin in a loop reading the memory word,
85
waiting for it to become zero. It is wise to just read the word in the loop, not
86
perform numerous test-and-set instructions, because they generate memory
87
traffic between the cache and the main memory. The read loop can just access
88
the cache, saving bus bandwidth.
90
If we cannot acquire the mutex lock in the specified time, we reserve a cell
91
in the wait array, set the waiters byte in the mutex to 1. To avoid a race
92
condition, after setting the waiters byte and before suspending the waiting
93
thread, we still have to check that the mutex is reserved, because it may
94
have happened that the thread which was holding the mutex has just released
95
it and did not see the waiters byte set to 1, a case which would lead the
96
other thread to an infinite wait.
98
LEMMA 1: After a thread resets the event of a mutex (or rw_lock), some
100
thread will eventually call os_event_set() on that particular event.
101
Thus no infinite wait is possible in this case.
103
Proof: After making the reservation the thread sets the waiters field in the
104
mutex to 1. Then it checks that the mutex is still reserved by some thread,
105
or it reserves the mutex for itself. In any case, some thread (which may be
106
also some earlier thread, not necessarily the one currently holding the mutex)
107
will set the waiters field to 0 in mutex_exit, and then call
108
os_event_set() with the mutex as an argument.
111
LEMMA 2: If an os_event_set() call is made after some thread has called
113
the os_event_reset() and before it starts to wait on that event, the call
114
will not be lost to the second thread. This is true even if there is an
115
intervening call to os_event_reset() by another thread.
116
Thus no infinite wait is possible in this case.
118
Proof (non-windows platforms): os_event_reset() returns a monotonically
119
increasing value of signal_count. This value is increased at every
120
call of os_event_set(). If thread A has called os_event_reset() followed
121
by thread B calling os_event_set() and then some other thread C calling
122
os_event_reset(), the is_set flag of the event will be set to FALSE;
123
but now if thread A calls os_event_wait_low() with the signal_count
124
value returned from the earlier call of os_event_reset(), it will
125
return immediately without waiting.
128
Proof (windows): If there is a writer thread which is forced to wait for
129
the lock, it may be able to set the state of rw_lock to RW_LOCK_WAIT_EX.
130
The design of rw_lock ensures that there is one and only one thread
131
that is able to change the state to RW_LOCK_WAIT_EX and this thread is
132
guaranteed to acquire the lock after it is released by the current
133
holders and before any other waiter gets the lock.
134
On windows this thread waits on a separate event i.e.: wait_ex_event.
135
Since only one thread can wait on this event there is no chance
136
of this event getting reset before the writer starts wait on it.
137
Therefore, this thread is guaranteed to catch the os_event_set()
138
signalled unconditionally at the release of the lock.
141
/* The number of system calls made in this module. Intended for performance
144
UNIV_INTERN ulint mutex_system_call_count = 0;
146
/* Number of spin waits on mutexes: for performance monitoring */
148
/* round=one iteration of a spin loop */
149
UNIV_INTERN ulint mutex_spin_round_count = 0;
150
UNIV_INTERN ulint mutex_spin_wait_count = 0;
151
UNIV_INTERN ulint mutex_os_wait_count = 0;
152
UNIV_INTERN ulint mutex_exit_count = 0;
154
/* The global array of wait cells for implementation of the database's own
155
mutexes and read-write locks */
156
UNIV_INTERN sync_array_t* sync_primary_wait_array;
158
/* This variable is set to TRUE when sync_init is called */
159
UNIV_INTERN ibool sync_initialized = FALSE;
162
typedef struct sync_level_struct sync_level_t;
163
typedef struct sync_thread_struct sync_thread_t;
165
#ifdef UNIV_SYNC_DEBUG
166
/* The latch levels currently owned by threads are stored in this data
167
structure; the size of this array is OS_THREAD_MAX_N */
169
UNIV_INTERN sync_thread_t* sync_thread_level_arrays;
171
/* Mutex protecting sync_thread_level_arrays */
172
UNIV_INTERN mutex_t sync_thread_mutex;
173
#endif /* UNIV_SYNC_DEBUG */
175
/* Global list of database mutexes (not OS mutexes) created. */
176
UNIV_INTERN ut_list_base_node_t mutex_list;
178
/* Mutex protecting the mutex_list variable */
179
UNIV_INTERN mutex_t mutex_list_mutex;
181
#ifdef UNIV_SYNC_DEBUG
182
/* Latching order checks start when this is set TRUE */
183
UNIV_INTERN ibool sync_order_checks_on = FALSE;
184
#endif /* UNIV_SYNC_DEBUG */
186
struct sync_thread_struct{
187
os_thread_id_t id; /* OS thread id */
188
sync_level_t* levels; /* level array for this thread; if this is NULL
189
this slot is unused */
192
/* Number of slots reserved for each OS thread in the sync level array */
193
#define SYNC_THREAD_N_LEVELS 10000
195
struct sync_level_struct{
196
void* latch; /* pointer to a mutex or an rw-lock; NULL means that
198
ulint level; /* level of the latch in the latching order */
201
/**********************************************************************
202
Creates, or rather, initializes a mutex object in a specified memory
203
location (which must be appropriately aligned). The mutex is initialized
204
in the reset state. Explicit freeing of the mutex with mutex_free is
205
necessary only if the memory block containing it is freed. */
210
mutex_t* mutex, /* in: pointer to memory */
212
const char* cmutex_name, /* in: mutex name */
213
# ifdef UNIV_SYNC_DEBUG
214
ulint level, /* in: level */
215
# endif /* UNIV_SYNC_DEBUG */
216
#endif /* UNIV_DEBUG */
217
const char* cfile_name, /* in: file name where created */
218
ulint cline) /* in: file line where created */
220
#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
221
mutex_reset_lock_word(mutex);
223
os_fast_mutex_init(&(mutex->os_fast_mutex));
224
mutex->lock_word = 0;
226
mutex->event = os_event_create(NULL);
227
mutex_set_waiters(mutex, 0);
229
mutex->magic_n = MUTEX_MAGIC_N;
230
#endif /* UNIV_DEBUG */
231
#ifdef UNIV_SYNC_DEBUG
233
mutex->file_name = "not yet reserved";
234
mutex->level = level;
235
#endif /* UNIV_SYNC_DEBUG */
236
mutex->cfile_name = cfile_name;
237
mutex->cline = cline;
238
#ifndef UNIV_HOTBACKUP
239
mutex->count_os_wait = 0;
241
mutex->cmutex_name= cmutex_name;
242
mutex->count_using= 0;
243
mutex->mutex_type= 0;
244
mutex->lspent_time= 0;
245
mutex->lmax_spent_time= 0;
246
mutex->count_spin_loop= 0;
247
mutex->count_spin_rounds= 0;
248
mutex->count_os_yield= 0;
249
# endif /* UNIV_DEBUG */
250
#endif /* !UNIV_HOTBACKUP */
252
/* Check that lock_word is aligned; this is important on Intel */
253
ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);
255
/* NOTE! The very first mutexes are not put to the mutex list */
257
if ((mutex == &mutex_list_mutex)
258
#ifdef UNIV_SYNC_DEBUG
259
|| (mutex == &sync_thread_mutex)
260
#endif /* UNIV_SYNC_DEBUG */
266
mutex_enter(&mutex_list_mutex);
268
ut_ad(UT_LIST_GET_LEN(mutex_list) == 0
269
|| UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N);
271
UT_LIST_ADD_FIRST(list, mutex_list, mutex);
273
mutex_exit(&mutex_list_mutex);
276
/**********************************************************************
277
Calling this function is obligatory only if the memory buffer containing
278
the mutex is freed. Removes a mutex object from the mutex list. The mutex
279
is checked to be in the reset state. */
284
mutex_t* mutex) /* in: mutex */
286
ut_ad(mutex_validate(mutex));
287
ut_a(mutex_get_lock_word(mutex) == 0);
288
ut_a(mutex_get_waiters(mutex) == 0);
290
if (mutex != &mutex_list_mutex
291
#ifdef UNIV_SYNC_DEBUG
292
&& mutex != &sync_thread_mutex
293
#endif /* UNIV_SYNC_DEBUG */
296
mutex_enter(&mutex_list_mutex);
298
ut_ad(!UT_LIST_GET_PREV(list, mutex)
299
|| UT_LIST_GET_PREV(list, mutex)->magic_n
301
ut_ad(!UT_LIST_GET_NEXT(list, mutex)
302
|| UT_LIST_GET_NEXT(list, mutex)->magic_n
305
UT_LIST_REMOVE(list, mutex_list, mutex);
307
mutex_exit(&mutex_list_mutex);
310
os_event_free(mutex->event);
312
#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
313
os_fast_mutex_free(&(mutex->os_fast_mutex));
315
/* If we free the mutex protecting the mutex list (freeing is
316
not necessary), we have to reset the magic number AFTER removing
320
#endif /* UNIV_DEBUG */
323
/************************************************************************
324
NOTE! Use the corresponding macro in the header file, not this function
325
directly. Tries to lock the mutex for the current thread. If the lock is not
326
acquired immediately, returns with return value 1. */
329
mutex_enter_nowait_func(
330
/*====================*/
331
/* out: 0 if succeed, 1 if not */
332
mutex_t* mutex, /* in: pointer to mutex */
333
const char* file_name __attribute__((unused)),
334
/* in: file name where mutex
336
ulint line __attribute__((unused)))
337
/* in: line where requested */
339
ut_ad(mutex_validate(mutex));
341
if (!mutex_test_and_set(mutex)) {
343
ut_d(mutex->thread_id = os_thread_get_curr_id());
344
#ifdef UNIV_SYNC_DEBUG
345
mutex_set_debug_info(mutex, file_name, line);
348
return(0); /* Succeeded! */
355
/**********************************************************************
356
Checks that the mutex has been initialized. */
361
const mutex_t* mutex)
364
ut_a(mutex->magic_n == MUTEX_MAGIC_N);
369
/**********************************************************************
370
Checks that the current thread owns the mutex. Works only in the debug
376
/* out: TRUE if owns */
377
const mutex_t* mutex) /* in: mutex */
379
ut_ad(mutex_validate(mutex));
381
return(mutex_get_lock_word(mutex) == 1
382
&& os_thread_eq(mutex->thread_id, os_thread_get_curr_id()));
384
#endif /* UNIV_DEBUG */
386
/**********************************************************************
387
Sets the waiters field in a mutex. */
392
mutex_t* mutex, /* in: mutex */
393
ulint n) /* in: value to set */
395
volatile ulint* ptr; /* declared volatile to ensure that
396
the value is stored to memory */
399
ptr = &(mutex->waiters);
401
*ptr = n; /* Here we assume that the write of a single
402
word in memory is atomic */
405
/**********************************************************************
406
Reserves a mutex for the current thread. If the mutex is reserved, the
407
function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
408
for the mutex before suspending the thread. */
413
mutex_t* mutex, /* in: pointer to mutex */
414
const char* file_name, /* in: file name where mutex
416
ulint line) /* in: line where requested */
418
ulint index; /* index of the reserved wait cell */
419
ulint i; /* spin round count */
420
#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
421
ib_int64_t lstart_time = 0, lfinish_time; /* for timing os_wait */
425
uint timer_started = 0;
426
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
433
/* Spin waiting for the lock word to become zero. Note that we do
434
not have to assume that the read access to the lock word is atomic,
435
as the actual locking is always committed with atomic test-and-set.
436
In reality, however, all processors probably have an atomic read of
440
#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
441
mutex_spin_wait_count++;
442
mutex->count_spin_loop++;
443
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
445
while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
446
if (srv_spin_wait_delay) {
447
ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
453
if (i == SYNC_SPIN_ROUNDS) {
454
#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
455
mutex->count_os_yield++;
456
if (timed_mutexes == 1 && timer_started==0) {
457
ut_usectime(&sec, &ms);
458
lstart_time= (ib_int64_t)sec * 1000000 + ms;
461
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
465
#ifdef UNIV_SRV_PRINT_LATCH_WAITS
467
"Thread %lu spin wait mutex at %p"
468
" cfile %s cline %lu rnds %lu\n",
469
(ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
470
mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
473
mutex_spin_round_count += i;
475
#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
476
mutex->count_spin_rounds += i;
477
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
479
if (mutex_test_and_set(mutex) == 0) {
482
ut_d(mutex->thread_id = os_thread_get_curr_id());
483
#ifdef UNIV_SYNC_DEBUG
484
mutex_set_debug_info(mutex, file_name, line);
490
/* We may end up with a situation where lock_word is 0 but the OS
491
fast mutex is still reserved. On FreeBSD the OS does not seem to
492
schedule a thread which is constantly calling pthread_mutex_trylock
493
(in mutex_test_and_set implementation). Then we could end up
494
spinning here indefinitely. The following 'i++' stops this infinite
499
if (i < SYNC_SPIN_ROUNDS) {
503
sync_array_reserve_cell(sync_primary_wait_array, mutex,
504
SYNC_MUTEX, file_name, line, &index);
506
mutex_system_call_count++;
508
/* The memory order of the array reservation and the change in the
509
waiters field is important: when we suspend a thread, we first
510
reserve the cell and then set waiters field to 1. When threads are
511
released in mutex_exit, the waiters field is first set to zero and
512
then the event is set to the signaled state. */
514
mutex_set_waiters(mutex, 1);
516
/* Try to reserve still a few times */
517
for (i = 0; i < 4; i++) {
518
if (mutex_test_and_set(mutex) == 0) {
519
/* Succeeded! Free the reserved wait cell */
521
sync_array_free_cell(sync_primary_wait_array, index);
523
ut_d(mutex->thread_id = os_thread_get_curr_id());
524
#ifdef UNIV_SYNC_DEBUG
525
mutex_set_debug_info(mutex, file_name, line);
528
#ifdef UNIV_SRV_PRINT_LATCH_WAITS
529
fprintf(stderr, "Thread %lu spin wait succeeds at 2:"
531
(ulong) os_thread_pf(os_thread_get_curr_id()),
537
/* Note that in this case we leave the waiters field
538
set to 1. We cannot reset it to zero, as we do not
539
know if there are other waiters. */
543
/* Now we know that there has been some thread holding the mutex
544
after the change in the wait array and the waiters field was made.
545
Now there is no risk of infinite wait on the event. */
547
#ifdef UNIV_SRV_PRINT_LATCH_WAITS
549
"Thread %lu OS wait mutex at %p cfile %s cline %lu rnds %lu\n",
550
(ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
551
mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
554
mutex_system_call_count++;
555
mutex_os_wait_count++;
557
#ifndef UNIV_HOTBACKUP
558
mutex->count_os_wait++;
560
/* !!!!! Sometimes os_wait can be called without os_thread_yield */
562
if (timed_mutexes == 1 && timer_started==0) {
563
ut_usectime(&sec, &ms);
564
lstart_time= (ib_int64_t)sec * 1000000 + ms;
567
# endif /* UNIV_DEBUG */
568
#endif /* !UNIV_HOTBACKUP */
570
sync_array_wait_event(sync_primary_wait_array, index);
574
#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
575
if (timed_mutexes == 1 && timer_started==1) {
576
ut_usectime(&sec, &ms);
577
lfinish_time= (ib_int64_t)sec * 1000000 + ms;
579
ltime_diff= (ulint) (lfinish_time - lstart_time);
580
mutex->lspent_time += ltime_diff;
582
if (mutex->lmax_spent_time < ltime_diff) {
583
mutex->lmax_spent_time= ltime_diff;
586
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
590
/**********************************************************************
591
Releases the threads waiting in the primary wait array for this mutex. */
596
mutex_t* mutex) /* in: mutex */
598
mutex_set_waiters(mutex, 0);
600
/* The memory order of resetting the waiters field and
601
signaling the object is important. See LEMMA 1 above. */
602
os_event_set(mutex->event);
603
sync_array_object_signalled(sync_primary_wait_array);
606
#ifdef UNIV_SYNC_DEBUG
607
/**********************************************************************
608
Sets the debug information for a reserved mutex. */
611
mutex_set_debug_info(
612
/*=================*/
613
mutex_t* mutex, /* in: mutex */
614
const char* file_name, /* in: file where requested */
615
ulint line) /* in: line where requested */
620
sync_thread_add_level(mutex, mutex->level);
622
mutex->file_name = file_name;
626
/**********************************************************************
627
Gets the debug information for a reserved mutex. */
630
mutex_get_debug_info(
631
/*=================*/
632
mutex_t* mutex, /* in: mutex */
633
const char** file_name, /* out: file where requested */
634
ulint* line, /* out: line where requested */
635
os_thread_id_t* thread_id) /* out: id of the thread which owns
640
*file_name = mutex->file_name;
642
*thread_id = mutex->thread_id;
645
/**********************************************************************
646
Prints debug info of currently reserved mutexes. */
649
mutex_list_print_info(
650
/*==================*/
651
FILE* file) /* in: file where to print */
654
const char* file_name;
656
os_thread_id_t thread_id;
661
"----------\n", file);
663
mutex_enter(&mutex_list_mutex);
665
mutex = UT_LIST_GET_FIRST(mutex_list);
667
while (mutex != NULL) {
670
if (mutex_get_lock_word(mutex) != 0) {
671
mutex_get_debug_info(mutex, &file_name, &line,
674
"Locked mutex: addr %p thread %ld"
675
" file %s line %ld\n",
676
(void*) mutex, os_thread_pf(thread_id),
680
mutex = UT_LIST_GET_NEXT(list, mutex);
683
fprintf(file, "Total number of mutexes %ld\n", count);
685
mutex_exit(&mutex_list_mutex);
688
/**********************************************************************
689
Counts currently reserved mutexes. Works only in the debug version. */
692
mutex_n_reserved(void)
693
/*==================*/
698
mutex_enter(&mutex_list_mutex);
700
mutex = UT_LIST_GET_FIRST(mutex_list);
702
while (mutex != NULL) {
703
if (mutex_get_lock_word(mutex) != 0) {
708
mutex = UT_LIST_GET_NEXT(list, mutex);
711
mutex_exit(&mutex_list_mutex);
715
return(count - 1); /* Subtract one, because this function itself
716
was holding one mutex (mutex_list_mutex) */
719
/**********************************************************************
720
Returns TRUE if no mutex or rw-lock is currently locked. Works only in
721
the debug version. */
727
return(mutex_n_reserved() + rw_lock_n_locked() == 0);
730
/**********************************************************************
731
Gets the value in the nth slot in the thread level arrays. */
734
sync_thread_level_arrays_get_nth(
735
/*=============================*/
736
/* out: pointer to thread slot */
737
ulint n) /* in: slot number */
739
ut_ad(n < OS_THREAD_MAX_N);
741
return(sync_thread_level_arrays + n);
744
/**********************************************************************
745
Looks for the thread slot for the calling thread. */
748
sync_thread_level_arrays_find_slot(void)
749
/*====================================*/
750
/* out: pointer to thread slot, NULL if not found */
757
id = os_thread_get_curr_id();
759
for (i = 0; i < OS_THREAD_MAX_N; i++) {
761
slot = sync_thread_level_arrays_get_nth(i);
763
if (slot->levels && os_thread_eq(slot->id, id)) {
772
/**********************************************************************
773
Looks for an unused thread slot. */
776
sync_thread_level_arrays_find_free(void)
777
/*====================================*/
778
/* out: pointer to thread slot */
784
for (i = 0; i < OS_THREAD_MAX_N; i++) {
786
slot = sync_thread_level_arrays_get_nth(i);
788
if (slot->levels == NULL) {
797
/**********************************************************************
798
Gets the value in the nth slot in the thread level array. */
801
sync_thread_levels_get_nth(
802
/*=======================*/
803
/* out: pointer to level slot */
804
sync_level_t* arr, /* in: pointer to level array for an OS
806
ulint n) /* in: slot number */
808
ut_ad(n < SYNC_THREAD_N_LEVELS);
813
/**********************************************************************
814
Checks if all the level values stored in the level array are greater than
818
sync_thread_levels_g(
819
/*=================*/
820
/* out: TRUE if all greater */
821
sync_level_t* arr, /* in: pointer to level array for an OS
823
ulint limit) /* in: level limit */
830
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
832
slot = sync_thread_levels_get_nth(arr, i);
834
if (slot->latch != NULL) {
835
if (slot->level <= limit) {
841
"InnoDB: sync levels should be"
842
" > %lu but a level is %lu\n",
843
(ulong) limit, (ulong) slot->level);
845
if (mutex->magic_n == MUTEX_MAGIC_N) {
847
"Mutex created at %s %lu\n",
849
(ulong) mutex->cline);
851
if (mutex_get_lock_word(mutex) != 0) {
852
const char* file_name;
854
os_thread_id_t thread_id;
856
mutex_get_debug_info(
861
"InnoDB: Locked mutex:"
862
" addr %p thread %ld"
863
" file %s line %ld\n",
870
fputs("Not locked\n", stderr);
884
/**********************************************************************
885
Checks if the level value is stored in the level array. */
888
sync_thread_levels_contain(
889
/*=======================*/
890
/* out: TRUE if stored */
891
sync_level_t* arr, /* in: pointer to level array for an OS
893
ulint level) /* in: level */
898
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
900
slot = sync_thread_levels_get_nth(arr, i);
902
if (slot->latch != NULL) {
903
if (slot->level == level) {
913
/**********************************************************************
914
Checks that the level array for the current thread is empty. */
917
sync_thread_levels_empty_gen(
918
/*=========================*/
919
/* out: TRUE if empty except the
920
exceptions specified below */
921
ibool dict_mutex_allowed) /* in: TRUE if dictionary mutex is
922
allowed to be owned by the thread,
923
also purge_is_running mutex is
927
sync_thread_t* thread_slot;
931
if (!sync_order_checks_on) {
936
mutex_enter(&sync_thread_mutex);
938
thread_slot = sync_thread_level_arrays_find_slot();
940
if (thread_slot == NULL) {
942
mutex_exit(&sync_thread_mutex);
947
arr = thread_slot->levels;
949
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
951
slot = sync_thread_levels_get_nth(arr, i);
953
if (slot->latch != NULL
954
&& (!dict_mutex_allowed
955
|| (slot->level != SYNC_DICT
956
&& slot->level != SYNC_DICT_OPERATION))) {
958
mutex_exit(&sync_thread_mutex);
965
mutex_exit(&sync_thread_mutex);
970
/**********************************************************************
971
Checks that the level array for the current thread is empty. */
974
sync_thread_levels_empty(void)
975
/*==========================*/
976
/* out: TRUE if empty */
978
return(sync_thread_levels_empty_gen(FALSE));
981
/**********************************************************************
982
Adds a latch and its level in the thread level array. Allocates the memory
983
for the array if called first time for this OS thread. Makes the checks
984
against other latch levels stored in the array for this thread. */
987
sync_thread_add_level(
988
/*==================*/
989
void* latch, /* in: pointer to a mutex or an rw-lock */
990
ulint level) /* in: level in the latching order; if
991
SYNC_LEVEL_VARYING, nothing is done */
995
sync_thread_t* thread_slot;
998
if (!sync_order_checks_on) {
1003
if ((latch == (void*)&sync_thread_mutex)
1004
|| (latch == (void*)&mutex_list_mutex)
1005
|| (latch == (void*)&rw_lock_debug_mutex)
1006
|| (latch == (void*)&rw_lock_list_mutex)) {
1011
if (level == SYNC_LEVEL_VARYING) {
1016
mutex_enter(&sync_thread_mutex);
1018
thread_slot = sync_thread_level_arrays_find_slot();
1020
if (thread_slot == NULL) {
1021
/* We have to allocate the level array for a new thread */
1022
array = ut_malloc(sizeof(sync_level_t) * SYNC_THREAD_N_LEVELS);
1024
thread_slot = sync_thread_level_arrays_find_free();
1026
thread_slot->id = os_thread_get_curr_id();
1027
thread_slot->levels = array;
1029
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
1031
slot = sync_thread_levels_get_nth(array, i);
1037
array = thread_slot->levels;
1039
/* NOTE that there is a problem with _NODE and _LEAF levels: if the
1040
B-tree height changes, then a leaf can change to an internal node
1041
or the other way around. We do not know at present if this can cause
1042
unnecessary assertion failures below. */
1045
case SYNC_NO_ORDER_CHECK:
1046
case SYNC_EXTERN_STORAGE:
1047
case SYNC_TREE_NODE_FROM_HASH:
1048
/* Do no order checking */
1053
case SYNC_WORK_QUEUE:
1055
case SYNC_THR_LOCAL:
1056
case SYNC_ANY_LATCH:
1057
case SYNC_TRX_SYS_HEADER:
1058
case SYNC_FILE_FORMAT_TAG:
1059
case SYNC_DOUBLEWRITE:
1061
case SYNC_SEARCH_SYS:
1062
case SYNC_TRX_LOCK_HEAP:
1064
case SYNC_IBUF_BITMAP_MUTEX:
1067
case SYNC_PURGE_LATCH:
1068
case SYNC_PURGE_SYS:
1069
case SYNC_DICT_AUTOINC_MUTEX:
1070
case SYNC_DICT_OPERATION:
1071
case SYNC_DICT_HEADER:
1072
case SYNC_TRX_I_S_RWLOCK:
1073
case SYNC_TRX_I_S_LAST_READ:
1074
if (!sync_thread_levels_g(array, level)) {
1076
"InnoDB: sync_thread_levels_g(array, %lu)"
1077
" does not hold!\n", level);
1081
case SYNC_BUF_BLOCK:
1082
ut_a((sync_thread_levels_contain(array, SYNC_BUF_POOL)
1083
&& sync_thread_levels_g(array, SYNC_BUF_BLOCK - 1))
1084
|| sync_thread_levels_g(array, SYNC_BUF_BLOCK));
1087
ut_a((sync_thread_levels_contain(array, SYNC_KERNEL)
1088
&& sync_thread_levels_g(array, SYNC_REC_LOCK - 1))
1089
|| sync_thread_levels_g(array, SYNC_REC_LOCK));
1091
case SYNC_IBUF_BITMAP:
1092
ut_a((sync_thread_levels_contain(array, SYNC_IBUF_BITMAP_MUTEX)
1093
&& sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1))
1094
|| sync_thread_levels_g(array, SYNC_IBUF_BITMAP));
1097
ut_a(sync_thread_levels_contain(array, SYNC_FSP));
1100
ut_a(sync_thread_levels_contain(array, SYNC_FSP)
1101
|| sync_thread_levels_g(array, SYNC_FSP));
1103
case SYNC_TRX_UNDO_PAGE:
1104
ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
1105
|| sync_thread_levels_contain(array, SYNC_RSEG)
1106
|| sync_thread_levels_contain(array, SYNC_PURGE_SYS)
1107
|| sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE));
1109
case SYNC_RSEG_HEADER:
1110
ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
1112
case SYNC_RSEG_HEADER_NEW:
1113
ut_a(sync_thread_levels_contain(array, SYNC_KERNEL)
1114
&& sync_thread_levels_contain(array, SYNC_FSP_PAGE));
1116
case SYNC_TREE_NODE:
1117
ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
1118
|| sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
1119
|| sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
1121
case SYNC_TREE_NODE_NEW:
1122
ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE)
1123
|| sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
1125
case SYNC_INDEX_TREE:
1126
ut_a((sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
1127
&& sync_thread_levels_contain(array, SYNC_FSP)
1128
&& sync_thread_levels_g(array, SYNC_FSP_PAGE - 1))
1129
|| sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
1131
case SYNC_IBUF_MUTEX:
1132
ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1));
1134
case SYNC_IBUF_PESS_INSERT_MUTEX:
1135
ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
1136
&& !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
1138
case SYNC_IBUF_HEADER:
1139
ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
1140
&& !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
1141
&& !sync_thread_levels_contain(
1142
array, SYNC_IBUF_PESS_INSERT_MUTEX));
1146
ut_a(buf_debug_prints
1147
|| sync_thread_levels_g(array, SYNC_DICT));
1148
#else /* UNIV_DEBUG */
1149
ut_a(sync_thread_levels_g(array, SYNC_DICT));
1150
#endif /* UNIV_DEBUG */
1156
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
1158
slot = sync_thread_levels_get_nth(array, i);
1160
if (slot->latch == NULL) {
1161
slot->latch = latch;
1162
slot->level = level;
1168
ut_a(i < SYNC_THREAD_N_LEVELS);
1170
mutex_exit(&sync_thread_mutex);
1173
/**********************************************************************
1174
Removes a latch from the thread level array if it is found there. */
1177
sync_thread_reset_level(
1178
/*====================*/
1179
/* out: TRUE if found from the array; it is an error
1180
if the latch is not found */
1181
void* latch) /* in: pointer to a mutex or an rw-lock */
1183
sync_level_t* array;
1185
sync_thread_t* thread_slot;
1188
if (!sync_order_checks_on) {
1193
if ((latch == (void*)&sync_thread_mutex)
1194
|| (latch == (void*)&mutex_list_mutex)
1195
|| (latch == (void*)&rw_lock_debug_mutex)
1196
|| (latch == (void*)&rw_lock_list_mutex)) {
1201
mutex_enter(&sync_thread_mutex);
1203
thread_slot = sync_thread_level_arrays_find_slot();
1205
if (thread_slot == NULL) {
1209
mutex_exit(&sync_thread_mutex);
1213
array = thread_slot->levels;
1215
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
1217
slot = sync_thread_levels_get_nth(array, i);
1219
if (slot->latch == latch) {
1222
mutex_exit(&sync_thread_mutex);
1228
if (((mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
1231
rw_lock = (rw_lock_t*) latch;
1233
if (rw_lock->level == SYNC_LEVEL_VARYING) {
1234
mutex_exit(&sync_thread_mutex);
1242
mutex_exit(&sync_thread_mutex);
1246
#endif /* UNIV_SYNC_DEBUG */
1248
/**********************************************************************
1249
Initializes the synchronization data structures. */
1255
#ifdef UNIV_SYNC_DEBUG
1256
sync_thread_t* thread_slot;
1258
#endif /* UNIV_SYNC_DEBUG */
1260
ut_a(sync_initialized == FALSE);
1262
sync_initialized = TRUE;
1264
/* Create the primary system wait array which is protected by an OS
1267
sync_primary_wait_array = sync_array_create(OS_THREAD_MAX_N,
1268
SYNC_ARRAY_OS_MUTEX);
1269
#ifdef UNIV_SYNC_DEBUG
1270
/* Create the thread latch level array where the latch levels
1271
are stored for each OS thread */
1273
sync_thread_level_arrays = ut_malloc(OS_THREAD_MAX_N
1274
* sizeof(sync_thread_t));
1275
for (i = 0; i < OS_THREAD_MAX_N; i++) {
1277
thread_slot = sync_thread_level_arrays_get_nth(i);
1278
thread_slot->levels = NULL;
1280
#endif /* UNIV_SYNC_DEBUG */
1281
/* Init the mutex list and create the mutex to protect it. */
1283
UT_LIST_INIT(mutex_list);
1284
mutex_create(&mutex_list_mutex, SYNC_NO_ORDER_CHECK);
1285
#ifdef UNIV_SYNC_DEBUG
1286
mutex_create(&sync_thread_mutex, SYNC_NO_ORDER_CHECK);
1287
#endif /* UNIV_SYNC_DEBUG */
1289
/* Init the rw-lock list and create the mutex to protect it. */
1291
UT_LIST_INIT(rw_lock_list);
1292
mutex_create(&rw_lock_list_mutex, SYNC_NO_ORDER_CHECK);
1294
#ifdef UNIV_SYNC_DEBUG
1295
mutex_create(&rw_lock_debug_mutex, SYNC_NO_ORDER_CHECK);
1297
rw_lock_debug_event = os_event_create(NULL);
1298
rw_lock_debug_waiters = FALSE;
1299
#endif /* UNIV_SYNC_DEBUG */
1302
/**********************************************************************
1303
Frees the resources in InnoDB's own synchronization data structures. Use
1304
os_sync_free() after calling this. */
1312
sync_array_free(sync_primary_wait_array);
1314
mutex = UT_LIST_GET_FIRST(mutex_list);
1318
mutex = UT_LIST_GET_FIRST(mutex_list);
1321
mutex_free(&mutex_list_mutex);
1322
#ifdef UNIV_SYNC_DEBUG
1323
mutex_free(&sync_thread_mutex);
1324
#endif /* UNIV_SYNC_DEBUG */
1327
/***********************************************************************
1328
Prints wait info of the sync system. */
1331
sync_print_wait_info(
1332
/*=================*/
1333
FILE* file) /* in: file where to print */
1335
#ifdef UNIV_SYNC_DEBUG
1336
fprintf(file, "Mutex exits %lu, rws exits %lu, rwx exits %lu\n",
1337
mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
1341
"Mutex spin waits %lu, rounds %lu, OS waits %lu\n"
1342
"RW-shared spins %lu, OS waits %lu;"
1343
" RW-excl spins %lu, OS waits %lu\n",
1344
(ulong) mutex_spin_wait_count,
1345
(ulong) mutex_spin_round_count,
1346
(ulong) mutex_os_wait_count,
1347
(ulong) rw_s_spin_wait_count,
1348
(ulong) rw_s_os_wait_count,
1349
(ulong) rw_x_spin_wait_count,
1350
(ulong) rw_x_os_wait_count);
1353
/***********************************************************************
1354
Prints info of the sync system. */
1359
FILE* file) /* in: file where to print */
1361
#ifdef UNIV_SYNC_DEBUG
1362
mutex_list_print_info(file);
1364
rw_lock_list_print_info(file);
1365
#endif /* UNIV_SYNC_DEBUG */
1367
sync_array_print_info(file, sync_primary_wait_array);
1369
sync_print_wait_info(file);