/******************************************************
Mutex, the basic synchronization primitive

Created 9/5/1995 Heikki Tuuri
*******************************************************/

#include "sync0sync.ic"

#include "buf0types.h"
REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX
============================================

Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc
takes 3 microseconds (us) for a lock-unlock pair and Windows NT on a 1995
Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to
implement our own efficient spin lock mutex. Future operating systems may
provide efficient spin locks, but we cannot count on that.

Another reason for implementing a spin lock is that on multiprocessor systems
it can be more efficient for a processor to run a loop waiting for the
semaphore to be released than to switch to a different thread. A thread switch
takes 25 us on both platforms mentioned above. See Gray and Reuter's book
Transaction Processing for background.

How long should the spin loop last before suspending the thread? On a
uniprocessor, spinning does not help at all, because if the thread owning the
mutex is not executing, the mutex cannot be released. Spinning only wastes
resources.

On a multiprocessor, we do not know whether the thread owning the mutex is
executing or not. Thus it makes sense to spin for about as long as the
operation guarded by the mutex would typically last, assuming that the thread
is executing. If the mutex is not released by that time, we may assume that
the thread owning the mutex is not executing and suspend the waiting thread.

A typical operation (where no i/o is involved) guarded by a mutex or a
read-write lock may last 1 - 20 us on the current Pentium platform. The
longest operations are the binary searches on an index node.

We conclude that the best choice is to set the spin time at 20 us. Then the
system should work well on a multiprocessor. On a uniprocessor we have to
make sure that thread switches due to mutex collisions are not frequent,
i.e., that they do not happen every 100 us or so, because that wastes too
many resources. If the thread switches are not frequent, the 20 us wasted in
the spin loop is not too high a cost.

Empirical studies on the effect of spin time should be done for different
platforms.
IMPLEMENTATION OF THE MUTEX
===========================

For background, see Curt Schimmel's book on Unix implementation on modern
architectures. The key points in the implementation are atomicity and
serialization of memory accesses. The test-and-set instruction (XCHG in
Pentium) must be atomic. As new processors may have weak memory models,
serialization of memory references may also be necessary. The successor of
Pentium, P6, has at least one mode where the memory model is weak. As far as
we know, in Pentium all memory accesses are serialized in the program order
and we do not have to worry about the memory model. On other processors there
are special machine instructions, called a fence, memory barrier, or storage
barrier (STBAR in Sparc), which can be used to serialize the memory accesses
to happen in program order relative to the fence instruction.
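
For illustration only (a hypothetical sketch, not the InnoDB implementation):
on a weakly ordered processor, a mutex release must make the protected stores
visible before the lock word is cleared, roughly like this:

	void
	spin_unlock_sketch(volatile ulint* lock_word)
	{
		__sync_synchronize();	/* full memory barrier (GCC builtin);
					a real port would use the platform's
					fence/STBAR instruction instead */
		*lock_word = 0;		/* only now publish the lock as free */
	}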

Leslie Lamport has devised a "bakery algorithm" to implement a mutex without
the atomic test-and-set, but his algorithm would have to be modified for weak
memory models. We do not use Lamport's algorithm, because we expect it to be
slower than the atomic test-and-set.

Our mutex implementation works as follows: we first perform the atomic
test-and-set instruction on the memory word. If the test returns zero, we
know we got the lock first. If the test returns nonzero, some other thread
was quicker and got the lock: then we spin in a loop reading the memory word,
waiting for it to become zero. It is wise to just read the word in the loop,
not perform numerous test-and-set instructions, because they generate memory
traffic between the cache and the main memory. The read loop can just access
the cache, saving bus bandwidth.
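
As a rough sketch (illustrative only; the real acquire path lives in the
inlined mutex_enter in sync0sync.ic and in mutex_spin_wait below), the scheme
just described is a test-and-test-and-set loop:

	for (;;) {
		if (mutex_test_and_set(mutex) == 0) {
			break;		/* we got the lock */
		}
		while (mutex_get_lock_word(mutex) != 0) {
			/* spin by reading only: the loop runs out of the
			cache and generates no bus traffic */
		}
	}

Unlike this sketch, the real code gives up after SYNC_SPIN_ROUNDS iterations
and suspends the thread in the wait array, as described next.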

If we cannot acquire the mutex lock within the specified time, we reserve a
cell in the wait array and set the waiters byte in the mutex to 1. To avoid
a race condition, after setting the waiters byte and before suspending the
waiting thread, we still have to check that the mutex is reserved, because
it may have happened that the thread which was holding the mutex has just
released it and did not see the waiters byte set to 1, a case which would
lead the other thread to an infinite wait.

LEMMA 1: After a thread resets the event of the cell it reserves for waiting
for a mutex, some thread will eventually call sync_array_signal_object with
the mutex as an argument. Thus no infinite wait is possible.

Proof: After making the reservation the thread sets the waiters field in the
mutex to 1. Then it checks that the mutex is still reserved by some thread,
or it reserves the mutex for itself. In either case, some thread (which may
also be some earlier thread, not necessarily the one currently holding the
mutex) will set the waiters field to 0 in mutex_exit, and then call
sync_array_signal_object with the mutex as an argument. Q.E.D.
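
In simplified pseudo-code (a sketch of the ordering only; the authoritative
logic is in mutex_spin_wait and mutex_signal_object in this file, and in the
inlined mutex_exit):

	/* Waiting thread, after the spin rounds are exhausted: */
	sync_array_reserve_cell(...);		/* 1. reserve a wait cell */
	mutex_set_waiters(mutex, 1);		/* 2. announce the waiter */
	if (mutex_test_and_set(mutex) == 0) {
		/* 3a. got the lock after all: free the cell and proceed */
	} else {
		sync_array_wait_event(...);	/* 3b. safe to sleep: LEMMA 1 */
	}

	/* Releasing thread (mutex_exit): */
	mutex_reset_lock_word(mutex);		/* release the lock word */
	if (mutex_get_waiters(mutex) != 0) {
		mutex_signal_object(mutex);	/* waiters = 0, then signal */
	}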

/* The number of system calls made in this module. Intended for performance
monitoring. */
ulint	mutex_system_call_count		= 0;

/* Number of spin waits on mutexes: for performance monitoring */

/* round = one iteration of a spin loop */
ulint	mutex_spin_round_count		= 0;
ulint	mutex_spin_wait_count		= 0;
ulint	mutex_os_wait_count		= 0;
ulint	mutex_exit_count		= 0;

/* The global array of wait cells for implementation of the database's own
mutexes and read-write locks */
sync_array_t*	sync_primary_wait_array;

/* This variable is set to TRUE when sync_init is called */
ibool	sync_initialized	= FALSE;

typedef struct sync_level_struct	sync_level_t;
typedef struct sync_thread_struct	sync_thread_t;

#ifdef UNIV_SYNC_DEBUG
/* The latch levels currently owned by threads are stored in this data
structure; the size of this array is OS_THREAD_MAX_N */

sync_thread_t*	sync_thread_level_arrays;

/* Mutex protecting sync_thread_level_arrays */
mutex_t	sync_thread_mutex;
#endif /* UNIV_SYNC_DEBUG */

/* Global list of database mutexes (not OS mutexes) created. */
ut_list_base_node_t	mutex_list;

/* Mutex protecting the mutex_list variable */
mutex_t	mutex_list_mutex;

#ifdef UNIV_SYNC_DEBUG
/* Latching order checks start when this is set TRUE */
ibool	sync_order_checks_on	= FALSE;
#endif /* UNIV_SYNC_DEBUG */

struct sync_thread_struct{
	os_thread_id_t	id;	/* OS thread id */
	sync_level_t*	levels;	/* level array for this thread; if this is
				NULL this slot is unused */
};

/* Number of slots reserved for each OS thread in the sync level array */
#define SYNC_THREAD_N_LEVELS	10000

struct sync_level_struct{
	void*	latch;	/* pointer to a mutex or an rw-lock; NULL means that
			the slot is empty */
	ulint	level;	/* level of the latch in the latching order */
};

/**********************************************************************
A noninlined function that reserves a mutex. In ha_innodb.cc we have disabled
inlining of InnoDB functions, and no inlined functions should be called from
there. That is why we need to duplicate the inlined function here. */
mutex_enter_noninline(
/*==================*/
	mutex_t*	mutex)	/* in: mutex */

/**********************************************************************
mutex_exit_noninline(
/*=================*/
	mutex_t*	mutex)	/* in: mutex */

/**********************************************************************
Creates, or rather, initializes a mutex object in a specified memory
location (which must be appropriately aligned). The mutex is initialized
in the reset state. Explicit freeing of the mutex with mutex_free is
necessary only if the memory block containing it is freed. */
	mutex_t*	mutex,		/* in: pointer to memory */
	const char*	cmutex_name,	/* in: mutex name */
# ifdef UNIV_SYNC_DEBUG
	ulint		level,		/* in: level */
# endif /* UNIV_SYNC_DEBUG */
#endif /* UNIV_DEBUG */
	const char*	cfile_name,	/* in: file name where created */
	ulint		cline)		/* in: file line where created */
#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
	mutex_reset_lock_word(mutex);
	os_fast_mutex_init(&(mutex->os_fast_mutex));
	mutex->lock_word = 0;

	mutex_set_waiters(mutex, 0);

	mutex->magic_n = MUTEX_MAGIC_N;
#endif /* UNIV_DEBUG */
#ifdef UNIV_SYNC_DEBUG
	mutex->file_name = "not yet reserved";
	mutex->level = level;
#endif /* UNIV_SYNC_DEBUG */
	mutex->cfile_name = cfile_name;
	mutex->cline = cline;
#ifndef UNIV_HOTBACKUP
	mutex->count_os_wait = 0;
	mutex->cmutex_name = cmutex_name;
	mutex->count_using = 0;
	mutex->mutex_type = 0;
	mutex->lspent_time = 0;
	mutex->lmax_spent_time = 0;
	mutex->count_spin_loop = 0;
	mutex->count_spin_rounds = 0;
	mutex->count_os_yield = 0;
# endif /* UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */

	/* Check that lock_word is aligned; this is important on Intel */
	ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);

	/* NOTE! The very first mutexes are not put to the mutex list */

	if ((mutex == &mutex_list_mutex)
#ifdef UNIV_SYNC_DEBUG
	    || (mutex == &sync_thread_mutex)
#endif /* UNIV_SYNC_DEBUG */

	mutex_enter(&mutex_list_mutex);

	ut_ad(UT_LIST_GET_LEN(mutex_list) == 0
	      || UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N);

	UT_LIST_ADD_FIRST(list, mutex_list, mutex);

	mutex_exit(&mutex_list_mutex);

/**********************************************************************
Calling this function is obligatory only if the memory buffer containing
the mutex is freed. Removes a mutex object from the mutex list. The mutex
is checked to be in the reset state. */
	mutex_t*	mutex)	/* in: mutex */
	ut_ad(mutex_validate(mutex));
	ut_a(mutex_get_lock_word(mutex) == 0);
	ut_a(mutex_get_waiters(mutex) == 0);

	if (mutex != &mutex_list_mutex
#ifdef UNIV_SYNC_DEBUG
	    && mutex != &sync_thread_mutex
#endif /* UNIV_SYNC_DEBUG */

		mutex_enter(&mutex_list_mutex);

		ut_ad(!UT_LIST_GET_PREV(list, mutex)
		      || UT_LIST_GET_PREV(list, mutex)->magic_n
		ut_ad(!UT_LIST_GET_NEXT(list, mutex)
		      || UT_LIST_GET_NEXT(list, mutex)->magic_n

		UT_LIST_REMOVE(list, mutex_list, mutex);

		mutex_exit(&mutex_list_mutex);

#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
	os_fast_mutex_free(&(mutex->os_fast_mutex));

	/* If we free the mutex protecting the mutex list (freeing is
	not necessary), we have to reset the magic number AFTER removing
	it from the list. */
#endif /* UNIV_DEBUG */

/************************************************************************
NOTE! Use the corresponding macro in the header file, not this function
directly. Tries to lock the mutex for the current thread. If the lock is not
acquired immediately, returns with return value 1. */
mutex_enter_nowait_func(
/*====================*/
				/* out: 0 if succeed, 1 if not */
	mutex_t*	mutex,	/* in: pointer to mutex */
	const char*	file_name __attribute__((unused)),
				/* in: file name where mutex
				requested */
	ulint		line __attribute__((unused)))
				/* in: line where requested */
	ut_ad(mutex_validate(mutex));

	if (!mutex_test_and_set(mutex)) {

		ut_d(mutex->thread_id = os_thread_get_curr_id());
#ifdef UNIV_SYNC_DEBUG
		mutex_set_debug_info(mutex, file_name, line);

		return(0);	/* Succeeded! */

/**********************************************************************
Checks that the mutex has been initialized. */
	const mutex_t*	mutex)
	ut_a(mutex->magic_n == MUTEX_MAGIC_N);

/**********************************************************************
Checks that the current thread owns the mutex. Works only in the debug
version. */
				/* out: TRUE if owns */
	const mutex_t*	mutex)	/* in: mutex */
	ut_ad(mutex_validate(mutex));

	return(mutex_get_lock_word(mutex) == 1
	       && os_thread_eq(mutex->thread_id, os_thread_get_curr_id()));
#endif /* UNIV_DEBUG */

/**********************************************************************
Sets the waiters field in a mutex. */
	mutex_t*	mutex,	/* in: mutex */
	ulint		n)	/* in: value to set */
	volatile ulint*	ptr;	/* declared volatile to ensure that
				the value is stored to memory */
	ptr = &(mutex->waiters);

	*ptr = n;		/* Here we assume that the write of a single
				word in memory is atomic */

/**********************************************************************
Reserves a mutex for the current thread. If the mutex is reserved, the
function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
for the mutex before suspending the thread. */
	mutex_t*	mutex,		/* in: pointer to mutex */
	const char*	file_name,	/* in: file name where mutex
					requested */
	ulint		line)		/* in: line where requested */
	ulint	index;	/* index of the reserved wait cell */
	ulint	i;	/* spin round count */
#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
	ib_longlong	lstart_time = 0, lfinish_time; /* for timing os_wait */
	uint	timer_started = 0;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */

	/* Spin waiting for the lock word to become zero. Note that we do
	not have to assume that the read access to the lock word is atomic,
	as the actual locking is always committed with atomic test-and-set.
	In reality, however, all processors probably have an atomic read of
	a memory word. */

#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
	mutex_spin_wait_count++;
	mutex->count_spin_loop++;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */

	while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
		if (srv_spin_wait_delay) {
			ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));

	if (i == SYNC_SPIN_ROUNDS) {
#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
		mutex->count_os_yield++;
		if (timed_mutexes == 1 && timer_started == 0) {
			ut_usectime(&sec, &ms);
			lstart_time = (ib_longlong)sec * 1000000 + ms;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */

#ifdef UNIV_SRV_PRINT_LATCH_WAITS
		"Thread %lu spin wait mutex at %p"
		" cfile %s cline %lu rnds %lu\n",
		(ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
		mutex->cfile_name, (ulong) mutex->cline, (ulong) i);

	mutex_spin_round_count += i;

#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
	mutex->count_spin_rounds += i;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */

	if (mutex_test_and_set(mutex) == 0) {

		ut_d(mutex->thread_id = os_thread_get_curr_id());
#ifdef UNIV_SYNC_DEBUG
		mutex_set_debug_info(mutex, file_name, line);

	/* We may end up with a situation where lock_word is 0 but the OS
	fast mutex is still reserved. On FreeBSD the OS does not seem to
	schedule a thread which is constantly calling pthread_mutex_trylock
	(in the mutex_test_and_set implementation). Then we could end up
	spinning here indefinitely. The following 'i++' stops this infinite
	spin. */

	if (i < SYNC_SPIN_ROUNDS) {

	sync_array_reserve_cell(sync_primary_wait_array, mutex,
				SYNC_MUTEX, file_name, line, &index);

	mutex_system_call_count++;

	/* The memory order of the array reservation and the change in the
	waiters field is important: when we suspend a thread, we first
	reserve the cell and then set the waiters field to 1. When threads are
	released in mutex_exit, the waiters field is first set to zero and
	then the event is set to the signaled state. */

	mutex_set_waiters(mutex, 1);

	/* Still try a few times to reserve the mutex */
	for (i = 0; i < 4; i++) {
		if (mutex_test_and_set(mutex) == 0) {
			/* Succeeded! Free the reserved wait cell */

			sync_array_free_cell_protected(sync_primary_wait_array,

			ut_d(mutex->thread_id = os_thread_get_curr_id());
#ifdef UNIV_SYNC_DEBUG
			mutex_set_debug_info(mutex, file_name, line);

#ifdef UNIV_SRV_PRINT_LATCH_WAITS
			fprintf(stderr, "Thread %lu spin wait succeeds at 2:"
				(ulong) os_thread_pf(os_thread_get_curr_id()),

			/* Note that in this case we leave the waiters field
			set to 1. We cannot reset it to zero, as we do not
			know if there are other waiters. */

	/* Now we know that there has been some thread holding the mutex
	after the change in the wait array and the waiters field was made.
	Now there is no risk of infinite wait on the event. */

#ifdef UNIV_SRV_PRINT_LATCH_WAITS
	"Thread %lu OS wait mutex at %p cfile %s cline %lu rnds %lu\n",
	(ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
	mutex->cfile_name, (ulong) mutex->cline, (ulong) i);

	mutex_system_call_count++;
	mutex_os_wait_count++;

#ifndef UNIV_HOTBACKUP
	mutex->count_os_wait++;

	/* !!!!! Sometimes os_wait can be called without os_thread_yield */

	if (timed_mutexes == 1 && timer_started == 0) {
		ut_usectime(&sec, &ms);
		lstart_time = (ib_longlong)sec * 1000000 + ms;
# endif /* UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */

	sync_array_wait_event(sync_primary_wait_array, index);

#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
	if (timed_mutexes == 1 && timer_started == 1) {
		ut_usectime(&sec, &ms);
		lfinish_time = (ib_longlong)sec * 1000000 + ms;

		ltime_diff = (ulint) (lfinish_time - lstart_time);
		mutex->lspent_time += ltime_diff;

		if (mutex->lmax_spent_time < ltime_diff) {
			mutex->lmax_spent_time = ltime_diff;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */

/**********************************************************************
Releases the threads waiting in the primary wait array for this mutex. */
	mutex_t*	mutex)	/* in: mutex */
	mutex_set_waiters(mutex, 0);

	/* The memory order of resetting the waiters field and
	signaling the object is important. See LEMMA 1 above. */

	sync_array_signal_object(sync_primary_wait_array, mutex);

#ifdef UNIV_SYNC_DEBUG
/**********************************************************************
Sets the debug information for a reserved mutex. */
mutex_set_debug_info(
/*=================*/
	mutex_t*	mutex,		/* in: mutex */
	const char*	file_name,	/* in: file where requested */
	ulint		line)		/* in: line where requested */
	sync_thread_add_level(mutex, mutex->level);

	mutex->file_name = file_name;

/**********************************************************************
Gets the debug information for a reserved mutex. */
mutex_get_debug_info(
/*=================*/
	mutex_t*	mutex,		/* in: mutex */
	const char**	file_name,	/* out: file where requested */
	ulint*		line,		/* out: line where requested */
	os_thread_id_t*	thread_id)	/* out: id of the thread which owns
					the mutex */
	*file_name = mutex->file_name;
	*thread_id = mutex->thread_id;

/**********************************************************************
Prints debug info of currently reserved mutexes. */
mutex_list_print_info(
/*==================*/
	FILE*	file)		/* in: file where to print */
	const char*	file_name;
	os_thread_id_t	thread_id;

	      "----------\n", file);

	mutex_enter(&mutex_list_mutex);

	mutex = UT_LIST_GET_FIRST(mutex_list);

	while (mutex != NULL) {

		if (mutex_get_lock_word(mutex) != 0) {
			mutex_get_debug_info(mutex, &file_name, &line,
				"Locked mutex: addr %p thread %ld"
				" file %s line %ld\n",
				(void*) mutex, os_thread_pf(thread_id),

		mutex = UT_LIST_GET_NEXT(list, mutex);

	fprintf(file, "Total number of mutexes %ld\n", count);

	mutex_exit(&mutex_list_mutex);

/**********************************************************************
Counts currently reserved mutexes. Works only in the debug version. */
mutex_n_reserved(void)
/*==================*/
	mutex_enter(&mutex_list_mutex);

	mutex = UT_LIST_GET_FIRST(mutex_list);

	while (mutex != NULL) {
		if (mutex_get_lock_word(mutex) != 0) {

		mutex = UT_LIST_GET_NEXT(list, mutex);

	mutex_exit(&mutex_list_mutex);

	return(count - 1);	/* Subtract one, because this function itself
				was holding one mutex (mutex_list_mutex) */

/**********************************************************************
Returns TRUE if no mutex or rw-lock is currently locked. Works only in
the debug version. */
	return(mutex_n_reserved() + rw_lock_n_locked() == 0);

/**********************************************************************
Gets the value in the nth slot in the thread level arrays. */
sync_thread_level_arrays_get_nth(
/*=============================*/
			/* out: pointer to thread slot */
	ulint	n)	/* in: slot number */
	ut_ad(n < OS_THREAD_MAX_N);

	return(sync_thread_level_arrays + n);

/**********************************************************************
Looks for the thread slot for the calling thread. */
sync_thread_level_arrays_find_slot(void)
/*====================================*/
			/* out: pointer to thread slot, NULL if not found */
	id = os_thread_get_curr_id();

	for (i = 0; i < OS_THREAD_MAX_N; i++) {

		slot = sync_thread_level_arrays_get_nth(i);

		if (slot->levels && os_thread_eq(slot->id, id)) {

/**********************************************************************
Looks for an unused thread slot. */
sync_thread_level_arrays_find_free(void)
/*====================================*/
			/* out: pointer to thread slot */
	for (i = 0; i < OS_THREAD_MAX_N; i++) {

		slot = sync_thread_level_arrays_get_nth(i);

		if (slot->levels == NULL) {

/**********************************************************************
Gets the value in the nth slot in the thread level array. */
sync_thread_levels_get_nth(
/*=======================*/
				/* out: pointer to level slot */
	sync_level_t*	arr,	/* in: pointer to level array for an OS
				thread */
	ulint		n)	/* in: slot number */
	ut_ad(n < SYNC_THREAD_N_LEVELS);

/**********************************************************************
Checks if all the level values stored in the level array are greater than
the given limit. */
sync_thread_levels_g(
/*=================*/
				/* out: TRUE if all greater */
	sync_level_t*	arr,	/* in: pointer to level array for an OS
				thread */
	ulint		limit)	/* in: level limit */
	for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {

		slot = sync_thread_levels_get_nth(arr, i);

		if (slot->latch != NULL) {
			if (slot->level <= limit) {

				"InnoDB: sync levels should be"
				" > %lu but a level is %lu\n",
				(ulong) limit, (ulong) slot->level);

				if (mutex->magic_n == MUTEX_MAGIC_N) {
					"Mutex created at %s %lu\n",
					(ulong) mutex->cline);

					if (mutex_get_lock_word(mutex) != 0) {
						const char*	file_name;
						os_thread_id_t	thread_id;

						mutex_get_debug_info(
						"InnoDB: Locked mutex:"
						" addr %p thread %ld"
						" file %s line %ld\n",

					fputs("Not locked\n", stderr);

/**********************************************************************
Checks if the level value is stored in the level array. */
sync_thread_levels_contain(
/*=======================*/
				/* out: TRUE if stored */
	sync_level_t*	arr,	/* in: pointer to level array for an OS
				thread */
	ulint		level)	/* in: level */
	for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {

		slot = sync_thread_levels_get_nth(arr, i);

		if (slot->latch != NULL) {
			if (slot->level == level) {

/**********************************************************************
Checks that the level array for the current thread is empty. */
sync_thread_levels_empty_gen(
/*=========================*/
					/* out: TRUE if empty except the
					exceptions specified below */
	ibool	dict_mutex_allowed)	/* in: TRUE if dictionary mutex is
					allowed to be owned by the thread,
					also purge_is_running mutex is
					allowed */
	sync_thread_t*	thread_slot;

	if (!sync_order_checks_on) {

	mutex_enter(&sync_thread_mutex);

	thread_slot = sync_thread_level_arrays_find_slot();

	if (thread_slot == NULL) {

		mutex_exit(&sync_thread_mutex);

	arr = thread_slot->levels;

	for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {

		slot = sync_thread_levels_get_nth(arr, i);

		if (slot->latch != NULL
		    && (!dict_mutex_allowed
			|| (slot->level != SYNC_DICT
			    && slot->level != SYNC_DICT_OPERATION))) {

			mutex_exit(&sync_thread_mutex);

	mutex_exit(&sync_thread_mutex);

/**********************************************************************
Checks that the level array for the current thread is empty. */
sync_thread_levels_empty(void)
/*==========================*/
			/* out: TRUE if empty */
	return(sync_thread_levels_empty_gen(FALSE));

/**********************************************************************
Adds a latch and its level in the thread level array. Allocates the memory
for the array if called for the first time for this OS thread. Makes the
checks against other latch levels stored in the array for this thread. */
sync_thread_add_level(
/*==================*/
	void*	latch,	/* in: pointer to a mutex or an rw-lock */
	ulint	level)	/* in: level in the latching order; if
			SYNC_LEVEL_VARYING, nothing is done */
	sync_thread_t*	thread_slot;

	if (!sync_order_checks_on) {

	if ((latch == (void*)&sync_thread_mutex)
	    || (latch == (void*)&mutex_list_mutex)
	    || (latch == (void*)&rw_lock_debug_mutex)
	    || (latch == (void*)&rw_lock_list_mutex)) {

	if (level == SYNC_LEVEL_VARYING) {

	mutex_enter(&sync_thread_mutex);

	thread_slot = sync_thread_level_arrays_find_slot();

	if (thread_slot == NULL) {
		/* We have to allocate the level array for a new thread */
		array = ut_malloc(sizeof(sync_level_t) * SYNC_THREAD_N_LEVELS);

		thread_slot = sync_thread_level_arrays_find_free();

		thread_slot->id = os_thread_get_curr_id();
		thread_slot->levels = array;

		for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {

			slot = sync_thread_levels_get_nth(array, i);

	array = thread_slot->levels;

	/* NOTE that there is a problem with _NODE and _LEAF levels: if the
	B-tree height changes, then a leaf can change to an internal node
	or the other way around. We do not know at present if this can cause
	unnecessary assertion failures below. */

	case SYNC_NO_ORDER_CHECK:
	case SYNC_EXTERN_STORAGE:
	case SYNC_TREE_NODE_FROM_HASH:
		/* Do no order checking */
		ut_a(sync_thread_levels_g(array, SYNC_MEM_POOL));
		ut_a(sync_thread_levels_g(array, SYNC_MEM_HASH));
		ut_a(sync_thread_levels_g(array, SYNC_RECV));
	case SYNC_WORK_QUEUE:
		ut_a(sync_thread_levels_g(array, SYNC_WORK_QUEUE));
		ut_a(sync_thread_levels_g(array, SYNC_LOG));
	case SYNC_THR_LOCAL:
		ut_a(sync_thread_levels_g(array, SYNC_THR_LOCAL));
	case SYNC_ANY_LATCH:
		ut_a(sync_thread_levels_g(array, SYNC_ANY_LATCH));
	case SYNC_TRX_SYS_HEADER:
		ut_a(sync_thread_levels_g(array, SYNC_TRX_SYS_HEADER));
	case SYNC_DOUBLEWRITE:
		ut_a(sync_thread_levels_g(array, SYNC_DOUBLEWRITE));
	case SYNC_BUF_BLOCK:
		ut_a((sync_thread_levels_contain(array, SYNC_BUF_POOL)
		      && sync_thread_levels_g(array, SYNC_BUF_BLOCK - 1))
		     || sync_thread_levels_g(array, SYNC_BUF_BLOCK));
		ut_a(sync_thread_levels_g(array, SYNC_BUF_POOL));
	case SYNC_SEARCH_SYS:
		ut_a(sync_thread_levels_g(array, SYNC_SEARCH_SYS));
	case SYNC_TRX_LOCK_HEAP:
		ut_a(sync_thread_levels_g(array, SYNC_TRX_LOCK_HEAP));
		ut_a((sync_thread_levels_contain(array, SYNC_KERNEL)
		      && sync_thread_levels_g(array, SYNC_REC_LOCK - 1))
		     || sync_thread_levels_g(array, SYNC_REC_LOCK));
		ut_a(sync_thread_levels_g(array, SYNC_KERNEL));
	case SYNC_IBUF_BITMAP:
		ut_a((sync_thread_levels_contain(array, SYNC_IBUF_BITMAP_MUTEX)
		      && sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1))
		     || sync_thread_levels_g(array, SYNC_IBUF_BITMAP));
	case SYNC_IBUF_BITMAP_MUTEX:
		ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP_MUTEX));
		ut_a(sync_thread_levels_contain(array, SYNC_FSP));
		ut_a(sync_thread_levels_contain(array, SYNC_FSP)
		     || sync_thread_levels_g(array, SYNC_FSP));
	case SYNC_TRX_UNDO_PAGE:
		ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
		     || sync_thread_levels_contain(array, SYNC_RSEG)
		     || sync_thread_levels_contain(array, SYNC_PURGE_SYS)
		     || sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE));
	case SYNC_RSEG_HEADER:
		ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
	case SYNC_RSEG_HEADER_NEW:
		ut_a(sync_thread_levels_contain(array, SYNC_KERNEL)
		     && sync_thread_levels_contain(array, SYNC_FSP_PAGE));
		ut_a(sync_thread_levels_g(array, SYNC_RSEG));
		ut_a(sync_thread_levels_g(array, SYNC_TRX_UNDO));
	case SYNC_PURGE_LATCH:
		ut_a(sync_thread_levels_g(array, SYNC_PURGE_LATCH));
	case SYNC_PURGE_SYS:
		ut_a(sync_thread_levels_g(array, SYNC_PURGE_SYS));
	case SYNC_TREE_NODE:
		ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
		     || sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
	case SYNC_TREE_NODE_NEW:
		ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE)
		     || sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
	case SYNC_INDEX_TREE:
		ut_a((sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
		      && sync_thread_levels_contain(array, SYNC_FSP)
		      && sync_thread_levels_g(array, SYNC_FSP_PAGE - 1))
		     || sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
	case SYNC_IBUF_MUTEX:
		ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1));
	case SYNC_IBUF_PESS_INSERT_MUTEX:
		ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
		     && !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
	case SYNC_IBUF_HEADER:
		ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
		     && !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
		     && !sync_thread_levels_contain(
			     array, SYNC_IBUF_PESS_INSERT_MUTEX));
	case SYNC_DICT_AUTOINC_MUTEX:
		ut_a(sync_thread_levels_g(array, SYNC_DICT_AUTOINC_MUTEX));
	case SYNC_DICT_OPERATION:
		ut_a(sync_thread_levels_g(array, SYNC_DICT_OPERATION));
	case SYNC_DICT_HEADER:
		ut_a(sync_thread_levels_g(array, SYNC_DICT_HEADER));
		ut_a(buf_debug_prints
		     || sync_thread_levels_g(array, SYNC_DICT));
#else /* UNIV_DEBUG */
		ut_a(sync_thread_levels_g(array, SYNC_DICT));
#endif /* UNIV_DEBUG */

	for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {

		slot = sync_thread_levels_get_nth(array, i);

		if (slot->latch == NULL) {
			slot->latch = latch;
			slot->level = level;

	ut_a(i < SYNC_THREAD_N_LEVELS);

	mutex_exit(&sync_thread_mutex);

/**********************************************************************
Removes a latch from the thread level array if it is found there. */
sync_thread_reset_level(
/*====================*/
			/* out: TRUE if found in the array; it is an error
			if the latch is not found */
	void*	latch)	/* in: pointer to a mutex or an rw-lock */
	sync_level_t*	array;
	sync_thread_t*	thread_slot;

	if (!sync_order_checks_on) {

	if ((latch == (void*)&sync_thread_mutex)
	    || (latch == (void*)&mutex_list_mutex)
	    || (latch == (void*)&rw_lock_debug_mutex)
	    || (latch == (void*)&rw_lock_list_mutex)) {

	mutex_enter(&sync_thread_mutex);

	thread_slot = sync_thread_level_arrays_find_slot();

	if (thread_slot == NULL) {

		mutex_exit(&sync_thread_mutex);

	array = thread_slot->levels;

	for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {

		slot = sync_thread_levels_get_nth(array, i);

		if (slot->latch == latch) {

			mutex_exit(&sync_thread_mutex);

	mutex_exit(&sync_thread_mutex);
#endif /* UNIV_SYNC_DEBUG */

/**********************************************************************
Initializes the synchronization data structures. */
#ifdef UNIV_SYNC_DEBUG
	sync_thread_t*	thread_slot;
#endif /* UNIV_SYNC_DEBUG */

	ut_a(sync_initialized == FALSE);

	sync_initialized = TRUE;

	/* Create the primary system wait array which is protected by an OS
	mutex */

	sync_primary_wait_array = sync_array_create(OS_THREAD_MAX_N,
						    SYNC_ARRAY_OS_MUTEX);
#ifdef UNIV_SYNC_DEBUG
	/* Create the thread latch level array where the latch levels
	are stored for each OS thread */

	sync_thread_level_arrays = ut_malloc(OS_THREAD_MAX_N
					     * sizeof(sync_thread_t));
	for (i = 0; i < OS_THREAD_MAX_N; i++) {

		thread_slot = sync_thread_level_arrays_get_nth(i);
		thread_slot->levels = NULL;
#endif /* UNIV_SYNC_DEBUG */

	/* Init the mutex list and create the mutex to protect it. */

	UT_LIST_INIT(mutex_list);
	mutex_create(&mutex_list_mutex, SYNC_NO_ORDER_CHECK);
#ifdef UNIV_SYNC_DEBUG
	mutex_create(&sync_thread_mutex, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */

	/* Init the rw-lock list and create the mutex to protect it. */

	UT_LIST_INIT(rw_lock_list);
	mutex_create(&rw_lock_list_mutex, SYNC_NO_ORDER_CHECK);

#ifdef UNIV_SYNC_DEBUG
	mutex_create(&rw_lock_debug_mutex, SYNC_NO_ORDER_CHECK);

	rw_lock_debug_event = os_event_create(NULL);
	rw_lock_debug_waiters = FALSE;
#endif /* UNIV_SYNC_DEBUG */

/**********************************************************************
Frees the resources in InnoDB's own synchronization data structures. Use
os_sync_free() after calling this. */
	sync_array_free(sync_primary_wait_array);

	mutex = UT_LIST_GET_FIRST(mutex_list);

		mutex = UT_LIST_GET_FIRST(mutex_list);

	mutex_free(&mutex_list_mutex);
#ifdef UNIV_SYNC_DEBUG
	mutex_free(&sync_thread_mutex);
#endif /* UNIV_SYNC_DEBUG */

/***********************************************************************
Prints wait info of the sync system. */
sync_print_wait_info(
/*=================*/
	FILE*	file)		/* in: file where to print */
#ifdef UNIV_SYNC_DEBUG
	fprintf(file, "Mutex exits %lu, rws exits %lu, rwx exits %lu\n",
		mutex_exit_count, rw_s_exit_count, rw_x_exit_count);

		"Mutex spin waits %lu, rounds %lu, OS waits %lu\n"
		"RW-shared spins %lu, OS waits %lu;"
		" RW-excl spins %lu, OS waits %lu\n",
		(ulong) mutex_spin_wait_count,
		(ulong) mutex_spin_round_count,
		(ulong) mutex_os_wait_count,
		(ulong) rw_s_spin_wait_count,
		(ulong) rw_s_os_wait_count,
		(ulong) rw_x_spin_wait_count,
		(ulong) rw_x_os_wait_count);

/***********************************************************************
Prints info of the sync system. */
	FILE*	file)		/* in: file where to print */
#ifdef UNIV_SYNC_DEBUG
	mutex_list_print_info(file);

	rw_lock_list_print_info(file);
#endif /* UNIV_SYNC_DEBUG */

	sync_array_print_info(file, sync_primary_wait_array);

	sync_print_wait_info(file);