/*****************************************************************************

Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.

Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/

/**************************************************//**
@file sync/sync0sync.c
Mutex, the basic synchronization primitive

Created 9/5/1995 Heikki Tuuri
*******************************************************/
33
#include "sync0sync.h"
35
#include "sync0sync.ic"
41
#include "buf0types.h"
42
#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
43
#ifdef UNIV_SYNC_DEBUG
44
# include "srv0start.h" /* srv_is_being_started */
45
#endif /* UNIV_SYNC_DEBUG */
48
REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX
49
============================================
51
Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc
52
takes 3 microseconds (us) for a lock-unlock pair and Windows NT on a 1995
53
Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to
54
implement our own efficient spin lock mutex. Future operating systems may
55
provide efficient spin locks, but we cannot count on that.
57
Another reason for implementing a spin lock is that on multiprocessor systems
58
it can be more efficient for a processor to run a loop waiting for the
59
semaphore to be released than to switch to a different thread. A thread switch
60
takes 25 us on both platforms mentioned above. See Gray and Reuter's book
61
Transaction processing for background.
63
How long should the spin loop last before suspending the thread? On a
64
uniprocessor, spinning does not help at all, because if the thread owning the
65
mutex is not executing, it cannot be released. Spinning actually wastes
68
On a multiprocessor, we do not know if the thread owning the mutex is
69
executing or not. Thus it would make sense to spin as long as the operation
70
guarded by the mutex would typically last assuming that the thread is
71
executing. If the mutex is not released by that time, we may assume that the
72
thread owning the mutex is not executing and suspend the waiting thread.
74
A typical operation (where no i/o involved) guarded by a mutex or a read-write
75
lock may last 1 - 20 us on the current Pentium platform. The longest
76
operations are the binary searches on an index node.
78
We conclude that the best choice is to set the spin time at 20 us. Then the
79
system should work well on a multiprocessor. On a uniprocessor we have to
80
make sure that thread swithches due to mutex collisions are not frequent,
81
i.e., they do not happen every 100 us or so, because that wastes too much
82
resources. If the thread switches are not frequent, the 20 us wasted in spin
85
Empirical studies on the effect of spin time should be done for different
89
IMPLEMENTATION OF THE MUTEX
90
===========================
92
For background, see Curt Schimmel's book on Unix implementation on modern
93
architectures. The key points in the implementation are atomicity and
94
serialization of memory accesses. The test-and-set instruction (XCHG in
95
Pentium) must be atomic. As new processors may have weak memory models, also
96
serialization of memory references may be necessary. The successor of Pentium,
97
P6, has at least one mode where the memory model is weak. As far as we know,
98
in Pentium all memory accesses are serialized in the program order and we do
99
not have to worry about the memory model. On other processors there are
100
special machine instructions called a fence, memory barrier, or storage
101
barrier (STBAR in Sparc), which can be used to serialize the memory accesses
102
to happen in program order relative to the fence instruction.
104
Leslie Lamport has devised a "bakery algorithm" to implement a mutex without
105
the atomic test-and-set, but his algorithm should be modified for weak memory
106
models. We do not use Lamport's algorithm, because we guess it is slower than
107
the atomic test-and-set.
109
Our mutex implementation works as follows: After that we perform the atomic
110
test-and-set instruction on the memory word. If the test returns zero, we
111
know we got the lock first. If the test returns not zero, some other thread
112
was quicker and got the lock: then we spin in a loop reading the memory word,
113
waiting it to become zero. It is wise to just read the word in the loop, not
114
perform numerous test-and-set instructions, because they generate memory
115
traffic between the cache and the main memory. The read loop can just access
116
the cache, saving bus bandwidth.
118
If we cannot acquire the mutex lock in the specified time, we reserve a cell
119
in the wait array, set the waiters byte in the mutex to 1. To avoid a race
120
condition, after setting the waiters byte and before suspending the waiting
121
thread, we still have to check that the mutex is reserved, because it may
122
have happened that the thread which was holding the mutex has just released
123
it and did not see the waiters byte set to 1, a case which would lead the
124
other thread to an infinite wait.
126
LEMMA 1: After a thread resets the event of a mutex (or rw_lock), some
128
thread will eventually call os_event_set() on that particular event.
129
Thus no infinite wait is possible in this case.
131
Proof: After making the reservation the thread sets the waiters field in the
132
mutex to 1. Then it checks that the mutex is still reserved by some thread,
133
or it reserves the mutex for itself. In any case, some thread (which may be
134
also some earlier thread, not necessarily the one currently holding the mutex)
135
will set the waiters field to 0 in mutex_exit, and then call
136
os_event_set() with the mutex as an argument.
139
LEMMA 2: If an os_event_set() call is made after some thread has called
141
the os_event_reset() and before it starts wait on that event, the call
142
will not be lost to the second thread. This is true even if there is an
143
intervening call to os_event_reset() by another thread.
144
Thus no infinite wait is possible in this case.
146
Proof (non-windows platforms): os_event_reset() returns a monotonically
147
increasing value of signal_count. This value is increased at every
148
call of os_event_set() If thread A has called os_event_reset() followed
149
by thread B calling os_event_set() and then some other thread C calling
150
os_event_reset(), the is_set flag of the event will be set to FALSE;
151
but now if thread A calls os_event_wait_low() with the signal_count
152
value returned from the earlier call of os_event_reset(), it will
153
return immediately without waiting.
156
Proof (windows): If there is a writer thread which is forced to wait for
157
the lock, it may be able to set the state of rw_lock to RW_LOCK_WAIT_EX
158
The design of rw_lock ensures that there is one and only one thread
159
that is able to change the state to RW_LOCK_WAIT_EX and this thread is
160
guaranteed to acquire the lock after it is released by the current
161
holders and before any other waiter gets the lock.
162
On windows this thread waits on a separate event i.e.: wait_ex_event.
163
Since only one thread can wait on this event there is no chance
164
of this event getting reset before the writer starts wait on it.
165
Therefore, this thread is guaranteed to catch the os_set_event()
166
signalled unconditionally at the release of the lock.
169
/* Number of spin waits on mutexes: for performance monitoring */
171
/** The number of iterations in the mutex_spin_wait() spin loop.
172
Intended for performance monitoring. */
173
static ib_int64_t mutex_spin_round_count = 0;
174
/** The number of mutex_spin_wait() calls. Intended for
175
performance monitoring. */
176
static ib_int64_t mutex_spin_wait_count = 0;
177
/** The number of OS waits in mutex_spin_wait(). Intended for
178
performance monitoring. */
179
static ib_int64_t mutex_os_wait_count = 0;
180
/** The number of mutex_exit() calls. Intended for performance
182
UNIV_INTERN ib_int64_t mutex_exit_count = 0;
184
/** The global array of wait cells for implementation of the database's own
185
mutexes and read-write locks */
186
UNIV_INTERN sync_array_t* sync_primary_wait_array;
188
/** This variable is set to TRUE when sync_init is called */
189
UNIV_INTERN ibool sync_initialized = FALSE;
191
/** An acquired mutex or rw-lock and its level in the latching order */
192
typedef struct sync_level_struct sync_level_t;
193
/** Mutexes or rw-locks held by a thread */
194
typedef struct sync_thread_struct sync_thread_t;
196
#ifdef UNIV_SYNC_DEBUG
197
/** The latch levels currently owned by threads are stored in this data
198
structure; the size of this array is OS_THREAD_MAX_N */
200
UNIV_INTERN sync_thread_t* sync_thread_level_arrays;
202
/** Mutex protecting sync_thread_level_arrays */
203
UNIV_INTERN mutex_t sync_thread_mutex;
205
# ifdef UNIV_PFS_MUTEX
206
UNIV_INTERN mysql_pfs_key_t sync_thread_mutex_key;
207
# endif /* UNIV_PFS_MUTEX */
208
#endif /* UNIV_SYNC_DEBUG */
210
/** Global list of database mutexes (not OS mutexes) created. */
211
UNIV_INTERN ut_list_base_node_t mutex_list;
213
/** Mutex protecting the mutex_list variable */
214
UNIV_INTERN mutex_t mutex_list_mutex;
216
#ifdef UNIV_PFS_MUTEX
217
UNIV_INTERN mysql_pfs_key_t mutex_list_mutex_key;
218
#endif /* UNIV_PFS_MUTEX */
220
#ifdef UNIV_SYNC_DEBUG
221
/** Latching order checks start when this is set TRUE */
222
UNIV_INTERN ibool sync_order_checks_on = FALSE;
223
#endif /* UNIV_SYNC_DEBUG */
225
/** Mutexes or rw-locks held by a thread */
226
struct sync_thread_struct{
227
os_thread_id_t id; /*!< OS thread id */
228
sync_level_t* levels; /*!< level array for this thread; if
229
this is NULL this slot is unused */
232
/** Number of slots reserved for each OS thread in the sync level array */
233
#define SYNC_THREAD_N_LEVELS 10000
235
/** An acquired mutex or rw-lock and its level in the latching order */
236
struct sync_level_struct{
237
void* latch; /*!< pointer to a mutex or an rw-lock; NULL means that
239
ulint level; /*!< level of the latch in the latching order */
242
/******************************************************************//**
243
Creates, or rather, initializes a mutex object in a specified memory
244
location (which must be appropriately aligned). The mutex is initialized
245
in the reset state. Explicit freeing of the mutex with mutex_free is
246
necessary only if the memory block containing it is freed. */
251
mutex_t* mutex, /*!< in: pointer to memory */
253
const char* cmutex_name, /*!< in: mutex name */
254
# ifdef UNIV_SYNC_DEBUG
255
ulint level, /*!< in: level */
256
# endif /* UNIV_SYNC_DEBUG */
257
#endif /* UNIV_DEBUG */
258
const char* cfile_name, /*!< in: file name where created */
259
ulint cline) /*!< in: file line where created */
261
#if defined(HAVE_ATOMIC_BUILTINS)
262
mutex_reset_lock_word(mutex);
264
os_fast_mutex_init(&(mutex->os_fast_mutex));
265
mutex->lock_word = 0;
267
mutex->event = os_event_create(NULL);
268
mutex_set_waiters(mutex, 0);
270
mutex->magic_n = MUTEX_MAGIC_N;
271
#endif /* UNIV_DEBUG */
272
#ifdef UNIV_SYNC_DEBUG
274
mutex->file_name = "not yet reserved";
275
mutex->level = level;
276
#endif /* UNIV_SYNC_DEBUG */
277
mutex->cfile_name = cfile_name;
278
mutex->cline = cline;
279
mutex->count_os_wait = 0;
281
mutex->cmutex_name= cmutex_name;
282
mutex->count_using= 0;
283
mutex->mutex_type= 0;
284
mutex->lspent_time= 0;
285
mutex->lmax_spent_time= 0;
286
mutex->count_spin_loop= 0;
287
mutex->count_spin_rounds= 0;
288
mutex->count_os_yield= 0;
289
#endif /* UNIV_DEBUG */
291
/* Check that lock_word is aligned; this is important on Intel */
292
ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);
294
/* NOTE! The very first mutexes are not put to the mutex list */
296
if ((mutex == &mutex_list_mutex)
297
#ifdef UNIV_SYNC_DEBUG
298
|| (mutex == &sync_thread_mutex)
299
#endif /* UNIV_SYNC_DEBUG */
305
mutex_enter(&mutex_list_mutex);
307
ut_ad(UT_LIST_GET_LEN(mutex_list) == 0
308
|| UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N);
310
UT_LIST_ADD_FIRST(list, mutex_list, mutex);
312
mutex_exit(&mutex_list_mutex);
315
/******************************************************************//**
316
NOTE! Use the corresponding macro mutex_free(), not directly this function!
317
Calling this function is obligatory only if the memory buffer containing
318
the mutex is freed. Removes a mutex object from the mutex list. The mutex
319
is checked to be in the reset state. */
324
mutex_t* mutex) /*!< in: mutex */
326
ut_ad(mutex_validate(mutex));
327
ut_a(mutex_get_lock_word(mutex) == 0);
328
ut_a(mutex_get_waiters(mutex) == 0);
330
#ifdef UNIV_MEM_DEBUG
331
if (mutex == &mem_hash_mutex) {
332
ut_ad(UT_LIST_GET_LEN(mutex_list) == 1);
333
ut_ad(UT_LIST_GET_FIRST(mutex_list) == &mem_hash_mutex);
334
UT_LIST_REMOVE(list, mutex_list, mutex);
337
#endif /* UNIV_MEM_DEBUG */
339
if (mutex != &mutex_list_mutex
340
#ifdef UNIV_SYNC_DEBUG
341
&& mutex != &sync_thread_mutex
342
#endif /* UNIV_SYNC_DEBUG */
345
mutex_enter(&mutex_list_mutex);
347
ut_ad(!UT_LIST_GET_PREV(list, mutex)
348
|| UT_LIST_GET_PREV(list, mutex)->magic_n
350
ut_ad(!UT_LIST_GET_NEXT(list, mutex)
351
|| UT_LIST_GET_NEXT(list, mutex)->magic_n
354
UT_LIST_REMOVE(list, mutex_list, mutex);
356
mutex_exit(&mutex_list_mutex);
359
os_event_free(mutex->event);
360
#ifdef UNIV_MEM_DEBUG
362
#endif /* UNIV_MEM_DEBUG */
363
#if !defined(HAVE_ATOMIC_BUILTINS)
364
os_fast_mutex_free(&(mutex->os_fast_mutex));
366
/* If we free the mutex protecting the mutex list (freeing is
367
not necessary), we have to reset the magic number AFTER removing
371
#endif /* UNIV_DEBUG */
374
/********************************************************************//**
375
NOTE! Use the corresponding macro in the header file, not this function
376
directly. Tries to lock the mutex for the current thread. If the lock is not
377
acquired immediately, returns with return value 1.
378
@return 0 if succeed, 1 if not */
381
mutex_enter_nowait_func(
382
/*====================*/
383
mutex_t* mutex, /*!< in: pointer to mutex */
384
const char* file_name __attribute__((unused)),
385
/*!< in: file name where mutex
387
ulint line __attribute__((unused)))
388
/*!< in: line where requested */
390
ut_ad(mutex_validate(mutex));
392
if (!mutex_test_and_set(mutex)) {
394
ut_d(mutex->thread_id = os_thread_get_curr_id());
395
#ifdef UNIV_SYNC_DEBUG
396
mutex_set_debug_info(mutex, file_name, line);
399
return(0); /* Succeeded! */
406
/******************************************************************//**
407
Checks that the mutex has been initialized.
413
const mutex_t* mutex) /*!< in: mutex */
416
ut_a(mutex->magic_n == MUTEX_MAGIC_N);
421
/******************************************************************//**
422
Checks that the current thread owns the mutex. Works only in the debug
424
@return TRUE if owns */
429
const mutex_t* mutex) /*!< in: mutex */
431
ut_ad(mutex_validate(mutex));
433
return(mutex_get_lock_word(mutex) == 1
434
&& os_thread_eq(mutex->thread_id, os_thread_get_curr_id()));
436
#endif /* UNIV_DEBUG */
438
/******************************************************************//**
439
Sets the waiters field in a mutex. */
444
mutex_t* mutex, /*!< in: mutex */
445
ulint n) /*!< in: value to set */
447
volatile ulint* ptr; /* declared volatile to ensure that
448
the value is stored to memory */
451
ptr = &(mutex->waiters);
453
*ptr = n; /* Here we assume that the write of a single
454
word in memory is atomic */
457
/******************************************************************//**
458
Reserves a mutex for the current thread. If the mutex is reserved, the
459
function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
460
for the mutex before suspending the thread. */
465
mutex_t* mutex, /*!< in: pointer to mutex */
466
const char* file_name, /*!< in: file name where mutex
468
ulint line) /*!< in: line where requested */
470
ulint index; /* index of the reserved wait cell */
471
ulint i; /* spin round count */
473
ib_int64_t lstart_time = 0, lfinish_time; /* for timing os_wait */
477
uint timer_started = 0;
478
#endif /* UNIV_DEBUG */
481
/* This update is not thread safe, but we don't mind if the count
482
isn't exact. Moved out of ifdef that follows because we are willing
483
to sacrifice the cost of counting this as the data is valuable.
484
Count the number of calls to mutex_spin_wait. */
485
mutex_spin_wait_count++;
491
/* Spin waiting for the lock word to become zero. Note that we do
492
not have to assume that the read access to the lock word is atomic,
493
as the actual locking is always committed with atomic test-and-set.
494
In reality, however, all processors probably have an atomic read of
498
ut_d(mutex->count_spin_loop++);
500
while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
501
if (srv_spin_wait_delay) {
502
ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
508
if (i == SYNC_SPIN_ROUNDS) {
510
mutex->count_os_yield++;
511
#ifndef UNIV_HOTBACKUP
512
if (timed_mutexes && timer_started == 0) {
513
ut_usectime(&sec, &ms);
514
lstart_time= (ib_int64_t)sec * 1000000 + ms;
517
#endif /* UNIV_HOTBACKUP */
518
#endif /* UNIV_DEBUG */
522
#ifdef UNIV_SRV_PRINT_LATCH_WAITS
524
"Thread %lu spin wait mutex at %p"
525
" cfile %s cline %lu rnds %lu\n",
526
(ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
527
mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
530
mutex_spin_round_count += i;
532
ut_d(mutex->count_spin_rounds += i);
534
if (mutex_test_and_set(mutex) == 0) {
537
ut_d(mutex->thread_id = os_thread_get_curr_id());
538
#ifdef UNIV_SYNC_DEBUG
539
mutex_set_debug_info(mutex, file_name, line);
545
/* We may end up with a situation where lock_word is 0 but the OS
546
fast mutex is still reserved. On FreeBSD the OS does not seem to
547
schedule a thread which is constantly calling pthread_mutex_trylock
548
(in mutex_test_and_set implementation). Then we could end up
549
spinning here indefinitely. The following 'i++' stops this infinite
554
if (i < SYNC_SPIN_ROUNDS) {
558
sync_array_reserve_cell(sync_primary_wait_array, mutex,
559
SYNC_MUTEX, file_name, line, &index);
561
/* The memory order of the array reservation and the change in the
562
waiters field is important: when we suspend a thread, we first
563
reserve the cell and then set waiters field to 1. When threads are
564
released in mutex_exit, the waiters field is first set to zero and
565
then the event is set to the signaled state. */
567
mutex_set_waiters(mutex, 1);
569
/* Try to reserve still a few times */
570
for (i = 0; i < 4; i++) {
571
if (mutex_test_and_set(mutex) == 0) {
572
/* Succeeded! Free the reserved wait cell */
574
sync_array_free_cell(sync_primary_wait_array, index);
576
ut_d(mutex->thread_id = os_thread_get_curr_id());
577
#ifdef UNIV_SYNC_DEBUG
578
mutex_set_debug_info(mutex, file_name, line);
581
#ifdef UNIV_SRV_PRINT_LATCH_WAITS
582
fprintf(stderr, "Thread %lu spin wait succeeds at 2:"
584
(ulong) os_thread_pf(os_thread_get_curr_id()),
590
/* Note that in this case we leave the waiters field
591
set to 1. We cannot reset it to zero, as we do not
592
know if there are other waiters. */
596
/* Now we know that there has been some thread holding the mutex
597
after the change in the wait array and the waiters field was made.
598
Now there is no risk of infinite wait on the event. */
600
#ifdef UNIV_SRV_PRINT_LATCH_WAITS
602
"Thread %lu OS wait mutex at %p cfile %s cline %lu rnds %lu\n",
603
(ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
604
mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
607
mutex_os_wait_count++;
609
mutex->count_os_wait++;
611
/* !!!!! Sometimes os_wait can be called without os_thread_yield */
612
#ifndef UNIV_HOTBACKUP
613
if (timed_mutexes == 1 && timer_started == 0) {
614
ut_usectime(&sec, &ms);
615
lstart_time= (ib_int64_t)sec * 1000000 + ms;
618
#endif /* UNIV_HOTBACKUP */
619
#endif /* UNIV_DEBUG */
621
sync_array_wait_event(sync_primary_wait_array, index);
626
if (timed_mutexes == 1 && timer_started==1) {
627
ut_usectime(&sec, &ms);
628
lfinish_time= (ib_int64_t)sec * 1000000 + ms;
630
ltime_diff= (ulint) (lfinish_time - lstart_time);
631
mutex->lspent_time += ltime_diff;
633
if (mutex->lmax_spent_time < ltime_diff) {
634
mutex->lmax_spent_time= ltime_diff;
637
#endif /* UNIV_DEBUG */
641
/******************************************************************//**
642
Releases the threads waiting in the primary wait array for this mutex. */
647
mutex_t* mutex) /*!< in: mutex */
649
mutex_set_waiters(mutex, 0);
651
/* The memory order of resetting the waiters field and
652
signaling the object is important. See LEMMA 1 above. */
653
os_event_set(mutex->event);
654
sync_array_object_signalled(sync_primary_wait_array);
657
#ifdef UNIV_SYNC_DEBUG
658
/******************************************************************//**
659
Sets the debug information for a reserved mutex. */
662
mutex_set_debug_info(
663
/*=================*/
664
mutex_t* mutex, /*!< in: mutex */
665
const char* file_name, /*!< in: file where requested */
666
ulint line) /*!< in: line where requested */
671
sync_thread_add_level(mutex, mutex->level);
673
mutex->file_name = file_name;
677
/******************************************************************//**
678
Gets the debug information for a reserved mutex. */
681
mutex_get_debug_info(
682
/*=================*/
683
mutex_t* mutex, /*!< in: mutex */
684
const char** file_name, /*!< out: file where requested */
685
ulint* line, /*!< out: line where requested */
686
os_thread_id_t* thread_id) /*!< out: id of the thread which owns
691
*file_name = mutex->file_name;
693
*thread_id = mutex->thread_id;
696
/******************************************************************//**
697
Prints debug info of currently reserved mutexes. */
700
mutex_list_print_info(
701
/*==================*/
702
FILE* file) /*!< in: file where to print */
705
const char* file_name;
707
os_thread_id_t thread_id;
712
"----------\n", file);
714
mutex_enter(&mutex_list_mutex);
716
mutex = UT_LIST_GET_FIRST(mutex_list);
718
while (mutex != NULL) {
721
if (mutex_get_lock_word(mutex) != 0) {
722
mutex_get_debug_info(mutex, &file_name, &line,
725
"Locked mutex: addr %p thread %ld"
726
" file %s line %ld\n",
727
(void*) mutex, os_thread_pf(thread_id),
731
mutex = UT_LIST_GET_NEXT(list, mutex);
734
fprintf(file, "Total number of mutexes %ld\n", count);
736
mutex_exit(&mutex_list_mutex);
739
/******************************************************************//**
740
Counts currently reserved mutexes. Works only in the debug version.
741
@return number of reserved mutexes */
744
mutex_n_reserved(void)
745
/*==================*/
750
mutex_enter(&mutex_list_mutex);
752
mutex = UT_LIST_GET_FIRST(mutex_list);
754
while (mutex != NULL) {
755
if (mutex_get_lock_word(mutex) != 0) {
760
mutex = UT_LIST_GET_NEXT(list, mutex);
763
mutex_exit(&mutex_list_mutex);
767
return(count - 1); /* Subtract one, because this function itself
768
was holding one mutex (mutex_list_mutex) */
771
/******************************************************************//**
772
Returns TRUE if no mutex or rw-lock is currently locked. Works only in
774
@return TRUE if no mutexes and rw-locks reserved */
780
return(mutex_n_reserved() + rw_lock_n_locked() == 0);
783
/******************************************************************//**
784
Gets the value in the nth slot in the thread level arrays.
785
@return pointer to thread slot */
788
sync_thread_level_arrays_get_nth(
789
/*=============================*/
790
ulint n) /*!< in: slot number */
792
ut_ad(n < OS_THREAD_MAX_N);
794
return(sync_thread_level_arrays + n);
797
/******************************************************************//**
798
Looks for the thread slot for the calling thread.
799
@return pointer to thread slot, NULL if not found */
802
sync_thread_level_arrays_find_slot(void)
803
/*====================================*/
810
id = os_thread_get_curr_id();
812
for (i = 0; i < OS_THREAD_MAX_N; i++) {
814
slot = sync_thread_level_arrays_get_nth(i);
816
if (slot->levels && os_thread_eq(slot->id, id)) {
825
/******************************************************************//**
826
Looks for an unused thread slot.
827
@return pointer to thread slot */
830
sync_thread_level_arrays_find_free(void)
831
/*====================================*/
837
for (i = 0; i < OS_THREAD_MAX_N; i++) {
839
slot = sync_thread_level_arrays_get_nth(i);
841
if (slot->levels == NULL) {
850
/******************************************************************//**
851
Gets the value in the nth slot in the thread level array.
852
@return pointer to level slot */
855
sync_thread_levels_get_nth(
856
/*=======================*/
857
sync_level_t* arr, /*!< in: pointer to level array for an OS
859
ulint n) /*!< in: slot number */
861
ut_ad(n < SYNC_THREAD_N_LEVELS);
866
/******************************************************************//**
867
Checks if all the level values stored in the level array are greater than
869
@return TRUE if all greater */
872
sync_thread_levels_g(
873
/*=================*/
874
sync_level_t* arr, /*!< in: pointer to level array for an OS
876
ulint limit, /*!< in: level limit */
877
ulint warn) /*!< in: TRUE=display a diagnostic message */
884
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
886
slot = sync_thread_levels_get_nth(arr, i);
888
if (slot->latch != NULL) {
889
if (slot->level <= limit) {
900
"InnoDB: sync levels should be"
901
" > %lu but a level is %lu\n",
902
(ulong) limit, (ulong) slot->level);
904
if (mutex->magic_n == MUTEX_MAGIC_N) {
906
"Mutex created at %s %lu\n",
908
(ulong) mutex->cline);
910
if (mutex_get_lock_word(mutex) != 0) {
911
const char* file_name;
913
os_thread_id_t thread_id;
915
mutex_get_debug_info(
920
"InnoDB: Locked mutex:"
921
" addr %p thread %ld"
922
" file %s line %ld\n",
929
fputs("Not locked\n", stderr);
943
/******************************************************************//**
944
Checks if the level value is stored in the level array.
945
@return TRUE if stored */
948
sync_thread_levels_contain(
949
/*=======================*/
950
sync_level_t* arr, /*!< in: pointer to level array for an OS
952
ulint level) /*!< in: level */
957
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
959
slot = sync_thread_levels_get_nth(arr, i);
961
if (slot->latch != NULL) {
962
if (slot->level == level) {
972
/******************************************************************//**
973
Checks if the level array for the current thread contains a
974
mutex or rw-latch at the specified level.
975
@return a matching latch, or NULL if not found */
978
sync_thread_levels_contains(
979
/*========================*/
980
ulint level) /*!< in: latching order level
984
sync_thread_t* thread_slot;
988
if (!sync_order_checks_on) {
993
mutex_enter(&sync_thread_mutex);
995
thread_slot = sync_thread_level_arrays_find_slot();
997
if (thread_slot == NULL) {
999
mutex_exit(&sync_thread_mutex);
1004
arr = thread_slot->levels;
1006
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
1008
slot = sync_thread_levels_get_nth(arr, i);
1010
if (slot->latch != NULL && slot->level == level) {
1012
mutex_exit(&sync_thread_mutex);
1013
return(slot->latch);
1017
mutex_exit(&sync_thread_mutex);
1022
/******************************************************************//**
1023
Checks that the level array for the current thread is empty.
1024
@return a latch, or NULL if empty except the exceptions specified below */
1027
sync_thread_levels_nonempty_gen(
1028
/*============================*/
1029
ibool dict_mutex_allowed) /*!< in: TRUE if dictionary mutex is
1030
allowed to be owned by the thread,
1031
also purge_is_running mutex is
1035
sync_thread_t* thread_slot;
1039
if (!sync_order_checks_on) {
1044
mutex_enter(&sync_thread_mutex);
1046
thread_slot = sync_thread_level_arrays_find_slot();
1048
if (thread_slot == NULL) {
1050
mutex_exit(&sync_thread_mutex);
1055
arr = thread_slot->levels;
1057
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
1059
slot = sync_thread_levels_get_nth(arr, i);
1061
if (slot->latch != NULL
1062
&& (!dict_mutex_allowed
1063
|| (slot->level != SYNC_DICT
1064
&& slot->level != SYNC_DICT_OPERATION))) {
1066
mutex_exit(&sync_thread_mutex);
1069
return(slot->latch);
1073
mutex_exit(&sync_thread_mutex);
1078
/******************************************************************//**
1079
Checks that the level array for the current thread is empty.
1080
@return TRUE if empty */
1083
sync_thread_levels_empty(void)
1084
/*==========================*/
1086
return(sync_thread_levels_empty_gen(FALSE));
1089
/******************************************************************//**
1090
Adds a latch and its level in the thread level array. Allocates the memory
1091
for the array if called first time for this OS thread. Makes the checks
1092
against other latch levels stored in the array for this thread. */
1095
sync_thread_add_level(
1096
/*==================*/
1097
void* latch, /*!< in: pointer to a mutex or an rw-lock */
1098
ulint level) /*!< in: level in the latching order; if
1099
SYNC_LEVEL_VARYING, nothing is done */
1101
sync_level_t* array;
1103
sync_thread_t* thread_slot;
1106
if (!sync_order_checks_on) {
1111
if ((latch == (void*)&sync_thread_mutex)
1112
|| (latch == (void*)&mutex_list_mutex)
1113
|| (latch == (void*)&rw_lock_debug_mutex)
1114
|| (latch == (void*)&rw_lock_list_mutex)) {
1119
if (level == SYNC_LEVEL_VARYING) {
1124
mutex_enter(&sync_thread_mutex);
1126
thread_slot = sync_thread_level_arrays_find_slot();
1128
if (thread_slot == NULL) {
1129
/* We have to allocate the level array for a new thread */
1130
array = ut_malloc(sizeof(sync_level_t) * SYNC_THREAD_N_LEVELS);
1132
thread_slot = sync_thread_level_arrays_find_free();
1134
thread_slot->id = os_thread_get_curr_id();
1135
thread_slot->levels = array;
1137
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
1139
slot = sync_thread_levels_get_nth(array, i);
1145
array = thread_slot->levels;
1147
/* NOTE that there is a problem with _NODE and _LEAF levels: if the
1148
B-tree height changes, then a leaf can change to an internal node
1149
or the other way around. We do not know at present if this can cause
1150
unnecessary assertion failures below. */
1153
case SYNC_NO_ORDER_CHECK:
1154
case SYNC_EXTERN_STORAGE:
1155
case SYNC_TREE_NODE_FROM_HASH:
1156
/* Do no order checking */
1158
case SYNC_TRX_SYS_HEADER:
1159
if (srv_is_being_started) {
1160
/* This is violated during trx_sys_create_rsegs()
1161
when creating additional rollback segments when
1162
upgrading in innobase_start_or_create_for_mysql(). */
1168
case SYNC_WORK_QUEUE:
1170
case SYNC_LOG_FLUSH_ORDER:
1171
case SYNC_THR_LOCAL:
1172
case SYNC_ANY_LATCH:
1173
case SYNC_FILE_FORMAT_TAG:
1174
case SYNC_DOUBLEWRITE:
1175
case SYNC_SEARCH_SYS:
1176
case SYNC_SEARCH_SYS_CONF:
1177
case SYNC_TRX_LOCK_HEAP:
1179
case SYNC_IBUF_BITMAP_MUTEX:
1182
case SYNC_PURGE_LATCH:
1183
case SYNC_PURGE_SYS:
1184
case SYNC_DICT_AUTOINC_MUTEX:
1185
case SYNC_DICT_OPERATION:
1186
case SYNC_DICT_HEADER:
1187
case SYNC_TRX_I_S_RWLOCK:
1188
case SYNC_TRX_I_S_LAST_READ:
1189
if (!sync_thread_levels_g(array, level, TRUE)) {
1191
"InnoDB: sync_thread_levels_g(array, %lu)"
1192
" does not hold!\n", level);
1196
case SYNC_BUF_FLUSH_LIST:
1198
/* We can have multiple mutexes of this type therefore we
1199
can only check whether the greater than condition holds. */
1200
if (!sync_thread_levels_g(array, level-1, TRUE)) {
1202
"InnoDB: sync_thread_levels_g(array, %lu)"
1203
" does not hold!\n", level-1);
1208
case SYNC_BUF_BLOCK:
1209
/* Either the thread must own the buffer pool mutex
1210
(buf_pool_mutex), or it is allowed to latch only ONE
1211
buffer block (block->mutex or buf_pool_zip_mutex). */
1212
if (!sync_thread_levels_g(array, level, FALSE)) {
1213
ut_a(sync_thread_levels_g(array, level - 1, TRUE));
1214
ut_a(sync_thread_levels_contain(array, SYNC_BUF_POOL));
1218
if (sync_thread_levels_contain(array, SYNC_KERNEL)) {
1219
ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK - 1,
1222
ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK, TRUE));
1225
case SYNC_IBUF_BITMAP:
1226
/* Either the thread must own the master mutex to all
1227
the bitmap pages, or it is allowed to latch only ONE
1229
if (sync_thread_levels_contain(array,
1230
SYNC_IBUF_BITMAP_MUTEX)) {
1231
ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1,
1234
/* This is violated during trx_sys_create_rsegs()
1235
when creating additional rollback segments when
1236
upgrading in innobase_start_or_create_for_mysql(). */
1237
ut_a(srv_is_being_started
1238
|| sync_thread_levels_g(array, SYNC_IBUF_BITMAP,
1243
ut_a(sync_thread_levels_contain(array, SYNC_FSP));
1246
ut_a(sync_thread_levels_contain(array, SYNC_FSP)
1247
|| sync_thread_levels_g(array, SYNC_FSP, TRUE));
1249
case SYNC_TRX_UNDO_PAGE:
1250
ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
1251
|| sync_thread_levels_contain(array, SYNC_RSEG)
1252
|| sync_thread_levels_contain(array, SYNC_PURGE_SYS)
1253
|| sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE, TRUE));
1255
case SYNC_RSEG_HEADER:
1256
ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
1258
case SYNC_RSEG_HEADER_NEW:
1259
ut_a(sync_thread_levels_contain(array, SYNC_KERNEL)
1260
&& sync_thread_levels_contain(array, SYNC_FSP_PAGE));
1262
case SYNC_TREE_NODE:
1263
ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
1264
|| sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
1265
|| sync_thread_levels_g(array, SYNC_TREE_NODE - 1, TRUE));
1267
case SYNC_TREE_NODE_NEW:
1268
ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE)
1269
|| sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
1271
case SYNC_INDEX_TREE:
1272
if (sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
1273
&& sync_thread_levels_contain(array, SYNC_FSP)) {
1274
ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1,
1277
ut_a(sync_thread_levels_g(array, SYNC_TREE_NODE - 1,
1281
case SYNC_IBUF_MUTEX:
1282
ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1, TRUE));
1284
case SYNC_IBUF_PESS_INSERT_MUTEX:
1285
ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
1286
ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
1288
case SYNC_IBUF_HEADER:
1289
ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
1290
ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
1291
ut_a(!sync_thread_levels_contain(array,
1292
SYNC_IBUF_PESS_INSERT_MUTEX));
1296
ut_a(buf_debug_prints
1297
|| sync_thread_levels_g(array, SYNC_DICT, TRUE));
1298
#else /* UNIV_DEBUG */
1299
ut_a(sync_thread_levels_g(array, SYNC_DICT, TRUE));
1300
#endif /* UNIV_DEBUG */
1306
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
1308
slot = sync_thread_levels_get_nth(array, i);
1310
if (slot->latch == NULL) {
1311
slot->latch = latch;
1312
slot->level = level;
1318
ut_a(i < SYNC_THREAD_N_LEVELS);
1320
mutex_exit(&sync_thread_mutex);
1323
/******************************************************************//**
1324
Removes a latch from the thread level array if it is found there.
1325
@return TRUE if found in the array; it is no error if the latch is
1326
not found, as we presently are not able to determine the level for
1327
every latch reservation the program does */
1330
sync_thread_reset_level(
1331
/*====================*/
1332
void* latch) /*!< in: pointer to a mutex or an rw-lock */
1334
sync_level_t* array;
1336
sync_thread_t* thread_slot;
1339
if (!sync_order_checks_on) {
1344
if ((latch == (void*)&sync_thread_mutex)
1345
|| (latch == (void*)&mutex_list_mutex)
1346
|| (latch == (void*)&rw_lock_debug_mutex)
1347
|| (latch == (void*)&rw_lock_list_mutex)) {
1352
mutex_enter(&sync_thread_mutex);
1354
thread_slot = sync_thread_level_arrays_find_slot();
1356
if (thread_slot == NULL) {
1360
mutex_exit(&sync_thread_mutex);
1364
array = thread_slot->levels;
1366
for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
1368
slot = sync_thread_levels_get_nth(array, i);
1370
if (slot->latch == latch) {
1373
mutex_exit(&sync_thread_mutex);
1379
if (((mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
1382
rw_lock = (rw_lock_t*) latch;
1384
if (rw_lock->level == SYNC_LEVEL_VARYING) {
1385
mutex_exit(&sync_thread_mutex);
1393
mutex_exit(&sync_thread_mutex);
1397
#endif /* UNIV_SYNC_DEBUG */
1399
/******************************************************************//**
1400
Initializes the synchronization data structures. */
1406
#ifdef UNIV_SYNC_DEBUG
1407
sync_thread_t* thread_slot;
1409
#endif /* UNIV_SYNC_DEBUG */
1411
ut_a(sync_initialized == FALSE);
1413
sync_initialized = TRUE;
1415
/* Create the primary system wait array which is protected by an OS
1418
sync_primary_wait_array = sync_array_create(OS_THREAD_MAX_N,
1419
SYNC_ARRAY_OS_MUTEX);
1420
#ifdef UNIV_SYNC_DEBUG
1421
/* Create the thread latch level array where the latch levels
1422
are stored for each OS thread */
1424
sync_thread_level_arrays = ut_malloc(OS_THREAD_MAX_N
1425
* sizeof(sync_thread_t));
1426
for (i = 0; i < OS_THREAD_MAX_N; i++) {
1428
thread_slot = sync_thread_level_arrays_get_nth(i);
1429
thread_slot->levels = NULL;
1431
#endif /* UNIV_SYNC_DEBUG */
1432
/* Init the mutex list and create the mutex to protect it. */
1434
UT_LIST_INIT(mutex_list);
1435
mutex_create(mutex_list_mutex_key, &mutex_list_mutex,
1436
SYNC_NO_ORDER_CHECK);
1437
#ifdef UNIV_SYNC_DEBUG
1438
mutex_create(sync_thread_mutex_key, &sync_thread_mutex,
1439
SYNC_NO_ORDER_CHECK);
1440
#endif /* UNIV_SYNC_DEBUG */
1442
/* Init the rw-lock list and create the mutex to protect it. */
1444
UT_LIST_INIT(rw_lock_list);
1445
mutex_create(rw_lock_list_mutex_key, &rw_lock_list_mutex,
1446
SYNC_NO_ORDER_CHECK);
1448
#ifdef UNIV_SYNC_DEBUG
1449
mutex_create(rw_lock_debug_mutex_key, &rw_lock_debug_mutex,
1450
SYNC_NO_ORDER_CHECK);
1452
rw_lock_debug_event = os_event_create(NULL);
1453
rw_lock_debug_waiters = FALSE;
1454
#endif /* UNIV_SYNC_DEBUG */
1457
/******************************************************************//**
1458
Frees the resources in InnoDB's own synchronization data structures. Use
1459
os_sync_free() after calling this. */
1467
sync_array_free(sync_primary_wait_array);
1469
mutex = UT_LIST_GET_FIRST(mutex_list);
1472
#ifdef UNIV_MEM_DEBUG
1473
if (mutex == &mem_hash_mutex) {
1474
mutex = UT_LIST_GET_NEXT(list, mutex);
1477
#endif /* UNIV_MEM_DEBUG */
1479
mutex = UT_LIST_GET_FIRST(mutex_list);
1482
mutex_free(&mutex_list_mutex);
1483
#ifdef UNIV_SYNC_DEBUG
1484
mutex_free(&sync_thread_mutex);
1486
/* Switch latching order checks on in sync0sync.c */
1487
sync_order_checks_on = FALSE;
1488
#endif /* UNIV_SYNC_DEBUG */
1490
sync_initialized = FALSE;
1493
/*******************************************************************//**
1494
Prints wait info of the sync system. */
1497
sync_print_wait_info(
1498
/*=================*/
1499
FILE* file) /*!< in: file where to print */
1501
#ifdef UNIV_SYNC_DEBUG
1502
fprintf(file, "Mutex exits %llu, rws exits %llu, rwx exits %llu\n",
1503
mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
1507
"Mutex spin waits %"PRId64", rounds %"PRId64", "
1508
"OS waits %"PRId64"\n"
1509
"RW-shared spins %"PRId64", rounds %"PRId64", OS waits %"PRId64";"
1510
" RW-excl spins %"PRId64", rounds %"PRId64", OS waits %"PRId64"\n",
1511
mutex_spin_wait_count,
1512
mutex_spin_round_count,
1513
mutex_os_wait_count,
1514
rw_s_spin_wait_count,
1515
rw_s_spin_round_count,
1517
rw_x_spin_wait_count,
1518
rw_x_spin_round_count,
1519
rw_x_os_wait_count);
1522
"Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
1524
(double) mutex_spin_round_count /
1525
(mutex_spin_wait_count ? mutex_spin_wait_count : 1),
1526
(double) rw_s_spin_round_count /
1527
(rw_s_spin_wait_count ? rw_s_spin_wait_count : 1),
1528
(double) rw_x_spin_round_count /
1529
(rw_x_spin_wait_count ? rw_x_spin_wait_count : 1));
1532
/*******************************************************************//**
1533
Prints info of the sync system. */
1538
FILE* file) /*!< in: file where to print */
1540
#ifdef UNIV_SYNC_DEBUG
1541
mutex_list_print_info(file);
1543
rw_lock_list_print_info(file);
1544
#endif /* UNIV_SYNC_DEBUG */
1546
sync_array_print_info(file, sync_primary_wait_array);
1548
sync_print_wait_info(file);