1
/*****************************************************************************
3
Copyright (C) 1995, 2009, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15
St, Fifth Floor, Boston, MA 02110-1301 USA
17
*****************************************************************************/
19
/**************************************************//**
21
The interface to the operating system
22
synchronization primitives.
24
Created 9/6/1995 Heikki Tuuri
25
*******************************************************/
40
#include "srv0start.h"
42
#include "ha_prototypes.h"
44
/* Type definition for an operating system mutex struct */
45
struct os_mutex_struct{
46
os_event_t event; /*!< Used by sync0arr.c for queuing threads */
47
void* handle; /*!< OS handle to mutex */
48
ulint count; /*!< we use this counter to check
49
that the same thread does not
50
recursively lock the mutex: we
51
do not assume that the OS mutex
52
supports recursive locking, though
53
NT seems to do that */
54
UT_LIST_NODE_T(os_mutex_str_t) os_mutex_list;
55
/* list of all 'slow' OS mutexes created */
58
/** Mutex protecting counts and the lists of OS mutexes and events */
59
UNIV_INTERN os_mutex_t os_sync_mutex;
60
/** TRUE if os_sync_mutex has been initialized */
61
static ibool os_sync_mutex_inited = FALSE;
62
/** TRUE when os_sync_free() is being executed */
63
static ibool os_sync_free_called = FALSE;
65
/** This is incremented by 1 in os_thread_create and decremented by 1 in os_thread_exit */
67
UNIV_INTERN ulint os_thread_count = 0;
69
/** The list of all events created */
70
static UT_LIST_BASE_NODE_T(os_event_struct_t) os_event_list;
72
/** The list of all OS 'slow' mutexes */
73
static UT_LIST_BASE_NODE_T(os_mutex_str_t) os_mutex_list;
75
UNIV_INTERN ulint os_event_count = 0;
76
UNIV_INTERN ulint os_mutex_count = 0;
77
UNIV_INTERN ulint os_fast_mutex_count = 0;
79
/* The number of microseconds in a second. */
80
static const ulint MICROSECS_IN_A_SECOND = 1000000;
82
/* Because a mutex is embedded inside an event and there is an
83
event embedded inside a mutex, on free, this generates a recursive call.
84
This version of the free event function doesn't acquire the global lock */
85
static void os_event_free_internal(os_event_t event);
87
/* On Windows (Vista and later), load function pointers for condition
88
variable handling. Those functions are not available in prior versions,
89
so we have to use them via runtime loading, as long as we support XP. */
90
static void os_cond_module_init(void);
93
/* Prototypes and function pointers for condition variable functions */
94
typedef VOID (WINAPI* InitializeConditionVariableProc)
95
(PCONDITION_VARIABLE ConditionVariable);
96
static InitializeConditionVariableProc initialize_condition_variable;
98
typedef BOOL (WINAPI* SleepConditionVariableCSProc)
99
(PCONDITION_VARIABLE ConditionVariable,
100
PCRITICAL_SECTION CriticalSection,
101
DWORD dwMilliseconds);
102
static SleepConditionVariableCSProc sleep_condition_variable;
104
typedef VOID (WINAPI* WakeAllConditionVariableProc)
105
(PCONDITION_VARIABLE ConditionVariable);
106
static WakeAllConditionVariableProc wake_all_condition_variable;
108
typedef VOID (WINAPI* WakeConditionVariableProc)
109
(PCONDITION_VARIABLE ConditionVariable);
110
static WakeConditionVariableProc wake_condition_variable;
113
/*********************************************************//**
114
Initializes a condition variable. */
119
os_cond_t* cond) /*!< in: condition variable. */
124
ut_a(initialize_condition_variable != NULL);
125
initialize_condition_variable(cond);
127
ut_a(pthread_cond_init(cond, NULL) == 0);
131
/*********************************************************//**
132
Do a timed wait on condition variable.
133
@return TRUE if timed out, FALSE otherwise */
138
os_cond_t* cond, /*!< in: condition variable. */
139
os_fast_mutex_t* mutex, /*!< in: fast mutex */
141
const struct timespec* abstime /*!< in: timeout */
143
DWORD time_in_ms /*!< in: timeout in
145
#endif /* !__WIN__ */
152
ut_a(sleep_condition_variable != NULL);
154
ret = sleep_condition_variable(cond, mutex, time_in_ms);
157
err = GetLastError();
158
/* From http://msdn.microsoft.com/en-us/library/ms686301%28VS.85%29.aspx,
159
"Condition variables are subject to spurious wakeups
160
(those not associated with an explicit wake) and stolen wakeups
161
(another thread manages to run before the woken thread)."
162
Check for both types of timeouts.
163
Conditions are checked by the caller.*/
164
if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) {
175
ret = pthread_cond_timedwait(cond, mutex, abstime);
180
/* We play it safe by checking for EINTR even though
181
according to the POSIX documentation it can't return EINTR. */
186
fprintf(stderr, " InnoDB: pthread_cond_timedwait() returned: "
187
"%d: abstime={%lu,%lu}\n",
188
ret, (ulong) abstime->tv_sec, (ulong) abstime->tv_nsec);
192
return(ret == ETIMEDOUT);
195
/*********************************************************//**
196
Wait on condition variable */
201
os_cond_t* cond, /*!< in: condition variable. */
202
os_fast_mutex_t* mutex) /*!< in: fast mutex */
208
ut_a(sleep_condition_variable != NULL);
209
ut_a(sleep_condition_variable(cond, mutex, INFINITE));
211
ut_a(pthread_cond_wait(cond, mutex) == 0);
215
/*********************************************************//**
216
Wakes all threads waiting for condition variable */
221
os_cond_t* cond) /*!< in: condition variable. */
226
ut_a(wake_all_condition_variable != NULL);
227
wake_all_condition_variable(cond);
229
ut_a(pthread_cond_broadcast(cond) == 0);
233
/*********************************************************//**
234
Wakes one thread waiting for condition variable */
239
os_cond_t* cond) /*!< in: condition variable. */
244
ut_a(wake_condition_variable != NULL);
245
wake_condition_variable(cond);
247
ut_a(pthread_cond_signal(cond) == 0);
251
/*********************************************************//**
252
Destroys condition variable */
257
os_cond_t* cond) /*!< in: condition variable. */
262
ut_a(pthread_cond_destroy(cond) == 0);
266
/*********************************************************//**
267
On Windows (Vista and later), load function pointers for condition variable
268
handling. Those functions are not available in prior versions, so we have to
269
use them via runtime loading, as long as we support XP. */
272
os_cond_module_init(void)
273
/*=====================*/
278
if (!srv_use_native_conditions)
281
h_dll = GetModuleHandle("kernel32");
283
initialize_condition_variable = (InitializeConditionVariableProc)
284
GetProcAddress(h_dll, "InitializeConditionVariable");
285
sleep_condition_variable = (SleepConditionVariableCSProc)
286
GetProcAddress(h_dll, "SleepConditionVariableCS");
287
wake_all_condition_variable = (WakeAllConditionVariableProc)
288
GetProcAddress(h_dll, "WakeAllConditionVariable");
289
wake_condition_variable = (WakeConditionVariableProc)
290
GetProcAddress(h_dll, "WakeConditionVariable");
292
/* When using native condition variables, check function pointers */
293
ut_a(initialize_condition_variable);
294
ut_a(sleep_condition_variable);
295
ut_a(wake_all_condition_variable);
296
ut_a(wake_condition_variable);
300
/*********************************************************//**
301
Initializes global event and OS 'slow' mutex lists. */
307
UT_LIST_INIT(os_event_list);
308
UT_LIST_INIT(os_mutex_list);
310
os_sync_mutex = NULL;
311
os_sync_mutex_inited = FALSE;
313
/* Now for Windows only */
314
os_cond_module_init();
316
os_sync_mutex = os_mutex_create();
318
os_sync_mutex_inited = TRUE;
321
/*********************************************************//**
322
Frees created events and OS 'slow' mutexes. */
331
os_sync_free_called = TRUE;
332
event = UT_LIST_GET_FIRST(os_event_list);
336
os_event_free(event);
338
event = UT_LIST_GET_FIRST(os_event_list);
341
mutex = UT_LIST_GET_FIRST(os_mutex_list);
344
if (mutex == os_sync_mutex) {
345
/* Set the flag to FALSE so that we do not try to
346
reserve os_sync_mutex any more in remaining freeing
347
operations in shutdown */
348
os_sync_mutex_inited = FALSE;
351
os_mutex_free(mutex);
353
mutex = UT_LIST_GET_FIRST(os_mutex_list);
355
os_sync_free_called = FALSE;
358
/*********************************************************//**
359
Creates an event semaphore, i.e., a semaphore which may just have two
360
states: signaled and nonsignaled. The created event is manual reset: it
361
must be reset explicitly by calling os_event_reset().
362
@return the event handle */
367
const char* name) /*!< in: the name of the event, if NULL
368
the event is created without a name */
373
if(!srv_use_native_conditions) {
375
/* Cast the allocation result explicitly, matching the other ut_malloc()
call sites in this file: C++ performs no implicit conversion from void*,
so the uncast assignment breaks the Windows build. */
event = static_cast<os_event_struct*>(ut_malloc(sizeof(struct os_event_struct)));
377
event->handle = CreateEvent(NULL,
381
if (!event->handle) {
383
"InnoDB: Could not create a Windows event"
384
" semaphore; Windows error %lu\n",
385
(ulong) GetLastError());
387
} else /* Windows with condition variables */
393
event = static_cast<os_event_struct*>(ut_malloc(sizeof(struct os_event_struct)));
395
os_fast_mutex_init(&(event->os_mutex));
397
os_cond_init(&(event->cond_var));
399
event->is_set = FALSE;
401
/* We return this value in os_event_reset(), which can then be
402
used to pass to os_event_wait_low(). The value of zero
403
is reserved in os_event_wait_low() for the case when the
404
caller does not want to pass any signal_count value. To
405
distinguish between the two cases we initialize signal_count to 1 here. */
407
event->signal_count = 1;
410
/* The os_sync_mutex can be NULL because during startup an event
411
can be created [ because it's embedded in the mutex/rwlock ] before
412
this module has been initialized */
413
if (os_sync_mutex != NULL) {
414
os_mutex_enter(os_sync_mutex);
417
/* Put to the list of events */
418
UT_LIST_ADD_FIRST(os_event_list, os_event_list, event);
422
if (os_sync_mutex != NULL) {
423
os_mutex_exit(os_sync_mutex);
429
/**********************************************************//**
430
Sets an event semaphore to the signaled state: lets waiting threads proceed. */
436
os_event_t event) /*!< in: event to set */
441
if (!srv_use_native_conditions) {
442
ut_a(SetEvent(event->handle));
449
os_fast_mutex_lock(&(event->os_mutex));
454
event->is_set = TRUE;
455
event->signal_count += 1;
456
os_cond_broadcast(&(event->cond_var));
459
os_fast_mutex_unlock(&(event->os_mutex));
462
/**********************************************************//**
463
Resets an event semaphore to the nonsignaled state. Waiting threads will
464
stop to wait for the event.
465
The return value should be passed to os_event_wait_low() if it is desired
466
that this thread should not wait in case of an intervening call to
467
os_event_set() between this os_event_reset() and the
468
os_event_wait_low() call. See comments for os_event_wait_low().
469
@return current signal_count. */
474
os_event_t event) /*!< in: event to reset */
481
if(!srv_use_native_conditions) {
482
ut_a(ResetEvent(event->handle));
487
os_fast_mutex_lock(&(event->os_mutex));
489
if (!event->is_set) {
492
event->is_set = FALSE;
494
ret = event->signal_count;
496
os_fast_mutex_unlock(&(event->os_mutex));
500
/**********************************************************//**
501
Frees an event object, without acquiring the global lock. */
504
os_event_free_internal(
505
/*===================*/
506
os_event_t event) /*!< in: event to free */
509
if(!srv_use_native_conditions) {
511
ut_a(CloseHandle(event->handle));
517
/* This is to avoid freeing the mutex twice */
518
os_fast_mutex_free(&(event->os_mutex));
520
os_cond_destroy(&(event->cond_var));
523
/* Remove from the list of events */
524
UT_LIST_REMOVE(os_event_list, os_event_list, event);
531
/**********************************************************//**
532
Frees an event object. */
537
os_event_t event) /*!< in: event to free */
542
if(!srv_use_native_conditions){
543
ut_a(CloseHandle(event->handle));
544
} else /*Windows with condition variables */
547
os_fast_mutex_free(&(event->os_mutex));
549
os_cond_destroy(&(event->cond_var));
552
/* Remove from the list of events */
553
os_mutex_enter(os_sync_mutex);
555
UT_LIST_REMOVE(os_event_list, os_event_list, event);
559
os_mutex_exit(os_sync_mutex);
564
/**********************************************************//**
565
Waits for an event object until it is in the signaled state. If
566
srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
567
waiting thread when the event becomes signaled (or immediately if the
568
event is already in the signaled state).
570
Typically, if the event has been signalled after the os_event_reset()
571
we'll return immediately because event->is_set == TRUE.
572
There are, however, situations (e.g.: sync_array code) where we may
573
lose this information. For example:
575
thread A calls os_event_reset()
576
thread B calls os_event_set() [event->is_set == TRUE]
577
thread C calls os_event_reset() [event->is_set == FALSE]
578
thread A calls os_event_wait() [infinite wait!]
579
thread C calls os_event_wait() [infinite wait!]
581
Where such a scenario is possible, to avoid infinite wait, the
582
value returned by os_event_reset() should be passed in as reset_sig_count. */
588
os_event_t event, /*!< in: event to wait */
589
ib_int64_t reset_sig_count)/*!< in: zero or the value
590
returned by previous call of os_event_reset(). */
593
ib_int64_t old_signal_count;
596
if(!srv_use_native_conditions) {
601
UT_NOT_USED(reset_sig_count);
603
/* Specify an infinite wait */
604
err = WaitForSingleObject(event->handle, INFINITE);
606
ut_a(err == WAIT_OBJECT_0);
608
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
609
os_thread_exit(NULL);
615
os_fast_mutex_lock(&(event->os_mutex));
617
if (reset_sig_count) {
618
old_signal_count = reset_sig_count;
620
old_signal_count = event->signal_count;
624
if (event->is_set == TRUE
625
|| event->signal_count != old_signal_count) {
627
os_fast_mutex_unlock(&(event->os_mutex));
629
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
631
os_thread_exit(NULL);
633
/* Ok, we may return */
638
os_cond_wait(&(event->cond_var), &(event->os_mutex));
640
/* Solaris manual said that spurious wakeups may occur: we
641
have to check if the event really has been signaled after
642
we came here to wait */
646
/**********************************************************//**
647
Waits for an event object until it is in the signaled state or
648
a timeout is exceeded.
649
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
652
os_event_wait_time_low(
653
/*===================*/
654
os_event_t event, /*!< in: event to wait */
655
ulint time_in_usec, /*!< in: timeout in
657
microseconds, or OS_SYNC_INFINITE_TIME */
658
ib_int64_t reset_sig_count) /*!< in: zero or the value
659
returned by previous call of os_event_reset(). */
663
ibool timed_out = FALSE;
664
ib_int64_t old_signal_count;
669
if (!srv_use_native_conditions) {
674
if (time_in_usec != OS_SYNC_INFINITE_TIME) {
675
time_in_ms = time_in_usec / 1000;
676
err = WaitForSingleObject(event->handle, time_in_ms);
678
err = WaitForSingleObject(event->handle, INFINITE);
681
if (err == WAIT_OBJECT_0) {
683
} else if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) {
684
return(OS_SYNC_TIME_EXCEEDED);
688
/* Dummy value to eliminate compiler warning. */
691
ut_a(sleep_condition_variable != NULL);
693
if (time_in_usec != OS_SYNC_INFINITE_TIME) {
694
time_in_ms = time_in_usec / 1000;
696
time_in_ms = INFINITE;
700
struct timespec abstime;
702
if (time_in_usec != OS_SYNC_INFINITE_TIME) {
708
ret = ut_usectime(&sec, &usec);
714
tv.tv_usec += time_in_usec;
716
if ((ulint) tv.tv_usec >= MICROSECS_IN_A_SECOND) {
717
/* Carry whole seconds out of the summed microsecond field, not out of
the requested timeout alone: tv.tv_usec already includes time_in_usec at
this point, and using time_in_usec here would drop the carry produced by
the current time's microseconds, making the deadline up to one second
too early. */
tv.tv_sec += tv.tv_usec / MICROSECS_IN_A_SECOND;
718
tv.tv_usec %= MICROSECS_IN_A_SECOND;
721
abstime.tv_sec = tv.tv_sec;
722
abstime.tv_nsec = tv.tv_usec * 1000;
724
abstime.tv_nsec = 999999999;
725
abstime.tv_sec = (time_t) ULINT_MAX;
728
ut_a(abstime.tv_nsec <= 999999999);
732
os_fast_mutex_lock(&event->os_mutex);
734
if (reset_sig_count) {
735
old_signal_count = reset_sig_count;
737
old_signal_count = event->signal_count;
741
if (event->is_set == TRUE
742
|| event->signal_count != old_signal_count) {
747
timed_out = os_cond_wait_timed(
748
&event->cond_var, &event->os_mutex,
753
#endif /* !__WIN__ */
756
} while (!timed_out);
758
os_fast_mutex_unlock(&event->os_mutex);
760
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
762
os_thread_exit(NULL);
765
return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0);
768
/*********************************************************//**
769
Creates an operating system mutex semaphore. Because these are slow, the
770
mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
771
@return the mutex handle */
774
os_mutex_create(void)
775
/*=================*/
777
os_fast_mutex_t* mutex;
778
os_mutex_t mutex_str;
780
mutex = static_cast<os_fast_mutex_t*>(ut_malloc(sizeof(os_fast_mutex_t)));
782
os_fast_mutex_init(mutex);
783
mutex_str = static_cast<os_mutex_t>(ut_malloc(sizeof(os_mutex_str_t)));
785
mutex_str->handle = mutex;
786
mutex_str->count = 0;
787
mutex_str->event = os_event_create(NULL);
789
if (UNIV_LIKELY(os_sync_mutex_inited)) {
790
/* When creating os_sync_mutex itself we cannot reserve it */
791
os_mutex_enter(os_sync_mutex);
794
UT_LIST_ADD_FIRST(os_mutex_list, os_mutex_list, mutex_str);
798
if (UNIV_LIKELY(os_sync_mutex_inited)) {
799
os_mutex_exit(os_sync_mutex);
805
/**********************************************************//**
806
Acquires ownership of a mutex semaphore. */
811
os_mutex_t mutex) /*!< in: mutex to acquire */
813
os_fast_mutex_lock(static_cast<os_fast_mutex_t *>(mutex->handle));
817
ut_a(mutex->count == 1);
820
/**********************************************************//**
821
Releases ownership of a mutex. */
826
os_mutex_t mutex) /*!< in: mutex to release */
830
ut_a(mutex->count == 1);
833
os_fast_mutex_unlock(static_cast<os_fast_mutex_t *>(mutex->handle));
836
/**********************************************************//**
837
Frees a mutex object. */
842
os_mutex_t mutex) /*!< in: mutex to free */
846
if (UNIV_LIKELY(!os_sync_free_called)) {
847
os_event_free_internal(mutex->event);
850
if (UNIV_LIKELY(os_sync_mutex_inited)) {
851
os_mutex_enter(os_sync_mutex);
854
UT_LIST_REMOVE(os_mutex_list, os_mutex_list, mutex);
858
if (UNIV_LIKELY(os_sync_mutex_inited)) {
859
os_mutex_exit(os_sync_mutex);
862
os_fast_mutex_free(static_cast<os_fast_mutex_t *>(mutex->handle));
863
ut_free(mutex->handle);
867
/*********************************************************//**
868
Initializes an operating system fast mutex semaphore. */
873
os_fast_mutex_t* fast_mutex) /*!< in: fast mutex */
878
InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex);
880
ut_a(0 == pthread_mutex_init(fast_mutex, NULL));
882
if (UNIV_LIKELY(os_sync_mutex_inited)) {
883
/* When creating os_sync_mutex itself (in Unix) we cannot reserve it */
886
os_mutex_enter(os_sync_mutex);
889
os_fast_mutex_count++;
891
if (UNIV_LIKELY(os_sync_mutex_inited)) {
892
os_mutex_exit(os_sync_mutex);
896
/**********************************************************//**
897
Acquires ownership of a fast mutex. */
902
os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
905
EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex);
907
pthread_mutex_lock(fast_mutex);
911
/**********************************************************//**
912
Releases ownership of a fast mutex. */
915
os_fast_mutex_unlock(
916
/*=================*/
917
os_fast_mutex_t* fast_mutex) /*!< in: mutex to release */
920
LeaveCriticalSection(fast_mutex);
922
pthread_mutex_unlock(fast_mutex);
926
/**********************************************************//**
927
Frees a mutex object. */
932
os_fast_mutex_t* fast_mutex) /*!< in: mutex to free */
937
DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex);
941
ret = pthread_mutex_destroy(fast_mutex);
943
if (UNIV_UNLIKELY(ret != 0)) {
944
ut_print_timestamp(stderr);
946
" InnoDB: error: return value %lu when calling\n"
947
"InnoDB: pthread_mutex_destroy().\n", (ulint)ret);
949
"InnoDB: Byte contents of the pthread mutex at %p:\n",
951
ut_print_buf(stderr, fast_mutex, sizeof(os_fast_mutex_t));
955
if (UNIV_LIKELY(os_sync_mutex_inited)) {
956
/* When freeing the last mutexes, we have
957
already freed os_sync_mutex */
959
os_mutex_enter(os_sync_mutex);
962
ut_ad(os_fast_mutex_count > 0);
963
os_fast_mutex_count--;
965
if (UNIV_LIKELY(os_sync_mutex_inited)) {
966
os_mutex_exit(os_sync_mutex);