40
40
say 200. In NT 3.51, allocating events seems to be a quadratic
41
41
algorithm, because 10 000 events are created fast, but
42
42
100 000 events take a couple of minutes to create.
44
As of 5.0.30 the above mentioned design is changed. Since now
45
OS can handle millions of wait events efficiently, we no longer
46
have this concept of each cell of wait array having one event.
47
Instead, now the event that a thread wants to wait on is embedded
48
in the wait object (mutex or rw_lock). We still keep the global
49
wait array for the sake of diagnostics and also to avoid infinite
50
wait. The error_monitor thread scans the global wait array to signal
51
any waiting threads who have missed the signal. */
45
53
/* A cell where an individual thread may wait suspended
46
54
until a resource is released. The suspending is implemented
47
55
using an operating system event semaphore. */
48
56
struct sync_cell_struct {
49
/* State of the cell. SC_WAKING_UP means
50
sync_array_struct->n_reserved has been decremented, but the thread
51
in this cell has not woken up yet. When it does, it will set the
52
state to SC_FREE. Note that this is done without the protection of
54
enum { SC_FREE, SC_RESERVED, SC_WAKING_UP } state;
56
57
void* wait_object; /* pointer to the object the
57
thread is waiting for; this is not
58
reset to NULL when a cell is
58
thread is waiting for; if NULL
59
the cell is free for use */
61
60
mutex_t* old_wait_mutex; /* the latest wait mutex in cell */
62
61
rw_lock_t* old_wait_rw_lock;/* the latest wait rw-lock in cell */
63
62
ulint request_type; /* lock type requested on the
71
70
ibool waiting; /* TRUE if the thread has already
72
71
called sync_array_event_wait
74
ibool event_set; /* TRUE if the event is set */
75
os_event_t event; /* operating system event
73
ib_int64_t signal_count; /* We capture the signal_count
74
of the wait_object when we
75
reset the event. This value is
76
then passed on to os_event_wait
77
and we wait only if the event
78
has not been signalled in the
79
period between the reset and
77
81
time_t reservation_time;/* time when the thread reserved
85
/* NOTE: It is allowed for a thread to wait
86
for an event allocated for the array without owning the
87
protecting mutex (depending on the case: OS or database mutex), but
88
all changes (set or reset) to the state of the event must be made
89
while owning the mutex. */
81
90
struct sync_array_struct {
82
91
ulint n_reserved; /* number of currently reserved
83
92
cells in the wait array */
234
240
/**********************************************************************
235
241
Frees the resources in a wait array. */
240
246
sync_array_t* arr) /* in, own: sync wait array */
244
248
ulint protection;
246
250
ut_a(arr->n_reserved == 0);
248
252
sync_array_validate(arr);
250
for (i = 0; i < arr->n_cells; i++) {
251
cell = sync_array_get_nth_cell(arr, i);
252
os_event_free(cell->event);
255
254
protection = arr->protection;
257
256
/* Release the mutex protecting the wait array complex */
296
294
sync_array_exit(arr);
297
/***********************************************************************
298
Puts the cell event in reset state. */
301
sync_cell_event_reset(
302
/*==================*/
303
/* out: value of signal_count
304
at the time of reset. */
305
ulint type, /* in: lock type mutex/rw_lock */
306
void* object) /* in: the rw_lock/mutex object */
308
if (type == SYNC_MUTEX) {
309
return(os_event_reset(((mutex_t *) object)->event));
311
} else if (type == RW_LOCK_WAIT_EX) {
312
return(os_event_reset(
313
((rw_lock_t *) object)->wait_ex_event));
316
return(os_event_reset(((rw_lock_t *) object)->event));
299
320
/**********************************************************************
300
321
Reserves a wait array cell for waiting for an object.
301
322
The event of the cell is reset to nonsignalled state. */
304
325
sync_array_reserve_cell(
305
326
/*====================*/
324
345
for (i = 0; i < arr->n_cells; i++) {
325
346
cell = sync_array_get_nth_cell(arr, i);
327
if (cell->state == SC_FREE) {
329
/* We do not check cell->event_set because it is
330
set outside the protection of the sync array mutex
331
and we had a bug regarding it, and since resetting
332
an event when it is not needed does no harm it is
333
safer always to do it. */
335
cell->event_set = FALSE;
336
os_event_reset(cell->event);
338
cell->state = SC_RESERVED;
339
cell->reservation_time = time(NULL);
340
cell->thread = os_thread_get_curr_id();
348
if (cell->wait_object == NULL) {
350
cell->waiting = FALSE;
342
351
cell->wait_object = object;
344
353
if (type == SYNC_MUTEX) {
371
389
/**********************************************************************
372
Frees the cell. Note that we don't have any mutex reserved when calling
376
sync_array_free_cell(
377
/*=================*/
378
sync_array_t* arr, /* in: wait array */
379
ulint index) /* in: index of the cell in array */
383
cell = sync_array_get_nth_cell(arr, index);
385
ut_a(cell->state == SC_WAKING_UP);
386
ut_a(cell->wait_object != NULL);
388
cell->state = SC_FREE;
391
/**********************************************************************
392
Frees the cell safely by reserving the sync array mutex and decrementing
393
n_reserved if necessary. Should only be called from mutex_spin_wait. */
396
sync_array_free_cell_protected(
397
/*===========================*/
398
sync_array_t* arr, /* in: wait array */
399
ulint index) /* in: index of the cell in array */
403
sync_array_enter(arr);
405
cell = sync_array_get_nth_cell(arr, index);
407
ut_a(cell->state != SC_FREE);
408
ut_a(cell->wait_object != NULL);
410
/* We only need to decrement n_reserved if it has not already been
411
done by sync_array_signal_object. */
412
if (cell->state == SC_RESERVED) {
413
ut_a(arr->n_reserved > 0);
415
} else if (cell->state == SC_WAKING_UP) {
416
/* This is tricky; if we don't wait for the event to be
417
signaled, signal_object can set the state of a cell to
418
SC_WAKING_UP, mutex_spin_wait can call this and set the
419
state to SC_FREE, and then signal_object gets around to
420
calling os_set_event for the cell but since it's already
421
been freed things break horribly. */
423
sync_array_exit(arr);
424
os_event_wait(cell->event);
425
sync_array_enter(arr);
428
cell->state = SC_FREE;
430
sync_array_exit(arr);
433
/**********************************************************************
434
390
This function should be called when a thread starts to wait on
435
391
a wait array cell. In the debug version this function checks
436
392
if the wait for a semaphore will result in a deadlock, in which
437
393
case it prints info and asserts. */
440
396
sync_array_wait_event(
441
397
/*==================*/
406
sync_array_enter(arr);
450
408
cell = sync_array_get_nth_cell(arr, index);
452
ut_a((cell->state == SC_RESERVED) || (cell->state == SC_WAKING_UP));
453
410
ut_a(cell->wait_object);
454
411
ut_a(!cell->waiting);
455
412
ut_ad(os_thread_get_curr_id() == cell->thread);
458
cell->waiting = TRUE;
414
if (cell->request_type == SYNC_MUTEX) {
415
event = ((mutex_t*) cell->wait_object)->event;
417
/* On windows if the thread about to wait is the one which
418
has set the state of the rw_lock to RW_LOCK_WAIT_EX, then
419
it waits on a special event i.e.: wait_ex_event. */
420
} else if (cell->request_type == RW_LOCK_WAIT_EX) {
421
event = ((rw_lock_t*) cell->wait_object)->wait_ex_event;
424
event = ((rw_lock_t*) cell->wait_object)->event;
427
cell->waiting = TRUE;
460
429
#ifdef UNIV_SYNC_DEBUG
476
444
rw_lock_debug_mutex_exit();
477
446
sync_array_exit(arr);
479
os_event_wait(event);
448
os_event_wait_low(event, cell->signal_count);
481
450
sync_array_free_cell(arr, index);
484
453
/**********************************************************************
485
Reports info of a wait array cell. Note: sync_array_print_long_waits()
486
calls this without mutex protection. */
454
Reports info of a wait array cell. */
489
457
sync_array_cell_print(
503
471
(ulong) os_thread_pf(cell->thread), cell->file,
504
472
(ulong) cell->line,
505
473
difftime(time(NULL), cell->reservation_time));
506
fprintf(file, "Wait array cell state %lu\n", (ulong)cell->state);
508
/* If the memory area pointed to by old_wait_mutex /
509
old_wait_rw_lock has been freed, this can crash. */
511
if (cell->state != SC_RESERVED) {
512
/* If cell has this state, then even if we are holding the sync
513
array mutex, the wait object may get freed meanwhile. Do not
514
print the wait object then. */
516
} else if (type == SYNC_MUTEX) {
475
if (type == SYNC_MUTEX) {
517
476
/* We use old_wait_mutex in case the cell has already
518
477
been freed meanwhile */
519
478
mutex = cell->old_wait_mutex;
707
"Mutex %p owned by thread %lu"
708
" file %s line %lu\n",
710
(ulong) os_thread_pf(mutex->thread_id),
670
"Mutex %p owned by thread %lu file %s line %lu\n",
671
mutex, (ulong) os_thread_pf(mutex->thread_id),
711
672
mutex->file_name, (ulong) mutex->line);
712
673
sync_array_cell_print(stderr, cell);
848
/**************************************************************************
849
Looks for the cells in the wait array which refer to the wait object
850
specified, and sets their corresponding events to the signaled state. In this
851
way releases the threads waiting for the object to contend for the object.
852
It is possible that no such cell is found, in which case this function does nothing. */
811
/**********************************************************************
812
Frees the cell. NOTE! sync_array_wait_event frees the cell
855
sync_array_signal_object(
856
/*=====================*/
816
sync_array_free_cell(
817
/*=================*/
857
818
sync_array_t* arr, /* in: wait array */
858
void* object) /* in: wait object */
819
ulint index) /* in: index of the cell in array */
860
821
sync_cell_t* cell;
865
/* We store the addresses of cells we need to signal and signal
866
them only after we have released the sync array's mutex (for
867
performance reasons). cell_count is the number of such cells, and
868
cell_ptr points to the first one. If there are fewer than
869
UT_ARR_SIZE(cells) of them, cell_ptr == &cells[0], otherwise
870
cell_ptr points to malloc'd memory that we must free. */
872
sync_cell_t* cells[100];
873
sync_cell_t** cell_ptr = &cells[0];
874
ulint cell_count = 0;
875
ulint cell_max_count = UT_ARR_SIZE(cells);
877
ut_a(100 == cell_max_count);
823
sync_array_enter(arr);
825
cell = sync_array_get_nth_cell(arr, index);
827
ut_a(cell->wait_object != NULL);
829
cell->waiting = FALSE;
830
cell->wait_object = NULL;
831
cell->signal_count = 0;
833
ut_a(arr->n_reserved > 0);
836
sync_array_exit(arr);
839
/**************************************************************************
840
Increments the signalled count. */
843
sync_array_object_signalled(
844
/*========================*/
845
sync_array_t* arr) /* in: wait array */
879
847
sync_array_enter(arr);
886
/* We need to store this to a local variable because it is modified
888
res_count = arr->n_reserved;
890
while (count < res_count) {
892
cell = sync_array_get_nth_cell(arr, i);
894
if (cell->state == SC_RESERVED) {
897
if (cell->wait_object == object) {
898
cell->state = SC_WAKING_UP;
900
ut_a(arr->n_reserved > 0);
903
if (cell_count == cell_max_count) {
904
sync_cell_t** old_cell_ptr = cell_ptr;
905
size_t old_size, new_size;
907
old_size = cell_max_count
908
* sizeof(sync_cell_t*);
910
new_size = cell_max_count
911
* sizeof(sync_cell_t*);
913
cell_ptr = malloc(new_size);
915
memcpy(cell_ptr, old_cell_ptr,
918
if (old_cell_ptr != &cells[0]) {
923
cell_ptr[cell_count] = cell;
931
851
sync_array_exit(arr);
933
for (i = 0; i < cell_count; i++) {
936
cell->event_set = TRUE;
937
os_event_set(cell->event);
940
if (cell_ptr != &cells[0]) {
945
854
/**************************************************************************
959
868
sync_cell_t* cell;
964
872
sync_array_enter(arr);
969
/* We need to store this to a local variable because it is modified
972
res_count = arr->n_reserved;
974
while (count < res_count) {
877
while (count < arr->n_reserved) {
976
879
cell = sync_array_get_nth_cell(arr, i);
978
if (cell->state == SC_RESERVED) {
881
if (cell->wait_object != NULL) {
982
885
if (sync_arr_cell_can_wake_up(cell)) {
983
cell->state = SC_WAKING_UP;
984
cell->event_set = TRUE;
985
os_event_set(cell->event);
987
ut_a(arr->n_reserved > 0);
887
if (cell->request_type == SYNC_MUTEX) {
890
mutex = cell->wait_object;
891
os_event_set(mutex->event);
893
} else if (cell->request_type
894
== RW_LOCK_WAIT_EX) {
897
lock = cell->wait_object;
898
os_event_set(lock->wait_ex_event);
903
lock = cell->wait_object;
904
os_event_set(lock->event);
1074
991
sync_cell_t* cell;
1078
"OS WAIT ARRAY INFO: reservation count %ld,"
1079
" signal count %ld\n",
1080
(long) arr->res_count,
1081
(long) arr->sg_count);
1082
for (i = 0; i < arr->n_cells; i++) {
996
"OS WAIT ARRAY INFO: reservation count %ld, signal count %ld\n",
997
(long) arr->res_count, (long) arr->sg_count);
1001
while (count < arr->n_reserved) {
1084
1003
cell = sync_array_get_nth_cell(arr, i);
1086
if (cell->state != SC_FREE) {
1005
if (cell->wait_object != NULL) {
1087
1007
sync_array_cell_print(file, cell);
1092
1014
/**************************************************************************
1093
1015
Prints info of the wait array. */
1096
1018
sync_array_print_info(
1097
1019
/*==================*/