1
/******************************************************
2
The database server main program
4
NOTE: SQL Server 7 uses something which the documentation
5
calls user mode scheduled threads (UMS threads). One such
6
thread is usually allocated per processor. Win32
7
documentation does not know any UMS threads, which suggests
8
that the concept is internal to SQL Server 7. It may mean that
9
SQL Server 7 does all the scheduling of threads itself, even
10
in i/o waits. We should maybe modify InnoDB to use the same
11
technique, because thread switches within NT may be too slow.
13
SQL Server 7 also mentions fibers, which are cooperatively
14
scheduled threads. They can boost performance by 5 %,
15
according to the Delaney and Soukup's book.
17
Windows 2000 will have something called thread pooling
18
(see msdn website), which we could possibly use.
20
Another possibility could be to use some very fast user space
21
thread library. This might confuse NT though.
25
Created 10/8/1995 Heikki Tuuri
26
*******************************************************/
35
#include "sync0sync.h"
40
#include "pars0pars.h"
42
#include "lock0lock.h"
43
#include "trx0purge.h"
44
#include "ibuf0ibuf.h"
48
#include "dict0load.h"
49
#include "dict0boot.h"
50
#include "srv0start.h"
51
#include "row0mysql.h"
52
#include "ha_prototypes.h"
55
/* This is set to TRUE if the MySQL user has set it in MySQL; currently
56
affects only FOREIGN KEY definition parsing */
57
UNIV_INTERN ibool srv_lower_case_table_names = FALSE;
59
/* The following counter is incremented whenever there is some user activity
61
UNIV_INTERN ulint srv_activity_count = 0;
63
/* The following is the maximum allowed duration of a lock wait. */
64
UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
66
/* How much data manipulation language (DML) statements need to be delayed,
67
in microseconds, in order to reduce the lagging of the purge thread. */
68
UNIV_INTERN ulint srv_dml_needed_delay = 0;
70
UNIV_INTERN ibool srv_lock_timeout_and_monitor_active = FALSE;
71
UNIV_INTERN ibool srv_error_monitor_active = FALSE;
73
UNIV_INTERN const char* srv_main_thread_op_info = "";
75
/* Prefix used by MySQL to indicate pre-5.1 table name encoding */
76
UNIV_INTERN const char srv_mysql50_table_name_prefix[9] = "#mysql50#";
78
/* Server parameters which are read from the initfile */
80
/* The following three are dir paths which are catenated before file
81
names, where the file name itself may also contain a path */
83
UNIV_INTERN char* srv_data_home = NULL;
84
#ifdef UNIV_LOG_ARCHIVE
85
UNIV_INTERN char* srv_arch_dir = NULL;
86
#endif /* UNIV_LOG_ARCHIVE */
88
/* store to its own file each table created by an user; data
89
dictionary tables are in the system tablespace 0 */
90
UNIV_INTERN my_bool srv_file_per_table;
91
/* The file format to use on new *.ibd files. */
92
UNIV_INTERN ulint srv_file_format = 0;
93
/* Whether to check file format during startup a value of
94
DICT_TF_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
95
set it to the highest format we support. */
96
UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;
#if DICT_TF_FORMAT_51
/* The file-format tag code assumes that the oldest supported format,
DICT_TF_FORMAT_51, is numbered zero. */
# error "DICT_TF_FORMAT_51 must be 0!"
#endif
101
/* Place locks to records only i.e. do not use next-key locking except
102
on duplicate key checking and foreign key checking */
103
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
105
UNIV_INTERN ulint srv_n_data_files = 0;
106
UNIV_INTERN char** srv_data_file_names = NULL;
107
/* size in database pages */
108
UNIV_INTERN ulint* srv_data_file_sizes = NULL;
110
/* if TRUE, then we auto-extend the last data file */
111
UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
112
/* if != 0, this tells the max size auto-extending may increase the
113
last data file size */
114
UNIV_INTERN ulint srv_last_file_size_max = 0;
115
/* If the last data file is auto-extended, we add this
116
many pages to it at a time */
117
UNIV_INTERN ulong srv_auto_extend_increment = 8;
118
UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL;
120
/* If the following is TRUE we do not allow inserts etc. This protects
121
the user from forgetting the 'newraw' keyword to my.cnf */
123
UNIV_INTERN ibool srv_created_new_raw = FALSE;
125
UNIV_INTERN char** srv_log_group_home_dirs = NULL;
127
UNIV_INTERN ulint srv_n_log_groups = ULINT_MAX;
128
UNIV_INTERN ulint srv_n_log_files = ULINT_MAX;
129
/* size in database pages */
130
UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
131
/* size in database pages */
132
UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
133
UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
135
/* The sort order table of the MySQL latin1_swedish_ci character set
137
UNIV_INTERN const byte srv_latin1_ordering[256] /* The sort order table of the latin1
138
character set. The following table is
139
the MySQL order as of Feb 10th, 2002 */
141
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
142
, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
143
, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
144
, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
145
, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27
146
, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F
147
, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
148
, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F
149
, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
150
, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
151
, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
152
, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F
153
, 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
154
, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
155
, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
156
, 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
157
, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
158
, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F
159
, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97
160
, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F
161
, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7
162
, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF
163
, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7
164
, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF
165
, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
166
, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
167
, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7
168
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF
169
, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
170
, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
171
, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7
172
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
176
/* requested size in kilobytes */
177
UNIV_INTERN ulong srv_buf_pool_size = ULINT_MAX;
178
/* previously requested size */
179
UNIV_INTERN ulong srv_buf_pool_old_size;
180
/* current size in kilobytes */
181
UNIV_INTERN ulong srv_buf_pool_curr_size = 0;
183
UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
184
UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
186
UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
188
#ifdef UNIV_LOG_ARCHIVE
189
UNIV_INTERN ibool srv_log_archive_on = FALSE;
190
UNIV_INTERN ibool srv_archive_recovery = 0;
191
UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn;
192
#endif /* UNIV_LOG_ARCHIVE */
194
UNIV_INTERN ulint srv_lock_wait_timeout = 1024 * 1024 * 1024;
196
/* This parameter is used to throttle the number of insert buffers that are
197
merged in a batch. By increasing this parameter on a faster disk you can
198
possibly reduce the number of I/O operations performed to complete the
199
merge operation. The value of this parameter is used as is by the
200
background loop when the system is idle (low load), on a busy system
201
the parameter is scaled down by a factor of 4, this is to avoid putting
202
a heavier load on the I/O sub system. */
204
UNIV_INTERN ulong srv_insert_buffer_batch_size = 20;
206
UNIV_INTERN char* srv_file_flush_method_str = NULL;
207
UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
208
UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
210
UNIV_INTERN ulint srv_max_n_open_files = 300;
212
/* The InnoDB main thread tries to keep the ratio of modified pages
213
in the buffer pool to all database pages in the buffer pool smaller than
214
the following number. But it is not guaranteed that the value stays below
215
that during a time of heavy update/insert activity. */
217
UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 90;
219
/* variable counts amount of data read in total (in bytes) */
220
UNIV_INTERN ulint srv_data_read = 0;
222
/* here we count the amount of data written in total (in bytes) */
223
UNIV_INTERN ulint srv_data_written = 0;
225
/* the number of the log write requests done */
226
UNIV_INTERN ulint srv_log_write_requests = 0;
228
/* the number of physical writes to the log performed */
229
UNIV_INTERN ulint srv_log_writes = 0;
231
/* amount of data written to the log files in bytes */
232
UNIV_INTERN ulint srv_os_log_written = 0;
234
/* amount of writes being done to the log files */
235
UNIV_INTERN ulint srv_os_log_pending_writes = 0;
237
/* we increase this counter, when there we don't have enough space in the
238
log buffer and have to flush it */
239
UNIV_INTERN ulint srv_log_waits = 0;
241
/* this variable counts the amount of times, when the doublewrite buffer
243
UNIV_INTERN ulint srv_dblwr_writes = 0;
245
/* here we store the number of pages that have been flushed to the
246
doublewrite buffer */
247
UNIV_INTERN ulint srv_dblwr_pages_written = 0;
249
/* in this variable we store the number of write requests issued */
250
UNIV_INTERN ulint srv_buf_pool_write_requests = 0;
252
/* here we store the number of times when we had to wait for a free page
253
in the buffer pool. It happens when the buffer pool is full and we need
254
to make a flush, in order to be able to read or create a page. */
255
UNIV_INTERN ulint srv_buf_pool_wait_free = 0;
257
/* variable to count the number of pages that were written from buffer
259
UNIV_INTERN ulint srv_buf_pool_flushed = 0;
261
/* variable to count the number of buffer pool reads that led to the
262
reading of a disk page */
263
UNIV_INTERN ulint srv_buf_pool_reads = 0;
265
/* variable to count the number of sequential read-aheads */
266
UNIV_INTERN ulint srv_read_ahead_seq = 0;
268
/* variable to count the number of random read-aheads */
269
UNIV_INTERN ulint srv_read_ahead_rnd = 0;
271
/* structure to pass status variables to MySQL */
272
UNIV_INTERN export_struc export_vars;
274
/* If the following is != 0 we do not allow inserts etc. This protects
275
the user from forgetting the innodb_force_recovery keyword to my.cnf */
277
UNIV_INTERN ulint srv_force_recovery = 0;
278
/*-----------------------*/
279
/* We are prepared for a situation that we have this many threads waiting for
280
a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
283
UNIV_INTERN ulint srv_max_n_threads = 0;
285
/* The following controls how many threads we let inside InnoDB concurrently:
286
threads waiting for locks are not counted into the number because otherwise
287
we could get a deadlock. MySQL creates a thread for each user session, and
288
semaphore contention and convoy problems can occur withput this restriction.
289
Value 10 should be good if there are less than 4 processors + 4 disks in the
290
computer. Bigger computers need bigger values. Value 0 will disable the
291
concurrency check. */
293
UNIV_INTERN ulong srv_thread_concurrency = 0;
294
UNIV_INTERN ulong srv_commit_concurrency = 0;
296
/* this mutex protects srv_conc data structures */
297
UNIV_INTERN os_fast_mutex_t srv_conc_mutex;
298
/* number of OS threads currently inside InnoDB; it is not an error if
299
this drops temporarily below zero because we do not demand that every
300
thread increments this, but a thread waiting for a lock decrements
302
UNIV_INTERN lint srv_conc_n_threads = 0;
303
/* number of OS threads waiting in the FIFO for a permission to enter
305
UNIV_INTERN ulint srv_conc_n_waiting_threads = 0;
307
typedef struct srv_conc_slot_struct srv_conc_slot_t;
308
struct srv_conc_slot_struct{
309
os_event_t event; /* event to wait */
310
ibool reserved; /* TRUE if slot
312
ibool wait_ended; /* TRUE when another
313
thread has already set
315
thread in this slot is
317
reserved may still be
318
TRUE at that point */
319
UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /* queue node */
322
/* queue of threads waiting to get in */
323
UNIV_INTERN UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue;
324
/* array of wait slots */
325
UNIV_INTERN srv_conc_slot_t* srv_conc_slots;
327
/* Number of times a thread is allowed to enter InnoDB within the same
328
SQL query after it has once got the ticket at srv_conc_enter_innodb */
329
#define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter
330
#define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay
331
/*-----------------------*/
332
/* If the following is set to 1 then we do not run purge and insert buffer
333
merge to completion before shutdown. If it is set to 2, do not even flush the
334
buffer pool to data files at the shutdown: we effectively 'crash'
335
InnoDB (but lose no committed transactions). */
336
UNIV_INTERN ulint srv_fast_shutdown = 0;
338
/* Generate a innodb_status.<pid> file */
339
UNIV_INTERN ibool srv_innodb_status = FALSE;
341
UNIV_INTERN ibool srv_stats_on_metadata = TRUE;
343
UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
344
UNIV_INTERN ibool srv_use_checksums = TRUE;
346
UNIV_INTERN ibool srv_set_thread_priorities = TRUE;
347
UNIV_INTERN int srv_query_thread_priority = 0;
349
UNIV_INTERN ulint srv_replication_delay = 0;
351
/*-------------------------------------------*/
352
UNIV_INTERN ulong srv_n_spin_wait_rounds = 20;
353
UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
354
UNIV_INTERN ulong srv_thread_sleep_delay = 10000;
355
UNIV_INTERN ulint srv_spin_wait_delay = 5;
356
UNIV_INTERN ibool srv_priority_boost = TRUE;
359
UNIV_INTERN ibool srv_print_thread_releases = FALSE;
360
UNIV_INTERN ibool srv_print_lock_waits = FALSE;
361
UNIV_INTERN ibool srv_print_buf_io = FALSE;
362
UNIV_INTERN ibool srv_print_log_io = FALSE;
363
UNIV_INTERN ibool srv_print_latch_waits = FALSE;
364
#endif /* UNIV_DEBUG */
366
UNIV_INTERN ulint srv_n_rows_inserted = 0;
367
UNIV_INTERN ulint srv_n_rows_updated = 0;
368
UNIV_INTERN ulint srv_n_rows_deleted = 0;
369
UNIV_INTERN ulint srv_n_rows_read = 0;
370
#ifndef UNIV_HOTBACKUP
371
static ulint srv_n_rows_inserted_old = 0;
372
static ulint srv_n_rows_updated_old = 0;
373
static ulint srv_n_rows_deleted_old = 0;
374
static ulint srv_n_rows_read_old = 0;
375
#endif /* !UNIV_HOTBACKUP */
377
UNIV_INTERN ulint srv_n_lock_wait_count = 0;
378
UNIV_INTERN ulint srv_n_lock_wait_current_count = 0;
379
UNIV_INTERN ib_int64_t srv_n_lock_wait_time = 0;
380
UNIV_INTERN ulint srv_n_lock_max_wait_time = 0;
384
Set the following to 0 if you want InnoDB to write messages on
385
stderr on startup/shutdown
387
UNIV_INTERN ibool srv_print_verbose_log = TRUE;
388
UNIV_INTERN ibool srv_print_innodb_monitor = FALSE;
389
UNIV_INTERN ibool srv_print_innodb_lock_monitor = FALSE;
390
UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE;
391
UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE;
393
/* Array of English strings describing the current state of an
394
i/o handler thread */
396
UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
397
UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
399
UNIV_INTERN time_t srv_last_monitor_time;
401
UNIV_INTERN mutex_t srv_innodb_monitor_mutex;
403
/* Mutex for locking srv_monitor_file */
404
UNIV_INTERN mutex_t srv_monitor_file_mutex;
405
/* Temporary file for innodb monitor output */
406
UNIV_INTERN FILE* srv_monitor_file;
407
/* Mutex for locking srv_dict_tmpfile.
408
This mutex has a very high rank; threads reserving it should not
409
be holding any InnoDB latches. */
410
UNIV_INTERN mutex_t srv_dict_tmpfile_mutex;
411
/* Temporary file for output from the data dictionary */
412
UNIV_INTERN FILE* srv_dict_tmpfile;
413
/* Mutex for locking srv_misc_tmpfile.
414
This mutex has a very low rank; threads reserving it should not
415
acquire any further latches or sleep before releasing this one. */
416
UNIV_INTERN mutex_t srv_misc_tmpfile_mutex;
417
/* Temporary file for miscellanous diagnostic output */
418
UNIV_INTERN FILE* srv_misc_tmpfile;
420
UNIV_INTERN ulint srv_main_thread_process_no = 0;
421
UNIV_INTERN ulint srv_main_thread_id = 0;
/*
	IMPLEMENTATION OF THE SERVER MAIN PROGRAM
	=========================================
427
There is the following analogue between this database
428
server and an operating system kernel:
430
DB concept equivalent OS concept
431
---------- ---------------------
432
transaction -- process;
434
query thread -- thread;
439
the rollback state -- kill signal delivered to a process;
443
query thread execution:
444
(a) without kernel mutex
445
reserved -- process executing in user mode;
446
(b) with kernel mutex reserved
447
-- process executing in kernel mode;
449
The server is controlled by a master thread which runs at
450
a priority higher than normal, that is, higher than user threads.
451
It sleeps most of the time, and wakes up, say, every 300 milliseconds,
452
to check whether there is anything happening in the server which
453
requires intervention of the master thread. Such situations may be,
454
for example, when flushing of dirty blocks is needed in the buffer
455
pool or old version of database rows have to be cleaned away.
457
The threads which we call user threads serve the queries of
458
the clients and input from the console of the server.
459
They run at normal priority. The server may have several
460
communications endpoints. A dedicated set of user threads waits
461
at each of these endpoints ready to receive a client request.
462
Each request is taken by a single user thread, which then starts
463
processing and, when the result is ready, sends it to the client
464
and returns to wait at the same endpoint the thread started from.
466
So, we do not have dedicated communication threads listening at
467
the endpoints and dealing the jobs to dedicated worker threads.
468
Our architecture saves one thread switch per request, compared
469
to the solution with dedicated communication threads
470
which amounts to 15 microseconds on 100 MHz Pentium
471
running NT. If the client
472
is communicating over a network, this saving is negligible, but
473
if the client resides in the same machine, maybe in an SMP machine
474
on a different processor from the server thread, the saving
475
can be important as the threads can communicate over shared
476
memory with an overhead of a few microseconds.
478
We may later implement a dedicated communication thread solution
479
for those endpoints which communicate over a network.
481
Our solution with user threads has two problems: for each endpoint
482
there has to be a number of listening threads. If there are many
483
communication endpoints, it may be difficult to set the right number
484
of concurrent threads in the system, as many of the threads
485
may always be waiting at less busy endpoints. Another problem
486
is queuing of the messages, as the server internally does not
487
offer any queue for jobs.
489
Another group of user threads is intended for splitting the
490
queries and processing them in parallel. Let us call these
491
parallel communication threads. These threads are waiting for
492
parallelized tasks, suspended on event semaphores.
494
A single user thread waits for input from the console,
495
like a command to shut the database.
497
Utility threads are a different group of threads which takes
498
care of the buffer pool flushing and other, mainly background
499
operations, in the server.
500
Some of these utility threads always run at a lower than normal
501
priority, so that they are always in background. Some of them
502
may dynamically boost their priority by the pri_adjust function,
503
even to higher than normal priority, if their task becomes urgent.
504
The running of utilities is controlled by high- and low-water marks
505
of urgency. The urgency may be measured by the number of dirty blocks
506
in the buffer pool, in the case of the flush thread, for example.
507
When the high-water mark is exceeded, an utility starts running, until
508
the urgency drops under the low-water mark. Then the utility thread
509
suspends itself to wait for an event. The master thread is
510
responsible of signaling this event when the utility thread is
513
For each individual type of utility, some threads always remain
514
at lower than normal priority. This is because pri_adjust is implemented
515
so that the threads at normal or higher priority control their
516
share of running time by calling sleep. Thus, if the load of the
517
system suddenly drops, these threads cannot necessarily utilize
518
the system fully. The background priority threads make up for this,
519
starting to run when the load drops.
521
When there is no activity in the system, also the master thread
522
suspends itself to wait for an event making
523
the server totally silent. The responsibility to signal this
524
event is on the user thread which again receives a message
527
There is still one complication in our server design. If a
528
background utility thread obtains a resource (e.g., mutex) needed by a user
529
thread, and there is also some other user activity in the system,
530
the user thread may have to wait indefinitely long for the
531
resource, as the OS does not schedule a background thread if
532
there is some other runnable user thread. This problem is called
533
priority inversion in real-time programming.
535
One solution to the priority inversion problem would be to
536
keep record of which thread owns which resource and
537
in the above case boost the priority of the background thread
538
so that it will be scheduled and it can release the resource.
539
This solution is called priority inheritance in real-time programming.
540
A drawback of this solution is that the overhead of acquiring a mutex
541
increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
542
the thread has to call os_thread_get_curr_id.
543
This may be compared to 0.5 microsecond overhead for a mutex lock-unlock
544
pair. Note that the thread
545
cannot store the information in the resource, say mutex, itself,
546
because competing threads could wipe out the information if it is
547
stored before acquiring the mutex, and if it stored afterwards,
548
the information is outdated for the time of one machine instruction,
549
at least. (To be precise, the information could be stored to
550
lock_word in mutex if the machine supports atomic swap.)
552
The above solution with priority inheritance may become actual in the
553
future, but at the moment we plan to implement a more coarse solution,
554
which could be called a global priority inheritance. If a thread
555
has to wait for a long time, say 300 milliseconds, for a resource,
556
we just guess that it may be waiting for a resource owned by a background
557
thread, and boost the the priority of all runnable background threads
558
to the normal level. The background threads then themselves adjust
559
their fixed priority back to background after releasing all resources
560
they had (or, at some fixed points in their program code).
562
What is the performance of the global priority inheritance solution?
563
We may weigh the length of the wait time 300 milliseconds, during
564
which the system processes some other thread
565
to the cost of boosting the priority of each runnable background
566
thread, rescheduling it, and lowering the priority again.
567
On 100 MHz Pentium + NT this overhead may be of the order 100
568
microseconds per thread. So, if the number of runnable background
569
threads is not very big, say < 100, the cost is tolerable.
570
Utility threads probably will access resources used by
571
user threads not very often, so collisions of user threads
572
to preempted utility threads should not happen very often.
574
The thread table contains
575
information of the current status of each thread existing in the system,
576
and also the event semaphores used in suspending the master thread
577
and utility and parallel communication threads when they have nothing to do.
578
The thread table can be seen as an analogue to the process table
579
in a traditional Unix implementation.
581
The thread table is also used in the global priority inheritance
582
scheme. This brings in one additional complication: threads accessing
583
the thread table must have at least normal fixed priority,
584
because the priority inheritance solution does not work if a background
585
thread is preempted while possessing the mutex protecting the thread table.
586
So, if a thread accesses the thread table, its priority has to be
587
boosted at least to normal. This priority requirement can be seen similar to
588
the privileged mode used when processing the kernel calls in traditional
Unix. */
591
/* Thread slot in the thread table */
592
struct srv_slot_struct{
593
os_thread_id_t id; /* thread id */
594
os_thread_t handle; /* thread handle */
595
unsigned type:3; /* thread type: user, utility etc. */
596
unsigned in_use:1; /* TRUE if this slot is in use */
597
unsigned suspended:1; /* TRUE if the thread is waiting
598
for the event of this slot */
599
ib_time_t suspend_time; /* time when the thread was
601
os_event_t event; /* event used in suspending the
602
thread when it has nothing to do */
603
que_thr_t* thr; /* suspended query thread (only
604
used for MySQL threads) */
607
/* Table for MySQL threads where they will be suspended to wait for locks */
608
UNIV_INTERN srv_slot_t* srv_mysql_table = NULL;
610
UNIV_INTERN os_event_t srv_lock_timeout_thread_event;
612
UNIV_INTERN srv_sys_t* srv_sys = NULL;
614
/* padding to prevent other memory update hotspots from residing on
615
the same memory cache line */
616
UNIV_INTERN byte srv_pad1[64];
617
/* mutex protecting the server, trx structs, query threads, and lock table */
618
UNIV_INTERN mutex_t* kernel_mutex_temp;
619
/* padding to prevent other memory update hotspots from residing on
620
the same memory cache line */
621
UNIV_INTERN byte srv_pad2[64];
624
/* The following three values measure the urgency of the jobs of
625
buffer, version, and insert threads. They may vary from 0 - 1000.
626
The server mutex protects all these variables. The low-water values
627
tell that the server can acquiesce the utility when the value
628
drops below this low-water mark. */
630
static ulint srv_meter[SRV_MASTER + 1];
631
static ulint srv_meter_low_water[SRV_MASTER + 1];
632
static ulint srv_meter_high_water[SRV_MASTER + 1];
633
static ulint srv_meter_high_water2[SRV_MASTER + 1];
634
static ulint srv_meter_foreground[SRV_MASTER + 1];
637
/* The following values give info about the activity going on in
638
the database. They are protected by the server mutex. The arrays
639
are indexed by the type of the thread. */
641
UNIV_INTERN ulint srv_n_threads_active[SRV_MASTER + 1];
642
UNIV_INTERN ulint srv_n_threads[SRV_MASTER + 1];
644
/*************************************************************************
645
Sets the info describing an i/o thread current state. */
648
srv_set_io_thread_op_info(
649
/*======================*/
650
ulint i, /* in: the 'segment' of the i/o thread */
651
const char* str) /* in: constant char string describing the
654
ut_a(i < SRV_MAX_N_IO_THREADS);
656
srv_io_thread_op_info[i] = str;
659
/*************************************************************************
660
Accessor function to get pointer to n'th slot in the server thread
664
srv_table_get_nth_slot(
665
/*===================*/
666
/* out: pointer to the slot */
667
ulint index) /* in: index of the slot */
669
ut_a(index < OS_THREAD_MAX_N);
671
return(srv_sys->threads + index);
674
#ifndef UNIV_HOTBACKUP
675
/*************************************************************************
676
Gets the number of threads in the system. */
679
srv_get_n_threads(void)
680
/*===================*/
685
mutex_enter(&kernel_mutex);
687
for (i = SRV_COM; i < SRV_MASTER + 1; i++) {
689
n_threads += srv_n_threads[i];
692
mutex_exit(&kernel_mutex);
697
/*************************************************************************
698
Reserves a slot in the thread table for the current thread. Also creates the
699
thread local storage struct for the current thread. NOTE! The server mutex
700
has to be reserved by the caller! */
703
srv_table_reserve_slot(
704
/*===================*/
705
/* out: reserved slot index */
706
enum srv_thread_type type) /* in: type of the thread */
712
ut_a(type <= SRV_MASTER);
715
slot = srv_table_get_nth_slot(i);
717
while (slot->in_use) {
719
slot = srv_table_get_nth_slot(i);
722
ut_a(slot->in_use == FALSE);
725
slot->suspended = FALSE;
727
slot->id = os_thread_get_curr_id();
728
slot->handle = os_thread_get_curr();
732
thr_local_set_slot_no(os_thread_get_curr_id(), i);
737
/*************************************************************************
738
Suspends the calling thread to wait for the event in its thread slot.
739
NOTE! The server mutex has to be reserved by the caller! */
742
srv_suspend_thread(void)
743
/*====================*/
744
/* out: event for the calling thread to wait */
749
enum srv_thread_type type;
751
ut_ad(mutex_own(&kernel_mutex));
753
slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
755
if (srv_print_thread_releases) {
757
"Suspending thread %lu to slot %lu\n",
758
(ulong) os_thread_get_curr_id(), (ulong) slot_no);
761
slot = srv_table_get_nth_slot(slot_no);
765
ut_ad(type >= SRV_WORKER);
766
ut_ad(type <= SRV_MASTER);
770
slot->suspended = TRUE;
772
ut_ad(srv_n_threads_active[type] > 0);
774
srv_n_threads_active[type]--;
776
os_event_reset(event);
780
#endif /* !UNIV_HOTBACKUP */
782
/*************************************************************************
783
Releases threads of the type given from suspension in the thread table.
784
NOTE! The server mutex has to be reserved by the caller! */
789
/* out: number of threads
790
released: this may be < n if
791
not enough threads were
792
suspended at the moment */
793
enum srv_thread_type type, /* in: thread type */
794
ulint n) /* in: number of threads to release */
800
ut_ad(type >= SRV_WORKER);
801
ut_ad(type <= SRV_MASTER);
803
ut_ad(mutex_own(&kernel_mutex));
805
for (i = 0; i < OS_THREAD_MAX_N; i++) {
807
slot = srv_table_get_nth_slot(i);
809
if (slot->in_use && slot->type == type && slot->suspended) {
811
slot->suspended = FALSE;
813
srv_n_threads_active[type]++;
815
os_event_set(slot->event);
817
if (srv_print_thread_releases) {
819
"Releasing thread %lu type %lu"
821
(ulong) slot->id, (ulong) type,
836
/*************************************************************************
837
Returns the calling thread type. */
840
srv_get_thread_type(void)
841
/*=====================*/
842
/* out: SRV_COM, ... */
846
enum srv_thread_type type;
848
mutex_enter(&kernel_mutex);
850
slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
852
slot = srv_table_get_nth_slot(slot_no);
856
ut_ad(type >= SRV_WORKER);
857
ut_ad(type <= SRV_MASTER);
859
mutex_exit(&kernel_mutex);
864
/*************************************************************************
865
Initializes the server. */
871
srv_conc_slot_t* conc_slot;
876
srv_sys = mem_alloc(sizeof(srv_sys_t));
878
kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
879
mutex_create(&kernel_mutex, SYNC_KERNEL);
881
mutex_create(&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
883
srv_sys->threads = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));
885
for (i = 0; i < OS_THREAD_MAX_N; i++) {
886
slot = srv_table_get_nth_slot(i);
887
slot->in_use = FALSE;
888
slot->type=0; /* Avoid purify errors */
889
slot->event = os_event_create(NULL);
893
srv_mysql_table = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));
895
for (i = 0; i < OS_THREAD_MAX_N; i++) {
896
slot = srv_mysql_table + i;
897
slot->in_use = FALSE;
899
slot->event = os_event_create(NULL);
903
srv_lock_timeout_thread_event = os_event_create(NULL);
905
for (i = 0; i < SRV_MASTER + 1; i++) {
906
srv_n_threads_active[i] = 0;
907
srv_n_threads[i] = 0;
910
srv_meter_low_water[i] = 50;
911
srv_meter_high_water[i] = 100;
912
srv_meter_high_water2[i] = 200;
913
srv_meter_foreground[i] = 250;
917
UT_LIST_INIT(srv_sys->tasks);
919
/* create dummy table and index for old-style infimum and supremum */
920
table = dict_mem_table_create("SYS_DUMMY1",
921
DICT_HDR_SPACE, 1, 0);
922
dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
923
DATA_ENGLISH | DATA_NOT_NULL, 8);
925
srv_sys->dummy_ind1 = dict_mem_index_create(
926
"SYS_DUMMY1", "SYS_DUMMY1", DICT_HDR_SPACE, 0, 1);
927
dict_index_add_col(srv_sys->dummy_ind1, table,
928
dict_table_get_nth_col(table, 0), 0);
929
srv_sys->dummy_ind1->table = table;
930
/* create dummy table and index for new-style infimum and supremum */
931
table = dict_mem_table_create("SYS_DUMMY2",
932
DICT_HDR_SPACE, 1, DICT_TF_COMPACT);
933
dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
934
DATA_ENGLISH | DATA_NOT_NULL, 8);
935
srv_sys->dummy_ind2 = dict_mem_index_create(
936
"SYS_DUMMY2", "SYS_DUMMY2", DICT_HDR_SPACE, 0, 1);
937
dict_index_add_col(srv_sys->dummy_ind2, table,
938
dict_table_get_nth_col(table, 0), 0);
939
srv_sys->dummy_ind2->table = table;
941
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
942
srv_sys->dummy_ind1->cached = srv_sys->dummy_ind2->cached = TRUE;
944
/* Init the server concurrency restriction data structures */
946
os_fast_mutex_init(&srv_conc_mutex);
948
UT_LIST_INIT(srv_conc_queue);
950
srv_conc_slots = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t));
952
for (i = 0; i < OS_THREAD_MAX_N; i++) {
953
conc_slot = srv_conc_slots + i;
954
conc_slot->reserved = FALSE;
955
conc_slot->event = os_event_create(NULL);
956
ut_a(conc_slot->event);
959
/* Initialize some INFORMATION SCHEMA internal structures */
960
trx_i_s_cache_init(trx_i_s_cache);
963
/*************************************************************************
964
Frees the OS fast mutex created in srv_init(). */
970
os_fast_mutex_free(&srv_conc_mutex);
973
/*************************************************************************
974
Initializes the synchronization primitives, memory system, and the thread
978
srv_general_init(void)
979
/*==================*/
983
mem_init(srv_mem_pool_size);
987
/*======================= InnoDB Server FIFO queue =======================*/
989
/* Maximum allowable purge history length. <=0 means 'infinite'. */
990
UNIV_INTERN ulong srv_max_purge_lag = 0;
992
/*************************************************************************
993
Puts an OS thread to wait if there are too many concurrent threads
994
(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
997
srv_conc_enter_innodb(
998
/*==================*/
999
trx_t* trx) /* in: transaction object associated with the
1002
ibool has_slept = FALSE;
1003
srv_conc_slot_t* slot = NULL;
1006
if (trx->mysql_thd != NULL
1007
&& thd_is_replication_slave_thread(trx->mysql_thd)) {
1009
UT_WAIT_FOR(srv_conc_n_threads
1010
< (lint)srv_thread_concurrency,
1011
srv_replication_delay * 1000);
1016
/* If trx has 'free tickets' to enter the engine left, then use one
1019
if (trx->n_tickets_to_enter_innodb > 0) {
1020
trx->n_tickets_to_enter_innodb--;
1025
os_fast_mutex_lock(&srv_conc_mutex);
1027
if (trx->declared_to_be_inside_innodb) {
1028
ut_print_timestamp(stderr);
1029
fputs(" InnoDB: Error: trying to declare trx"
1030
" to enter InnoDB, but\n"
1031
"InnoDB: it already is declared.\n", stderr);
1032
trx_print(stderr, trx, 0);
1034
os_fast_mutex_unlock(&srv_conc_mutex);
1039
if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
1041
srv_conc_n_threads++;
1042
trx->declared_to_be_inside_innodb = TRUE;
1043
trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
1045
os_fast_mutex_unlock(&srv_conc_mutex);
1050
/* If the transaction is not holding resources, let it sleep
1051
for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */
1053
if (!has_slept && !trx->has_search_latch
1054
&& NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {
1056
has_slept = TRUE; /* We let it sleep only once to avoid
1059
srv_conc_n_waiting_threads++;
1061
os_fast_mutex_unlock(&srv_conc_mutex);
1063
trx->op_info = "sleeping before joining InnoDB queue";
1065
/* Peter Zaitsev suggested that we take the sleep away
1066
altogether. But the sleep may be good in pathological
1067
situations of lots of thread switches. Simply put some
1068
threads aside for a while to reduce the number of thread
1070
if (SRV_THREAD_SLEEP_DELAY > 0) {
1071
os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
1076
os_fast_mutex_lock(&srv_conc_mutex);
1078
srv_conc_n_waiting_threads--;
1083
/* Too many threads inside: put the current thread to a queue */
1085
for (i = 0; i < OS_THREAD_MAX_N; i++) {
1086
slot = srv_conc_slots + i;
1088
if (!slot->reserved) {
1094
if (i == OS_THREAD_MAX_N) {
1095
/* Could not find a free wait slot, we must let the
1098
srv_conc_n_threads++;
1099
trx->declared_to_be_inside_innodb = TRUE;
1100
trx->n_tickets_to_enter_innodb = 0;
1102
os_fast_mutex_unlock(&srv_conc_mutex);
1107
/* Release possible search system latch this thread has */
1108
if (trx->has_search_latch) {
1109
trx_search_latch_release_if_reserved(trx);
1112
/* Add to the queue */
1113
slot->reserved = TRUE;
1114
slot->wait_ended = FALSE;
1116
UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
1118
os_event_reset(slot->event);
1120
srv_conc_n_waiting_threads++;
1122
os_fast_mutex_unlock(&srv_conc_mutex);
1124
/* Go to wait for the event; when a thread leaves InnoDB it will
1125
release this thread */
1127
trx->op_info = "waiting in InnoDB queue";
1129
os_event_wait(slot->event);
1133
os_fast_mutex_lock(&srv_conc_mutex);
1135
srv_conc_n_waiting_threads--;
1137
/* NOTE that the thread which released this thread already
1138
incremented the thread counter on behalf of this thread */
1140
slot->reserved = FALSE;
1142
UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
1144
trx->declared_to_be_inside_innodb = TRUE;
1145
trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
1147
os_fast_mutex_unlock(&srv_conc_mutex);
1150
/*************************************************************************
1151
This lets a thread enter InnoDB regardless of the number of threads inside
1152
InnoDB. This must be called when a thread ends a lock wait. */
1155
srv_conc_force_enter_innodb(
1156
/*========================*/
1157
trx_t* trx) /* in: transaction object associated with the
1160
if (UNIV_LIKELY(!srv_thread_concurrency)) {
1165
os_fast_mutex_lock(&srv_conc_mutex);
1167
srv_conc_n_threads++;
1168
trx->declared_to_be_inside_innodb = TRUE;
1169
trx->n_tickets_to_enter_innodb = 1;
1171
os_fast_mutex_unlock(&srv_conc_mutex);
1174
/*************************************************************************
1175
This must be called when a thread exits InnoDB in a lock wait or at the
1176
end of an SQL statement. */
1179
srv_conc_force_exit_innodb(
1180
/*=======================*/
1181
trx_t* trx) /* in: transaction object associated with the
1184
srv_conc_slot_t* slot = NULL;
1186
if (UNIV_LIKELY(!srv_thread_concurrency)) {
1191
if (trx->mysql_thd != NULL
1192
&& thd_is_replication_slave_thread(trx->mysql_thd)) {
1197
if (trx->declared_to_be_inside_innodb == FALSE) {
1202
os_fast_mutex_lock(&srv_conc_mutex);
1204
srv_conc_n_threads--;
1205
trx->declared_to_be_inside_innodb = FALSE;
1206
trx->n_tickets_to_enter_innodb = 0;
1208
if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
1209
/* Look for a slot where a thread is waiting and no other
1210
thread has yet released the thread */
1212
slot = UT_LIST_GET_FIRST(srv_conc_queue);
1214
while (slot && slot->wait_ended == TRUE) {
1215
slot = UT_LIST_GET_NEXT(srv_conc_queue, slot);
1219
slot->wait_ended = TRUE;
1221
/* We increment the count on behalf of the released
1224
srv_conc_n_threads++;
1228
os_fast_mutex_unlock(&srv_conc_mutex);
1231
os_event_set(slot->event);
1235
/*************************************************************************
1236
This must be called when a thread exits InnoDB. */
1239
srv_conc_exit_innodb(
1240
/*=================*/
1241
trx_t* trx) /* in: transaction object associated with the
1244
if (trx->n_tickets_to_enter_innodb > 0) {
1245
/* We will pretend the thread is still inside InnoDB though it
1246
now leaves the InnoDB engine. In this way we save
1247
a lot of semaphore operations. srv_conc_force_exit_innodb is
1248
used to declare the thread definitely outside InnoDB. It
1249
should be called when there is a lock wait or an SQL statement
1255
srv_conc_force_exit_innodb(trx);
1258
/*========================================================================*/
1260
/*************************************************************************
1261
Normalizes init parameter values to use units we use inside InnoDB. */
1264
srv_normalize_init_values(void)
1265
/*===========================*/
1266
/* out: DB_SUCCESS or error code */
1271
n = srv_n_data_files;
1273
for (i = 0; i < n; i++) {
1274
srv_data_file_sizes[i] = srv_data_file_sizes[i]
1275
* ((1024 * 1024) / UNIV_PAGE_SIZE);
1278
srv_last_file_size_max = srv_last_file_size_max
1279
* ((1024 * 1024) / UNIV_PAGE_SIZE);
1281
srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
1283
srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
1285
srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
1290
/*************************************************************************
1291
Boots the InnoDB server. */
1296
/* out: DB_SUCCESS or error code */
1300
/* Transform the init parameter values given by MySQL to
1301
use units we use inside InnoDB: */
1303
err = srv_normalize_init_values();
1305
if (err != DB_SUCCESS) {
1309
/* Initialize synchronization primitives, memory management, and thread
1314
/* Initialize this module */
1321
#ifndef UNIV_HOTBACKUP
1322
/*************************************************************************
1323
Reserves a slot in the thread table for the current MySQL OS thread.
1324
NOTE! The kernel mutex has to be reserved by the caller! */
1327
srv_table_reserve_slot_for_mysql(void)
1328
/*==================================*/
1329
/* out: reserved slot */
1334
ut_ad(mutex_own(&kernel_mutex));
1337
slot = srv_mysql_table + i;
1339
while (slot->in_use) {
1342
if (i >= OS_THREAD_MAX_N) {
1344
ut_print_timestamp(stderr);
1347
" InnoDB: There appear to be %lu MySQL"
1348
" threads currently waiting\n"
1349
"InnoDB: inside InnoDB, which is the"
1350
" upper limit. Cannot continue operation.\n"
1351
"InnoDB: We intentionally generate"
1352
" a seg fault to print a stack trace\n"
1353
"InnoDB: on Linux. But first we print"
1354
" a list of waiting threads.\n", (ulong) i);
1356
for (i = 0; i < OS_THREAD_MAX_N; i++) {
1358
slot = srv_mysql_table + i;
1361
"Slot %lu: thread id %lu, type %lu,"
1362
" in use %lu, susp %lu, time %lu\n",
1364
(ulong) os_thread_pf(slot->id),
1366
(ulong) slot->in_use,
1367
(ulong) slot->suspended,
1368
(ulong) difftime(ut_time(),
1369
slot->suspend_time));
1375
slot = srv_mysql_table + i;
1378
ut_a(slot->in_use == FALSE);
1380
slot->in_use = TRUE;
1381
slot->id = os_thread_get_curr_id();
1382
slot->handle = os_thread_get_curr();
1386
#endif /* !UNIV_HOTBACKUP */
1388
/*******************************************************************
1389
Puts a MySQL OS thread to wait for a lock to be released. If an error
1390
occurs during the wait trx->error_state associated with thr is
1391
!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
1392
are possible errors. DB_DEADLOCK is returned if selective deadlock
1393
resolution chose this transaction as a victim. */
1396
srv_suspend_mysql_thread(
1397
/*=====================*/
1398
que_thr_t* thr) /* in: query thread associated with the MySQL
1401
#ifndef UNIV_HOTBACKUP
1406
ulint had_dict_lock;
1407
ibool was_declared_inside_innodb = FALSE;
1408
ib_int64_t start_time = 0;
1409
ib_int64_t finish_time;
1414
ut_ad(!mutex_own(&kernel_mutex));
1416
trx = thr_get_trx(thr);
1418
os_event_set(srv_lock_timeout_thread_event);
1420
mutex_enter(&kernel_mutex);
1422
trx->error_state = DB_SUCCESS;
1424
if (thr->state == QUE_THR_RUNNING) {
1426
ut_ad(thr->is_active == TRUE);
1428
/* The lock has already been released or this transaction
1429
was chosen as a deadlock victim: no need to suspend */
1431
if (trx->was_chosen_as_deadlock_victim) {
1433
trx->error_state = DB_DEADLOCK;
1434
trx->was_chosen_as_deadlock_victim = FALSE;
1437
mutex_exit(&kernel_mutex);
1442
ut_ad(thr->is_active == FALSE);
1444
slot = srv_table_reserve_slot_for_mysql();
1446
event = slot->event;
1450
os_event_reset(event);
1452
slot->suspend_time = ut_time();
1454
if (thr->lock_state == QUE_THR_LOCK_ROW) {
1455
srv_n_lock_wait_count++;
1456
srv_n_lock_wait_current_count++;
1458
ut_usectime(&sec, &ms);
1459
start_time = (ib_int64_t)sec * 1000000 + ms;
1461
/* Wake the lock timeout monitor thread, if it is suspended */
1463
os_event_set(srv_lock_timeout_thread_event);
1465
mutex_exit(&kernel_mutex);
1467
if (trx->declared_to_be_inside_innodb) {
1469
was_declared_inside_innodb = TRUE;
1471
/* We must declare this OS thread to exit InnoDB, since a
1472
possible other thread holding a lock which this thread waits
1473
for must be allowed to enter, sooner or later */
1475
srv_conc_force_exit_innodb(trx);
1478
had_dict_lock = trx->dict_operation_lock_mode;
1480
switch (had_dict_lock) {
1482
/* Release foreign key check latch */
1483
row_mysql_unfreeze_data_dictionary(trx);
1486
/* Release fast index creation latch */
1487
row_mysql_unlock_data_dictionary(trx);
1491
ut_a(trx->dict_operation_lock_mode == 0);
1493
/* Wait for the release */
1495
os_event_wait(event);
1497
switch (had_dict_lock) {
1499
row_mysql_freeze_data_dictionary(trx);
1502
row_mysql_lock_data_dictionary(trx);
1506
if (was_declared_inside_innodb) {
1508
/* Return back inside InnoDB */
1510
srv_conc_force_enter_innodb(trx);
1513
mutex_enter(&kernel_mutex);
1515
/* Release the slot for others to use */
1517
slot->in_use = FALSE;
1519
wait_time = ut_difftime(ut_time(), slot->suspend_time);
1521
if (thr->lock_state == QUE_THR_LOCK_ROW) {
1522
ut_usectime(&sec, &ms);
1523
finish_time = (ib_int64_t)sec * 1000000 + ms;
1525
diff_time = (ulint) (finish_time - start_time);
1527
srv_n_lock_wait_current_count--;
1528
srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time;
1529
if (diff_time > srv_n_lock_max_wait_time) {
1530
srv_n_lock_max_wait_time = diff_time;
1534
if (trx->was_chosen_as_deadlock_victim) {
1536
trx->error_state = DB_DEADLOCK;
1537
trx->was_chosen_as_deadlock_victim = FALSE;
1540
mutex_exit(&kernel_mutex);
1542
if (srv_lock_wait_timeout < 100000000
1543
&& wait_time > (double)srv_lock_wait_timeout) {
1545
trx->error_state = DB_LOCK_WAIT_TIMEOUT;
1547
#else /* UNIV_HOTBACKUP */
1548
/* This function depends on MySQL code that is not included in
1549
InnoDB Hot Backup builds. Besides, this function should never
1550
be called in InnoDB Hot Backup. */
1552
#endif /* UNIV_HOTBACKUP */
1555
/************************************************************************
1556
Releases a MySQL OS thread waiting for a lock to be released, if the
1557
thread is already suspended. */
1560
srv_release_mysql_thread_if_suspended(
1561
/*==================================*/
1562
que_thr_t* thr) /* in: query thread associated with the
1565
#ifndef UNIV_HOTBACKUP
1569
ut_ad(mutex_own(&kernel_mutex));
1571
for (i = 0; i < OS_THREAD_MAX_N; i++) {
1573
slot = srv_mysql_table + i;
1575
if (slot->in_use && slot->thr == thr) {
1578
os_event_set(slot->event);
1585
#else /* UNIV_HOTBACKUP */
1586
/* This function depends on MySQL code that is not included in
1587
InnoDB Hot Backup builds. Besides, this function should never
1588
be called in InnoDB Hot Backup. */
1590
#endif /* UNIV_HOTBACKUP */
1593
#ifndef UNIV_HOTBACKUP
1594
/**********************************************************************
1595
Refreshes the values used to calculate per-second averages. */
1598
srv_refresh_innodb_monitor_stats(void)
1599
/*==================================*/
1601
mutex_enter(&srv_innodb_monitor_mutex);
1603
srv_last_monitor_time = time(NULL);
1605
os_aio_refresh_stats();
1607
btr_cur_n_sea_old = btr_cur_n_sea;
1608
btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1610
log_refresh_stats();
1612
buf_refresh_io_stats();
1614
srv_n_rows_inserted_old = srv_n_rows_inserted;
1615
srv_n_rows_updated_old = srv_n_rows_updated;
1616
srv_n_rows_deleted_old = srv_n_rows_deleted;
1617
srv_n_rows_read_old = srv_n_rows_read;
1619
mutex_exit(&srv_innodb_monitor_mutex);
1622
/**********************************************************************
1623
Outputs to a file the output of the InnoDB Monitor. */
1626
srv_printf_innodb_monitor(
1627
/*======================*/
1628
FILE* file, /* in: output stream */
1629
ulint* trx_start, /* out: file position of the start of
1630
the list of active transactions */
1631
ulint* trx_end) /* out: file position of the end of
1632
the list of active transactions */
1634
double time_elapsed;
1635
time_t current_time;
1638
mutex_enter(&srv_innodb_monitor_mutex);
1640
current_time = time(NULL);
1642
/* We add 0.001 seconds to time_elapsed to prevent division
1643
by zero if two users happen to call SHOW INNODB STATUS at the same
1646
time_elapsed = difftime(current_time, srv_last_monitor_time)
1649
srv_last_monitor_time = time(NULL);
1651
fputs("\n=====================================\n", file);
1653
ut_print_timestamp(file);
1655
" INNODB MONITOR OUTPUT\n"
1656
"=====================================\n"
1657
"Per second averages calculated from the last %lu seconds\n",
1658
(ulong)time_elapsed);
1660
fputs("----------\n"
1662
"----------\n", file);
1665
/* Conceptually, srv_innodb_monitor_mutex has a very high latching
1666
order level in sync0sync.h, while dict_foreign_err_mutex has a very
1667
low level 135. Therefore we can reserve the latter mutex here without
1668
a danger of a deadlock of threads. */
1670
mutex_enter(&dict_foreign_err_mutex);
1672
if (ftell(dict_foreign_err_file) != 0L) {
1673
fputs("------------------------\n"
1674
"LATEST FOREIGN KEY ERROR\n"
1675
"------------------------\n", file);
1676
ut_copy_file(file, dict_foreign_err_file);
1679
mutex_exit(&dict_foreign_err_mutex);
1681
lock_print_info_summary(file);
1683
long t = ftell(file);
1685
*trx_start = ULINT_UNDEFINED;
1687
*trx_start = (ulint) t;
1690
lock_print_info_all_transactions(file);
1692
long t = ftell(file);
1694
*trx_end = ULINT_UNDEFINED;
1696
*trx_end = (ulint) t;
1701
"--------\n", file);
1704
fputs("-------------------------------------\n"
1705
"INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
1706
"-------------------------------------\n", file);
1709
ha_print_info(file, btr_search_sys->hash_index);
1712
"%.2f hash searches/s, %.2f non-hash searches/s\n",
1713
(btr_cur_n_sea - btr_cur_n_sea_old)
1715
(btr_cur_n_non_sea - btr_cur_n_non_sea_old)
1717
btr_cur_n_sea_old = btr_cur_n_sea;
1718
btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1725
fputs("----------------------\n"
1726
"BUFFER POOL AND MEMORY\n"
1727
"----------------------\n", file);
1729
"Total memory allocated " ULINTPF
1730
"; in additional pool allocated " ULINTPF "\n",
1731
ut_total_allocated_memory,
1732
mem_pool_get_reserved(mem_comm_pool));
1733
fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
1738
fputs("--------------\n"
1740
"--------------\n", file);
1741
fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
1742
(long) srv_conc_n_threads,
1743
(ulong) srv_conc_n_waiting_threads);
1745
fprintf(file, "%lu read views open inside InnoDB\n",
1746
UT_LIST_GET_LEN(trx_sys->view_list));
1748
n_reserved = fil_space_get_n_reserved_extents(0);
1749
if (n_reserved > 0) {
1751
"%lu tablespace extents now reserved for"
1752
" B-tree split operations\n",
1753
(ulong) n_reserved);
1757
fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
1758
(ulong) srv_main_thread_process_no,
1759
(ulong) srv_main_thread_id,
1760
srv_main_thread_op_info);
1762
fprintf(file, "Main thread id %lu, state: %s\n",
1763
(ulong) srv_main_thread_id,
1764
srv_main_thread_op_info);
1767
"Number of rows inserted " ULINTPF
1768
", updated " ULINTPF ", deleted " ULINTPF
1769
", read " ULINTPF "\n",
1770
srv_n_rows_inserted,
1775
"%.2f inserts/s, %.2f updates/s,"
1776
" %.2f deletes/s, %.2f reads/s\n",
1777
(srv_n_rows_inserted - srv_n_rows_inserted_old)
1779
(srv_n_rows_updated - srv_n_rows_updated_old)
1781
(srv_n_rows_deleted - srv_n_rows_deleted_old)
1783
(srv_n_rows_read - srv_n_rows_read_old)
1786
srv_n_rows_inserted_old = srv_n_rows_inserted;
1787
srv_n_rows_updated_old = srv_n_rows_updated;
1788
srv_n_rows_deleted_old = srv_n_rows_deleted;
1789
srv_n_rows_read_old = srv_n_rows_read;
1791
fputs("----------------------------\n"
1792
"END OF INNODB MONITOR OUTPUT\n"
1793
"============================\n", file);
1794
mutex_exit(&srv_innodb_monitor_mutex);
1798
/**********************************************************************
1799
Function to pass InnoDB status variables to MySQL */
1802
srv_export_innodb_status(void)
1804
mutex_enter(&srv_innodb_monitor_mutex);
1806
export_vars.innodb_data_pending_reads
1807
= os_n_pending_reads;
1808
export_vars.innodb_data_pending_writes
1809
= os_n_pending_writes;
1810
export_vars.innodb_data_pending_fsyncs
1811
= fil_n_pending_log_flushes
1812
+ fil_n_pending_tablespace_flushes;
1813
export_vars.innodb_data_fsyncs = os_n_fsyncs;
1814
export_vars.innodb_data_read = srv_data_read;
1815
export_vars.innodb_data_reads = os_n_file_reads;
1816
export_vars.innodb_data_writes = os_n_file_writes;
1817
export_vars.innodb_data_written = srv_data_written;
1818
export_vars.innodb_buffer_pool_read_requests = buf_pool->n_page_gets;
1819
export_vars.innodb_buffer_pool_write_requests
1820
= srv_buf_pool_write_requests;
1821
export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
1822
export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
1823
export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
1824
export_vars.innodb_buffer_pool_read_ahead_rnd = srv_read_ahead_rnd;
1825
export_vars.innodb_buffer_pool_read_ahead_seq = srv_read_ahead_seq;
1826
export_vars.innodb_buffer_pool_pages_data
1827
= UT_LIST_GET_LEN(buf_pool->LRU);
1828
export_vars.innodb_buffer_pool_pages_dirty
1829
= UT_LIST_GET_LEN(buf_pool->flush_list);
1830
export_vars.innodb_buffer_pool_pages_free
1831
= UT_LIST_GET_LEN(buf_pool->free);
1832
export_vars.innodb_buffer_pool_pages_latched
1833
= buf_get_latched_pages_number();
1834
export_vars.innodb_buffer_pool_pages_total = buf_pool->curr_size;
1836
export_vars.innodb_buffer_pool_pages_misc = buf_pool->curr_size
1837
- UT_LIST_GET_LEN(buf_pool->LRU)
1838
- UT_LIST_GET_LEN(buf_pool->free);
1839
export_vars.innodb_page_size = UNIV_PAGE_SIZE;
1840
export_vars.innodb_log_waits = srv_log_waits;
1841
export_vars.innodb_os_log_written = srv_os_log_written;
1842
export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
1843
export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
1844
export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
1845
export_vars.innodb_log_write_requests = srv_log_write_requests;
1846
export_vars.innodb_log_writes = srv_log_writes;
1847
export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
1848
export_vars.innodb_dblwr_writes = srv_dblwr_writes;
1849
export_vars.innodb_pages_created = buf_pool->n_pages_created;
1850
export_vars.innodb_pages_read = buf_pool->n_pages_read;
1851
export_vars.innodb_pages_written = buf_pool->n_pages_written;
1852
export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
1853
export_vars.innodb_row_lock_current_waits
1854
= srv_n_lock_wait_current_count;
1855
export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000;
1856
if (srv_n_lock_wait_count > 0) {
1857
export_vars.innodb_row_lock_time_avg = (ulint)
1858
(srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
1860
export_vars.innodb_row_lock_time_avg = 0;
1862
export_vars.innodb_row_lock_time_max
1863
= srv_n_lock_max_wait_time / 1000;
1864
export_vars.innodb_rows_read = srv_n_rows_read;
1865
export_vars.innodb_rows_inserted = srv_n_rows_inserted;
1866
export_vars.innodb_rows_updated = srv_n_rows_updated;
1867
export_vars.innodb_rows_deleted = srv_n_rows_deleted;
1869
mutex_exit(&srv_innodb_monitor_mutex);
1872
/*************************************************************************
1873
A thread which wakes up threads whose lock wait may have lasted too long.
1874
This also prints the info output by various InnoDB monitors. */
1877
srv_lock_timeout_and_monitor_thread(
1878
/*================================*/
1879
/* out: a dummy parameter */
1880
void* arg __attribute__((unused)))
1881
/* in: a dummy parameter required by
1885
double time_elapsed;
1886
time_t current_time;
1887
time_t last_table_monitor_time;
1888
time_t last_tablespace_monitor_time;
1889
time_t last_monitor_time;
1894
#ifdef UNIV_DEBUG_THREAD_CREATION
1895
fprintf(stderr, "Lock timeout thread starts, id %lu\n",
1896
os_thread_pf(os_thread_get_curr_id()));
1899
srv_last_monitor_time = time(NULL);
1900
last_table_monitor_time = time(NULL);
1901
last_tablespace_monitor_time = time(NULL);
1902
last_monitor_time = time(NULL);
1904
srv_lock_timeout_and_monitor_active = TRUE;
1906
/* When someone is waiting for a lock, we wake up every second
1907
and check if a timeout has passed for a lock wait */
1909
os_thread_sleep(1000000);
1911
current_time = time(NULL);
1913
time_elapsed = difftime(current_time, last_monitor_time);
1915
if (time_elapsed > 15) {
1916
last_monitor_time = time(NULL);
1918
if (srv_print_innodb_monitor) {
1919
srv_printf_innodb_monitor(stderr, NULL, NULL);
1922
if (srv_innodb_status) {
1923
mutex_enter(&srv_monitor_file_mutex);
1924
rewind(srv_monitor_file);
1925
srv_printf_innodb_monitor(srv_monitor_file, NULL,
1927
os_file_set_eof(srv_monitor_file);
1928
mutex_exit(&srv_monitor_file_mutex);
1931
if (srv_print_innodb_tablespace_monitor
1932
&& difftime(current_time,
1933
last_tablespace_monitor_time) > 60) {
1934
last_tablespace_monitor_time = time(NULL);
1936
fputs("========================"
1937
"========================\n",
1940
ut_print_timestamp(stderr);
1942
fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
1943
"========================"
1944
"========================\n",
1948
fputs("Validating tablespace\n", stderr);
1950
fputs("Validation ok\n"
1951
"---------------------------------------\n"
1952
"END OF INNODB TABLESPACE MONITOR OUTPUT\n"
1953
"=======================================\n",
1957
if (srv_print_innodb_table_monitor
1958
&& difftime(current_time, last_table_monitor_time) > 60) {
1960
last_table_monitor_time = time(NULL);
1962
fputs("===========================================\n",
1965
ut_print_timestamp(stderr);
1967
fputs(" INNODB TABLE MONITOR OUTPUT\n"
1968
"===========================================\n",
1972
fputs("-----------------------------------\n"
1973
"END OF INNODB TABLE MONITOR OUTPUT\n"
1974
"==================================\n",
1979
mutex_enter(&kernel_mutex);
1983
/* Check all slots to see whether a thread is waiting there, and if it
1984
has exceeded the time limit */
1986
for (i = 0; i < OS_THREAD_MAX_N; i++) {
1988
slot = srv_mysql_table + i;
1993
wait_time = ut_difftime(ut_time(), slot->suspend_time);
1995
if (srv_lock_wait_timeout < 100000000
1996
&& (wait_time > (double) srv_lock_wait_timeout
1997
|| wait_time < 0)) {
1999
/* Timeout exceeded or a wrap-around in system
2000
time counter: cancel the lock request queued
2001
by the transaction and release possible
2002
other transactions waiting behind; it is
2003
possible that the lock has already been
2004
granted: in that case do nothing */
2006
if (thr_get_trx(slot->thr)->wait_lock) {
2007
lock_cancel_waiting_and_release(
2008
thr_get_trx(slot->thr)
2015
os_event_reset(srv_lock_timeout_thread_event);
2017
mutex_exit(&kernel_mutex);
2019
if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
2023
if (some_waits || srv_print_innodb_monitor
2024
|| srv_print_innodb_lock_monitor
2025
|| srv_print_innodb_tablespace_monitor
2026
|| srv_print_innodb_table_monitor) {
2030
/* No one was waiting for a lock and no monitor was active:
2031
suspend this thread */
2033
srv_lock_timeout_and_monitor_active = FALSE;
2036
/* The following synchronisation is disabled, since
2037
the InnoDB monitor output is to be updated every 15 seconds. */
2038
os_event_wait(srv_lock_timeout_thread_event);
2043
srv_lock_timeout_and_monitor_active = FALSE;
2045
/* We count the number of threads in os_thread_exit(). A created
2046
thread should always use that to exit and not use return() to exit. */
2048
os_thread_exit(NULL);
2050
OS_THREAD_DUMMY_RETURN;
2053
/*************************************************************************
2054
A thread which prints warnings about semaphore waits which have lasted
2055
too long. These can be used to track bugs which cause hangs. */
2058
srv_error_monitor_thread(
2059
/*=====================*/
2060
/* out: a dummy parameter */
2061
void* arg __attribute__((unused)))
2062
/* in: a dummy parameter required by
2065
/* number of successive fatal timeouts observed */
2066
ulint fatal_cnt = 0;
2067
ib_uint64_t old_lsn;
2068
ib_uint64_t new_lsn;
2070
old_lsn = srv_start_lsn;
2072
#ifdef UNIV_DEBUG_THREAD_CREATION
2073
fprintf(stderr, "Error monitor thread starts, id %lu\n",
2074
os_thread_pf(os_thread_get_curr_id()));
2077
srv_error_monitor_active = TRUE;
2079
/* Try to track a strange bug reported by Harald Fuchs and others,
2080
where the lsn seems to decrease at times */
2082
new_lsn = log_get_lsn();
2084
if (new_lsn < old_lsn) {
2085
ut_print_timestamp(stderr);
2087
" InnoDB: Error: old log sequence number %"PRIu64""
2089
"InnoDB: than the new log sequence number %"PRIu64"!\n"
2090
"InnoDB: Please submit a bug report"
2091
" to http://bugs.mysql.com\n",
2097
if (difftime(time(NULL), srv_last_monitor_time) > 60) {
2098
/* We referesh InnoDB Monitor values so that averages are
2099
printed from at most 60 last seconds */
2101
srv_refresh_innodb_monitor_stats();
2104
/* Update the statistics collected for deciding LRU
2106
buf_LRU_stat_update();
2108
/* In case mutex_exit is not a memory barrier, it is
2109
theoretically possible some threads are left waiting though
2110
the semaphore is already released. Wake up those threads: */
2112
sync_arr_wake_threads_if_sema_free();
2114
if (sync_array_print_long_waits()) {
2116
if (fatal_cnt > 10) {
2119
"InnoDB: Error: semaphore wait has lasted"
2121
"InnoDB: We intentionally crash the server,"
2122
" because it appears to be hung.\n",
2123
(ulong) srv_fatal_semaphore_wait_threshold);
2131
/* Flush stderr so that a database user gets the output
2132
to possible MySQL error file */
2136
os_thread_sleep(1000000);
2138
if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
2143
srv_error_monitor_active = FALSE;
2145
/* We count the number of threads in os_thread_exit(). A created
2146
thread should always use that to exit and not use return() to exit. */
2148
os_thread_exit(NULL);
2150
OS_THREAD_DUMMY_RETURN;
2153
/***********************************************************************
2154
Tells the InnoDB server that there has been activity in the database
2155
and wakes up the master thread if it is suspended (not sleeping). Used
2156
in the MySQL interface. Note that there is a small chance that the master
2157
thread stays suspended (we do not protect our operation with the kernel
2158
mutex, for performace reasons). */
2161
srv_active_wake_master_thread(void)
2162
/*===============================*/
2164
srv_activity_count++;
2166
if (srv_n_threads_active[SRV_MASTER] == 0) {
2168
mutex_enter(&kernel_mutex);
2170
srv_release_threads(SRV_MASTER, 1);
2172
mutex_exit(&kernel_mutex);
2176
/***********************************************************************
2177
Wakes up the master thread if it is suspended or being suspended. */
2180
srv_wake_master_thread(void)
2181
/*========================*/
2183
srv_activity_count++;
2185
mutex_enter(&kernel_mutex);
2187
srv_release_threads(SRV_MASTER, 1);
2189
mutex_exit(&kernel_mutex);
2192
/*************************************************************************
2193
The master thread controlling the server. */
2198
/* out: a dummy parameter */
2199
void* arg __attribute__((unused)))
2200
/* in: a dummy parameter required by
2204
time_t last_flush_time;
2205
time_t current_time;
2206
ulint old_activity_count;
2207
ulint n_pages_purged = 0;
2208
ulint n_bytes_merged;
2209
ulint n_pages_flushed;
2210
ulint n_bytes_archived;
2211
ulint n_tables_to_drop;
2214
ulint n_ios_very_old;
2216
ibool skip_sleep = FALSE;
2219
#ifdef UNIV_DEBUG_THREAD_CREATION
2220
fprintf(stderr, "Master thread starts, id %lu\n",
2221
os_thread_pf(os_thread_get_curr_id()));
2223
srv_main_thread_process_no = os_proc_get_number();
2224
srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
2226
srv_table_reserve_slot(SRV_MASTER);
2228
mutex_enter(&kernel_mutex);
2230
srv_n_threads_active[SRV_MASTER]++;
2232
mutex_exit(&kernel_mutex);
2235
/*****************************************************************/
2236
/* ---- When there is database activity by users, we cycle in this
2239
srv_main_thread_op_info = "reserving kernel mutex";
2241
n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read
2242
+ buf_pool->n_pages_written;
2243
mutex_enter(&kernel_mutex);
2245
/* Store the user activity counter at the start of this loop */
2246
old_activity_count = srv_activity_count;
2248
mutex_exit(&kernel_mutex);
2250
if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
2252
goto suspend_thread;
2255
/* ---- We run the following loop approximately once per second
2256
when there is database activity */
2260
for (i = 0; i < 10; i++) {
2261
n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read
2262
+ buf_pool->n_pages_written;
2263
srv_main_thread_op_info = "sleeping";
2267
os_thread_sleep(1000000);
2272
/* ALTER TABLE in MySQL requires on Unix that the table handler
2273
can drop tables lazily after there no longer are SELECT
2276
srv_main_thread_op_info = "doing background drop tables";
2278
row_drop_tables_for_mysql_in_background();
2280
srv_main_thread_op_info = "";
2282
if (srv_fast_shutdown && srv_shutdown_state > 0) {
2284
goto background_loop;
2287
/* We flush the log once in a second even if no commit
2288
is issued or the we have specified in my.cnf no flush
2289
at transaction commit */
2291
srv_main_thread_op_info = "flushing log";
2292
log_buffer_flush_to_disk();
2294
srv_main_thread_op_info = "making checkpoint";
2297
/* If there were less than 5 i/os during the
2298
one second sleep, we assume that there is free
2299
disk i/o capacity available, and it makes sense to
2300
do an insert buffer merge. */
2302
n_pend_ios = buf_get_n_pending_ios()
2303
+ log_sys->n_pending_writes;
2304
n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
2305
+ buf_pool->n_pages_written;
2306
if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
2307
srv_main_thread_op_info = "doing insert buffer merge";
2308
ibuf_contract_for_n_pages(
2309
TRUE, srv_insert_buffer_batch_size / 4);
2311
srv_main_thread_op_info = "flushing log";
2313
log_buffer_flush_to_disk();
2316
if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
2317
> srv_max_buf_pool_modified_pct)) {
2319
/* Try to keep the number of modified pages in the
2320
buffer pool under the limit wished by the user */
2322
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
2325
/* If we had to do the flush, it may have taken
2326
even more than 1 second, and also, there may be more
2327
to flush. Do not sleep 1 second during the next
2328
iteration of this loop. */
2333
if (srv_activity_count == old_activity_count) {
2335
/* There is no user activity at the moment, go to
2336
the background loop */
2338
goto background_loop;
2342
/* ---- We perform the following code approximately once per
2343
10 seconds when there is database activity */
2345
#ifdef MEM_PERIODIC_CHECK
2346
/* Check magic numbers of every allocated mem block once in 10
2348
mem_validate_all_blocks();
2350
/* If there were less than 200 i/os during the 10 second period,
2351
we assume that there is free disk i/o capacity available, and it
2352
makes sense to flush 100 pages. */
2354
n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
2355
n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
2356
+ buf_pool->n_pages_written;
2357
if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
2359
srv_main_thread_op_info = "flushing buffer pool pages";
2360
buf_flush_batch(BUF_FLUSH_LIST, 100, IB_ULONGLONG_MAX);
2362
srv_main_thread_op_info = "flushing log";
2363
log_buffer_flush_to_disk();
2366
/* We run a batch of insert buffer merge every 10 seconds,
2367
even if the server were active */
2369
srv_main_thread_op_info = "doing insert buffer merge";
2370
ibuf_contract_for_n_pages(TRUE, srv_insert_buffer_batch_size / 4);
2372
srv_main_thread_op_info = "flushing log";
2373
log_buffer_flush_to_disk();
2375
/* We run a full purge every 10 seconds, even if the server
2378
last_flush_time = time(NULL);
2382
if (srv_fast_shutdown && srv_shutdown_state > 0) {
2384
goto background_loop;
2387
srv_main_thread_op_info = "purging";
2388
n_pages_purged = trx_purge();
2390
current_time = time(NULL);
2392
if (difftime(current_time, last_flush_time) > 1) {
2393
srv_main_thread_op_info = "flushing log";
2395
log_buffer_flush_to_disk();
2396
last_flush_time = current_time;
2398
} while (n_pages_purged);
2400
srv_main_thread_op_info = "flushing buffer pool pages";
2402
/* Flush a few oldest pages to make a new checkpoint younger */
2404
if (buf_get_modified_ratio_pct() > 70) {
2406
/* If there are lots of modified pages in the buffer pool
2407
(> 70 %), we assume we can afford reserving the disk(s) for
2408
the time it requires to flush 100 pages */
2410
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
2413
/* Otherwise, we only flush a small number of pages so that
2414
we do not unnecessarily use much disk i/o capacity from
2417
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
2421
srv_main_thread_op_info = "making checkpoint";
2423
/* Make a new checkpoint about once in 10 seconds */
2425
log_checkpoint(TRUE, FALSE);
2427
srv_main_thread_op_info = "reserving kernel mutex";
2429
mutex_enter(&kernel_mutex);
2431
/* ---- When there is database activity, we jump from here back to
2432
the start of loop */
2434
if (srv_activity_count != old_activity_count) {
2435
mutex_exit(&kernel_mutex);
2439
mutex_exit(&kernel_mutex);
2441
/* If the database is quiet, we enter the background loop */
2443
/*****************************************************************/
2445
/* ---- In this loop we run background operations when the server
2446
is quiet from user activity. Also in the case of a shutdown, we
2447
loop here, flushing the buffer pool to the data files. */
2449
/* The server has been quiet for a while: start running background
2452
srv_main_thread_op_info = "doing background drop tables";
2454
n_tables_to_drop = row_drop_tables_for_mysql_in_background();
2456
if (n_tables_to_drop > 0) {
2457
/* Do not monopolize the CPU even if there are tables waiting
2458
in the background drop queue. (It is essentially a bug if
2459
MySQL tries to drop a table while there are still open handles
2460
to it and we had to put it to the background drop queue.) */
2462
os_thread_sleep(100000);
2465
srv_main_thread_op_info = "purging";
2467
/* Run a full purge */
2469
last_flush_time = time(NULL);
2472
if (srv_fast_shutdown && srv_shutdown_state > 0) {
2477
srv_main_thread_op_info = "purging";
2478
n_pages_purged = trx_purge();
2480
current_time = time(NULL);
2482
if (difftime(current_time, last_flush_time) > 1) {
2483
srv_main_thread_op_info = "flushing log";
2485
log_buffer_flush_to_disk();
2486
last_flush_time = current_time;
2488
} while (n_pages_purged);
2490
srv_main_thread_op_info = "reserving kernel mutex";
2492
mutex_enter(&kernel_mutex);
2493
if (srv_activity_count != old_activity_count) {
2494
mutex_exit(&kernel_mutex);
2497
mutex_exit(&kernel_mutex);
2499
srv_main_thread_op_info = "doing insert buffer merge";
2501
if (srv_fast_shutdown && srv_shutdown_state > 0) {
2504
n_bytes_merged = ibuf_contract_for_n_pages(
2505
TRUE, srv_insert_buffer_batch_size);
2508
srv_main_thread_op_info = "reserving kernel mutex";
2510
mutex_enter(&kernel_mutex);
2511
if (srv_activity_count != old_activity_count) {
2512
mutex_exit(&kernel_mutex);
2515
mutex_exit(&kernel_mutex);
2518
srv_main_thread_op_info = "flushing buffer pool pages";
2520
if (srv_fast_shutdown < 2) {
2521
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
2524
/* In the fastest shutdown we do not flush the buffer pool
2525
to data files: we set n_pages_flushed to 0 artificially. */
2527
n_pages_flushed = 0;
2530
srv_main_thread_op_info = "reserving kernel mutex";
2532
mutex_enter(&kernel_mutex);
2533
if (srv_activity_count != old_activity_count) {
2534
mutex_exit(&kernel_mutex);
2537
mutex_exit(&kernel_mutex);
2539
srv_main_thread_op_info = "waiting for buffer pool flush to end";
2540
buf_flush_wait_batch_end(BUF_FLUSH_LIST);
2542
srv_main_thread_op_info = "flushing log";
2544
log_buffer_flush_to_disk();
2546
srv_main_thread_op_info = "making checkpoint";
2548
log_checkpoint(TRUE, FALSE);
2550
if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {
2552
/* Try to keep the number of modified pages in the
2553
buffer pool under the limit wished by the user */
2558
srv_main_thread_op_info = "reserving kernel mutex";
2560
mutex_enter(&kernel_mutex);
2561
if (srv_activity_count != old_activity_count) {
2562
mutex_exit(&kernel_mutex);
2565
mutex_exit(&kernel_mutex);
2567
srv_main_thread_op_info = "archiving log (if log archive is on)";
2569
log_archive_do(FALSE, &n_bytes_archived);
2571
n_bytes_archived = 0;
2573
/* Keep looping in the background loop if still work to do */
2575
if (srv_fast_shutdown && srv_shutdown_state > 0) {
2576
if (n_tables_to_drop + n_pages_flushed
2577
+ n_bytes_archived != 0) {
2579
/* If we are doing a fast shutdown (= the default)
2580
we do not do purge or insert buffer merge. But we
2581
flush the buffer pool completely to disk.
2582
In a 'very fast' shutdown we do not flush the buffer
2583
pool to data files: we have set n_pages_flushed to
2586
goto background_loop;
2588
} else if (n_tables_to_drop
2589
+ n_pages_purged + n_bytes_merged + n_pages_flushed
2590
+ n_bytes_archived != 0) {
2591
/* In a 'slow' shutdown we run purge and the insert buffer
2592
merge to completion */
2594
goto background_loop;
2597
/* There is no work for background operations either: suspend
2598
master thread to wait for more server activity */
2601
srv_main_thread_op_info = "suspending";
2603
mutex_enter(&kernel_mutex);
2605
if (row_get_background_drop_list_len_low() > 0) {
2606
mutex_exit(&kernel_mutex);
2611
event = srv_suspend_thread();
2613
mutex_exit(&kernel_mutex);
2615
/* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
2616
waits for database activity to die down when converting < 4.1.x
2617
databases, and relies on this string being exactly as it is. InnoDB
2618
manual also mentions this string in several places. */
2619
srv_main_thread_op_info = "waiting for server activity";
2621
os_event_wait(event);
2623
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
2624
/* This is only extra safety, the thread should exit
2625
already when the event wait ends */
2627
os_thread_exit(NULL);
2630
/* When there is user activity, InnoDB will set the event and the
2631
main thread goes back to loop. */
2635
OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
2637
#endif /* !UNIV_HOTBACKUP */