/******************************************************
The database server main program

NOTE: SQL Server 7 uses something which the documentation
calls user mode scheduled threads (UMS threads). One such
thread is usually allocated per processor. Win32
documentation does not know any UMS threads, which suggests
that the concept is internal to SQL Server 7. It may mean that
SQL Server 7 does all the scheduling of threads itself, even
in i/o waits. We should maybe modify InnoDB to use the same
technique, because thread switches within NT may be too slow.

SQL Server 7 also mentions fibers, which are cooperatively
scheduled threads. They can boost performance by 5 %,
according to Delaney and Soukup's book.

Windows 2000 will have something called thread pooling
(see msdn website), which we could possibly use.

Another possibility could be to use some very fast user space
thread library. This might confuse NT though.

(c) 1995 Innobase Oy

Created 10/8/1995 Heikki Tuuri
*******************************************************/
/* Dummy comment */
#include "srv0srv.h"

#include "ut0mem.h"
#include "os0proc.h"
#include "mem0mem.h"
#include "mem0pool.h"
#include "sync0sync.h"
#include "thr0loc.h"
#include "que0que.h"
#include "srv0que.h"
#include "log0recv.h"
#include "pars0pars.h"
#include "usr0sess.h"
#include "lock0lock.h"
#include "trx0purge.h"
#include "ibuf0ibuf.h"
#include "buf0flu.h"
#include "btr0sea.h"
#include "dict0load.h"
#include "dict0boot.h"
#include "srv0start.h"
#include "row0mysql.h"
#include "ha_prototypes.h"

/* This is set to TRUE if the MySQL user has set it in MySQL; currently
affects only FOREIGN KEY definition parsing */
ibool	srv_lower_case_table_names	= FALSE;

/* The following counter is incremented whenever there is some user activity
in the server */
ulint	srv_activity_count	= 0;

/* The following is the maximum allowed duration of a lock wait. */
ulint	srv_fatal_semaphore_wait_threshold = 600;

/* How much data manipulation language (DML) statements need to be delayed,
in microseconds, in order to reduce the lagging of the purge thread. */
ulint	srv_dml_needed_delay = 0;

ibool	srv_lock_timeout_and_monitor_active = FALSE;
ibool	srv_error_monitor_active = FALSE;

const char*	srv_main_thread_op_info = "";

/* Prefix used by MySQL to indicate pre-5.1 table name encoding */
const char	srv_mysql50_table_name_prefix[9] = "#mysql50#";

/* Server parameters which are read from the initfile */

/* The following three are dir paths which are catenated before file
names, where the file name itself may also contain a path */

char*	srv_data_home	= NULL;
#ifdef UNIV_LOG_ARCHIVE
char*	srv_arch_dir	= NULL;
#endif /* UNIV_LOG_ARCHIVE */

ibool	srv_file_per_table = FALSE;	/* store to its own file each table
					created by a user; data dictionary
					tables are in the system tablespace
					0 */
ibool	srv_locks_unsafe_for_binlog = FALSE;	/* Place locks to
						records only i.e. do
						not use next-key
						locking except on
						duplicate key checking
						and foreign key
						checking */
ulint	srv_n_data_files = 0;
char**	srv_data_file_names = NULL;
ulint*	srv_data_file_sizes = NULL;	/* size in database pages */

ibool	srv_auto_extend_last_data_file	= FALSE; /* if TRUE, then we
						 auto-extend the last data
						 file */
ulint	srv_last_file_size_max	= 0;		 /* if != 0, this tells
						 the max size auto-extending
						 may increase the last data
						 file size */
ulong	srv_auto_extend_increment = 8;		 /* If the last data file is
						 auto-extended, we add this
						 many pages to it at a time */
ulint*	srv_data_file_is_raw_partition = NULL;

/* If the following is TRUE we do not allow inserts etc. This protects
the user from forgetting the 'newraw' keyword to my.cnf */

ibool	srv_created_new_raw	= FALSE;

char**	srv_log_group_home_dirs = NULL;

ulint	srv_n_log_groups	= ULINT_MAX;
ulint	srv_n_log_files		= ULINT_MAX;
ulint	srv_log_file_size	= ULINT_MAX;	/* size in database pages */
ulint	srv_log_buffer_size	= ULINT_MAX;	/* size in database pages */
ulong	srv_flush_log_at_trx_commit = 1;

byte	srv_latin1_ordering[256]	/* The sort order table of the latin1
					character set. The following table is
					the MySQL order as of Feb 10th, 2002 */
= {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27
, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F
, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F
, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F
, 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
, 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F
, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97
, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F
, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7
, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF
, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7
, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF
, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF
, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
};
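
/* Illustrative sketch, not part of the build: a byte-wise comparison
that respects this ordering would map each byte through the table
before comparing. The helper name below is hypothetical; the real
comparison routines live elsewhere in the record comparison code. */
#if 0
static int
srv_latin1_cmp(const byte* a, const byte* b, ulint len)
{
	ulint	i;

	for (i = 0; i < len; i++) {
		int	diff = (int) srv_latin1_ordering[a[i]]
			- (int) srv_latin1_ordering[b[i]];

		if (diff != 0) {
			return(diff);
		}
	}

	return(0);	/* equal under the latin1 sort order */
}
#endif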

ulint	srv_pool_size		= ULINT_MAX;	/* size in pages; MySQL inits
						this to size in kilobytes but
						we normalize this to pages in
						srv_boot() */
ulint	srv_awe_window_size	= 0;		/* size in pages; MySQL inits
						this to bytes, but we
						normalize it to pages in
						srv_boot() */
ulint	srv_mem_pool_size	= ULINT_MAX;	/* size in bytes */
ulint	srv_lock_table_size	= ULINT_MAX;

ulint	srv_n_file_io_threads	= ULINT_MAX;

#ifdef UNIV_LOG_ARCHIVE
ibool	srv_log_archive_on	= FALSE;
ibool	srv_archive_recovery	= 0;
dulint	srv_archive_recovery_limit_lsn;
#endif /* UNIV_LOG_ARCHIVE */

ulint	srv_lock_wait_timeout	= 1024 * 1024 * 1024;

/* This parameter is used to throttle the number of insert buffers that are
merged in a batch. By increasing this parameter on a faster disk you can
possibly reduce the number of I/O operations performed to complete the
merge operation. The value of this parameter is used as is by the
background loop when the system is idle (low load); on a busy system
the parameter is scaled down by a factor of 4 to avoid putting
a heavier load on the I/O subsystem. */

ulong	srv_insert_buffer_batch_size = 20;

char*	srv_file_flush_method_str = NULL;
ulint	srv_unix_file_flush_method = SRV_UNIX_FSYNC;
ulint	srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;

ulint	srv_max_n_open_files	  = 300;

/* The InnoDB main thread tries to keep the ratio of modified pages
in the buffer pool to all database pages in the buffer pool smaller than
the following number. But it is not guaranteed that the value stays below
that during a time of heavy update/insert activity. */

ulong	srv_max_buf_pool_modified_pct	= 90;

/* variable counts the amount of data read in total (in bytes) */
ulint srv_data_read = 0;

/* here we count the amount of data written in total (in bytes) */
ulint srv_data_written = 0;

/* the number of log write requests done */
ulint srv_log_write_requests = 0;

/* the number of physical writes to the log performed */
ulint srv_log_writes = 0;

/* amount of data written to the log files in bytes */
ulint srv_os_log_written = 0;

/* amount of writes being done to the log files */
ulint srv_os_log_pending_writes = 0;

/* we increase this counter when we don't have enough space in the
log buffer and have to flush it */
ulint srv_log_waits = 0;

/* this variable counts the number of times the doublewrite buffer
was flushed */
ulint srv_dblwr_writes = 0;

/* here we store the number of pages that have been flushed to the
doublewrite buffer */
ulint srv_dblwr_pages_written = 0;

/* in this variable we store the number of write requests issued */
ulint srv_buf_pool_write_requests = 0;

/* here we store the number of times when we had to wait for a free page
in the buffer pool. It happens when the buffer pool is full and we need
to make a flush, in order to be able to read or create a page. */
ulint srv_buf_pool_wait_free = 0;

/* variable to count the number of pages that were written from the buffer
pool to the disk */
ulint srv_buf_pool_flushed = 0;

/* variable to count the number of buffer pool reads that led to the
reading of a disk page */
ulint srv_buf_pool_reads = 0;

/* variable to count the number of sequential read-aheads */
ulint srv_read_ahead_seq = 0;

/* variable to count the number of random read-aheads */
ulint srv_read_ahead_rnd = 0;

/* structure to pass status variables to MySQL */
export_struc export_vars;

/* If the following is != 0 we do not allow inserts etc. This protects
the user from forgetting the innodb_force_recovery keyword to my.cnf */

ulint	srv_force_recovery	= 0;
/*-----------------------*/
/* We are prepared for a situation that we have this many threads waiting for
a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
value. */

ulint	srv_max_n_threads	= 0;

/* The following controls how many threads we let inside InnoDB concurrently:
threads waiting for locks are not counted into the number because otherwise
we could get a deadlock. MySQL creates a thread for each user session, and
semaphore contention and convoy problems can occur without this restriction.
Value 10 should be good if there are fewer than 4 processors + 4 disks in the
computer. Bigger computers need bigger values. Value 0 will disable the
concurrency check. */

ulong	srv_thread_concurrency	= 0;
ulong	srv_commit_concurrency	= 0;

os_fast_mutex_t	srv_conc_mutex;		/* this mutex protects srv_conc data
					structures */
lint	srv_conc_n_threads	= 0;	/* number of OS threads currently
					inside InnoDB; it is not an error
					if this drops temporarily below zero
					because we do not demand that every
					thread increments this, but a thread
					waiting for a lock decrements this
					temporarily */
ulint	srv_conc_n_waiting_threads = 0;	/* number of OS threads waiting in the
					FIFO for a permission to enter InnoDB
					*/

typedef struct srv_conc_slot_struct	srv_conc_slot_t;
struct srv_conc_slot_struct{
	os_event_t			event;		/* event to wait */
	ibool				reserved;	/* TRUE if slot
							reserved */
	ibool				wait_ended;	/* TRUE when another
							thread has already set
							the event and the
							thread in this slot is
							free to proceed; but
							reserved may still be
							TRUE at that point */
	UT_LIST_NODE_T(srv_conc_slot_t)	srv_conc_queue;	/* queue node */
};

UT_LIST_BASE_NODE_T(srv_conc_slot_t)	srv_conc_queue;	/* queue of threads
							waiting to get in */
srv_conc_slot_t* srv_conc_slots;			/* array of wait
							slots */

/* Number of times a thread is allowed to enter InnoDB within the same
SQL query after it has once got the ticket at srv_conc_enter_innodb */
#define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter
#define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay
/*-----------------------*/
/* If the following is set to 1 then we do not run purge and insert buffer
merge to completion before shutdown. If it is set to 2, do not even flush the
buffer pool to data files at the shutdown: we effectively 'crash'
InnoDB (but lose no committed transactions). */
ulint	srv_fast_shutdown	= 0;

/* Generate an innodb_status.<pid> file */
ibool	srv_innodb_status	= FALSE;

ibool	srv_stats_on_metadata	= TRUE;

ibool	srv_use_doublewrite_buf	= TRUE;
ibool	srv_use_checksums = TRUE;

ibool	srv_set_thread_priorities = TRUE;
int	srv_query_thread_priority = 0;

/* TRUE if the Address Windowing Extensions of Windows are used; then we must
disable adaptive hash indexes */
ibool	srv_use_awe			= FALSE;
ibool	srv_use_adaptive_hash_indexes	= TRUE;

/*-------------------------------------------*/
ulong	srv_n_spin_wait_rounds	= 20;
ulong	srv_n_free_tickets_to_enter = 500;
ulong	srv_thread_sleep_delay = 10000;
ulint	srv_spin_wait_delay	= 5;
ibool	srv_priority_boost	= TRUE;

ibool	srv_print_thread_releases	= FALSE;
ibool	srv_print_lock_waits		= FALSE;
ibool	srv_print_buf_io		= FALSE;
ibool	srv_print_log_io		= FALSE;
ibool	srv_print_latch_waits		= FALSE;

ulint		srv_n_rows_inserted		= 0;
ulint		srv_n_rows_updated		= 0;
ulint		srv_n_rows_deleted		= 0;
ulint		srv_n_rows_read			= 0;
#ifndef UNIV_HOTBACKUP
static ulint	srv_n_rows_inserted_old		= 0;
static ulint	srv_n_rows_updated_old		= 0;
static ulint	srv_n_rows_deleted_old		= 0;
static ulint	srv_n_rows_read_old		= 0;
#endif /* !UNIV_HOTBACKUP */

ulint		srv_n_lock_wait_count		= 0;
ulint		srv_n_lock_wait_current_count	= 0;
ib_longlong	srv_n_lock_wait_time		= 0;
ulint		srv_n_lock_max_wait_time	= 0;


/*
  Set the following to 0 if you want InnoDB to write messages on
  stderr on startup/shutdown
*/
ibool	srv_print_verbose_log		= TRUE;
ibool	srv_print_innodb_monitor	= FALSE;
ibool	srv_print_innodb_lock_monitor	= FALSE;
ibool	srv_print_innodb_tablespace_monitor = FALSE;
ibool	srv_print_innodb_table_monitor = FALSE;

/* Array of English strings describing the current state of an
i/o handler thread */

const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];

time_t	srv_last_monitor_time;

mutex_t	srv_innodb_monitor_mutex;

/* Mutex for locking srv_monitor_file */
mutex_t	srv_monitor_file_mutex;
/* Temporary file for innodb monitor output */
FILE*	srv_monitor_file;
/* Mutex for locking srv_dict_tmpfile.
This mutex has a very high rank; threads reserving it should not
be holding any InnoDB latches. */
mutex_t	srv_dict_tmpfile_mutex;
/* Temporary file for output from the data dictionary */
FILE*	srv_dict_tmpfile;
/* Mutex for locking srv_misc_tmpfile.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
mutex_t	srv_misc_tmpfile_mutex;
/* Temporary file for miscellaneous diagnostic output */
FILE*	srv_misc_tmpfile;

ulint	srv_main_thread_process_no	= 0;
ulint	srv_main_thread_id		= 0;

/*
	IMPLEMENTATION OF THE SERVER MAIN PROGRAM
	=========================================

There is the following analogue between this database
server and an operating system kernel:

DB concept			equivalent OS concept
----------			---------------------
transaction		--	process;

query thread		--	thread;

lock			--	semaphore;

transaction set to
the rollback state	--	kill signal delivered to a process;

kernel			--	kernel;

query thread execution:
(a) without kernel mutex
reserved		--	process executing in user mode;
(b) with kernel mutex reserved
			--	process executing in kernel mode;

The server is controlled by a master thread which runs at
a priority higher than normal, that is, higher than user threads.
It sleeps most of the time, and wakes up, say, every 300 milliseconds,
to check whether there is anything happening in the server which
requires intervention of the master thread. Such situations may be,
for example, when flushing of dirty blocks is needed in the buffer
pool or old versions of database rows have to be cleaned away.

The threads which we call user threads serve the queries of
the clients and input from the console of the server.
They run at normal priority. The server may have several
communications endpoints. A dedicated set of user threads waits
at each of these endpoints ready to receive a client request.
Each request is taken by a single user thread, which then starts
processing and, when the result is ready, sends it to the client
and returns to wait at the same endpoint the thread started from.

So, we do not have dedicated communication threads listening at
the endpoints and dealing the jobs to dedicated worker threads.
Our architecture saves one thread switch per request, compared
to the solution with dedicated communication threads
which amounts to 15 microseconds on a 100 MHz Pentium
running NT. If the client
is communicating over a network, this saving is negligible, but
if the client resides on the same machine, maybe on an SMP machine
on a different processor from the server thread, the saving
can be important as the threads can communicate over shared
memory with an overhead of a few microseconds.

We may later implement a dedicated communication thread solution
for those endpoints which communicate over a network.

Our solution with user threads has two problems: for each endpoint
there has to be a number of listening threads. If there are many
communication endpoints, it may be difficult to set the right number
of concurrent threads in the system, as many of the threads
may always be waiting at less busy endpoints. Another problem
is queuing of the messages, as the server internally does not
offer any queue for jobs.

Another group of user threads is intended for splitting the
queries and processing them in parallel. Let us call these
parallel communication threads. These threads are waiting for
parallelized tasks, suspended on event semaphores.

A single user thread waits for input from the console,
like a command to shut down the database.

Utility threads are a different group of threads which take
care of the buffer pool flushing and other, mainly background
operations, in the server.
Some of these utility threads always run at a lower than normal
priority, so that they are always in the background. Some of them
may dynamically boost their priority by the pri_adjust function,
even to higher than normal priority, if their task becomes urgent.
The running of utilities is controlled by high- and low-water marks
of urgency. The urgency may be measured by the number of dirty blocks
in the buffer pool, in the case of the flush thread, for example.
When the high-water mark is exceeded, a utility starts running, until
the urgency drops under the low-water mark. Then the utility thread
suspends itself to wait for an event. The master thread is
responsible for signaling this event when the utility thread is
again needed.

For each individual type of utility, some threads always remain
at lower than normal priority. This is because pri_adjust is implemented
so that the threads at normal or higher priority control their
share of running time by calling sleep. Thus, if the load of the
system suddenly drops, these threads cannot necessarily utilize
the system fully. The background priority threads make up for this,
starting to run when the load drops.

When there is no activity in the system, the master thread also
suspends itself to wait for an event, making
the server totally silent. The responsibility to signal this
event is on the user thread which again receives a message
from a client.

There is still one complication in our server design. If a
background utility thread obtains a resource (e.g., mutex) needed by a user
thread, and there is also some other user activity in the system,
the user thread may have to wait indefinitely long for the
resource, as the OS does not schedule a background thread if
there is some other runnable user thread. This problem is called
priority inversion in real-time programming.

One solution to the priority inversion problem would be to
keep a record of which thread owns which resource and
in the above case boost the priority of the background thread
so that it will be scheduled and it can release the resource.
This solution is called priority inheritance in real-time programming.
A drawback of this solution is that the overhead of acquiring a mutex
increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
the thread has to call os_thread_get_curr_id.
This may be compared to a 0.5 microsecond overhead for a mutex lock-unlock
pair. Note that the thread
cannot store the information in the resource, say the mutex, itself,
because competing threads could wipe out the information if it is
stored before acquiring the mutex, and if it is stored afterwards,
the information is outdated for the time of one machine instruction,
at least. (To be precise, the information could be stored to
lock_word in mutex if the machine supports atomic swap.)

The above solution with priority inheritance may become actual in the
future, but at the moment we plan to implement a more coarse solution,
which could be called a global priority inheritance. If a thread
has to wait for a long time, say 300 milliseconds, for a resource,
we just guess that it may be waiting for a resource owned by a background
thread, and boost the priority of all runnable background threads
to the normal level. The background threads then themselves adjust
their fixed priority back to background after releasing all resources
they had (or, at some fixed points in their program code).

What is the performance of the global priority inheritance solution?
We may weigh the length of the wait time, 300 milliseconds, during
which the system processes some other thread,
against the cost of boosting the priority of each runnable background
thread, rescheduling it, and lowering the priority again.
On a 100 MHz Pentium + NT this overhead may be of the order of 100
microseconds per thread. So, if the number of runnable background
threads is not very big, say < 100, the cost is tolerable.
Utility threads probably will access resources used by
user threads not very often, so collisions of user threads
with preempted utility threads should not happen very often.

The thread table contains
information on the current status of each thread existing in the system,
and also the event semaphores used in suspending the master thread
and utility and parallel communication threads when they have nothing to do.
The thread table can be seen as an analogue to the process table
in a traditional Unix implementation.

The thread table is also used in the global priority inheritance
scheme. This brings in one additional complication: threads accessing
the thread table must have at least normal fixed priority,
because the priority inheritance solution does not work if a background
thread is preempted while possessing the mutex protecting the thread table.
So, if a thread accesses the thread table, its priority has to be
boosted at least to normal. This priority requirement can be seen as
similar to the privileged mode used when processing the kernel calls
in traditional Unix. */

/* Thread slot in the thread table */
struct srv_slot_struct{
	os_thread_id_t	id;		/* thread id */
	os_thread_t	handle;		/* thread handle */
	ulint		type;		/* thread type: user, utility etc. */
	ibool		in_use;		/* TRUE if this slot is in use */
	ibool		suspended;	/* TRUE if the thread is waiting
					for the event of this slot */
	ib_time_t	suspend_time;	/* time when the thread was
					suspended */
	os_event_t	event;		/* event used in suspending the
					thread when it has nothing to do */
	que_thr_t*	thr;		/* suspended query thread (only
					used for MySQL threads) */
};

/* Table for MySQL threads where they will be suspended to wait for locks */
srv_slot_t*	srv_mysql_table = NULL;

os_event_t	srv_lock_timeout_thread_event;

srv_sys_t*	srv_sys	= NULL;

byte		srv_pad1[64];	/* padding to prevent other memory update
				hotspots from residing on the same memory
				cache line */
mutex_t*	kernel_mutex_temp;/* mutex protecting the server, trx structs,
				query threads, and lock table */
byte		srv_pad2[64];	/* padding to prevent other memory update
				hotspots from residing on the same memory
				cache line */

/* The following three values measure the urgency of the jobs of
buffer, version, and insert threads. They may vary from 0 - 1000.
The server mutex protects all these variables. The low-water values
tell that the server can acquiesce the utility when the value
drops below this low-water mark. */

ulint	srv_meter[SRV_MASTER + 1];
ulint	srv_meter_low_water[SRV_MASTER + 1];
ulint	srv_meter_high_water[SRV_MASTER + 1];
ulint	srv_meter_high_water2[SRV_MASTER + 1];
ulint	srv_meter_foreground[SRV_MASTER + 1];

/* The following values give info about the activity going on in
the database. They are protected by the server mutex. The arrays
are indexed by the type of the thread. */

ulint	srv_n_threads_active[SRV_MASTER + 1];
ulint	srv_n_threads[SRV_MASTER + 1];

/*************************************************************************
Sets the info describing an i/o thread's current state. */

void
srv_set_io_thread_op_info(
/*======================*/
	ulint		i,	/* in: the 'segment' of the i/o thread */
	const char*	str)	/* in: constant char string describing the
				state */
{
	ut_a(i < SRV_MAX_N_IO_THREADS);

	srv_io_thread_op_info[i] = str;
}
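
/* Illustrative usage sketch, not part of the build: an i/o handler
thread would typically report its state around a blocking call; the
string literals below are hypothetical examples, the real handlers set
their own strings from the file i/o code.

	srv_set_io_thread_op_info(segment, "waiting for i/o request");
	...
	srv_set_io_thread_op_info(segment, "doing file i/o");
*/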

/*************************************************************************
Accessor function to get pointer to n'th slot in the server thread
table. */
static
srv_slot_t*
srv_table_get_nth_slot(
/*===================*/
				/* out: pointer to the slot */
	ulint	index)		/* in: index of the slot */
{
	ut_a(index < OS_THREAD_MAX_N);

	return(srv_sys->threads + index);
}

#ifndef UNIV_HOTBACKUP
/*************************************************************************
Gets the number of threads in the system. */

ulint
srv_get_n_threads(void)
/*===================*/
{
	ulint	i;
	ulint	n_threads	= 0;

	mutex_enter(&kernel_mutex);

	for (i = SRV_COM; i < SRV_MASTER + 1; i++) {

		n_threads += srv_n_threads[i];
	}

	mutex_exit(&kernel_mutex);

	return(n_threads);
}

/*************************************************************************
Reserves a slot in the thread table for the current thread. Also creates the
thread local storage struct for the current thread. NOTE! The server mutex
has to be reserved by the caller! */
static
ulint
srv_table_reserve_slot(
/*===================*/
			/* out: reserved slot index */
	ulint	type)	/* in: type of the thread: one of SRV_COM, ... */
{
	srv_slot_t*	slot;
	ulint		i;

	ut_a(type > 0);
	ut_a(type <= SRV_MASTER);

	i = 0;
	slot = srv_table_get_nth_slot(i);

	while (slot->in_use) {
		i++;
		slot = srv_table_get_nth_slot(i);
	}

	ut_a(slot->in_use == FALSE);

	slot->in_use = TRUE;
	slot->suspended = FALSE;
	slot->id = os_thread_get_curr_id();
	slot->handle = os_thread_get_curr();
	slot->type = type;

	thr_local_create();

	thr_local_set_slot_no(os_thread_get_curr_id(), i);

	return(i);
}

/*************************************************************************
Suspends the calling thread to wait for the event in its thread slot.
NOTE! The server mutex has to be reserved by the caller! */
static
os_event_t
srv_suspend_thread(void)
/*====================*/
			/* out: event for the calling thread to wait */
{
	srv_slot_t*	slot;
	os_event_t	event;
	ulint		slot_no;
	ulint		type;

	ut_ad(mutex_own(&kernel_mutex));

	slot_no = thr_local_get_slot_no(os_thread_get_curr_id());

	if (srv_print_thread_releases) {
		fprintf(stderr,
			"Suspending thread %lu to slot %lu meter %lu\n",
			(ulong) os_thread_get_curr_id(), (ulong) slot_no,
			(ulong) srv_meter[SRV_RECOVERY]);
	}

	slot = srv_table_get_nth_slot(slot_no);

	type = slot->type;

	ut_ad(type >= SRV_WORKER);
	ut_ad(type <= SRV_MASTER);

	event = slot->event;

	slot->suspended = TRUE;

	ut_ad(srv_n_threads_active[type] > 0);

	srv_n_threads_active[type]--;

	os_event_reset(event);

	return(event);
}
#endif /* !UNIV_HOTBACKUP */
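
/* Illustrative sketch, not part of the build: a utility thread would
use srv_suspend_thread() together with srv_release_threads() roughly
as follows; the event must be fetched under the kernel mutex, but the
wait itself happens outside it. */
#if 0
	mutex_enter(&kernel_mutex);

	event = srv_suspend_thread();

	mutex_exit(&kernel_mutex);

	/* Sleeps until some other thread calls srv_release_threads()
	for this slot's thread type */
	os_event_wait(event);
#endif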

/*************************************************************************
Releases threads of the type given from suspension in the thread table.
NOTE! The server mutex has to be reserved by the caller! */

ulint
srv_release_threads(
/*================*/
			/* out: number of threads released: this may be
			< n if not enough threads were suspended at the
			moment */
	ulint	type,	/* in: thread type */
	ulint	n)	/* in: number of threads to release */
{
	srv_slot_t*	slot;
	ulint		i;
	ulint		count	= 0;

	ut_ad(type >= SRV_WORKER);
	ut_ad(type <= SRV_MASTER);
	ut_ad(n > 0);
	ut_ad(mutex_own(&kernel_mutex));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {

		slot = srv_table_get_nth_slot(i);

		if (slot->in_use && slot->type == type && slot->suspended) {

			slot->suspended = FALSE;

			srv_n_threads_active[type]++;

			os_event_set(slot->event);

			if (srv_print_thread_releases) {
				fprintf(stderr,
					"Releasing thread %lu type %lu"
					" from slot %lu meter %lu\n",
					(ulong) slot->id, (ulong) type,
					(ulong) i,
					(ulong) srv_meter[SRV_RECOVERY]);
			}

			count++;

			if (count == n) {
				break;
			}
		}
	}

	return(count);
}
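
/* Illustrative usage sketch, not part of the build: waking one
suspended master thread, as a wrapper like srv_wake_master_thread()
would do (hypothetical caller; note that the kernel mutex must be
held across the call): */
#if 0
	mutex_enter(&kernel_mutex);

	srv_release_threads(SRV_MASTER, 1);

	mutex_exit(&kernel_mutex);
#endif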

/*************************************************************************
Returns the calling thread type. */

ulint
srv_get_thread_type(void)
/*=====================*/
			/* out: SRV_COM, ... */
{
	ulint		slot_no;
	srv_slot_t*	slot;
	ulint		type;

	mutex_enter(&kernel_mutex);

	slot_no = thr_local_get_slot_no(os_thread_get_curr_id());

	slot = srv_table_get_nth_slot(slot_no);

	type = slot->type;

	ut_ad(type >= SRV_WORKER);
	ut_ad(type <= SRV_MASTER);

	mutex_exit(&kernel_mutex);

	return(type);
}

/*************************************************************************
Initializes the server. */

void
srv_init(void)
/*==========*/
{
	srv_conc_slot_t*	conc_slot;
	srv_slot_t*		slot;
	dict_table_t*		table;
	ulint			i;

	srv_sys = mem_alloc(sizeof(srv_sys_t));

	kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
	mutex_create(&kernel_mutex, SYNC_KERNEL);

	mutex_create(&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);

	srv_sys->threads = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		slot = srv_table_get_nth_slot(i);
		slot->in_use = FALSE;
		slot->type = 0;	/* Avoid purify errors */
		slot->event = os_event_create(NULL);
		ut_a(slot->event);
	}

	srv_mysql_table = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		slot = srv_mysql_table + i;
		slot->in_use = FALSE;
		slot->type = 0;
		slot->event = os_event_create(NULL);
		ut_a(slot->event);
	}

	srv_lock_timeout_thread_event = os_event_create(NULL);

	for (i = 0; i < SRV_MASTER + 1; i++) {
		srv_n_threads_active[i] = 0;
		srv_n_threads[i] = 0;
		srv_meter[i] = 30;
		srv_meter_low_water[i] = 50;
		srv_meter_high_water[i] = 100;
		srv_meter_high_water2[i] = 200;
		srv_meter_foreground[i] = 250;
	}

	UT_LIST_INIT(srv_sys->tasks);

	/* create dummy table and index for old-style infimum and supremum */
	table = dict_mem_table_create("SYS_DUMMY1",
				      DICT_HDR_SPACE, 1, 0);
	dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
			       DATA_ENGLISH | DATA_NOT_NULL, 8);

	srv_sys->dummy_ind1 = dict_mem_index_create(
		"SYS_DUMMY1", "SYS_DUMMY1", DICT_HDR_SPACE, 0, 1);
	dict_index_add_col(srv_sys->dummy_ind1, table, (dict_col_t*)
			   dict_table_get_nth_col(table, 0), 0);
	srv_sys->dummy_ind1->table = table;
	/* create dummy table and index for new-style infimum and supremum */
	table = dict_mem_table_create("SYS_DUMMY2",
				      DICT_HDR_SPACE, 1, DICT_TF_COMPACT);
	dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
			       DATA_ENGLISH | DATA_NOT_NULL, 8);
	srv_sys->dummy_ind2 = dict_mem_index_create(
		"SYS_DUMMY2", "SYS_DUMMY2", DICT_HDR_SPACE, 0, 1);
	dict_index_add_col(srv_sys->dummy_ind2, table, (dict_col_t*)
			   dict_table_get_nth_col(table, 0), 0);
	srv_sys->dummy_ind2->table = table;

	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
	srv_sys->dummy_ind1->cached = srv_sys->dummy_ind2->cached = TRUE;

	/* Init the server concurrency restriction data structures */

	os_fast_mutex_init(&srv_conc_mutex);

	UT_LIST_INIT(srv_conc_queue);

	srv_conc_slots = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		conc_slot = srv_conc_slots + i;
		conc_slot->reserved = FALSE;
		conc_slot->event = os_event_create(NULL);
		ut_a(conc_slot->event);
	}
}

/*************************************************************************
Frees the OS fast mutex created in srv_init(). */

void
srv_free(void)
/*==========*/
{
	os_fast_mutex_free(&srv_conc_mutex);
}

/*************************************************************************
Initializes the synchronization primitives, memory system, and the thread
local storage. */

void
srv_general_init(void)
/*==================*/
{
	os_sync_init();
	sync_init();
	mem_init(srv_mem_pool_size);
	thr_local_init();
}

/*======================= InnoDB Server FIFO queue =======================*/

/* Maximum allowable purge history length.  <=0 means 'infinite'. */
ulong	srv_max_purge_lag		= 0;

/*************************************************************************
Puts an OS thread to wait if there are too many concurrent threads
(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */

void
srv_conc_enter_innodb(
/*==================*/
	trx_t*	trx)	/* in: transaction object associated with the
			thread */
{
	ibool			has_slept = FALSE;
	srv_conc_slot_t*	slot	  = NULL;
	ulint			i;

	if (trx->mysql_thd != NULL
	    && thd_is_replication_slave_thread(trx->mysql_thd)) {

		/* TODO Do something more interesting (based on a config
		parameter). Some users want to give the replication
		thread very low priority, see http://bugs.mysql.com/25078
		This can be done by introducing an
		innodb_replication_delay(ms) config parameter */
		return;
	}

	/* If trx has 'free tickets' to enter the engine left, then use one
	such ticket */

	if (trx->n_tickets_to_enter_innodb > 0) {
		trx->n_tickets_to_enter_innodb--;

		return;
	}

	os_fast_mutex_lock(&srv_conc_mutex);
retry:
	if (trx->declared_to_be_inside_innodb) {
		ut_print_timestamp(stderr);
		fputs("  InnoDB: Error: trying to declare trx"
		      " to enter InnoDB, but\n"
		      "InnoDB: it already is declared.\n", stderr);
		trx_print(stderr, trx, 0);
		putc('\n', stderr);
		os_fast_mutex_unlock(&srv_conc_mutex);

		return;
	}

	if (srv_conc_n_threads < (lint)srv_thread_concurrency) {

		srv_conc_n_threads++;
		trx->declared_to_be_inside_innodb = TRUE;
		trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;

		os_fast_mutex_unlock(&srv_conc_mutex);

		return;
	}

	/* If the transaction is not holding resources, let it sleep
	for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */

	if (!has_slept && !trx->has_search_latch
	    && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {

		has_slept = TRUE; /* We let it sleep only once to avoid
				  starvation */

		srv_conc_n_waiting_threads++;

		os_fast_mutex_unlock(&srv_conc_mutex);

		trx->op_info = "sleeping before joining InnoDB queue";

		/* Peter Zaitsev suggested that we take the sleep away
		altogether. But the sleep may be good in pathological
		situations of lots of thread switches. Simply put some
		threads aside for a while to reduce the number of thread
		switches. */
		if (SRV_THREAD_SLEEP_DELAY > 0) {
			os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
		}

		trx->op_info = "";

		os_fast_mutex_lock(&srv_conc_mutex);

		srv_conc_n_waiting_threads--;

		goto retry;
	}

	/* Too many threads inside: put the current thread to a queue */

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		slot = srv_conc_slots + i;

		if (!slot->reserved) {

			break;
		}
	}

	if (i == OS_THREAD_MAX_N) {
		/* Could not find a free wait slot, we must let the
		thread enter */

		srv_conc_n_threads++;
		trx->declared_to_be_inside_innodb = TRUE;
		trx->n_tickets_to_enter_innodb = 0;

		os_fast_mutex_unlock(&srv_conc_mutex);

		return;
	}

	/* Release possible search system latch this thread has */
	if (trx->has_search_latch) {
		trx_search_latch_release_if_reserved(trx);
	}

	/* Add to the queue */
	slot->reserved = TRUE;
	slot->wait_ended = FALSE;

	UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);

	os_event_reset(slot->event);

	srv_conc_n_waiting_threads++;

	os_fast_mutex_unlock(&srv_conc_mutex);

	/* Go to wait for the event; when a thread leaves InnoDB it will
	release this thread */

	trx->op_info = "waiting in InnoDB queue";

	os_event_wait(slot->event);

	trx->op_info = "";

	os_fast_mutex_lock(&srv_conc_mutex);

	srv_conc_n_waiting_threads--;

	/* NOTE that the thread which released this thread already
	incremented the thread counter on behalf of this thread */

	slot->reserved = FALSE;

	UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);

	trx->declared_to_be_inside_innodb = TRUE;
	trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;

	os_fast_mutex_unlock(&srv_conc_mutex);
}

/*************************************************************************
This lets a thread enter InnoDB regardless of the number of threads inside
InnoDB. This must be called when a thread ends a lock wait. */

void
srv_conc_force_enter_innodb(
/*========================*/
	trx_t*	trx)	/* in: transaction object associated with the
			thread */
{
	if (UNIV_LIKELY(!srv_thread_concurrency)) {

		return;
	}

	os_fast_mutex_lock(&srv_conc_mutex);

	srv_conc_n_threads++;
	trx->declared_to_be_inside_innodb = TRUE;
	trx->n_tickets_to_enter_innodb = 1;

	os_fast_mutex_unlock(&srv_conc_mutex);
}

/*************************************************************************
This must be called when a thread exits InnoDB in a lock wait or at the
end of an SQL statement. */

void
srv_conc_force_exit_innodb(
/*=======================*/
	trx_t*	trx)	/* in: transaction object associated with the
			thread */
{
	srv_conc_slot_t*	slot	= NULL;

	if (UNIV_LIKELY(!srv_thread_concurrency)) {

		return;
	}

	if (trx->mysql_thd != NULL
	    && thd_is_replication_slave_thread(trx->mysql_thd)) {

		return;
	}

	if (trx->declared_to_be_inside_innodb == FALSE) {

		return;
	}

	os_fast_mutex_lock(&srv_conc_mutex);

	srv_conc_n_threads--;
	trx->declared_to_be_inside_innodb = FALSE;
	trx->n_tickets_to_enter_innodb = 0;

	if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
		/* Look for a slot where a thread is waiting and no other
		thread has yet released the thread */

		slot = UT_LIST_GET_FIRST(srv_conc_queue);

		while (slot && slot->wait_ended == TRUE) {
			slot = UT_LIST_GET_NEXT(srv_conc_queue, slot);
		}

		if (slot != NULL) {
			slot->wait_ended = TRUE;

			/* We increment the count on behalf of the released
			thread */

			srv_conc_n_threads++;
		}
	}

	os_fast_mutex_unlock(&srv_conc_mutex);

	if (slot != NULL) {
		os_event_set(slot->event);
	}
}

/*************************************************************************
This must be called when a thread exits InnoDB. */

void
srv_conc_exit_innodb(
/*=================*/
	trx_t*	trx)	/* in: transaction object associated with the
			thread */
{
	if (trx->n_tickets_to_enter_innodb > 0) {
		/* We will pretend the thread is still inside InnoDB though it
		now leaves the InnoDB engine. In this way we save
		a lot of semaphore operations. srv_conc_force_exit_innodb is
		used to declare the thread definitely outside InnoDB. It
		should be called when there is a lock wait or an SQL statement
		ends. */

		return;
	}

	srv_conc_force_exit_innodb(trx);
}
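
/* Illustrative usage sketch, not part of the build: the MySQL handler
layer brackets each row operation with the enter/exit pair, so that a
trx with free tickets left skips the mutex and the FIFO queue entirely.
This is a hypothetical caller fragment with error handling omitted: */
#if 0
	srv_conc_enter_innodb(trx);	/* may wait in the FIFO queue */

	err = row_search_for_mysql(buf, mode, prebuilt, 0, 0);

	srv_conc_exit_innodb(trx);	/* keeps the slot if tickets remain */
#endif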

/*========================================================================*/

/*************************************************************************
Normalizes init parameter values to use units we use inside InnoDB. */
static
ulint
srv_normalize_init_values(void)
/*===========================*/
				/* out: DB_SUCCESS or error code */
{
	ulint	n;
	ulint	i;

	n = srv_n_data_files;

	for (i = 0; i < n; i++) {
		srv_data_file_sizes[i] = srv_data_file_sizes[i]
			* ((1024 * 1024) / UNIV_PAGE_SIZE);
	}

	srv_last_file_size_max = srv_last_file_size_max
		* ((1024 * 1024) / UNIV_PAGE_SIZE);

	srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;

	srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;

	srv_pool_size = srv_pool_size / (UNIV_PAGE_SIZE / 1024);

	srv_awe_window_size = srv_awe_window_size / UNIV_PAGE_SIZE;

	if (srv_use_awe) {
		/* If we are using AWE we must save memory in the 32-bit
		address space of the process, and cannot bind the lock
		table size to the real buffer pool size. */

		srv_lock_table_size = 20 * srv_awe_window_size;
	} else {
		srv_lock_table_size = 5 * srv_pool_size;
	}

	return(DB_SUCCESS);
}
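
/* Worked example of the normalization above, assuming UNIV_PAGE_SIZE
is 16384 (16 kB): a data file given as 128 (megabytes) becomes
128 * (1048576 / 16384) = 8192 pages; a log buffer given as 8388608
(bytes) becomes 8388608 / 16384 = 512 pages; a buffer pool given as
131072 (kilobytes) becomes 131072 / (16384 / 1024) = 8192 pages. */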

/*************************************************************************
Boots the InnoDB server. */

ulint
srv_boot(void)
/*==========*/
			/* out: DB_SUCCESS or error code */
{
	ulint	err;

	/* Transform the init parameter values given by MySQL to
	use units we use inside InnoDB: */

	err = srv_normalize_init_values();

	if (err != DB_SUCCESS) {
		return(err);
	}

	/* Initialize synchronization primitives, memory management, and thread
	local storage */

	srv_general_init();

	/* Initialize this module */

	srv_init();

	return(DB_SUCCESS);
}

#ifndef UNIV_HOTBACKUP
/*************************************************************************
Reserves a slot in the thread table for the current MySQL OS thread.
NOTE! The kernel mutex has to be reserved by the caller! */
static
srv_slot_t*
srv_table_reserve_slot_for_mysql(void)
/*==================================*/
			/* out: reserved slot */
{
	srv_slot_t*	slot;
	ulint		i;

	ut_ad(mutex_own(&kernel_mutex));

	i = 0;
	slot = srv_mysql_table + i;

	while (slot->in_use) {
		i++;

		if (i >= OS_THREAD_MAX_N) {

			ut_print_timestamp(stderr);

			fprintf(stderr,
				"  InnoDB: There appear to be %lu MySQL"
				" threads currently waiting\n"
				"InnoDB: inside InnoDB, which is the"
				" upper limit. Cannot continue operation.\n"
				"InnoDB: We intentionally generate"
				" a seg fault to print a stack trace\n"
				"InnoDB: on Linux. But first we print"
				" a list of waiting threads.\n", (ulong) i);

			for (i = 0; i < OS_THREAD_MAX_N; i++) {

				slot = srv_mysql_table + i;

				fprintf(stderr,
					"Slot %lu: thread id %lu, type %lu,"
					" in use %lu, susp %lu, time %lu\n",
					(ulong) i,
					(ulong) os_thread_pf(slot->id),
					(ulong) slot->type,
					(ulong) slot->in_use,
					(ulong) slot->suspended,
					(ulong) difftime(ut_time(),
							 slot->suspend_time));
			}

			ut_error;
		}

		slot = srv_mysql_table + i;
	}

	ut_a(slot->in_use == FALSE);

	slot->in_use = TRUE;
	slot->id = os_thread_get_curr_id();
	slot->handle = os_thread_get_curr();

	return(slot);
}
#endif /* !UNIV_HOTBACKUP */

/*******************************************************************
Puts a MySQL OS thread to wait for a lock to be released. If an error
occurs during the wait, trx->error_state associated with thr is
!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
are possible errors. DB_DEADLOCK is returned if selective deadlock
resolution chose this transaction as a victim. */

void
srv_suspend_mysql_thread(
/*=====================*/
	que_thr_t*	thr)	/* in: query thread associated with the MySQL
				OS thread */
{
#ifndef UNIV_HOTBACKUP
	srv_slot_t*	slot;
	os_event_t	event;
	double		wait_time;
	trx_t*		trx;
	ibool		had_dict_lock			= FALSE;
	ibool		was_declared_inside_innodb	= FALSE;
	ib_longlong	start_time			= 0;
	ib_longlong	finish_time;
	ulint		diff_time;
	ulint		sec;
	ulint		ms;

	ut_ad(!mutex_own(&kernel_mutex));

	trx = thr_get_trx(thr);

	os_event_set(srv_lock_timeout_thread_event);

	mutex_enter(&kernel_mutex);

	trx->error_state = DB_SUCCESS;

	if (thr->state == QUE_THR_RUNNING) {

		ut_ad(thr->is_active == TRUE);

		/* The lock has already been released or this transaction
		was chosen as a deadlock victim: no need to suspend */

		if (trx->was_chosen_as_deadlock_victim) {

			trx->error_state = DB_DEADLOCK;
			trx->was_chosen_as_deadlock_victim = FALSE;
		}

		mutex_exit(&kernel_mutex);

		return;
	}

	ut_ad(thr->is_active == FALSE);

	slot = srv_table_reserve_slot_for_mysql();

	event = slot->event;

	slot->thr = thr;

	os_event_reset(event);

	slot->suspend_time = ut_time();

	if (thr->lock_state == QUE_THR_LOCK_ROW) {
		srv_n_lock_wait_count++;
		srv_n_lock_wait_current_count++;

		ut_usectime(&sec, &ms);
		start_time = (ib_longlong)sec * 1000000 + ms;
	}
	/* Wake the lock timeout monitor thread, if it is suspended */

	os_event_set(srv_lock_timeout_thread_event);

	mutex_exit(&kernel_mutex);

	if (trx->declared_to_be_inside_innodb) {

		was_declared_inside_innodb = TRUE;

		/* We must declare this OS thread to exit InnoDB, since a
		possible other thread holding a lock which this thread waits
		for must be allowed to enter, sooner or later */

		srv_conc_force_exit_innodb(trx);
	}

	/* Release possible foreign key check latch */
	if (trx->dict_operation_lock_mode == RW_S_LATCH) {

		had_dict_lock = TRUE;

		row_mysql_unfreeze_data_dictionary(trx);
	}

	ut_a(trx->dict_operation_lock_mode == 0);

	/* Wait for the release */

	os_event_wait(event);

	if (had_dict_lock) {

		row_mysql_freeze_data_dictionary(trx);
	}

	if (was_declared_inside_innodb) {

		/* Return back inside InnoDB */

		srv_conc_force_enter_innodb(trx);
	}

	mutex_enter(&kernel_mutex);

	/* Release the slot for others to use */

	slot->in_use = FALSE;

	wait_time = ut_difftime(ut_time(), slot->suspend_time);

	if (thr->lock_state == QUE_THR_LOCK_ROW) {
		ut_usectime(&sec, &ms);
		finish_time = (ib_longlong)sec * 1000000 + ms;

		diff_time = (ulint) (finish_time - start_time);

		srv_n_lock_wait_current_count--;
		srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time;
		if (diff_time > srv_n_lock_max_wait_time) {
			srv_n_lock_max_wait_time = diff_time;
		}
	}

	if (trx->was_chosen_as_deadlock_victim) {

		trx->error_state = DB_DEADLOCK;
		trx->was_chosen_as_deadlock_victim = FALSE;
	}

	mutex_exit(&kernel_mutex);

	if (srv_lock_wait_timeout < 100000000
	    && wait_time > (double)srv_lock_wait_timeout) {

		trx->error_state = DB_LOCK_WAIT_TIMEOUT;
	}
#else /* UNIV_HOTBACKUP */
	/* This function depends on MySQL code that is not included in
	InnoDB Hot Backup builds.  Besides, this function should never
	be called in InnoDB Hot Backup. */
	ut_error;
#endif /* UNIV_HOTBACKUP */
}

/************************************************************************
Releases a MySQL OS thread waiting for a lock to be released, if the
thread is already suspended. */

void
srv_release_mysql_thread_if_suspended(
/*==================================*/
	que_thr_t*	thr)	/* in: query thread associated with the
				MySQL OS thread */
{
#ifndef UNIV_HOTBACKUP
	srv_slot_t*	slot;
	ulint		i;

	ut_ad(mutex_own(&kernel_mutex));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {

		slot = srv_mysql_table + i;

		if (slot->in_use && slot->thr == thr) {
			/* Found */

			os_event_set(slot->event);

			return;
		}
	}

	/* not found */
#else /* UNIV_HOTBACKUP */
	/* This function depends on MySQL code that is not included in
	InnoDB Hot Backup builds.  Besides, this function should never
	be called in InnoDB Hot Backup. */
	ut_error;
#endif /* UNIV_HOTBACKUP */
}

#ifndef UNIV_HOTBACKUP
/**********************************************************************
Refreshes the values used to calculate per-second averages. */
static
void
srv_refresh_innodb_monitor_stats(void)
/*==================================*/
{
	mutex_enter(&srv_innodb_monitor_mutex);

	srv_last_monitor_time = time(NULL);

	os_aio_refresh_stats();

	btr_cur_n_sea_old = btr_cur_n_sea;
	btr_cur_n_non_sea_old = btr_cur_n_non_sea;

	log_refresh_stats();

	buf_refresh_io_stats();

	srv_n_rows_inserted_old = srv_n_rows_inserted;
	srv_n_rows_updated_old = srv_n_rows_updated;
	srv_n_rows_deleted_old = srv_n_rows_deleted;
	srv_n_rows_read_old = srv_n_rows_read;

	mutex_exit(&srv_innodb_monitor_mutex);
}

/**********************************************************************
Outputs to a file the output of the InnoDB Monitor. */

void
srv_printf_innodb_monitor(
/*======================*/
	FILE*	file,		/* in: output stream */
	ulint*	trx_start,	/* out: file position of the start of
				the list of active transactions */
	ulint*	trx_end)	/* out: file position of the end of
				the list of active transactions */
{
	double	time_elapsed;
	time_t	current_time;
	ulint	n_reserved;

	mutex_enter(&srv_innodb_monitor_mutex);

	current_time = time(NULL);

	/* We add 0.001 seconds to time_elapsed to prevent division
	by zero if two users happen to call SHOW INNODB STATUS at the same
	time */

	time_elapsed = difftime(current_time, srv_last_monitor_time)
		+ 0.001;

	srv_last_monitor_time = time(NULL);

	fputs("\n=====================================\n", file);

	ut_print_timestamp(file);
	fprintf(file,
		" INNODB MONITOR OUTPUT\n"
		"=====================================\n"
		"Per second averages calculated from the last %lu seconds\n",
		(ulong)time_elapsed);

	fputs("----------\n"
	      "SEMAPHORES\n"
	      "----------\n", file);
	sync_print(file);

	/* Conceptually, srv_innodb_monitor_mutex has a very high latching
	order level in sync0sync.h, while dict_foreign_err_mutex has a very
	low level 135. Therefore we can reserve the latter mutex here without
	a danger of a deadlock of threads. */

	mutex_enter(&dict_foreign_err_mutex);

	if (ftell(dict_foreign_err_file) != 0L) {
		fputs("------------------------\n"
		      "LATEST FOREIGN KEY ERROR\n"
		      "------------------------\n", file);
		ut_copy_file(file, dict_foreign_err_file);
	}

	mutex_exit(&dict_foreign_err_mutex);

	lock_print_info_summary(file);
	if (trx_start) {
		long	t = ftell(file);
		if (t < 0) {
			*trx_start = ULINT_UNDEFINED;
		} else {
			*trx_start = (ulint) t;
		}
	}
	lock_print_info_all_transactions(file);
	if (trx_end) {
		long	t = ftell(file);
		if (t < 0) {
			*trx_end = ULINT_UNDEFINED;
		} else {
			*trx_end = (ulint) t;
		}
	}
	fputs("--------\n"
	      "FILE I/O\n"
	      "--------\n", file);
	os_aio_print(file);

	fputs("-------------------------------------\n"
	      "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
	      "-------------------------------------\n", file);
	ibuf_print(file);

	ha_print_info(file, btr_search_sys->hash_index);

	fprintf(file,
		"%.2f hash searches/s, %.2f non-hash searches/s\n",
		(btr_cur_n_sea - btr_cur_n_sea_old)
		/ time_elapsed,
		(btr_cur_n_non_sea - btr_cur_n_non_sea_old)
		/ time_elapsed);
	btr_cur_n_sea_old = btr_cur_n_sea;
	btr_cur_n_non_sea_old = btr_cur_n_non_sea;

	fputs("---\n"
	      "LOG\n"
	      "---\n", file);
	log_print(file);

	fputs("----------------------\n"
	      "BUFFER POOL AND MEMORY\n"
	      "----------------------\n", file);
	fprintf(file,
		"Total memory allocated " ULINTPF
		"; in additional pool allocated " ULINTPF "\n",
		ut_total_allocated_memory,
		mem_pool_get_reserved(mem_comm_pool));
	fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
		dict_sys->size);

	if (srv_use_awe) {
		fprintf(file,
			"In addition to that %lu MB of AWE memory allocated\n",
			(ulong) (srv_pool_size
				 / ((1024 * 1024) / UNIV_PAGE_SIZE)));
	}

	buf_print_io(file);

	fputs("--------------\n"
	      "ROW OPERATIONS\n"
	      "--------------\n", file);
	fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
		(long) srv_conc_n_threads,
		(ulong) srv_conc_n_waiting_threads);

	fprintf(file, "%lu read views open inside InnoDB\n",
		UT_LIST_GET_LEN(trx_sys->view_list));

	n_reserved = fil_space_get_n_reserved_extents(0);
	if (n_reserved > 0) {
		fprintf(file,
			"%lu tablespace extents now reserved for"
			" B-tree split operations\n",
			(ulong) n_reserved);
	}

#ifdef UNIV_LINUX
	fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
		(ulong) srv_main_thread_process_no,
		(ulong) srv_main_thread_id,
		srv_main_thread_op_info);
#else
	fprintf(file, "Main thread id %lu, state: %s\n",
		(ulong) srv_main_thread_id,
		srv_main_thread_op_info);
#endif
	fprintf(file,
		"Number of rows inserted " ULINTPF
		", updated " ULINTPF ", deleted " ULINTPF
		", read " ULINTPF "\n",
		srv_n_rows_inserted,
		srv_n_rows_updated,
		srv_n_rows_deleted,
		srv_n_rows_read);
	fprintf(file,
		"%.2f inserts/s, %.2f updates/s,"
		" %.2f deletes/s, %.2f reads/s\n",
		(srv_n_rows_inserted - srv_n_rows_inserted_old)
		/ time_elapsed,
		(srv_n_rows_updated - srv_n_rows_updated_old)
		/ time_elapsed,
		(srv_n_rows_deleted - srv_n_rows_deleted_old)
		/ time_elapsed,
		(srv_n_rows_read - srv_n_rows_read_old)
		/ time_elapsed);

	srv_n_rows_inserted_old = srv_n_rows_inserted;
	srv_n_rows_updated_old = srv_n_rows_updated;
	srv_n_rows_deleted_old = srv_n_rows_deleted;
	srv_n_rows_read_old = srv_n_rows_read;

	fputs("----------------------------\n"
	      "END OF INNODB MONITOR OUTPUT\n"
	      "============================\n", file);
	mutex_exit(&srv_innodb_monitor_mutex);
	fflush(file);
}
1793
1794
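/* An illustrative sketch of how the trx_start/trx_end out-parameters
can be used; the real consumer is the status code on the MySQL side,
not anything in this file. A caller that must bound the size of the
report can record where the transaction list begins and ends, and
truncate just that part if the output grew too large:

	ulint	trx_list_start;
	ulint	trx_list_end;

	srv_printf_innodb_monitor(srv_monitor_file,
				  &trx_list_start, &trx_list_end);

ULINT_UNDEFINED in either variable means ftell() failed and the
positions are unusable. */
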
/**********************************************************************
Function to pass InnoDB status variables to MySQL */

void
srv_export_innodb_status(void)
{
	mutex_enter(&srv_innodb_monitor_mutex);

	export_vars.innodb_data_pending_reads
		= os_n_pending_reads;
	export_vars.innodb_data_pending_writes
		= os_n_pending_writes;
	export_vars.innodb_data_pending_fsyncs
		= fil_n_pending_log_flushes
		+ fil_n_pending_tablespace_flushes;
	export_vars.innodb_data_fsyncs = os_n_fsyncs;
	export_vars.innodb_data_read = srv_data_read;
	export_vars.innodb_data_reads = os_n_file_reads;
	export_vars.innodb_data_writes = os_n_file_writes;
	export_vars.innodb_data_written = srv_data_written;
	export_vars.innodb_buffer_pool_read_requests = buf_pool->n_page_gets;
	export_vars.innodb_buffer_pool_write_requests
		= srv_buf_pool_write_requests;
	export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
	export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
	export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
	export_vars.innodb_buffer_pool_read_ahead_rnd = srv_read_ahead_rnd;
	export_vars.innodb_buffer_pool_read_ahead_seq = srv_read_ahead_seq;
	export_vars.innodb_buffer_pool_pages_data
		= UT_LIST_GET_LEN(buf_pool->LRU);
	export_vars.innodb_buffer_pool_pages_dirty
		= UT_LIST_GET_LEN(buf_pool->flush_list);
	export_vars.innodb_buffer_pool_pages_free
		= UT_LIST_GET_LEN(buf_pool->free);
	export_vars.innodb_buffer_pool_pages_latched
		= buf_get_latched_pages_number();
	export_vars.innodb_buffer_pool_pages_total = buf_pool->curr_size;

	export_vars.innodb_buffer_pool_pages_misc = buf_pool->max_size
		- UT_LIST_GET_LEN(buf_pool->LRU)
		- UT_LIST_GET_LEN(buf_pool->free);
	export_vars.innodb_page_size = UNIV_PAGE_SIZE;
	export_vars.innodb_log_waits = srv_log_waits;
	export_vars.innodb_os_log_written = srv_os_log_written;
	export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
	export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
	export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
	export_vars.innodb_log_write_requests = srv_log_write_requests;
	export_vars.innodb_log_writes = srv_log_writes;
	export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
	export_vars.innodb_dblwr_writes = srv_dblwr_writes;
	export_vars.innodb_pages_created = buf_pool->n_pages_created;
	export_vars.innodb_pages_read = buf_pool->n_pages_read;
	export_vars.innodb_pages_written = buf_pool->n_pages_written;
	export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
	export_vars.innodb_row_lock_current_waits
		= srv_n_lock_wait_current_count;
	export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000;
	if (srv_n_lock_wait_count > 0) {
		export_vars.innodb_row_lock_time_avg = (ulint)
			(srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
	} else {
		export_vars.innodb_row_lock_time_avg = 0;
	}
	export_vars.innodb_row_lock_time_max
		= srv_n_lock_max_wait_time / 1000;
	export_vars.innodb_rows_read = srv_n_rows_read;
	export_vars.innodb_rows_inserted = srv_n_rows_inserted;
	export_vars.innodb_rows_updated = srv_n_rows_updated;
	export_vars.innodb_rows_deleted = srv_n_rows_deleted;

	mutex_exit(&srv_innodb_monitor_mutex);
}

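/* Illustrative usage sketch (with a hypothetical caller; the real one
lives on the MySQL side): refresh the snapshot, then read whichever
fields are needed. Because the copy above happens under
srv_innodb_monitor_mutex, each snapshot is internally consistent.

	srv_export_innodb_status();
	my_status_rows_read = export_vars.innodb_rows_read;

Here my_status_rows_read is a hypothetical destination variable used
only for this sketch. */
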
/*************************************************************************
A thread which wakes up threads whose lock wait may have lasted too long.
This also prints the info output by various InnoDB monitors. */

os_thread_ret_t
srv_lock_timeout_and_monitor_thread(
/*================================*/
			/* out: a dummy parameter */
	void*	arg __attribute__((unused)))
			/* in: a dummy parameter required by
			os_thread_create */
{
	srv_slot_t*	slot;
	double		time_elapsed;
	time_t		current_time;
	time_t		last_table_monitor_time;
	time_t		last_tablespace_monitor_time;
	time_t		last_monitor_time;
	ibool		some_waits;
	double		wait_time;
	ulint		i;

#ifdef UNIV_DEBUG_THREAD_CREATION
	fprintf(stderr, "Lock timeout thread starts, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif
	UT_NOT_USED(arg);
	srv_last_monitor_time = time(NULL);
	last_table_monitor_time = time(NULL);
	last_tablespace_monitor_time = time(NULL);
	last_monitor_time = time(NULL);
loop:
	srv_lock_timeout_and_monitor_active = TRUE;

	/* When someone is waiting for a lock, we wake up every second
	and check if a timeout has passed for a lock wait */

	os_thread_sleep(1000000);

	/* In case mutex_exit is not a memory barrier, it is
	theoretically possible some threads are left waiting though
	the semaphore is already released. Wake up those threads: */

	sync_arr_wake_threads_if_sema_free();

	current_time = time(NULL);

	time_elapsed = difftime(current_time, last_monitor_time);

	if (time_elapsed > 15) {
		last_monitor_time = time(NULL);

		if (srv_print_innodb_monitor) {
			srv_printf_innodb_monitor(stderr, NULL, NULL);
		}

		if (srv_innodb_status) {
			mutex_enter(&srv_monitor_file_mutex);
			rewind(srv_monitor_file);
			srv_printf_innodb_monitor(srv_monitor_file, NULL,
						  NULL);
			os_file_set_eof(srv_monitor_file);
			mutex_exit(&srv_monitor_file_mutex);
		}

		if (srv_print_innodb_tablespace_monitor
		    && difftime(current_time,
				last_tablespace_monitor_time) > 60) {
			last_tablespace_monitor_time = time(NULL);

			fputs("========================"
			      "========================\n",
			      stderr);

			ut_print_timestamp(stderr);

			fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
			      "========================"
			      "========================\n",
			      stderr);

			fsp_print(0);
			fputs("Validating tablespace\n", stderr);
			fsp_validate(0);
			fputs("Validation ok\n"
			      "---------------------------------------\n"
			      "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
			      "=======================================\n",
			      stderr);
		}

		if (srv_print_innodb_table_monitor
		    && difftime(current_time, last_table_monitor_time) > 60) {

			last_table_monitor_time = time(NULL);

			fputs("===========================================\n",
			      stderr);

			ut_print_timestamp(stderr);

			fputs(" INNODB TABLE MONITOR OUTPUT\n"
			      "===========================================\n",
			      stderr);
			dict_print();

			fputs("-----------------------------------\n"
			      "END OF INNODB TABLE MONITOR OUTPUT\n"
			      "==================================\n",
			      stderr);
		}
	}

	mutex_enter(&kernel_mutex);

	some_waits = FALSE;

	/* Check all slots to see if a thread is waiting there, and
	whether it has exceeded the time limit */

	for (i = 0; i < OS_THREAD_MAX_N; i++) {

		slot = srv_mysql_table + i;

		if (slot->in_use) {
			some_waits = TRUE;

			wait_time = ut_difftime(ut_time(), slot->suspend_time);

			if (srv_lock_wait_timeout < 100000000
			    && (wait_time > (double) srv_lock_wait_timeout
				|| wait_time < 0)) {

				/* Timeout exceeded or a wrap-around in system
				time counter: cancel the lock request queued
				by the transaction and release possible
				other transactions waiting behind; it is
				possible that the lock has already been
				granted: in that case do nothing */

				if (thr_get_trx(slot->thr)->wait_lock) {
					lock_cancel_waiting_and_release(
						thr_get_trx(slot->thr)
						->wait_lock);
				}
			}
		}
	}

	os_event_reset(srv_lock_timeout_thread_event);

	mutex_exit(&kernel_mutex);

	if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
		goto exit_func;
	}

	if (some_waits || srv_print_innodb_monitor
	    || srv_print_innodb_lock_monitor
	    || srv_print_innodb_tablespace_monitor
	    || srv_print_innodb_table_monitor) {
		goto loop;
	}

	/* No one was waiting for a lock and no monitor was active:
	suspend this thread */

	srv_lock_timeout_and_monitor_active = FALSE;

#if 0
	/* The following synchronisation is disabled, since
	the InnoDB monitor output is to be updated every 15 seconds. */
	os_event_wait(srv_lock_timeout_thread_event);
#endif
	goto loop;

exit_func:
	srv_lock_timeout_and_monitor_active = FALSE;

	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */

	os_thread_exit(NULL);

	OS_THREAD_DUMMY_RETURN;
}

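/* A note on the timeout check in the thread above: slots are scanned
about once per second, so a lock wait is canceled within roughly one
second after srv_lock_wait_timeout is exceeded. For example, with
srv_lock_wait_timeout set to 50, a transaction that has waited 51
seconds has its queued lock request canceled on the next scan; values
of 100000000 or more effectively disable the timeout. */
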
/*************************************************************************
A thread which prints warnings about semaphore waits which have lasted
too long. These can be used to track bugs which cause hangs. */

os_thread_ret_t
srv_error_monitor_thread(
/*=====================*/
			/* out: a dummy parameter */
	void*	arg __attribute__((unused)))
			/* in: a dummy parameter required by
			os_thread_create */
{
	/* number of successive fatal timeouts observed */
	ulint	fatal_cnt	= 0;
	dulint	old_lsn;
	dulint	new_lsn;

	old_lsn = srv_start_lsn;

#ifdef UNIV_DEBUG_THREAD_CREATION
	fprintf(stderr, "Error monitor thread starts, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif
loop:
	srv_error_monitor_active = TRUE;

	/* Try to track a strange bug reported by Harald Fuchs and others,
	where the lsn seems to decrease at times */

	new_lsn = log_get_lsn();

	if (ut_dulint_cmp(new_lsn, old_lsn) < 0) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
			"  InnoDB: Error: old log sequence number %lu %lu"
			" was greater\n"
			"InnoDB: than the new log sequence number %lu %lu!\n"
			"InnoDB: Please submit a bug report"
			" to http://bugs.mysql.com\n",
			(ulong) ut_dulint_get_high(old_lsn),
			(ulong) ut_dulint_get_low(old_lsn),
			(ulong) ut_dulint_get_high(new_lsn),
			(ulong) ut_dulint_get_low(new_lsn));
	}

	old_lsn = new_lsn;

	if (difftime(time(NULL), srv_last_monitor_time) > 60) {
		/* We refresh the InnoDB Monitor values so that averages
		are printed from at most the last 60 seconds */

		srv_refresh_innodb_monitor_stats();
	}

	if (sync_array_print_long_waits()) {
		fatal_cnt++;
		if (fatal_cnt > 5) {

			fprintf(stderr,
				"InnoDB: Error: semaphore wait has lasted"
				" > %lu seconds\n"
				"InnoDB: We intentionally crash the server,"
				" because it appears to be hung.\n",
				(ulong) srv_fatal_semaphore_wait_threshold);

			ut_error;
		}
	} else {
		fatal_cnt = 0;
	}

	/* Flush stderr so that a database user gets the output
	to the possible MySQL error file */

	fflush(stderr);

	os_thread_sleep(2000000);

	if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {

		goto loop;
	}

	srv_error_monitor_active = FALSE;

	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */

	os_thread_exit(NULL);

	OS_THREAD_DUMMY_RETURN;
}

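/* Timing of the intentional crash above: the thread sleeps 2 seconds
per iteration and requires more than 5 successive iterations to report
a long wait, so once sync_array_print_long_waits() starts returning
TRUE, roughly 10 further seconds elapse before ut_error is reached.
Judging by the message printed, the reported wait itself must already
have lasted longer than srv_fatal_semaphore_wait_threshold seconds. */
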
/***********************************************************************
Tells the InnoDB server that there has been activity in the database
and wakes up the master thread if it is suspended (not sleeping). Used
in the MySQL interface. Note that there is a small chance that the master
thread stays suspended (we do not protect our operation with the kernel
mutex, for performance reasons). */

void
srv_active_wake_master_thread(void)
/*===============================*/
{
	srv_activity_count++;

	if (srv_n_threads_active[SRV_MASTER] == 0) {

		mutex_enter(&kernel_mutex);

		srv_release_threads(SRV_MASTER, 1);

		mutex_exit(&kernel_mutex);
	}
}

/***********************************************************************
Wakes up the master thread if it is suspended or being suspended. */

void
srv_wake_master_thread(void)
/*========================*/
{
	srv_activity_count++;

	mutex_enter(&kernel_mutex);

	srv_release_threads(SRV_MASTER, 1);

	mutex_exit(&kernel_mutex);
}

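/* The two wake functions above differ only on the fast path:
srv_active_wake_master_thread() skips the kernel mutex when the master
thread is already active, accepting the small race documented above,
while srv_wake_master_thread() always enters the mutex and releases
the master thread unconditionally. The former appears intended for
frequent call sites, the latter for callers that must not miss a
wakeup. */
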
/*************************************************************************
The master thread controlling the server. */

os_thread_ret_t
srv_master_thread(
/*==============*/
			/* out: a dummy parameter */
	void*	arg __attribute__((unused)))
			/* in: a dummy parameter required by
			os_thread_create */
{
	os_event_t	event;
	time_t		last_flush_time;
	time_t		current_time;
	ulint		old_activity_count;
	ulint		n_pages_purged;
	ulint		n_bytes_merged;
	ulint		n_pages_flushed;
	ulint		n_bytes_archived;
	ulint		n_tables_to_drop;
	ulint		n_ios;
	ulint		n_ios_old;
	ulint		n_ios_very_old;
	ulint		n_pend_ios;
	ibool		skip_sleep	= FALSE;
	ulint		i;

#ifdef UNIV_DEBUG_THREAD_CREATION
	fprintf(stderr, "Master thread starts, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif
	srv_main_thread_process_no = os_proc_get_number();
	srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());

	srv_table_reserve_slot(SRV_MASTER);

	mutex_enter(&kernel_mutex);

	srv_n_threads_active[SRV_MASTER]++;

	mutex_exit(&kernel_mutex);

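/* From here on the master thread is registered in the server slot
table and counted in srv_n_threads_active[SRV_MASTER]; the wake
functions above consult that counter to decide whether a wakeup is
needed. */
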
loop:
	/*****************************************************************/
	/* ---- When there is database activity by users, we cycle in this
	loop */

	srv_main_thread_op_info = "reserving kernel mutex";

	n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read
		+ buf_pool->n_pages_written;
	mutex_enter(&kernel_mutex);

	/* Store the user activity counter at the start of this loop */
	old_activity_count = srv_activity_count;

	mutex_exit(&kernel_mutex);

	if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {

		goto suspend_thread;
	}

	/* ---- We run the following loop approximately once per second
	when there is database activity */

	skip_sleep = FALSE;

	for (i = 0; i < 10; i++) {
		n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read
			+ buf_pool->n_pages_written;
		srv_main_thread_op_info = "sleeping";

		if (!skip_sleep) {

			os_thread_sleep(1000000);
		}

		skip_sleep = FALSE;

		/* On Unix, ALTER TABLE in MySQL requires that the table
		handler be able to drop tables lazily after there are no
		longer any SELECT queries to them. */

		srv_main_thread_op_info = "doing background drop tables";

		row_drop_tables_for_mysql_in_background();

		srv_main_thread_op_info = "";

		if (srv_fast_shutdown && srv_shutdown_state > 0) {

			goto background_loop;
		}

		/* We flush the log once per second even if no commit is
		issued, or if we have specified in my.cnf that the log need
		not be flushed at transaction commit */

		srv_main_thread_op_info = "flushing log";
		log_buffer_flush_to_disk();

		srv_main_thread_op_info = "making checkpoint";
		log_free_check();

		/* If there were fewer than 5 i/os during the
		one second sleep, we assume that there is free
		disk i/o capacity available, and it makes sense to
		do an insert buffer merge. */

		n_pend_ios = buf_get_n_pending_ios()
			+ log_sys->n_pending_writes;
		n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
			+ buf_pool->n_pages_written;
		if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
			srv_main_thread_op_info = "doing insert buffer merge";
			ibuf_contract_for_n_pages(
				TRUE, srv_insert_buffer_batch_size / 4);

			srv_main_thread_op_info = "flushing log";

			log_buffer_flush_to_disk();
		}

		if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
				  > srv_max_buf_pool_modified_pct)) {

			/* Try to keep the number of modified pages in the
			buffer pool under the limit wished by the user */

			n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
							  ut_dulint_max);

			/* If we had to do the flush, it may have taken
			even more than 1 second, and also, there may be more
			to flush. Do not sleep 1 second during the next
			iteration of this loop. */

			skip_sleep = TRUE;
		}

		if (srv_activity_count == old_activity_count) {

			/* There is no user activity at the moment, go to
			the background loop */

			goto background_loop;
		}
	}

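/* An illustration of the i/o-capacity heuristic in the loop above:
n_ios_old is sampled at the top of each one-second iteration, so
n_ios - n_ios_old counts the log writes and page reads/writes completed
during that second. With, say, 2 pending i/os and 3 completed i/os,
both conditions hold and an insert buffer merge of
srv_insert_buffer_batch_size / 4 pages is attempted; during a busier
second the merge is skipped and the disk is left to user work. */
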
	/* ---- We perform the following code approximately once per
	10 seconds when there is database activity */

#ifdef MEM_PERIODIC_CHECK
	/* Check magic numbers of every allocated mem block once in 10
	seconds */
	mem_validate_all_blocks();
#endif
	/* If there were fewer than 200 i/os during the 10 second period,
	we assume that there is free disk i/o capacity available, and it
	makes sense to flush 100 pages. */

	n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
	n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
		+ buf_pool->n_pages_written;
	if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {

		srv_main_thread_op_info = "flushing buffer pool pages";
		buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);

		srv_main_thread_op_info = "flushing log";
		log_buffer_flush_to_disk();
	}

	/* We run a batch of insert buffer merge every 10 seconds,
	even if the server is active */

	srv_main_thread_op_info = "doing insert buffer merge";
	ibuf_contract_for_n_pages(TRUE, srv_insert_buffer_batch_size / 4);

	srv_main_thread_op_info = "flushing log";
	log_buffer_flush_to_disk();

	/* We run a full purge every 10 seconds, even if the server
	is active */

	n_pages_purged = 1;

	last_flush_time = time(NULL);

	while (n_pages_purged) {

		if (srv_fast_shutdown && srv_shutdown_state > 0) {

			goto background_loop;
		}

		srv_main_thread_op_info = "purging";
		n_pages_purged = trx_purge();

		current_time = time(NULL);

		if (difftime(current_time, last_flush_time) > 1) {
			srv_main_thread_op_info = "flushing log";

			log_buffer_flush_to_disk();
			last_flush_time = current_time;
		}
	}

	srv_main_thread_op_info = "flushing buffer pool pages";

	/* Flush a few of the oldest pages to make the next checkpoint
	younger */

	if (buf_get_modified_ratio_pct() > 70) {

		/* If there are lots of modified pages in the buffer pool
		(> 70 %), we assume we can afford reserving the disk(s) for
		the time it requires to flush 100 pages */

		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
						  ut_dulint_max);
	} else {
		/* Otherwise, we only flush a small number of pages so that
		we do not unnecessarily use much disk i/o capacity from
		other work */

		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
						  ut_dulint_max);
	}

	srv_main_thread_op_info = "making checkpoint";

	/* Make a new checkpoint about once in 10 seconds */

	log_checkpoint(TRUE, FALSE);

	srv_main_thread_op_info = "reserving kernel mutex";

	mutex_enter(&kernel_mutex);

	/* ---- When there is database activity, we jump from here back to
	the start of loop */

	if (srv_activity_count != old_activity_count) {
		mutex_exit(&kernel_mutex);
		goto loop;
	}

	mutex_exit(&kernel_mutex);

	/* If the database is quiet, we enter the background loop */

	/*****************************************************************/
background_loop:
	/* ---- In this loop we run background operations when the server
	is quiet from user activity. Also in the case of a shutdown, we
	loop here, flushing the buffer pool to the data files. */

	/* The server has been quiet for a while: start running background
	operations */

	srv_main_thread_op_info = "doing background drop tables";

	n_tables_to_drop = row_drop_tables_for_mysql_in_background();

	if (n_tables_to_drop > 0) {
		/* Do not monopolize the CPU even if there are tables waiting
		in the background drop queue. (It is essentially a bug if
		MySQL tries to drop a table while there are still open handles
		to it and we had to put it into the background drop queue.) */

		os_thread_sleep(100000);
	}

	srv_main_thread_op_info = "purging";

	/* Run a full purge */

	n_pages_purged = 1;

	last_flush_time = time(NULL);

	while (n_pages_purged) {
		if (srv_fast_shutdown && srv_shutdown_state > 0) {

			break;
		}

		srv_main_thread_op_info = "purging";
		n_pages_purged = trx_purge();

		current_time = time(NULL);

		if (difftime(current_time, last_flush_time) > 1) {
			srv_main_thread_op_info = "flushing log";

			log_buffer_flush_to_disk();
			last_flush_time = current_time;
		}
	}

	srv_main_thread_op_info = "reserving kernel mutex";

	mutex_enter(&kernel_mutex);
	if (srv_activity_count != old_activity_count) {
		mutex_exit(&kernel_mutex);
		goto loop;
	}
	mutex_exit(&kernel_mutex);

	srv_main_thread_op_info = "doing insert buffer merge";

	if (srv_fast_shutdown && srv_shutdown_state > 0) {
		n_bytes_merged = 0;
	} else {
		n_bytes_merged = ibuf_contract_for_n_pages(
			TRUE, srv_insert_buffer_batch_size);
	}

	srv_main_thread_op_info = "reserving kernel mutex";

	mutex_enter(&kernel_mutex);
	if (srv_activity_count != old_activity_count) {
		mutex_exit(&kernel_mutex);
		goto loop;
	}
	mutex_exit(&kernel_mutex);

flush_loop:
	srv_main_thread_op_info = "flushing buffer pool pages";

	if (srv_fast_shutdown < 2) {
		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
						  ut_dulint_max);
	} else {
		/* In the fastest shutdown we do not flush the buffer pool
		to data files: we set n_pages_flushed to 0 artificially. */

		n_pages_flushed = 0;
	}

	srv_main_thread_op_info = "reserving kernel mutex";

	mutex_enter(&kernel_mutex);
	if (srv_activity_count != old_activity_count) {
		mutex_exit(&kernel_mutex);
		goto loop;
	}
	mutex_exit(&kernel_mutex);

	srv_main_thread_op_info = "waiting for buffer pool flush to end";
	buf_flush_wait_batch_end(BUF_FLUSH_LIST);

	srv_main_thread_op_info = "flushing log";

	log_buffer_flush_to_disk();

	srv_main_thread_op_info = "making checkpoint";

	log_checkpoint(TRUE, FALSE);

	if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {

		/* Try to keep the number of modified pages in the
		buffer pool under the limit wished by the user */

		goto flush_loop;
	}

	srv_main_thread_op_info = "reserving kernel mutex";

	mutex_enter(&kernel_mutex);
	if (srv_activity_count != old_activity_count) {
		mutex_exit(&kernel_mutex);
		goto loop;
	}
	mutex_exit(&kernel_mutex);
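
	/* flush_loop above repeats the flush-and-checkpoint sequence
	until the ratio of modified pages falls below
	srv_max_buf_pool_modified_pct or user activity resumes; during a
	shutdown that is not 'very fast', this is the loop that writes
	the buffer pool out to the data files. */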
	/*
	srv_main_thread_op_info = "archiving log (if log archive is on)";

	log_archive_do(FALSE, &n_bytes_archived);
	*/
	n_bytes_archived = 0;

	/* Keep looping in the background loop if there is still work
	to do */

	if (srv_fast_shutdown && srv_shutdown_state > 0) {
		if (n_tables_to_drop + n_pages_flushed
		    + n_bytes_archived != 0) {

			/* If we are doing a fast shutdown (= the default)
			we do not do purge or insert buffer merge. But we
			flush the buffer pool completely to disk.
			In a 'very fast' shutdown we do not flush the buffer
			pool to data files: we have set n_pages_flushed to
			0 artificially. */

			goto background_loop;
		}
	} else if (n_tables_to_drop
		   + n_pages_purged + n_bytes_merged + n_pages_flushed
		   + n_bytes_archived != 0) {
		/* In a 'slow' shutdown we run purge and the insert buffer
		merge to completion */

		goto background_loop;
	}

	/* There is no work for background operations either: suspend
	master thread to wait for more server activity */

suspend_thread:
	srv_main_thread_op_info = "suspending";

	mutex_enter(&kernel_mutex);

	if (row_get_background_drop_list_len_low() > 0) {
		mutex_exit(&kernel_mutex);

		goto loop;
	}

	event = srv_suspend_thread();

	mutex_exit(&kernel_mutex);

	/* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
	waits for database activity to die down when converting < 4.1.x
	databases, and relies on this string being exactly as it is. InnoDB
	manual also mentions this string in several places. */
	srv_main_thread_op_info = "waiting for server activity";

	os_event_wait(event);

	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
		/* This is only extra safety, the thread should exit
		already when the event wait ends */

		os_thread_exit(NULL);
	}

	/* When there is user activity, InnoDB will set the event and the
	main thread goes back to loop. */

	goto loop;

	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
}
#endif /* !UNIV_HOTBACKUP */