/*****************************************************************************

Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (C) 2008, 2009 Google Inc.
Copyright (C) 2009, Percona Inc.

Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.

Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
gratefully acknowledged and are described briefly in the InnoDB
documentation. The contributions by Percona Inc. are incorporated with
their permission, and subject to the conditions contained in the file
COPYING.Percona.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/
/**************************************************//**
36
The database server main program
38
NOTE: SQL Server 7 uses something which the documentation
39
calls user mode scheduled threads (UMS threads). One such
40
thread is usually allocated per processor. Win32
41
documentation does not know any UMS threads, which suggests
42
that the concept is internal to SQL Server 7. It may mean that
43
SQL Server 7 does all the scheduling of threads itself, even
44
in i/o waits. We should maybe modify InnoDB to use the same
45
technique, because thread switches within NT may be too slow.
47
SQL Server 7 also mentions fibers, which are cooperatively
48
scheduled threads. They can boost performance by 5 %,
49
according to the Delaney and Soukup's book.
51
Windows 2000 will have something called thread pooling
52
(see msdn website), which we could possibly use.
54
Another possibility could be to use some very fast user space
55
thread library. This might confuse NT though.
57
Created 10/8/1995 Heikki Tuuri
58
*******************************************************/
#include "sync0sync.h"
#include "pars0pars.h"
#include "lock0lock.h"
#include "trx0purge.h"
#include "ibuf0ibuf.h"
#include "dict0load.h"
#include "dict0boot.h"
#include "srv0start.h"
#include "row0mysql.h"
#include "ha_prototypes.h"

#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
88
/* This is set to TRUE if the MySQL user has set it in MySQL; currently
89
affects only FOREIGN KEY definition parsing */
90
UNIV_INTERN ibool srv_lower_case_table_names = FALSE;
92
/* The following counter is incremented whenever there is some user activity
94
UNIV_INTERN ulint srv_activity_count = 0;
96
/* The following is the maximum allowed duration of a lock wait. */
97
UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
99
/* How much data manipulation language (DML) statements need to be delayed,
100
in microseconds, in order to reduce the lagging of the purge thread. */
101
UNIV_INTERN ulint srv_dml_needed_delay = 0;
103
UNIV_INTERN ibool srv_lock_timeout_active = FALSE;
104
UNIV_INTERN ibool srv_monitor_active = FALSE;
105
UNIV_INTERN ibool srv_error_monitor_active = FALSE;
107
UNIV_INTERN const char* srv_main_thread_op_info = "";
109
/* Server parameters which are read from the initfile */
111
/* The following three are dir paths which are catenated before file
112
names, where the file name itself may also contain a path */
114
UNIV_INTERN char* srv_data_home = NULL;
115
#ifdef UNIV_LOG_ARCHIVE
116
UNIV_INTERN char* srv_arch_dir = NULL;
117
#endif /* UNIV_LOG_ARCHIVE */
119
/** store to its own file each table created by an user; data
120
dictionary tables are in the system tablespace 0 */
121
UNIV_INTERN my_bool srv_file_per_table;
122
/** The file format to use on new *.ibd files. */
123
UNIV_INTERN ulint srv_file_format = 0;
124
/** Whether to check file format during startup. A value of
125
DICT_TF_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
126
set it to the highest format we support. */
127
UNIV_INTERN ulint srv_max_file_format_at_startup = DICT_TF_FORMAT_MAX;
129
#if DICT_TF_FORMAT_51
130
# error "DICT_TF_FORMAT_51 must be 0!"
132
/** Place locks to records only i.e. do not use next-key locking except
133
on duplicate key checking and foreign key checking */
134
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
136
/* If this flag is TRUE, then we will use the native aio of the
137
OS (provided we compiled Innobase with it in), otherwise we will
138
use simulated aio we build below with threads.
139
Currently we support native aio on windows and linux */
140
UNIV_INTERN my_bool srv_use_native_aio = TRUE;
143
/* Windows native condition variables. We use runtime loading / function
144
pointers, because they are not available on Windows Server 2003 and
147
We use condition for events on Windows if possible, even if os_event
148
resembles Windows kernel event object well API-wise. The reason is
149
performance, kernel objects are heavyweights and WaitForSingleObject() is a
150
performance killer causing calling thread to context switch. Besides, Innodb
151
is preallocating large number (often millions) of os_events. With kernel event
152
objects it takes a big chunk out of non-paged pool, which is better suited
153
for tasks like IO than for storing idle event objects. */
154
UNIV_INTERN ibool srv_use_native_conditions = FALSE;
157
UNIV_INTERN ulint srv_n_data_files = 0;
158
UNIV_INTERN char** srv_data_file_names = NULL;
159
/* size in database pages */
160
UNIV_INTERN ulint* srv_data_file_sizes = NULL;
162
/* if TRUE, then we auto-extend the last data file */
163
UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
164
/* if != 0, this tells the max size auto-extending may increase the
165
last data file size */
166
UNIV_INTERN ulint srv_last_file_size_max = 0;
167
/* If the last data file is auto-extended, we add this
168
many pages to it at a time */
169
UNIV_INTERN unsigned int srv_auto_extend_increment = 8;
170
UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL;
172
/* If the following is TRUE we do not allow inserts etc. This protects
173
the user from forgetting the 'newraw' keyword to my.cnf */
175
UNIV_INTERN ibool srv_created_new_raw = FALSE;
177
UNIV_INTERN char** srv_log_group_home_dirs = NULL;
179
UNIV_INTERN ulint srv_n_log_groups = ULINT_MAX;
180
UNIV_INTERN ulint srv_n_log_files = ULINT_MAX;
181
/* size in database pages */
182
UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
183
/* size in database pages */
184
UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
185
UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
187
/* Try to flush dirty pages so as to avoid IO bursts at
189
UNIV_INTERN bool srv_adaptive_flushing = TRUE;
191
/** Maximum number of times allowed to conditionally acquire
192
mutex before switching to blocking wait on the mutex */
193
#define MAX_MUTEX_NOWAIT 20
195
/** Check whether the number of failed nonblocking mutex
196
acquisition attempts exceeds maximum allowed value. If so,
197
srv_printf_innodb_monitor() will request mutex acquisition
198
with mutex_enter(), which will wait until it gets the mutex. */
199
#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
201
/** The sort order table of the MySQL latin1_swedish_ci character set
203
#if defined(BUILD_DRIZZLE)
204
const byte srv_latin1_ordering[256] /* The sort order table of the latin1
205
character set. The following table is
206
the MySQL order as of Feb 10th, 2002 */
208
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
209
, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
210
, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
211
, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
212
, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27
213
, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F
214
, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
215
, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F
216
, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
217
, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
218
, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
219
, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F
220
, 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
221
, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
222
, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
223
, 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
224
, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
225
, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F
226
, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97
227
, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F
228
, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7
229
, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF
230
, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7
231
, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF
232
, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
233
, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
234
, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7
235
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF
236
, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
237
, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
238
, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7
239
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
242
UNIV_INTERN const byte* srv_latin1_ordering;
243
#endif /* BUILD_DRIZZLE */
246
/* use os/external memory allocator */
247
UNIV_INTERN my_bool srv_use_sys_malloc = TRUE;
248
/* requested size in kilobytes */
249
UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX;
250
/* requested number of buffer pool instances */
251
UNIV_INTERN ulint srv_buf_pool_instances = 1;
252
/* previously requested size */
253
UNIV_INTERN ulint srv_buf_pool_old_size;
254
/* current size in kilobytes */
255
UNIV_INTERN ulint srv_buf_pool_curr_size = 0;
257
UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
258
UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
260
/* This parameter is deprecated. Use srv_n_io_[read|write]_threads
262
UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
263
UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
264
UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;
266
/* User settable value of the number of pages that must be present
267
in the buffer cache and accessed sequentially for InnoDB to trigger a
268
readahead request. */
269
UNIV_INTERN ulong srv_read_ahead_threshold = 56;
271
#ifdef UNIV_LOG_ARCHIVE
272
UNIV_INTERN ibool srv_log_archive_on = FALSE;
273
UNIV_INTERN ibool srv_archive_recovery = 0;
274
UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn;
275
#endif /* UNIV_LOG_ARCHIVE */
277
/* This parameter is used to throttle the number of insert buffers that are
278
merged in a batch. By increasing this parameter on a faster disk you can
279
possibly reduce the number of I/O operations performed to complete the
280
merge operation. The value of this parameter is used as is by the
281
background loop when the system is idle (low load), on a busy system
282
the parameter is scaled down by a factor of 4, this is to avoid putting
283
a heavier load on the I/O sub system. */
285
UNIV_INTERN ulong srv_insert_buffer_batch_size = 20;
287
UNIV_INTERN char* srv_file_flush_method_str = NULL;
288
UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
289
UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
291
UNIV_INTERN ulint srv_max_n_open_files = 300;
293
/* Number of IO operations per second the server can do */
294
UNIV_INTERN ulong srv_io_capacity = 200;
296
/* The InnoDB main thread tries to keep the ratio of modified pages
297
in the buffer pool to all database pages in the buffer pool smaller than
298
the following number. But it is not guaranteed that the value stays below
299
that during a time of heavy update/insert activity. */
301
UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
303
/* the number of purge threads to use from the worker pool (currently 0 or 1).*/
304
UNIV_INTERN ulong srv_n_purge_threads = 0;
306
/* the number of records to purge in one batch */
307
UNIV_INTERN ulong srv_purge_batch_size = 20;
309
/* variable counts amount of data read in total (in bytes) */
310
UNIV_INTERN ulint srv_data_read = 0;
312
/* here we count the amount of data written in total (in bytes) */
313
UNIV_INTERN ulint srv_data_written = 0;
315
/* the number of the log write requests done */
316
UNIV_INTERN ulint srv_log_write_requests = 0;
318
/* the number of physical writes to the log performed */
319
UNIV_INTERN ulint srv_log_writes = 0;
321
/* amount of data written to the log files in bytes */
322
UNIV_INTERN ulint srv_os_log_written = 0;
324
/* amount of writes being done to the log files */
325
UNIV_INTERN ulint srv_os_log_pending_writes = 0;
327
/* we increase this counter, when there we don't have enough space in the
328
log buffer and have to flush it */
329
UNIV_INTERN ulint srv_log_waits = 0;
331
/* this variable counts the amount of times, when the doublewrite buffer
333
UNIV_INTERN ulint srv_dblwr_writes = 0;
335
/* here we store the number of pages that have been flushed to the
336
doublewrite buffer */
337
UNIV_INTERN ulint srv_dblwr_pages_written = 0;
339
/* in this variable we store the number of write requests issued */
340
UNIV_INTERN ulint srv_buf_pool_write_requests = 0;
342
/* here we store the number of times when we had to wait for a free page
343
in the buffer pool. It happens when the buffer pool is full and we need
344
to make a flush, in order to be able to read or create a page. */
345
UNIV_INTERN ulint srv_buf_pool_wait_free = 0;
347
/* variable to count the number of pages that were written from buffer
349
UNIV_INTERN ulint srv_buf_pool_flushed = 0;
351
/** Number of buffer pool reads that led to the
352
reading of a disk page */
353
UNIV_INTERN ulint srv_buf_pool_reads = 0;
355
/* structure to pass status variables to MySQL */
356
UNIV_INTERN export_struc export_vars;
358
/* If the following is != 0 we do not allow inserts etc. This protects
359
the user from forgetting the innodb_force_recovery keyword to my.cnf */
361
UNIV_INTERN ulint srv_force_recovery = 0;
362
/*-----------------------*/
363
/* We are prepared for a situation that we have this many threads waiting for
364
a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
367
UNIV_INTERN ulint srv_max_n_threads = 0;
369
/* The following controls how many threads we let inside InnoDB concurrently:
370
threads waiting for locks are not counted into the number because otherwise
371
we could get a deadlock. MySQL creates a thread for each user session, and
372
semaphore contention and convoy problems can occur withput this restriction.
373
Value 10 should be good if there are less than 4 processors + 4 disks in the
374
computer. Bigger computers need bigger values. Value 0 will disable the
375
concurrency check. */
377
UNIV_INTERN ulong srv_thread_concurrency = 0;
379
/* this mutex protects srv_conc data structures */
380
UNIV_INTERN os_fast_mutex_t srv_conc_mutex;
381
/* number of transactions that have declared_to_be_inside_innodb set.
382
It used to be a non-error for this value to drop below zero temporarily.
383
This is no longer true. We'll, however, keep the lint datatype to add
384
assertions to catch any corner cases that we may have missed. */
385
UNIV_INTERN lint srv_conc_n_threads = 0;
386
/* number of OS threads waiting in the FIFO for a permission to enter
388
UNIV_INTERN ulint srv_conc_n_waiting_threads = 0;
390
typedef struct srv_conc_slot_struct srv_conc_slot_t;
391
struct srv_conc_slot_struct{
392
os_event_t event; /*!< event to wait */
393
ibool reserved; /*!< TRUE if slot
395
ibool wait_ended; /*!< TRUE when another
396
thread has already set
398
thread in this slot is
400
reserved may still be
401
TRUE at that point */
402
UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /*!< queue node */
405
/* queue of threads waiting to get in */
406
UNIV_INTERN UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue;
407
/* array of wait slots */
408
UNIV_INTERN srv_conc_slot_t* srv_conc_slots;
410
/* Number of times a thread is allowed to enter InnoDB within the same
411
SQL query after it has once got the ticket at srv_conc_enter_innodb */
412
#define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter
413
#define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay
414
/*-----------------------*/
415
/* If the following is set to 1 then we do not run purge and insert buffer
416
merge to completion before shutdown. If it is set to 2, do not even flush the
417
buffer pool to data files at the shutdown: we effectively 'crash'
418
InnoDB (but lose no committed transactions). */
419
UNIV_INTERN ulint srv_fast_shutdown = 0;
421
/* Generate a innodb_status.<pid> file */
422
UNIV_INTERN ibool srv_innodb_status = FALSE;
424
/* When estimating number of different key values in an index, sample
425
this many index pages */
426
UNIV_INTERN ib_uint64_t srv_stats_sample_pages = 8;
428
UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
429
UNIV_INTERN ibool srv_use_checksums = TRUE;
431
UNIV_INTERN ulong srv_replication_delay = 0;
433
/*-------------------------------------------*/
434
UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
435
UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
436
UNIV_INTERN ulong srv_thread_sleep_delay = 10000;
437
UNIV_INTERN ulong srv_spin_wait_delay = 6;
438
UNIV_INTERN ibool srv_priority_boost = TRUE;
441
UNIV_INTERN ibool srv_print_thread_releases = FALSE;
442
UNIV_INTERN ibool srv_print_lock_waits = FALSE;
443
UNIV_INTERN ibool srv_print_buf_io = FALSE;
444
UNIV_INTERN ibool srv_print_log_io = FALSE;
445
UNIV_INTERN ibool srv_print_latch_waits = FALSE;
446
#endif /* UNIV_DEBUG */
448
UNIV_INTERN ulint srv_n_rows_inserted = 0;
449
UNIV_INTERN ulint srv_n_rows_updated = 0;
450
UNIV_INTERN ulint srv_n_rows_deleted = 0;
451
UNIV_INTERN ulint srv_n_rows_read = 0;
453
static ulint srv_n_rows_inserted_old = 0;
454
static ulint srv_n_rows_updated_old = 0;
455
static ulint srv_n_rows_deleted_old = 0;
456
static ulint srv_n_rows_read_old = 0;
458
UNIV_INTERN ulint srv_n_lock_wait_count = 0;
459
UNIV_INTERN ulint srv_n_lock_wait_current_count = 0;
460
UNIV_INTERN ib_int64_t srv_n_lock_wait_time = 0;
461
UNIV_INTERN ulint srv_n_lock_max_wait_time = 0;
463
UNIV_INTERN ulint srv_truncated_status_writes = 0;
466
Set the following to 0 if you want InnoDB to write messages on
467
stderr on startup/shutdown
469
UNIV_INTERN ibool srv_print_verbose_log = TRUE;
470
UNIV_INTERN ibool srv_print_innodb_monitor = FALSE;
471
UNIV_INTERN ibool srv_print_innodb_lock_monitor = FALSE;
472
UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE;
473
UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE;
475
/* Array of English strings describing the current state of an
476
i/o handler thread */
478
UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
479
UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
481
UNIV_INTERN time_t srv_last_monitor_time;
483
UNIV_INTERN mutex_t srv_innodb_monitor_mutex;
485
/* Mutex for locking srv_monitor_file */
486
UNIV_INTERN mutex_t srv_monitor_file_mutex;
488
#ifdef UNIV_PFS_MUTEX
489
/* Key to register kernel_mutex with performance schema */
490
UNIV_INTERN mysql_pfs_key_t kernel_mutex_key;
491
/* Key to protect writing the commit_id to the sys header */
492
UNIV_INTERN mysql_pfs_key_t commit_id_mutex_key;
493
/* Key to register srv_innodb_monitor_mutex with performance schema */
494
UNIV_INTERN mysql_pfs_key_t srv_innodb_monitor_mutex_key;
495
/* Key to register srv_monitor_file_mutex with performance schema */
496
UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key;
497
/* Key to register srv_dict_tmpfile_mutex with performance schema */
498
UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
499
/* Key to register the mutex with performance schema */
500
UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
501
#endif /* UNIV_PFS_MUTEX */
503
/* Temporary file for innodb monitor output */
504
UNIV_INTERN FILE* srv_monitor_file;
505
/* Mutex for locking srv_dict_tmpfile.
506
This mutex has a very high rank; threads reserving it should not
507
be holding any InnoDB latches. */
508
UNIV_INTERN mutex_t srv_dict_tmpfile_mutex;
509
/* Temporary file for output from the data dictionary */
510
UNIV_INTERN FILE* srv_dict_tmpfile;
511
/* Mutex for locking srv_misc_tmpfile.
512
This mutex has a very low rank; threads reserving it should not
513
acquire any further latches or sleep before releasing this one. */
514
UNIV_INTERN mutex_t srv_misc_tmpfile_mutex;
515
/* Temporary file for miscellanous diagnostic output */
516
UNIV_INTERN FILE* srv_misc_tmpfile;
518
UNIV_INTERN ulint srv_main_thread_process_no = 0;
519
UNIV_INTERN ulint srv_main_thread_id = 0;
521
/* The following count work done by srv_master_thread. */
523
/* Iterations by the 'once per second' loop. */
524
static ulint srv_main_1_second_loops = 0;
525
/* Calls to sleep by the 'once per second' loop. */
526
static ulint srv_main_sleeps = 0;
527
/* Iterations by the 'once per 10 seconds' loop. */
528
static ulint srv_main_10_second_loops = 0;
529
/* Iterations of the loop bounded by the 'background_loop' label. */
530
static ulint srv_main_background_loops = 0;
531
/* Iterations of the loop bounded by the 'flush_loop' label. */
532
static ulint srv_main_flush_loops = 0;
533
/* Log writes involving flush. */
534
static ulint srv_log_writes_and_flush = 0;
536
/* This is only ever touched by the master thread. It records the
537
time when the last flush of log file has happened. The master
538
thread ensures that we flush the log files at least once per
540
static time_t srv_last_log_flush_time;
542
/* The master thread performs various tasks based on the current
543
state of IO activity and the level of IO utilization is past
544
intervals. Following macros define thresholds for these conditions. */
545
#define SRV_PEND_IO_THRESHOLD (PCT_IO(3))
546
#define SRV_RECENT_IO_ACTIVITY (PCT_IO(5))
547
#define SRV_PAST_IO_ACTIVITY (PCT_IO(200))
/*
	IMPLEMENTATION OF THE SERVER MAIN PROGRAM
	=========================================

There is the following analogue between this database
server and an operating system kernel:

DB concept			equivalent OS concept
----------			---------------------
transaction		--	process;

query thread		--	thread;

the rollback state	--	kill signal delivered to a process;

query thread execution:
(a) without kernel mutex
reserved		--	process executing in user mode;
(b) with kernel mutex reserved
			--	process executing in kernel mode;

The server is controlled by a master thread which runs at
a priority higher than normal, that is, higher than user threads.
It sleeps most of the time, and wakes up, say, every 300 milliseconds,
to check whether there is anything happening in the server which
requires intervention of the master thread. Such situations may be,
for example, when flushing of dirty blocks is needed in the buffer
pool or old version of database rows have to be cleaned away.

The threads which we call user threads serve the queries of
the clients and input from the console of the server.
They run at normal priority. The server may have several
communications endpoints. A dedicated set of user threads waits
at each of these endpoints ready to receive a client request.
Each request is taken by a single user thread, which then starts
processing and, when the result is ready, sends it to the client
and returns to wait at the same endpoint the thread started from.

So, we do not have dedicated communication threads listening at
the endpoints and dealing the jobs to dedicated worker threads.
Our architecture saves one thread switch per request, compared
to the solution with dedicated communication threads
which amounts to 15 microseconds on 100 MHz Pentium
running NT. If the client
is communicating over a network, this saving is negligible, but
if the client resides in the same machine, maybe in an SMP machine
on a different processor from the server thread, the saving
can be important as the threads can communicate over shared
memory with an overhead of a few microseconds.

We may later implement a dedicated communication thread solution
for those endpoints which communicate over a network.

Our solution with user threads has two problems: for each endpoint
there has to be a number of listening threads. If there are many
communication endpoints, it may be difficult to set the right number
of concurrent threads in the system, as many of the threads
may always be waiting at less busy endpoints. Another problem
is queuing of the messages, as the server internally does not
offer any queue for jobs.

Another group of user threads is intended for splitting the
queries and processing them in parallel. Let us call these
parallel communication threads. These threads are waiting for
parallelized tasks, suspended on event semaphores.

A single user thread waits for input from the console,
like a command to shut the database.

Utility threads are a different group of threads which takes
care of the buffer pool flushing and other, mainly background
operations, in the server.
Some of these utility threads always run at a lower than normal
priority, so that they are always in background. Some of them
may dynamically boost their priority by the pri_adjust function,
even to higher than normal priority, if their task becomes urgent.
The running of utilities is controlled by high- and low-water marks
of urgency. The urgency may be measured by the number of dirty blocks
in the buffer pool, in the case of the flush thread, for example.
When the high-water mark is exceeded, an utility starts running, until
the urgency drops under the low-water mark. Then the utility thread
suspend itself to wait for an event. The master thread is
responsible of signaling this event when the utility thread is
again needed.

For each individual type of utility, some threads always remain
at lower than normal priority. This is because pri_adjust is implemented
so that the threads at normal or higher priority control their
share of running time by calling sleep. Thus, if the load of the
system suddenly drops, these threads cannot necessarily utilize
the system fully. The background priority threads make up for this,
starting to run when the load drops.

When there is no activity in the system, also the master thread
suspends itself to wait for an event making
the server totally silent. The responsibility to signal this
event is on the user thread which again receives a message
from a client.

There is still one complication in our server design. If a
background utility thread obtains a resource (e.g., mutex) needed by a user
thread, and there is also some other user activity in the system,
the user thread may have to wait indefinitely long for the
resource, as the OS does not schedule a background thread if
there is some other runnable user thread. This problem is called
priority inversion in real-time programming.

One solution to the priority inversion problem would be to
keep record of which thread owns which resource and
in the above case boost the priority of the background thread
so that it will be scheduled and it can release the resource.
This solution is called priority inheritance in real-time programming.
A drawback of this solution is that the overhead of acquiring a mutex
increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
the thread has to call os_thread_get_curr_id.
This may be compared to 0.5 microsecond overhead for a mutex lock-unlock
pair. Note that the thread
cannot store the information in the resource, say mutex, itself,
because competing threads could wipe out the information if it is
stored before acquiring the mutex, and if it stored afterwards,
the information is outdated for the time of one machine instruction,
at least. (To be precise, the information could be stored to
lock_word in mutex if the machine supports atomic swap.)

The above solution with priority inheritance may become actual in the
future, but at the moment we plan to implement a more coarse solution,
which could be called a global priority inheritance. If a thread
has to wait for a long time, say 300 milliseconds, for a resource,
we just guess that it may be waiting for a resource owned by a background
thread, and boost the the priority of all runnable background threads
to the normal level. The background threads then themselves adjust
their fixed priority back to background after releasing all resources
they had (or, at some fixed points in their program code).

What is the performance of the global priority inheritance solution?
We may weigh the length of the wait time 300 milliseconds, during
which the system processes some other thread
to the cost of boosting the priority of each runnable background
thread, rescheduling it, and lowering the priority again.
On 100 MHz Pentium + NT this overhead may be of the order 100
microseconds per thread. So, if the number of runnable background
threads is not very big, say < 100, the cost is tolerable.
Utility threads probably will access resources used by
user threads not very often, so collisions of user threads
to preempted utility threads should not happen very often.

The thread table contains
information of the current status of each thread existing in the system,
and also the event semaphores used in suspending the master thread
and utility and parallel communication threads when they have nothing to do.
The thread table can be seen as an analogue to the process table
in a traditional Unix implementation.

The thread table is also used in the global priority inheritance
scheme. This brings in one additional complication: threads accessing
the thread table must have at least normal fixed priority,
because the priority inheritance solution does not work if a background
thread is preempted while possessing the mutex protecting the thread table.
So, if a thread accesses the thread table, its priority has to be
boosted at least to normal. This priority requirement can be seen similar to
the privileged mode used when processing the kernel calls in traditional
Unix. */
717
/* Thread slot in the thread table */
718
struct srv_slot_struct{
719
os_thread_id_t id; /*!< thread id */
720
os_thread_t handle; /*!< thread handle */
721
unsigned type:3; /*!< thread type: user, utility etc. */
722
unsigned in_use:1; /*!< TRUE if this slot is in use */
723
unsigned suspended:1; /*!< TRUE if the thread is waiting
724
for the event of this slot */
725
ib_time_t suspend_time; /*!< time when the thread was
727
os_event_t event; /*!< event used in suspending the
728
thread when it has nothing to do */
729
que_thr_t* thr; /*!< suspended query thread (only
730
used for MySQL threads) */
733
/* Table for MySQL threads where they will be suspended to wait for locks */
734
UNIV_INTERN srv_slot_t* srv_mysql_table = NULL;
736
UNIV_INTERN os_event_t srv_timeout_event;
738
UNIV_INTERN os_event_t srv_monitor_event;
740
UNIV_INTERN os_event_t srv_error_event;
742
UNIV_INTERN os_event_t srv_lock_timeout_thread_event;
744
UNIV_INTERN srv_sys_t* srv_sys = NULL;
746
/* padding to prevent other memory update hotspots from residing on
747
the same memory cache line */
748
UNIV_INTERN byte srv_pad1[64];
749
/* mutex protecting the server, trx structs, query threads, and lock table */
750
UNIV_INTERN mutex_t* kernel_mutex_temp;
751
/* mutex protecting the sys header for writing the commit id */
752
UNIV_INTERN mutex_t* commit_id_mutex_temp;
754
/* padding to prevent other memory update hotspots from residing on
755
the same memory cache line */
756
UNIV_INTERN byte srv_pad2[64];
759
/* The following three values measure the urgency of the jobs of
760
buffer, version, and insert threads. They may vary from 0 - 1000.
761
The server mutex protects all these variables. The low-water values
762
tell that the server can acquiesce the utility when the value
763
drops below this low-water mark. */
765
static ulint srv_meter[SRV_MASTER + 1];
766
static ulint srv_meter_low_water[SRV_MASTER + 1];
767
static ulint srv_meter_high_water[SRV_MASTER + 1];
768
static ulint srv_meter_high_water2[SRV_MASTER + 1];
769
static ulint srv_meter_foreground[SRV_MASTER + 1];
772
/***********************************************************************
773
Prints counters for work done by srv_master_thread. */
776
srv_print_master_thread_info(
777
/*=========================*/
778
FILE *file) /* in: output stream */
780
fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
781
"%lu 10_second, %lu background, %lu flush\n",
782
srv_main_1_second_loops, srv_main_sleeps,
783
srv_main_10_second_loops, srv_main_background_loops,
784
srv_main_flush_loops);
785
fprintf(file, "srv_master_thread log flush and writes: %lu\n",
786
srv_log_writes_and_flush);
789
/* The following values give info about the activity going on in
790
the database. They are protected by the server mutex. The arrays
791
are indexed by the type of the thread. */
793
UNIV_INTERN ulint srv_n_threads_active[SRV_MASTER + 1];
794
UNIV_INTERN ulint srv_n_threads[SRV_MASTER + 1];
796
/*********************************************************************//**
797
Sets the info describing an i/o thread current state. */
800
srv_set_io_thread_op_info(
801
/*======================*/
802
ulint i, /*!< in: the 'segment' of the i/o thread */
803
const char* str) /*!< in: constant char string describing the
806
ut_a(i < SRV_MAX_N_IO_THREADS);
808
srv_io_thread_op_info[i] = str;
811
/*********************************************************************//**
812
Accessor function to get pointer to n'th slot in the server thread
814
@return pointer to the slot */
817
srv_table_get_nth_slot(
818
/*===================*/
819
ulint index) /*!< in: index of the slot */
821
ut_a(index < OS_THREAD_MAX_N);
823
return(srv_sys->threads + index);
826
/*********************************************************************//**
827
Gets the number of threads in the system.
828
@return sum of srv_n_threads[] */
831
srv_get_n_threads(void)
832
/*===================*/
837
mutex_enter(&kernel_mutex);
839
for (i = SRV_COM; i < SRV_MASTER + 1; i++) {
841
n_threads += srv_n_threads[i];
844
mutex_exit(&kernel_mutex);
849
/*********************************************************************//**
850
Reserves a slot in the thread table for the current thread. Also creates the
851
thread local storage struct for the current thread. NOTE! The server mutex
852
has to be reserved by the caller!
853
@return reserved slot index */
856
srv_table_reserve_slot(
857
/*===================*/
858
enum srv_thread_type type) /*!< in: type of the thread */
864
ut_a(type <= SRV_MASTER);
867
slot = srv_table_get_nth_slot(i);
869
while (slot->in_use) {
871
slot = srv_table_get_nth_slot(i);
874
ut_a(slot->in_use == FALSE);
877
slot->suspended = FALSE;
879
slot->id = os_thread_get_curr_id();
880
slot->handle = os_thread_get_curr();
884
thr_local_set_slot_no(os_thread_get_curr_id(), i);
889
/*********************************************************************//**
890
Suspends the calling thread to wait for the event in its thread slot.
891
NOTE! The server mutex has to be reserved by the caller!
892
@return event for the calling thread to wait */
895
srv_suspend_thread(void)
896
/*====================*/
901
enum srv_thread_type type;
903
ut_ad(mutex_own(&kernel_mutex));
905
slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
907
if (srv_print_thread_releases) {
909
"Suspending thread %lu to slot %lu\n",
910
(ulong) os_thread_get_curr_id(), (ulong) slot_no);
913
slot = srv_table_get_nth_slot(slot_no);
915
type = static_cast<srv_thread_type>(slot->type);
917
ut_ad(type >= SRV_WORKER);
918
ut_ad(type <= SRV_MASTER);
922
slot->suspended = TRUE;
924
ut_ad(srv_n_threads_active[type] > 0);
926
srv_n_threads_active[type]--;
928
os_event_reset(event);
933
/*********************************************************************//**
934
Releases threads of the type given from suspension in the thread table.
935
NOTE! The server mutex has to be reserved by the caller!
936
@return number of threads released: this may be less than n if not
937
enough threads were suspended at the moment */
942
enum srv_thread_type type, /*!< in: thread type */
943
ulint n) /*!< in: number of threads to release */
949
ut_ad(type >= SRV_WORKER);
950
ut_ad(type <= SRV_MASTER);
952
ut_ad(mutex_own(&kernel_mutex));
954
for (i = 0; i < OS_THREAD_MAX_N; i++) {
956
slot = srv_table_get_nth_slot(i);
959
(static_cast<srv_thread_type>(slot->type) == type) &&
962
slot->suspended = FALSE;
964
srv_n_threads_active[type]++;
966
os_event_set(slot->event);
968
if (srv_print_thread_releases) {
970
"Releasing thread %lu type %lu"
972
(ulong) slot->id, (ulong) type,
987
/*********************************************************************//**
988
Returns the calling thread type.
989
@return SRV_COM, ... */
992
srv_get_thread_type(void)
993
/*=====================*/
997
enum srv_thread_type type;
999
mutex_enter(&kernel_mutex);
1001
slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
1003
slot = srv_table_get_nth_slot(slot_no);
1005
type = static_cast<srv_thread_type>(slot->type);
1007
ut_ad(type >= SRV_WORKER);
1008
ut_ad(type <= SRV_MASTER);
1010
mutex_exit(&kernel_mutex);
1015
/*********************************************************************//**
1016
Initializes the server. */
1022
srv_conc_slot_t* conc_slot;
1026
srv_sys = static_cast<srv_sys_t *>(mem_alloc(sizeof(srv_sys_t)));
1028
kernel_mutex_temp = static_cast<ib_mutex_t *>(mem_alloc(sizeof(mutex_t)));
1029
mutex_create(kernel_mutex_key, &kernel_mutex, SYNC_KERNEL);
1031
commit_id_mutex_temp = static_cast<ib_mutex_t *>(mem_alloc(sizeof(mutex_t)));
1032
mutex_create(commit_id_mutex_key, &commit_id_mutex, SYNC_COMMIT_ID_LOCK);
1034
mutex_create(srv_innodb_monitor_mutex_key,
1035
&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
1037
srv_sys->threads = static_cast<srv_table_t *>(mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)));
1039
for (i = 0; i < OS_THREAD_MAX_N; i++) {
1040
slot = srv_table_get_nth_slot(i);
1041
slot->in_use = FALSE;
1042
slot->type=0; /* Avoid purify errors */
1043
slot->event = os_event_create(NULL);
1047
srv_mysql_table = static_cast<srv_slot_t *>(mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)));
1049
for (i = 0; i < OS_THREAD_MAX_N; i++) {
1050
slot = srv_mysql_table + i;
1051
slot->in_use = FALSE;
1053
slot->event = os_event_create(NULL);
1057
srv_error_event = os_event_create(NULL);
1059
srv_timeout_event = os_event_create(NULL);
1061
srv_monitor_event = os_event_create(NULL);
1063
srv_lock_timeout_thread_event = os_event_create(NULL);
1065
for (i = 0; i < SRV_MASTER + 1; i++) {
1066
srv_n_threads_active[i] = 0;
1067
srv_n_threads[i] = 0;
1070
srv_meter_low_water[i] = 50;
1071
srv_meter_high_water[i] = 100;
1072
srv_meter_high_water2[i] = 200;
1073
srv_meter_foreground[i] = 250;
1077
UT_LIST_INIT(srv_sys->tasks);
1079
/* Create dummy indexes for infimum and supremum records */
1083
/* Init the server concurrency restriction data structures */
1085
os_fast_mutex_init(&srv_conc_mutex);
1087
UT_LIST_INIT(srv_conc_queue);
1089
srv_conc_slots = static_cast<srv_conc_slot_t *>(mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t)));
1091
for (i = 0; i < OS_THREAD_MAX_N; i++) {
1092
conc_slot = srv_conc_slots + i;
1093
conc_slot->reserved = FALSE;
1094
conc_slot->event = os_event_create(NULL);
1095
ut_a(conc_slot->event);
1098
/* Initialize some INFORMATION SCHEMA internal structures */
1099
trx_i_s_cache_init(trx_i_s_cache);
1102
/*********************************************************************//**
1103
Frees the data structures created in srv_init(). */
1109
os_fast_mutex_free(&srv_conc_mutex);
1110
mem_free(srv_conc_slots);
1111
srv_conc_slots = NULL;
1113
mem_free(srv_sys->threads);
1117
mem_free(kernel_mutex_temp);
1118
kernel_mutex_temp = NULL;
1119
mem_free(srv_mysql_table);
1120
srv_mysql_table = NULL;
1122
mem_free(commit_id_mutex_temp);
1123
commit_id_mutex_temp = NULL;
1125
trx_i_s_cache_free(trx_i_s_cache);
1128
/*********************************************************************//**
1129
Initializes the synchronization primitives, memory system, and the thread
1133
srv_general_init(void)
1134
/*==================*/
1137
/* Reset the system variables in the recovery module. */
1138
recv_sys_var_init();
1141
mem_init(srv_mem_pool_size);
1145
/*======================= InnoDB Server FIFO queue =======================*/
1147
/* Maximum allowable purge history length. <=0 means 'infinite'. */
1148
UNIV_INTERN ulong srv_max_purge_lag = 0;
1150
/*********************************************************************//**
1151
Puts an OS thread to wait if there are too many concurrent threads
1152
(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
1155
srv_conc_enter_innodb(
1156
/*==================*/
1157
trx_t* trx) /*!< in: transaction object associated with the
1160
ibool has_slept = FALSE;
1161
srv_conc_slot_t* slot = NULL;
1164
if (trx->mysql_thd != NULL
1165
&& thd_is_replication_slave_thread(trx->mysql_thd)) {
1167
UT_WAIT_FOR(srv_conc_n_threads
1168
< (lint)srv_thread_concurrency,
1169
srv_replication_delay * 1000);
1174
/* If trx has 'free tickets' to enter the engine left, then use one
1177
if (trx->n_tickets_to_enter_innodb > 0) {
1178
trx->n_tickets_to_enter_innodb--;
1183
os_fast_mutex_lock(&srv_conc_mutex);
1185
if (trx->declared_to_be_inside_innodb) {
1186
ut_print_timestamp(stderr);
1187
fputs(" InnoDB: Error: trying to declare trx"
1188
" to enter InnoDB, but\n"
1189
"InnoDB: it already is declared.\n", stderr);
1190
trx_print(stderr, trx, 0);
1192
os_fast_mutex_unlock(&srv_conc_mutex);
1197
ut_ad(srv_conc_n_threads >= 0);
1199
if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
1201
srv_conc_n_threads++;
1202
trx->declared_to_be_inside_innodb = TRUE;
1203
trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
1205
os_fast_mutex_unlock(&srv_conc_mutex);
1210
/* If the transaction is not holding resources, let it sleep
1211
for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */
1213
if (!has_slept && !trx->has_search_latch
1214
&& NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {
1216
has_slept = TRUE; /* We let it sleep only once to avoid
1219
srv_conc_n_waiting_threads++;
1221
os_fast_mutex_unlock(&srv_conc_mutex);
1223
trx->op_info = "sleeping before joining InnoDB queue";
1225
/* Peter Zaitsev suggested that we take the sleep away
1226
altogether. But the sleep may be good in pathological
1227
situations of lots of thread switches. Simply put some
1228
threads aside for a while to reduce the number of thread
1230
if (SRV_THREAD_SLEEP_DELAY > 0) {
1231
os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
1236
os_fast_mutex_lock(&srv_conc_mutex);
1238
srv_conc_n_waiting_threads--;
1243
/* Too many threads inside: put the current thread to a queue */
1245
for (i = 0; i < OS_THREAD_MAX_N; i++) {
1246
slot = srv_conc_slots + i;
1248
if (!slot->reserved) {
1254
if (i == OS_THREAD_MAX_N) {
1255
/* Could not find a free wait slot, we must let the
1258
srv_conc_n_threads++;
1259
trx->declared_to_be_inside_innodb = TRUE;
1260
trx->n_tickets_to_enter_innodb = 0;
1262
os_fast_mutex_unlock(&srv_conc_mutex);
1267
/* Release possible search system latch this thread has */
1268
if (trx->has_search_latch) {
1269
trx_search_latch_release_if_reserved(trx);
1272
/* Add to the queue */
1273
slot->reserved = TRUE;
1274
slot->wait_ended = FALSE;
1276
UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
1278
os_event_reset(slot->event);
1280
srv_conc_n_waiting_threads++;
1282
os_fast_mutex_unlock(&srv_conc_mutex);
1284
/* Go to wait for the event; when a thread leaves InnoDB it will
1285
release this thread */
1287
trx->op_info = "waiting in InnoDB queue";
1289
os_event_wait(slot->event);
1293
os_fast_mutex_lock(&srv_conc_mutex);
1295
srv_conc_n_waiting_threads--;
1297
/* NOTE that the thread which released this thread already
1298
incremented the thread counter on behalf of this thread */
1300
slot->reserved = FALSE;
1302
UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
1304
trx->declared_to_be_inside_innodb = TRUE;
1305
trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
1307
os_fast_mutex_unlock(&srv_conc_mutex);
1310
/*********************************************************************//**
1311
This lets a thread enter InnoDB regardless of the number of threads inside
1312
InnoDB. This must be called when a thread ends a lock wait. */
1315
srv_conc_force_enter_innodb(
1316
/*========================*/
1317
trx_t* trx) /*!< in: transaction object associated with the
1320
if (UNIV_LIKELY(!srv_thread_concurrency)) {
1325
ut_ad(srv_conc_n_threads >= 0);
1327
os_fast_mutex_lock(&srv_conc_mutex);
1329
srv_conc_n_threads++;
1330
trx->declared_to_be_inside_innodb = TRUE;
1331
trx->n_tickets_to_enter_innodb = 1;
1333
os_fast_mutex_unlock(&srv_conc_mutex);
1336
/*********************************************************************//**
1337
This must be called when a thread exits InnoDB in a lock wait or at the
1338
end of an SQL statement. */
1341
srv_conc_force_exit_innodb(
1342
/*=======================*/
1343
trx_t* trx) /*!< in: transaction object associated with the
1346
srv_conc_slot_t* slot = NULL;
1348
if (trx->mysql_thd != NULL
1349
&& thd_is_replication_slave_thread(trx->mysql_thd)) {
1354
if (trx->declared_to_be_inside_innodb == FALSE) {
1359
os_fast_mutex_lock(&srv_conc_mutex);
1361
ut_ad(srv_conc_n_threads > 0);
1362
srv_conc_n_threads--;
1363
trx->declared_to_be_inside_innodb = FALSE;
1364
trx->n_tickets_to_enter_innodb = 0;
1366
if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
1367
/* Look for a slot where a thread is waiting and no other
1368
thread has yet released the thread */
1370
slot = UT_LIST_GET_FIRST(srv_conc_queue);
1372
while (slot && slot->wait_ended == TRUE) {
1373
slot = UT_LIST_GET_NEXT(srv_conc_queue, slot);
1377
slot->wait_ended = TRUE;
1379
/* We increment the count on behalf of the released
1382
srv_conc_n_threads++;
1386
os_fast_mutex_unlock(&srv_conc_mutex);
1389
os_event_set(slot->event);
1393
/*********************************************************************//**
1394
This must be called when a thread exits InnoDB. */
1397
srv_conc_exit_innodb(
1398
/*=================*/
1399
trx_t* trx) /*!< in: transaction object associated with the
1402
if (trx->n_tickets_to_enter_innodb > 0) {
1403
/* We will pretend the thread is still inside InnoDB though it
1404
now leaves the InnoDB engine. In this way we save
1405
a lot of semaphore operations. srv_conc_force_exit_innodb is
1406
used to declare the thread definitely outside InnoDB. It
1407
should be called when there is a lock wait or an SQL statement
1413
srv_conc_force_exit_innodb(trx);
1416
/*========================================================================*/
1418
/*********************************************************************//**
1419
Normalizes init parameter values to use units we use inside InnoDB.
1420
@return DB_SUCCESS or error code */
1423
srv_normalize_init_values(void)
1424
/*===========================*/
1429
n = srv_n_data_files;
1431
for (i = 0; i < n; i++) {
1432
srv_data_file_sizes[i] = srv_data_file_sizes[i]
1433
* ((1024 * 1024) / UNIV_PAGE_SIZE);
1436
srv_last_file_size_max = srv_last_file_size_max
1437
* ((1024 * 1024) / UNIV_PAGE_SIZE);
1439
srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
1441
srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
1443
srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
1448
/*********************************************************************//**
1449
Boots the InnoDB server.
1450
@return DB_SUCCESS or error code */
1458
/* Transform the init parameter values given by MySQL to
1459
use units we use inside InnoDB: */
1461
err = srv_normalize_init_values();
1463
if (err != DB_SUCCESS) {
1467
/* Initialize synchronization primitives, memory management, and thread
1472
/* Initialize this module */
1479
/*********************************************************************//**
1480
Reserves a slot in the thread table for the current MySQL OS thread.
1481
NOTE! The kernel mutex has to be reserved by the caller!
1482
@return reserved slot */
1485
srv_table_reserve_slot_for_mysql(void)
1486
/*==================================*/
1491
ut_ad(mutex_own(&kernel_mutex));
1494
slot = srv_mysql_table + i;
1496
while (slot->in_use) {
1499
if (i >= OS_THREAD_MAX_N) {
1501
ut_print_timestamp(stderr);
1504
" InnoDB: There appear to be %lu MySQL"
1505
" threads currently waiting\n"
1506
"InnoDB: inside InnoDB, which is the"
1507
" upper limit. Cannot continue operation.\n"
1508
"InnoDB: We intentionally generate"
1509
" a seg fault to print a stack trace\n"
1510
"InnoDB: on Linux. But first we print"
1511
" a list of waiting threads.\n", (ulong) i);
1513
for (i = 0; i < OS_THREAD_MAX_N; i++) {
1515
slot = srv_mysql_table + i;
1518
"Slot %lu: thread id %lu, type %lu,"
1519
" in use %lu, susp %lu, time %lu\n",
1521
(ulong) os_thread_pf(slot->id),
1523
(ulong) slot->in_use,
1524
(ulong) slot->suspended,
1525
(ulong) difftime(ut_time(),
1526
slot->suspend_time));
1532
slot = srv_mysql_table + i;
1535
ut_a(slot->in_use == FALSE);
1537
slot->in_use = TRUE;
1538
slot->id = os_thread_get_curr_id();
1539
slot->handle = os_thread_get_curr();
1544
/***************************************************************//**
1545
Puts a MySQL OS thread to wait for a lock to be released. If an error
1546
occurs during the wait trx->error_state associated with thr is
1547
!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
1548
are possible errors. DB_DEADLOCK is returned if selective deadlock
1549
resolution chose this transaction as a victim. */
1552
srv_suspend_mysql_thread(
1553
/*=====================*/
1554
que_thr_t* thr) /*!< in: query thread associated with the MySQL
1561
ulint had_dict_lock;
1562
ibool was_declared_inside_innodb = FALSE;
1563
ib_int64_t start_time = 0;
1564
ib_int64_t finish_time;
1568
ulong lock_wait_timeout;
1570
ut_ad(!mutex_own(&kernel_mutex));
1572
trx = thr_get_trx(thr);
1574
os_event_set(srv_lock_timeout_thread_event);
1576
mutex_enter(&kernel_mutex);
1578
trx->error_state = DB_SUCCESS;
1580
if (thr->state == QUE_THR_RUNNING) {
1582
ut_ad(thr->is_active == TRUE);
1584
/* The lock has already been released or this transaction
1585
was chosen as a deadlock victim: no need to suspend */
1587
if (trx->was_chosen_as_deadlock_victim) {
1589
trx->error_state = DB_DEADLOCK;
1590
trx->was_chosen_as_deadlock_victim = FALSE;
1593
mutex_exit(&kernel_mutex);
1598
ut_ad(thr->is_active == FALSE);
1600
slot = srv_table_reserve_slot_for_mysql();
1602
event = slot->event;
1606
os_event_reset(event);
1608
slot->suspend_time = ut_time();
1610
if (thr->lock_state == QUE_THR_LOCK_ROW) {
1611
srv_n_lock_wait_count++;
1612
srv_n_lock_wait_current_count++;
1614
if (ut_usectime(&sec, &ms) == -1) {
1617
start_time = (ib_int64_t) sec * 1000000 + ms;
1620
/* Wake the lock timeout monitor thread, if it is suspended */
1622
os_event_set(srv_lock_timeout_thread_event);
1624
mutex_exit(&kernel_mutex);
1626
if (trx->declared_to_be_inside_innodb) {
1628
was_declared_inside_innodb = TRUE;
1630
/* We must declare this OS thread to exit InnoDB, since a
1631
possible other thread holding a lock which this thread waits
1632
for must be allowed to enter, sooner or later */
1634
srv_conc_force_exit_innodb(trx);
1637
had_dict_lock = trx->dict_operation_lock_mode;
1639
switch (had_dict_lock) {
1641
/* Release foreign key check latch */
1642
row_mysql_unfreeze_data_dictionary(trx);
1645
/* There should never be a lock wait when the
1646
dictionary latch is reserved in X mode. Dictionary
1647
transactions should only acquire locks on dictionary
1648
tables, not other tables. All access to dictionary
1649
tables should be covered by dictionary
1651
ut_print_timestamp(stderr);
1652
fputs(" InnoDB: Error: dict X latch held in "
1653
"srv_suspend_mysql_thread\n", stderr);
1654
/* This should never occur. This incorrect handling
1655
was added in the early development of
1656
ha_innobase::add_index() in InnoDB Plugin 1.0. */
1657
/* Release fast index creation latch */
1658
row_mysql_unlock_data_dictionary(trx);
1662
ut_a(trx->dict_operation_lock_mode == 0);
1664
/* Suspend this thread and wait for the event. */
1666
os_event_wait(event);
1668
/* After resuming, reacquire the data dictionary latch if
1671
switch (had_dict_lock) {
1673
row_mysql_freeze_data_dictionary(trx);
1676
/* This should never occur. This incorrect handling
1677
was added in the early development of
1678
ha_innobase::add_index() in InnoDB Plugin 1.0. */
1679
row_mysql_lock_data_dictionary(trx);
1683
if (was_declared_inside_innodb) {
1685
/* Return back inside InnoDB */
1687
srv_conc_force_enter_innodb(trx);
1690
mutex_enter(&kernel_mutex);
1692
/* Release the slot for others to use */
1694
slot->in_use = FALSE;
1696
wait_time = ut_difftime(ut_time(), slot->suspend_time);
1698
if (thr->lock_state == QUE_THR_LOCK_ROW) {
1699
if (ut_usectime(&sec, &ms) == -1) {
1702
finish_time = (ib_int64_t) sec * 1000000 + ms;
1705
diff_time = (ulint) (finish_time - start_time);
1707
srv_n_lock_wait_current_count--;
1708
srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time;
1709
if (diff_time > srv_n_lock_max_wait_time &&
1710
/* only update the variable if we successfully
1711
retrieved the start and finish times. See Bug#36819. */
1712
start_time != -1 && finish_time != -1) {
1713
srv_n_lock_max_wait_time = diff_time;
1716
/* Record the lock wait time for this thread */
1717
thd_set_lock_wait_time(trx->mysql_thd, diff_time);
1720
if (trx->was_chosen_as_deadlock_victim) {
1722
trx->error_state = DB_DEADLOCK;
1723
trx->was_chosen_as_deadlock_victim = FALSE;
1726
mutex_exit(&kernel_mutex);
1728
/* InnoDB system transactions (such as the purge, and
1729
incomplete transactions that are being rolled back after crash
1730
recovery) will use the global value of
1731
innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */
1732
lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd);
1734
if (lock_wait_timeout < 100000000
1735
&& wait_time > (double) lock_wait_timeout) {
1737
trx->error_state = DB_LOCK_WAIT_TIMEOUT;
1740
if (trx_is_interrupted(trx)) {
1742
trx->error_state = DB_INTERRUPTED;
1746
/********************************************************************//**
1747
Releases a MySQL OS thread waiting for a lock to be released, if the
1748
thread is already suspended. */
1751
srv_release_mysql_thread_if_suspended(
1752
/*==================================*/
1753
que_thr_t* thr) /*!< in: query thread associated with the
1759
ut_ad(mutex_own(&kernel_mutex));
1761
for (i = 0; i < OS_THREAD_MAX_N; i++) {
1763
slot = srv_mysql_table + i;
1765
if (slot->in_use && slot->thr == thr) {
1768
os_event_set(slot->event);
1777
/******************************************************************//**
1778
Refreshes the values used to calculate per-second averages. */
1781
srv_refresh_innodb_monitor_stats(void)
1782
/*==================================*/
1784
mutex_enter(&srv_innodb_monitor_mutex);
1786
srv_last_monitor_time = time(NULL);
1788
os_aio_refresh_stats();
1790
btr_cur_n_sea_old = btr_cur_n_sea;
1791
btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1793
log_refresh_stats();
1795
buf_refresh_io_stats_all();
1797
srv_n_rows_inserted_old = srv_n_rows_inserted;
1798
srv_n_rows_updated_old = srv_n_rows_updated;
1799
srv_n_rows_deleted_old = srv_n_rows_deleted;
1800
srv_n_rows_read_old = srv_n_rows_read;
1802
mutex_exit(&srv_innodb_monitor_mutex);
1805
/******************************************************************//**
1806
Outputs to a file the output of the InnoDB Monitor.
1807
@return FALSE if not all information printed
1808
due to failure to obtain necessary mutex */
1811
srv_printf_innodb_monitor(
1812
/*======================*/
1813
FILE* file, /*!< in: output stream */
1814
ibool nowait, /*!< in: whether to wait for kernel mutex */
1815
ulint* trx_start, /*!< out: file position of the start of
1816
the list of active transactions */
1817
ulint* trx_end) /*!< out: file position of the end of
1818
the list of active transactions */
1820
double time_elapsed;
1821
time_t current_time;
1825
mutex_enter(&srv_innodb_monitor_mutex);
1827
current_time = time(NULL);
1829
/* We add 0.001 seconds to time_elapsed to prevent division
1830
by zero if two users happen to call SHOW INNODB STATUS at the same
1833
time_elapsed = difftime(current_time, srv_last_monitor_time)
1836
srv_last_monitor_time = time(NULL);
1838
fputs("\n=====================================\n", file);
1840
ut_print_timestamp(file);
1842
" INNODB MONITOR OUTPUT\n"
1843
"=====================================\n"
1844
"Per second averages calculated from the last %lu seconds\n",
1845
(ulong)time_elapsed);
1847
fputs("-----------------\n"
1848
"BACKGROUND THREAD\n"
1849
"-----------------\n", file);
1850
srv_print_master_thread_info(file);
1852
fputs("----------\n"
1854
"----------\n", file);
1857
/* Conceptually, srv_innodb_monitor_mutex has a very high latching
1858
order level in sync0sync.h, while dict_foreign_err_mutex has a very
1859
low level 135. Therefore we can reserve the latter mutex here without
1860
a danger of a deadlock of threads. */
1862
mutex_enter(&dict_foreign_err_mutex);
1864
if (ftell(dict_foreign_err_file) != 0L) {
1865
fputs("------------------------\n"
1866
"LATEST FOREIGN KEY ERROR\n"
1867
"------------------------\n", file);
1868
ut_copy_file(file, dict_foreign_err_file);
1871
mutex_exit(&dict_foreign_err_mutex);
1873
/* Only if lock_print_info_summary proceeds correctly,
1874
before we call the lock_print_info_all_transactions
1875
to print all the lock information. */
1876
ret = lock_print_info_summary(file, nowait);
1880
long t = ftell(file);
1882
*trx_start = ULINT_UNDEFINED;
1884
*trx_start = (ulint) t;
1887
lock_print_info_all_transactions(file);
1889
long t = ftell(file);
1891
*trx_end = ULINT_UNDEFINED;
1893
*trx_end = (ulint) t;
1900
"--------\n", file);
1903
fputs("-------------------------------------\n"
1904
"INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
1905
"-------------------------------------\n", file);
1908
ha_print_info(file, btr_search_sys->hash_index);
1911
"%.2f hash searches/s, %.2f non-hash searches/s\n",
1912
(btr_cur_n_sea - btr_cur_n_sea_old)
1914
(btr_cur_n_non_sea - btr_cur_n_non_sea_old)
1916
btr_cur_n_sea_old = btr_cur_n_sea;
1917
btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1924
fputs("----------------------\n"
1925
"BUFFER POOL AND MEMORY\n"
1926
"----------------------\n", file);
1928
"Total memory allocated " ULINTPF
1929
"; in additional pool allocated " ULINTPF "\n",
1930
ut_total_allocated_memory,
1931
mem_pool_get_reserved(mem_comm_pool));
1932
fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
1937
fputs("--------------\n"
1939
"--------------\n", file);
1940
fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
1941
(long) srv_conc_n_threads,
1942
(ulong) srv_conc_n_waiting_threads);
1944
fprintf(file, "%lu read views open inside InnoDB\n",
1945
static_cast<ulint>(UT_LIST_GET_LEN(trx_sys->view_list)));
1947
n_reserved = fil_space_get_n_reserved_extents(0);
1948
if (n_reserved > 0) {
1950
"%lu tablespace extents now reserved for"
1951
" B-tree split operations\n",
1952
(ulong) n_reserved);
1956
fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
1957
(ulong) srv_main_thread_process_no,
1958
(ulong) srv_main_thread_id,
1959
srv_main_thread_op_info);
1961
fprintf(file, "Main thread id %lu, state: %s\n",
1962
(ulong) srv_main_thread_id,
1963
srv_main_thread_op_info);
1966
"Number of rows inserted " ULINTPF
1967
", updated " ULINTPF ", deleted " ULINTPF
1968
", read " ULINTPF "\n",
1969
srv_n_rows_inserted,
1974
"%.2f inserts/s, %.2f updates/s,"
1975
" %.2f deletes/s, %.2f reads/s\n",
1976
(srv_n_rows_inserted - srv_n_rows_inserted_old)
1978
(srv_n_rows_updated - srv_n_rows_updated_old)
1980
(srv_n_rows_deleted - srv_n_rows_deleted_old)
1982
(srv_n_rows_read - srv_n_rows_read_old)
1985
srv_n_rows_inserted_old = srv_n_rows_inserted;
1986
srv_n_rows_updated_old = srv_n_rows_updated;
1987
srv_n_rows_deleted_old = srv_n_rows_deleted;
1988
srv_n_rows_read_old = srv_n_rows_read;
1990
fputs("----------------------------\n"
1991
"END OF INNODB MONITOR OUTPUT\n"
1992
"============================\n", file);
1993
mutex_exit(&srv_innodb_monitor_mutex);
1999
/******************************************************************//**
2000
Function to pass InnoDB status variables to MySQL */
2003
srv_export_innodb_status(void)
2004
/*==========================*/
2006
buf_pool_stat_t stat;
2009
ulint flush_list_len;
2011
buf_get_total_stat(&stat);
2012
buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
2014
mutex_enter(&srv_innodb_monitor_mutex);
2016
export_vars.innodb_data_pending_reads
2017
= os_n_pending_reads;
2018
export_vars.innodb_data_pending_writes
2019
= os_n_pending_writes;
2020
export_vars.innodb_data_pending_fsyncs
2021
= fil_n_pending_log_flushes
2022
+ fil_n_pending_tablespace_flushes;
2023
export_vars.innodb_data_fsyncs = os_n_fsyncs;
2024
export_vars.innodb_data_read = srv_data_read;
2025
export_vars.innodb_data_reads = os_n_file_reads;
2026
export_vars.innodb_data_writes = os_n_file_writes;
2027
export_vars.innodb_data_written = srv_data_written;
2028
export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
2029
export_vars.innodb_buffer_pool_write_requests
2030
= srv_buf_pool_write_requests;
2031
export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
2032
export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
2033
export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
2034
export_vars.innodb_buffer_pool_read_ahead
2035
= stat.n_ra_pages_read;
2036
export_vars.innodb_buffer_pool_read_ahead_evicted
2037
= stat.n_ra_pages_evicted;
2038
export_vars.innodb_buffer_pool_pages_data = LRU_len;
2039
export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
2040
export_vars.innodb_buffer_pool_pages_free = free_len;
2042
export_vars.innodb_buffer_pool_pages_latched
2043
= buf_get_latched_pages_number();
2044
#endif /* UNIV_DEBUG */
2045
export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();
2047
export_vars.innodb_buffer_pool_pages_misc
2048
= buf_pool_get_n_pages() - LRU_len - free_len;
2049
#ifdef HAVE_ATOMIC_BUILTINS
2050
export_vars.innodb_have_atomic_builtins = 1;
2052
export_vars.innodb_have_atomic_builtins = 0;
2054
export_vars.innodb_page_size = UNIV_PAGE_SIZE;
2055
export_vars.innodb_log_waits = srv_log_waits;
2056
export_vars.innodb_os_log_written = srv_os_log_written;
2057
export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
2058
export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
2059
export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
2060
export_vars.innodb_log_write_requests = srv_log_write_requests;
2061
export_vars.innodb_log_writes = srv_log_writes;
2062
export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
2063
export_vars.innodb_dblwr_writes = srv_dblwr_writes;
2064
export_vars.innodb_pages_created = stat.n_pages_created;
2065
export_vars.innodb_pages_read = stat.n_pages_read;
2066
export_vars.innodb_pages_written = stat.n_pages_written;
2067
export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
2068
export_vars.innodb_row_lock_current_waits
2069
= srv_n_lock_wait_current_count;
2070
export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000;
2071
if (srv_n_lock_wait_count > 0) {
2072
export_vars.innodb_row_lock_time_avg = (ulint)
2073
(srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
2075
export_vars.innodb_row_lock_time_avg = 0;
2077
export_vars.innodb_row_lock_time_max
2078
= srv_n_lock_max_wait_time / 1000;
2079
export_vars.innodb_rows_read = srv_n_rows_read;
2080
export_vars.innodb_rows_inserted = srv_n_rows_inserted;
2081
export_vars.innodb_rows_updated = srv_n_rows_updated;
2082
export_vars.innodb_rows_deleted = srv_n_rows_deleted;
2083
export_vars.innodb_truncated_status_writes = srv_truncated_status_writes;
2085
mutex_exit(&srv_innodb_monitor_mutex);
2088
/*********************************************************************//**
2089
A thread which prints the info output by various InnoDB monitors.
2090
@return a dummy parameter */
2095
void* /*arg __attribute__((unused))*/)
2096
/*!< in: a dummy parameter required by
2099
ib_int64_t sig_count;
2100
double time_elapsed;
2101
time_t current_time;
2102
time_t last_table_monitor_time;
2103
time_t last_tablespace_monitor_time;
2104
time_t last_monitor_time;
2105
ulint mutex_skipped;
2106
ibool last_srv_print_monitor;
2108
#ifdef UNIV_DEBUG_THREAD_CREATION
2109
fprintf(stderr, "Lock timeout thread starts, id %lu\n",
2110
os_thread_pf(os_thread_get_curr_id()));
2113
#ifdef UNIV_PFS_THREAD
2114
pfs_register_thread(srv_monitor_thread_key);
2117
srv_last_monitor_time = ut_time();
2118
last_table_monitor_time = ut_time();
2119
last_tablespace_monitor_time = ut_time();
2120
last_monitor_time = ut_time();
2122
last_srv_print_monitor = srv_print_innodb_monitor;
2124
srv_monitor_active = TRUE;
2126
/* Wake up every 5 seconds to see if we need to print
2127
monitor information or if signalled at shutdown. */
2129
sig_count = os_event_reset(srv_monitor_event);
2131
os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
2133
current_time = ut_time();
2135
time_elapsed = difftime(current_time, last_monitor_time);
2137
if (time_elapsed > 15) {
2138
last_monitor_time = ut_time();
2140
if (srv_print_innodb_monitor) {
2141
/* Reset mutex_skipped counter everytime
2142
srv_print_innodb_monitor changes. This is to
2143
ensure we will not be blocked by kernel_mutex
2144
for short duration information printing,
2145
such as requested by sync_array_print_long_waits() */
2146
if (!last_srv_print_monitor) {
2148
last_srv_print_monitor = TRUE;
2151
if (!srv_printf_innodb_monitor(stderr,
2152
MUTEX_NOWAIT(mutex_skipped),
2156
/* Reset the counter */
2160
last_srv_print_monitor = FALSE;
2164
if (srv_innodb_status) {
2165
mutex_enter(&srv_monitor_file_mutex);
2166
rewind(srv_monitor_file);
2167
if (!srv_printf_innodb_monitor(srv_monitor_file,
2168
MUTEX_NOWAIT(mutex_skipped),
2175
os_file_set_eof(srv_monitor_file);
2176
mutex_exit(&srv_monitor_file_mutex);
2179
if (srv_print_innodb_tablespace_monitor
2180
&& difftime(current_time,
2181
last_tablespace_monitor_time) > 60) {
2182
last_tablespace_monitor_time = ut_time();
2184
fputs("========================"
2185
"========================\n",
2188
ut_print_timestamp(stderr);
2190
fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
2191
"========================"
2192
"========================\n",
2196
fputs("Validating tablespace\n", stderr);
2198
fputs("Validation ok\n"
2199
"---------------------------------------\n"
2200
"END OF INNODB TABLESPACE MONITOR OUTPUT\n"
2201
"=======================================\n",
2205
if (srv_print_innodb_table_monitor
2206
&& difftime(current_time, last_table_monitor_time) > 60) {
2208
last_table_monitor_time = ut_time();
2210
fputs("===========================================\n",
2213
ut_print_timestamp(stderr);
2215
fputs(" INNODB TABLE MONITOR OUTPUT\n"
2216
"===========================================\n",
2220
fputs("-----------------------------------\n"
2221
"END OF INNODB TABLE MONITOR OUTPUT\n"
2222
"==================================\n",
2227
if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
2231
if (srv_print_innodb_monitor
2232
|| srv_print_innodb_lock_monitor
2233
|| srv_print_innodb_tablespace_monitor
2234
|| srv_print_innodb_table_monitor) {
2238
srv_monitor_active = FALSE;
2243
srv_monitor_active = FALSE;
2245
/* We count the number of threads in os_thread_exit(). A created
2246
thread should always use that to exit and not use return() to exit. */
2248
os_thread_exit(NULL);
2250
OS_THREAD_DUMMY_RETURN;
2253
/*********************************************************************//**
2254
A thread which wakes up threads whose lock wait may have lasted too long.
2255
@return a dummy parameter */
2258
srv_lock_timeout_thread(
2259
/*====================*/
2260
void* /*arg __attribute__((unused))*/)
2261
/* in: a dummy parameter required by
2268
ib_int64_t sig_count;
2270
#ifdef UNIV_PFS_THREAD
2271
pfs_register_thread(srv_lock_timeout_thread_key);
2276
/* When someone is waiting for a lock, we wake up every second
2277
and check if a timeout has passed for a lock wait */
2279
sig_count = os_event_reset(srv_timeout_event);
2281
os_event_wait_time_low(srv_timeout_event, 1000000, sig_count);
2283
srv_lock_timeout_active = TRUE;
2285
mutex_enter(&kernel_mutex);
2289
/* Check of all slots if a thread is waiting there, and if it
2290
has exceeded the time limit */
2292
for (i = 0; i < OS_THREAD_MAX_N; i++) {
2294
slot = srv_mysql_table + i;
2298
ulong lock_wait_timeout;
2302
wait_time = ut_difftime(ut_time(), slot->suspend_time);
2304
trx = thr_get_trx(slot->thr);
2305
lock_wait_timeout = thd_lock_wait_timeout(
2308
if (trx_is_interrupted(trx)
2309
|| (lock_wait_timeout < 100000000
2310
&& (wait_time > (double) lock_wait_timeout
2311
|| wait_time < 0))) {
2313
/* Timeout exceeded or a wrap-around in system
2314
time counter: cancel the lock request queued
2315
by the transaction and release possible
2316
other transactions waiting behind; it is
2317
possible that the lock has already been
2318
granted: in that case do nothing */
2320
if (trx->wait_lock) {
2321
lock_cancel_waiting_and_release(
2328
os_event_reset(srv_lock_timeout_thread_event);
2330
mutex_exit(&kernel_mutex);
2332
if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
2340
srv_lock_timeout_active = FALSE;
2343
/* The following synchronisation is disabled, since
2344
the InnoDB monitor output is to be updated every 15 seconds. */
2345
os_event_wait(srv_lock_timeout_thread_event);
2350
srv_lock_timeout_active = FALSE;
2352
/* We count the number of threads in os_thread_exit(). A created
2353
thread should always use that to exit and not use return() to exit. */
2355
os_thread_exit(NULL);
2357
OS_THREAD_DUMMY_RETURN;
2360
/*********************************************************************//**
2361
A thread which prints warnings about semaphore waits which have lasted
2362
too long. These can be used to track bugs which cause hangs.
2363
@return a dummy parameter */
2366
srv_error_monitor_thread(
2367
/*=====================*/
2368
void* /*arg __attribute__((unused))*/)
2369
/*!< in: a dummy parameter required by
2372
/* number of successive fatal timeouts observed */
2373
ulint fatal_cnt = 0;
2374
ib_uint64_t old_lsn;
2375
ib_uint64_t new_lsn;
2376
ib_int64_t sig_count;
2378
old_lsn = srv_start_lsn;
2380
#ifdef UNIV_DEBUG_THREAD_CREATION
2381
fprintf(stderr, "Error monitor thread starts, id %lu\n",
2382
os_thread_pf(os_thread_get_curr_id()));
2385
#ifdef UNIV_PFS_THREAD
2386
pfs_register_thread(srv_error_monitor_thread_key);
2390
srv_error_monitor_active = TRUE;
2392
/* Try to track a strange bug reported by Harald Fuchs and others,
2393
where the lsn seems to decrease at times */
2395
new_lsn = log_get_lsn();
2397
if (new_lsn < old_lsn) {
2398
ut_print_timestamp(stderr);
2400
" InnoDB: Error: old log sequence number %"PRIu64""
2402
"InnoDB: than the new log sequence number %"PRIu64"!\n"
2403
"InnoDB: Please submit a bug report"
2404
" to http://bugs.mysql.com\n",
2410
if (difftime(time(NULL), srv_last_monitor_time) > 60) {
2411
/* We referesh InnoDB Monitor values so that averages are
2412
printed from at most 60 last seconds */
2414
srv_refresh_innodb_monitor_stats();
2417
/* Update the statistics collected for deciding LRU
2419
buf_LRU_stat_update();
2421
/* Update the statistics collected for flush rate policy. */
2422
buf_flush_stat_update();
2424
/* In case mutex_exit is not a memory barrier, it is
2425
theoretically possible some threads are left waiting though
2426
the semaphore is already released. Wake up those threads: */
2428
sync_arr_wake_threads_if_sema_free();
2430
if (sync_array_print_long_waits()) {
2432
if (fatal_cnt > 10) {
2435
"InnoDB: Error: semaphore wait has lasted"
2437
"InnoDB: We intentionally crash the server,"
2438
" because it appears to be hung.\n",
2439
(ulong) srv_fatal_semaphore_wait_threshold);
2447
/* Flush stderr so that a database user gets the output
2448
to possible MySQL error file */
2452
sig_count = os_event_reset(srv_error_event);
2454
os_event_wait_time_low(srv_error_event, 1000000, sig_count);
2456
if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
2461
srv_error_monitor_active = FALSE;
2463
/* We count the number of threads in os_thread_exit(). A created
2464
thread should always use that to exit and not use return() to exit. */
2466
os_thread_exit(NULL);
2468
OS_THREAD_DUMMY_RETURN;
2471
/**********************************************************************//**
2472
Check whether any background thread is active.
2473
@return FALSE if all are are suspended or have exited. */
2476
srv_is_any_background_thread_active(void)
2477
/*=====================================*/
2482
mutex_enter(&kernel_mutex);
2484
for (i = SRV_COM; i <= SRV_MASTER; ++i) {
2485
if (srv_n_threads_active[i] != 0) {
2491
mutex_exit(&kernel_mutex);
2496
/*******************************************************************//**
2497
Tells the InnoDB server that there has been activity in the database
2498
and wakes up the master thread if it is suspended (not sleeping). Used
2499
in the MySQL interface. Note that there is a small chance that the master
2500
thread stays suspended (we do not protect our operation with the
2501
srv_sys_t->mutex, for performance reasons). */
2504
srv_active_wake_master_thread(void)
2505
/*===============================*/
2507
srv_activity_count++;
2509
if (srv_n_threads_active[SRV_MASTER] == 0) {
2511
mutex_enter(&kernel_mutex);
2513
srv_release_threads(SRV_MASTER, 1);
2515
mutex_exit(&kernel_mutex);
2519
/*******************************************************************//**
2520
Tells the purge thread that there has been activity in the database
2521
and wakes up the purge thread if it is suspended (not sleeping). Note
2522
that there is a small chance that the purge thread stays suspended
2523
(we do not protect our operation with the kernel mutex, for
2524
performace reasons). */
2527
srv_wake_purge_thread_if_not_active(void)
2528
/*=====================================*/
2530
ut_ad(!mutex_own(&kernel_mutex));
2532
if (srv_n_purge_threads > 0
2533
&& srv_n_threads_active[SRV_WORKER] == 0) {
2535
mutex_enter(&kernel_mutex);
2537
srv_release_threads(SRV_WORKER, 1);
2539
mutex_exit(&kernel_mutex);
2543
/*******************************************************************//**
2544
Wakes up the master thread if it is suspended or being suspended. */
2547
srv_wake_master_thread(void)
2548
/*========================*/
2550
srv_activity_count++;
2552
mutex_enter(&kernel_mutex);
2554
srv_release_threads(SRV_MASTER, 1);
2556
mutex_exit(&kernel_mutex);
2559
/*******************************************************************//**
2560
Wakes up the purge thread if it's not already awake. */
2563
srv_wake_purge_thread(void)
2564
/*=======================*/
2566
ut_ad(!mutex_own(&kernel_mutex));
2568
if (srv_n_purge_threads > 0) {
2570
mutex_enter(&kernel_mutex);
2572
srv_release_threads(SRV_WORKER, 1);
2574
mutex_exit(&kernel_mutex);
2578
/**********************************************************************
2579
The master thread is tasked to ensure that flush of log file happens
2580
once every second in the background. This is to ensure that not more
2581
than one second of trxs are lost in case of crash when
2582
innodb_flush_logs_at_trx_commit != 1 */
2585
srv_sync_log_buffer_in_background(void)
2586
/*===================================*/
2588
time_t current_time = time(NULL);
2590
srv_main_thread_op_info = "flushing log";
2591
if (difftime(current_time, srv_last_log_flush_time) >= 1) {
2592
log_buffer_sync_in_background(TRUE);
2593
srv_last_log_flush_time = current_time;
2594
srv_log_writes_and_flush++;
2598
/********************************************************************//**
2599
Do a full purge, reconfigure the purge sub-system if a dynamic
2600
change is detected. */
2603
srv_master_do_purge(void)
2604
/*=====================*/
2606
ulint n_pages_purged;
2608
ut_ad(!mutex_own(&kernel_mutex));
2610
ut_a(srv_n_purge_threads == 0);
2613
/* Check for shutdown and change in purge config. */
2614
if (srv_fast_shutdown && srv_shutdown_state > 0) {
2615
/* Nothing to purge. */
2618
n_pages_purged = trx_purge(srv_purge_batch_size);
2621
srv_sync_log_buffer_in_background();
2623
} while (n_pages_purged > 0);
2626
/*********************************************************************//**
2627
The master thread controlling the server.
2628
@return a dummy parameter */
2633
void* /*arg __attribute__((unused))*/)
2634
/*!< in: a dummy parameter required by
2637
buf_pool_stat_t buf_stat;
2639
ulint old_activity_count;
2640
ulint n_pages_purged = 0;
2641
ulint n_bytes_merged;
2642
ulint n_pages_flushed;
2643
ulint n_bytes_archived;
2644
ulint n_tables_to_drop;
2647
ulint n_ios_very_old;
2649
ulint next_itr_time;
2652
#ifdef UNIV_DEBUG_THREAD_CREATION
2653
fprintf(stderr, "Master thread starts, id %lu\n",
2654
os_thread_pf(os_thread_get_curr_id()));
2657
#ifdef UNIV_PFS_THREAD
2658
pfs_register_thread(srv_master_thread_key);
2661
srv_main_thread_process_no = os_proc_get_number();
2662
srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
2664
srv_table_reserve_slot(SRV_MASTER);
2666
mutex_enter(&kernel_mutex);
2668
srv_n_threads_active[SRV_MASTER]++;
2670
mutex_exit(&kernel_mutex);
2673
/*****************************************************************/
2674
/* ---- When there is database activity by users, we cycle in this
2677
srv_main_thread_op_info = "reserving kernel mutex";
2679
buf_get_total_stat(&buf_stat);
2680
n_ios_very_old = log_sys->n_log_ios + buf_stat.n_pages_read
2681
+ buf_stat.n_pages_written;
2682
mutex_enter(&kernel_mutex);
2684
/* Store the user activity counter at the start of this loop */
2685
old_activity_count = srv_activity_count;
2687
mutex_exit(&kernel_mutex);
2689
if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
2691
goto suspend_thread;
2694
/* ---- We run the following loop approximately once per second
2695
when there is database activity */
2697
srv_last_log_flush_time = time(NULL);
2699
/* Sleep for 1 second on entrying the for loop below the first time. */
2700
next_itr_time = ut_time_ms() + 1000;
2702
for (i = 0; i < 10; i++) {
2703
ulint cur_time = ut_time_ms();
2705
/* ALTER TABLE in MySQL requires on Unix that the table handler
2706
can drop tables lazily after there no longer are SELECT
2709
srv_main_thread_op_info = "doing background drop tables";
2711
row_drop_tables_for_mysql_in_background();
2713
srv_main_thread_op_info = "";
2715
if (srv_fast_shutdown && srv_shutdown_state > 0) {
2717
goto background_loop;
2720
buf_get_total_stat(&buf_stat);
2722
n_ios_old = log_sys->n_log_ios + buf_stat.n_pages_read
2723
+ buf_stat.n_pages_written;
2725
srv_main_thread_op_info = "sleeping";
2726
srv_main_1_second_loops++;
2728
if (next_itr_time > cur_time
2729
&& srv_shutdown_state == SRV_SHUTDOWN_NONE) {
2731
/* Get sleep interval in micro seconds. We use
2732
ut_min() to avoid long sleep in case of
2734
os_thread_sleep(ut_min(1000000,
2735
(next_itr_time - cur_time)
2740
/* Each iteration should happen at 1 second interval. */
2741
next_itr_time = ut_time_ms() + 1000;
2743
/* Flush logs if needed */
2744
srv_sync_log_buffer_in_background();
2746
srv_main_thread_op_info = "making checkpoint";
2749
/* If i/os during one second sleep were less than 5% of
2750
capacity, we assume that there is free disk i/o capacity
2751
available, and it makes sense to do an insert buffer merge. */
2753
buf_get_total_stat(&buf_stat);
2754
n_pend_ios = buf_get_n_pending_ios()
2755
+ log_sys->n_pending_writes;
2756
n_ios = log_sys->n_log_ios + buf_stat.n_pages_read
2757
+ buf_stat.n_pages_written;
2758
if (n_pend_ios < SRV_PEND_IO_THRESHOLD
2759
&& (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
2760
srv_main_thread_op_info = "doing insert buffer merge";
2761
ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
2763
/* Flush logs if needed */
2764
srv_sync_log_buffer_in_background();
2767
if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
2768
> srv_max_buf_pool_modified_pct)) {
2770
/* Try to keep the number of modified pages in the
2771
buffer pool under the limit wished by the user */
2773
srv_main_thread_op_info =
2774
"flushing buffer pool pages";
2775
n_pages_flushed = buf_flush_list(
2776
PCT_IO(100), IB_ULONGLONG_MAX);
2778
} else if (srv_adaptive_flushing) {
2780
/* Try to keep the rate of flushing of dirty
2781
pages such that redo log generation does not
2782
produce bursts of IO at checkpoint time. */
2783
ulint n_flush = buf_flush_get_desired_flush_rate();
2786
srv_main_thread_op_info =
2787
"flushing buffer pool pages";
2788
n_flush = ut_min(PCT_IO(100), n_flush);
2796
if (srv_activity_count == old_activity_count) {
2798
/* There is no user activity at the moment, go to
2799
the background loop */
2801
goto background_loop;
2805
/* ---- We perform the following code approximately once per
2806
10 seconds when there is database activity */
2808
#ifdef MEM_PERIODIC_CHECK
2809
/* Check magic numbers of every allocated mem block once in 10
2811
mem_validate_all_blocks();
2813
/* If i/os during the 10 second period were less than 200% of
2814
capacity, we assume that there is free disk i/o capacity
2815
available, and it makes sense to flush srv_io_capacity pages.
2817
Note that this is done regardless of the fraction of dirty
2818
pages relative to the max requested by the user. The one second
2819
loop above requests writes for that case. The writes done here
2820
are not required, and may be disabled. */
2822
buf_get_total_stat(&buf_stat);
2823
n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
2824
n_ios = log_sys->n_log_ios + buf_stat.n_pages_read
2825
+ buf_stat.n_pages_written;
2827
srv_main_10_second_loops++;
2828
if (n_pend_ios < SRV_PEND_IO_THRESHOLD
2829
&& (n_ios - n_ios_very_old < SRV_PAST_IO_ACTIVITY)) {
2831
srv_main_thread_op_info = "flushing buffer pool pages";
2832
buf_flush_list(PCT_IO(100), IB_ULONGLONG_MAX);
2834
/* Flush logs if needed */
2835
srv_sync_log_buffer_in_background();
2838
/* We run a batch of insert buffer merge every 10 seconds,
2839
even if the server were active */
2841
srv_main_thread_op_info = "doing insert buffer merge";
2842
ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
2844
/* Flush logs if needed */
2845
srv_sync_log_buffer_in_background();
2847
if (srv_n_purge_threads == 0) {
2848
srv_main_thread_op_info = "master purging";
2850
srv_master_do_purge();
2852
if (srv_fast_shutdown && srv_shutdown_state > 0) {
2854
goto background_loop;
2858
srv_main_thread_op_info = "flushing buffer pool pages";
2860
/* Flush a few oldest pages to make a new checkpoint younger */
2862
if (buf_get_modified_ratio_pct() > 70) {
2864
/* If there are lots of modified pages in the buffer pool
2865
(> 70 %), we assume we can afford reserving the disk(s) for
2866
the time it requires to flush 100 pages */
2868
n_pages_flushed = buf_flush_list(
2869
PCT_IO(100), IB_ULONGLONG_MAX);
2871
/* Otherwise, we only flush a small number of pages so that
2872
we do not unnecessarily use much disk i/o capacity from
2875
n_pages_flushed = buf_flush_list(
2876
PCT_IO(10), IB_ULONGLONG_MAX);
2879
srv_main_thread_op_info = "making checkpoint";
2881
/* Make a new checkpoint about once in 10 seconds */
2883
log_checkpoint(TRUE, FALSE);
2885
srv_main_thread_op_info = "reserving kernel mutex";
2887
mutex_enter(&kernel_mutex);
2889
/* ---- When there is database activity, we jump from here back to
2890
the start of loop */
2892
if (srv_activity_count != old_activity_count) {
2893
mutex_exit(&kernel_mutex);
2897
mutex_exit(&kernel_mutex);
2899
/* If the database is quiet, we enter the background loop */
2901
/*****************************************************************/
2903
/* ---- In this loop we run background operations when the server
2904
is quiet from user activity. Also in the case of a shutdown, we
2905
loop here, flushing the buffer pool to the data files. */
2907
/* The server has been quiet for a while: start running background
2909
srv_main_background_loops++;
2910
srv_main_thread_op_info = "doing background drop tables";
2912
n_tables_to_drop = row_drop_tables_for_mysql_in_background();
2914
if (n_tables_to_drop > 0) {
2915
/* Do not monopolize the CPU even if there are tables waiting
2916
in the background drop queue. (It is essentially a bug if
2917
MySQL tries to drop a table while there are still open handles
2918
to it and we had to put it to the background drop queue.) */
2920
if (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
2921
os_thread_sleep(100000);
2925
if (srv_n_purge_threads == 0) {
2926
srv_main_thread_op_info = "master purging";
2928
srv_master_do_purge();
2931
srv_main_thread_op_info = "reserving kernel mutex";
2933
mutex_enter(&kernel_mutex);
2934
if (srv_activity_count != old_activity_count) {
2935
mutex_exit(&kernel_mutex);
2938
mutex_exit(&kernel_mutex);
2940
srv_main_thread_op_info = "doing insert buffer merge";
2942
if (srv_fast_shutdown && srv_shutdown_state > 0) {
2945
/* This should do an amount of IO similar to the number of
2946
dirty pages that will be flushed in the call to
2947
buf_flush_list below. Otherwise, the system favors
2948
clean pages over cleanup throughput. */
2949
n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
2953
srv_main_thread_op_info = "reserving kernel mutex";
2955
mutex_enter(&kernel_mutex);
2956
if (srv_activity_count != old_activity_count) {
2957
mutex_exit(&kernel_mutex);
2960
mutex_exit(&kernel_mutex);
2963
srv_main_thread_op_info = "flushing buffer pool pages";
2964
srv_main_flush_loops++;
2965
if (srv_fast_shutdown < 2) {
2966
n_pages_flushed = buf_flush_list(
2967
PCT_IO(100), IB_ULONGLONG_MAX);
2969
/* In the fastest shutdown we do not flush the buffer pool
2970
to data files: we set n_pages_flushed to 0 artificially. */
2972
n_pages_flushed = 0;
2975
srv_main_thread_op_info = "reserving kernel mutex";
2977
mutex_enter(&kernel_mutex);
2978
if (srv_activity_count != old_activity_count) {
2979
mutex_exit(&kernel_mutex);
2982
mutex_exit(&kernel_mutex);
2984
srv_main_thread_op_info = "waiting for buffer pool flush to end";
2985
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
2987
/* Flush logs if needed */
2988
srv_sync_log_buffer_in_background();
2990
srv_main_thread_op_info = "making checkpoint";
2992
log_checkpoint(TRUE, FALSE);
2994
if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {
2996
/* Try to keep the number of modified pages in the
2997
buffer pool under the limit wished by the user */
3002
srv_main_thread_op_info = "reserving kernel mutex";
3004
mutex_enter(&kernel_mutex);
3005
if (srv_activity_count != old_activity_count) {
3006
mutex_exit(&kernel_mutex);
3009
mutex_exit(&kernel_mutex);
3011
srv_main_thread_op_info = "archiving log (if log archive is on)";
3013
log_archive_do(FALSE, &n_bytes_archived);
3015
n_bytes_archived = 0;
3017
/* Keep looping in the background loop if still work to do */
3019
if (srv_fast_shutdown && srv_shutdown_state > 0) {
3020
if (n_tables_to_drop + n_pages_flushed
3021
+ n_bytes_archived != 0) {
3023
/* If we are doing a fast shutdown (= the default)
3024
we do not do purge or insert buffer merge. But we
3025
flush the buffer pool completely to disk.
3026
In a 'very fast' shutdown we do not flush the buffer
3027
pool to data files: we have set n_pages_flushed to
3030
goto background_loop;
3032
} else if (n_tables_to_drop
3033
+ n_pages_purged + n_bytes_merged + n_pages_flushed
3034
+ n_bytes_archived != 0) {
3035
/* In a 'slow' shutdown we run purge and the insert buffer
3036
merge to completion */
3038
goto background_loop;
3041
/* There is no work for background operations either: suspend
3042
master thread to wait for more server activity */
3045
srv_main_thread_op_info = "suspending";
3047
mutex_enter(&kernel_mutex);
3049
if (row_get_background_drop_list_len_low() > 0) {
3050
mutex_exit(&kernel_mutex);
3055
event = srv_suspend_thread();
3057
mutex_exit(&kernel_mutex);
3059
/* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
3060
waits for database activity to die down when converting < 4.1.x
3061
databases, and relies on this string being exactly as it is. InnoDB
3062
manual also mentions this string in several places. */
3063
srv_main_thread_op_info = "waiting for server activity";
3065
os_event_wait(event);
3067
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
3068
/* This is only extra safety, the thread should exit
3069
already when the event wait ends */
3071
os_thread_exit(NULL);
3075
/* When there is user activity, InnoDB will set the event and the
3076
main thread goes back to loop. */
3081
#if !defined(__SUNPRO_C)
3082
OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
3086
/*********************************************************************//**
3087
Asynchronous purge thread.
3088
@return a dummy parameter */
3093
void* /*arg __attribute__((unused))*/) /*!< in: a dummy parameter
3094
required by os_thread_create */
3097
ulint slot_no = ULINT_UNDEFINED;
3098
ulint n_total_purged = ULINT_UNDEFINED;
3100
ut_a(srv_n_purge_threads == 1);
3102
#ifdef UNIV_DEBUG_THREAD_CREATION
3103
fprintf(stderr, "InnoDB: Purge thread running, id %lu\n",
3104
os_thread_pf(os_thread_get_curr_id()));
3105
#endif /* UNIV_DEBUG_THREAD_CREATION */
3107
mutex_enter(&kernel_mutex);
3109
slot_no = srv_table_reserve_slot(SRV_WORKER);
3111
slot = srv_table_get_nth_slot(slot_no);
3113
++srv_n_threads_active[SRV_WORKER];
3115
mutex_exit(&kernel_mutex);
3117
while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
3119
ulint n_pages_purged;
3121
/* If there are very few records to purge or the last
3122
purge didn't purge any records then wait for activity.
3123
We peek at the history len without holding any mutex
3124
because in the worst case we will end up waiting for
3125
the next purge event. */
3126
if (trx_sys->rseg_history_len < srv_purge_batch_size
3127
|| n_total_purged == 0) {
3131
mutex_enter(&kernel_mutex);
3133
event = srv_suspend_thread();
3135
mutex_exit(&kernel_mutex);
3137
os_event_wait(event);
3140
/* Check for shutdown and whether we should do purge at all. */
3141
if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND
3142
|| srv_shutdown_state != 0
3143
|| srv_fast_shutdown) {
3150
/* Purge until there are no more records to purge and there is
3151
no change in configuration or server state. */
3153
n_pages_purged = trx_purge(srv_purge_batch_size);
3155
n_total_purged += n_pages_purged;
3157
} while (n_pages_purged > 0 && !srv_fast_shutdown);
3159
srv_sync_log_buffer_in_background();
3162
mutex_enter(&kernel_mutex);
3164
ut_ad(srv_table_get_nth_slot(slot_no) == slot);
3166
/* Decrement the active count. */
3167
srv_suspend_thread();
3169
slot->in_use = FALSE;
3171
/* Free the thread local memory. */
3172
thr_local_free(os_thread_get_curr_id());
3174
mutex_exit(&kernel_mutex);
3176
#ifdef UNIV_DEBUG_THREAD_CREATION
3177
fprintf(stderr, "InnoDB: Purge thread exiting, id %lu\n",
3178
os_thread_pf(os_thread_get_curr_id()));
3179
#endif /* UNIV_DEBUG_THREAD_CREATION */
3181
/* We count the number of threads in os_thread_exit(). A created
3182
thread should always use that to exit and not use return() to exit. */
3183
os_thread_exit(NULL);
3185
OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
3188
/**********************************************************************//**
3189
Enqueues a task to server task queue and releases a worker thread, if there
3190
is a suspended one. */
3193
srv_que_task_enqueue_low(
3194
/*=====================*/
3195
que_thr_t* thr) /*!< in: query thread */
3199
mutex_enter(&kernel_mutex);
3201
UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
3203
srv_release_threads(SRV_WORKER, 1);
3205
mutex_exit(&kernel_mutex);