/*****************************************************************************

Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (C) 2008, 2009 Google Inc.
Copyright (C) 2009, Percona Inc.

Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.

Portions of this file contain modifications contributed and copyrighted
by Percona Inc. Those modifications are
gratefully acknowledged and are described briefly in the InnoDB
documentation. The contributions by Percona Inc. are incorporated with
their permission, and subject to the conditions contained in the file
COPYING.Percona.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/
/**************************************************//**
The database server main program

NOTE: SQL Server 7 uses something which the documentation
calls user mode scheduled threads (UMS threads). One such
thread is usually allocated per processor. Win32
documentation does not know any UMS threads, which suggests
that the concept is internal to SQL Server 7. It may mean that
SQL Server 7 does all the scheduling of threads itself, even
in i/o waits. We should maybe modify InnoDB to use the same
technique, because thread switches within NT may be too slow.

SQL Server 7 also mentions fibers, which are cooperatively
scheduled threads. They can boost performance by 5%,
according to Delaney and Soukup's book.

Windows 2000 will have something called thread pooling
(see msdn website), which we could possibly use.

Another possibility could be to use some very fast user space
thread library. This might confuse NT though.

Created 10/8/1995 Heikki Tuuri
*******************************************************/
#include <drizzled/error.h>
#include <drizzled/errmsg_print.h>

#include "sync0sync.h"
#include "pars0pars.h"
#include "lock0lock.h"
#include "trx0purge.h"
#include "ibuf0ibuf.h"
#include "dict0load.h"
#include "dict0boot.h"
#include "srv0start.h"
#include "row0mysql.h"
#include "ha_prototypes.h"
#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
/* This is set to TRUE if the MySQL user has set it in MySQL; currently
affects only FOREIGN KEY definition parsing */
UNIV_INTERN ibool	srv_lower_case_table_names = FALSE;

/* The following counter is incremented whenever there is some user activity
in the server */
UNIV_INTERN ulint	srv_activity_count = 0;

/* The following is the maximum allowed duration of a lock wait. */
UNIV_INTERN ulint	srv_fatal_semaphore_wait_threshold = 600;

/* How much data manipulation language (DML) statements need to be delayed,
in microseconds, in order to reduce the lagging of the purge thread. */
UNIV_INTERN ulint	srv_dml_needed_delay = 0;

UNIV_INTERN ibool	srv_lock_timeout_active = FALSE;
UNIV_INTERN ibool	srv_monitor_active = FALSE;
UNIV_INTERN ibool	srv_error_monitor_active = FALSE;

UNIV_INTERN const char*	srv_main_thread_op_info = "";
/* Server parameters which are read from the initfile */

/* The following three are dir paths which are catenated before file
names, where the file name itself may also contain a path */

UNIV_INTERN char*	srv_data_home = NULL;
#ifdef UNIV_LOG_ARCHIVE
UNIV_INTERN char*	srv_arch_dir = NULL;
#endif /* UNIV_LOG_ARCHIVE */

/** store to its own file each table created by a user; data
dictionary tables are in the system tablespace 0 */
UNIV_INTERN my_bool	srv_file_per_table;
/** The file format to use on new *.ibd files. */
UNIV_INTERN ulint	srv_file_format = 0;
/** Whether to check file format during startup. A value of
DICT_TF_FORMAT_MAX + 1 means no checking, i.e. FALSE. The default is to
set it to the highest format we support. */
UNIV_INTERN ulint	srv_max_file_format_at_startup = DICT_TF_FORMAT_MAX;
#if DICT_TF_FORMAT_51
# error "DICT_TF_FORMAT_51 must be 0!"
#endif

/** Place locks to records only, i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
UNIV_INTERN ibool	srv_locks_unsafe_for_binlog = FALSE;

/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads.
Currently we support native aio on windows and linux */
UNIV_INTERN my_bool	srv_use_native_aio = TRUE;
#ifdef __WIN__
/* Windows native condition variables. We use runtime loading / function
pointers, because they are not available on Windows Server 2003 and
Windows XP/2000.

We use condition for events on Windows if possible, even if os_event
resembles Windows kernel event object well API-wise. The reason is
performance, kernel objects are heavyweights and WaitForSingleObject() is a
performance killer causing calling thread to context switch. Besides, Innodb
is preallocating large number (often millions) of os_events. With kernel event
objects it takes a big chunk out of non-paged pool, which is better suited
for tasks like IO than for storing idle event objects. */
UNIV_INTERN ibool	srv_use_native_conditions = FALSE;
#endif /* __WIN__ */
UNIV_INTERN ulint	srv_n_data_files = 0;
UNIV_INTERN char**	srv_data_file_names = NULL;
/* size in database pages */
UNIV_INTERN ulint*	srv_data_file_sizes = NULL;

/* if TRUE, then we auto-extend the last data file */
UNIV_INTERN ibool	srv_auto_extend_last_data_file = FALSE;
/* if != 0, this tells the max size auto-extending may increase the
last data file size */
UNIV_INTERN ulint	srv_last_file_size_max = 0;
/* If the last data file is auto-extended, we add this
many pages to it at a time */
UNIV_INTERN unsigned int srv_auto_extend_increment = 8;
UNIV_INTERN ulint*	srv_data_file_is_raw_partition = NULL;
/* If the following is TRUE we do not allow inserts etc. This protects
the user from forgetting the 'newraw' keyword to my.cnf */

UNIV_INTERN ibool	srv_created_new_raw = FALSE;

UNIV_INTERN char**	srv_log_group_home_dirs = NULL;

UNIV_INTERN ulint	srv_n_log_groups = ULINT_MAX;
UNIV_INTERN ulint	srv_n_log_files = ULINT_MAX;
/* size in database pages */
UNIV_INTERN ulint	srv_log_file_size = ULINT_MAX;
/* size in database pages */
UNIV_INTERN ulint	srv_log_buffer_size = ULINT_MAX;
UNIV_INTERN ulong	srv_flush_log_at_trx_commit = 1;
/* Try to flush dirty pages so as to avoid IO bursts at
the checkpoints. */
UNIV_INTERN bool	srv_adaptive_flushing = TRUE;

/** Maximum number of times allowed to conditionally acquire
mutex before switching to blocking wait on the mutex */
#define MAX_MUTEX_NOWAIT	20

/** Check whether the number of failed nonblocking mutex
acquisition attempts exceeds maximum allowed value. If so,
srv_printf_innodb_monitor() will request mutex acquisition
with mutex_enter(), which will wait until it gets the mutex. */
#define MUTEX_NOWAIT(mutex_skipped)	((mutex_skipped) < MAX_MUTEX_NOWAIT)
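/* For illustration, a hedged sketch of how the two macros above pair up
in a monitor thread; "mutex_skipped" and the surrounding control flow are
hypothetical names, the real callers being the monitor threads further
down in this file:

	ulint	mutex_skipped = 0;

	// MUTEX_NOWAIT() is TRUE while we are still allowed to skip:
	// srv_printf_innodb_monitor() then only tries the kernel mutex.
	if (srv_printf_innodb_monitor(file,
				      MUTEX_NOWAIT(mutex_skipped),
				      NULL, NULL)) {
		mutex_skipped = 0;	// output printed, reset the counter
	} else {
		mutex_skipped++;	// skipped; after MAX_MUTEX_NOWAIT
					// failures we block on the mutex
	}
*/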
/** The sort order table of the MySQL latin1_swedish_ci character set
collation */
#if defined(BUILD_DRIZZLE)
const byte	srv_latin1_ordering[256]	/* The sort order table of the latin1
					character set. The following table is
					the MySQL order as of Feb 10th, 2002 */
= {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27
, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F
, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F
, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F
, 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
, 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F
, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97
, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F
, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7
, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF
, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7
, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF
, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF
, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
};
#else
UNIV_INTERN const byte*	srv_latin1_ordering;
#endif /* BUILD_DRIZZLE */
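/* A minimal sketch of how a sort order table like the one above is used:
every byte of both strings is mapped through the table before comparison,
which is what makes the ordering case-insensitive, e.g. 'a' = 0x61 maps
to 0x41 = 'A'. The helper below is hypothetical; the real comparison
lives in the MySQL/Drizzle collation code:

	static int
	latin1_order_cmp(const byte* a, const byte* b, ulint len)
	{
		ulint	i;

		for (i = 0; i < len; i++) {
			byte	ca = srv_latin1_ordering[a[i]];
			byte	cb = srv_latin1_ordering[b[i]];

			if (ca != cb) {
				return(ca < cb ? -1 : 1);
			}
		}

		return(0);
	}
*/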
/* use os/external memory allocator */
UNIV_INTERN my_bool	srv_use_sys_malloc = TRUE;
/* requested size in kilobytes */
UNIV_INTERN ulint	srv_buf_pool_size = ULINT_MAX;
/* requested number of buffer pool instances */
UNIV_INTERN ulint	srv_buf_pool_instances = 1;
/* previously requested size */
UNIV_INTERN ulint	srv_buf_pool_old_size;
/* current size in kilobytes */
UNIV_INTERN ulint	srv_buf_pool_curr_size = 0;
/* size in bytes */
UNIV_INTERN ulint	srv_mem_pool_size = ULINT_MAX;
UNIV_INTERN ulint	srv_lock_table_size = ULINT_MAX;
/* This parameter is deprecated. Use srv_n_io_[read|write]_threads
instead. */
UNIV_INTERN ulint	srv_n_file_io_threads = ULINT_MAX;
UNIV_INTERN ulint	srv_n_read_io_threads = ULINT_MAX;
UNIV_INTERN ulint	srv_n_write_io_threads = ULINT_MAX;

/* User settable value of the number of pages that must be present
in the buffer cache and accessed sequentially for InnoDB to trigger a
readahead request. */
UNIV_INTERN ulong	srv_read_ahead_threshold = 56;
#ifdef UNIV_LOG_ARCHIVE
UNIV_INTERN ibool	srv_log_archive_on = FALSE;
UNIV_INTERN ibool	srv_archive_recovery = 0;
UNIV_INTERN ib_uint64_t	srv_archive_recovery_limit_lsn;
#endif /* UNIV_LOG_ARCHIVE */
/* This parameter is used to throttle the number of insert buffers that are
merged in a batch. By increasing this parameter on a faster disk you can
possibly reduce the number of I/O operations performed to complete the
merge operation. The value of this parameter is used as is by the
background loop when the system is idle (low load); on a busy system
the parameter is scaled down by a factor of 4, to avoid putting
a heavier load on the I/O subsystem. */

UNIV_INTERN ulong	srv_insert_buffer_batch_size = 20;

UNIV_INTERN char*	srv_file_flush_method_str = NULL;
UNIV_INTERN ulint	srv_unix_file_flush_method = SRV_UNIX_FSYNC;
UNIV_INTERN ulint	srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;

UNIV_INTERN ulint	srv_max_n_open_files = 300;
/* Number of IO operations per second the server can do */
UNIV_INTERN ulong	srv_io_capacity = 200;

/* The InnoDB main thread tries to keep the ratio of modified pages
in the buffer pool to all database pages in the buffer pool smaller than
the following number. But it is not guaranteed that the value stays below
that during a time of heavy update/insert activity. */

UNIV_INTERN ulong	srv_max_buf_pool_modified_pct = 75;

/* the number of purge threads to use from the worker pool (currently 0 or 1). */
UNIV_INTERN ulong	srv_n_purge_threads = 0;

/* the number of records to purge in one batch */
UNIV_INTERN ulong	srv_purge_batch_size = 20;
/* variable counts amount of data read in total (in bytes) */
UNIV_INTERN ulint	srv_data_read = 0;

/* here we count the amount of data written in total (in bytes) */
UNIV_INTERN ulint	srv_data_written = 0;

/* the number of the log write requests done */
UNIV_INTERN ulint	srv_log_write_requests = 0;

/* the number of physical writes to the log performed */
UNIV_INTERN ulint	srv_log_writes = 0;

/* amount of data written to the log files in bytes */
UNIV_INTERN ulint	srv_os_log_written = 0;

/* amount of writes being done to the log files */
UNIV_INTERN ulint	srv_os_log_pending_writes = 0;

/* we increase this counter when we don't have enough space in the
log buffer and have to flush it */
UNIV_INTERN ulint	srv_log_waits = 0;

/* this variable counts the amount of times, when the doublewrite buffer
was flushed */
UNIV_INTERN ulint	srv_dblwr_writes = 0;

/* here we store the number of pages that have been flushed to the
doublewrite buffer */
UNIV_INTERN ulint	srv_dblwr_pages_written = 0;

/* in this variable we store the number of write requests issued */
UNIV_INTERN ulint	srv_buf_pool_write_requests = 0;

/* here we store the number of times when we had to wait for a free page
in the buffer pool. It happens when the buffer pool is full and we need
to make a flush, in order to be able to read or create a page. */
UNIV_INTERN ulint	srv_buf_pool_wait_free = 0;
/* variable to count the number of pages that were written from buffer
pool to the disk */
UNIV_INTERN ulint	srv_buf_pool_flushed = 0;

/** Number of buffer pool reads that led to the
reading of a disk page */
UNIV_INTERN ulint	srv_buf_pool_reads = 0;

/* structure to pass status variables to MySQL */
UNIV_INTERN export_struc export_vars;

/* If the following is != 0 we do not allow inserts etc. This protects
the user from forgetting the innodb_force_recovery keyword to my.cnf */

UNIV_INTERN ulint	srv_force_recovery = 0;
/*-----------------------*/
/* We are prepared for a situation that we have this many threads waiting for
a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
value. */

UNIV_INTERN ulint	srv_max_n_threads = 0;

/* The following controls how many threads we let inside InnoDB concurrently:
threads waiting for locks are not counted into the number because otherwise
we could get a deadlock. MySQL creates a thread for each user session, and
semaphore contention and convoy problems can occur without this restriction.
Value 10 should be good if there are less than 4 processors + 4 disks in the
computer. Bigger computers need bigger values. Value 0 will disable the
concurrency check. */
UNIV_INTERN ulong	srv_thread_concurrency = 0;

/* this mutex protects srv_conc data structures */
UNIV_INTERN os_fast_mutex_t	srv_conc_mutex;
/* number of transactions that have declared_to_be_inside_innodb set.
It used to be a non-error for this value to drop below zero temporarily.
This is no longer true. We'll, however, keep the lint datatype to add
assertions to catch any corner cases that we may have missed. */
UNIV_INTERN lint	srv_conc_n_threads = 0;
/* number of OS threads waiting in the FIFO for a permission to enter
InnoDB */
UNIV_INTERN ulint	srv_conc_n_waiting_threads = 0;

typedef struct srv_conc_slot_struct	srv_conc_slot_t;
struct srv_conc_slot_struct{
	os_event_t	event;		/*!< event to wait */
	ibool		reserved;	/*!< TRUE if slot
					reserved */
	ibool		wait_ended;	/*!< TRUE when another
					thread has already set
					the event and the
					thread in this slot is
					free to proceed; but
					reserved may still be
					TRUE at that point */
	UT_LIST_NODE_T(srv_conc_slot_t)	srv_conc_queue;	/*!< queue node */
};

/* queue of threads waiting to get in */
UNIV_INTERN UT_LIST_BASE_NODE_T(srv_conc_slot_t)	srv_conc_queue;
/* array of wait slots */
UNIV_INTERN srv_conc_slot_t*	srv_conc_slots;

/* Number of times a thread is allowed to enter InnoDB within the same
SQL query after it has once got the ticket at srv_conc_enter_innodb */
#define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter
#define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay
/*-----------------------*/
/* If the following is set to 1 then we do not run purge and insert buffer
merge to completion before shutdown. If it is set to 2, do not even flush the
buffer pool to data files at the shutdown: we effectively 'crash'
InnoDB (but lose no committed transactions). */
UNIV_INTERN ulint	srv_fast_shutdown = 0;

/* Generate an innodb_status.<pid> file */
UNIV_INTERN ibool	srv_innodb_status = FALSE;

/* When estimating number of different key values in an index, sample
this many index pages */
UNIV_INTERN ib_uint64_t srv_stats_sample_pages = 8;

UNIV_INTERN ibool	srv_use_doublewrite_buf = TRUE;
UNIV_INTERN ibool	srv_use_checksums = TRUE;

UNIV_INTERN ulong	srv_replication_delay = 0;
/*-------------------------------------------*/
UNIV_INTERN ulong	srv_n_spin_wait_rounds = 30;
UNIV_INTERN ulong	srv_n_free_tickets_to_enter = 500;
UNIV_INTERN ulong	srv_thread_sleep_delay = 10000;
UNIV_INTERN ulong	srv_spin_wait_delay = 6;
UNIV_INTERN ibool	srv_priority_boost = TRUE;

#ifdef UNIV_DEBUG
UNIV_INTERN ibool	srv_print_thread_releases = FALSE;
UNIV_INTERN ibool	srv_print_lock_waits = FALSE;
UNIV_INTERN ibool	srv_print_buf_io = FALSE;
UNIV_INTERN ibool	srv_print_log_io = FALSE;
UNIV_INTERN ibool	srv_print_latch_waits = FALSE;
#endif /* UNIV_DEBUG */
UNIV_INTERN ulint	srv_n_rows_inserted = 0;
UNIV_INTERN ulint	srv_n_rows_updated = 0;
UNIV_INTERN ulint	srv_n_rows_deleted = 0;
UNIV_INTERN ulint	srv_n_rows_read = 0;

static ulint	srv_n_rows_inserted_old = 0;
static ulint	srv_n_rows_updated_old = 0;
static ulint	srv_n_rows_deleted_old = 0;
static ulint	srv_n_rows_read_old = 0;

UNIV_INTERN ulint	srv_n_lock_wait_count = 0;
UNIV_INTERN ulint	srv_n_lock_wait_current_count = 0;
UNIV_INTERN ib_int64_t	srv_n_lock_wait_time = 0;
UNIV_INTERN ulint	srv_n_lock_max_wait_time = 0;

UNIV_INTERN ulint	srv_truncated_status_writes = 0;
/*
Set the following to 0 if you want InnoDB to write messages on
stderr on startup/shutdown.
*/
UNIV_INTERN ibool	srv_print_verbose_log = TRUE;
UNIV_INTERN ibool	srv_print_innodb_monitor = FALSE;
UNIV_INTERN ibool	srv_print_innodb_lock_monitor = FALSE;
UNIV_INTERN ibool	srv_print_innodb_tablespace_monitor = FALSE;
UNIV_INTERN ibool	srv_print_innodb_table_monitor = FALSE;
/* Array of English strings describing the current state of an
i/o handler thread */

UNIV_INTERN const char*	srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
UNIV_INTERN const char*	srv_io_thread_function[SRV_MAX_N_IO_THREADS];

UNIV_INTERN time_t	srv_last_monitor_time;

UNIV_INTERN mutex_t	srv_innodb_monitor_mutex;

/* Mutex for locking srv_monitor_file */
UNIV_INTERN mutex_t	srv_monitor_file_mutex;
#ifdef UNIV_PFS_MUTEX
/* Key to register kernel_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t	kernel_mutex_key;
/* Key to protect writing the commit_id to the sys header */
UNIV_INTERN mysql_pfs_key_t	commit_id_mutex_key;
/* Key to register srv_innodb_monitor_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t	srv_innodb_monitor_mutex_key;
/* Key to register srv_monitor_file_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t	srv_monitor_file_mutex_key;
/* Key to register srv_dict_tmpfile_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t	srv_dict_tmpfile_mutex_key;
/* Key to register srv_misc_tmpfile_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t	srv_misc_tmpfile_mutex_key;
#endif /* UNIV_PFS_MUTEX */
/* Temporary file for innodb monitor output */
UNIV_INTERN FILE*	srv_monitor_file;
/* Mutex for locking srv_dict_tmpfile.
This mutex has a very high rank; threads reserving it should not
be holding any InnoDB latches. */
UNIV_INTERN mutex_t	srv_dict_tmpfile_mutex;
/* Temporary file for output from the data dictionary */
UNIV_INTERN FILE*	srv_dict_tmpfile;
/* Mutex for locking srv_misc_tmpfile.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
UNIV_INTERN mutex_t	srv_misc_tmpfile_mutex;
/* Temporary file for miscellaneous diagnostic output */
UNIV_INTERN FILE*	srv_misc_tmpfile;

UNIV_INTERN ulint	srv_main_thread_process_no = 0;
UNIV_INTERN ulint	srv_main_thread_id = 0;
/* The following count work done by srv_master_thread. */

/* Iterations by the 'once per second' loop. */
static ulint	srv_main_1_second_loops = 0;
/* Calls to sleep by the 'once per second' loop. */
static ulint	srv_main_sleeps = 0;
/* Iterations by the 'once per 10 seconds' loop. */
static ulint	srv_main_10_second_loops = 0;
/* Iterations of the loop bounded by the 'background_loop' label. */
static ulint	srv_main_background_loops = 0;
/* Iterations of the loop bounded by the 'flush_loop' label. */
static ulint	srv_main_flush_loops = 0;
/* Log writes involving flush. */
static ulint	srv_log_writes_and_flush = 0;

/* This is only ever touched by the master thread. It records the
time when the last flush of log file has happened. The master
thread ensures that we flush the log files at least once per
second. */
static time_t	srv_last_log_flush_time;

/* The master thread performs various tasks based on the current
state of IO activity and the level of IO utilization in past
intervals. Following macros define thresholds for these conditions. */
#define SRV_PEND_IO_THRESHOLD	(PCT_IO(3))
#define SRV_RECENT_IO_ACTIVITY	(PCT_IO(5))
#define SRV_PAST_IO_ACTIVITY	(PCT_IO(200))
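/* For orientation: PCT_IO(p) (see srv0srv.h) scales p as a percentage of
srv_io_capacity, so the thresholds above follow the configured IO rate.
Assuming the default srv_io_capacity = 200, the values work out to:

	SRV_PEND_IO_THRESHOLD	= PCT_IO(3)   =   6 pending i/os
	SRV_RECENT_IO_ACTIVITY	= PCT_IO(5)   =  10 i/os in the last interval
	SRV_PAST_IO_ACTIVITY	= PCT_IO(200) = 400 i/os over past intervals
*/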
/*
IMPLEMENTATION OF THE SERVER MAIN PROGRAM
=========================================

There is the following analogy between this database
server and an operating system kernel:

DB concept			equivalent OS concept
----------			---------------------
transaction		--	process;

query thread		--	thread;

the rollback state	--	kill signal delivered to a process;

query thread execution:
(a) without kernel mutex
reserved		--	process executing in user mode;
(b) with kernel mutex reserved
			--	process executing in kernel mode;

The server is controlled by a master thread which runs at
a priority higher than normal, that is, higher than user threads.
It sleeps most of the time, and wakes up, say, every 300 milliseconds,
to check whether there is anything happening in the server which
requires intervention of the master thread. Such situations may be,
for example, when flushing of dirty blocks is needed in the buffer
pool or old versions of database rows have to be cleaned away.

The threads which we call user threads serve the queries of
the clients and input from the console of the server.
They run at normal priority. The server may have several
communication endpoints. A dedicated set of user threads waits
at each of these endpoints ready to receive a client request.
Each request is taken by a single user thread, which then starts
processing and, when the result is ready, sends it to the client
and returns to wait at the same endpoint the thread started from.

So, we do not have dedicated communication threads listening at
the endpoints and dealing the jobs to dedicated worker threads.
Our architecture saves one thread switch per request, compared
to the solution with dedicated communication threads
which amounts to 15 microseconds on 100 MHz Pentium
running NT. If the client
is communicating over a network, this saving is negligible, but
if the client resides in the same machine, maybe in an SMP machine
on a different processor from the server thread, the saving
can be important as the threads can communicate over shared
memory with an overhead of a few microseconds.

We may later implement a dedicated communication thread solution
for those endpoints which communicate over a network.

Our solution with user threads has two problems: for each endpoint
there has to be a number of listening threads. If there are many
communication endpoints, it may be difficult to set the right number
of concurrent threads in the system, as many of the threads
may always be waiting at less busy endpoints. Another problem
is queuing of the messages, as the server internally does not
offer any queue for jobs.

Another group of user threads is intended for splitting the
queries and processing them in parallel. Let us call these
parallel communication threads. These threads are waiting for
parallelized tasks, suspended on event semaphores.

A single user thread waits for input from the console,
like a command to shut the database.

Utility threads are a different group of threads which take
care of the buffer pool flushing and other, mainly background
operations, in the server.
Some of these utility threads always run at a lower than normal
priority, so that they are always in background. Some of them
may dynamically boost their priority by the pri_adjust function,
even to higher than normal priority, if their task becomes urgent.
The running of utilities is controlled by high- and low-water marks
of urgency. The urgency may be measured by the number of dirty blocks
in the buffer pool, in the case of the flush thread, for example.
When the high-water mark is exceeded, a utility starts running, until
the urgency drops under the low-water mark. Then the utility thread
suspends itself to wait for an event. The master thread is
responsible for signaling this event when the utility thread is
again needed.

For each individual type of utility, some threads always remain
at lower than normal priority. This is because pri_adjust is implemented
so that the threads at normal or higher priority control their
share of running time by calling sleep. Thus, if the load of the
system suddenly drops, these threads cannot necessarily utilize
the system fully. The background priority threads make up for this,
starting to run when the load drops.

When there is no activity in the system, the master thread also
suspends itself to wait for an event, making
the server totally silent. The responsibility to signal this
event is on the user thread which again receives a message
from a client.

There is still one complication in our server design. If a
background utility thread obtains a resource (e.g., mutex) needed by a user
thread, and there is also some other user activity in the system,
the user thread may have to wait indefinitely long for the
resource, as the OS does not schedule a background thread if
there is some other runnable user thread. This problem is called
priority inversion in real-time programming.

One solution to the priority inversion problem would be to
keep record of which thread owns which resource and
in the above case boost the priority of the background thread
so that it will be scheduled and it can release the resource.
This solution is called priority inheritance in real-time programming.
A drawback of this solution is that the overhead of acquiring a mutex
increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
the thread has to call os_thread_get_curr_id.
This may be compared to 0.5 microsecond overhead for a mutex lock-unlock
pair. Note that the thread
cannot store the information in the resource, say mutex, itself,
because competing threads could wipe out the information if it is
stored before acquiring the mutex, and if it is stored afterwards,
the information is outdated for the time of one machine instruction,
at least. (To be precise, the information could be stored to
lock_word in mutex if the machine supports atomic swap.)

The above solution with priority inheritance may become relevant in the
future, but at the moment we plan to implement a more coarse solution,
which could be called a global priority inheritance. If a thread
has to wait for a long time, say 300 milliseconds, for a resource,
we just guess that it may be waiting for a resource owned by a background
thread, and boost the priority of all runnable background threads
to the normal level. The background threads then themselves adjust
their fixed priority back to background after releasing all resources
they had (or, at some fixed points in their program code).

What is the performance of the global priority inheritance solution?
We may weigh the length of the wait time, 300 milliseconds, during
which the system processes some other thread, against
the cost of boosting the priority of each runnable background
thread, rescheduling it, and lowering the priority again.
On 100 MHz Pentium + NT this overhead may be of the order of 100
microseconds per thread. So, if the number of runnable background
threads is not very big, say < 100, the cost is tolerable.
Utility threads probably will access resources used by
user threads not very often, so collisions of user threads
with preempted utility threads should not happen very often.

The thread table contains
information on the current status of each thread existing in the system,
and also the event semaphores used in suspending the master thread
and utility and parallel communication threads when they have nothing to do.
The thread table can be seen as an analogue to the process table
in a traditional Unix implementation.

The thread table is also used in the global priority inheritance
scheme. This brings in one additional complication: threads accessing
the thread table must have at least normal fixed priority,
because the priority inheritance solution does not work if a background
thread is preempted while possessing the mutex protecting the thread table.
So, if a thread accesses the thread table, its priority has to be
boosted at least to normal. This priority requirement can be seen as
similar to the privileged mode used when processing the kernel calls in
traditional Unix. */
/* Thread slot in the thread table */
struct srv_slot_struct{
	os_thread_id_t	id;		/*!< thread id */
	os_thread_t	handle;		/*!< thread handle */
	unsigned	type:3;		/*!< thread type: user, utility etc. */
	unsigned	in_use:1;	/*!< TRUE if this slot is in use */
	unsigned	suspended:1;	/*!< TRUE if the thread is waiting
					for the event of this slot */
	ib_time_t	suspend_time;	/*!< time when the thread was
					suspended */
	os_event_t	event;		/*!< event used in suspending the
					thread when it has nothing to do */
	que_thr_t*	thr;		/*!< suspended query thread (only
					used for MySQL threads) */
};

/* Table for MySQL threads where they will be suspended to wait for locks */
UNIV_INTERN srv_slot_t*	srv_mysql_table = NULL;

UNIV_INTERN os_event_t	srv_timeout_event;

UNIV_INTERN os_event_t	srv_monitor_event;

UNIV_INTERN os_event_t	srv_error_event;

UNIV_INTERN os_event_t	srv_lock_timeout_thread_event;

UNIV_INTERN srv_sys_t*	srv_sys	= NULL;
/* padding to prevent other memory update hotspots from residing on
the same memory cache line */
UNIV_INTERN byte	srv_pad1[64];
/* mutex protecting the server, trx structs, query threads, and lock table */
UNIV_INTERN mutex_t*	kernel_mutex_temp;
/* mutex protecting the sys header for writing the commit id */
UNIV_INTERN mutex_t*	commit_id_mutex_temp;

/* padding to prevent other memory update hotspots from residing on
the same memory cache line */
UNIV_INTERN byte	srv_pad2[64];

/* The following three values measure the urgency of the jobs of
buffer, version, and insert threads. They may vary from 0 - 1000.
The server mutex protects all these variables. The low-water values
tell that the server can acquiesce the utility when the value
drops below this low-water mark. */

static ulint	srv_meter[SRV_MASTER + 1];
static ulint	srv_meter_low_water[SRV_MASTER + 1];
static ulint	srv_meter_high_water[SRV_MASTER + 1];
static ulint	srv_meter_high_water2[SRV_MASTER + 1];
static ulint	srv_meter_foreground[SRV_MASTER + 1];
/***********************************************************************
Prints counters for work done by srv_master_thread. */
static
void
srv_print_master_thread_info(
/*=========================*/
	FILE	*file)	/* in: output stream */
{
	fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
		"%lu 10_second, %lu background, %lu flush\n",
		srv_main_1_second_loops, srv_main_sleeps,
		srv_main_10_second_loops, srv_main_background_loops,
		srv_main_flush_loops);
	fprintf(file, "srv_master_thread log flush and writes: %lu\n",
		srv_log_writes_and_flush);
}
/* The following values give info about the activity going on in
the database. They are protected by the server mutex. The arrays
are indexed by the type of the thread. */

UNIV_INTERN ulint	srv_n_threads_active[SRV_MASTER + 1];
UNIV_INTERN ulint	srv_n_threads[SRV_MASTER + 1];
/*********************************************************************//**
Sets the info describing an i/o thread current state. */
UNIV_INTERN
void
srv_set_io_thread_op_info(
/*======================*/
	ulint		i,	/*!< in: the 'segment' of the i/o thread */
	const char*	str)	/*!< in: constant char string describing the
				state */
{
	ut_a(i < SRV_MAX_N_IO_THREADS);

	srv_io_thread_op_info[i] = str;
}
/*********************************************************************//**
Accessor function to get pointer to n'th slot in the server thread
table.
@return	pointer to the slot */
static
srv_slot_t*
srv_table_get_nth_slot(
/*===================*/
	ulint	index)		/*!< in: index of the slot */
{
	ut_a(index < OS_THREAD_MAX_N);

	return(srv_sys->threads + index);
}
/*********************************************************************//**
Gets the number of threads in the system.
@return	sum of srv_n_threads[] */
UNIV_INTERN
ulint
srv_get_n_threads(void)
/*===================*/
{
	ulint	i;
	ulint	n_threads = 0;

	mutex_enter(&kernel_mutex);

	for (i = SRV_COM; i < SRV_MASTER + 1; i++) {
		n_threads += srv_n_threads[i];
	}

	mutex_exit(&kernel_mutex);

	return(n_threads);
}
/*********************************************************************//**
Reserves a slot in the thread table for the current thread. Also creates the
thread local storage struct for the current thread. NOTE! The server mutex
has to be reserved by the caller!
@return	reserved slot index */
static
ulint
srv_table_reserve_slot(
/*===================*/
	enum srv_thread_type	type)	/*!< in: type of the thread */
{
	srv_slot_t*	slot;
	ulint		i;

	ut_a(type > 0);
	ut_a(type <= SRV_MASTER);

	i = 0;
	slot = srv_table_get_nth_slot(i);

	while (slot->in_use) {
		i++;
		slot = srv_table_get_nth_slot(i);
	}

	ut_a(slot->in_use == FALSE);

	slot->in_use = TRUE;
	slot->suspended = FALSE;
	slot->type = type;
	slot->id = os_thread_get_curr_id();
	slot->handle = os_thread_get_curr();

	thr_local_create();

	thr_local_set_slot_no(os_thread_get_curr_id(), i);

	return(i);
}
/*********************************************************************//**
Suspends the calling thread to wait for the event in its thread slot.
NOTE! The server mutex has to be reserved by the caller!
@return	event for the calling thread to wait */
UNIV_INTERN
os_event_t
srv_suspend_thread(void)
/*====================*/
{
	srv_slot_t*		slot;
	os_event_t		event;
	ulint			slot_no;
	enum srv_thread_type	type;

	ut_ad(mutex_own(&kernel_mutex));

	slot_no = thr_local_get_slot_no(os_thread_get_curr_id());

	if (srv_print_thread_releases) {
		fprintf(stderr,
			"Suspending thread %lu to slot %lu\n",
			(ulong) os_thread_get_curr_id(), (ulong) slot_no);
	}

	slot = srv_table_get_nth_slot(slot_no);

	type = static_cast<srv_thread_type>(slot->type);

	ut_ad(type >= SRV_WORKER);
	ut_ad(type <= SRV_MASTER);

	event = slot->event;

	slot->suspended = TRUE;

	ut_ad(srv_n_threads_active[type] > 0);

	srv_n_threads_active[type]--;

	os_event_reset(event);

	return(event);
}
/*********************************************************************//**
Releases threads of the type given from suspension in the thread table.
NOTE! The server mutex has to be reserved by the caller!
@return number of threads released: this may be less than n if not
enough threads were suspended at the moment */
UNIV_INTERN
ulint
srv_release_threads(
/*================*/
	enum srv_thread_type	type,	/*!< in: thread type */
	ulint			n)	/*!< in: number of threads to release */
{
	srv_slot_t*	slot;
	ulint		i;
	ulint		count	= 0;

	ut_ad(type >= SRV_WORKER);
	ut_ad(type <= SRV_MASTER);
	ut_ad(n > 0);
	ut_ad(mutex_own(&kernel_mutex));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {

		slot = srv_table_get_nth_slot(i);

		if (slot->in_use &&
		    (static_cast<srv_thread_type>(slot->type) == type) &&
		    slot->suspended) {

			slot->suspended = FALSE;

			srv_n_threads_active[type]++;

			os_event_set(slot->event);

			if (srv_print_thread_releases) {
				fprintf(stderr,
					"Releasing thread %lu type %lu"
					" from slot %lu\n",
					(ulong) slot->id, (ulong) type,
					(ulong) i);
			}

			count++;

			if (count == n) {
				break;
			}
		}
	}

	return(count);
}
/*********************************************************************//**
Returns the calling thread type.
@return	SRV_COM, ... */
UNIV_INTERN
enum srv_thread_type
srv_get_thread_type(void)
/*=====================*/
{
	ulint			slot_no;
	srv_slot_t*		slot;
	enum srv_thread_type	type;

	mutex_enter(&kernel_mutex);

	slot_no = thr_local_get_slot_no(os_thread_get_curr_id());

	slot = srv_table_get_nth_slot(slot_no);

	type = static_cast<srv_thread_type>(slot->type);

	ut_ad(type >= SRV_WORKER);
	ut_ad(type <= SRV_MASTER);

	mutex_exit(&kernel_mutex);

	return(type);
}
/*********************************************************************//**
Initializes the server. */
UNIV_INTERN
void
srv_init(void)
/*==========*/
{
	srv_conc_slot_t*	conc_slot;
	srv_slot_t*		slot;
	ulint			i;

	srv_sys = static_cast<srv_sys_t *>(mem_alloc(sizeof(srv_sys_t)));

	kernel_mutex_temp = static_cast<ib_mutex_t *>(mem_alloc(sizeof(mutex_t)));
	mutex_create(kernel_mutex_key, &kernel_mutex, SYNC_KERNEL);

	commit_id_mutex_temp = static_cast<ib_mutex_t *>(mem_alloc(sizeof(mutex_t)));
	mutex_create(commit_id_mutex_key, &commit_id_mutex, SYNC_COMMIT_ID_LOCK);

	mutex_create(srv_innodb_monitor_mutex_key,
		     &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);

	srv_sys->threads = static_cast<srv_table_t *>(mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		slot = srv_table_get_nth_slot(i);
		slot->in_use = FALSE;
		slot->type = 0;	/* Avoid purify errors */
		slot->event = os_event_create(NULL);
		ut_a(slot->event);
	}

	srv_mysql_table = static_cast<srv_slot_t *>(mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		slot = srv_mysql_table + i;
		slot->in_use = FALSE;
		slot->thr = NULL;
		slot->event = os_event_create(NULL);
		ut_a(slot->event);
	}

	srv_error_event = os_event_create(NULL);

	srv_timeout_event = os_event_create(NULL);

	srv_monitor_event = os_event_create(NULL);

	srv_lock_timeout_thread_event = os_event_create(NULL);

	for (i = 0; i < SRV_MASTER + 1; i++) {
		srv_n_threads_active[i] = 0;
		srv_n_threads[i] = 0;

		srv_meter[i] = 30;
		srv_meter_low_water[i] = 50;
		srv_meter_high_water[i] = 100;
		srv_meter_high_water2[i] = 200;
		srv_meter_foreground[i] = 250;
	}

	UT_LIST_INIT(srv_sys->tasks);

	/* Create dummy indexes for infimum and supremum records */

	dict_ind_init();

	/* Init the server concurrency restriction data structures */

	os_fast_mutex_init(&srv_conc_mutex);

	UT_LIST_INIT(srv_conc_queue);

	srv_conc_slots = static_cast<srv_conc_slot_t *>(mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t)));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		conc_slot = srv_conc_slots + i;
		conc_slot->reserved = FALSE;
		conc_slot->event = os_event_create(NULL);
		ut_a(conc_slot->event);
	}

	/* Initialize some INFORMATION SCHEMA internal structures */
	trx_i_s_cache_init(trx_i_s_cache);
}
/*********************************************************************//**
Frees the data structures created in srv_init(). */
UNIV_INTERN
void
srv_free(void)
/*==========*/
{
	os_fast_mutex_free(&srv_conc_mutex);
	mem_free(srv_conc_slots);
	srv_conc_slots = NULL;

	mem_free(srv_sys->threads);
	mem_free(srv_sys);
	srv_sys = NULL;

	mem_free(kernel_mutex_temp);
	kernel_mutex_temp = NULL;
	mem_free(srv_mysql_table);
	srv_mysql_table = NULL;

	mem_free(commit_id_mutex_temp);
	commit_id_mutex_temp = NULL;

	trx_i_s_cache_free(trx_i_s_cache);
}
/*********************************************************************//**
Initializes the synchronization primitives, memory system, and the thread
local storage. */
UNIV_INTERN
void
srv_general_init(void)
/*==================*/
{
	ut_mem_init();
	/* Reset the system variables in the recovery module. */
	recv_sys_var_init();
	os_sync_init();
	sync_init();
	mem_init(srv_mem_pool_size);
	thr_local_init();
}
/*======================= InnoDB Server FIFO queue =======================*/

/* Maximum allowable purge history length. <=0 means 'infinite'. */
UNIV_INTERN ulong	srv_max_purge_lag = 0;

/*********************************************************************//**
Puts an OS thread to wait if there are too many concurrent threads
(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
UNIV_INTERN
void
srv_conc_enter_innodb(
/*==================*/
	trx_t*	trx)	/*!< in: transaction object associated with the
			thread */
{
	ibool			has_slept = FALSE;
	srv_conc_slot_t*	slot	  = NULL;
	ulint			i;

	if (trx->mysql_thd != NULL
	    && thd_is_replication_slave_thread(trx->mysql_thd)) {

		UT_WAIT_FOR(srv_conc_n_threads
			    < (lint)srv_thread_concurrency,
			    srv_replication_delay * 1000);

		return;
	}

	/* If trx has 'free tickets' to enter the engine left, then use one
	such ticket */

	if (trx->n_tickets_to_enter_innodb > 0) {
		trx->n_tickets_to_enter_innodb--;

		return;
	}

	os_fast_mutex_lock(&srv_conc_mutex);
retry:
	if (trx->declared_to_be_inside_innodb) {
		ut_print_timestamp(stderr);
		fputs("  InnoDB: Error: trying to declare trx"
		      " to enter InnoDB, but\n"
		      "InnoDB: it already is declared.\n", stderr);
		trx_print(stderr, trx, 0);
		putc('\n', stderr);
		os_fast_mutex_unlock(&srv_conc_mutex);

		return;
	}

	ut_ad(srv_conc_n_threads >= 0);

	if (srv_conc_n_threads < (lint)srv_thread_concurrency) {

		srv_conc_n_threads++;
		trx->declared_to_be_inside_innodb = TRUE;
		trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;

		os_fast_mutex_unlock(&srv_conc_mutex);

		return;
	}

	/* If the transaction is not holding resources, let it sleep
	for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */

	if (!has_slept && !trx->has_search_latch
	    && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {

		has_slept = TRUE;	/* We let it sleep only once to avoid
					starvation */

		srv_conc_n_waiting_threads++;

		os_fast_mutex_unlock(&srv_conc_mutex);

		trx->op_info = "sleeping before joining InnoDB queue";

		/* Peter Zaitsev suggested that we take the sleep away
		altogether. But the sleep may be good in pathological
		situations of lots of thread switches. Simply put some
		threads aside for a while to reduce the number of thread
		switches. */
		if (SRV_THREAD_SLEEP_DELAY > 0) {
			os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
		}

		trx->op_info = "";

		os_fast_mutex_lock(&srv_conc_mutex);

		srv_conc_n_waiting_threads--;

		goto retry;
	}

	/* Too many threads inside: put the current thread to a queue */

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		slot = srv_conc_slots + i;

		if (!slot->reserved) {

			break;
		}
	}

	if (i == OS_THREAD_MAX_N) {
		/* Could not find a free wait slot, we must let the
		thread enter */

		srv_conc_n_threads++;
		trx->declared_to_be_inside_innodb = TRUE;
		trx->n_tickets_to_enter_innodb = 0;

		os_fast_mutex_unlock(&srv_conc_mutex);

		return;
	}

	/* Release possible search system latch this thread has */
	if (trx->has_search_latch) {
		trx_search_latch_release_if_reserved(trx);
	}

	/* Add to the queue */
	slot->reserved = TRUE;
	slot->wait_ended = FALSE;

	UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);

	os_event_reset(slot->event);

	srv_conc_n_waiting_threads++;

	os_fast_mutex_unlock(&srv_conc_mutex);

	/* Go to wait for the event; when a thread leaves InnoDB it will
	release this thread */

	trx->op_info = "waiting in InnoDB queue";

	os_event_wait(slot->event);

	trx->op_info = "";

	os_fast_mutex_lock(&srv_conc_mutex);

	srv_conc_n_waiting_threads--;

	/* NOTE that the thread which released this thread already
	incremented the thread counter on behalf of this thread */

	slot->reserved = FALSE;

	UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);

	trx->declared_to_be_inside_innodb = TRUE;
	trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;

	os_fast_mutex_unlock(&srv_conc_mutex);
}
/*********************************************************************//**
This lets a thread enter InnoDB regardless of the number of threads inside
InnoDB. This must be called when a thread ends a lock wait. */
UNIV_INTERN
void
srv_conc_force_enter_innodb(
/*========================*/
	trx_t*	trx)	/*!< in: transaction object associated with the
			thread */
{
	if (UNIV_LIKELY(!srv_thread_concurrency)) {

		return;
	}

	ut_ad(srv_conc_n_threads >= 0);

	os_fast_mutex_lock(&srv_conc_mutex);

	srv_conc_n_threads++;
	trx->declared_to_be_inside_innodb = TRUE;
	trx->n_tickets_to_enter_innodb = 1;

	os_fast_mutex_unlock(&srv_conc_mutex);
}
/*********************************************************************//**
This must be called when a thread exits InnoDB in a lock wait or at the
end of an SQL statement. */
UNIV_INTERN
void
srv_conc_force_exit_innodb(
/*=======================*/
	trx_t*	trx)	/*!< in: transaction object associated with the
			thread */
{
	srv_conc_slot_t*	slot	= NULL;

	if (trx->mysql_thd != NULL
	    && thd_is_replication_slave_thread(trx->mysql_thd)) {

		return;
	}

	if (trx->declared_to_be_inside_innodb == FALSE) {

		return;
	}

	os_fast_mutex_lock(&srv_conc_mutex);

	ut_ad(srv_conc_n_threads > 0);
	srv_conc_n_threads--;
	trx->declared_to_be_inside_innodb = FALSE;
	trx->n_tickets_to_enter_innodb = 0;

	if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
		/* Look for a slot where a thread is waiting and no other
		thread has yet released the thread */

		slot = UT_LIST_GET_FIRST(srv_conc_queue);

		while (slot && slot->wait_ended == TRUE) {
			slot = UT_LIST_GET_NEXT(srv_conc_queue, slot);
		}

		if (slot != NULL) {
			slot->wait_ended = TRUE;

			/* We increment the count on behalf of the released
			thread */

			srv_conc_n_threads++;
		}
	}

	os_fast_mutex_unlock(&srv_conc_mutex);

	if (slot != NULL) {
		os_event_set(slot->event);
	}
}
/*********************************************************************//**
This must be called when a thread exits InnoDB. */
UNIV_INTERN
void
srv_conc_exit_innodb(
/*=================*/
	trx_t*	trx)	/*!< in: transaction object associated with the
			thread */
{
	if (trx->n_tickets_to_enter_innodb > 0) {
		/* We will pretend the thread is still inside InnoDB though it
		now leaves the InnoDB engine. In this way we save
		a lot of semaphore operations. srv_conc_force_exit_innodb is
		used to declare the thread definitely outside InnoDB. It
		should be called when there is a lock wait or an SQL statement
		ends. */

		return;
	}

	srv_conc_force_exit_innodb(trx);
}
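/* A hedged sketch of how a caller is expected to pair the four
srv_conc_* functions above around one row operation; the surrounding
handler code is simplified and hypothetical:

	srv_conc_enter_innodb(trx);	// may sleep or queue in the FIFO
					// if too many threads are inside

	... execute one row operation ...

	srv_conc_exit_innodb(trx);	// cheap: keeps the ticket, if any

	// at statement end, or when starting a lock wait:
	srv_conc_force_exit_innodb(trx);	// definitely outside InnoDB

	// when the lock wait ends:
	srv_conc_force_enter_innodb(trx);	// re-enter regardless of limit
*/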
/*========================================================================*/

/*********************************************************************//**
Normalizes init parameter values to use units we use inside InnoDB.
@return	DB_SUCCESS or error code */
static
ulint
srv_normalize_init_values(void)
/*===========================*/
{
	ulint	n;
	ulint	i;

	n = srv_n_data_files;

	for (i = 0; i < n; i++) {
		srv_data_file_sizes[i] = srv_data_file_sizes[i]
			* ((1024 * 1024) / UNIV_PAGE_SIZE);
	}

	srv_last_file_size_max = srv_last_file_size_max
		* ((1024 * 1024) / UNIV_PAGE_SIZE);

	srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;

	srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;

	srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);

	return(DB_SUCCESS);
}
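/* A worked example of the conversion above, assuming the usual
UNIV_PAGE_SIZE of 16384 bytes (16 kB): a data file size given in megabytes
is multiplied by (1024 * 1024) / 16384 = 64, i.e. one megabyte corresponds
to 64 database pages, while srv_log_file_size and srv_log_buffer_size,
given in bytes, are simply divided by the page size. */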
/*********************************************************************//**
Boots the InnoDB server.
@return	DB_SUCCESS or error code */
UNIV_INTERN
ulint
srv_boot(void)
/*==========*/
{
	ulint	err;

	/* Transform the init parameter values given by MySQL to
	use units we use inside InnoDB: */

	err = srv_normalize_init_values();

	if (err != DB_SUCCESS) {
		return(err);
	}

	/* Initialize synchronization primitives, memory management, and thread
	local storage */

	srv_general_init();

	/* Initialize this module */

	srv_init();

	return(DB_SUCCESS);
}
/*********************************************************************//**
Reserves a slot in the thread table for the current MySQL OS thread.
NOTE! The kernel mutex has to be reserved by the caller!
@return	reserved slot */
static
srv_slot_t*
srv_table_reserve_slot_for_mysql(void)
/*==================================*/
{
	srv_slot_t*	slot;
	ulint		i;

	ut_ad(mutex_own(&kernel_mutex));

	i = 0;
	slot = srv_mysql_table + i;

	while (slot->in_use) {
		i++;

		if (i >= OS_THREAD_MAX_N) {

			ut_print_timestamp(stderr);

			fprintf(stderr,
				"  InnoDB: There appear to be %lu MySQL"
				" threads currently waiting\n"
				"InnoDB: inside InnoDB, which is the"
				" upper limit. Cannot continue operation.\n"
				"InnoDB: We intentionally generate"
				" a seg fault to print a stack trace\n"
				"InnoDB: on Linux. But first we print"
				" a list of waiting threads.\n", (ulong) i);

			for (i = 0; i < OS_THREAD_MAX_N; i++) {

				slot = srv_mysql_table + i;

				fprintf(stderr,
					"Slot %lu: thread id %lu, type %lu,"
					" in use %lu, susp %lu, time %lu\n",
					(ulong) i,
					(ulong) os_thread_pf(slot->id),
					(ulong) slot->type,
					(ulong) slot->in_use,
					(ulong) slot->suspended,
					(ulong) difftime(ut_time(),
							 slot->suspend_time));
			}

			ut_error;
		}

		slot = srv_mysql_table + i;
	}

	ut_a(slot->in_use == FALSE);

	slot->in_use = TRUE;
	slot->id = os_thread_get_curr_id();
	slot->handle = os_thread_get_curr();

	return(slot);
}
/***************************************************************//**
Puts a MySQL OS thread to wait for a lock to be released. If an error
occurs during the wait, trx->error_state associated with thr is
!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
are possible errors. DB_DEADLOCK is returned if selective deadlock
resolution chose this transaction as a victim. */
UNIV_INTERN
void
srv_suspend_mysql_thread(
/*=====================*/
	que_thr_t*	thr)	/*!< in: query thread associated with the MySQL
				OS thread */
{
	srv_slot_t*	slot;
	os_event_t	event;
	double		wait_time;
	trx_t*		trx;
	ulint		had_dict_lock;
	ibool		was_declared_inside_innodb = FALSE;
	ib_int64_t	start_time = 0;
	ib_int64_t	finish_time;
	ulint		diff_time;
	ulint		sec;
	ulint		ms;
	ulong		lock_wait_timeout;

	ut_ad(!mutex_own(&kernel_mutex));

	trx = thr_get_trx(thr);

	os_event_set(srv_lock_timeout_thread_event);

	mutex_enter(&kernel_mutex);

	trx->error_state = DB_SUCCESS;

	if (thr->state == QUE_THR_RUNNING) {

		ut_ad(thr->is_active == TRUE);

		/* The lock has already been released or this transaction
		was chosen as a deadlock victim: no need to suspend */

		if (trx->was_chosen_as_deadlock_victim) {

			trx->error_state = DB_DEADLOCK;
			trx->was_chosen_as_deadlock_victim = FALSE;
		}

		mutex_exit(&kernel_mutex);

		return;
	}

	ut_ad(thr->is_active == FALSE);

	slot = srv_table_reserve_slot_for_mysql();

	event = slot->event;

	slot->thr = thr;

	os_event_reset(event);

	slot->suspend_time = ut_time();

	if (thr->lock_state == QUE_THR_LOCK_ROW) {
		srv_n_lock_wait_count++;
		srv_n_lock_wait_current_count++;

		if (ut_usectime(&sec, &ms) == -1) {
			start_time = -1;
		} else {
			start_time = (ib_int64_t) sec * 1000000 + ms;
		}
	}

	/* Wake the lock timeout monitor thread, if it is suspended */

	os_event_set(srv_lock_timeout_thread_event);

	mutex_exit(&kernel_mutex);

	if (trx->declared_to_be_inside_innodb) {

		was_declared_inside_innodb = TRUE;

		/* We must declare this OS thread to exit InnoDB, since a
		possible other thread holding a lock which this thread waits
		for must be allowed to enter, sooner or later */

		srv_conc_force_exit_innodb(trx);
	}

	had_dict_lock = trx->dict_operation_lock_mode;

	switch (had_dict_lock) {
	case RW_S_LATCH:
		/* Release foreign key check latch */
		row_mysql_unfreeze_data_dictionary(trx);
		break;
	case RW_X_LATCH:
		/* There should never be a lock wait when the
		dictionary latch is reserved in X mode. Dictionary
		transactions should only acquire locks on dictionary
		tables, not other tables. All access to dictionary
		tables should be covered by dictionary
		transactions. */
		ut_print_timestamp(stderr);
		fputs("  InnoDB: Error: dict X latch held in "
		      "srv_suspend_mysql_thread\n", stderr);
		/* This should never occur. This incorrect handling
		was added in the early development of
		ha_innobase::add_index() in InnoDB Plugin 1.0. */
		/* Release fast index creation latch */
		row_mysql_unlock_data_dictionary(trx);
		break;
	}

	ut_a(trx->dict_operation_lock_mode == 0);

	/* Suspend this thread and wait for the event. */

	os_event_wait(event);

	/* After resuming, reacquire the data dictionary latch if
	necessary. */

	switch (had_dict_lock) {
	case RW_S_LATCH:
		row_mysql_freeze_data_dictionary(trx);
		break;
	case RW_X_LATCH:
		/* This should never occur. This incorrect handling
		was added in the early development of
		ha_innobase::add_index() in InnoDB Plugin 1.0. */
		row_mysql_lock_data_dictionary(trx);
		break;
	}

	if (was_declared_inside_innodb) {

		/* Return back inside InnoDB */

		srv_conc_force_enter_innodb(trx);
	}

	mutex_enter(&kernel_mutex);

	/* Release the slot for others to use */

	slot->in_use = FALSE;

	wait_time = ut_difftime(ut_time(), slot->suspend_time);

	if (thr->lock_state == QUE_THR_LOCK_ROW) {
		if (ut_usectime(&sec, &ms) == -1) {
			finish_time = -1;
		} else {
			finish_time = (ib_int64_t) sec * 1000000 + ms;
		}

		diff_time = (ulint) (finish_time - start_time);

		srv_n_lock_wait_current_count--;
		srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time;
		if (diff_time > srv_n_lock_max_wait_time &&
		    /* only update the variable if we successfully
		    retrieved the start and finish times. See Bug#36819. */
		    start_time != -1 && finish_time != -1) {
			srv_n_lock_max_wait_time = diff_time;
		}

		/* Record the lock wait time for this thread */
		thd_set_lock_wait_time(trx->mysql_thd, diff_time);
	}

	if (trx->was_chosen_as_deadlock_victim) {

		trx->error_state = DB_DEADLOCK;
		trx->was_chosen_as_deadlock_victim = FALSE;
	}

	mutex_exit(&kernel_mutex);

	/* InnoDB system transactions (such as the purge, and
	incomplete transactions that are being rolled back after crash
	recovery) will use the global value of
	innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */
	lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd);

	if (lock_wait_timeout < 100000000
	    && wait_time > (double) lock_wait_timeout) {

		trx->error_state = DB_LOCK_WAIT_TIMEOUT;
	}

	if (trx_is_interrupted(trx)) {

		trx->error_state = DB_INTERRUPTED;
	}
}
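/* For orientation, a hedged sketch of the caller side (from memory, not
verbatim): row_mysql_handle_errors() in row0mysql.c reacts to a
DB_LOCK_WAIT error by parking the OS thread here and then checking what
the wait ended with:

	case DB_LOCK_WAIT:
		srv_suspend_mysql_thread(thr);	// blocks on slot->event

		if (trx->error_state != DB_SUCCESS) {
			// DB_LOCK_WAIT_TIMEOUT, DB_DEADLOCK or
			// DB_INTERRUPTED was set while we waited
			goto handle_new_error;
		}
		break;
*/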
/********************************************************************//**
Releases a MySQL OS thread waiting for a lock to be released, if the
thread is already suspended. */
UNIV_INTERN
void
srv_release_mysql_thread_if_suspended(
/*==================================*/
	que_thr_t*	thr)	/*!< in: query thread associated with the
				MySQL OS thread */
{
	srv_slot_t*	slot;
	ulint		i;

	ut_ad(mutex_own(&kernel_mutex));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {

		slot = srv_mysql_table + i;

		if (slot->in_use && slot->thr == thr) {
			/* Found */

			os_event_set(slot->event);

			return;
		}
	}

	/* not found */
}
/******************************************************************//**
Refreshes the values used to calculate per-second averages. */
static
void
srv_refresh_innodb_monitor_stats(void)
/*==================================*/
{
	mutex_enter(&srv_innodb_monitor_mutex);

	srv_last_monitor_time = time(NULL);

	os_aio_refresh_stats();

	btr_cur_n_sea_old = btr_cur_n_sea;
	btr_cur_n_non_sea_old = btr_cur_n_non_sea;

	log_refresh_stats();

	buf_refresh_io_stats_all();

	srv_n_rows_inserted_old = srv_n_rows_inserted;
	srv_n_rows_updated_old = srv_n_rows_updated;
	srv_n_rows_deleted_old = srv_n_rows_deleted;
	srv_n_rows_read_old = srv_n_rows_read;

	mutex_exit(&srv_innodb_monitor_mutex);
}
1808
/******************************************************************//**
Outputs to a file the output of the InnoDB Monitor.
@return FALSE if not all information printed
due to failure to obtain necessary mutex */
UNIV_INTERN
ibool
srv_printf_innodb_monitor(
/*======================*/
	FILE*	file,		/*!< in: output stream */
	ibool	nowait,		/*!< in: whether to wait for kernel mutex */
	ulint*	trx_start,	/*!< out: file position of the start of
				the list of active transactions */
	ulint*	trx_end)	/*!< out: file position of the end of
				the list of active transactions */
{
	double	time_elapsed;
	time_t	current_time;
	ulint	n_reserved;
	ibool	ret;

	mutex_enter(&srv_innodb_monitor_mutex);

	current_time = time(NULL);

	/* We add 0.001 seconds to time_elapsed to prevent division
	by zero if two users happen to call SHOW INNODB STATUS at the same
	time */

	time_elapsed = difftime(current_time, srv_last_monitor_time)
		+ 0.001;

	srv_last_monitor_time = time(NULL);

	fputs("\n=====================================\n", file);

	ut_print_timestamp(file);
	fprintf(file,
		" INNODB MONITOR OUTPUT\n"
		"=====================================\n"
		"Per second averages calculated from the last %lu seconds\n",
		(ulong)time_elapsed);

	fputs("-----------------\n"
	      "BACKGROUND THREAD\n"
	      "-----------------\n", file);
	srv_print_master_thread_info(file);

	fputs("----------\n"
	      "SEMAPHORES\n"
	      "----------\n", file);
	sync_print(file);

	/* Conceptually, srv_innodb_monitor_mutex has a very high latching
	order level in sync0sync.h, while dict_foreign_err_mutex has a very
	low level 135. Therefore we can reserve the latter mutex here without
	a danger of a deadlock of threads. */

	mutex_enter(&dict_foreign_err_mutex);

	if (ftell(dict_foreign_err_file) != 0L) {
		fputs("------------------------\n"
		      "LATEST FOREIGN KEY ERROR\n"
		      "------------------------\n", file);
		ut_copy_file(file, dict_foreign_err_file);
	}

	mutex_exit(&dict_foreign_err_mutex);

	/* Only if lock_print_info_summary proceeds correctly,
	before we call the lock_print_info_all_transactions
	to print all the lock information. */
	ret = lock_print_info_summary(file, nowait);

	if (ret) {
		if (trx_start) {
			long	t = ftell(file);
			if (t < 0) {
				*trx_start = ULINT_UNDEFINED;
			} else {
				*trx_start = (ulint) t;
			}
		}

		lock_print_info_all_transactions(file);

		if (trx_end) {
			long	t = ftell(file);
			if (t < 0) {
				*trx_end = ULINT_UNDEFINED;
			} else {
				*trx_end = (ulint) t;
			}
		}
	}

	fputs("--------\n"
	      "FILE I/O\n"
	      "--------\n", file);
	os_aio_print(file);

	fputs("-------------------------------------\n"
	      "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
	      "-------------------------------------\n", file);
	ibuf_print(file);

	ha_print_info(file, btr_search_sys->hash_index);

	fprintf(file,
		"%.2f hash searches/s, %.2f non-hash searches/s\n",
		(btr_cur_n_sea - btr_cur_n_sea_old)
		/ time_elapsed,
		(btr_cur_n_non_sea - btr_cur_n_non_sea_old)
		/ time_elapsed);

	btr_cur_n_sea_old = btr_cur_n_sea;
	btr_cur_n_non_sea_old = btr_cur_n_non_sea;

	fputs("---\n"
	      "LOG\n"
	      "---\n", file);
	log_print(file);

	fputs("----------------------\n"
	      "BUFFER POOL AND MEMORY\n"
	      "----------------------\n", file);
	fprintf(file,
		"Total memory allocated " ULINTPF
		"; in additional pool allocated " ULINTPF "\n",
		ut_total_allocated_memory,
		mem_pool_get_reserved(mem_comm_pool));
	fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
		dict_sys->size);

	buf_print_io(file);

	fputs("--------------\n"
	      "ROW OPERATIONS\n"
	      "--------------\n", file);
	fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
		(long) srv_conc_n_threads,
		(ulong) srv_conc_n_waiting_threads);

	fprintf(file, "%lu read views open inside InnoDB\n",
		static_cast<ulint>(UT_LIST_GET_LEN(trx_sys->view_list)));

	n_reserved = fil_space_get_n_reserved_extents(0);
	if (n_reserved > 0) {
		fprintf(file,
			"%lu tablespace extents now reserved for"
			" B-tree split operations\n",
			(ulong) n_reserved);
	}

#ifdef UNIV_LINUX
	fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
		(ulong) srv_main_thread_process_no,
		(ulong) srv_main_thread_id,
		srv_main_thread_op_info);
#else
	fprintf(file, "Main thread id %lu, state: %s\n",
		(ulong) srv_main_thread_id,
		srv_main_thread_op_info);
#endif
	fprintf(file,
		"Number of rows inserted " ULINTPF
		", updated " ULINTPF ", deleted " ULINTPF
		", read " ULINTPF "\n",
		srv_n_rows_inserted,
		srv_n_rows_updated,
		srv_n_rows_deleted,
		srv_n_rows_read);
	fprintf(file,
		"%.2f inserts/s, %.2f updates/s,"
		" %.2f deletes/s, %.2f reads/s\n",
		(srv_n_rows_inserted - srv_n_rows_inserted_old)
		/ time_elapsed,
		(srv_n_rows_updated - srv_n_rows_updated_old)
		/ time_elapsed,
		(srv_n_rows_deleted - srv_n_rows_deleted_old)
		/ time_elapsed,
		(srv_n_rows_read - srv_n_rows_read_old)
		/ time_elapsed);

	srv_n_rows_inserted_old = srv_n_rows_inserted;
	srv_n_rows_updated_old = srv_n_rows_updated;
	srv_n_rows_deleted_old = srv_n_rows_deleted;
	srv_n_rows_read_old = srv_n_rows_read;

	fputs("----------------------------\n"
	      "END OF INNODB MONITOR OUTPUT\n"
	      "============================\n", file);
	mutex_exit(&srv_innodb_monitor_mutex);
	fflush(file);

	return(ret);
}
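/* Illustrative usage sketch (hypothetical caller, not in this file): a
typical invocation that tolerates a busy kernel mutex rather than blocking:

	ulint	trx_start;
	ulint	trx_end;

	if (!srv_printf_innodb_monitor(stderr, TRUE,
				       &trx_start, &trx_end)) {
		.. output was truncated; retry on a later tick ..
	}

With nowait set, lock_print_info_summary() gives up on kernel_mutex instead
of waiting, and the function returns FALSE to signal truncated output. */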
/******************************************************************//**
Function to pass InnoDB status variables to MySQL */
UNIV_INTERN
void
srv_export_innodb_status(void)
/*==========================*/
{
	buf_pool_stat_t	stat;
	ulint		LRU_len;
	ulint		free_len;
	ulint		flush_list_len;

	buf_get_total_stat(&stat);
	buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);

	mutex_enter(&srv_innodb_monitor_mutex);

	export_vars.innodb_data_pending_reads
		= os_n_pending_reads;
	export_vars.innodb_data_pending_writes
		= os_n_pending_writes;
	export_vars.innodb_data_pending_fsyncs
		= fil_n_pending_log_flushes
		+ fil_n_pending_tablespace_flushes;
	export_vars.innodb_data_fsyncs = os_n_fsyncs;
	export_vars.innodb_data_read = srv_data_read;
	export_vars.innodb_data_reads = os_n_file_reads;
	export_vars.innodb_data_writes = os_n_file_writes;
	export_vars.innodb_data_written = srv_data_written;
	export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
	export_vars.innodb_buffer_pool_write_requests
		= srv_buf_pool_write_requests;
	export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
	export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
	export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
	export_vars.innodb_buffer_pool_read_ahead
		= stat.n_ra_pages_read;
	export_vars.innodb_buffer_pool_read_ahead_evicted
		= stat.n_ra_pages_evicted;
	export_vars.innodb_buffer_pool_pages_data = LRU_len;
	export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
	export_vars.innodb_buffer_pool_pages_free = free_len;
#ifdef UNIV_DEBUG
	export_vars.innodb_buffer_pool_pages_latched
		= buf_get_latched_pages_number();
#endif /* UNIV_DEBUG */
	export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();

	export_vars.innodb_buffer_pool_pages_misc
		= buf_pool_get_n_pages() - LRU_len - free_len;

#ifdef HAVE_ATOMIC_BUILTINS
	export_vars.innodb_have_atomic_builtins = 1;
#else
	export_vars.innodb_have_atomic_builtins = 0;
#endif
	export_vars.innodb_page_size = UNIV_PAGE_SIZE;
	export_vars.innodb_log_waits = srv_log_waits;
	export_vars.innodb_os_log_written = srv_os_log_written;
	export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
	export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
	export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
	export_vars.innodb_log_write_requests = srv_log_write_requests;
	export_vars.innodb_log_writes = srv_log_writes;
	export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
	export_vars.innodb_dblwr_writes = srv_dblwr_writes;
	export_vars.innodb_pages_created = stat.n_pages_created;
	export_vars.innodb_pages_read = stat.n_pages_read;
	export_vars.innodb_pages_written = stat.n_pages_written;
	export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
	export_vars.innodb_row_lock_current_waits
		= srv_n_lock_wait_current_count;
	export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000;
	if (srv_n_lock_wait_count > 0) {
		export_vars.innodb_row_lock_time_avg = (ulint)
			(srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
	} else {
		export_vars.innodb_row_lock_time_avg = 0;
	}
	export_vars.innodb_row_lock_time_max
		= srv_n_lock_max_wait_time / 1000;
	export_vars.innodb_rows_read = srv_n_rows_read;
	export_vars.innodb_rows_inserted = srv_n_rows_inserted;
	export_vars.innodb_rows_updated = srv_n_rows_updated;
	export_vars.innodb_rows_deleted = srv_n_rows_deleted;
	export_vars.innodb_truncated_status_writes
		= srv_truncated_status_writes;

	mutex_exit(&srv_innodb_monitor_mutex);
}
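/* Worked example, illustrative only: srv_n_lock_wait_time accumulates
microseconds, so with 3 recorded waits totalling 6,000,000 us the exported
values are

	innodb_row_lock_time     = 6000000 / 1000     = 6000 ms
	innodb_row_lock_time_avg = 6000000 / 1000 / 3 = 2000 ms

which is why the average is only computed when srv_n_lock_wait_count > 0. */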
/*********************************************************************//**
A thread which prints the info output by various InnoDB monitors.
@return a dummy parameter */
UNIV_INTERN
os_thread_ret_t
srv_monitor_thread(
/*===============*/
	void*	/*arg __attribute__((unused))*/)
			/*!< in: a dummy parameter required by
			os_thread_create */
{
	ib_int64_t	sig_count;
	double		time_elapsed;
	time_t		current_time;
	time_t		last_table_monitor_time;
	time_t		last_tablespace_monitor_time;
	time_t		last_monitor_time;
	ulint		mutex_skipped;
	ibool		last_srv_print_monitor;

#ifdef UNIV_DEBUG_THREAD_CREATION
	fprintf(stderr, "Monitor thread starts, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif /* UNIV_DEBUG_THREAD_CREATION */

#ifdef UNIV_PFS_THREAD
	pfs_register_thread(srv_monitor_thread_key);
#endif /* UNIV_PFS_THREAD */

	srv_last_monitor_time = ut_time();
	last_table_monitor_time = ut_time();
	last_tablespace_monitor_time = ut_time();
	last_monitor_time = ut_time();
	mutex_skipped = 0;
	last_srv_print_monitor = srv_print_innodb_monitor;
loop:
	srv_monitor_active = TRUE;

	/* Wake up every 5 seconds to see if we need to print
	monitor information or if signalled at shutdown. */

	sig_count = os_event_reset(srv_monitor_event);

	os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);

	current_time = ut_time();

	time_elapsed = difftime(current_time, last_monitor_time);

	if (time_elapsed > 15) {
		last_monitor_time = ut_time();

		if (srv_print_innodb_monitor) {
			/* Reset the mutex_skipped counter every time
			srv_print_innodb_monitor changes. This is to
			ensure we will not be blocked by kernel_mutex
			for short duration information printing,
			such as requested by sync_array_print_long_waits() */
			if (!last_srv_print_monitor) {
				mutex_skipped = 0;
				last_srv_print_monitor = TRUE;
			}

			if (!srv_printf_innodb_monitor(stderr,
						MUTEX_NOWAIT(mutex_skipped),
						NULL, NULL)) {
				mutex_skipped++;
			} else {
				/* Reset the counter */
				mutex_skipped = 0;
			}
		} else {
			last_srv_print_monitor = FALSE;
		}

		if (srv_innodb_status) {
			mutex_enter(&srv_monitor_file_mutex);
			rewind(srv_monitor_file);
			if (!srv_printf_innodb_monitor(srv_monitor_file,
						MUTEX_NOWAIT(mutex_skipped),
						NULL, NULL)) {
				mutex_skipped++;
			} else {
				mutex_skipped = 0;
			}

			os_file_set_eof(srv_monitor_file);
			mutex_exit(&srv_monitor_file_mutex);
		}

		if (srv_print_innodb_tablespace_monitor
		    && difftime(current_time,
				last_tablespace_monitor_time) > 60) {

			last_tablespace_monitor_time = ut_time();

			fputs("========================"
			      "========================\n",
			      stderr);

			ut_print_timestamp(stderr);

			fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
			      "========================"
			      "========================\n",
			      stderr);

			fsp_print(0);
			fputs("Validating tablespace\n", stderr);
			fsp_validate(0);
			fputs("Validation ok\n"
			      "---------------------------------------\n"
			      "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
			      "=======================================\n",
			      stderr);
		}

		if (srv_print_innodb_table_monitor
		    && difftime(current_time, last_table_monitor_time) > 60) {

			last_table_monitor_time = ut_time();

			fputs("===========================================\n",
			      stderr);

			ut_print_timestamp(stderr);

			fputs(" INNODB TABLE MONITOR OUTPUT\n"
			      "===========================================\n",
			      stderr);

			dict_print();

			fputs("-----------------------------------\n"
			      "END OF INNODB TABLE MONITOR OUTPUT\n"
			      "==================================\n",
			      stderr);
		}
	}

	if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
		goto exit_func;
	}

	if (srv_print_innodb_monitor
	    || srv_print_innodb_lock_monitor
	    || srv_print_innodb_tablespace_monitor
	    || srv_print_innodb_table_monitor) {
		goto loop;
	}

	srv_monitor_active = FALSE;

	goto loop;

exit_func:
	srv_monitor_active = FALSE;

	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */

	os_thread_exit(NULL);

	OS_THREAD_DUMMY_RETURN;
}
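/* Illustrative note, not original code: the background threads in this file
all sleep with the same race-free pattern. Capturing the signal count before
the timed wait means an os_event_set() that fires between the reset and the
wait is not lost:

	sig_count = os_event_reset(event);
	.. a concurrent os_event_set(event) here still wakes the wait ..
	os_event_wait_time_low(event, timeout_in_usec, sig_count);
*/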
/*********************************************************************//**
A thread which wakes up threads whose lock wait may have lasted too long.
@return a dummy parameter */
UNIV_INTERN
os_thread_ret_t
srv_lock_timeout_thread(
/*====================*/
	void*	/*arg __attribute__((unused))*/)
			/* in: a dummy parameter required by
			os_thread_create */
{
	srv_slot_t*	slot;
	ibool		some_waits;
	double		wait_time;
	ulint		i;
	ib_int64_t	sig_count;

#ifdef UNIV_PFS_THREAD
	pfs_register_thread(srv_lock_timeout_thread_key);
#endif /* UNIV_PFS_THREAD */

loop:

	/* When someone is waiting for a lock, we wake up every second
	and check if a timeout has passed for a lock wait */

	sig_count = os_event_reset(srv_timeout_event);

	os_event_wait_time_low(srv_timeout_event, 1000000, sig_count);

	srv_lock_timeout_active = TRUE;

	mutex_enter(&kernel_mutex);

	some_waits = FALSE;

	/* Check all slots to see if a thread is waiting there, and if it
	has exceeded the time limit */

	for (i = 0; i < OS_THREAD_MAX_N; i++) {

		slot = srv_mysql_table + i;

		if (slot->in_use) {
			trx_t*	trx;
			ulong	lock_wait_timeout;

			some_waits = TRUE;

			wait_time = ut_difftime(ut_time(), slot->suspend_time);

			trx = thr_get_trx(slot->thr);
			lock_wait_timeout = thd_lock_wait_timeout(
				trx->mysql_thd);

			if (trx_is_interrupted(trx)
			    || (lock_wait_timeout < 100000000
				&& (wait_time > (double) lock_wait_timeout
				    || wait_time < 0))) {

				/* Timeout exceeded or a wrap-around in system
				time counter: cancel the lock request queued
				by the transaction and release possible
				other transactions waiting behind; it is
				possible that the lock has already been
				granted: in that case do nothing */

				if (trx->wait_lock) {
					lock_cancel_waiting_and_release(
						trx->wait_lock);
				}
			}
		}
	}

	os_event_reset(srv_lock_timeout_thread_event);

	mutex_exit(&kernel_mutex);

	if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
		goto exit_func;
	}

	if (some_waits) {
		goto loop;
	}

	srv_lock_timeout_active = FALSE;

#if 0
	/* The following synchronisation is disabled, since
	the InnoDB monitor output is to be updated every 15 seconds. */
	os_event_wait(srv_lock_timeout_thread_event);
#endif
	goto loop;

exit_func:
	srv_lock_timeout_active = FALSE;

	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */

	os_thread_exit(NULL);

	OS_THREAD_DUMMY_RETURN;
}
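/* Illustrative sketch (hypothetical helper, not in the source): the cancel
condition in the loop above, restated as a predicate. A timeout of
100000000 seconds or more is treated as "wait forever", and a negative
wait_time indicates a wrap-around or backward step of the system clock: */
#if 0
static ibool
srv_lock_wait_should_cancel(
	trx_t*	trx,			/* in: waiting transaction */
	double	wait_time,		/* in: seconds waited so far */
	ulong	lock_wait_timeout)	/* in: timeout for this trx */
{
	return(trx_is_interrupted(trx)
	       || (lock_wait_timeout < 100000000
		   && (wait_time > (double) lock_wait_timeout
		       || wait_time < 0)));
}
#endif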
/*********************************************************************//**
A thread which prints warnings about semaphore waits which have lasted
too long. These can be used to track bugs which cause hangs.
@return a dummy parameter */
UNIV_INTERN
os_thread_ret_t
srv_error_monitor_thread(
/*=====================*/
	void*	/*arg __attribute__((unused))*/)
			/*!< in: a dummy parameter required by
			os_thread_create */
{
	/* number of successive fatal timeouts observed */
	ulint		fatal_cnt	= 0;
	ib_uint64_t	old_lsn;
	ib_uint64_t	new_lsn;
	ib_int64_t	sig_count;

	old_lsn = srv_start_lsn;

#ifdef UNIV_DEBUG_THREAD_CREATION
	fprintf(stderr, "Error monitor thread starts, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif /* UNIV_DEBUG_THREAD_CREATION */

#ifdef UNIV_PFS_THREAD
	pfs_register_thread(srv_error_monitor_thread_key);
#endif /* UNIV_PFS_THREAD */

loop:
	srv_error_monitor_active = TRUE;

	/* Try to track a strange bug reported by Harald Fuchs and others,
	where the lsn seems to decrease at times */

	new_lsn = log_get_lsn();

	if (new_lsn < old_lsn) {
		drizzled::errmsg_printf(drizzled::error::INFO,
			"InnoDB: Error: old log sequence number %"PRIu64" was greater than the new log sequence number %"PRIu64"!"
			"InnoDB: Please submit a bug report to http://bugs.launchpad.net/drizzle",
			old_lsn, new_lsn);
	}

	old_lsn = new_lsn;

	if (difftime(time(NULL), srv_last_monitor_time) > 60) {
		/* We refresh the InnoDB Monitor values so that averages are
		printed from at most the last 60 seconds */

		srv_refresh_innodb_monitor_stats();
	}

	/* Update the statistics collected for deciding LRU
	eviction policy. */
	buf_LRU_stat_update();

	/* Update the statistics collected for flush rate policy. */
	buf_flush_stat_update();

	/* In case mutex_exit is not a memory barrier, it is
	theoretically possible some threads are left waiting though
	the semaphore is already released. Wake up those threads: */

	sync_arr_wake_threads_if_sema_free();

	if (sync_array_print_long_waits()) {
		fatal_cnt++;
		if (fatal_cnt > 10) {

			fprintf(stderr,
				"InnoDB: Error: semaphore wait has lasted"
				" > %lu seconds\n"
				"InnoDB: We intentionally crash the server,"
				" because it appears to be hung.\n",
				(ulong) srv_fatal_semaphore_wait_threshold);

			ut_error;
		}
	} else {
		fatal_cnt = 0;
	}

	/* Flush stderr so that a database user gets the output
	to possible MySQL error file */

	fflush(stderr);

	sig_count = os_event_reset(srv_error_event);

	os_event_wait_time_low(srv_error_event, 1000000, sig_count);

	if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {

		goto loop;
	}

	srv_error_monitor_active = FALSE;

	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */

	os_thread_exit(NULL);

	OS_THREAD_DUMMY_RETURN;
}
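/* Illustrative timing note, not original code: with the 1 second event wait
above, fatal_cnt > 10 means sync_array_print_long_waits() has reported a
semaphore wait exceeding srv_fatal_semaphore_wait_threshold on more than ten
consecutive passes, i.e. for roughly ten further seconds, after which the
server is crashed deliberately via ut_error to break the apparent hang. */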
/**********************************************************************//**
Check whether any background thread is active.
@return FALSE if all are suspended or have exited. */
UNIV_INTERN
ibool
srv_is_any_background_thread_active(void)
/*=====================================*/
{
	ulint	i;
	ibool	ret = FALSE;

	mutex_enter(&kernel_mutex);

	for (i = SRV_COM; i <= SRV_MASTER; ++i) {
		if (srv_n_threads_active[i] != 0) {
			ret = TRUE;
			break;
		}
	}

	mutex_exit(&kernel_mutex);

	return(ret);
}
/*******************************************************************//**
Tells the InnoDB server that there has been activity in the database
and wakes up the master thread if it is suspended (not sleeping). Used
in the MySQL interface. Note that there is a small chance that the master
thread stays suspended (we do not protect our operation with the
srv_sys_t->mutex, for performance reasons). */
UNIV_INTERN
void
srv_active_wake_master_thread(void)
/*===============================*/
{
	srv_activity_count++;

	if (srv_n_threads_active[SRV_MASTER] == 0) {

		mutex_enter(&kernel_mutex);

		srv_release_threads(SRV_MASTER, 1);

		mutex_exit(&kernel_mutex);
	}
}
/*******************************************************************//**
Tells the purge thread that there has been activity in the database
and wakes up the purge thread if it is suspended (not sleeping). Note
that there is a small chance that the purge thread stays suspended
(we do not protect our operation with the kernel mutex, for
performance reasons). */
UNIV_INTERN
void
srv_wake_purge_thread_if_not_active(void)
/*=====================================*/
{
	ut_ad(!mutex_own(&kernel_mutex));

	if (srv_n_purge_threads > 0
	    && srv_n_threads_active[SRV_WORKER] == 0) {

		mutex_enter(&kernel_mutex);

		srv_release_threads(SRV_WORKER, 1);

		mutex_exit(&kernel_mutex);
	}
}
/*******************************************************************//**
Wakes up the master thread if it is suspended or being suspended. */
UNIV_INTERN
void
srv_wake_master_thread(void)
/*========================*/
{
	srv_activity_count++;

	mutex_enter(&kernel_mutex);

	srv_release_threads(SRV_MASTER, 1);

	mutex_exit(&kernel_mutex);
}
/*******************************************************************//**
Wakes up the purge thread if it's not already awake. */
UNIV_INTERN
void
srv_wake_purge_thread(void)
/*=======================*/
{
	ut_ad(!mutex_own(&kernel_mutex));

	if (srv_n_purge_threads > 0) {

		mutex_enter(&kernel_mutex);

		srv_release_threads(SRV_WORKER, 1);

		mutex_exit(&kernel_mutex);
	}
}
/**********************************************************************
The master thread is tasked to ensure that a flush of the log file
happens once every second in the background. This is to ensure that not
more than one second of transactions are lost in case of a crash when
innodb_flush_log_at_trx_commit != 1 */
static
void
srv_sync_log_buffer_in_background(void)
/*===================================*/
{
	time_t	current_time = time(NULL);

	srv_main_thread_op_info = "flushing log";
	if (difftime(current_time, srv_last_log_flush_time) >= 1) {
		log_buffer_sync_in_background(TRUE);
		srv_last_log_flush_time = current_time;
		srv_log_writes_and_flush++;
	}
}
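/* Illustrative usage note, not original code: the master and purge threads
call this function freely inside their loops; the difftime() guard makes
repeated calls within the same second cheap no-ops:

	srv_sync_log_buffer_in_background();	.. syncs the log buffer ..
	srv_sync_log_buffer_in_background();	.. same second: no-op ..

so the log buffer is synced at most once per second in the background. */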
/********************************************************************//**
Do a full purge, reconfigure the purge sub-system if a dynamic
change is detected. */
static
void
srv_master_do_purge(void)
/*=====================*/
{
	ulint	n_pages_purged;

	ut_ad(!mutex_own(&kernel_mutex));

	ut_a(srv_n_purge_threads == 0);

	do {
		/* Check for shutdown and change in purge config. */
		if (srv_fast_shutdown && srv_shutdown_state > 0) {
			/* Nothing to purge. */
			n_pages_purged = 0;
		} else {
			n_pages_purged = trx_purge(srv_purge_batch_size);
		}

		srv_sync_log_buffer_in_background();

	} while (n_pages_purged > 0);
}
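/* Illustrative note, not original code: trx_purge() returns the number of
pages handled in one batch, so the do-while above simply re-runs batches of
srv_purge_batch_size until a batch comes back empty, syncing the log buffer
between batches. During a fast shutdown the branch above forces a zero-work
batch, which terminates the loop on its next test. */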
/*********************************************************************//**
The master thread controlling the server.
@return a dummy parameter */
UNIV_INTERN
os_thread_ret_t
srv_master_thread(
/*==============*/
	void*	/*arg __attribute__((unused))*/)
			/*!< in: a dummy parameter required by
			os_thread_create */
{
	buf_pool_stat_t buf_stat;
	os_event_t	event;
	ulint		old_activity_count;
	ulint		n_pages_purged	= 0;
	ulint		n_bytes_merged;
	ulint		n_pages_flushed;
	ulint		n_bytes_archived;
	ulint		n_tables_to_drop;
	ulint		n_ios;
	ulint		n_ios_old;
	ulint		n_ios_very_old;
	ulint		n_pend_ios;
	ulint		next_itr_time;
	ulint		i;

#ifdef UNIV_DEBUG_THREAD_CREATION
	fprintf(stderr, "Master thread starts, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif /* UNIV_DEBUG_THREAD_CREATION */

#ifdef UNIV_PFS_THREAD
	pfs_register_thread(srv_master_thread_key);
#endif /* UNIV_PFS_THREAD */

	srv_main_thread_process_no = os_proc_get_number();
	srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());

	srv_table_reserve_slot(SRV_MASTER);

	mutex_enter(&kernel_mutex);

	srv_n_threads_active[SRV_MASTER]++;

	mutex_exit(&kernel_mutex);

loop:
	/*****************************************************************/
	/* ---- When there is database activity by users, we cycle in this
	loop */

	srv_main_thread_op_info = "reserving kernel mutex";

	buf_get_total_stat(&buf_stat);
	n_ios_very_old = log_sys->n_log_ios + buf_stat.n_pages_read
		+ buf_stat.n_pages_written;
	mutex_enter(&kernel_mutex);

	/* Store the user activity counter at the start of this loop */
	old_activity_count = srv_activity_count;

	mutex_exit(&kernel_mutex);

	if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {

		goto suspend_thread;
	}

	/* ---- We run the following loop approximately once per second
	when there is database activity */

	srv_last_log_flush_time = time(NULL);

	/* Sleep for 1 second on entering the for loop below the first time. */
	next_itr_time = ut_time_ms() + 1000;

	for (i = 0; i < 10; i++) {
		ulint	cur_time = ut_time_ms();

		/* ALTER TABLE in MySQL requires on Unix that the table
		handler can drop tables lazily after there no longer are
		SELECT queries to them. */

		srv_main_thread_op_info = "doing background drop tables";

		row_drop_tables_for_mysql_in_background();

		srv_main_thread_op_info = "";

		if (srv_fast_shutdown && srv_shutdown_state > 0) {

			goto background_loop;
		}

		buf_get_total_stat(&buf_stat);

		n_ios_old = log_sys->n_log_ios + buf_stat.n_pages_read
			+ buf_stat.n_pages_written;

		srv_main_thread_op_info = "sleeping";
		srv_main_1_second_loops++;

		if (next_itr_time > cur_time
		    && srv_shutdown_state == SRV_SHUTDOWN_NONE) {

			/* Get sleep interval in micro seconds. We use
			ut_min() to avoid long sleep in case of
			wrap around. */
			os_thread_sleep(ut_min(1000000,
					(next_itr_time - cur_time)
					* 1000));
			srv_main_sleeps++;
		}

		/* Each iteration should happen at 1 second interval. */
		next_itr_time = ut_time_ms() + 1000;

		/* Flush logs if needed */
		srv_sync_log_buffer_in_background();

		srv_main_thread_op_info = "making checkpoint";
		log_free_check();

		/* If i/os during one second sleep were less than 5% of
		capacity, we assume that there is free disk i/o capacity
		available, and it makes sense to do an insert buffer merge. */

		buf_get_total_stat(&buf_stat);
		n_pend_ios = buf_get_n_pending_ios()
			+ log_sys->n_pending_writes;
		n_ios = log_sys->n_log_ios + buf_stat.n_pages_read
			+ buf_stat.n_pages_written;
		if (n_pend_ios < SRV_PEND_IO_THRESHOLD
		    && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
			srv_main_thread_op_info = "doing insert buffer merge";
			ibuf_contract_for_n_pages(FALSE, PCT_IO(5));

			/* Flush logs if needed */
			srv_sync_log_buffer_in_background();
		}

		if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
				  > srv_max_buf_pool_modified_pct)) {

			/* Try to keep the number of modified pages in the
			buffer pool under the limit wished by the user */

			srv_main_thread_op_info =
				"flushing buffer pool pages";
			n_pages_flushed = buf_flush_list(
				PCT_IO(100), IB_ULONGLONG_MAX);

		} else if (srv_adaptive_flushing) {

			/* Try to keep the rate of flushing of dirty
			pages such that redo log generation does not
			produce bursts of IO at checkpoint time. */
			ulint	n_flush = buf_flush_get_desired_flush_rate();

			if (n_flush) {
				srv_main_thread_op_info =
					"flushing buffer pool pages";
				n_flush = ut_min(PCT_IO(100), n_flush);
				n_pages_flushed = buf_flush_list(
					n_flush, IB_ULONGLONG_MAX);
			}
		}

		if (srv_activity_count == old_activity_count) {

			/* There is no user activity at the moment, go to
			the background loop */

			goto background_loop;
		}
	}

	/* ---- We perform the following code approximately once per
	10 seconds when there is database activity */

#ifdef MEM_PERIODIC_CHECK
	/* Check magic numbers of every allocated mem block once in 10
	seconds */
	mem_validate_all_blocks();
#endif
	/* If i/os during the 10 second period were less than 200% of
	capacity, we assume that there is free disk i/o capacity
	available, and it makes sense to flush srv_io_capacity pages.

	Note that this is done regardless of the fraction of dirty
	pages relative to the max requested by the user. The one second
	loop above requests writes for that case. The writes done here
	are not required, and may be disabled. */

	buf_get_total_stat(&buf_stat);
	n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
	n_ios = log_sys->n_log_ios + buf_stat.n_pages_read
		+ buf_stat.n_pages_written;

	srv_main_10_second_loops++;
	if (n_pend_ios < SRV_PEND_IO_THRESHOLD
	    && (n_ios - n_ios_very_old < SRV_PAST_IO_ACTIVITY)) {

		srv_main_thread_op_info = "flushing buffer pool pages";
		buf_flush_list(PCT_IO(100), IB_ULONGLONG_MAX);

		/* Flush logs if needed */
		srv_sync_log_buffer_in_background();
	}

	/* We run a batch of insert buffer merge every 10 seconds,
	even if the server is active */

	srv_main_thread_op_info = "doing insert buffer merge";
	ibuf_contract_for_n_pages(FALSE, PCT_IO(5));

	/* Flush logs if needed */
	srv_sync_log_buffer_in_background();

	if (srv_n_purge_threads == 0) {
		srv_main_thread_op_info = "master purging";

		srv_master_do_purge();

		if (srv_fast_shutdown && srv_shutdown_state > 0) {

			goto background_loop;
		}
	}

	srv_main_thread_op_info = "flushing buffer pool pages";

	/* Flush a few oldest pages to make a new checkpoint younger */

	if (buf_get_modified_ratio_pct() > 70) {

		/* If there are lots of modified pages in the buffer pool
		(> 70 %), we assume we can afford reserving the disk(s) for
		the time it requires to flush 100 pages */

		n_pages_flushed = buf_flush_list(
			PCT_IO(100), IB_ULONGLONG_MAX);
	} else {
		/* Otherwise, we only flush a small number of pages so that
		we do not unnecessarily use much disk i/o capacity from
		other work */

		n_pages_flushed = buf_flush_list(
			PCT_IO(10), IB_ULONGLONG_MAX);
	}

	srv_main_thread_op_info = "making checkpoint";

	/* Make a new checkpoint about once in 10 seconds */

	log_checkpoint(TRUE, FALSE);

	srv_main_thread_op_info = "reserving kernel mutex";

	mutex_enter(&kernel_mutex);

	/* ---- When there is database activity, we jump from here back to
	the start of loop */

	if (srv_activity_count != old_activity_count) {
		mutex_exit(&kernel_mutex);
		goto loop;
	}

	mutex_exit(&kernel_mutex);

	/* If the database is quiet, we enter the background loop */

	/*****************************************************************/
background_loop:
	/* ---- In this loop we run background operations when the server
	is quiet from user activity. Also in the case of a shutdown, we
	loop here, flushing the buffer pool to the data files. */

	/* The server has been quiet for a while: start running background
	operations */
	srv_main_background_loops++;
	srv_main_thread_op_info = "doing background drop tables";

	n_tables_to_drop = row_drop_tables_for_mysql_in_background();

	if (n_tables_to_drop > 0) {
		/* Do not monopolize the CPU even if there are tables waiting
		in the background drop queue. (It is essentially a bug if
		MySQL tries to drop a table while there are still open handles
		to it and we had to put it to the background drop queue.) */

		if (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
			os_thread_sleep(100000);
		}
	}

	if (srv_n_purge_threads == 0) {
		srv_main_thread_op_info = "master purging";

		srv_master_do_purge();
	}

	srv_main_thread_op_info = "reserving kernel mutex";

	mutex_enter(&kernel_mutex);
	if (srv_activity_count != old_activity_count) {
		mutex_exit(&kernel_mutex);
		goto loop;
	}
	mutex_exit(&kernel_mutex);

	srv_main_thread_op_info = "doing insert buffer merge";

	if (srv_fast_shutdown && srv_shutdown_state > 0) {
		n_bytes_merged = 0;
	} else {
		/* This should do an amount of IO similar to the number of
		dirty pages that will be flushed in the call to
		buf_flush_list below. Otherwise, the system favors
		clean pages over cleanup throughput. */
		n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
							   PCT_IO(100));
	}

	srv_main_thread_op_info = "reserving kernel mutex";

	mutex_enter(&kernel_mutex);
	if (srv_activity_count != old_activity_count) {
		mutex_exit(&kernel_mutex);
		goto loop;
	}
	mutex_exit(&kernel_mutex);

flush_loop:
	srv_main_thread_op_info = "flushing buffer pool pages";
	srv_main_flush_loops++;
	if (srv_fast_shutdown < 2) {
		n_pages_flushed = buf_flush_list(
			PCT_IO(100), IB_ULONGLONG_MAX);
	} else {
		/* In the fastest shutdown we do not flush the buffer pool
		to data files: we set n_pages_flushed to 0 artificially. */

		n_pages_flushed = 0;
	}

	srv_main_thread_op_info = "reserving kernel mutex";

	mutex_enter(&kernel_mutex);
	if (srv_activity_count != old_activity_count) {
		mutex_exit(&kernel_mutex);
		goto loop;
	}
	mutex_exit(&kernel_mutex);

	srv_main_thread_op_info = "waiting for buffer pool flush to end";
	buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);

	/* Flush logs if needed */
	srv_sync_log_buffer_in_background();

	srv_main_thread_op_info = "making checkpoint";

	log_checkpoint(TRUE, FALSE);

	if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {

		/* Try to keep the number of modified pages in the
		buffer pool under the limit wished by the user */

		goto flush_loop;
	}

	srv_main_thread_op_info = "reserving kernel mutex";

	mutex_enter(&kernel_mutex);
	if (srv_activity_count != old_activity_count) {
		mutex_exit(&kernel_mutex);
		goto loop;
	}
	mutex_exit(&kernel_mutex);

	srv_main_thread_op_info = "archiving log (if log archive is on)";

#ifdef UNIV_LOG_ARCHIVE
	log_archive_do(FALSE, &n_bytes_archived);
#endif /* UNIV_LOG_ARCHIVE */
	n_bytes_archived = 0;

	/* Keep looping in the background loop if still work to do */

	if (srv_fast_shutdown && srv_shutdown_state > 0) {
		if (n_tables_to_drop + n_pages_flushed
		    + n_bytes_archived != 0) {

			/* If we are doing a fast shutdown (= the default)
			we do not do purge or insert buffer merge. But we
			flush the buffer pool completely to disk.
			In a 'very fast' shutdown we do not flush the buffer
			pool to data files: we have set n_pages_flushed to
			0 artificially. */

			goto background_loop;
		}
	} else if (n_tables_to_drop
		   + n_pages_purged + n_bytes_merged + n_pages_flushed
		   + n_bytes_archived != 0) {

		/* In a 'slow' shutdown we run purge and the insert buffer
		merge to completion */

		goto background_loop;
	}

	/* There is no work for background operations either: suspend
	master thread to wait for more server activity */

suspend_thread:
	srv_main_thread_op_info = "suspending";

	mutex_enter(&kernel_mutex);

	if (row_get_background_drop_list_len_low() > 0) {
		mutex_exit(&kernel_mutex);

		goto loop;
	}

	event = srv_suspend_thread();

	mutex_exit(&kernel_mutex);

	/* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
	waits for database activity to die down when converting < 4.1.x
	databases, and relies on this string being exactly as it is. InnoDB
	manual also mentions this string in several places. */
	srv_main_thread_op_info = "waiting for server activity";

	os_event_wait(event);

	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
		/* This is only extra safety, the thread should exit
		already when the event wait ends */

		os_thread_exit(NULL);
	}

	/* When there is user activity, InnoDB will set the event and the
	main thread goes back to loop. */

	goto loop;

#if !defined(__SUNPRO_C)
	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
#endif
}
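/* Illustrative note (assumption: PCT_IO is the io-capacity scaling macro
defined earlier in this file): PCT_IO(p) budgets roughly p per cent of the
configured innodb_io_capacity,

	PCT_IO(5)   ~ srv_io_capacity * 5 / 100	   .. ibuf merge budget ..
	PCT_IO(100) ~ srv_io_capacity		   .. full flush budget ..

which is how the one-second and ten-second loops above ration disk
bandwidth between page flushing and insert buffer merges. */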
/*********************************************************************//**
Asynchronous purge thread.
@return a dummy parameter */
UNIV_INTERN
os_thread_ret_t
srv_purge_thread(
/*=============*/
	void*	/*arg __attribute__((unused))*/)	/*!< in: a dummy parameter
						required by os_thread_create */
{
	srv_slot_t*	slot;
	ulint		slot_no = ULINT_UNDEFINED;
	ulint		n_total_purged = ULINT_UNDEFINED;

	ut_a(srv_n_purge_threads == 1);

#ifdef UNIV_DEBUG_THREAD_CREATION
	fprintf(stderr, "InnoDB: Purge thread running, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif /* UNIV_DEBUG_THREAD_CREATION */

	mutex_enter(&kernel_mutex);

	slot_no = srv_table_reserve_slot(SRV_WORKER);

	slot = srv_table_get_nth_slot(slot_no);

	++srv_n_threads_active[SRV_WORKER];

	mutex_exit(&kernel_mutex);

	while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {

		ulint	n_pages_purged;

		/* If there are very few records to purge or the last
		purge didn't purge any records then wait for activity.
		We peek at the history len without holding any mutex
		because in the worst case we will end up waiting for
		the next purge event. */
		if (trx_sys->rseg_history_len < srv_purge_batch_size
		    || n_total_purged == 0) {

			os_event_t	event;

			mutex_enter(&kernel_mutex);

			event = srv_suspend_thread();

			mutex_exit(&kernel_mutex);

			os_event_wait(event);
		}

		/* Check for shutdown and whether we should do purge at all. */
		if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND
		    || srv_shutdown_state != 0
		    || srv_fast_shutdown) {

			break;
		}

		n_total_purged = 0;

		/* Purge until there are no more records to purge and there is
		no change in configuration or server state. */
		do {
			n_pages_purged = trx_purge(srv_purge_batch_size);

			n_total_purged += n_pages_purged;

		} while (n_pages_purged > 0 && !srv_fast_shutdown);

		srv_sync_log_buffer_in_background();
	}

	mutex_enter(&kernel_mutex);

	ut_ad(srv_table_get_nth_slot(slot_no) == slot);

	/* Decrement the active count. */
	srv_suspend_thread();

	slot->in_use = FALSE;

	/* Free the thread local memory. */
	thr_local_free(os_thread_get_curr_id());

	mutex_exit(&kernel_mutex);

#ifdef UNIV_DEBUG_THREAD_CREATION
	fprintf(stderr, "InnoDB: Purge thread exiting, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif /* UNIV_DEBUG_THREAD_CREATION */

	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */
	os_thread_exit(NULL);

	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
}
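/* Design note derived from the code above, illustrative only: purge runs in
exactly one of two places. With srv_n_purge_threads == 0 the master thread
purges synchronously via srv_master_do_purge(); with srv_n_purge_threads == 1
this dedicated thread does the work and the master thread merely wakes it
through srv_wake_purge_thread_if_not_active(). */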
/**********************************************************************//**
Enqueues a task to server task queue and releases a worker thread, if there
is a suspended one. */
UNIV_INTERN
void
srv_que_task_enqueue_low(
/*=====================*/
	que_thr_t*	thr)	/*!< in: query thread */
{
	ut_ad(thr);

	mutex_enter(&kernel_mutex);

	UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);

	srv_release_threads(SRV_WORKER, 1);

	mutex_exit(&kernel_mutex);
}