/******************************************************
The database server main program

NOTE: SQL Server 7 uses something which the documentation
calls user mode scheduled threads (UMS threads). One such
thread is usually allocated per processor. Win32
documentation does not know any UMS threads, which suggests
that the concept is internal to SQL Server 7. It may mean that
SQL Server 7 does all the scheduling of threads itself, even
in i/o waits. We should maybe modify InnoDB to use the same
technique, because thread switches within NT may be too slow.

SQL Server 7 also mentions fibers, which are cooperatively
scheduled threads. They can boost performance by 5 %,
according to Delaney and Soukup's book.

Windows 2000 will have something called thread pooling
(see msdn website), which we could possibly use.

Another possibility could be to use some very fast user space
thread library. This might confuse NT though.

Created 10/8/1995 Heikki Tuuri
*******************************************************/
#include "srv0srv.h"

#include "ut0mem.h"
#include "os0proc.h"
#include "mem0mem.h"
#include "mem0pool.h"
#include "sync0sync.h"
#include "thr0loc.h"
#include "que0que.h"
#include "srv0que.h"
#include "log0recv.h"
#include "pars0pars.h"
#include "usr0sess.h"
#include "lock0lock.h"
#include "trx0purge.h"
#include "ibuf0ibuf.h"
#include "buf0flu.h"
#include "btr0sea.h"
#include "dict0load.h"
#include "dict0boot.h"
#include "srv0start.h"
#include "row0mysql.h"
#include "ha_prototypes.h"

/* This is set to TRUE if the MySQL user has set it in MySQL; currently
affects only FOREIGN KEY definition parsing */
ibool	srv_lower_case_table_names	= FALSE;

/* The following counter is incremented whenever there is some user activity
in the server */
ulint	srv_activity_count	= 0;

/* The following is the maximum allowed duration of a lock wait. */
ulint	srv_fatal_semaphore_wait_threshold = 600;

/* How much data manipulation language (DML) statements need to be delayed,
in microseconds, in order to reduce the lagging of the purge thread. */
ulint	srv_dml_needed_delay = 0;

ibool	srv_lock_timeout_and_monitor_active = FALSE;
ibool	srv_error_monitor_active = FALSE;

const char*	srv_main_thread_op_info = "";

/* Prefix used by MySQL to indicate pre-5.1 table name encoding */
const char	srv_mysql50_table_name_prefix[9] = "#mysql50#";

/* Server parameters which are read from the initfile */

/* The following three are dir paths which are catenated before file
names, where the file name itself may also contain a path */

char*	srv_data_home	= NULL;
#ifdef UNIV_LOG_ARCHIVE
char*	srv_arch_dir	= NULL;
#endif /* UNIV_LOG_ARCHIVE */

ibool	srv_file_per_table = FALSE;	/* store to its own file each table
					created by a user; data dictionary
					tables are in the system tablespace
					0 */
ibool	srv_locks_unsafe_for_binlog = FALSE;	/* Place locks to records only
					i.e. do not use next-key locking
					except on duplicate key checking and
					foreign key checking */

ulint	srv_n_data_files = 0;
char**	srv_data_file_names = NULL;
ulint*	srv_data_file_sizes = NULL;	/* size in database pages */

ibool	srv_auto_extend_last_data_file	= FALSE; /* if TRUE, then we
						 auto-extend the last data
						 file */
ulint	srv_last_file_size_max	= 0;	/* if != 0, this tells the max size
					auto-extending may increase the last
					data file size */
ulong	srv_auto_extend_increment = 8;	/* If the last data file is
					auto-extended, we add this many pages
					to it at a time */
ulint*	srv_data_file_is_raw_partition = NULL;

/* If the following is TRUE we do not allow inserts etc. This protects
the user from forgetting the 'newraw' keyword to my.cnf */

ibool	srv_created_new_raw	= FALSE;

char**	srv_log_group_home_dirs = NULL;

ulint	srv_n_log_groups	= ULINT_MAX;
ulint	srv_n_log_files		= ULINT_MAX;
ulint	srv_log_file_size	= ULINT_MAX;	/* size in database pages */
ulint	srv_log_buffer_size	= ULINT_MAX;	/* size in database pages */
ulong	srv_flush_log_at_trx_commit = 1;

byte	srv_latin1_ordering[256]	/* The sort order table of the latin1
					character set. The following table is
					the MySQL order as of Feb 10th, 2002 */
= {
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27
, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F
, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F
, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F
, 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
, 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F
, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97
, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F
, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7
, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF
, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7
, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF
, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF
, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
};
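
/* Illustrative sketch, compiled out: how a sort order table like
srv_latin1_ordering above can be used to compare two latin1 strings in a
case- and accent-insensitive way. The function below is hypothetical and
not part of the server; note e.g. that 'a' (0x61) and 'A' (0x41) both map
to 0x41 in the table and thus compare equal. */
#if 0
static int
latin1_order_cmp(
	const unsigned char*	a,	/* in: first string */
	ulint			a_len,	/* in: length of a, in bytes */
	const unsigned char*	b,	/* in: second string */
	ulint			b_len)	/* in: length of b, in bytes */
{
	ulint	i;
	ulint	len = (a_len < b_len) ? a_len : b_len;

	for (i = 0; i < len; i++) {
		/* map each byte through the sort order table before
		comparing */
		byte	wa = srv_latin1_ordering[a[i]];
		byte	wb = srv_latin1_ordering[b[i]];

		if (wa != wb) {

			return(wa < wb ? -1 : 1);
		}
	}

	/* equal prefix: the shorter string sorts first */
	return((a_len == b_len) ? 0 : (a_len < b_len ? -1 : 1));
}
#endif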

ulint	srv_pool_size		= ULINT_MAX;	/* size in pages; MySQL inits
						this to size in kilobytes but
						we normalize this to pages in
						srv_boot() */
ulint	srv_awe_window_size	= 0;		/* size in pages; MySQL inits
						this to bytes, but we
						normalize it to pages in
						srv_boot() */
ulint	srv_mem_pool_size	= ULINT_MAX;	/* size in bytes */
ulint	srv_lock_table_size	= ULINT_MAX;

ulint	srv_n_file_io_threads	= ULINT_MAX;

#ifdef UNIV_LOG_ARCHIVE
ibool	srv_log_archive_on	= FALSE;
ibool	srv_archive_recovery	= 0;
dulint	srv_archive_recovery_limit_lsn;
#endif /* UNIV_LOG_ARCHIVE */

ulint	srv_lock_wait_timeout	= 1024 * 1024 * 1024;

/* This parameter is used to throttle the number of insert buffers that are
merged in a batch. By increasing this parameter on a faster disk you can
possibly reduce the number of I/O operations performed to complete the
merge operation. The value of this parameter is used as is by the
background loop when the system is idle (low load); on a busy system
the parameter is scaled down by a factor of 4, to avoid putting
a heavier load on the I/O subsystem. */

ulong	srv_insert_buffer_batch_size = 20;

char*	srv_file_flush_method_str = NULL;
ulint	srv_unix_file_flush_method = SRV_UNIX_FSYNC;
ulint	srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;

ulint	srv_max_n_open_files	  = 300;

/* The InnoDB main thread tries to keep the ratio of modified pages
in the buffer pool to all database pages in the buffer pool smaller than
the following number. But it is not guaranteed that the value stays below
that during a time of heavy update/insert activity. */

ulong	srv_max_buf_pool_modified_pct	= 90;

/* variable counts amount of data read in total (in bytes) */
ulint srv_data_read = 0;

/* here we count the amount of data written in total (in bytes) */
ulint srv_data_written = 0;

/* the number of the log write requests done */
ulint srv_log_write_requests = 0;

/* the number of physical writes to the log performed */
ulint srv_log_writes = 0;

/* amount of data written to the log files in bytes */
ulint srv_os_log_written = 0;

/* amount of writes being done to the log files */
ulint srv_os_log_pending_writes = 0;

/* we increase this counter, when we don't have enough space in the
log buffer and have to flush it */
ulint srv_log_waits = 0;

/* this variable counts the number of times the doublewrite buffer
was flushed */
ulint srv_dblwr_writes = 0;

/* here we store the number of pages that have been flushed to the
doublewrite buffer */
ulint srv_dblwr_pages_written = 0;

/* in this variable we store the number of write requests issued */
ulint srv_buf_pool_write_requests = 0;

/* here we store the number of times when we had to wait for a free page
in the buffer pool. It happens when the buffer pool is full and we need
to make a flush, in order to be able to read or create a page. */
ulint srv_buf_pool_wait_free = 0;

/* variable to count the number of pages that were written from the buffer
pool to disk */
ulint srv_buf_pool_flushed = 0;

/* variable to count the number of buffer pool reads that led to the
reading of a disk page */
ulint srv_buf_pool_reads = 0;

/* variable to count the number of sequential read-aheads */
ulint srv_read_ahead_seq = 0;

/* variable to count the number of random read-aheads */
ulint srv_read_ahead_rnd = 0;

/* structure to pass status variables to MySQL */
export_struc export_vars;

/* If the following is != 0 we do not allow inserts etc. This protects
the user from forgetting the innodb_force_recovery keyword to my.cnf */

ulint	srv_force_recovery	= 0;
/*-----------------------*/
/* We are prepared for a situation that we have this many threads waiting for
a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
value. */

ulint	srv_max_n_threads	= 0;

/* The following controls how many threads we let inside InnoDB concurrently:
threads waiting for locks are not counted into the number because otherwise
we could get a deadlock. MySQL creates a thread for each user session, and
semaphore contention and convoy problems can occur without this restriction.
Value 10 should be good if there are less than 4 processors + 4 disks in the
computer. Bigger computers need bigger values. Value 0 will disable the
concurrency check. */

ulong	srv_thread_concurrency	= 0;
ulong	srv_commit_concurrency	= 0;

os_fast_mutex_t	srv_conc_mutex;		/* this mutex protects srv_conc data
					structures */
lint	srv_conc_n_threads	= 0;	/* number of OS threads currently
					inside InnoDB; it is not an error
					if this drops temporarily below zero
					because we do not demand that every
					thread increments this, but a thread
					waiting for a lock decrements this
					temporarily */
ulint	srv_conc_n_waiting_threads = 0;	/* number of OS threads waiting in the
					FIFO for a permission to enter
					InnoDB */

typedef struct srv_conc_slot_struct	srv_conc_slot_t;
struct srv_conc_slot_struct{
	os_event_t	event;		/* event to wait */
	ibool		reserved;	/* TRUE if slot
					reserved */
	ibool		wait_ended;	/* TRUE when another thread has
					already set the event and the
					thread in this slot is free to
					proceed; but reserved may still be
					TRUE at that point */
	UT_LIST_NODE_T(srv_conc_slot_t)	srv_conc_queue;	/* queue node */
};

UT_LIST_BASE_NODE_T(srv_conc_slot_t)	srv_conc_queue;	/* queue of threads
							waiting to get in */
srv_conc_slot_t* srv_conc_slots;			/* array of wait
							slots */

/* Number of times a thread is allowed to enter InnoDB within the same
SQL query after it has once got the ticket at srv_conc_enter_innodb */
#define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter
#define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay
/*-----------------------*/
/* If the following is set to 1 then we do not run purge and insert buffer
merge to completion before shutdown. If it is set to 2, do not even flush the
buffer pool to data files at the shutdown: we effectively 'crash'
InnoDB (but lose no committed transactions). */
ulint	srv_fast_shutdown	= 0;

/* Generate an innodb_status.<pid> file */
ibool	srv_innodb_status	= FALSE;

ibool	srv_stats_on_metadata	= TRUE;

ibool	srv_use_doublewrite_buf	= TRUE;
ibool	srv_use_checksums = TRUE;

ibool	srv_set_thread_priorities = TRUE;
int	srv_query_thread_priority = 0;

/* TRUE if the Address Windowing Extensions of Windows are used; then we must
disable adaptive hash indexes */
ibool	srv_use_awe			= FALSE;
ibool	srv_use_adaptive_hash_indexes	= TRUE;

/*-------------------------------------------*/
ulong	srv_n_spin_wait_rounds	= 20;
ulong	srv_n_free_tickets_to_enter = 500;
ulong	srv_thread_sleep_delay = 10000;
ulint	srv_spin_wait_delay	= 5;
ibool	srv_priority_boost	= TRUE;

ibool	srv_print_thread_releases	= FALSE;
ibool	srv_print_lock_waits		= FALSE;
ibool	srv_print_buf_io		= FALSE;
ibool	srv_print_log_io		= FALSE;
ibool	srv_print_latch_waits		= FALSE;

ulint		srv_n_rows_inserted		= 0;
ulint		srv_n_rows_updated		= 0;
ulint		srv_n_rows_deleted		= 0;
ulint		srv_n_rows_read			= 0;
#ifndef UNIV_HOTBACKUP
static ulint	srv_n_rows_inserted_old		= 0;
static ulint	srv_n_rows_updated_old		= 0;
static ulint	srv_n_rows_deleted_old		= 0;
static ulint	srv_n_rows_read_old		= 0;
#endif /* !UNIV_HOTBACKUP */

ulint		srv_n_lock_wait_count		= 0;
ulint		srv_n_lock_wait_current_count	= 0;
ib_longlong	srv_n_lock_wait_time		= 0;
ulint		srv_n_lock_max_wait_time	= 0;

/*
  Set the following to 0 if you want InnoDB to write messages on
  stderr on startup/shutdown
*/
ibool	srv_print_verbose_log		= TRUE;
ibool	srv_print_innodb_monitor	= FALSE;
ibool	srv_print_innodb_lock_monitor	= FALSE;
ibool	srv_print_innodb_tablespace_monitor = FALSE;
ibool	srv_print_innodb_table_monitor = FALSE;

/* Array of English strings describing the current state of an
i/o handler thread */

const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];

time_t	srv_last_monitor_time;

mutex_t	srv_innodb_monitor_mutex;

/* Mutex for locking srv_monitor_file */
mutex_t	srv_monitor_file_mutex;
/* Temporary file for innodb monitor output */
FILE*	srv_monitor_file;
/* Mutex for locking srv_dict_tmpfile.
This mutex has a very high rank; threads reserving it should not
be holding any InnoDB latches. */
mutex_t	srv_dict_tmpfile_mutex;
/* Temporary file for output from the data dictionary */
FILE*	srv_dict_tmpfile;
/* Mutex for locking srv_misc_tmpfile.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
mutex_t	srv_misc_tmpfile_mutex;
/* Temporary file for miscellaneous diagnostic output */
FILE*	srv_misc_tmpfile;

ulint	srv_main_thread_process_no	= 0;
ulint	srv_main_thread_id		= 0;

/*
	IMPLEMENTATION OF THE SERVER MAIN PROGRAM
	=========================================

There is the following analogue between this database
server and an operating system kernel:

DB concept			equivalent OS concept
----------			---------------------
transaction		--	process;

query thread		--	thread;

the rollback state	--	kill signal delivered to a process;

query thread execution:
(a) without kernel mutex
reserved		--	process executing in user mode;
(b) with kernel mutex reserved
			--	process executing in kernel mode;

The server is controlled by a master thread which runs at
a priority higher than normal, that is, higher than user threads.
It sleeps most of the time, and wakes up, say, every 300 milliseconds,
to check whether there is anything happening in the server which
requires intervention of the master thread. Such situations may be,
for example, when flushing of dirty blocks is needed in the buffer
pool or old versions of database rows have to be cleaned away.

The threads which we call user threads serve the queries of
the clients and input from the console of the server.
They run at normal priority. The server may have several
communications endpoints. A dedicated set of user threads waits
at each of these endpoints ready to receive a client request.
Each request is taken by a single user thread, which then starts
processing and, when the result is ready, sends it to the client
and returns to wait at the same endpoint the thread started from.

So, we do not have dedicated communication threads listening at
the endpoints and dealing the jobs to dedicated worker threads.
Our architecture saves one thread switch per request, compared
to the solution with dedicated communication threads
which amounts to 15 microseconds on 100 MHz Pentium
running NT. If the client
is communicating over a network, this saving is negligible, but
if the client resides in the same machine, maybe in an SMP machine
on a different processor from the server thread, the saving
can be important as the threads can communicate over shared
memory with an overhead of a few microseconds.

We may later implement a dedicated communication thread solution
for those endpoints which communicate over a network.

Our solution with user threads has two problems: for each endpoint
there has to be a number of listening threads. If there are many
communication endpoints, it may be difficult to set the right number
of concurrent threads in the system, as many of the threads
may always be waiting at less busy endpoints. Another problem
is queuing of the messages, as the server internally does not
offer any queue for jobs.

Another group of user threads is intended for splitting the
queries and processing them in parallel. Let us call these
parallel communication threads. These threads are waiting for
parallelized tasks, suspended on event semaphores.

A single user thread waits for input from the console,
like a command to shut the database.

Utility threads are a different group of threads which takes
care of the buffer pool flushing and other, mainly background
operations, in the server.
Some of these utility threads always run at a lower than normal
priority, so that they are always in background. Some of them
may dynamically boost their priority by the pri_adjust function,
even to higher than normal priority, if their task becomes urgent.
The running of utilities is controlled by high- and low-water marks
of urgency. The urgency may be measured by the number of dirty blocks
in the buffer pool, in the case of the flush thread, for example.
When the high-water mark is exceeded, a utility starts running, until
the urgency drops under the low-water mark. Then the utility thread
suspends itself to wait for an event. The master thread is
responsible for signaling this event when the utility thread is
again needed.

For each individual type of utility, some threads always remain
at lower than normal priority. This is because pri_adjust is implemented
so that the threads at normal or higher priority control their
share of running time by calling sleep. Thus, if the load of the
system suddenly drops, these threads cannot necessarily utilize
the system fully. The background priority threads make up for this,
starting to run when the load drops.

When there is no activity in the system, also the master thread
suspends itself to wait for an event making
the server totally silent. The responsibility to signal this
event is on the user thread which again receives a message
from a client.

There is still one complication in our server design. If a
background utility thread obtains a resource (e.g., mutex) needed by a user
thread, and there is also some other user activity in the system,
the user thread may have to wait indefinitely long for the
resource, as the OS does not schedule a background thread if
there is some other runnable user thread. This problem is called
priority inversion in real-time programming.

One solution to the priority inversion problem would be to
keep record of which thread owns which resource and
in the above case boost the priority of the background thread
so that it will be scheduled and it can release the resource.
This solution is called priority inheritance in real-time programming.
A drawback of this solution is that the overhead of acquiring a mutex
increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
the thread has to call os_thread_get_curr_id.
This may be compared to 0.5 microsecond overhead for a mutex lock-unlock
pair. Note that the thread
cannot store the information in the resource, say mutex, itself,
because competing threads could wipe out the information if it is
stored before acquiring the mutex, and if it is stored afterwards,
the information is outdated for the time of one machine instruction,
at least. (To be precise, the information could be stored to
lock_word in mutex if the machine supports atomic swap.)

The above solution with priority inheritance may become actual in the
future, but at the moment we plan to implement a more coarse solution,
which could be called a global priority inheritance. If a thread
has to wait for a long time, say 300 milliseconds, for a resource,
we just guess that it may be waiting for a resource owned by a background
thread, and boost the priority of all runnable background threads
to the normal level. The background threads then themselves adjust
their fixed priority back to background after releasing all resources
they had (or, at some fixed points in their program code).

What is the performance of the global priority inheritance solution?
We may weigh the length of the wait time, 300 milliseconds, during
which the system processes some other thread,
against the cost of boosting the priority of each runnable background
thread, rescheduling it, and lowering the priority again.
On 100 MHz Pentium + NT this overhead may be of the order of 100
microseconds per thread. So, if the number of runnable background
threads is not very big, say < 100, the cost is tolerable.
Utility threads probably will access resources used by
user threads not very often, so collisions of user threads
with preempted utility threads should not happen very often.

The thread table contains
information of the current status of each thread existing in the system,
and also the event semaphores used in suspending the master thread
and utility and parallel communication threads when they have nothing to do.
The thread table can be seen as an analogue to the process table
in a traditional Unix implementation.

The thread table is also used in the global priority inheritance
scheme. This brings in one additional complication: threads accessing
the thread table must have at least normal fixed priority,
because the priority inheritance solution does not work if a background
thread is preempted while possessing the mutex protecting the thread table.
So, if a thread accesses the thread table, its priority has to be
boosted at least to normal. This priority requirement can be seen similar to
the privileged mode used when processing the kernel calls in traditional
Unix. */
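
/* Illustrative sketch, compiled out: the 'global priority inheritance'
idea described above. If some thread has waited longer than a threshold,
boost every runnable background thread to normal priority; the background
threads lower their own priority again after releasing their resources.
The function name and the priority-setting call are hypothetical; the
server does not currently implement this. */
#if 0
static void
boost_background_threads_if_stalled(
	ulint	wait_ms)	/* in: longest observed wait, in ms */
{
	ulint	i;

	if (wait_ms < 300) {	/* the 300 ms guess from the text above */

		return;
	}

	/* We only guess that the waiter is blocked on a resource owned by
	a background thread; boosting is harmless if the guess is wrong,
	because the background threads drop back to background priority
	once they have released their resources */

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		srv_slot_t*	slot = srv_table_get_nth_slot(i);

		if (slot->in_use && !slot->suspended) {
			/* hypothetical call to raise the thread to
			normal priority: */
			/* os_thread_set_priority(slot->handle,
						  NORMAL_PRIORITY); */
		}
	}
}
#endif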

/* Thread slot in the thread table */
struct srv_slot_struct{
	os_thread_id_t	id;		/* thread id */
	os_thread_t	handle;		/* thread handle */
	ulint		type;		/* thread type: user, utility etc. */
	ibool		in_use;		/* TRUE if this slot is in use */
	ibool		suspended;	/* TRUE if the thread is waiting
					for the event of this slot */
	ib_time_t	suspend_time;	/* time when the thread was
					suspended */
	os_event_t	event;		/* event used in suspending the
					thread when it has nothing to do */
	que_thr_t*	thr;		/* suspended query thread (only
					used for MySQL threads) */
};

/* Table for MySQL threads where they will be suspended to wait for locks */
srv_slot_t*	srv_mysql_table = NULL;

os_event_t	srv_lock_timeout_thread_event;

srv_sys_t*	srv_sys	= NULL;

byte	srv_pad1[64];	/* padding to prevent other memory update
			hotspots from residing on the same memory
			cache line */
mutex_t*	kernel_mutex_temp;/* mutex protecting the server, trx structs,
				query threads, and lock table */
byte	srv_pad2[64];	/* padding to prevent other memory update
			hotspots from residing on the same memory
			cache line */

/* The following three values measure the urgency of the jobs of
buffer, version, and insert threads. They may vary from 0 - 1000.
The server mutex protects all these variables. The low-water values
tell that the server can acquiesce the utility when the value
drops below this low-water mark. */

ulint	srv_meter[SRV_MASTER + 1];
ulint	srv_meter_low_water[SRV_MASTER + 1];
ulint	srv_meter_high_water[SRV_MASTER + 1];
ulint	srv_meter_high_water2[SRV_MASTER + 1];
ulint	srv_meter_foreground[SRV_MASTER + 1];

/* The following values give info about the activity going on in
the database. They are protected by the server mutex. The arrays
are indexed by the type of the thread. */

ulint	srv_n_threads_active[SRV_MASTER + 1];
ulint	srv_n_threads[SRV_MASTER + 1];

/*************************************************************************
Sets the info describing an i/o thread current state. */

void
srv_set_io_thread_op_info(
/*======================*/
	ulint		i,	/* in: the 'segment' of the i/o thread */
	const char*	str)	/* in: constant char string describing the
				state */
{
	ut_a(i < SRV_MAX_N_IO_THREADS);

	srv_io_thread_op_info[i] = str;
}

/*************************************************************************
Accessor function to get pointer to n'th slot in the server thread
table. */
static
srv_slot_t*
srv_table_get_nth_slot(
/*===================*/
			/* out: pointer to the slot */
	ulint	index)	/* in: index of the slot */
{
	ut_a(index < OS_THREAD_MAX_N);

	return(srv_sys->threads + index);
}

#ifndef UNIV_HOTBACKUP
/*************************************************************************
Gets the number of threads in the system. */

ulint
srv_get_n_threads(void)
/*===================*/
{
	ulint	i;
	ulint	n_threads	= 0;

	mutex_enter(&kernel_mutex);

	for (i = SRV_COM; i < SRV_MASTER + 1; i++) {

		n_threads += srv_n_threads[i];
	}

	mutex_exit(&kernel_mutex);

	return(n_threads);
}

/*************************************************************************
Reserves a slot in the thread table for the current thread. Also creates the
thread local storage struct for the current thread. NOTE! The server mutex
has to be reserved by the caller! */
static
ulint
srv_table_reserve_slot(
/*===================*/
			/* out: reserved slot index */
	ulint	type)	/* in: type of the thread: one of SRV_COM, ... */
{
	srv_slot_t*	slot;
	ulint		i;

	ut_a(type > 0);
	ut_a(type <= SRV_MASTER);

	i = 0;
	slot = srv_table_get_nth_slot(i);

	while (slot->in_use) {
		i++;
		slot = srv_table_get_nth_slot(i);
	}

	ut_a(slot->in_use == FALSE);

	slot->in_use = TRUE;
	slot->suspended = FALSE;
	slot->id = os_thread_get_curr_id();
	slot->handle = os_thread_get_curr();
	slot->type = type;

	thr_local_create();

	thr_local_set_slot_no(os_thread_get_curr_id(), i);

	return(i);
}

/*************************************************************************
Suspends the calling thread to wait for the event in its thread slot.
NOTE! The server mutex has to be reserved by the caller! */
static
os_event_t
srv_suspend_thread(void)
/*====================*/
			/* out: event for the calling thread to wait */
{
	srv_slot_t*	slot;
	os_event_t	event;
	ulint		slot_no;
	ulint		type;

	ut_ad(mutex_own(&kernel_mutex));

	slot_no = thr_local_get_slot_no(os_thread_get_curr_id());

	if (srv_print_thread_releases) {
		fprintf(stderr,
			"Suspending thread %lu to slot %lu meter %lu\n",
			(ulong) os_thread_get_curr_id(), (ulong) slot_no,
			(ulong) srv_meter[SRV_RECOVERY]);
	}

	slot = srv_table_get_nth_slot(slot_no);

	type = slot->type;

	ut_ad(type >= SRV_WORKER);
	ut_ad(type <= SRV_MASTER);

	event = slot->event;

	slot->suspended = TRUE;

	ut_ad(srv_n_threads_active[type] > 0);

	srv_n_threads_active[type]--;

	os_event_reset(event);

	return(event);
}
#endif /* !UNIV_HOTBACKUP */

/*************************************************************************
Releases threads of the type given from suspension in the thread table.
NOTE! The server mutex has to be reserved by the caller! */

ulint
srv_release_threads(
/*================*/
			/* out: number of threads released: this may be
			< n if not enough threads were suspended at the
			moment */
	ulint	type,	/* in: thread type */
	ulint	n)	/* in: number of threads to release */
{
	srv_slot_t*	slot;
	ulint		i;
	ulint		count	= 0;

	ut_ad(type >= SRV_WORKER);
	ut_ad(type <= SRV_MASTER);
	ut_ad(n > 0);
	ut_ad(mutex_own(&kernel_mutex));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {

		slot = srv_table_get_nth_slot(i);

		if (slot->in_use && slot->type == type && slot->suspended) {

			slot->suspended = FALSE;

			srv_n_threads_active[type]++;

			os_event_set(slot->event);

			if (srv_print_thread_releases) {
				fprintf(stderr,
					"Releasing thread %lu type %lu"
					" from slot %lu meter %lu\n",
					(ulong) slot->id, (ulong) type,
					(ulong) i,
					(ulong) srv_meter[SRV_RECOVERY]);
			}

			count++;

			if (count == n) {
				break;
			}
		}
	}

	return(count);
}
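
/* Illustrative sketch, compiled out: the suspend/release handshake used
by srv_suspend_thread() and srv_release_threads() above. The waiter arms
its slot event with os_event_reset() while still holding the kernel
mutex, then waits on it; the releasing thread sets the event. Resetting
under the mutex is what prevents a release from being lost between the
reset and the wait. The two function names below are hypothetical. */
#if 0
static void
waiter_side(srv_slot_t* slot)
{
	os_event_t	event;

	mutex_enter(&kernel_mutex);
	event = slot->event;
	os_event_reset(event);		/* arm the event under the mutex */
	mutex_exit(&kernel_mutex);

	os_event_wait(event);		/* sleep until released */
}

static void
releaser_side(srv_slot_t* slot)
{
	mutex_enter(&kernel_mutex);
	os_event_set(slot->event);	/* wake the suspended thread */
	mutex_exit(&kernel_mutex);
}
#endif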

/*************************************************************************
Returns the calling thread type. */

ulint
srv_get_thread_type(void)
/*=====================*/
			/* out: SRV_COM, ... */
{
	ulint		slot_no;
	srv_slot_t*	slot;
	ulint		type;

	mutex_enter(&kernel_mutex);

	slot_no = thr_local_get_slot_no(os_thread_get_curr_id());

	slot = srv_table_get_nth_slot(slot_no);

	type = slot->type;

	ut_ad(type >= SRV_WORKER);
	ut_ad(type <= SRV_MASTER);

	mutex_exit(&kernel_mutex);

	return(type);
}

/*************************************************************************
Initializes the server. */

void
srv_init(void)
/*==========*/
{
	srv_conc_slot_t*	conc_slot;
	srv_slot_t*		slot;
	dict_table_t*		table;
	ulint			i;

	srv_sys = mem_alloc(sizeof(srv_sys_t));

	kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
	mutex_create(&kernel_mutex, SYNC_KERNEL);

	mutex_create(&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);

	srv_sys->threads = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		slot = srv_table_get_nth_slot(i);
		slot->in_use = FALSE;
		slot->type = 0;	/* Avoid purify errors */
		slot->event = os_event_create(NULL);
		ut_a(slot->event);
	}

	srv_mysql_table = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		slot = srv_mysql_table + i;
		slot->in_use = FALSE;
		slot->type = 0;
		slot->event = os_event_create(NULL);
		ut_a(slot->event);
	}

	srv_lock_timeout_thread_event = os_event_create(NULL);

	for (i = 0; i < SRV_MASTER + 1; i++) {
		srv_n_threads_active[i] = 0;
		srv_n_threads[i] = 0;
		srv_meter[i] = 30;
		srv_meter_low_water[i] = 50;
		srv_meter_high_water[i] = 100;
		srv_meter_high_water2[i] = 200;
		srv_meter_foreground[i] = 250;
	}

	UT_LIST_INIT(srv_sys->tasks);

	/* create dummy table and index for old-style infimum and supremum */
	table = dict_mem_table_create("SYS_DUMMY1",
				      DICT_HDR_SPACE, 1, 0);
	dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
			       DATA_ENGLISH | DATA_NOT_NULL, 8);

	srv_sys->dummy_ind1 = dict_mem_index_create(
		"SYS_DUMMY1", "SYS_DUMMY1", DICT_HDR_SPACE, 0, 1);
	dict_index_add_col(srv_sys->dummy_ind1, table, (dict_col_t*)
			   dict_table_get_nth_col(table, 0), 0);
	srv_sys->dummy_ind1->table = table;
	/* create dummy table and index for new-style infimum and supremum */
	table = dict_mem_table_create("SYS_DUMMY2",
				      DICT_HDR_SPACE, 1, DICT_TF_COMPACT);
	dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
			       DATA_ENGLISH | DATA_NOT_NULL, 8);
	srv_sys->dummy_ind2 = dict_mem_index_create(
		"SYS_DUMMY2", "SYS_DUMMY2", DICT_HDR_SPACE, 0, 1);
	dict_index_add_col(srv_sys->dummy_ind2, table, (dict_col_t*)
			   dict_table_get_nth_col(table, 0), 0);
	srv_sys->dummy_ind2->table = table;

	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
	srv_sys->dummy_ind1->cached = srv_sys->dummy_ind2->cached = TRUE;

	/* Init the server concurrency restriction data structures */

	os_fast_mutex_init(&srv_conc_mutex);

	UT_LIST_INIT(srv_conc_queue);

	srv_conc_slots = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		conc_slot = srv_conc_slots + i;
		conc_slot->reserved = FALSE;
		conc_slot->event = os_event_create(NULL);
		ut_a(conc_slot->event);
	}
}

/*************************************************************************
Frees the OS fast mutex created in srv_init(). */

void
srv_free(void)
/*==========*/
{
	os_fast_mutex_free(&srv_conc_mutex);
}

/*************************************************************************
Initializes the synchronization primitives, memory system, and the thread
local storage. */

void
srv_general_init(void)
/*==================*/
{
	os_sync_init();
	sync_init();
	mem_init(srv_mem_pool_size);
	thr_local_init();
}

/*======================= InnoDB Server FIFO queue =======================*/

/* Maximum allowable purge history length. <=0 means 'infinite'. */
ulong	srv_max_purge_lag		= 0;

/*************************************************************************
Puts an OS thread to wait if there are too many concurrent threads
(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */

void
srv_conc_enter_innodb(
/*==================*/
	trx_t*	trx)	/* in: transaction object associated with the
			thread */
{
	ibool			has_slept = FALSE;
	srv_conc_slot_t*	slot	  = NULL;
	ulint			i;

	if (trx->mysql_thd != NULL
	    && thd_is_replication_slave_thread(trx->mysql_thd)) {

		/* TODO Do something more interesting (based on a config
		parameter). Some users want to give the replication
		thread very low priority, see http://bugs.mysql.com/25078
		This can be done by introducing
		innodb_replication_delay(ms) config parameter */

		return;
	}

	/* If trx has 'free tickets' to enter the engine left, then use one
	such ticket */

	if (trx->n_tickets_to_enter_innodb > 0) {
		trx->n_tickets_to_enter_innodb--;

		return;
	}

	os_fast_mutex_lock(&srv_conc_mutex);
retry:
	if (trx->declared_to_be_inside_innodb) {
		ut_print_timestamp(stderr);
		fputs("  InnoDB: Error: trying to declare trx"
		      " to enter InnoDB, but\n"
		      "InnoDB: it already is declared.\n", stderr);
		trx_print(stderr, trx, 0);
		putc('\n', stderr);
		os_fast_mutex_unlock(&srv_conc_mutex);

		return;
	}

	if (srv_conc_n_threads < (lint)srv_thread_concurrency) {

		srv_conc_n_threads++;
		trx->declared_to_be_inside_innodb = TRUE;
		trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;

		os_fast_mutex_unlock(&srv_conc_mutex);

		return;
	}

	/* If the transaction is not holding resources, let it sleep
	for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */

	if (!has_slept && !trx->has_search_latch
	    && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {

		has_slept = TRUE; /* We let it sleep only once to avoid
				  starvation */

		srv_conc_n_waiting_threads++;

		os_fast_mutex_unlock(&srv_conc_mutex);

		trx->op_info = "sleeping before joining InnoDB queue";

		/* Peter Zaitsev suggested that we take the sleep away
		altogether. But the sleep may be good in pathological
		situations of lots of thread switches. Simply put some
		threads aside for a while to reduce the number of thread
		switches. */
		if (SRV_THREAD_SLEEP_DELAY > 0) {
			os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
		}

		trx->op_info = "";

		os_fast_mutex_lock(&srv_conc_mutex);

		srv_conc_n_waiting_threads--;

		goto retry;
	}

	/* Too many threads inside: put the current thread to a queue */

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		slot = srv_conc_slots + i;

		if (!slot->reserved) {

			break;
		}
	}

	if (i == OS_THREAD_MAX_N) {
		/* Could not find a free wait slot, we must let the
		thread enter */

		srv_conc_n_threads++;
		trx->declared_to_be_inside_innodb = TRUE;
		trx->n_tickets_to_enter_innodb = 0;

		os_fast_mutex_unlock(&srv_conc_mutex);

		return;
	}

	/* Release possible search system latch this thread has */
	if (trx->has_search_latch) {
		trx_search_latch_release_if_reserved(trx);
	}

	/* Add to the queue */
	slot->reserved = TRUE;
	slot->wait_ended = FALSE;

	UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);

	os_event_reset(slot->event);

	srv_conc_n_waiting_threads++;

	os_fast_mutex_unlock(&srv_conc_mutex);

	/* Go to wait for the event; when a thread leaves InnoDB it will
	release this thread */

	trx->op_info = "waiting in InnoDB queue";

	os_event_wait(slot->event);

	trx->op_info = "";

	os_fast_mutex_lock(&srv_conc_mutex);

	srv_conc_n_waiting_threads--;

	/* NOTE that the thread which released this thread already
	incremented the thread counter on behalf of this thread */

	slot->reserved = FALSE;

	UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);

	trx->declared_to_be_inside_innodb = TRUE;
	trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;

	os_fast_mutex_unlock(&srv_conc_mutex);
}

/*************************************************************************
This lets a thread enter InnoDB regardless of the number of threads inside
InnoDB. This must be called when a thread ends a lock wait. */

void
srv_conc_force_enter_innodb(
/*========================*/
	trx_t*	trx)	/* in: transaction object associated with the
			thread */
{
	if (UNIV_LIKELY(!srv_thread_concurrency)) {

		return;
	}

	os_fast_mutex_lock(&srv_conc_mutex);

	srv_conc_n_threads++;
	trx->declared_to_be_inside_innodb = TRUE;
	trx->n_tickets_to_enter_innodb = 1;

	os_fast_mutex_unlock(&srv_conc_mutex);
}

/*************************************************************************
This must be called when a thread exits InnoDB in a lock wait or at the
end of an SQL statement. */

void
srv_conc_force_exit_innodb(
/*=======================*/
	trx_t*	trx)	/* in: transaction object associated with the
			thread */
{
	srv_conc_slot_t*	slot	= NULL;

	if (UNIV_LIKELY(!srv_thread_concurrency)) {

		return;
	}

	if (trx->mysql_thd != NULL
	    && thd_is_replication_slave_thread(trx->mysql_thd)) {

		return;
	}

	if (trx->declared_to_be_inside_innodb == FALSE) {

		return;
	}

	os_fast_mutex_lock(&srv_conc_mutex);

	srv_conc_n_threads--;
	trx->declared_to_be_inside_innodb = FALSE;
	trx->n_tickets_to_enter_innodb = 0;

	if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
		/* Look for a slot where a thread is waiting and no other
		thread has yet released the thread */

		slot = UT_LIST_GET_FIRST(srv_conc_queue);

		while (slot && slot->wait_ended == TRUE) {
			slot = UT_LIST_GET_NEXT(srv_conc_queue, slot);
		}

		if (slot != NULL) {
			slot->wait_ended = TRUE;

			/* We increment the count on behalf of the released
			thread */

			srv_conc_n_threads++;
		}
	}

	os_fast_mutex_unlock(&srv_conc_mutex);

	if (slot != NULL) {
		os_event_set(slot->event);
	}
}

/*************************************************************************
This must be called when a thread exits InnoDB. */

void
srv_conc_exit_innodb(
/*=================*/
	trx_t*	trx)	/* in: transaction object associated with the
			thread */
{
	if (trx->n_tickets_to_enter_innodb > 0) {
		/* We will pretend the thread is still inside InnoDB though it
		now leaves the InnoDB engine. In this way we save
		a lot of semaphore operations. srv_conc_force_exit_innodb is
		used to declare the thread definitely outside InnoDB. It
		should be called when there is a lock wait or an SQL statement
		ends. */

		return;
	}

	srv_conc_force_exit_innodb(trx);
}
/*========================================================================*/
1246
/*************************************************************************
1247
Normalizes init parameter values to use units we use inside InnoDB. */
1250
srv_normalize_init_values(void)
1251
/*===========================*/
1252
/* out: DB_SUCCESS or error code */
1257
n = srv_n_data_files;
1259
for (i = 0; i < n; i++) {
1260
srv_data_file_sizes[i] = srv_data_file_sizes[i]
1261
* ((1024 * 1024) / UNIV_PAGE_SIZE);
1264
srv_last_file_size_max = srv_last_file_size_max
1265
* ((1024 * 1024) / UNIV_PAGE_SIZE);
1267
srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
1269
srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
1271
srv_pool_size = srv_pool_size / (UNIV_PAGE_SIZE / 1024);
1273
srv_awe_window_size = srv_awe_window_size / UNIV_PAGE_SIZE;
1276
/* If we are using AWE we must save memory in the 32-bit
1277
address space of the process, and cannot bind the lock
1278
table size to the real buffer pool size. */
1280
srv_lock_table_size = 20 * srv_awe_window_size;
1282
srv_lock_table_size = 5 * srv_pool_size;
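
/* Worked example, compiled out: with UNIV_PAGE_SIZE = 16384, a 128 MB
data file is normalized above to 128 * ((1024 * 1024) / 16384) = 8192
pages, and a buffer pool configured as 131072 kB becomes
131072 / (16384 / 1024) = 8192 pages. The stand-alone program below just
prints these two conversions. */
#if 0
#include <stdio.h>

int
main(void)
{
	unsigned long	page_size    = 16384;	/* UNIV_PAGE_SIZE */
	unsigned long	data_file_mb = 128;
	unsigned long	pool_kb      = 131072;

	printf("data file pages: %lu\n",
	       data_file_mb * ((1024UL * 1024) / page_size));
	printf("buffer pool pages: %lu\n",
	       pool_kb / (page_size / 1024));
	return(0);
}
#endif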

/*************************************************************************
Boots the InnoDB server. */

ulint
srv_boot(void)
/*==========*/
			/* out: DB_SUCCESS or error code */
{
	ulint	err;

	/* Transform the init parameter values given by MySQL to
	use units we use inside InnoDB: */

	err = srv_normalize_init_values();

	if (err != DB_SUCCESS) {
		return(err);
	}

	/* Initialize synchronization primitives, memory management, and thread
	local storage */

	srv_general_init();

	/* Initialize this module */

	srv_init();

	return(DB_SUCCESS);
}

#ifndef UNIV_HOTBACKUP
/*************************************************************************
Reserves a slot in the thread table for the current MySQL OS thread.
NOTE! The kernel mutex has to be reserved by the caller! */
static
srv_slot_t*
srv_table_reserve_slot_for_mysql(void)
/*==================================*/
			/* out: reserved slot */
{
	srv_slot_t*	slot;
	ulint		i;

	ut_ad(mutex_own(&kernel_mutex));

	i = 0;
	slot = srv_mysql_table + i;

	while (slot->in_use) {
		i++;

		if (i >= OS_THREAD_MAX_N) {

			ut_print_timestamp(stderr);

			fprintf(stderr,
				"  InnoDB: There appear to be %lu MySQL"
				" threads currently waiting\n"
				"InnoDB: inside InnoDB, which is the"
				" upper limit. Cannot continue operation.\n"
				"InnoDB: We intentionally generate"
				" a seg fault to print a stack trace\n"
				"InnoDB: on Linux. But first we print"
				" a list of waiting threads.\n", (ulong) i);

			for (i = 0; i < OS_THREAD_MAX_N; i++) {

				slot = srv_mysql_table + i;

				fprintf(stderr,
					"Slot %lu: thread id %lu, type %lu,"
					" in use %lu, susp %lu, time %lu\n",
					(ulong) i,
					(ulong) os_thread_pf(slot->id),
					(ulong) slot->type,
					(ulong) slot->in_use,
					(ulong) slot->suspended,
					(ulong) difftime(ut_time(),
							 slot->suspend_time));
			}

			ut_error;
		}

		slot = srv_mysql_table + i;
	}

	ut_a(slot->in_use == FALSE);

	slot->in_use = TRUE;
	slot->id = os_thread_get_curr_id();
	slot->handle = os_thread_get_curr();

	return(slot);
}
#endif /* !UNIV_HOTBACKUP */

/*******************************************************************
Puts a MySQL OS thread to wait for a lock to be released. If an error
occurs during the wait trx->error_state associated with thr is
!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
are possible errors. DB_DEADLOCK is returned if selective deadlock
resolution chose this transaction as a victim. */

void
srv_suspend_mysql_thread(
/*=====================*/
	que_thr_t*	thr)	/* in: query thread associated with the MySQL
				OS thread */
{
#ifndef UNIV_HOTBACKUP
	srv_slot_t*	slot;
	os_event_t	event;
	double		wait_time;
	trx_t*		trx;
	ibool		had_dict_lock			= FALSE;
	ibool		was_declared_inside_innodb	= FALSE;
	ib_longlong	start_time			= 0;
	ib_longlong	finish_time;
	ulint		diff_time;
	ulint		sec;
	ulint		ms;

	ut_ad(!mutex_own(&kernel_mutex));

	trx = thr_get_trx(thr);

	os_event_set(srv_lock_timeout_thread_event);

	mutex_enter(&kernel_mutex);

	trx->error_state = DB_SUCCESS;

	if (thr->state == QUE_THR_RUNNING) {

		ut_ad(thr->is_active == TRUE);

		/* The lock has already been released or this transaction
		was chosen as a deadlock victim: no need to suspend */

		if (trx->was_chosen_as_deadlock_victim) {

			trx->error_state = DB_DEADLOCK;
			trx->was_chosen_as_deadlock_victim = FALSE;
		}

		mutex_exit(&kernel_mutex);

		return;
	}

	ut_ad(thr->is_active == FALSE);

	slot = srv_table_reserve_slot_for_mysql();

	event = slot->event;

	slot->thr = thr;

	os_event_reset(event);

	slot->suspend_time = ut_time();

	if (thr->lock_state == QUE_THR_LOCK_ROW) {
		srv_n_lock_wait_count++;
		srv_n_lock_wait_current_count++;

		ut_usectime(&sec, &ms);
		start_time = (ib_longlong)sec * 1000000 + ms;
	}
	/* Wake the lock timeout monitor thread, if it is suspended */

	os_event_set(srv_lock_timeout_thread_event);

	mutex_exit(&kernel_mutex);

	if (trx->declared_to_be_inside_innodb) {

		was_declared_inside_innodb = TRUE;

		/* We must declare this OS thread to exit InnoDB, since a
		possible other thread holding a lock which this thread waits
		for must be allowed to enter, sooner or later */

		srv_conc_force_exit_innodb(trx);
	}

	/* Release possible foreign key check latch */
	if (trx->dict_operation_lock_mode == RW_S_LATCH) {

		had_dict_lock = TRUE;

		row_mysql_unfreeze_data_dictionary(trx);
	}

	ut_a(trx->dict_operation_lock_mode == 0);

	/* Wait for the release */

	os_event_wait(event);

	if (had_dict_lock) {

		row_mysql_freeze_data_dictionary(trx);
	}

	if (was_declared_inside_innodb) {

		/* Return back inside InnoDB */

		srv_conc_force_enter_innodb(trx);
	}

	mutex_enter(&kernel_mutex);

	/* Release the slot for others to use */

	slot->in_use = FALSE;

	wait_time = ut_difftime(ut_time(), slot->suspend_time);

	if (thr->lock_state == QUE_THR_LOCK_ROW) {
		ut_usectime(&sec, &ms);
		finish_time = (ib_longlong)sec * 1000000 + ms;

		diff_time = (ulint) (finish_time - start_time);

		srv_n_lock_wait_current_count--;
		srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time;
		if (diff_time > srv_n_lock_max_wait_time) {
			srv_n_lock_max_wait_time = diff_time;
		}
	}

	if (trx->was_chosen_as_deadlock_victim) {

		trx->error_state = DB_DEADLOCK;
		trx->was_chosen_as_deadlock_victim = FALSE;
	}

	mutex_exit(&kernel_mutex);

	if (srv_lock_wait_timeout < 100000000
	    && wait_time > (double)srv_lock_wait_timeout) {

		trx->error_state = DB_LOCK_WAIT_TIMEOUT;
	}
#else /* UNIV_HOTBACKUP */
	/* This function depends on MySQL code that is not included in
	InnoDB Hot Backup builds. Besides, this function should never
	be called in InnoDB Hot Backup. */
	ut_error;
#endif /* UNIV_HOTBACKUP */
}
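
/* Worked example, compiled out: the lock wait bookkeeping above measures
waits in microseconds from ut_usectime()-style (sec, usec) pairs. A wait
starting at sec = 10, usec = 500000 and finishing at sec = 12,
usec = 250000 lasts (12 * 1000000 + 250000) - (10 * 1000000 + 500000)
= 1750000 microseconds, i.e. 1.75 seconds. */
#if 0
#include <stdio.h>

int
main(void)
{
	long long	start_time  = 10LL * 1000000 + 500000;
	long long	finish_time = 12LL * 1000000 + 250000;

	printf("lock wait: %lld us\n", finish_time - start_time);
	/* prints: lock wait: 1750000 us */
	return(0);
}
#endif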

/************************************************************************
Releases a MySQL OS thread waiting for a lock to be released, if the
thread is already suspended. */

void
srv_release_mysql_thread_if_suspended(
/*==================================*/
	que_thr_t*	thr)	/* in: query thread associated with the
				MySQL OS thread */
{
#ifndef UNIV_HOTBACKUP
	srv_slot_t*	slot;
	ulint		i;

	ut_ad(mutex_own(&kernel_mutex));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {

		slot = srv_mysql_table + i;

		if (slot->in_use && slot->thr == thr) {
			/* Found */

			os_event_set(slot->event);

			return;
		}
	}

	/* not found */
#else /* UNIV_HOTBACKUP */
	/* This function depends on MySQL code that is not included in
	InnoDB Hot Backup builds. Besides, this function should never
	be called in InnoDB Hot Backup. */
	ut_error;
#endif /* UNIV_HOTBACKUP */
}

#ifndef UNIV_HOTBACKUP
/**********************************************************************
Refreshes the values used to calculate per-second averages. */
static
void
srv_refresh_innodb_monitor_stats(void)
/*==================================*/
{
	mutex_enter(&srv_innodb_monitor_mutex);

	srv_last_monitor_time = time(NULL);

	os_aio_refresh_stats();

	btr_cur_n_sea_old = btr_cur_n_sea;
	btr_cur_n_non_sea_old = btr_cur_n_non_sea;

	log_refresh_stats();

	buf_refresh_io_stats();

	srv_n_rows_inserted_old = srv_n_rows_inserted;
	srv_n_rows_updated_old = srv_n_rows_updated;
	srv_n_rows_deleted_old = srv_n_rows_deleted;
	srv_n_rows_read_old = srv_n_rows_read;

	mutex_exit(&srv_innodb_monitor_mutex);
}

/**********************************************************************
Outputs to a file the output of the InnoDB Monitor. */

void
srv_printf_innodb_monitor(
/*======================*/
	FILE*	file,		/* in: output stream */
	ulint*	trx_start,	/* out: file position of the start of
				the list of active transactions */
	ulint*	trx_end)	/* out: file position of the end of
				the list of active transactions */
{
	double	time_elapsed;
	time_t	current_time;
	ulint	n_reserved;

	mutex_enter(&srv_innodb_monitor_mutex);

	current_time = time(NULL);

	/* We add 0.001 seconds to time_elapsed to prevent division
	by zero if two users happen to call SHOW INNODB STATUS at the same
	time */

	time_elapsed = difftime(current_time, srv_last_monitor_time)
		+ 0.001;

	srv_last_monitor_time = time(NULL);

	fputs("\n=====================================\n", file);

	ut_print_timestamp(file);
	fprintf(file,
		" INNODB MONITOR OUTPUT\n"
		"=====================================\n"
		"Per second averages calculated from the last %lu seconds\n",
		(ulong)time_elapsed);

	fputs("----------\n"
	      "SEMAPHORES\n"
	      "----------\n", file);
	sync_print(file);

	/* Conceptually, srv_innodb_monitor_mutex has a very high latching
	order level in sync0sync.h, while dict_foreign_err_mutex has a very
	low level 135. Therefore we can reserve the latter mutex here without
	a danger of a deadlock of threads. */

	mutex_enter(&dict_foreign_err_mutex);

	if (ftell(dict_foreign_err_file) != 0L) {
		fputs("------------------------\n"
		      "LATEST FOREIGN KEY ERROR\n"
		      "------------------------\n", file);
		ut_copy_file(file, dict_foreign_err_file);
	}

	mutex_exit(&dict_foreign_err_mutex);

	lock_print_info_summary(file);
	if (trx_start) {
		long	t = ftell(file);
		if (t < 0) {
			*trx_start = ULINT_UNDEFINED;
		} else {
			*trx_start = (ulint) t;
		}
	}
	lock_print_info_all_transactions(file);
	if (trx_end) {
		long	t = ftell(file);
		if (t < 0) {
			*trx_end = ULINT_UNDEFINED;
		} else {
			*trx_end = (ulint) t;
		}
	}
	fputs("--------\n"
	      "FILE I/O\n"
	      "--------\n", file);
	os_aio_print(file);

	fputs("-------------------------------------\n"
	      "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
	      "-------------------------------------\n", file);
	ibuf_print(file);

	ha_print_info(file, btr_search_sys->hash_index);

	fprintf(file,
		"%.2f hash searches/s, %.2f non-hash searches/s\n",
		(btr_cur_n_sea - btr_cur_n_sea_old)
		/ time_elapsed,
		(btr_cur_n_non_sea - btr_cur_n_non_sea_old)
		/ time_elapsed);

	btr_cur_n_sea_old = btr_cur_n_sea;
	btr_cur_n_non_sea_old = btr_cur_n_non_sea;

	fputs("---\n"
	      "LOG\n"
	      "---\n", file);
	log_print(file);

	fputs("----------------------\n"
	      "BUFFER POOL AND MEMORY\n"
	      "----------------------\n", file);
	fprintf(file,
		"Total memory allocated " ULINTPF
		"; in additional pool allocated " ULINTPF "\n",
		ut_total_allocated_memory,
		mem_pool_get_reserved(mem_comm_pool));
	fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
		dict_sys->size);

	if (srv_use_awe) {
		fprintf(file,
			"In addition to that %lu MB of AWE memory allocated\n",
			(ulong) (srv_pool_size
				 / ((1024 * 1024) / UNIV_PAGE_SIZE)));
	}

	buf_print_io(file);

	fputs("--------------\n"
	      "ROW OPERATIONS\n"
	      "--------------\n", file);
	fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
		(long) srv_conc_n_threads,
		(ulong) srv_conc_n_waiting_threads);

	fprintf(file, "%lu read views open inside InnoDB\n",
		UT_LIST_GET_LEN(trx_sys->view_list));

	n_reserved = fil_space_get_n_reserved_extents(0);
	if (n_reserved > 0) {
		fprintf(file,
			"%lu tablespace extents now reserved for"
			" B-tree split operations\n",
			(ulong) n_reserved);
	}

#ifdef UNIV_LINUX
	fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
		(ulong) srv_main_thread_process_no,
		(ulong) srv_main_thread_id,
		srv_main_thread_op_info);
#else
	fprintf(file, "Main thread id %lu, state: %s\n",
		(ulong) srv_main_thread_id,
		srv_main_thread_op_info);
#endif
	fprintf(file,
		"Number of rows inserted " ULINTPF
		", updated " ULINTPF ", deleted " ULINTPF
		", read " ULINTPF "\n",
		srv_n_rows_inserted,
		srv_n_rows_updated,
		srv_n_rows_deleted,
		srv_n_rows_read);
	fprintf(file,
		"%.2f inserts/s, %.2f updates/s,"
		" %.2f deletes/s, %.2f reads/s\n",
		(srv_n_rows_inserted - srv_n_rows_inserted_old)
		/ time_elapsed,
		(srv_n_rows_updated - srv_n_rows_updated_old)
		/ time_elapsed,
		(srv_n_rows_deleted - srv_n_rows_deleted_old)
		/ time_elapsed,
		(srv_n_rows_read - srv_n_rows_read_old)
		/ time_elapsed);

	srv_n_rows_inserted_old = srv_n_rows_inserted;
	srv_n_rows_updated_old = srv_n_rows_updated;
	srv_n_rows_deleted_old = srv_n_rows_deleted;
	srv_n_rows_read_old = srv_n_rows_read;

	fputs("----------------------------\n"
	      "END OF INNODB MONITOR OUTPUT\n"
	      "============================\n", file);
	mutex_exit(&srv_innodb_monitor_mutex);
	fflush(file);
}

/**********************************************************************
Function to pass InnoDB status variables to MySQL */

void
srv_export_innodb_status(void)
/*==========================*/
{
	mutex_enter(&srv_innodb_monitor_mutex);

	export_vars.innodb_data_pending_reads
		= os_n_pending_reads;
	export_vars.innodb_data_pending_writes
		= os_n_pending_writes;
	export_vars.innodb_data_pending_fsyncs
		= fil_n_pending_log_flushes
		+ fil_n_pending_tablespace_flushes;
	export_vars.innodb_data_fsyncs = os_n_fsyncs;
	export_vars.innodb_data_read = srv_data_read;
	export_vars.innodb_data_reads = os_n_file_reads;
	export_vars.innodb_data_writes = os_n_file_writes;
	export_vars.innodb_data_written = srv_data_written;
	export_vars.innodb_buffer_pool_read_requests = buf_pool->n_page_gets;
	export_vars.innodb_buffer_pool_write_requests
		= srv_buf_pool_write_requests;
	export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
	export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
	export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
	export_vars.innodb_buffer_pool_read_ahead_rnd = srv_read_ahead_rnd;
	export_vars.innodb_buffer_pool_read_ahead_seq = srv_read_ahead_seq;
	export_vars.innodb_buffer_pool_pages_data
		= UT_LIST_GET_LEN(buf_pool->LRU);
	export_vars.innodb_buffer_pool_pages_dirty
		= UT_LIST_GET_LEN(buf_pool->flush_list);
	export_vars.innodb_buffer_pool_pages_free
		= UT_LIST_GET_LEN(buf_pool->free);
	export_vars.innodb_buffer_pool_pages_latched
		= buf_get_latched_pages_number();
	export_vars.innodb_buffer_pool_pages_total = buf_pool->curr_size;

	export_vars.innodb_buffer_pool_pages_misc = buf_pool->max_size
		- UT_LIST_GET_LEN(buf_pool->LRU)
		- UT_LIST_GET_LEN(buf_pool->free);
	export_vars.innodb_page_size = UNIV_PAGE_SIZE;
	export_vars.innodb_log_waits = srv_log_waits;
	export_vars.innodb_os_log_written = srv_os_log_written;
	export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
	export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
	export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
	export_vars.innodb_log_write_requests = srv_log_write_requests;
	export_vars.innodb_log_writes = srv_log_writes;
	export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
	export_vars.innodb_dblwr_writes = srv_dblwr_writes;
	export_vars.innodb_pages_created = buf_pool->n_pages_created;
	export_vars.innodb_pages_read = buf_pool->n_pages_read;
	export_vars.innodb_pages_written = buf_pool->n_pages_written;
	export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
	export_vars.innodb_row_lock_current_waits
		= srv_n_lock_wait_current_count;
	export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000;
	if (srv_n_lock_wait_count > 0) {
		export_vars.innodb_row_lock_time_avg = (ulint)
			(srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
	} else {
		export_vars.innodb_row_lock_time_avg = 0;
	}
	export_vars.innodb_row_lock_time_max
		= srv_n_lock_max_wait_time / 1000;
	export_vars.innodb_rows_read = srv_n_rows_read;
	export_vars.innodb_rows_inserted = srv_n_rows_inserted;
	export_vars.innodb_rows_updated = srv_n_rows_updated;
	export_vars.innodb_rows_deleted = srv_n_rows_deleted;

	mutex_exit(&srv_innodb_monitor_mutex);
}

/*************************************************************************
A thread which wakes up threads whose lock wait may have lasted too long.
This also prints the info output by various InnoDB monitors. */

os_thread_ret_t
srv_lock_timeout_and_monitor_thread(
/*================================*/
			/* out: a dummy parameter */
	void*	arg __attribute__((unused)))
			/* in: a dummy parameter required by
			os_thread_create */
{
	srv_slot_t*	slot;
	double		time_elapsed;
	time_t		current_time;
	time_t		last_table_monitor_time;
	time_t		last_tablespace_monitor_time;
	time_t		last_monitor_time;
	ibool		some_waits;
	double		wait_time;
	ulint		i;

#ifdef UNIV_DEBUG_THREAD_CREATION
	fprintf(stderr, "Lock timeout thread starts, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif
	srv_last_monitor_time = time(NULL);
	last_table_monitor_time = time(NULL);
	last_tablespace_monitor_time = time(NULL);
	last_monitor_time = time(NULL);
loop:
	srv_lock_timeout_and_monitor_active = TRUE;

	/* When someone is waiting for a lock, we wake up every second
	and check if a timeout has passed for a lock wait */

	os_thread_sleep(1000000);

	/* In case mutex_exit is not a memory barrier, it is
	theoretically possible some threads are left waiting though
	the semaphore is already released. Wake up those threads: */

	sync_arr_wake_threads_if_sema_free();

	current_time = time(NULL);

	time_elapsed = difftime(current_time, last_monitor_time);

	if (time_elapsed > 15) {
		last_monitor_time = time(NULL);

		if (srv_print_innodb_monitor) {
			srv_printf_innodb_monitor(stderr, NULL, NULL);
		}

		if (srv_innodb_status) {
			mutex_enter(&srv_monitor_file_mutex);
			rewind(srv_monitor_file);
			srv_printf_innodb_monitor(srv_monitor_file, NULL,
						  NULL);
			os_file_set_eof(srv_monitor_file);
			mutex_exit(&srv_monitor_file_mutex);
		}

		if (srv_print_innodb_tablespace_monitor
		    && difftime(current_time,
				last_tablespace_monitor_time) > 60) {

			last_tablespace_monitor_time = time(NULL);

			fputs("========================"
			      "========================\n",
			      stderr);

			ut_print_timestamp(stderr);

			fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
			      "========================"
			      "========================\n",
			      stderr);

			fsp_print(0);
			fputs("Validating tablespace\n", stderr);
			fsp_validate(0);
			fputs("Validation ok\n"
			      "---------------------------------------\n"
			      "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
			      "=======================================\n",
			      stderr);
		}

		if (srv_print_innodb_table_monitor
		    && difftime(current_time, last_table_monitor_time) > 60) {

			last_table_monitor_time = time(NULL);

			fputs("===========================================\n",
			      stderr);

			ut_print_timestamp(stderr);

			fputs(" INNODB TABLE MONITOR OUTPUT\n"
			      "===========================================\n",
			      stderr);
			dict_print();

			fputs("-----------------------------------\n"
			      "END OF INNODB TABLE MONITOR OUTPUT\n"
			      "==================================\n",
			      stderr);
		}
	}

	mutex_enter(&kernel_mutex);

	some_waits = FALSE;

	/* Check all slots to see if a thread is waiting there, and if it
	has exceeded the time limit */

	for (i = 0; i < OS_THREAD_MAX_N; i++) {

		slot = srv_mysql_table + i;

		if (slot->in_use) {
			some_waits = TRUE;

			wait_time = ut_difftime(ut_time(), slot->suspend_time);
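
			/* A srv_lock_wait_timeout of 100000000 seconds or
			more means that lock wait timeouts are effectively
			disabled: the check below is then skipped. */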
			if (srv_lock_wait_timeout < 100000000
			    && (wait_time > (double) srv_lock_wait_timeout
				|| wait_time < 0)) {

				/* Timeout exceeded or a wrap-around in system
				time counter: cancel the lock request queued
				by the transaction and release possible
				other transactions waiting behind; it is
				possible that the lock has already been
				granted: in that case do nothing */

				if (thr_get_trx(slot->thr)->wait_lock) {
					lock_cancel_waiting_and_release(
						thr_get_trx(slot->thr)
						->wait_lock);
				}
			}
		}
	}

	os_event_reset(srv_lock_timeout_thread_event);

	mutex_exit(&kernel_mutex);

	if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {

		goto exit_func;
	}

	if (some_waits || srv_print_innodb_monitor
	    || srv_print_innodb_lock_monitor
	    || srv_print_innodb_tablespace_monitor
	    || srv_print_innodb_table_monitor) {

		goto loop;
	}

	/* No one was waiting for a lock and no monitor was active:
	suspend this thread */

	srv_lock_timeout_and_monitor_active = FALSE;

#if 0
	/* The following synchronisation is disabled, since
	the InnoDB monitor output is to be updated every 15 seconds. */
	os_event_wait(srv_lock_timeout_thread_event);
#endif
	goto loop;

exit_func:
	srv_lock_timeout_and_monitor_active = FALSE;

	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */

	os_thread_exit(NULL);

	OS_THREAD_DUMMY_RETURN;
}

/*************************************************************************
A thread which prints warnings about semaphore waits which have lasted
too long. These can be used to track bugs which cause hangs. */

os_thread_ret_t
srv_error_monitor_thread(
/*=====================*/
			/* out: a dummy parameter */
	void*	arg __attribute__((unused)))
			/* in: a dummy parameter required by
			os_thread_create */
{
	/* number of successive fatal timeouts observed */
	ulint	fatal_cnt	= 0;
	dulint	old_lsn;
	dulint	new_lsn;

	old_lsn = srv_start_lsn;

#ifdef UNIV_DEBUG_THREAD_CREATION
	fprintf(stderr, "Error monitor thread starts, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif
loop:
	srv_error_monitor_active = TRUE;

	/* Try to track a strange bug reported by Harald Fuchs and others,
	where the lsn seems to decrease at times */

	new_lsn = log_get_lsn();

	if (ut_dulint_cmp(new_lsn, old_lsn) < 0) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
			"  InnoDB: Error: old log sequence number %lu %lu"
			" was greater\n"
			"InnoDB: than the new log sequence number %lu %lu!\n"
			"InnoDB: Please submit a bug report"
			" to http://bugs.mysql.com\n",
			(ulong) ut_dulint_get_high(old_lsn),
			(ulong) ut_dulint_get_low(old_lsn),
			(ulong) ut_dulint_get_high(new_lsn),
			(ulong) ut_dulint_get_low(new_lsn));
	}

	old_lsn = new_lsn;

	if (difftime(time(NULL), srv_last_monitor_time) > 60) {
		/* We refresh InnoDB Monitor values so that averages are
		printed from at most the last 60 seconds */

		srv_refresh_innodb_monitor_stats();
	}
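
	/* sync_array_print_long_waits() returns TRUE when some semaphore
	wait has exceeded srv_fatal_semaphore_wait_threshold. Since this
	loop sleeps 2 seconds per iteration, fatal_cnt > 5 means the
	condition has persisted over several successive checks before we
	deliberately crash the server. */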
	if (sync_array_print_long_waits()) {
		fatal_cnt++;
		if (fatal_cnt > 5) {

			fprintf(stderr,
				"InnoDB: Error: semaphore wait has lasted"
				" > %lu seconds\n"
				"InnoDB: We intentionally crash the server,"
				" because it appears to be hung.\n",
				(ulong) srv_fatal_semaphore_wait_threshold);

			ut_error;
		}
	} else {
		fatal_cnt = 0;
	}

	/* Flush stderr so that a database user gets the output
	to the possible MySQL error log file */

	fflush(stderr);

	os_thread_sleep(2000000);

	if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {

		goto loop;
	}

	srv_error_monitor_active = FALSE;

	/* We count the number of threads in os_thread_exit(). A created
	thread should always use that to exit and not use return() to exit. */

	os_thread_exit(NULL);

	OS_THREAD_DUMMY_RETURN;
}

/***********************************************************************
Tells the InnoDB server that there has been activity in the database
and wakes up the master thread if it is suspended (not sleeping). Used
in the MySQL interface. Note that there is a small chance that the master
thread stays suspended (we do not protect our operation with the kernel
mutex, for performance reasons). */

void
srv_active_wake_master_thread(void)
/*===============================*/
{
	srv_activity_count++;

	if (srv_n_threads_active[SRV_MASTER] == 0) {

		mutex_enter(&kernel_mutex);

		srv_release_threads(SRV_MASTER, 1);

		mutex_exit(&kernel_mutex);
	}
}

/***********************************************************************
Wakes up the master thread if it is suspended or being suspended. */

void
srv_wake_master_thread(void)
/*========================*/
{
	srv_activity_count++;

	mutex_enter(&kernel_mutex);

	srv_release_threads(SRV_MASTER, 1);

	mutex_exit(&kernel_mutex);
}

/*************************************************************************
The master thread controlling the server. */

os_thread_ret_t
srv_master_thread(
/*==============*/
			/* out: a dummy parameter */
	void*	arg __attribute__((unused)))
			/* in: a dummy parameter required by
			os_thread_create */
{
	os_event_t	event;
	time_t		last_flush_time;
	time_t		current_time;
	ulint		old_activity_count;
	ulint		n_pages_purged;
	ulint		n_bytes_merged;
	ulint		n_pages_flushed;
	ulint		n_bytes_archived;
	ulint		n_tables_to_drop;
	ulint		n_ios;
	ulint		n_ios_old;
	ulint		n_ios_very_old;
	ulint		n_pend_ios;
	ibool		skip_sleep	= FALSE;
	ulint		i;

#ifdef UNIV_DEBUG_THREAD_CREATION
	fprintf(stderr, "Master thread starts, id %lu\n",
		os_thread_pf(os_thread_get_curr_id()));
#endif
	srv_main_thread_process_no = os_proc_get_number();
	srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());

	srv_table_reserve_slot(SRV_MASTER);

	mutex_enter(&kernel_mutex);

	srv_n_threads_active[SRV_MASTER]++;

	mutex_exit(&kernel_mutex);
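
	/* The remainder of this function is structured as a set of
	labelled loops: 'loop' cycles while there is user activity,
	'background_loop' runs maintenance tasks when the server is quiet
	or shutting down, 'flush_loop' flushes the buffer pool until the
	ratio of modified pages is acceptable, and 'suspend_thread' parks
	this thread until new activity arrives. */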
loop:
	/*****************************************************************/
	/* ---- When there is database activity by users, we cycle in this
	loop */

	srv_main_thread_op_info = "reserving kernel mutex";

	n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read
		+ buf_pool->n_pages_written;
	mutex_enter(&kernel_mutex);

	/* Store the user activity counter at the start of this loop */
	old_activity_count = srv_activity_count;

	mutex_exit(&kernel_mutex);

	if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {

		goto suspend_thread;
	}

	/* ---- We run the following loop approximately once per second
	when there is database activity */

	skip_sleep = FALSE;

	for (i = 0; i < 10; i++) {
		n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read
			+ buf_pool->n_pages_written;
		srv_main_thread_op_info = "sleeping";

		if (!skip_sleep) {

			os_thread_sleep(1000000);
		}

		skip_sleep = FALSE;

		/* ALTER TABLE in MySQL requires on Unix that the table
		handler can drop tables lazily when there are no longer
		any SELECT queries on them. */

		srv_main_thread_op_info = "doing background drop tables";

		row_drop_tables_for_mysql_in_background();

		srv_main_thread_op_info = "";

		if (srv_fast_shutdown && srv_shutdown_state > 0) {

			goto background_loop;
		}

		/* We flush the log once in a second even if no commit is
		issued, or we have specified in my.cnf no flush at
		transaction commit */

		srv_main_thread_op_info = "flushing log";
		log_buffer_flush_to_disk();

		srv_main_thread_op_info = "making checkpoint";
		log_free_check();

		/* If there were less than 5 i/os during the
		one second sleep, we assume that there is free
		disk i/o capacity available, and it makes sense to
		do an insert buffer merge. */

		n_pend_ios = buf_get_n_pending_ios()
			+ log_sys->n_pending_writes;
		n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
			+ buf_pool->n_pages_written;
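
		/* Example: if at most 2 i/os are pending and fewer than
		5 i/os completed during the last one second sleep, we
		judge the disk idle enough to merge up to
		srv_insert_buffer_batch_size / 4 pages from the insert
		buffer. */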
		if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
			srv_main_thread_op_info = "doing insert buffer merge";
			ibuf_contract_for_n_pages(
				TRUE, srv_insert_buffer_batch_size / 4);

			srv_main_thread_op_info = "flushing log";

			log_buffer_flush_to_disk();
		}

		if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
				  > srv_max_buf_pool_modified_pct)) {

			/* Try to keep the number of modified pages in the
			buffer pool under the limit wished by the user */

			n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
							  ut_dulint_max);

			/* If we had to do the flush, it may have taken
			even more than 1 second, and also, there may be more
			to flush. Do not sleep 1 second during the next
			iteration of this loop. */

			skip_sleep = TRUE;
		}

		if (srv_activity_count == old_activity_count) {

			/* There is no user activity at the moment, go to
			the background loop */

			goto background_loop;
		}
	}

	/* ---- We perform the following code approximately once per
	10 seconds when there is database activity */

#ifdef MEM_PERIODIC_CHECK
	/* Check magic numbers of every allocated mem block once in 10
	seconds */
	mem_validate_all_blocks();
#endif
	/* If there were less than 200 i/os during the 10 second period,
	we assume that there is free disk i/o capacity available, and it
	makes sense to flush 100 pages. */

	n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
	n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
		+ buf_pool->n_pages_written;
	if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {

		srv_main_thread_op_info = "flushing buffer pool pages";
		buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);

		srv_main_thread_op_info = "flushing log";
		log_buffer_flush_to_disk();
	}

	/* We run a batch of insert buffer merge every 10 seconds,
	even if the server is active */

	srv_main_thread_op_info = "doing insert buffer merge";
	ibuf_contract_for_n_pages(TRUE, srv_insert_buffer_batch_size / 4);

	srv_main_thread_op_info = "flushing log";
	log_buffer_flush_to_disk();

	/* We run a full purge every 10 seconds, even if the server
	is active */

	n_pages_purged = 1;

	last_flush_time = time(NULL);
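
	/* trx_purge() returns the number of undo log pages handled in
	one batch; looping until it returns 0 runs the purge to
	completion, while flushing the log at most once per second. */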
	while (n_pages_purged) {

		if (srv_fast_shutdown && srv_shutdown_state > 0) {

			goto background_loop;
		}

		srv_main_thread_op_info = "purging";
		n_pages_purged = trx_purge();

		current_time = time(NULL);

		if (difftime(current_time, last_flush_time) > 1) {
			srv_main_thread_op_info = "flushing log";

			log_buffer_flush_to_disk();
			last_flush_time = current_time;
		}
	}

	srv_main_thread_op_info = "flushing buffer pool pages";

	/* Flush a few oldest pages to make a new checkpoint younger */

	if (buf_get_modified_ratio_pct() > 70) {

		/* If there are lots of modified pages in the buffer pool
		(> 70 %), we assume we can afford reserving the disk(s) for
		the time it requires to flush 100 pages */

		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
						  ut_dulint_max);
	} else {
		/* Otherwise, we only flush a small number of pages so that
		we do not unnecessarily use much disk i/o capacity from
		other work */

		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
						  ut_dulint_max);
	}

	srv_main_thread_op_info = "making checkpoint";

	/* Make a new checkpoint about once in 10 seconds */

	log_checkpoint(TRUE, FALSE);

	srv_main_thread_op_info = "reserving kernel mutex";

	mutex_enter(&kernel_mutex);

	/* ---- When there is database activity, we jump from here back to
	the start of loop */

	if (srv_activity_count != old_activity_count) {
		mutex_exit(&kernel_mutex);
		goto loop;
	}

	mutex_exit(&kernel_mutex);

	/* If the database is quiet, we enter the background loop */

	/*****************************************************************/
background_loop:
	/* ---- In this loop we run background operations when the server
	is quiet from user activity. Also in the case of a shutdown, we
	loop here, flushing the buffer pool to the data files. */

	/* The server has been quiet for a while: start running background
	operations */

	srv_main_thread_op_info = "doing background drop tables";

	n_tables_to_drop = row_drop_tables_for_mysql_in_background();

	if (n_tables_to_drop > 0) {
		/* Do not monopolize the CPU even if there are tables waiting
		in the background drop queue. (It is essentially a bug if
		MySQL tries to drop a table while there are still open handles
		to it and we had to put it to the background drop queue.) */

		os_thread_sleep(100000);
	}

	srv_main_thread_op_info = "purging";

	/* Run a full purge */

	n_pages_purged = 1;

	last_flush_time = time(NULL);

	while (n_pages_purged) {
		if (srv_fast_shutdown && srv_shutdown_state > 0) {

			break;
		}

		srv_main_thread_op_info = "purging";
		n_pages_purged = trx_purge();

		current_time = time(NULL);

		if (difftime(current_time, last_flush_time) > 1) {
			srv_main_thread_op_info = "flushing log";

			log_buffer_flush_to_disk();
			last_flush_time = current_time;
		}
	}

	srv_main_thread_op_info = "reserving kernel mutex";

	mutex_enter(&kernel_mutex);
	if (srv_activity_count != old_activity_count) {
		mutex_exit(&kernel_mutex);
		goto loop;
	}
	mutex_exit(&kernel_mutex);

	srv_main_thread_op_info = "doing insert buffer merge";

	if (srv_fast_shutdown && srv_shutdown_state > 0) {
		n_bytes_merged = 0;
	} else {
		n_bytes_merged = ibuf_contract_for_n_pages(
			TRUE, srv_insert_buffer_batch_size);
	}

	srv_main_thread_op_info = "reserving kernel mutex";

	mutex_enter(&kernel_mutex);
	if (srv_activity_count != old_activity_count) {
		mutex_exit(&kernel_mutex);
		goto loop;
	}
	mutex_exit(&kernel_mutex);

flush_loop:
	srv_main_thread_op_info = "flushing buffer pool pages";

	if (srv_fast_shutdown < 2) {
		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
						  ut_dulint_max);
	} else {
		/* In the fastest shutdown we do not flush the buffer pool
		to data files: we set n_pages_flushed to 0 artificially. */

		n_pages_flushed = 0;
	}

	srv_main_thread_op_info = "reserving kernel mutex";

	mutex_enter(&kernel_mutex);
	if (srv_activity_count != old_activity_count) {
		mutex_exit(&kernel_mutex);
		goto loop;
	}
	mutex_exit(&kernel_mutex);

	srv_main_thread_op_info = "waiting for buffer pool flush to end";
	buf_flush_wait_batch_end(BUF_FLUSH_LIST);

	srv_main_thread_op_info = "flushing log";

	log_buffer_flush_to_disk();

	srv_main_thread_op_info = "making checkpoint";

	log_checkpoint(TRUE, FALSE);
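
	/* If the flush did not bring the ratio of modified pages under
	the user limit (for example because user activity kept dirtying
	pages), iterate: flush_loop repeats until the ratio drops to
	srv_max_buf_pool_modified_pct or below. */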
	if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {

		/* Try to keep the number of modified pages in the
		buffer pool under the limit wished by the user */

		goto flush_loop;
	}

	srv_main_thread_op_info = "reserving kernel mutex";

	mutex_enter(&kernel_mutex);
	if (srv_activity_count != old_activity_count) {
		mutex_exit(&kernel_mutex);
		goto loop;
	}
	mutex_exit(&kernel_mutex);
/*
	srv_main_thread_op_info = "archiving log (if log archive is on)";

	log_archive_do(FALSE, &n_bytes_archived);
*/
	n_bytes_archived = 0;

	/* Keep looping in the background loop if still work to do */
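
	/* Reminder: srv_fast_shutdown == 0 requests a 'slow' shutdown
	(purge and insert buffer merge are run to completion), 1 is the
	fast default, and 2 is the 'very fast' variant which skips even
	the buffer pool flush. */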

	if (srv_fast_shutdown && srv_shutdown_state > 0) {
		if (n_tables_to_drop + n_pages_flushed
		    + n_bytes_archived != 0) {

			/* If we are doing a fast shutdown (= the default)
			we do not do purge or insert buffer merge. But we
			flush the buffer pool completely to disk.
			In a 'very fast' shutdown we do not flush the buffer
			pool to data files: we have set n_pages_flushed to
			0 artificially. */

			goto background_loop;
		}
	} else if (n_tables_to_drop
		   + n_pages_purged + n_bytes_merged + n_pages_flushed
		   + n_bytes_archived != 0) {

		/* In a 'slow' shutdown we run purge and the insert buffer
		merge to completion */

		goto background_loop;
	}

	/* There is no work for background operations either: suspend
	master thread to wait for more server activity */

suspend_thread:
	srv_main_thread_op_info = "suspending";

	mutex_enter(&kernel_mutex);

	if (row_get_background_drop_list_len_low() > 0) {
		mutex_exit(&kernel_mutex);

		goto loop;
	}

	event = srv_suspend_thread();

	mutex_exit(&kernel_mutex);
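
	/* The event returned by srv_suspend_thread() is set by
	srv_release_threads(); see srv_active_wake_master_thread() and
	srv_wake_master_thread() above. */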

	/* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
	waits for database activity to die down when converting < 4.1.x
	databases, and relies on this string being exactly as it is. InnoDB
	manual also mentions this string in several places. */
	srv_main_thread_op_info = "waiting for server activity";

	os_event_wait(event);

	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
		/* This is only extra safety, the thread should exit
		already when the event wait ends */

		os_thread_exit(NULL);
	}

	/* When there is user activity, InnoDB will set the event and the
	main thread goes back to loop. */

	goto loop;

	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
}
#endif /* !UNIV_HOTBACKUP */