1
by brian
clean slate |
1 |
/******************************************************
|
2 |
The database server main program
|
|
3 |
||
4 |
NOTE: SQL Server 7 uses something which the documentation
|
|
5 |
calls user mode scheduled threads (UMS threads). One such
|
|
6 |
thread is usually allocated per processor. Win32
|
|
7 |
documentation does not know any UMS threads, which suggests
|
|
8 |
that the concept is internal to SQL Server 7. It may mean that
|
|
9 |
SQL Server 7 does all the scheduling of threads itself, even
|
|
10 |
in i/o waits. We should maybe modify InnoDB to use the same
|
|
11 |
technique, because thread switches within NT may be too slow.
|
|
12 |
||
13 |
SQL Server 7 also mentions fibers, which are cooperatively
|
|
14 |
scheduled threads. They can boost performance by 5 %,
|
|
15 |
according to the Delaney and Soukup's book.
|
|
16 |
||
17 |
Windows 2000 will have something called thread pooling
|
|
18 |
(see msdn website), which we could possibly use.
|
|
19 |
||
20 |
Another possibility could be to use some very fast user space
|
|
21 |
thread library. This might confuse NT though.
|
|
22 |
||
23 |
(c) 1995 Innobase Oy
|
|
24 |
||
25 |
Created 10/8/1995 Heikki Tuuri
|
|
26 |
*******************************************************/
|
|
27 |
/* Dummy comment */
|
|
28 |
#include "srv0srv.h" |
|
29 |
||
30 |
#include "ut0mem.h" |
|
31 |
#include "os0proc.h" |
|
32 |
#include "mem0mem.h" |
|
33 |
#include "mem0pool.h" |
|
34 |
#include "sync0sync.h" |
|
35 |
#include "thr0loc.h" |
|
36 |
#include "que0que.h" |
|
37 |
#include "srv0que.h" |
|
38 |
#include "log0recv.h" |
|
39 |
#include "pars0pars.h" |
|
40 |
#include "usr0sess.h" |
|
41 |
#include "lock0lock.h" |
|
42 |
#include "trx0purge.h" |
|
43 |
#include "ibuf0ibuf.h" |
|
44 |
#include "buf0flu.h" |
|
45 |
#include "btr0sea.h" |
|
46 |
#include "dict0load.h" |
|
47 |
#include "dict0boot.h" |
|
48 |
#include "srv0start.h" |
|
49 |
#include "row0mysql.h" |
|
50 |
#include "ha_prototypes.h" |
|
51 |
||
52 |
/* This is set to TRUE if the MySQL user has set it in MySQL; currently
|
|
53 |
affects only FOREIGN KEY definition parsing */
|
|
54 |
ibool srv_lower_case_table_names = FALSE; |
|
55 |
||
56 |
/* The following counter is incremented whenever there is some user activity
|
|
57 |
in the server */
|
|
58 |
ulint srv_activity_count = 0; |
|
59 |
||
60 |
/* The following is the maximum allowed duration of a lock wait. */
|
|
61 |
ulint srv_fatal_semaphore_wait_threshold = 600; |
|
62 |
||
63 |
/* How much data manipulation language (DML) statements need to be delayed,
|
|
64 |
in microseconds, in order to reduce the lagging of the purge thread. */
|
|
65 |
ulint srv_dml_needed_delay = 0; |
|
66 |
||
67 |
ibool srv_lock_timeout_and_monitor_active = FALSE; |
|
68 |
ibool srv_error_monitor_active = FALSE; |
|
69 |
||
70 |
const char* srv_main_thread_op_info = ""; |
|
71 |
||
72 |
/* Prefix used by MySQL to indicate pre-5.1 table name encoding */
|
|
73 |
const char srv_mysql50_table_name_prefix[9] = "#mysql50#"; |
|
74 |
||
75 |
/* Server parameters which are read from the initfile */
|
|
76 |
||
77 |
/* The following three are dir paths which are catenated before file
|
|
78 |
names, where the file name itself may also contain a path */
|
|
79 |
||
80 |
char* srv_data_home = NULL; |
|
81 |
#ifdef UNIV_LOG_ARCHIVE
|
|
82 |
char* srv_arch_dir = NULL; |
|
83 |
#endif /* UNIV_LOG_ARCHIVE */ |
|
84 |
||
85 |
ibool srv_file_per_table = FALSE; /* store to its own file each table |
|
86 |
created by an user; data dictionary
|
|
87 |
tables are in the system tablespace
|
|
88 |
0 */
|
|
89 |
ibool srv_locks_unsafe_for_binlog = FALSE; /* Place locks to |
|
90 |
records only i.e. do
|
|
91 |
not use next-key
|
|
92 |
locking except on
|
|
93 |
duplicate key checking
|
|
94 |
and foreign key
|
|
95 |
checking */
|
|
96 |
ulint srv_n_data_files = 0; |
|
97 |
char** srv_data_file_names = NULL; |
|
98 |
ulint* srv_data_file_sizes = NULL; /* size in database pages */ |
|
99 |
||
100 |
ibool srv_auto_extend_last_data_file = FALSE; /* if TRUE, then we |
|
101 |
auto-extend the last data
|
|
102 |
file */
|
|
103 |
ulint srv_last_file_size_max = 0; /* if != 0, this tells |
|
104 |
the max size auto-extending
|
|
105 |
may increase the last data
|
|
106 |
file size */
|
|
107 |
ulong srv_auto_extend_increment = 8; /* If the last data file is |
|
108 |
auto-extended, we add this
|
|
109 |
many pages to it at a time */
|
|
110 |
ulint* srv_data_file_is_raw_partition = NULL; |
|
111 |
||
112 |
/* If the following is TRUE we do not allow inserts etc. This protects
|
|
113 |
the user from forgetting the 'newraw' keyword to my.cnf */
|
|
114 |
||
115 |
ibool srv_created_new_raw = FALSE; |
|
116 |
||
117 |
char** srv_log_group_home_dirs = NULL; |
|
118 |
||
119 |
ulint srv_n_log_groups = ULINT_MAX; |
|
120 |
ulint srv_n_log_files = ULINT_MAX; |
|
121 |
ulint srv_log_file_size = ULINT_MAX; /* size in database pages */ |
|
122 |
ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */ |
|
123 |
ulong srv_flush_log_at_trx_commit = 1; |
|
124 |
||
125 |
byte srv_latin1_ordering[256] /* The sort order table of the latin1 |
|
126 |
character set. The following table is
|
|
127 |
the MySQL order as of Feb 10th, 2002 */
|
|
128 |
= { |
|
129 |
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 |
|
130 |
, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F |
|
131 |
, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 |
|
132 |
, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F |
|
133 |
, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27 |
|
134 |
, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F |
|
135 |
, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37 |
|
136 |
, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F |
|
137 |
, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47 |
|
138 |
, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F |
|
139 |
, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57 |
|
140 |
, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F |
|
141 |
, 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47 |
|
142 |
, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F |
|
143 |
, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57 |
|
144 |
, 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F |
|
145 |
, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87 |
|
146 |
, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F |
|
147 |
, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97 |
|
148 |
, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F |
|
149 |
, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7 |
|
150 |
, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF |
|
151 |
, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7 |
|
152 |
, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF |
|
153 |
, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43 |
|
154 |
, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49 |
|
155 |
, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7 |
|
156 |
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF |
|
157 |
, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43 |
|
158 |
, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49 |
|
159 |
, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7 |
|
160 |
, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF |
|
161 |
};
|
|
162 |
||
163 |
ulint srv_pool_size = ULINT_MAX; /* size in pages; MySQL inits |
|
164 |
this to size in kilobytes but
|
|
165 |
we normalize this to pages in
|
|
166 |
srv_boot() */
|
|
167 |
ulint srv_awe_window_size = 0; /* size in pages; MySQL inits |
|
168 |
this to bytes, but we
|
|
169 |
normalize it to pages in
|
|
170 |
srv_boot() */
|
|
171 |
ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */ |
|
172 |
ulint srv_lock_table_size = ULINT_MAX; |
|
173 |
||
174 |
ulint srv_n_file_io_threads = ULINT_MAX; |
|
175 |
||
176 |
#ifdef UNIV_LOG_ARCHIVE
|
|
177 |
ibool srv_log_archive_on = FALSE; |
|
178 |
ibool srv_archive_recovery = 0; |
|
179 |
dulint srv_archive_recovery_limit_lsn; |
|
180 |
#endif /* UNIV_LOG_ARCHIVE */ |
|
181 |
||
182 |
ulint srv_lock_wait_timeout = 1024 * 1024 * 1024; |
|
183 |
||
184 |
/* This parameter is used to throttle the number of insert buffers that are
|
|
185 |
merged in a batch. By increasing this parameter on a faster disk you can
|
|
186 |
possibly reduce the number of I/O operations performed to complete the
|
|
187 |
merge operation. The value of this parameter is used as is by the
|
|
188 |
background loop when the system is idle (low load), on a busy system
|
|
189 |
the parameter is scaled down by a factor of 4, this is to avoid putting
|
|
190 |
a heavier load on the I/O sub system. */
|
|
191 |
||
192 |
ulong srv_insert_buffer_batch_size = 20; |
|
193 |
||
194 |
char* srv_file_flush_method_str = NULL; |
|
195 |
ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC; |
|
196 |
ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; |
|
197 |
||
198 |
ulint srv_max_n_open_files = 300; |
|
199 |
||
200 |
/* The InnoDB main thread tries to keep the ratio of modified pages
|
|
201 |
in the buffer pool to all database pages in the buffer pool smaller than
|
|
202 |
the following number. But it is not guaranteed that the value stays below
|
|
203 |
that during a time of heavy update/insert activity. */
|
|
204 |
||
205 |
ulong srv_max_buf_pool_modified_pct = 90; |
|
206 |
||
207 |
/* variable counts amount of data read in total (in bytes) */
|
|
208 |
ulint srv_data_read = 0; |
|
209 |
||
210 |
/* here we count the amount of data written in total (in bytes) */
|
|
211 |
ulint srv_data_written = 0; |
|
212 |
||
213 |
/* the number of the log write requests done */
|
|
214 |
ulint srv_log_write_requests = 0; |
|
215 |
||
216 |
/* the number of physical writes to the log performed */
|
|
217 |
ulint srv_log_writes = 0; |
|
218 |
||
219 |
/* amount of data written to the log files in bytes */
|
|
220 |
ulint srv_os_log_written = 0; |
|
221 |
||
222 |
/* amount of writes being done to the log files */
|
|
223 |
ulint srv_os_log_pending_writes = 0; |
|
224 |
||
225 |
/* we increase this counter when we don't have enough space in the
|
|
226 |
log buffer and have to flush it */
|
|
227 |
ulint srv_log_waits = 0; |
|
228 |
||
229 |
/* this variable counts the amount of times, when the doublewrite buffer
|
|
230 |
was flushed */
|
|
231 |
ulint srv_dblwr_writes = 0; |
|
232 |
||
233 |
/* here we store the number of pages that have been flushed to the
|
|
234 |
doublewrite buffer */
|
|
235 |
ulint srv_dblwr_pages_written = 0; |
|
236 |
||
237 |
/* in this variable we store the number of write requests issued */
|
|
238 |
ulint srv_buf_pool_write_requests = 0; |
|
239 |
||
240 |
/* here we store the number of times when we had to wait for a free page
|
|
241 |
in the buffer pool. It happens when the buffer pool is full and we need
|
|
242 |
to make a flush, in order to be able to read or create a page. */
|
|
243 |
ulint srv_buf_pool_wait_free = 0; |
|
244 |
||
245 |
/* variable to count the number of pages that were written from buffer
|
|
246 |
pool to the disk */
|
|
247 |
ulint srv_buf_pool_flushed = 0; |
|
248 |
||
249 |
/* variable to count the number of buffer pool reads that led to the
|
|
250 |
reading of a disk page */
|
|
251 |
ulint srv_buf_pool_reads = 0; |
|
252 |
||
253 |
/* variable to count the number of sequential read-aheads */
|
|
254 |
ulint srv_read_ahead_seq = 0; |
|
255 |
||
256 |
/* variable to count the number of random read-aheads */
|
|
257 |
ulint srv_read_ahead_rnd = 0; |
|
258 |
||
259 |
/* structure to pass status variables to MySQL */
|
|
260 |
export_struc export_vars; |
|
261 |
||
262 |
/* If the following is != 0 we do not allow inserts etc. This protects
|
|
263 |
the user from forgetting the innodb_force_recovery keyword to my.cnf */
|
|
264 |
||
265 |
ulint srv_force_recovery = 0; |
|
266 |
/*-----------------------*/
|
|
267 |
/* We are prepared for a situation that we have this many threads waiting for
|
|
268 |
a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
|
|
269 |
value. */
|
|
270 |
||
271 |
ulint srv_max_n_threads = 0; |
|
272 |
||
273 |
/* The following controls how many threads we let inside InnoDB concurrently:
|
|
274 |
threads waiting for locks are not counted into the number because otherwise
|
|
275 |
we could get a deadlock. MySQL creates a thread for each user session, and
|
|
276 |
semaphore contention and convoy problems can occur without this restriction.
|
|
277 |
Value 10 should be good if there are less than 4 processors + 4 disks in the
|
|
278 |
computer. Bigger computers need bigger values. Value 0 will disable the
|
|
279 |
concurrency check. */
|
|
280 |
||
281 |
ulong srv_thread_concurrency = 0; |
|
282 |
ulong srv_commit_concurrency = 0; |
|
283 |
||
284 |
os_fast_mutex_t srv_conc_mutex; /* this mutex protects srv_conc data |
|
285 |
structures */
|
|
286 |
lint srv_conc_n_threads = 0; /* number of OS threads currently |
|
287 |
inside InnoDB; it is not an error
|
|
288 |
if this drops temporarily below zero
|
|
289 |
because we do not demand that every
|
|
290 |
thread increments this, but a thread
|
|
291 |
waiting for a lock decrements this
|
|
292 |
temporarily */
|
|
293 |
ulint srv_conc_n_waiting_threads = 0; /* number of OS threads waiting in the |
|
294 |
FIFO for a permission to enter InnoDB
|
|
295 |
*/
|
|
296 |
||
297 |
typedef struct srv_conc_slot_struct srv_conc_slot_t; |
|
298 |
struct srv_conc_slot_struct{ |
|
299 |
os_event_t event; /* event to wait */ |
|
300 |
ibool reserved; /* TRUE if slot |
|
301 |
reserved */
|
|
302 |
ibool wait_ended; /* TRUE when another |
|
303 |
thread has already set
|
|
304 |
the event and the
|
|
305 |
thread in this slot is
|
|
306 |
free to proceed; but
|
|
307 |
reserved may still be
|
|
308 |
TRUE at that point */
|
|
309 |
UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /* queue node */ |
|
310 |
};
|
|
311 |
||
312 |
UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue; /* queue of threads |
|
313 |
waiting to get in */
|
|
314 |
srv_conc_slot_t* srv_conc_slots; /* array of wait |
|
315 |
slots */
|
|
316 |
||
317 |
/* Number of times a thread is allowed to enter InnoDB within the same
|
|
318 |
SQL query after it has once got the ticket at srv_conc_enter_innodb */
|
|
319 |
#define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter
|
|
320 |
#define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay
|
|
321 |
/*-----------------------*/
|
|
322 |
/* If the following is set to 1 then we do not run purge and insert buffer
|
|
323 |
merge to completion before shutdown. If it is set to 2, do not even flush the
|
|
324 |
buffer pool to data files at the shutdown: we effectively 'crash'
|
|
325 |
InnoDB (but lose no committed transactions). */
|
|
326 |
ulint srv_fast_shutdown = 0; |
|
327 |
||
328 |
/* Generate a innodb_status.<pid> file */
|
|
329 |
ibool srv_innodb_status = FALSE; |
|
330 |
||
331 |
ibool srv_stats_on_metadata = TRUE; |
|
332 |
||
333 |
ibool srv_use_doublewrite_buf = TRUE; |
|
334 |
ibool srv_use_checksums = TRUE; |
|
335 |
||
336 |
ibool srv_set_thread_priorities = TRUE; |
|
337 |
int srv_query_thread_priority = 0; |
|
338 |
||
339 |
/* TRUE if the Address Windowing Extensions of Windows are used; then we must
|
|
340 |
disable adaptive hash indexes */
|
|
341 |
ibool srv_use_awe = FALSE; |
|
342 |
ibool srv_use_adaptive_hash_indexes = TRUE; |
|
343 |
||
344 |
/*-------------------------------------------*/
|
|
345 |
ulong srv_n_spin_wait_rounds = 20; |
|
346 |
ulong srv_n_free_tickets_to_enter = 500; |
|
347 |
ulong srv_thread_sleep_delay = 10000; |
|
348 |
ulint srv_spin_wait_delay = 5; |
|
349 |
ibool srv_priority_boost = TRUE; |
|
350 |
||
351 |
ibool srv_print_thread_releases = FALSE; |
|
352 |
ibool srv_print_lock_waits = FALSE; |
|
353 |
ibool srv_print_buf_io = FALSE; |
|
354 |
ibool srv_print_log_io = FALSE; |
|
355 |
ibool srv_print_latch_waits = FALSE; |
|
356 |
||
357 |
ulint srv_n_rows_inserted = 0; |
|
358 |
ulint srv_n_rows_updated = 0; |
|
359 |
ulint srv_n_rows_deleted = 0; |
|
360 |
ulint srv_n_rows_read = 0; |
|
361 |
#ifndef UNIV_HOTBACKUP
|
|
362 |
static ulint srv_n_rows_inserted_old = 0; |
|
363 |
static ulint srv_n_rows_updated_old = 0; |
|
364 |
static ulint srv_n_rows_deleted_old = 0; |
|
365 |
static ulint srv_n_rows_read_old = 0; |
|
366 |
#endif /* !UNIV_HOTBACKUP */ |
|
367 |
||
368 |
ulint srv_n_lock_wait_count = 0; |
|
369 |
ulint srv_n_lock_wait_current_count = 0; |
|
370 |
ib_longlong srv_n_lock_wait_time = 0; |
|
371 |
ulint srv_n_lock_max_wait_time = 0; |
|
372 |
||
373 |
||
374 |
/*
|
|
375 |
Set the following to 0 if you want InnoDB to write messages on
|
|
376 |
stderr on startup/shutdown
|
|
377 |
*/
|
|
378 |
ibool srv_print_verbose_log = TRUE; |
|
379 |
ibool srv_print_innodb_monitor = FALSE; |
|
380 |
ibool srv_print_innodb_lock_monitor = FALSE; |
|
381 |
ibool srv_print_innodb_tablespace_monitor = FALSE; |
|
382 |
ibool srv_print_innodb_table_monitor = FALSE; |
|
383 |
||
384 |
/* Array of English strings describing the current state of an
|
|
385 |
i/o handler thread */
|
|
386 |
||
387 |
const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS]; |
|
388 |
const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS]; |
|
389 |
||
390 |
time_t srv_last_monitor_time; |
|
391 |
||
392 |
mutex_t srv_innodb_monitor_mutex; |
|
393 |
||
394 |
/* Mutex for locking srv_monitor_file */
|
|
395 |
mutex_t srv_monitor_file_mutex; |
|
396 |
/* Temporary file for innodb monitor output */
|
|
397 |
FILE* srv_monitor_file; |
|
398 |
/* Mutex for locking srv_dict_tmpfile.
|
|
399 |
This mutex has a very high rank; threads reserving it should not
|
|
400 |
be holding any InnoDB latches. */
|
|
401 |
mutex_t srv_dict_tmpfile_mutex; |
|
402 |
/* Temporary file for output from the data dictionary */
|
|
403 |
FILE* srv_dict_tmpfile; |
|
404 |
/* Mutex for locking srv_misc_tmpfile.
|
|
405 |
This mutex has a very low rank; threads reserving it should not
|
|
406 |
acquire any further latches or sleep before releasing this one. */
|
|
407 |
mutex_t srv_misc_tmpfile_mutex; |
|
408 |
/* Temporary file for miscellaneous diagnostic output */
|
|
409 |
FILE* srv_misc_tmpfile; |
|
410 |
||
411 |
ulint srv_main_thread_process_no = 0; |
|
412 |
ulint srv_main_thread_id = 0; |
|
413 |
||
414 |
/*
|
|
415 |
IMPLEMENTATION OF THE SERVER MAIN PROGRAM
|
|
416 |
=========================================
|
|
417 |
||
418 |
There is the following analogue between this database
|
|
419 |
server and an operating system kernel:
|
|
420 |
||
421 |
DB concept equivalent OS concept
|
|
422 |
---------- ---------------------
|
|
423 |
transaction -- process;
|
|
424 |
||
425 |
query thread -- thread;
|
|
426 |
||
427 |
lock -- semaphore;
|
|
428 |
||
429 |
transaction set to
|
|
430 |
the rollback state -- kill signal delivered to a process;
|
|
431 |
||
432 |
kernel -- kernel;
|
|
433 |
||
434 |
query thread execution:
|
|
435 |
(a) without kernel mutex
|
|
436 |
reserved -- process executing in user mode;
|
|
437 |
(b) with kernel mutex reserved
|
|
438 |
-- process executing in kernel mode;
|
|
439 |
||
440 |
The server is controlled by a master thread which runs at
|
|
441 |
a priority higher than normal, that is, higher than user threads.
|
|
442 |
It sleeps most of the time, and wakes up, say, every 300 milliseconds,
|
|
443 |
to check whether there is anything happening in the server which
|
|
444 |
requires intervention of the master thread. Such situations may be,
|
|
445 |
for example, when flushing of dirty blocks is needed in the buffer
|
|
446 |
pool or old version of database rows have to be cleaned away.
|
|
447 |
||
448 |
The threads which we call user threads serve the queries of
|
|
449 |
the clients and input from the console of the server.
|
|
450 |
They run at normal priority. The server may have several
|
|
451 |
communications endpoints. A dedicated set of user threads waits
|
|
452 |
at each of these endpoints ready to receive a client request.
|
|
453 |
Each request is taken by a single user thread, which then starts
|
|
454 |
processing and, when the result is ready, sends it to the client
|
|
455 |
and returns to wait at the same endpoint the thread started from.
|
|
456 |
||
457 |
So, we do not have dedicated communication threads listening at
|
|
458 |
the endpoints and dealing the jobs to dedicated worker threads.
|
|
459 |
Our architecture saves one thread switch per request, compared
|
|
460 |
to the solution with dedicated communication threads
|
|
461 |
which amounts to 15 microseconds on 100 MHz Pentium
|
|
462 |
running NT. If the client
|
|
463 |
is communicating over a network, this saving is negligible, but
|
|
464 |
if the client resides in the same machine, maybe in an SMP machine
|
|
465 |
on a different processor from the server thread, the saving
|
|
466 |
can be important as the threads can communicate over shared
|
|
467 |
memory with an overhead of a few microseconds.
|
|
468 |
||
469 |
We may later implement a dedicated communication thread solution
|
|
470 |
for those endpoints which communicate over a network.
|
|
471 |
||
472 |
Our solution with user threads has two problems: for each endpoint
|
|
473 |
there has to be a number of listening threads. If there are many
|
|
474 |
communication endpoints, it may be difficult to set the right number
|
|
475 |
of concurrent threads in the system, as many of the threads
|
|
476 |
may always be waiting at less busy endpoints. Another problem
|
|
477 |
is queuing of the messages, as the server internally does not
|
|
478 |
offer any queue for jobs.
|
|
479 |
||
480 |
Another group of user threads is intended for splitting the
|
|
481 |
queries and processing them in parallel. Let us call these
|
|
482 |
parallel communication threads. These threads are waiting for
|
|
483 |
parallelized tasks, suspended on event semaphores.
|
|
484 |
||
485 |
A single user thread waits for input from the console,
|
|
486 |
like a command to shut the database.
|
|
487 |
||
488 |
Utility threads are a different group of threads which takes
|
|
489 |
care of the buffer pool flushing and other, mainly background
|
|
490 |
operations, in the server.
|
|
491 |
Some of these utility threads always run at a lower than normal
|
|
492 |
priority, so that they are always in background. Some of them
|
|
493 |
may dynamically boost their priority by the pri_adjust function,
|
|
494 |
even to higher than normal priority, if their task becomes urgent.
|
|
495 |
The running of utilities is controlled by high- and low-water marks
|
|
496 |
of urgency. The urgency may be measured by the number of dirty blocks
|
|
497 |
in the buffer pool, in the case of the flush thread, for example.
|
|
498 |
When the high-water mark is exceeded, an utility starts running, until
|
|
499 |
the urgency drops under the low-water mark. Then the utility thread
|
|
500 |
suspends itself to wait for an event. The master thread is
|
|
501 |
responsible of signaling this event when the utility thread is
|
|
502 |
again needed.
|
|
503 |
||
504 |
For each individual type of utility, some threads always remain
|
|
505 |
at lower than normal priority. This is because pri_adjust is implemented
|
|
506 |
so that the threads at normal or higher priority control their
|
|
507 |
share of running time by calling sleep. Thus, if the load of the
|
|
508 |
system suddenly drops, these threads cannot necessarily utilize
|
|
509 |
the system fully. The background priority threads make up for this,
|
|
510 |
starting to run when the load drops.
|
|
511 |
||
512 |
When there is no activity in the system, also the master thread
|
|
513 |
suspends itself to wait for an event making
|
|
514 |
the server totally silent. The responsibility to signal this
|
|
515 |
event is on the user thread which again receives a message
|
|
516 |
from a client.
|
|
517 |
||
518 |
There is still one complication in our server design. If a
|
|
519 |
background utility thread obtains a resource (e.g., mutex) needed by a user
|
|
520 |
thread, and there is also some other user activity in the system,
|
|
521 |
the user thread may have to wait indefinitely long for the
|
|
522 |
resource, as the OS does not schedule a background thread if
|
|
523 |
there is some other runnable user thread. This problem is called
|
|
524 |
priority inversion in real-time programming.
|
|
525 |
||
526 |
One solution to the priority inversion problem would be to
|
|
527 |
keep record of which thread owns which resource and
|
|
528 |
in the above case boost the priority of the background thread
|
|
529 |
so that it will be scheduled and it can release the resource.
|
|
530 |
This solution is called priority inheritance in real-time programming.
|
|
531 |
A drawback of this solution is that the overhead of acquiring a mutex
|
|
532 |
increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
|
|
533 |
the thread has to call os_thread_get_curr_id.
|
|
534 |
This may be compared to 0.5 microsecond overhead for a mutex lock-unlock
|
|
535 |
pair. Note that the thread
|
|
536 |
cannot store the information in the resource, say mutex, itself,
|
|
537 |
because competing threads could wipe out the information if it is
|
|
538 |
stored before acquiring the mutex, and if it stored afterwards,
|
|
539 |
the information is outdated for the time of one machine instruction,
|
|
540 |
at least. (To be precise, the information could be stored to
|
|
541 |
lock_word in mutex if the machine supports atomic swap.)
|
|
542 |
||
543 |
The above solution with priority inheritance may become actual in the
|
|
544 |
future, but at the moment we plan to implement a more coarse solution,
|
|
545 |
which could be called a global priority inheritance. If a thread
|
|
546 |
has to wait for a long time, say 300 milliseconds, for a resource,
|
|
547 |
we just guess that it may be waiting for a resource owned by a background
|
|
548 |
thread, and boost the priority of all runnable background threads
|
|
549 |
to the normal level. The background threads then themselves adjust
|
|
550 |
their fixed priority back to background after releasing all resources
|
|
551 |
they had (or, at some fixed points in their program code).
|
|
552 |
||
553 |
What is the performance of the global priority inheritance solution?
|
|
554 |
We may weigh the length of the wait time 300 milliseconds, during
|
|
555 |
which the system processes some other thread
|
|
556 |
to the cost of boosting the priority of each runnable background
|
|
557 |
thread, rescheduling it, and lowering the priority again.
|
|
558 |
On 100 MHz Pentium + NT this overhead may be of the order 100
|
|
559 |
microseconds per thread. So, if the number of runnable background
|
|
560 |
threads is not very big, say < 100, the cost is tolerable.
|
|
561 |
Utility threads probably will access resources used by
|
|
562 |
user threads not very often, so collisions of user threads
|
|
563 |
to preempted utility threads should not happen very often.
|
|
564 |
||
565 |
The thread table contains
|
|
566 |
information of the current status of each thread existing in the system,
|
|
567 |
and also the event semaphores used in suspending the master thread
|
|
568 |
and utility and parallel communication threads when they have nothing to do.
|
|
569 |
The thread table can be seen as an analogue to the process table
|
|
570 |
in a traditional Unix implementation.
|
|
571 |
||
572 |
The thread table is also used in the global priority inheritance
|
|
573 |
scheme. This brings in one additional complication: threads accessing
|
|
574 |
the thread table must have at least normal fixed priority,
|
|
575 |
because the priority inheritance solution does not work if a background
|
|
576 |
thread is preempted while possessing the mutex protecting the thread table.
|
|
577 |
So, if a thread accesses the thread table, its priority has to be
|
|
578 |
boosted at least to normal. This priority requirement can be seen similar to
|
|
579 |
the privileged mode used when processing the kernel calls in traditional
|
|
580 |
Unix.*/
|
|
581 |
||
582 |
/* Thread slot in the thread table */
|
|
583 |
struct srv_slot_struct{ |
|
584 |
os_thread_id_t id; /* thread id */ |
|
585 |
os_thread_t handle; /* thread handle */ |
|
586 |
ulint type; /* thread type: user, utility etc. */ |
|
587 |
ibool in_use; /* TRUE if this slot is in use */ |
|
588 |
ibool suspended; /* TRUE if the thread is waiting |
|
589 |
for the event of this slot */
|
|
590 |
ib_time_t suspend_time; /* time when the thread was |
|
591 |
suspended */
|
|
592 |
os_event_t event; /* event used in suspending the |
|
593 |
thread when it has nothing to do */
|
|
594 |
que_thr_t* thr; /* suspended query thread (only |
|
595 |
used for MySQL threads) */
|
|
596 |
};
|
|
597 |
||
598 |
/* Table for MySQL threads where they will be suspended to wait for locks */
|
|
599 |
srv_slot_t* srv_mysql_table = NULL; |
|
600 |
||
601 |
os_event_t srv_lock_timeout_thread_event; |
|
602 |
||
603 |
srv_sys_t* srv_sys = NULL; |
|
604 |
||
605 |
byte srv_pad1[64]; /* padding to prevent other memory update |
|
606 |
hotspots from residing on the same memory
|
|
607 |
cache line */
|
|
608 |
mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, |
|
609 |
query threads, and lock table */
|
|
610 |
byte srv_pad2[64]; /* padding to prevent other memory update |
|
611 |
hotspots from residing on the same memory
|
|
612 |
cache line */
|
|
613 |
||
614 |
/* The following three values measure the urgency of the jobs of
|
|
615 |
buffer, version, and insert threads. They may vary from 0 - 1000.
|
|
616 |
The server mutex protects all these variables. The low-water values
|
|
617 |
tell that the server can acquiesce the utility when the value
|
|
618 |
drops below this low-water mark. */
|
|
619 |
||
620 |
ulint srv_meter[SRV_MASTER + 1]; |
|
621 |
ulint srv_meter_low_water[SRV_MASTER + 1]; |
|
622 |
ulint srv_meter_high_water[SRV_MASTER + 1]; |
|
623 |
ulint srv_meter_high_water2[SRV_MASTER + 1]; |
|
624 |
ulint srv_meter_foreground[SRV_MASTER + 1]; |
|
625 |
||
626 |
/* The following values give info about the activity going on in
|
|
627 |
the database. They are protected by the server mutex. The arrays
|
|
628 |
are indexed by the type of the thread. */
|
|
629 |
||
630 |
ulint srv_n_threads_active[SRV_MASTER + 1]; |
|
631 |
ulint srv_n_threads[SRV_MASTER + 1]; |
|
632 |
||
633 |
/*************************************************************************
|
|
634 |
Sets the info describing an i/o thread current state. */
|
|
635 |
||
636 |
void
|
|
637 |
srv_set_io_thread_op_info( |
|
638 |
/*======================*/
|
|
639 |
ulint i, /* in: the 'segment' of the i/o thread */ |
|
640 |
const char* str) /* in: constant char string describing the |
|
641 |
state */
|
|
642 |
{
|
|
643 |
ut_a(i < SRV_MAX_N_IO_THREADS); |
|
644 |
||
645 |
srv_io_thread_op_info[i] = str; |
|
646 |
}
|
|
647 |
||
648 |
/*************************************************************************
|
|
649 |
Accessor function to get pointer to n'th slot in the server thread
|
|
650 |
table. */
|
|
651 |
static
|
|
652 |
srv_slot_t* |
|
653 |
srv_table_get_nth_slot( |
|
654 |
/*===================*/
|
|
655 |
/* out: pointer to the slot */
|
|
656 |
ulint index) /* in: index of the slot */ |
|
657 |
{
|
|
658 |
ut_a(index < OS_THREAD_MAX_N); |
|
659 |
||
660 |
return(srv_sys->threads + index); |
|
661 |
}
|
|
662 |
||
663 |
#ifndef UNIV_HOTBACKUP
|
|
664 |
/*************************************************************************
|
|
665 |
Gets the number of threads in the system. */
|
|
666 |
||
667 |
ulint
|
|
668 |
srv_get_n_threads(void) |
|
669 |
/*===================*/
|
|
670 |
{
|
|
671 |
ulint i; |
|
672 |
ulint n_threads = 0; |
|
673 |
||
674 |
mutex_enter(&kernel_mutex); |
|
675 |
||
676 |
for (i = SRV_COM; i < SRV_MASTER + 1; i++) { |
|
677 |
||
678 |
n_threads += srv_n_threads[i]; |
|
679 |
}
|
|
680 |
||
681 |
mutex_exit(&kernel_mutex); |
|
682 |
||
683 |
return(n_threads); |
|
684 |
}
|
|
685 |
||
686 |
/*************************************************************************
|
|
687 |
Reserves a slot in the thread table for the current thread. Also creates the
|
|
688 |
thread local storage struct for the current thread. NOTE! The server mutex
|
|
689 |
has to be reserved by the caller! */
|
|
690 |
static
|
|
691 |
ulint
|
|
692 |
srv_table_reserve_slot( |
|
693 |
/*===================*/
|
|
694 |
/* out: reserved slot index */
|
|
695 |
ulint type) /* in: type of the thread: one of SRV_COM, ... */ |
|
696 |
{
|
|
697 |
srv_slot_t* slot; |
|
698 |
ulint i; |
|
699 |
||
700 |
ut_a(type > 0); |
|
701 |
ut_a(type <= SRV_MASTER); |
|
702 |
||
703 |
i = 0; |
|
704 |
slot = srv_table_get_nth_slot(i); |
|
705 |
||
706 |
while (slot->in_use) { |
|
707 |
i++; |
|
708 |
slot = srv_table_get_nth_slot(i); |
|
709 |
}
|
|
710 |
||
711 |
ut_a(slot->in_use == FALSE); |
|
712 |
||
713 |
slot->in_use = TRUE; |
|
714 |
slot->suspended = FALSE; |
|
715 |
slot->id = os_thread_get_curr_id(); |
|
716 |
slot->handle = os_thread_get_curr(); |
|
717 |
slot->type = type; |
|
718 |
||
719 |
thr_local_create(); |
|
720 |
||
721 |
thr_local_set_slot_no(os_thread_get_curr_id(), i); |
|
722 |
||
723 |
return(i); |
|
724 |
}
|
|
725 |
||
726 |
/*************************************************************************
|
|
727 |
Suspends the calling thread to wait for the event in its thread slot.
|
|
728 |
NOTE! The server mutex has to be reserved by the caller! */
|
|
729 |
static
|
|
730 |
os_event_t
|
|
731 |
srv_suspend_thread(void) |
|
732 |
/*====================*/
|
|
733 |
/* out: event for the calling thread to wait */
|
|
734 |
{
|
|
735 |
srv_slot_t* slot; |
|
736 |
os_event_t event; |
|
737 |
ulint slot_no; |
|
738 |
ulint type; |
|
739 |
||
740 |
ut_ad(mutex_own(&kernel_mutex)); |
|
741 |
||
742 |
slot_no = thr_local_get_slot_no(os_thread_get_curr_id()); |
|
743 |
||
744 |
if (srv_print_thread_releases) { |
|
745 |
fprintf(stderr, |
|
746 |
"Suspending thread %lu to slot %lu meter %lu\n", |
|
747 |
(ulong) os_thread_get_curr_id(), (ulong) slot_no, |
|
748 |
(ulong) srv_meter[SRV_RECOVERY]); |
|
749 |
}
|
|
750 |
||
751 |
slot = srv_table_get_nth_slot(slot_no); |
|
752 |
||
753 |
type = slot->type; |
|
754 |
||
755 |
ut_ad(type >= SRV_WORKER); |
|
756 |
ut_ad(type <= SRV_MASTER); |
|
757 |
||
758 |
event = slot->event; |
|
759 |
||
760 |
slot->suspended = TRUE; |
|
761 |
||
762 |
ut_ad(srv_n_threads_active[type] > 0); |
|
763 |
||
764 |
srv_n_threads_active[type]--; |
|
765 |
||
766 |
os_event_reset(event); |
|
767 |
||
768 |
return(event); |
|
769 |
}
|
|
770 |
#endif /* !UNIV_HOTBACKUP */ |
|
771 |
||
772 |
/*************************************************************************
|
|
773 |
Releases threads of the type given from suspension in the thread table.
|
|
774 |
NOTE! The server mutex has to be reserved by the caller! */
|
|
775 |
||
776 |
ulint
|
|
777 |
srv_release_threads( |
|
778 |
/*================*/
|
|
779 |
/* out: number of threads released: this may be
|
|
780 |
< n if not enough threads were suspended at the
|
|
781 |
moment */
|
|
782 |
ulint type, /* in: thread type */ |
|
783 |
ulint n) /* in: number of threads to release */ |
|
784 |
{
|
|
785 |
srv_slot_t* slot; |
|
786 |
ulint i; |
|
787 |
ulint count = 0; |
|
788 |
||
789 |
ut_ad(type >= SRV_WORKER); |
|
790 |
ut_ad(type <= SRV_MASTER); |
|
791 |
ut_ad(n > 0); |
|
792 |
ut_ad(mutex_own(&kernel_mutex)); |
|
793 |
||
794 |
for (i = 0; i < OS_THREAD_MAX_N; i++) { |
|
795 |
||
796 |
slot = srv_table_get_nth_slot(i); |
|
797 |
||
798 |
if (slot->in_use && slot->type == type && slot->suspended) { |
|
799 |
||
800 |
slot->suspended = FALSE; |
|
801 |
||
802 |
srv_n_threads_active[type]++; |
|
803 |
||
804 |
os_event_set(slot->event); |
|
805 |
||
806 |
if (srv_print_thread_releases) { |
|
807 |
fprintf(stderr, |
|
808 |
"Releasing thread %lu type %lu"
|
|
809 |
" from slot %lu meter %lu\n", |
|
810 |
(ulong) slot->id, (ulong) type, |
|
811 |
(ulong) i, |
|
812 |
(ulong) srv_meter[SRV_RECOVERY]); |
|
813 |
}
|
|
814 |
||
815 |
count++; |
|
816 |
||
817 |
if (count == n) { |
|
818 |
break; |
|
819 |
}
|
|
820 |
}
|
|
821 |
}
|
|
822 |
||
823 |
return(count); |
|
824 |
}
|
|
825 |
||
826 |
/*************************************************************************
|
|
827 |
Returns the calling thread type. */
|
|
828 |
||
829 |
ulint
|
|
830 |
srv_get_thread_type(void) |
|
831 |
/*=====================*/
|
|
832 |
/* out: SRV_COM, ... */
|
|
833 |
{
|
|
834 |
ulint slot_no; |
|
835 |
srv_slot_t* slot; |
|
836 |
ulint type; |
|
837 |
||
838 |
mutex_enter(&kernel_mutex); |
|
839 |
||
840 |
slot_no = thr_local_get_slot_no(os_thread_get_curr_id()); |
|
841 |
||
842 |
slot = srv_table_get_nth_slot(slot_no); |
|
843 |
||
844 |
type = slot->type; |
|
845 |
||
846 |
ut_ad(type >= SRV_WORKER); |
|
847 |
ut_ad(type <= SRV_MASTER); |
|
848 |
||
849 |
mutex_exit(&kernel_mutex); |
|
850 |
||
851 |
return(type); |
|
852 |
}
|
|
853 |
||
854 |
/*************************************************************************
|
|
855 |
Initializes the server. */
|
|
856 |
||
857 |
void
|
|
858 |
srv_init(void) |
|
859 |
/*==========*/
|
|
860 |
{
|
|
861 |
srv_conc_slot_t* conc_slot; |
|
862 |
srv_slot_t* slot; |
|
863 |
dict_table_t* table; |
|
864 |
ulint i; |
|
865 |
||
866 |
srv_sys = mem_alloc(sizeof(srv_sys_t)); |
|
867 |
||
868 |
kernel_mutex_temp = mem_alloc(sizeof(mutex_t)); |
|
869 |
mutex_create(&kernel_mutex, SYNC_KERNEL); |
|
870 |
||
871 |
mutex_create(&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK); |
|
872 |
||
873 |
srv_sys->threads = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)); |
|
874 |
||
875 |
for (i = 0; i < OS_THREAD_MAX_N; i++) { |
|
876 |
slot = srv_table_get_nth_slot(i); |
|
877 |
slot->in_use = FALSE; |
|
878 |
slot->type=0; /* Avoid purify errors */ |
|
879 |
slot->event = os_event_create(NULL); |
|
880 |
ut_a(slot->event); |
|
881 |
}
|
|
882 |
||
883 |
srv_mysql_table = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)); |
|
884 |
||
885 |
for (i = 0; i < OS_THREAD_MAX_N; i++) { |
|
886 |
slot = srv_mysql_table + i; |
|
887 |
slot->in_use = FALSE; |
|
888 |
slot->type = 0; |
|
889 |
slot->event = os_event_create(NULL); |
|
890 |
ut_a(slot->event); |
|
891 |
}
|
|
892 |
||
893 |
srv_lock_timeout_thread_event = os_event_create(NULL); |
|
894 |
||
895 |
for (i = 0; i < SRV_MASTER + 1; i++) { |
|
896 |
srv_n_threads_active[i] = 0; |
|
897 |
srv_n_threads[i] = 0; |
|
898 |
srv_meter[i] = 30; |
|
899 |
srv_meter_low_water[i] = 50; |
|
900 |
srv_meter_high_water[i] = 100; |
|
901 |
srv_meter_high_water2[i] = 200; |
|
902 |
srv_meter_foreground[i] = 250; |
|
903 |
}
|
|
904 |
||
905 |
UT_LIST_INIT(srv_sys->tasks); |
|
906 |
||
907 |
/* create dummy table and index for old-style infimum and supremum */
|
|
908 |
table = dict_mem_table_create("SYS_DUMMY1", |
|
909 |
DICT_HDR_SPACE, 1, 0); |
|
910 |
dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR, |
|
911 |
DATA_ENGLISH | DATA_NOT_NULL, 8); |
|
912 |
||
913 |
srv_sys->dummy_ind1 = dict_mem_index_create( |
|
914 |
"SYS_DUMMY1", "SYS_DUMMY1", DICT_HDR_SPACE, 0, 1); |
|
915 |
dict_index_add_col(srv_sys->dummy_ind1, table, (dict_col_t*) |
|
916 |
dict_table_get_nth_col(table, 0), 0); |
|
917 |
srv_sys->dummy_ind1->table = table; |
|
918 |
/* create dummy table and index for new-style infimum and supremum */
|
|
919 |
table = dict_mem_table_create("SYS_DUMMY2", |
|
920 |
DICT_HDR_SPACE, 1, DICT_TF_COMPACT); |
|
921 |
dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR, |
|
922 |
DATA_ENGLISH | DATA_NOT_NULL, 8); |
|
923 |
srv_sys->dummy_ind2 = dict_mem_index_create( |
|
924 |
"SYS_DUMMY2", "SYS_DUMMY2", DICT_HDR_SPACE, 0, 1); |
|
925 |
dict_index_add_col(srv_sys->dummy_ind2, table, (dict_col_t*) |
|
926 |
dict_table_get_nth_col(table, 0), 0); |
|
927 |
srv_sys->dummy_ind2->table = table; |
|
928 |
||
929 |
/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
|
|
930 |
srv_sys->dummy_ind1->cached = srv_sys->dummy_ind2->cached = TRUE; |
|
931 |
||
932 |
/* Init the server concurrency restriction data structures */
|
|
933 |
||
934 |
os_fast_mutex_init(&srv_conc_mutex); |
|
935 |
||
936 |
UT_LIST_INIT(srv_conc_queue); |
|
937 |
||
938 |
srv_conc_slots = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t)); |
|
939 |
||
940 |
for (i = 0; i < OS_THREAD_MAX_N; i++) { |
|
941 |
conc_slot = srv_conc_slots + i; |
|
942 |
conc_slot->reserved = FALSE; |
|
943 |
conc_slot->event = os_event_create(NULL); |
|
944 |
ut_a(conc_slot->event); |
|
945 |
}
|
|
946 |
}
|
|
947 |
||
948 |
/*************************************************************************
|
|
949 |
Frees the OS fast mutex created in srv_init(). */
|
|
950 |
||
951 |
void
|
|
952 |
srv_free(void) |
|
953 |
/*==========*/
|
|
954 |
{
|
|
955 |
os_fast_mutex_free(&srv_conc_mutex); |
|
956 |
}
|
|
957 |
||
958 |
/*************************************************************************
|
|
959 |
Initializes the synchronization primitives, memory system, and the thread
|
|
960 |
local storage. */
|
|
961 |
||
962 |
void
|
|
963 |
srv_general_init(void) |
|
964 |
/*==================*/
|
|
965 |
{
|
|
966 |
os_sync_init(); |
|
967 |
sync_init(); |
|
968 |
mem_init(srv_mem_pool_size); |
|
969 |
thr_local_init(); |
|
970 |
}
|
|
971 |
||
972 |
/*======================= InnoDB Server FIFO queue =======================*/
|
|
973 |
||
974 |
/* Maximum allowable purge history length. <=0 means 'infinite'. */
|
|
975 |
ulong srv_max_purge_lag = 0; |
|
976 |
||
977 |
/*************************************************************************
|
|
978 |
Puts an OS thread to wait if there are too many concurrent threads
|
|
979 |
(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
|
|
980 |
||
981 |
void
|
|
982 |
srv_conc_enter_innodb( |
|
983 |
/*==================*/
|
|
984 |
trx_t* trx) /* in: transaction object associated with the |
|
985 |
thread */
|
|
986 |
{
|
|
987 |
ibool has_slept = FALSE; |
|
988 |
srv_conc_slot_t* slot = NULL; |
|
989 |
ulint i; |
|
990 |
||
991 |
if (trx->mysql_thd != NULL |
|
992 |
&& thd_is_replication_slave_thread(trx->mysql_thd)) { |
|
993 |
||
994 |
/* TODO Do something more interesting (based on a config
|
|
995 |
parameter). Some users what to give the replication
|
|
996 |
thread very low priority, see http://bugs.mysql.com/25078
|
|
997 |
This can be done by introducing
|
|
998 |
innodb_replication_delay(ms) config parameter */
|
|
999 |
return; |
|
1000 |
}
|
|
1001 |
||
1002 |
/* If trx has 'free tickets' to enter the engine left, then use one
|
|
1003 |
such ticket */
|
|
1004 |
||
1005 |
if (trx->n_tickets_to_enter_innodb > 0) { |
|
1006 |
trx->n_tickets_to_enter_innodb--; |
|
1007 |
||
1008 |
return; |
|
1009 |
}
|
|
1010 |
||
1011 |
os_fast_mutex_lock(&srv_conc_mutex); |
|
1012 |
retry: |
|
1013 |
if (trx->declared_to_be_inside_innodb) { |
|
1014 |
ut_print_timestamp(stderr); |
|
1015 |
fputs(" InnoDB: Error: trying to declare trx" |
|
1016 |
" to enter InnoDB, but\n" |
|
1017 |
"InnoDB: it already is declared.\n", stderr); |
|
1018 |
trx_print(stderr, trx, 0); |
|
1019 |
putc('\n', stderr); |
|
1020 |
os_fast_mutex_unlock(&srv_conc_mutex); |
|
1021 |
||
1022 |
return; |
|
1023 |
}
|
|
1024 |
||
1025 |
if (srv_conc_n_threads < (lint)srv_thread_concurrency) { |
|
1026 |
||
1027 |
srv_conc_n_threads++; |
|
1028 |
trx->declared_to_be_inside_innodb = TRUE; |
|
1029 |
trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER; |
|
1030 |
||
1031 |
os_fast_mutex_unlock(&srv_conc_mutex); |
|
1032 |
||
1033 |
return; |
|
1034 |
}
|
|
1035 |
||
1036 |
/* If the transaction is not holding resources, let it sleep
|
|
1037 |
for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */
|
|
1038 |
||
1039 |
if (!has_slept && !trx->has_search_latch |
|
1040 |
&& NULL == UT_LIST_GET_FIRST(trx->trx_locks)) { |
|
1041 |
||
1042 |
has_slept = TRUE; /* We let it sleep only once to avoid |
|
1043 |
starvation */
|
|
1044 |
||
1045 |
srv_conc_n_waiting_threads++; |
|
1046 |
||
1047 |
os_fast_mutex_unlock(&srv_conc_mutex); |
|
1048 |
||
1049 |
trx->op_info = "sleeping before joining InnoDB queue"; |
|
1050 |
||
1051 |
/* Peter Zaitsev suggested that we take the sleep away
|
|
1052 |
altogether. But the sleep may be good in pathological
|
|
1053 |
situations of lots of thread switches. Simply put some
|
|
1054 |
threads aside for a while to reduce the number of thread
|
|
1055 |
switches. */
|
|
1056 |
if (SRV_THREAD_SLEEP_DELAY > 0) { |
|
1057 |
os_thread_sleep(SRV_THREAD_SLEEP_DELAY); |
|
1058 |
}
|
|
1059 |
||
1060 |
trx->op_info = ""; |
|
1061 |
||
1062 |
os_fast_mutex_lock(&srv_conc_mutex); |
|
1063 |
||
1064 |
srv_conc_n_waiting_threads--; |
|
1065 |
||
1066 |
goto retry; |
|
1067 |
}
|
|
1068 |
||
1069 |
/* Too many threads inside: put the current thread to a queue */
|
|
1070 |
||
1071 |
for (i = 0; i < OS_THREAD_MAX_N; i++) { |
|
1072 |
slot = srv_conc_slots + i; |
|
1073 |
||
1074 |
if (!slot->reserved) { |
|
1075 |
||
1076 |
break; |
|
1077 |
}
|
|
1078 |
}
|
|
1079 |
||
1080 |
if (i == OS_THREAD_MAX_N) { |
|
1081 |
/* Could not find a free wait slot, we must let the
|
|
1082 |
thread enter */
|
|
1083 |
||
1084 |
srv_conc_n_threads++; |
|
1085 |
trx->declared_to_be_inside_innodb = TRUE; |
|
1086 |
trx->n_tickets_to_enter_innodb = 0; |
|
1087 |
||
1088 |
os_fast_mutex_unlock(&srv_conc_mutex); |
|
1089 |
||
1090 |
return; |
|
1091 |
}
|
|
1092 |
||
1093 |
/* Release possible search system latch this thread has */
|
|
1094 |
if (trx->has_search_latch) { |
|
1095 |
trx_search_latch_release_if_reserved(trx); |
|
1096 |
}
|
|
1097 |
||
1098 |
/* Add to the queue */
|
|
1099 |
slot->reserved = TRUE; |
|
1100 |
slot->wait_ended = FALSE; |
|
1101 |
||
1102 |
UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot); |
|
1103 |
||
1104 |
os_event_reset(slot->event); |
|
1105 |
||
1106 |
srv_conc_n_waiting_threads++; |
|
1107 |
||
1108 |
os_fast_mutex_unlock(&srv_conc_mutex); |
|
1109 |
||
1110 |
/* Go to wait for the event; when a thread leaves InnoDB it will
|
|
1111 |
release this thread */
|
|
1112 |
||
1113 |
trx->op_info = "waiting in InnoDB queue"; |
|
1114 |
||
1115 |
os_event_wait(slot->event); |
|
1116 |
||
1117 |
trx->op_info = ""; |
|
1118 |
||
1119 |
os_fast_mutex_lock(&srv_conc_mutex); |
|
1120 |
||
1121 |
srv_conc_n_waiting_threads--; |
|
1122 |
||
1123 |
/* NOTE that the thread which released this thread already
|
|
1124 |
incremented the thread counter on behalf of this thread */
|
|
1125 |
||
1126 |
slot->reserved = FALSE; |
|
1127 |
||
1128 |
UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot); |
|
1129 |
||
1130 |
trx->declared_to_be_inside_innodb = TRUE; |
|
1131 |
trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER; |
|
1132 |
||
1133 |
os_fast_mutex_unlock(&srv_conc_mutex); |
|
1134 |
}
|
|
1135 |
||
1136 |
/*************************************************************************
|
|
1137 |
This lets a thread enter InnoDB regardless of the number of threads inside
|
|
1138 |
InnoDB. This must be called when a thread ends a lock wait. */
|
|
1139 |
||
1140 |
void
|
|
1141 |
srv_conc_force_enter_innodb( |
|
1142 |
/*========================*/
|
|
1143 |
trx_t* trx) /* in: transaction object associated with the |
|
1144 |
thread */
|
|
1145 |
{
|
|
1146 |
if (UNIV_LIKELY(!srv_thread_concurrency)) { |
|
1147 |
||
1148 |
return; |
|
1149 |
}
|
|
1150 |
||
1151 |
os_fast_mutex_lock(&srv_conc_mutex); |
|
1152 |
||
1153 |
srv_conc_n_threads++; |
|
1154 |
trx->declared_to_be_inside_innodb = TRUE; |
|
1155 |
trx->n_tickets_to_enter_innodb = 1; |
|
1156 |
||
1157 |
os_fast_mutex_unlock(&srv_conc_mutex); |
|
1158 |
}
|
|
1159 |
||
1160 |
/*************************************************************************
|
|
1161 |
This must be called when a thread exits InnoDB in a lock wait or at the
|
|
1162 |
end of an SQL statement. */
|
|
1163 |
||
1164 |
void
|
|
1165 |
srv_conc_force_exit_innodb( |
|
1166 |
/*=======================*/
|
|
1167 |
trx_t* trx) /* in: transaction object associated with the |
|
1168 |
thread */
|
|
1169 |
{
|
|
1170 |
srv_conc_slot_t* slot = NULL; |
|
1171 |
||
1172 |
if (UNIV_LIKELY(!srv_thread_concurrency)) { |
|
1173 |
||
1174 |
return; |
|
1175 |
}
|
|
1176 |
||
1177 |
if (trx->mysql_thd != NULL |
|
1178 |
&& thd_is_replication_slave_thread(trx->mysql_thd)) { |
|
1179 |
||
1180 |
return; |
|
1181 |
}
|
|
1182 |
||
1183 |
if (trx->declared_to_be_inside_innodb == FALSE) { |
|
1184 |
||
1185 |
return; |
|
1186 |
}
|
|
1187 |
||
1188 |
os_fast_mutex_lock(&srv_conc_mutex); |
|
1189 |
||
1190 |
srv_conc_n_threads--; |
|
1191 |
trx->declared_to_be_inside_innodb = FALSE; |
|
1192 |
trx->n_tickets_to_enter_innodb = 0; |
|
1193 |
||
1194 |
if (srv_conc_n_threads < (lint)srv_thread_concurrency) { |
|
1195 |
/* Look for a slot where a thread is waiting and no other
|
|
1196 |
thread has yet released the thread */
|
|
1197 |
||
1198 |
slot = UT_LIST_GET_FIRST(srv_conc_queue); |
|
1199 |
||
1200 |
while (slot && slot->wait_ended == TRUE) { |
|
1201 |
slot = UT_LIST_GET_NEXT(srv_conc_queue, slot); |
|
1202 |
}
|
|
1203 |
||
1204 |
if (slot != NULL) { |
|
1205 |
slot->wait_ended = TRUE; |
|
1206 |
||
1207 |
/* We increment the count on behalf of the released
|
|
1208 |
thread */
|
|
1209 |
||
1210 |
srv_conc_n_threads++; |
|
1211 |
}
|
|
1212 |
}
|
|
1213 |
||
1214 |
os_fast_mutex_unlock(&srv_conc_mutex); |
|
1215 |
||
1216 |
if (slot != NULL) { |
|
1217 |
os_event_set(slot->event); |
|
1218 |
}
|
|
1219 |
}
|
|
1220 |
||
1221 |
/*************************************************************************
|
|
1222 |
This must be called when a thread exits InnoDB. */
|
|
1223 |
||
1224 |
void
|
|
1225 |
srv_conc_exit_innodb( |
|
1226 |
/*=================*/
|
|
1227 |
trx_t* trx) /* in: transaction object associated with the |
|
1228 |
thread */
|
|
1229 |
{
|
|
1230 |
if (trx->n_tickets_to_enter_innodb > 0) { |
|
1231 |
/* We will pretend the thread is still inside InnoDB though it
|
|
1232 |
now leaves the InnoDB engine. In this way we save
|
|
1233 |
a lot of semaphore operations. srv_conc_force_exit_innodb is
|
|
1234 |
used to declare the thread definitely outside InnoDB. It
|
|
1235 |
should be called when there is a lock wait or an SQL statement
|
|
1236 |
ends. */
|
|
1237 |
||
1238 |
return; |
|
1239 |
}
|
|
1240 |
||
1241 |
srv_conc_force_exit_innodb(trx); |
|
1242 |
}
|
|
1243 |
||
1244 |
/*========================================================================*/
|
|
1245 |
||
1246 |
/*************************************************************************
|
|
1247 |
Normalizes init parameter values to use units we use inside InnoDB. */
|
|
1248 |
static
|
|
1249 |
ulint
|
|
1250 |
srv_normalize_init_values(void) |
|
1251 |
/*===========================*/
|
|
1252 |
/* out: DB_SUCCESS or error code */
|
|
1253 |
{
|
|
1254 |
ulint n; |
|
1255 |
ulint i; |
|
1256 |
||
1257 |
n = srv_n_data_files; |
|
1258 |
||
1259 |
for (i = 0; i < n; i++) { |
|
1260 |
srv_data_file_sizes[i] = srv_data_file_sizes[i] |
|
1261 |
* ((1024 * 1024) / UNIV_PAGE_SIZE); |
|
1262 |
}
|
|
1263 |
||
1264 |
srv_last_file_size_max = srv_last_file_size_max |
|
1265 |
* ((1024 * 1024) / UNIV_PAGE_SIZE); |
|
1266 |
||
1267 |
srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE; |
|
1268 |
||
1269 |
srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE; |
|
1270 |
||
1271 |
srv_pool_size = srv_pool_size / (UNIV_PAGE_SIZE / 1024); |
|
1272 |
||
1273 |
srv_awe_window_size = srv_awe_window_size / UNIV_PAGE_SIZE; |
|
1274 |
||
1275 |
if (srv_use_awe) { |
|
1276 |
/* If we are using AWE we must save memory in the 32-bit
|
|
1277 |
address space of the process, and cannot bind the lock
|
|
1278 |
table size to the real buffer pool size. */
|
|
1279 |
||
1280 |
srv_lock_table_size = 20 * srv_awe_window_size; |
|
1281 |
} else { |
|
1282 |
srv_lock_table_size = 5 * srv_pool_size; |
|
1283 |
}
|
|
1284 |
||
1285 |
return(DB_SUCCESS); |
|
1286 |
}
|
|
1287 |
||
1288 |
/*************************************************************************
|
|
1289 |
Boots the InnoDB server. */
|
|
1290 |
||
1291 |
ulint
|
|
1292 |
srv_boot(void) |
|
1293 |
/*==========*/
|
|
1294 |
/* out: DB_SUCCESS or error code */
|
|
1295 |
{
|
|
1296 |
ulint err; |
|
1297 |
||
1298 |
/* Transform the init parameter values given by MySQL to
|
|
1299 |
use units we use inside InnoDB: */
|
|
1300 |
||
1301 |
err = srv_normalize_init_values(); |
|
1302 |
||
1303 |
if (err != DB_SUCCESS) { |
|
1304 |
return(err); |
|
1305 |
}
|
|
1306 |
||
1307 |
/* Initialize synchronization primitives, memory management, and thread
|
|
1308 |
local storage */
|
|
1309 |
||
1310 |
srv_general_init(); |
|
1311 |
||
1312 |
/* Initialize this module */
|
|
1313 |
||
1314 |
srv_init(); |
|
1315 |
||
1316 |
return(DB_SUCCESS); |
|
1317 |
}
|
|
1318 |
||
1319 |
#ifndef UNIV_HOTBACKUP
|
|
1320 |
/*************************************************************************
|
|
1321 |
Reserves a slot in the thread table for the current MySQL OS thread.
|
|
1322 |
NOTE! The kernel mutex has to be reserved by the caller! */
|
|
1323 |
static
|
|
1324 |
srv_slot_t* |
|
1325 |
srv_table_reserve_slot_for_mysql(void) |
|
1326 |
/*==================================*/
|
|
1327 |
/* out: reserved slot */
|
|
1328 |
{
|
|
1329 |
srv_slot_t* slot; |
|
1330 |
ulint i; |
|
1331 |
||
1332 |
ut_ad(mutex_own(&kernel_mutex)); |
|
1333 |
||
1334 |
i = 0; |
|
1335 |
slot = srv_mysql_table + i; |
|
1336 |
||
1337 |
while (slot->in_use) { |
|
1338 |
i++; |
|
1339 |
||
1340 |
if (i >= OS_THREAD_MAX_N) { |
|
1341 |
||
1342 |
ut_print_timestamp(stderr); |
|
1343 |
||
1344 |
fprintf(stderr, |
|
1345 |
" InnoDB: There appear to be %lu MySQL"
|
|
1346 |
" threads currently waiting\n" |
|
1347 |
"InnoDB: inside InnoDB, which is the"
|
|
1348 |
" upper limit. Cannot continue operation.\n" |
|
1349 |
"InnoDB: We intentionally generate"
|
|
1350 |
" a seg fault to print a stack trace\n" |
|
1351 |
"InnoDB: on Linux. But first we print"
|
|
1352 |
" a list of waiting threads.\n", (ulong) i); |
|
1353 |
||
1354 |
for (i = 0; i < OS_THREAD_MAX_N; i++) { |
|
1355 |
||
1356 |
slot = srv_mysql_table + i; |
|
1357 |
||
1358 |
fprintf(stderr, |
|
1359 |
"Slot %lu: thread id %lu, type %lu,"
|
|
1360 |
" in use %lu, susp %lu, time %lu\n", |
|
1361 |
(ulong) i, |
|
1362 |
(ulong) os_thread_pf(slot->id), |
|
1363 |
(ulong) slot->type, |
|
1364 |
(ulong) slot->in_use, |
|
1365 |
(ulong) slot->suspended, |
|
1366 |
(ulong) difftime(ut_time(), |
|
1367 |
slot->suspend_time)); |
|
1368 |
}
|
|
1369 |
||
1370 |
ut_error; |
|
1371 |
}
|
|
1372 |
||
1373 |
slot = srv_mysql_table + i; |
|
1374 |
}
|
|
1375 |
||
1376 |
ut_a(slot->in_use == FALSE); |
|
1377 |
||
1378 |
slot->in_use = TRUE; |
|
1379 |
slot->id = os_thread_get_curr_id(); |
|
1380 |
slot->handle = os_thread_get_curr(); |
|
1381 |
||
1382 |
return(slot); |
|
1383 |
}
|
|
1384 |
#endif /* !UNIV_HOTBACKUP */ |
|
1385 |
||
1386 |
/*******************************************************************
|
|
1387 |
Puts a MySQL OS thread to wait for a lock to be released. If an error
|
|
1388 |
occurs during the wait trx->error_state associated with thr is
|
|
1389 |
!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
|
|
1390 |
are possible errors. DB_DEADLOCK is returned if selective deadlock
|
|
1391 |
resolution chose this transaction as a victim. */
|
|
1392 |
||
1393 |
void
|
|
1394 |
srv_suspend_mysql_thread( |
|
1395 |
/*=====================*/
|
|
1396 |
que_thr_t* thr) /* in: query thread associated with the MySQL |
|
1397 |
OS thread */
|
|
1398 |
{
|
|
1399 |
#ifndef UNIV_HOTBACKUP
|
|
1400 |
srv_slot_t* slot; |
|
1401 |
os_event_t event; |
|
1402 |
double wait_time; |
|
1403 |
trx_t* trx; |
|
1404 |
ibool had_dict_lock = FALSE; |
|
1405 |
ibool was_declared_inside_innodb = FALSE; |
|
1406 |
ib_longlong start_time = 0; |
|
1407 |
ib_longlong finish_time; |
|
1408 |
ulint diff_time; |
|
1409 |
ulint sec; |
|
1410 |
ulint ms; |
|
1411 |
||
1412 |
ut_ad(!mutex_own(&kernel_mutex)); |
|
1413 |
||
1414 |
trx = thr_get_trx(thr); |
|
1415 |
||
1416 |
os_event_set(srv_lock_timeout_thread_event); |
|
1417 |
||
1418 |
mutex_enter(&kernel_mutex); |
|
1419 |
||
1420 |
trx->error_state = DB_SUCCESS; |
|
1421 |
||
1422 |
if (thr->state == QUE_THR_RUNNING) { |
|
1423 |
||
1424 |
ut_ad(thr->is_active == TRUE); |
|
1425 |
||
1426 |
/* The lock has already been released or this transaction
|
|
1427 |
was chosen as a deadlock victim: no need to suspend */
|
|
1428 |
||
1429 |
if (trx->was_chosen_as_deadlock_victim) { |
|
1430 |
||
1431 |
trx->error_state = DB_DEADLOCK; |
|
1432 |
trx->was_chosen_as_deadlock_victim = FALSE; |
|
1433 |
}
|
|
1434 |
||
1435 |
mutex_exit(&kernel_mutex); |
|
1436 |
||
1437 |
return; |
|
1438 |
}
|
|
1439 |
||
1440 |
ut_ad(thr->is_active == FALSE); |
|
1441 |
||
1442 |
slot = srv_table_reserve_slot_for_mysql(); |
|
1443 |
||
1444 |
event = slot->event; |
|
1445 |
||
1446 |
slot->thr = thr; |
|
1447 |
||
1448 |
os_event_reset(event); |
|
1449 |
||
1450 |
slot->suspend_time = ut_time(); |
|
1451 |
||
1452 |
if (thr->lock_state == QUE_THR_LOCK_ROW) { |
|
1453 |
srv_n_lock_wait_count++; |
|
1454 |
srv_n_lock_wait_current_count++; |
|
1455 |
||
1456 |
ut_usectime(&sec, &ms); |
|
1457 |
start_time = (ib_longlong)sec * 1000000 + ms; |
|
1458 |
}
|
|
1459 |
/* Wake the lock timeout monitor thread, if it is suspended */
|
|
1460 |
||
1461 |
os_event_set(srv_lock_timeout_thread_event); |
|
1462 |
||
1463 |
mutex_exit(&kernel_mutex); |
|
1464 |
||
1465 |
if (trx->declared_to_be_inside_innodb) { |
|
1466 |
||
1467 |
was_declared_inside_innodb = TRUE; |
|
1468 |
||
1469 |
/* We must declare this OS thread to exit InnoDB, since a
|
|
1470 |
possible other thread holding a lock which this thread waits
|
|
1471 |
for must be allowed to enter, sooner or later */
|
|
1472 |
||
1473 |
srv_conc_force_exit_innodb(trx); |
|
1474 |
}
|
|
1475 |
||
1476 |
/* Release possible foreign key check latch */
|
|
1477 |
if (trx->dict_operation_lock_mode == RW_S_LATCH) { |
|
1478 |
||
1479 |
had_dict_lock = TRUE; |
|
1480 |
||
1481 |
row_mysql_unfreeze_data_dictionary(trx); |
|
1482 |
}
|
|
1483 |
||
1484 |
ut_a(trx->dict_operation_lock_mode == 0); |
|
1485 |
||
1486 |
/* Wait for the release */
|
|
1487 |
||
1488 |
os_event_wait(event); |
|
1489 |
||
1490 |
if (had_dict_lock) { |
|
1491 |
||
1492 |
row_mysql_freeze_data_dictionary(trx); |
|
1493 |
}
|
|
1494 |
||
1495 |
if (was_declared_inside_innodb) { |
|
1496 |
||
1497 |
/* Return back inside InnoDB */
|
|
1498 |
||
1499 |
srv_conc_force_enter_innodb(trx); |
|
1500 |
}
|
|
1501 |
||
1502 |
mutex_enter(&kernel_mutex); |
|
1503 |
||
1504 |
/* Release the slot for others to use */
|
|
1505 |
||
1506 |
slot->in_use = FALSE; |
|
1507 |
||
1508 |
wait_time = ut_difftime(ut_time(), slot->suspend_time); |
|
1509 |
||
1510 |
if (thr->lock_state == QUE_THR_LOCK_ROW) { |
|
1511 |
ut_usectime(&sec, &ms); |
|
1512 |
finish_time = (ib_longlong)sec * 1000000 + ms; |
|
1513 |
||
1514 |
diff_time = (ulint) (finish_time - start_time); |
|
1515 |
||
1516 |
srv_n_lock_wait_current_count--; |
|
1517 |
srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time; |
|
1518 |
if (diff_time > srv_n_lock_max_wait_time) { |
|
1519 |
srv_n_lock_max_wait_time = diff_time; |
|
1520 |
}
|
|
1521 |
}
|
|
1522 |
||
1523 |
if (trx->was_chosen_as_deadlock_victim) { |
|
1524 |
||
1525 |
trx->error_state = DB_DEADLOCK; |
|
1526 |
trx->was_chosen_as_deadlock_victim = FALSE; |
|
1527 |
}
|
|
1528 |
||
1529 |
mutex_exit(&kernel_mutex); |
|
1530 |
||
1531 |
if (srv_lock_wait_timeout < 100000000 |
|
1532 |
&& wait_time > (double)srv_lock_wait_timeout) { |
|
1533 |
||
1534 |
trx->error_state = DB_LOCK_WAIT_TIMEOUT; |
|
1535 |
}
|
|
1536 |
#else /* UNIV_HOTBACKUP */ |
|
1537 |
/* This function depends on MySQL code that is not included in
|
|
1538 |
InnoDB Hot Backup builds. Besides, this function should never
|
|
1539 |
be called in InnoDB Hot Backup. */
|
|
1540 |
ut_error; |
|
1541 |
#endif /* UNIV_HOTBACKUP */ |
|
1542 |
}
|
|
1543 |
||
1544 |
/************************************************************************
|
|
1545 |
Releases a MySQL OS thread waiting for a lock to be released, if the
|
|
1546 |
thread is already suspended. */
|
|
1547 |
||
1548 |
void
|
|
1549 |
srv_release_mysql_thread_if_suspended( |
|
1550 |
/*==================================*/
|
|
1551 |
que_thr_t* thr) /* in: query thread associated with the |
|
1552 |
MySQL OS thread */
|
|
1553 |
{
|
|
1554 |
#ifndef UNIV_HOTBACKUP
|
|
1555 |
srv_slot_t* slot; |
|
1556 |
ulint i; |
|
1557 |
||
1558 |
ut_ad(mutex_own(&kernel_mutex)); |
|
1559 |
||
1560 |
for (i = 0; i < OS_THREAD_MAX_N; i++) { |
|
1561 |
||
1562 |
slot = srv_mysql_table + i; |
|
1563 |
||
1564 |
if (slot->in_use && slot->thr == thr) { |
|
1565 |
/* Found */
|
|
1566 |
||
1567 |
os_event_set(slot->event); |
|
1568 |
||
1569 |
return; |
|
1570 |
}
|
|
1571 |
}
|
|
1572 |
||
1573 |
/* not found */
|
|
1574 |
#else /* UNIV_HOTBACKUP */ |
|
1575 |
/* This function depends on MySQL code that is not included in
|
|
1576 |
InnoDB Hot Backup builds. Besides, this function should never
|
|
1577 |
be called in InnoDB Hot Backup. */
|
|
1578 |
ut_error; |
|
1579 |
#endif /* UNIV_HOTBACKUP */ |
|
1580 |
}
|
|
1581 |
||
1582 |
#ifndef UNIV_HOTBACKUP
|
|
1583 |
/**********************************************************************
|
|
1584 |
Refreshes the values used to calculate per-second averages. */
|
|
1585 |
static
|
|
1586 |
void
|
|
1587 |
srv_refresh_innodb_monitor_stats(void) |
|
1588 |
/*==================================*/
|
|
1589 |
{
|
|
1590 |
mutex_enter(&srv_innodb_monitor_mutex); |
|
1591 |
||
1592 |
srv_last_monitor_time = time(NULL); |
|
1593 |
||
1594 |
os_aio_refresh_stats(); |
|
1595 |
||
1596 |
btr_cur_n_sea_old = btr_cur_n_sea; |
|
1597 |
btr_cur_n_non_sea_old = btr_cur_n_non_sea; |
|
1598 |
||
1599 |
log_refresh_stats(); |
|
1600 |
||
1601 |
buf_refresh_io_stats(); |
|
1602 |
||
1603 |
srv_n_rows_inserted_old = srv_n_rows_inserted; |
|
1604 |
srv_n_rows_updated_old = srv_n_rows_updated; |
|
1605 |
srv_n_rows_deleted_old = srv_n_rows_deleted; |
|
1606 |
srv_n_rows_read_old = srv_n_rows_read; |
|
1607 |
||
1608 |
mutex_exit(&srv_innodb_monitor_mutex); |
|
1609 |
}
|
|
1610 |
||
1611 |
/**********************************************************************
|
|
1612 |
Outputs to a file the output of the InnoDB Monitor. */
|
|
1613 |
||
1614 |
void
|
|
1615 |
srv_printf_innodb_monitor( |
|
1616 |
/*======================*/
|
|
1617 |
FILE* file, /* in: output stream */ |
|
1618 |
ulint* trx_start, /* out: file position of the start of |
|
1619 |
the list of active transactions */
|
|
1620 |
ulint* trx_end) /* out: file position of the end of |
|
1621 |
the list of active transactions */
|
|
1622 |
{
|
|
1623 |
double time_elapsed; |
|
1624 |
time_t current_time; |
|
1625 |
ulint n_reserved; |
|
1626 |
||
1627 |
mutex_enter(&srv_innodb_monitor_mutex); |
|
1628 |
||
1629 |
current_time = time(NULL); |
|
1630 |
||
1631 |
/* We add 0.001 seconds to time_elapsed to prevent division
|
|
1632 |
by zero if two users happen to call SHOW INNODB STATUS at the same
|
|
1633 |
time */
|
|
1634 |
||
1635 |
time_elapsed = difftime(current_time, srv_last_monitor_time) |
|
1636 |
+ 0.001; |
|
1637 |
||
1638 |
srv_last_monitor_time = time(NULL); |
|
1639 |
||
1640 |
fputs("\n=====================================\n", file); |
|
1641 |
||
1642 |
ut_print_timestamp(file); |
|
1643 |
fprintf(file, |
|
1644 |
" INNODB MONITOR OUTPUT\n" |
|
1645 |
"=====================================\n" |
|
1646 |
"Per second averages calculated from the last %lu seconds\n", |
|
1647 |
(ulong)time_elapsed); |
|
1648 |
||
1649 |
fputs("----------\n" |
|
1650 |
"SEMAPHORES\n" |
|
1651 |
"----------\n", file); |
|
1652 |
sync_print(file); |
|
1653 |
||
1654 |
/* Conceptually, srv_innodb_monitor_mutex has a very high latching
|
|
1655 |
order level in sync0sync.h, while dict_foreign_err_mutex has a very
|
|
1656 |
low level 135. Therefore we can reserve the latter mutex here without
|
|
1657 |
a danger of a deadlock of threads. */
|
|
1658 |
||
1659 |
mutex_enter(&dict_foreign_err_mutex); |
|
1660 |
||
1661 |
if (ftell(dict_foreign_err_file) != 0L) { |
|
1662 |
fputs("------------------------\n" |
|
1663 |
"LATEST FOREIGN KEY ERROR\n" |
|
1664 |
"------------------------\n", file); |
|
1665 |
ut_copy_file(file, dict_foreign_err_file); |
|
1666 |
}
|
|
1667 |
||
1668 |
mutex_exit(&dict_foreign_err_mutex); |
|
1669 |
||
1670 |
lock_print_info_summary(file); |
|
1671 |
if (trx_start) { |
|
1672 |
long t = ftell(file); |
|
1673 |
if (t < 0) { |
|
1674 |
*trx_start = ULINT_UNDEFINED; |
|
1675 |
} else { |
|
1676 |
*trx_start = (ulint) t; |
|
1677 |
}
|
|
1678 |
}
|
|
1679 |
lock_print_info_all_transactions(file); |
|
1680 |
if (trx_end) { |
|
1681 |
long t = ftell(file); |
|
1682 |
if (t < 0) { |
|
1683 |
*trx_end = ULINT_UNDEFINED; |
|
1684 |
} else { |
|
1685 |
*trx_end = (ulint) t; |
|
1686 |
}
|
|
1687 |
}
|
|
1688 |
fputs("--------\n" |
|
1689 |
"FILE I/O\n" |
|
1690 |
"--------\n", file); |
|
1691 |
os_aio_print(file); |
|
1692 |
||
1693 |
fputs("-------------------------------------\n" |
|
1694 |
"INSERT BUFFER AND ADAPTIVE HASH INDEX\n" |
|
1695 |
"-------------------------------------\n", file); |
|
1696 |
ibuf_print(file); |
|
1697 |
||
1698 |
ha_print_info(file, btr_search_sys->hash_index); |
|
1699 |
||
1700 |
fprintf(file, |
|
1701 |
"%.2f hash searches/s, %.2f non-hash searches/s\n", |
|
1702 |
(btr_cur_n_sea - btr_cur_n_sea_old) |
|
1703 |
/ time_elapsed, |
|
1704 |
(btr_cur_n_non_sea - btr_cur_n_non_sea_old) |
|
1705 |
/ time_elapsed); |
|
1706 |
btr_cur_n_sea_old = btr_cur_n_sea; |
|
1707 |
btr_cur_n_non_sea_old = btr_cur_n_non_sea; |
|
1708 |
||
1709 |
fputs("---\n" |
|
1710 |
"LOG\n" |
|
1711 |
"---\n", file); |
|
1712 |
log_print(file); |
|
1713 |
||
1714 |
fputs("----------------------\n" |
|
1715 |
"BUFFER POOL AND MEMORY\n" |
|
1716 |
"----------------------\n", file); |
|
1717 |
fprintf(file, |
|
1718 |
"Total memory allocated " ULINTPF |
|
1719 |
"; in additional pool allocated " ULINTPF "\n", |
|
1720 |
ut_total_allocated_memory, |
|
1721 |
mem_pool_get_reserved(mem_comm_pool)); |
|
1722 |
fprintf(file, "Dictionary memory allocated " ULINTPF "\n", |
|
1723 |
dict_sys->size); |
|
1724 |
||
1725 |
if (srv_use_awe) { |
|
1726 |
fprintf(file, |
|
1727 |
"In addition to that %lu MB of AWE memory allocated\n", |
|
1728 |
(ulong) (srv_pool_size |
|
1729 |
/ ((1024 * 1024) / UNIV_PAGE_SIZE))); |
|
1730 |
}
|
|
1731 |
||
1732 |
buf_print_io(file); |
|
1733 |
||
1734 |
fputs("--------------\n" |
|
1735 |
"ROW OPERATIONS\n" |
|
1736 |
"--------------\n", file); |
|
1737 |
fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n", |
|
1738 |
(long) srv_conc_n_threads, |
|
1739 |
(ulong) srv_conc_n_waiting_threads); |
|
1740 |
||
1741 |
fprintf(file, "%lu read views open inside InnoDB\n", |
|
1742 |
UT_LIST_GET_LEN(trx_sys->view_list)); |
|
1743 |
||
1744 |
n_reserved = fil_space_get_n_reserved_extents(0); |
|
1745 |
if (n_reserved > 0) { |
|
1746 |
fprintf(file, |
|
1747 |
"%lu tablespace extents now reserved for"
|
|
1748 |
" B-tree split operations\n", |
|
1749 |
(ulong) n_reserved); |
|
1750 |
}
|
|
1751 |
||
1752 |
#ifdef UNIV_LINUX
|
|
1753 |
fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n", |
|
1754 |
(ulong) srv_main_thread_process_no, |
|
1755 |
(ulong) srv_main_thread_id, |
|
1756 |
srv_main_thread_op_info); |
|
1757 |
#else
|
|
1758 |
fprintf(file, "Main thread id %lu, state: %s\n", |
|
1759 |
(ulong) srv_main_thread_id, |
|
1760 |
srv_main_thread_op_info); |
|
1761 |
#endif
|
|
1762 |
fprintf(file, |
|
1763 |
"Number of rows inserted " ULINTPF |
|
1764 |
", updated " ULINTPF ", deleted " ULINTPF |
|
1765 |
", read " ULINTPF "\n", |
|
1766 |
srv_n_rows_inserted, |
|
1767 |
srv_n_rows_updated, |
|
1768 |
srv_n_rows_deleted, |
|
1769 |
srv_n_rows_read); |
|
1770 |
fprintf(file, |
|
1771 |
"%.2f inserts/s, %.2f updates/s,"
|
|
1772 |
" %.2f deletes/s, %.2f reads/s\n", |
|
1773 |
(srv_n_rows_inserted - srv_n_rows_inserted_old) |
|
1774 |
/ time_elapsed, |
|
1775 |
(srv_n_rows_updated - srv_n_rows_updated_old) |
|
1776 |
/ time_elapsed, |
|
1777 |
(srv_n_rows_deleted - srv_n_rows_deleted_old) |
|
1778 |
/ time_elapsed, |
|
1779 |
(srv_n_rows_read - srv_n_rows_read_old) |
|
1780 |
/ time_elapsed); |
|
1781 |
||
1782 |
srv_n_rows_inserted_old = srv_n_rows_inserted; |
|
1783 |
srv_n_rows_updated_old = srv_n_rows_updated; |
|
1784 |
srv_n_rows_deleted_old = srv_n_rows_deleted; |
|
1785 |
srv_n_rows_read_old = srv_n_rows_read; |
|
1786 |
||
1787 |
fputs("----------------------------\n" |
|
1788 |
"END OF INNODB MONITOR OUTPUT\n" |
|
1789 |
"============================\n", file); |
|
1790 |
mutex_exit(&srv_innodb_monitor_mutex); |
|
1791 |
fflush(file); |
|
1792 |
}
|
|
1793 |
||
1794 |
/**********************************************************************
|
|
1795 |
Function to pass InnoDB status variables to MySQL */
|
|
1796 |
||
1797 |
void
|
|
1798 |
srv_export_innodb_status(void) |
|
1799 |
{
|
|
1800 |
mutex_enter(&srv_innodb_monitor_mutex); |
|
1801 |
||
1802 |
export_vars.innodb_data_pending_reads |
|
1803 |
= os_n_pending_reads; |
|
1804 |
export_vars.innodb_data_pending_writes |
|
1805 |
= os_n_pending_writes; |
|
1806 |
export_vars.innodb_data_pending_fsyncs |
|
1807 |
= fil_n_pending_log_flushes |
|
1808 |
+ fil_n_pending_tablespace_flushes; |
|
1809 |
export_vars.innodb_data_fsyncs = os_n_fsyncs; |
|
1810 |
export_vars.innodb_data_read = srv_data_read; |
|
1811 |
export_vars.innodb_data_reads = os_n_file_reads; |
|
1812 |
export_vars.innodb_data_writes = os_n_file_writes; |
|
1813 |
export_vars.innodb_data_written = srv_data_written; |
|
1814 |
export_vars.innodb_buffer_pool_read_requests = buf_pool->n_page_gets; |
|
1815 |
export_vars.innodb_buffer_pool_write_requests |
|
1816 |
= srv_buf_pool_write_requests; |
|
1817 |
export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free; |
|
1818 |
export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed; |
|
1819 |
export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads; |
|
1820 |
export_vars.innodb_buffer_pool_read_ahead_rnd = srv_read_ahead_rnd; |
|
1821 |
export_vars.innodb_buffer_pool_read_ahead_seq = srv_read_ahead_seq; |
|
1822 |
export_vars.innodb_buffer_pool_pages_data |
|
1823 |
= UT_LIST_GET_LEN(buf_pool->LRU); |
|
1824 |
export_vars.innodb_buffer_pool_pages_dirty |
|
1825 |
= UT_LIST_GET_LEN(buf_pool->flush_list); |
|
1826 |
export_vars.innodb_buffer_pool_pages_free |
|
1827 |
= UT_LIST_GET_LEN(buf_pool->free); |
|
1828 |
export_vars.innodb_buffer_pool_pages_latched |
|
1829 |
= buf_get_latched_pages_number(); |
|
1830 |
export_vars.innodb_buffer_pool_pages_total = buf_pool->curr_size; |
|
1831 |
||
1832 |
export_vars.innodb_buffer_pool_pages_misc = buf_pool->max_size |
|
1833 |
- UT_LIST_GET_LEN(buf_pool->LRU) |
|
1834 |
- UT_LIST_GET_LEN(buf_pool->free); |
|
1835 |
export_vars.innodb_page_size = UNIV_PAGE_SIZE; |
|
1836 |
export_vars.innodb_log_waits = srv_log_waits; |
|
1837 |
export_vars.innodb_os_log_written = srv_os_log_written; |
|
1838 |
export_vars.innodb_os_log_fsyncs = fil_n_log_flushes; |
|
1839 |
export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes; |
|
1840 |
export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes; |
|
1841 |
export_vars.innodb_log_write_requests = srv_log_write_requests; |
|
1842 |
export_vars.innodb_log_writes = srv_log_writes; |
|
1843 |
export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written; |
|
1844 |
export_vars.innodb_dblwr_writes = srv_dblwr_writes; |
|
1845 |
export_vars.innodb_pages_created = buf_pool->n_pages_created; |
|
1846 |
export_vars.innodb_pages_read = buf_pool->n_pages_read; |
|
1847 |
export_vars.innodb_pages_written = buf_pool->n_pages_written; |
|
1848 |
export_vars.innodb_row_lock_waits = srv_n_lock_wait_count; |
|
1849 |
export_vars.innodb_row_lock_current_waits |
|
1850 |
= srv_n_lock_wait_current_count; |
|
1851 |
export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000; |
|
1852 |
if (srv_n_lock_wait_count > 0) { |
|
1853 |
export_vars.innodb_row_lock_time_avg = (ulint) |
|
1854 |
(srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count); |
|
1855 |
} else { |
|
1856 |
export_vars.innodb_row_lock_time_avg = 0; |
|
1857 |
}
|
|
1858 |
export_vars.innodb_row_lock_time_max |
|
1859 |
= srv_n_lock_max_wait_time / 1000; |
|
1860 |
export_vars.innodb_rows_read = srv_n_rows_read; |
|
1861 |
export_vars.innodb_rows_inserted = srv_n_rows_inserted; |
|
1862 |
export_vars.innodb_rows_updated = srv_n_rows_updated; |
|
1863 |
export_vars.innodb_rows_deleted = srv_n_rows_deleted; |
|
1864 |
||
1865 |
mutex_exit(&srv_innodb_monitor_mutex); |
|
1866 |
}
|
|
1867 |
||
1868 |
/*************************************************************************
|
|
1869 |
A thread which wakes up threads whose lock wait may have lasted too long.
|
|
1870 |
This also prints the info output by various InnoDB monitors. */
|
|
1871 |
||
1872 |
os_thread_ret_t
|
|
1873 |
srv_lock_timeout_and_monitor_thread( |
|
1874 |
/*================================*/
|
|
1875 |
/* out: a dummy parameter */
|
|
1876 |
void* arg __attribute__((unused))) |
|
1877 |
/* in: a dummy parameter required by
|
|
1878 |
os_thread_create */
|
|
1879 |
{
|
|
1880 |
srv_slot_t* slot; |
|
1881 |
double time_elapsed; |
|
1882 |
time_t current_time; |
|
1883 |
time_t last_table_monitor_time; |
|
1884 |
time_t last_tablespace_monitor_time; |
|
1885 |
time_t last_monitor_time; |
|
1886 |
ibool some_waits; |
|
1887 |
double wait_time; |
|
1888 |
ulint i; |
|
1889 |
||
1890 |
#ifdef UNIV_DEBUG_THREAD_CREATION
|
|
1891 |
fprintf(stderr, "Lock timeout thread starts, id %lu\n", |
|
1892 |
os_thread_pf(os_thread_get_curr_id())); |
|
1893 |
#endif
|
|
1894 |
UT_NOT_USED(arg); |
|
1895 |
srv_last_monitor_time = time(NULL); |
|
1896 |
last_table_monitor_time = time(NULL); |
|
1897 |
last_tablespace_monitor_time = time(NULL); |
|
1898 |
last_monitor_time = time(NULL); |
|
1899 |
loop: |
|
1900 |
srv_lock_timeout_and_monitor_active = TRUE; |
|
1901 |
||
1902 |
/* When someone is waiting for a lock, we wake up every second
|
|
1903 |
and check if a timeout has passed for a lock wait */
|
|
1904 |
||
1905 |
os_thread_sleep(1000000); |
|
1906 |
||
1907 |
/* In case mutex_exit is not a memory barrier, it is
|
|
1908 |
theoretically possible some threads are left waiting though
|
|
1909 |
the semaphore is already released. Wake up those threads: */
|
|
1910 |
||
1911 |
sync_arr_wake_threads_if_sema_free(); |
|
1912 |
||
1913 |
current_time = time(NULL); |
|
1914 |
||
1915 |
time_elapsed = difftime(current_time, last_monitor_time); |
|
1916 |
||
1917 |
if (time_elapsed > 15) { |
|
1918 |
last_monitor_time = time(NULL); |
|
1919 |
||
1920 |
if (srv_print_innodb_monitor) { |
|
1921 |
srv_printf_innodb_monitor(stderr, NULL, NULL); |
|
1922 |
}
|
|
1923 |
||
1924 |
if (srv_innodb_status) { |
|
1925 |
mutex_enter(&srv_monitor_file_mutex); |
|
1926 |
rewind(srv_monitor_file); |
|
1927 |
srv_printf_innodb_monitor(srv_monitor_file, NULL, |
|
1928 |
NULL); |
|
1929 |
os_file_set_eof(srv_monitor_file); |
|
1930 |
mutex_exit(&srv_monitor_file_mutex); |
|
1931 |
}
|
|
1932 |
||
1933 |
if (srv_print_innodb_tablespace_monitor |
|
1934 |
&& difftime(current_time, |
|
1935 |
last_tablespace_monitor_time) > 60) { |
|
1936 |
last_tablespace_monitor_time = time(NULL); |
|
1937 |
||
1938 |
fputs("========================" |
|
1939 |
"========================\n", |
|
1940 |
stderr); |
|
1941 |
||
1942 |
ut_print_timestamp(stderr); |
|
1943 |
||
1944 |
fputs(" INNODB TABLESPACE MONITOR OUTPUT\n" |
|
1945 |
"========================"
|
|
1946 |
"========================\n", |
|
1947 |
stderr); |
|
1948 |
||
1949 |
fsp_print(0); |
|
1950 |
fputs("Validating tablespace\n", stderr); |
|
1951 |
fsp_validate(0); |
|
1952 |
fputs("Validation ok\n" |
|
1953 |
"---------------------------------------\n" |
|
1954 |
"END OF INNODB TABLESPACE MONITOR OUTPUT\n" |
|
1955 |
"=======================================\n", |
|
1956 |
stderr); |
|
1957 |
}
|
|
1958 |
||
1959 |
if (srv_print_innodb_table_monitor |
|
1960 |
&& difftime(current_time, last_table_monitor_time) > 60) { |
|
1961 |
||
1962 |
last_table_monitor_time = time(NULL); |
|
1963 |
||
1964 |
fputs("===========================================\n", |
|
1965 |
stderr); |
|
1966 |
||
1967 |
ut_print_timestamp(stderr); |
|
1968 |
||
1969 |
fputs(" INNODB TABLE MONITOR OUTPUT\n" |
|
1970 |
"===========================================\n", |
|
1971 |
stderr); |
|
1972 |
dict_print(); |
|
1973 |
||
1974 |
fputs("-----------------------------------\n" |
|
1975 |
"END OF INNODB TABLE MONITOR OUTPUT\n" |
|
1976 |
"==================================\n", |
|
1977 |
stderr); |
|
1978 |
}
|
|
1979 |
}
|
|
1980 |
||
1981 |
mutex_enter(&kernel_mutex); |
|
1982 |
||
1983 |
some_waits = FALSE; |
|
1984 |
||
1985 |
/* Check of all slots if a thread is waiting there, and if it
|
|
1986 |
has exceeded the time limit */
|
|
1987 |
||
1988 |
for (i = 0; i < OS_THREAD_MAX_N; i++) { |
|
1989 |
||
1990 |
slot = srv_mysql_table + i; |
|
1991 |
||
1992 |
if (slot->in_use) { |
|
1993 |
some_waits = TRUE; |
|
1994 |
||
1995 |
wait_time = ut_difftime(ut_time(), slot->suspend_time); |
|
1996 |
||
1997 |
if (srv_lock_wait_timeout < 100000000 |
|
1998 |
&& (wait_time > (double) srv_lock_wait_timeout |
|
1999 |
|| wait_time < 0)) { |
|
2000 |
||
2001 |
/* Timeout exceeded or a wrap-around in system
|
|
2002 |
time counter: cancel the lock request queued
|
|
2003 |
by the transaction and release possible
|
|
2004 |
other transactions waiting behind; it is
|
|
2005 |
possible that the lock has already been
|
|
2006 |
granted: in that case do nothing */
|
|
2007 |
||
2008 |
if (thr_get_trx(slot->thr)->wait_lock) { |
|
2009 |
lock_cancel_waiting_and_release( |
|
2010 |
thr_get_trx(slot->thr) |
|
2011 |
->wait_lock); |
|
2012 |
}
|
|
2013 |
}
|
|
2014 |
}
|
|
2015 |
}
|
|
2016 |
||
2017 |
os_event_reset(srv_lock_timeout_thread_event); |
|
2018 |
||
2019 |
mutex_exit(&kernel_mutex); |
|
2020 |
||
2021 |
if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) { |
|
2022 |
goto exit_func; |
|
2023 |
}
|
|
2024 |
||
2025 |
if (some_waits || srv_print_innodb_monitor |
|
2026 |
|| srv_print_innodb_lock_monitor |
|
2027 |
|| srv_print_innodb_tablespace_monitor |
|
2028 |
|| srv_print_innodb_table_monitor) { |
|
2029 |
goto loop; |
|
2030 |
}
|
|
2031 |
||
2032 |
/* No one was waiting for a lock and no monitor was active:
|
|
2033 |
suspend this thread */
|
|
2034 |
||
2035 |
srv_lock_timeout_and_monitor_active = FALSE; |
|
2036 |
||
2037 |
#if 0
|
|
2038 |
/* The following synchronisation is disabled, since
|
|
2039 |
the InnoDB monitor output is to be updated every 15 seconds. */
|
|
2040 |
os_event_wait(srv_lock_timeout_thread_event);
|
|
2041 |
#endif
|
|
2042 |
goto loop; |
|
2043 |
||
2044 |
exit_func: |
|
2045 |
srv_lock_timeout_and_monitor_active = FALSE; |
|
2046 |
||
2047 |
/* We count the number of threads in os_thread_exit(). A created
|
|
2048 |
thread should always use that to exit and not use return() to exit. */
|
|
2049 |
||
2050 |
os_thread_exit(NULL); |
|
2051 |
||
2052 |
OS_THREAD_DUMMY_RETURN; |
|
2053 |
}
|
|
2054 |
||
2055 |
/*************************************************************************
|
|
2056 |
A thread which prints warnings about semaphore waits which have lasted
|
|
2057 |
too long. These can be used to track bugs which cause hangs. */
|
|
2058 |
||
2059 |
os_thread_ret_t
|
|
2060 |
srv_error_monitor_thread( |
|
2061 |
/*=====================*/
|
|
2062 |
/* out: a dummy parameter */
|
|
2063 |
void* arg __attribute__((unused))) |
|
2064 |
/* in: a dummy parameter required by
|
|
2065 |
os_thread_create */
|
|
2066 |
{
|
|
2067 |
/* number of successive fatal timeouts observed */
|
|
2068 |
ulint fatal_cnt = 0; |
|
2069 |
dulint old_lsn; |
|
2070 |
dulint new_lsn; |
|
2071 |
||
2072 |
old_lsn = srv_start_lsn; |
|
2073 |
||
2074 |
#ifdef UNIV_DEBUG_THREAD_CREATION
|
|
2075 |
fprintf(stderr, "Error monitor thread starts, id %lu\n", |
|
2076 |
os_thread_pf(os_thread_get_curr_id())); |
|
2077 |
#endif
|
|
2078 |
loop: |
|
2079 |
srv_error_monitor_active = TRUE; |
|
2080 |
||
2081 |
/* Try to track a strange bug reported by Harald Fuchs and others,
|
|
2082 |
where the lsn seems to decrease at times */
|
|
2083 |
||
2084 |
new_lsn = log_get_lsn(); |
|
2085 |
||
2086 |
if (ut_dulint_cmp(new_lsn, old_lsn) < 0) { |
|
2087 |
ut_print_timestamp(stderr); |
|
2088 |
fprintf(stderr, |
|
2089 |
" InnoDB: Error: old log sequence number %lu %lu"
|
|
2090 |
" was greater\n" |
|
2091 |
"InnoDB: than the new log sequence number %lu %lu!\n" |
|
2092 |
"InnoDB: Please submit a bug report"
|
|
2093 |
" to http://bugs.mysql.com\n", |
|
2094 |
(ulong) ut_dulint_get_high(old_lsn), |
|
2095 |
(ulong) ut_dulint_get_low(old_lsn), |
|
2096 |
(ulong) ut_dulint_get_high(new_lsn), |
|
2097 |
(ulong) ut_dulint_get_low(new_lsn)); |
|
2098 |
}
|
|
2099 |
||
2100 |
old_lsn = new_lsn; |
|
2101 |
||
2102 |
if (difftime(time(NULL), srv_last_monitor_time) > 60) { |
|
2103 |
/* We referesh InnoDB Monitor values so that averages are
|
|
2104 |
printed from at most 60 last seconds */
|
|
2105 |
||
2106 |
srv_refresh_innodb_monitor_stats(); |
|
2107 |
}
|
|
2108 |
||
2109 |
if (sync_array_print_long_waits()) { |
|
2110 |
fatal_cnt++; |
|
2111 |
if (fatal_cnt > 5) { |
|
2112 |
||
2113 |
fprintf(stderr, |
|
2114 |
"InnoDB: Error: semaphore wait has lasted"
|
|
2115 |
" > %lu seconds\n" |
|
2116 |
"InnoDB: We intentionally crash the server,"
|
|
2117 |
" because it appears to be hung.\n", |
|
2118 |
(ulong) srv_fatal_semaphore_wait_threshold); |
|
2119 |
||
2120 |
ut_error; |
|
2121 |
}
|
|
2122 |
} else { |
|
2123 |
fatal_cnt = 0; |
|
2124 |
}
|
|
2125 |
||
2126 |
/* Flush stderr so that a database user gets the output
|
|
2127 |
to possible MySQL error file */
|
|
2128 |
||
2129 |
fflush(stderr); |
|
2130 |
||
2131 |
os_thread_sleep(2000000); |
|
2132 |
||
2133 |
if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) { |
|
2134 |
||
2135 |
goto loop; |
|
2136 |
}
|
|
2137 |
||
2138 |
srv_error_monitor_active = FALSE; |
|
2139 |
||
2140 |
/* We count the number of threads in os_thread_exit(). A created
|
|
2141 |
thread should always use that to exit and not use return() to exit. */
|
|
2142 |
||
2143 |
os_thread_exit(NULL); |
|
2144 |
||
2145 |
OS_THREAD_DUMMY_RETURN; |
|
2146 |
}
|
|
2147 |
||
2148 |
/***********************************************************************
|
|
2149 |
Tells the InnoDB server that there has been activity in the database
|
|
2150 |
and wakes up the master thread if it is suspended (not sleeping). Used
|
|
2151 |
in the MySQL interface. Note that there is a small chance that the master
|
|
2152 |
thread stays suspended (we do not protect our operation with the kernel
|
|
2153 |
mutex, for performace reasons). */
|
|
2154 |
||
2155 |
void
|
|
2156 |
srv_active_wake_master_thread(void) |
|
2157 |
/*===============================*/
|
|
2158 |
{
|
|
2159 |
srv_activity_count++; |
|
2160 |
||
2161 |
if (srv_n_threads_active[SRV_MASTER] == 0) { |
|
2162 |
||
2163 |
mutex_enter(&kernel_mutex); |
|
2164 |
||
2165 |
srv_release_threads(SRV_MASTER, 1); |
|
2166 |
||
2167 |
mutex_exit(&kernel_mutex); |
|
2168 |
}
|
|
2169 |
}
|
|
2170 |
||
2171 |
/***********************************************************************
|
|
2172 |
Wakes up the master thread if it is suspended or being suspended. */
|
|
2173 |
||
2174 |
void
|
|
2175 |
srv_wake_master_thread(void) |
|
2176 |
/*========================*/
|
|
2177 |
{
|
|
2178 |
srv_activity_count++; |
|
2179 |
||
2180 |
mutex_enter(&kernel_mutex); |
|
2181 |
||
2182 |
srv_release_threads(SRV_MASTER, 1); |
|
2183 |
||
2184 |
mutex_exit(&kernel_mutex); |
|
2185 |
}
|
|
2186 |
||
2187 |
/*************************************************************************
|
|
2188 |
The master thread controlling the server. */
|
|
2189 |
||
2190 |
os_thread_ret_t
|
|
2191 |
srv_master_thread( |
|
2192 |
/*==============*/
|
|
2193 |
/* out: a dummy parameter */
|
|
2194 |
void* arg __attribute__((unused))) |
|
2195 |
/* in: a dummy parameter required by
|
|
2196 |
os_thread_create */
|
|
2197 |
{
|
|
2198 |
os_event_t event; |
|
2199 |
time_t last_flush_time; |
|
2200 |
time_t current_time; |
|
2201 |
ulint old_activity_count; |
|
2202 |
ulint n_pages_purged; |
|
2203 |
ulint n_bytes_merged; |
|
2204 |
ulint n_pages_flushed; |
|
2205 |
ulint n_bytes_archived; |
|
2206 |
ulint n_tables_to_drop; |
|
2207 |
ulint n_ios; |
|
2208 |
ulint n_ios_old; |
|
2209 |
ulint n_ios_very_old; |
|
2210 |
ulint n_pend_ios; |
|
2211 |
ibool skip_sleep = FALSE; |
|
2212 |
ulint i; |
|
2213 |
||
2214 |
#ifdef UNIV_DEBUG_THREAD_CREATION
|
|
2215 |
fprintf(stderr, "Master thread starts, id %lu\n", |
|
2216 |
os_thread_pf(os_thread_get_curr_id())); |
|
2217 |
#endif
|
|
2218 |
srv_main_thread_process_no = os_proc_get_number(); |
|
2219 |
srv_main_thread_id = os_thread_pf(os_thread_get_curr_id()); |
|
2220 |
||
2221 |
srv_table_reserve_slot(SRV_MASTER); |
|
2222 |
||
2223 |
mutex_enter(&kernel_mutex); |
|
2224 |
||
2225 |
srv_n_threads_active[SRV_MASTER]++; |
|
2226 |
||
2227 |
mutex_exit(&kernel_mutex); |
|
2228 |
||
2229 |
loop: |
|
2230 |
/*****************************************************************/
|
|
2231 |
/* ---- When there is database activity by users, we cycle in this
|
|
2232 |
loop */
|
|
2233 |
||
2234 |
srv_main_thread_op_info = "reserving kernel mutex"; |
|
2235 |
||
2236 |
n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read |
|
2237 |
+ buf_pool->n_pages_written; |
|
2238 |
mutex_enter(&kernel_mutex); |
|
2239 |
||
2240 |
/* Store the user activity counter at the start of this loop */
|
|
2241 |
old_activity_count = srv_activity_count; |
|
2242 |
||
2243 |
mutex_exit(&kernel_mutex); |
|
2244 |
||
2245 |
if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) { |
|
2246 |
||
2247 |
goto suspend_thread; |
|
2248 |
}
|
|
2249 |
||
2250 |
/* ---- We run the following loop approximately once per second
|
|
2251 |
when there is database activity */
|
|
2252 |
||
2253 |
skip_sleep = FALSE; |
|
2254 |
||
2255 |
for (i = 0; i < 10; i++) { |
|
2256 |
n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read |
|
2257 |
+ buf_pool->n_pages_written; |
|
2258 |
srv_main_thread_op_info = "sleeping"; |
|
2259 |
||
2260 |
if (!skip_sleep) { |
|
2261 |
||
2262 |
os_thread_sleep(1000000); |
|
2263 |
}
|
|
2264 |
||
2265 |
skip_sleep = FALSE; |
|
2266 |
||
2267 |
/* ALTER TABLE in MySQL requires on Unix that the table handler
|
|
2268 |
can drop tables lazily after there no longer are SELECT
|
|
2269 |
queries to them. */
|
|
2270 |
||
2271 |
srv_main_thread_op_info = "doing background drop tables"; |
|
2272 |
||
2273 |
row_drop_tables_for_mysql_in_background(); |
|
2274 |
||
2275 |
srv_main_thread_op_info = ""; |
|
2276 |
||
2277 |
if (srv_fast_shutdown && srv_shutdown_state > 0) { |
|
2278 |
||
2279 |
goto background_loop; |
|
2280 |
}
|
|
2281 |
||
2282 |
/* We flush the log once in a second even if no commit
|
|
2283 |
is issued or we have specified in my.cnf no flush
|
|
2284 |
at transaction commit */
|
|
2285 |
||
2286 |
srv_main_thread_op_info = "flushing log"; |
|
2287 |
log_buffer_flush_to_disk(); |
|
2288 |
||
2289 |
srv_main_thread_op_info = "making checkpoint"; |
|
2290 |
log_free_check(); |
|
2291 |
||
2292 |
/* If there were less than 5 i/os during the
|
|
2293 |
one second sleep, we assume that there is free
|
|
2294 |
disk i/o capacity available, and it makes sense to
|
|
2295 |
do an insert buffer merge. */
|
|
2296 |
||
2297 |
n_pend_ios = buf_get_n_pending_ios() |
|
2298 |
+ log_sys->n_pending_writes; |
|
2299 |
n_ios = log_sys->n_log_ios + buf_pool->n_pages_read |
|
2300 |
+ buf_pool->n_pages_written; |
|
2301 |
if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) { |
|
2302 |
srv_main_thread_op_info = "doing insert buffer merge"; |
|
2303 |
ibuf_contract_for_n_pages( |
|
2304 |
TRUE, srv_insert_buffer_batch_size / 4); |
|
2305 |
||
2306 |
srv_main_thread_op_info = "flushing log"; |
|
2307 |
||
2308 |
log_buffer_flush_to_disk(); |
|
2309 |
}
|
|
2310 |
||
2311 |
if (UNIV_UNLIKELY(buf_get_modified_ratio_pct() |
|
2312 |
> srv_max_buf_pool_modified_pct)) { |
|
2313 |
||
2314 |
/* Try to keep the number of modified pages in the
|
|
2315 |
buffer pool under the limit wished by the user */
|
|
2316 |
||
2317 |
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, |
|
2318 |
ut_dulint_max); |
|
2319 |
||
2320 |
/* If we had to do the flush, it may have taken
|
|
2321 |
even more than 1 second, and also, there may be more
|
|
2322 |
to flush. Do not sleep 1 second during the next
|
|
2323 |
iteration of this loop. */
|
|
2324 |
||
2325 |
skip_sleep = TRUE; |
|
2326 |
}
|
|
2327 |
||
2328 |
if (srv_activity_count == old_activity_count) { |
|
2329 |
||
2330 |
/* There is no user activity at the moment, go to
|
|
2331 |
the background loop */
|
|
2332 |
||
2333 |
goto background_loop; |
|
2334 |
}
|
|
2335 |
}
|
|
2336 |
||
2337 |
/* ---- We perform the following code approximately once per
|
|
2338 |
10 seconds when there is database activity */
|
|
2339 |
||
2340 |
#ifdef MEM_PERIODIC_CHECK
|
|
2341 |
/* Check magic numbers of every allocated mem block once in 10
|
|
2342 |
seconds */
|
|
2343 |
mem_validate_all_blocks(); |
|
2344 |
#endif
|
|
2345 |
/* If there were less than 200 i/os during the 10 second period,
|
|
2346 |
we assume that there is free disk i/o capacity available, and it
|
|
2347 |
makes sense to flush 100 pages. */
|
|
2348 |
||
2349 |
n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; |
|
2350 |
n_ios = log_sys->n_log_ios + buf_pool->n_pages_read |
|
2351 |
+ buf_pool->n_pages_written; |
|
2352 |
if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) { |
|
2353 |
||
2354 |
srv_main_thread_op_info = "flushing buffer pool pages"; |
|
2355 |
buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); |
|
2356 |
||
2357 |
srv_main_thread_op_info = "flushing log"; |
|
2358 |
log_buffer_flush_to_disk(); |
|
2359 |
}
|
|
2360 |
||
2361 |
/* We run a batch of insert buffer merge every 10 seconds,
|
|
2362 |
even if the server were active */
|
|
2363 |
||
2364 |
srv_main_thread_op_info = "doing insert buffer merge"; |
|
2365 |
ibuf_contract_for_n_pages(TRUE, srv_insert_buffer_batch_size / 4); |
|
2366 |
||
2367 |
srv_main_thread_op_info = "flushing log"; |
|
2368 |
log_buffer_flush_to_disk(); |
|
2369 |
||
2370 |
/* We run a full purge every 10 seconds, even if the server
|
|
2371 |
were active */
|
|
2372 |
||
2373 |
n_pages_purged = 1; |
|
2374 |
||
2375 |
last_flush_time = time(NULL); |
|
2376 |
||
2377 |
while (n_pages_purged) { |
|
2378 |
||
2379 |
if (srv_fast_shutdown && srv_shutdown_state > 0) { |
|
2380 |
||
2381 |
goto background_loop; |
|
2382 |
}
|
|
2383 |
||
2384 |
srv_main_thread_op_info = "purging"; |
|
2385 |
n_pages_purged = trx_purge(); |
|
2386 |
||
2387 |
current_time = time(NULL); |
|
2388 |
||
2389 |
if (difftime(current_time, last_flush_time) > 1) { |
|
2390 |
srv_main_thread_op_info = "flushing log"; |
|
2391 |
||
2392 |
log_buffer_flush_to_disk(); |
|
2393 |
last_flush_time = current_time; |
|
2394 |
}
|
|
2395 |
}
|
|
2396 |
||
2397 |
srv_main_thread_op_info = "flushing buffer pool pages"; |
|
2398 |
||
2399 |
/* Flush a few oldest pages to make a new checkpoint younger */
|
|
2400 |
||
2401 |
if (buf_get_modified_ratio_pct() > 70) { |
|
2402 |
||
2403 |
/* If there are lots of modified pages in the buffer pool
|
|
2404 |
(> 70 %), we assume we can afford reserving the disk(s) for
|
|
2405 |
the time it requires to flush 100 pages */
|
|
2406 |
||
2407 |
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, |
|
2408 |
ut_dulint_max); |
|
2409 |
} else { |
|
2410 |
/* Otherwise, we only flush a small number of pages so that
|
|
2411 |
we do not unnecessarily use much disk i/o capacity from
|
|
2412 |
other work */
|
|
2413 |
||
2414 |
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10, |
|
2415 |
ut_dulint_max); |
|
2416 |
}
|
|
2417 |
||
2418 |
srv_main_thread_op_info = "making checkpoint"; |
|
2419 |
||
2420 |
/* Make a new checkpoint about once in 10 seconds */
|
|
2421 |
||
2422 |
log_checkpoint(TRUE, FALSE); |
|
2423 |
||
2424 |
srv_main_thread_op_info = "reserving kernel mutex"; |
|
2425 |
||
2426 |
mutex_enter(&kernel_mutex); |
|
2427 |
||
2428 |
/* ---- When there is database activity, we jump from here back to
|
|
2429 |
the start of loop */
|
|
2430 |
||
2431 |
if (srv_activity_count != old_activity_count) { |
|
2432 |
mutex_exit(&kernel_mutex); |
|
2433 |
goto loop; |
|
2434 |
}
|
|
2435 |
||
2436 |
mutex_exit(&kernel_mutex); |
|
2437 |
||
2438 |
/* If the database is quiet, we enter the background loop */
|
|
2439 |
||
2440 |
/*****************************************************************/
|
|
2441 |
background_loop: |
|
2442 |
/* ---- In this loop we run background operations when the server
|
|
2443 |
is quiet from user activity. Also in the case of a shutdown, we
|
|
2444 |
loop here, flushing the buffer pool to the data files. */
|
|
2445 |
||
2446 |
/* The server has been quiet for a while: start running background
|
|
2447 |
operations */
|
|
2448 |
||
2449 |
srv_main_thread_op_info = "doing background drop tables"; |
|
2450 |
||
2451 |
n_tables_to_drop = row_drop_tables_for_mysql_in_background(); |
|
2452 |
||
2453 |
if (n_tables_to_drop > 0) { |
|
2454 |
/* Do not monopolize the CPU even if there are tables waiting
|
|
2455 |
in the background drop queue. (It is essentially a bug if
|
|
2456 |
MySQL tries to drop a table while there are still open handles
|
|
2457 |
to it and we had to put it to the background drop queue.) */
|
|
2458 |
||
2459 |
os_thread_sleep(100000); |
|
2460 |
}
|
|
2461 |
||
2462 |
srv_main_thread_op_info = "purging"; |
|
2463 |
||
2464 |
/* Run a full purge */
|
|
2465 |
||
2466 |
n_pages_purged = 1; |
|
2467 |
||
2468 |
last_flush_time = time(NULL); |
|
2469 |
||
2470 |
while (n_pages_purged) { |
|
2471 |
if (srv_fast_shutdown && srv_shutdown_state > 0) { |
|
2472 |
||
2473 |
break; |
|
2474 |
}
|
|
2475 |
||
2476 |
srv_main_thread_op_info = "purging"; |
|
2477 |
n_pages_purged = trx_purge(); |
|
2478 |
||
2479 |
current_time = time(NULL); |
|
2480 |
||
2481 |
if (difftime(current_time, last_flush_time) > 1) { |
|
2482 |
srv_main_thread_op_info = "flushing log"; |
|
2483 |
||
2484 |
log_buffer_flush_to_disk(); |
|
2485 |
last_flush_time = current_time; |
|
2486 |
}
|
|
2487 |
}
|
|
2488 |
||
2489 |
srv_main_thread_op_info = "reserving kernel mutex"; |
|
2490 |
||
2491 |
mutex_enter(&kernel_mutex); |
|
2492 |
if (srv_activity_count != old_activity_count) { |
|
2493 |
mutex_exit(&kernel_mutex); |
|
2494 |
goto loop; |
|
2495 |
}
|
|
2496 |
mutex_exit(&kernel_mutex); |
|
2497 |
||
2498 |
srv_main_thread_op_info = "doing insert buffer merge"; |
|
2499 |
||
2500 |
if (srv_fast_shutdown && srv_shutdown_state > 0) { |
|
2501 |
n_bytes_merged = 0; |
|
2502 |
} else { |
|
2503 |
n_bytes_merged = ibuf_contract_for_n_pages( |
|
2504 |
TRUE, srv_insert_buffer_batch_size); |
|
2505 |
}
|
|
2506 |
||
2507 |
srv_main_thread_op_info = "reserving kernel mutex"; |
|
2508 |
||
2509 |
mutex_enter(&kernel_mutex); |
|
2510 |
if (srv_activity_count != old_activity_count) { |
|
2511 |
mutex_exit(&kernel_mutex); |
|
2512 |
goto loop; |
|
2513 |
}
|
|
2514 |
mutex_exit(&kernel_mutex); |
|
2515 |
||
2516 |
flush_loop: |
|
2517 |
srv_main_thread_op_info = "flushing buffer pool pages"; |
|
2518 |
||
2519 |
if (srv_fast_shutdown < 2) { |
|
2520 |
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, |
|
2521 |
ut_dulint_max); |
|
2522 |
} else { |
|
2523 |
/* In the fastest shutdown we do not flush the buffer pool
|
|
2524 |
to data files: we set n_pages_flushed to 0 artificially. */
|
|
2525 |
||
2526 |
n_pages_flushed = 0; |
|
2527 |
}
|
|
2528 |
||
2529 |
srv_main_thread_op_info = "reserving kernel mutex"; |
|
2530 |
||
2531 |
mutex_enter(&kernel_mutex); |
|
2532 |
if (srv_activity_count != old_activity_count) { |
|
2533 |
mutex_exit(&kernel_mutex); |
|
2534 |
goto loop; |
|
2535 |
}
|
|
2536 |
mutex_exit(&kernel_mutex); |
|
2537 |
||
2538 |
srv_main_thread_op_info = "waiting for buffer pool flush to end"; |
|
2539 |
buf_flush_wait_batch_end(BUF_FLUSH_LIST); |
|
2540 |
||
2541 |
srv_main_thread_op_info = "flushing log"; |
|
2542 |
||
2543 |
log_buffer_flush_to_disk(); |
|
2544 |
||
2545 |
srv_main_thread_op_info = "making checkpoint"; |
|
2546 |
||
2547 |
log_checkpoint(TRUE, FALSE); |
|
2548 |
||
2549 |
if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) { |
|
2550 |
||
2551 |
/* Try to keep the number of modified pages in the
|
|
2552 |
buffer pool under the limit wished by the user */
|
|
2553 |
||
2554 |
goto flush_loop; |
|
2555 |
}
|
|
2556 |
||
2557 |
srv_main_thread_op_info = "reserving kernel mutex"; |
|
2558 |
||
2559 |
mutex_enter(&kernel_mutex); |
|
2560 |
if (srv_activity_count != old_activity_count) { |
|
2561 |
mutex_exit(&kernel_mutex); |
|
2562 |
goto loop; |
|
2563 |
}
|
|
2564 |
mutex_exit(&kernel_mutex); |
|
2565 |
/*
|
|
2566 |
srv_main_thread_op_info = "archiving log (if log archive is on)";
|
|
2567 |
||
2568 |
log_archive_do(FALSE, &n_bytes_archived);
|
|
2569 |
*/
|
|
2570 |
n_bytes_archived = 0; |
|
2571 |
||
2572 |
/* Keep looping in the background loop if there is still work to do */
|
|
2573 |
||
2574 |
if (srv_fast_shutdown && srv_shutdown_state > 0) { |
|
2575 |
if (n_tables_to_drop + n_pages_flushed |
|
2576 |
+ n_bytes_archived != 0) { |
|
2577 |
||
2578 |
/* If we are doing a fast shutdown (= the default)
|
|
2579 |
we do not do purge or insert buffer merge. But we
|
|
2580 |
flush the buffer pool completely to disk.
|
|
2581 |
In a 'very fast' shutdown we do not flush the buffer
|
|
2582 |
pool to data files: we have set n_pages_flushed to
|
|
2583 |
0 artificially. */
|
|
2584 |
||
2585 |
goto background_loop; |
|
2586 |
}
|
|
2587 |
} else if (n_tables_to_drop |
|
2588 |
+ n_pages_purged + n_bytes_merged + n_pages_flushed |
|
2589 |
+ n_bytes_archived != 0) { |
|
2590 |
/* In a 'slow' shutdown we run purge and the insert buffer
|
|
2591 |
merge to completion */
|
|
2592 |
||
2593 |
goto background_loop; |
|
2594 |
}
|
|
2595 |
||
2596 |
/* There is no work for background operations either: suspend
|
|
2597 |
master thread to wait for more server activity */
|
|
2598 |
||
2599 |
suspend_thread: |
|
2600 |
srv_main_thread_op_info = "suspending"; |
|
2601 |
||
2602 |
mutex_enter(&kernel_mutex); |
|
2603 |
||
2604 |
if (row_get_background_drop_list_len_low() > 0) { |
|
2605 |
mutex_exit(&kernel_mutex); |
|
2606 |
||
2607 |
goto loop; |
|
2608 |
}
|
|
2609 |
||
2610 |
event = srv_suspend_thread(); |
|
2611 |
||
2612 |
mutex_exit(&kernel_mutex); |
|
2613 |
||
2614 |
/* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
|
|
2615 |
waits for database activity to die down when converting < 4.1.x
|
|
2616 |
databases, and relies on this string being exactly as it is. InnoDB
|
|
2617 |
manual also mentions this string in several places. */
|
|
2618 |
srv_main_thread_op_info = "waiting for server activity"; |
|
2619 |
||
2620 |
os_event_wait(event); |
|
2621 |
||
2622 |
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { |
|
2623 |
/* This is only extra safety; the thread should exit
|
|
2624 |
already when the event wait ends */
|
|
2625 |
||
2626 |
os_thread_exit(NULL); |
|
2627 |
}
|
|
2628 |
||
2629 |
/* When there is user activity, InnoDB will set the event and the
|
|
2630 |
main thread goes back to loop. */
|
|
2631 |
||
2632 |
goto loop; |
|
2633 |
||
2634 |
OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */ |
|
2635 |
}
|
|
2636 |
#endif /* !UNIV_HOTBACKUP */ |