~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/************************************************************************
2
Starts the InnoDB database server
3
4
(c) 1996-2000 Innobase Oy
5
6
Created 2/16/1996 Heikki Tuuri
7
*************************************************************************/
8
9
#include "os0proc.h"
10
#include "sync0sync.h"
11
#include "ut0mem.h"
12
#include "mem0mem.h"
13
#include "mem0pool.h"
14
#include "data0data.h"
15
#include "data0type.h"
16
#include "dict0dict.h"
17
#include "buf0buf.h"
18
#include "buf0flu.h"
19
#include "buf0rea.h"
20
#include "os0file.h"
21
#include "os0thread.h"
22
#include "fil0fil.h"
23
#include "fsp0fsp.h"
24
#include "rem0rec.h"
25
#include "rem0cmp.h"
26
#include "mtr0mtr.h"
27
#include "log0log.h"
28
#include "log0recv.h"
29
#include "page0page.h"
30
#include "page0cur.h"
31
#include "trx0trx.h"
32
#include "dict0boot.h"
33
#include "dict0load.h"
34
#include "trx0sys.h"
35
#include "dict0crea.h"
36
#include "btr0btr.h"
37
#include "btr0pcur.h"
38
#include "btr0cur.h"
39
#include "btr0sea.h"
40
#include "rem0rec.h"
41
#include "srv0srv.h"
42
#include "que0que.h"
43
#include "usr0sess.h"
44
#include "lock0lock.h"
45
#include "trx0roll.h"
46
#include "trx0purge.h"
47
#include "row0ins.h"
48
#include "row0sel.h"
49
#include "row0upd.h"
50
#include "row0row.h"
51
#include "row0mysql.h"
52
#include "lock0lock.h"
53
#include "ibuf0ibuf.h"
54
#include "pars0pars.h"
55
#include "btr0sea.h"
56
#include "srv0start.h"
57
#include "que0que.h"
58
59
/* Log sequence number immediately after startup */
60
dulint		srv_start_lsn;
61
/* Log sequence number at shutdown */
62
dulint		srv_shutdown_lsn;
63
64
#ifdef HAVE_DARWIN_THREADS
65
# include <sys/utsname.h>
66
ibool		srv_have_fullfsync = FALSE;
67
#endif
68
69
ibool		srv_start_raw_disk_in_use = FALSE;
70
71
ulint		srv_sizeof_trx_t_in_ha_innodb_cc;
72
73
ibool		srv_startup_is_before_trx_rollback_phase = FALSE;
74
ibool		srv_is_being_started = FALSE;
75
#ifndef UNIV_HOTBACKUP
76
static ibool	srv_start_has_been_called = FALSE;
77
static ibool	srv_was_started = FALSE;
78
#endif /* !UNIV_HOTBACKUP */
79
80
/* At a shutdown the value first climbs to SRV_SHUTDOWN_CLEANUP
81
and then to SRV_SHUTDOWN_LAST_PHASE */
82
ulint		srv_shutdown_state = 0;
83
84
#ifndef UNIV_HOTBACKUP
85
static os_file_t	files[1000];
86
87
static mutex_t		ios_mutex;
88
static ulint		ios;
89
90
static ulint		n[SRV_MAX_N_IO_THREADS + 5];
91
static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 5];
92
93
/* We use this mutex to test the return value of pthread_mutex_trylock
94
   on successful locking. HP-UX does NOT return 0, though Linux et al do. */
95
static os_fast_mutex_t	srv_os_test_mutex;
96
97
/* Name of srv_monitor_file */
98
static char*	srv_monitor_file_name;
99
#endif /* !UNIV_HOTBACKUP */
100
101
#define SRV_N_PENDING_IOS_PER_THREAD	OS_AIO_N_PENDING_IOS_PER_THREAD
102
#define SRV_MAX_N_PENDING_SYNC_IOS	100
103
104
105
/* Avoid warnings when using purify: when HAVE_purify is defined, every
memcmp() call below this point in the file is redirected to a simple
byte-by-byte comparison. */

#ifdef HAVE_purify
/* Compares len bytes of s1 and s2 one byte at a time. Returns 0 if all
len bytes are equal; otherwise returns the number of bytes that were still
left to compare, counting the first differing byte (a non-zero value).
Unlike memcmp(), the result says nothing about the ordering of the two
buffers. */
static int inno_bcmp(register const char *s1, register const char *s2,
	register uint len)
{
	for (; len != 0; len--, s1++, s2++) {
		if (*s1 != *s2) {
			/* len still counts the differing byte */
			return((int) len);
		}
	}

	return(0);
}
#define memcmp(A,B,C) inno_bcmp((A),(B),(C))
#endif
118
119
static
120
char*
121
srv_parse_megabytes(
122
/*================*/
123
			/* out: next character in string */
124
	char*	str,	/* in: string containing a quantity in bytes */
125
	ulint*	megs)	/* out: the number in megabytes */
126
{
127
	char*	endp;
128
	ulint	size;
129
130
	size = strtoul(str, &endp, 10);
131
132
	str = endp;
133
134
	switch (*str) {
135
	case 'G': case 'g':
136
		size *= 1024;
137
		/* fall through */
138
	case 'M': case 'm':
139
		str++;
140
		break;
141
	default:
142
		size /= 1024 * 1024;
143
		break;
144
	}
145
146
	*megs = size;
147
	return(str);
148
}
149
150
/*************************************************************************
151
Reads the data files and their sizes from a character string given in
152
the .cnf file. */
153
154
ibool
155
srv_parse_data_file_paths_and_sizes(
156
/*================================*/
157
					/* out: TRUE if ok, FALSE if parsing
158
					error */
159
	char*	str,			/* in: the data file path string */
160
	char***	data_file_names,	/* out, own: array of data file
161
					names */
162
	ulint**	data_file_sizes,	/* out, own: array of data file sizes
163
					in megabytes */
164
	ulint**	data_file_is_raw_partition,/* out, own: array of flags
165
					showing which data files are raw
166
					partitions */
167
	ulint*	n_data_files,		/* out: number of data files */
168
	ibool*	is_auto_extending,	/* out: TRUE if the last data file is
169
					auto-extending */
170
	ulint*	max_auto_extend_size)	/* out: max auto extend size for the
171
					last file if specified, 0 if not */
172
{
173
	char*	input_str;
174
	char*	path;
175
	ulint	size;
176
	ulint	i	= 0;
177
178
	*is_auto_extending = FALSE;
179
	*max_auto_extend_size = 0;
180
181
	input_str = str;
182
183
	/* First calculate the number of data files and check syntax:
184
	path:size[M | G];path:size[M | G]... . Note that a Windows path may
185
	contain a drive name and a ':'. */
186
187
	while (*str != '\0') {
188
		path = str;
189
190
		while ((*str != ':' && *str != '\0')
191
		       || (*str == ':'
192
			   && (*(str + 1) == '\\' || *(str + 1) == '/'
193
			       || *(str + 1) == ':'))) {
194
			str++;
195
		}
196
197
		if (*str == '\0') {
198
			return(FALSE);
199
		}
200
201
		str++;
202
203
		str = srv_parse_megabytes(str, &size);
204
205
		if (0 == memcmp(str, ":autoextend",
206
				(sizeof ":autoextend") - 1)) {
207
208
			str += (sizeof ":autoextend") - 1;
209
210
			if (0 == memcmp(str, ":max:",
211
					(sizeof ":max:") - 1)) {
212
213
				str += (sizeof ":max:") - 1;
214
215
				str = srv_parse_megabytes(str, &size);
216
			}
217
218
			if (*str != '\0') {
219
220
				return(FALSE);
221
			}
222
		}
223
224
		if (strlen(str) >= 6
225
		    && *str == 'n'
226
		    && *(str + 1) == 'e'
227
		    && *(str + 2) == 'w') {
228
			str += 3;
229
		}
230
231
		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
232
			str += 3;
233
		}
234
235
		if (size == 0) {
236
			return(FALSE);
237
		}
238
239
		i++;
240
241
		if (*str == ';') {
242
			str++;
243
		} else if (*str != '\0') {
244
245
			return(FALSE);
246
		}
247
	}
248
249
	if (i == 0) {
250
		/* If innodb_data_file_path was defined it must contain
251
		at least one data file definition */
252
253
		return(FALSE);
254
	}
255
256
	*data_file_names = (char**)ut_malloc(i * sizeof(void*));
257
	*data_file_sizes = (ulint*)ut_malloc(i * sizeof(ulint));
258
	*data_file_is_raw_partition = (ulint*)ut_malloc(i * sizeof(ulint));
259
260
	*n_data_files = i;
261
262
	/* Then store the actual values to our arrays */
263
264
	str = input_str;
265
	i = 0;
266
267
	while (*str != '\0') {
268
		path = str;
269
270
		/* Note that we must step over the ':' in a Windows path;
271
		a Windows path normally looks like C:\ibdata\ibdata1:1G, but
272
		a Windows raw partition may have a specification like
273
		\\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
274
275
		while ((*str != ':' && *str != '\0')
276
		       || (*str == ':'
277
			   && (*(str + 1) == '\\' || *(str + 1) == '/'
278
			       || *(str + 1) == ':'))) {
279
			str++;
280
		}
281
282
		if (*str == ':') {
283
			/* Make path a null-terminated string */
284
			*str = '\0';
285
			str++;
286
		}
287
288
		str = srv_parse_megabytes(str, &size);
289
290
		(*data_file_names)[i] = path;
291
		(*data_file_sizes)[i] = size;
292
293
		if (0 == memcmp(str, ":autoextend",
294
				(sizeof ":autoextend") - 1)) {
295
296
			*is_auto_extending = TRUE;
297
298
			str += (sizeof ":autoextend") - 1;
299
300
			if (0 == memcmp(str, ":max:", (sizeof ":max:") - 1)) {
301
302
				str += (sizeof ":max:") - 1;
303
304
				str = srv_parse_megabytes(
305
					str, max_auto_extend_size);
306
			}
307
308
			if (*str != '\0') {
309
310
				return(FALSE);
311
			}
312
		}
313
314
		(*data_file_is_raw_partition)[i] = 0;
315
316
		if (strlen(str) >= 6
317
		    && *str == 'n'
318
		    && *(str + 1) == 'e'
319
		    && *(str + 2) == 'w') {
320
			str += 3;
321
			(*data_file_is_raw_partition)[i] = SRV_NEW_RAW;
322
		}
323
324
		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
325
			str += 3;
326
327
			if ((*data_file_is_raw_partition)[i] == 0) {
328
				(*data_file_is_raw_partition)[i] = SRV_OLD_RAW;
329
			}
330
		}
331
332
		i++;
333
334
		if (*str == ';') {
335
			str++;
336
		}
337
	}
338
339
	return(TRUE);
340
}
341
342
/*************************************************************************
343
Reads log group home directories from a character string given in
344
the .cnf file. */
345
346
ibool
347
srv_parse_log_group_home_dirs(
348
/*==========================*/
349
					/* out: TRUE if ok, FALSE if parsing
350
					error */
351
	char*	str,			/* in: character string */
352
	char***	log_group_home_dirs)	/* out, own: log group home dirs */
353
{
354
	char*	input_str;
355
	char*	path;
356
	ulint	i	= 0;
357
358
	input_str = str;
359
360
	/* First calculate the number of directories and check syntax:
361
	path;path;... */
362
363
	while (*str != '\0') {
364
		path = str;
365
366
		while (*str != ';' && *str != '\0') {
367
			str++;
368
		}
369
370
		i++;
371
372
		if (*str == ';') {
373
			str++;
374
		} else if (*str != '\0') {
375
376
			return(FALSE);
377
		}
378
	}
379
380
	if (i != 1) {
381
		/* If innodb_log_group_home_dir was defined it must
382
		contain exactly one path definition under current MySQL */
383
384
		return(FALSE);
385
	}
386
387
	*log_group_home_dirs = (char**) ut_malloc(i * sizeof(void*));
388
389
	/* Then store the actual values to our array */
390
391
	str = input_str;
392
	i = 0;
393
394
	while (*str != '\0') {
395
		path = str;
396
397
		while (*str != ';' && *str != '\0') {
398
			str++;
399
		}
400
401
		if (*str == ';') {
402
			*str = '\0';
403
			str++;
404
		}
405
406
		(*log_group_home_dirs)[i] = path;
407
408
		i++;
409
	}
410
411
	return(TRUE);
412
}
413
414
#ifndef UNIV_HOTBACKUP
415
/************************************************************************
416
I/o-handler thread function. */
417
static
418
419
os_thread_ret_t
420
io_handler_thread(
421
/*==============*/
422
	void*	arg)
423
{
424
	ulint	segment;
425
	ulint	i;
426
427
	segment = *((ulint*)arg);
428
429
#ifdef UNIV_DEBUG_THREAD_CREATION
430
	fprintf(stderr, "Io handler thread %lu starts, id %lu\n", segment,
431
		os_thread_pf(os_thread_get_curr_id()));
432
#endif
433
	for (i = 0;; i++) {
434
		fil_aio_wait(segment);
435
436
		mutex_enter(&ios_mutex);
437
		ios++;
438
		mutex_exit(&ios_mutex);
439
	}
440
441
	/* We count the number of threads in os_thread_exit(). A created
442
	thread should always use that to exit and not use return() to exit.
443
	The thread actually never comes here because it is exited in an
444
	os_event_wait(). */
445
446
	os_thread_exit(NULL);
447
448
	OS_THREAD_DUMMY_RETURN;
449
}
450
#endif /* !UNIV_HOTBACKUP */
451
452
/* Directory separator character: a backslash when compiled with __WIN__
defined, a slash otherwise */
#ifndef __WIN__
# define SRV_PATH_SEPARATOR	'/'
#else
# define SRV_PATH_SEPARATOR	'\\'
#endif
457
458
/*************************************************************************
Normalizes a directory path for Windows: converts slashes to backslashes.
The string is modified in place. When __WIN__ is not defined the function
does nothing. */

void
srv_normalize_path_for_win(
/*=======================*/
	char*	str __attribute__((unused)))	/* in/out: null-terminated
						character string */
{
#ifdef __WIN__
	char*	p	= str;

	while (*p != '\0') {

		if (*p == '/') {
			*p = '\\';
		}

		p++;
	}
#endif
}
476
477
/*************************************************************************
478
Adds a slash or a backslash to the end of a string if it is missing
479
and the string is not empty. */
480
481
char*
482
srv_add_path_separator_if_needed(
483
/*=============================*/
484
			/* out: string which has the separator if the
485
			string is not empty */
486
	char*	str)	/* in: null-terminated character string */
487
{
488
	char*	out_str;
489
	ulint	len	= ut_strlen(str);
490
491
	if (len == 0 || str[len - 1] == SRV_PATH_SEPARATOR) {
492
493
		return(str);
494
	}
495
496
	out_str = ut_malloc(len + 2);
497
	memcpy(out_str, str, len);
498
	out_str[len] = SRV_PATH_SEPARATOR;
499
	out_str[len + 1] = 0;
500
501
	return(out_str);
502
}
503
504
#ifndef UNIV_HOTBACKUP
505
/*************************************************************************
506
Calculates the low 32 bits when a file size which is given as a number
507
database pages is converted to the number of bytes. */
508
static
509
ulint
510
srv_calc_low32(
511
/*===========*/
512
				/* out: low 32 bytes of file size when
513
				expressed in bytes */
514
	ulint	file_size)	/* in: file size in database pages */
515
{
516
	return(0xFFFFFFFFUL & (file_size << UNIV_PAGE_SIZE_SHIFT));
517
}
518
519
/*************************************************************************
520
Calculates the high 32 bits when a file size which is given as a number
521
database pages is converted to the number of bytes. */
522
static
523
ulint
524
srv_calc_high32(
525
/*============*/
526
				/* out: high 32 bytes of file size when
527
				expressed in bytes */
528
	ulint	file_size)	/* in: file size in database pages */
529
{
530
	return(file_size >> (32 - UNIV_PAGE_SIZE_SHIFT));
531
}
532
533
/*************************************************************************
534
Creates or opens the log files and closes them. */
535
static
536
ulint
537
open_or_create_log_file(
538
/*====================*/
539
					/* out: DB_SUCCESS or error code */
540
	ibool	create_new_db,		/* in: TRUE if we should create a
541
					new database */
542
	ibool*	log_file_created,	/* out: TRUE if new log file
543
					created */
544
	ibool	log_file_has_been_opened,/* in: TRUE if a log file has been
545
					opened before: then it is an error
546
					to try to create another log file */
547
	ulint	k,			/* in: log group number */
548
	ulint	i)			/* in: log file number in group */
549
{
550
	ibool	ret;
551
	ulint	size;
552
	ulint	size_high;
553
	char	name[10000];
554
555
	UT_NOT_USED(create_new_db);
556
557
	*log_file_created = FALSE;
558
559
	srv_normalize_path_for_win(srv_log_group_home_dirs[k]);
560
	srv_log_group_home_dirs[k] = srv_add_path_separator_if_needed(
561
		srv_log_group_home_dirs[k]);
562
563
	ut_a(strlen(srv_log_group_home_dirs[k])
564
	     < (sizeof name) - 10 - sizeof "ib_logfile");
565
	sprintf(name, "%s%s%lu", srv_log_group_home_dirs[k],
566
		"ib_logfile", (ulong) i);
567
568
	files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_NORMAL,
569
				  OS_LOG_FILE, &ret);
570
	if (ret == FALSE) {
571
		if (os_file_get_last_error(FALSE) != OS_FILE_ALREADY_EXISTS
572
#ifdef UNIV_AIX
573
		    /* AIX 5.1 after security patch ML7 may have errno set
574
		    to 0 here, which causes our function to return 100;
575
		    work around that AIX problem */
576
		    && os_file_get_last_error(FALSE) != 100
577
#endif
578
		    ) {
579
			fprintf(stderr,
580
				"InnoDB: Error in creating"
581
				" or opening %s\n", name);
582
583
			return(DB_ERROR);
584
		}
585
586
		files[i] = os_file_create(name, OS_FILE_OPEN, OS_FILE_AIO,
587
					  OS_LOG_FILE, &ret);
588
		if (!ret) {
589
			fprintf(stderr,
590
				"InnoDB: Error in opening %s\n", name);
591
592
			return(DB_ERROR);
593
		}
594
595
		ret = os_file_get_size(files[i], &size, &size_high);
596
		ut_a(ret);
597
598
		if (size != srv_calc_low32(srv_log_file_size)
599
		    || size_high != srv_calc_high32(srv_log_file_size)) {
600
601
			fprintf(stderr,
602
				"InnoDB: Error: log file %s is"
603
				" of different size %lu %lu bytes\n"
604
				"InnoDB: than specified in the .cnf"
605
				" file %lu %lu bytes!\n",
606
				name, (ulong) size_high, (ulong) size,
607
				(ulong) srv_calc_high32(srv_log_file_size),
608
				(ulong) srv_calc_low32(srv_log_file_size));
609
610
			return(DB_ERROR);
611
		}
612
	} else {
613
		*log_file_created = TRUE;
614
615
		ut_print_timestamp(stderr);
616
617
		fprintf(stderr,
618
			"  InnoDB: Log file %s did not exist:"
619
			" new to be created\n",
620
			name);
621
		if (log_file_has_been_opened) {
622
623
			return(DB_ERROR);
624
		}
625
626
		fprintf(stderr, "InnoDB: Setting log file %s size to %lu MB\n",
627
			name, (ulong) srv_log_file_size
628
			>> (20 - UNIV_PAGE_SIZE_SHIFT));
629
630
		fprintf(stderr,
631
			"InnoDB: Database physically writes the file"
632
			" full: wait...\n");
633
634
		ret = os_file_set_size(name, files[i],
635
				       srv_calc_low32(srv_log_file_size),
636
				       srv_calc_high32(srv_log_file_size));
637
		if (!ret) {
638
			fprintf(stderr,
639
				"InnoDB: Error in creating %s:"
640
				" probably out of disk space\n",
641
				name);
642
643
			return(DB_ERROR);
644
		}
645
	}
646
647
	ret = os_file_close(files[i]);
648
	ut_a(ret);
649
650
	if (i == 0) {
651
		/* Create in memory the file space object
652
		which is for this log group */
653
654
		fil_space_create(name,
655
				 2 * k + SRV_LOG_SPACE_FIRST_ID, FIL_LOG);
656
	}
657
658
	ut_a(fil_validate());
659
660
	fil_node_create(name, srv_log_file_size,
661
			2 * k + SRV_LOG_SPACE_FIRST_ID, FALSE);
662
#ifdef UNIV_LOG_ARCHIVE
663
	/* If this is the first log group, create the file space object
664
	for archived logs.
665
	Under MySQL, no archiving ever done. */
666
667
	if (k == 0 && i == 0) {
668
		arch_space_id = 2 * k + 1 + SRV_LOG_SPACE_FIRST_ID;
669
670
		fil_space_create("arch_log_space", arch_space_id, FIL_LOG);
671
	} else {
672
		arch_space_id = ULINT_UNDEFINED;
673
	}
674
#endif /* UNIV_LOG_ARCHIVE */
675
	if (i == 0) {
676
		log_group_init(k, srv_n_log_files,
677
			       srv_log_file_size * UNIV_PAGE_SIZE,
678
			       2 * k + SRV_LOG_SPACE_FIRST_ID,
679
			       SRV_LOG_SPACE_FIRST_ID + 1); /* dummy arch
680
							    space id */
681
	}
682
683
	return(DB_SUCCESS);
684
}
685
686
/*************************************************************************
687
Creates or opens database data files and closes them. */
688
static
689
ulint
690
open_or_create_data_files(
691
/*======================*/
692
				/* out: DB_SUCCESS or error code */
693
	ibool*	create_new_db,	/* out: TRUE if new database should be
694
								created */
695
#ifdef UNIV_LOG_ARCHIVE
696
	ulint*	min_arch_log_no,/* out: min of archived log numbers in data
697
				files */
698
	ulint*	max_arch_log_no,/* out: */
699
#endif /* UNIV_LOG_ARCHIVE */
700
	dulint*	min_flushed_lsn,/* out: min of flushed lsn values in data
701
				files */
702
	dulint*	max_flushed_lsn,/* out: */
703
	ulint*	sum_of_new_sizes)/* out: sum of sizes of the new files added */
704
{
705
	ibool	ret;
706
	ulint	i;
707
	ibool	one_opened	= FALSE;
708
	ibool	one_created	= FALSE;
709
	ulint	size;
710
	ulint	size_high;
711
	ulint	rounded_size_pages;
712
	char	name[10000];
713
714
	if (srv_n_data_files >= 1000) {
715
		fprintf(stderr, "InnoDB: can only have < 1000 data files\n"
716
			"InnoDB: you have defined %lu\n",
717
			(ulong) srv_n_data_files);
718
		return(DB_ERROR);
719
	}
720
721
	*sum_of_new_sizes = 0;
722
723
	*create_new_db = FALSE;
724
725
	srv_normalize_path_for_win(srv_data_home);
726
	srv_data_home = srv_add_path_separator_if_needed(srv_data_home);
727
728
	for (i = 0; i < srv_n_data_files; i++) {
729
		srv_normalize_path_for_win(srv_data_file_names[i]);
730
731
		ut_a(strlen(srv_data_home) + strlen(srv_data_file_names[i])
732
		     < (sizeof name) - 1);
733
		sprintf(name, "%s%s", srv_data_home, srv_data_file_names[i]);
734
735
		if (srv_data_file_is_raw_partition[i] == 0) {
736
737
			/* First we try to create the file: if it already
738
			exists, ret will get value FALSE */
739
740
			files[i] = os_file_create(name, OS_FILE_CREATE,
741
						  OS_FILE_NORMAL,
742
						  OS_DATA_FILE, &ret);
743
744
			if (ret == FALSE && os_file_get_last_error(FALSE)
745
			    != OS_FILE_ALREADY_EXISTS
746
#ifdef UNIV_AIX
747
			    /* AIX 5.1 after security patch ML7 may have
748
			    errno set to 0 here, which causes our function
749
			    to return 100; work around that AIX problem */
750
			    && os_file_get_last_error(FALSE) != 100
751
#endif
752
			    ) {
753
				fprintf(stderr,
754
					"InnoDB: Error in creating"
755
					" or opening %s\n",
756
					name);
757
758
				return(DB_ERROR);
759
			}
760
		} else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
761
			/* The partition is opened, not created; then it is
762
			written over */
763
764
			srv_start_raw_disk_in_use = TRUE;
765
			srv_created_new_raw = TRUE;
766
767
			files[i] = os_file_create(name, OS_FILE_OPEN_RAW,
768
						  OS_FILE_NORMAL,
769
						  OS_DATA_FILE, &ret);
770
			if (!ret) {
771
				fprintf(stderr,
772
					"InnoDB: Error in opening %s\n", name);
773
774
				return(DB_ERROR);
775
			}
776
		} else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
777
			srv_start_raw_disk_in_use = TRUE;
778
779
			ret = FALSE;
780
		} else {
781
			ut_a(0);
782
		}
783
784
		if (ret == FALSE) {
785
			/* We open the data file */
786
787
			if (one_created) {
788
				fprintf(stderr,
789
					"InnoDB: Error: data files can only"
790
					" be added at the end\n");
791
				fprintf(stderr,
792
					"InnoDB: of a tablespace, but"
793
					" data file %s existed beforehand.\n",
794
					name);
795
				return(DB_ERROR);
796
			}
797
798
			if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
799
				files[i] = os_file_create(
800
					name, OS_FILE_OPEN_RAW,
801
					OS_FILE_NORMAL, OS_DATA_FILE, &ret);
802
			} else if (i == 0) {
803
				files[i] = os_file_create(
804
					name, OS_FILE_OPEN_RETRY,
805
					OS_FILE_NORMAL, OS_DATA_FILE, &ret);
806
			} else {
807
				files[i] = os_file_create(
808
					name, OS_FILE_OPEN, OS_FILE_NORMAL,
809
					OS_DATA_FILE, &ret);
810
			}
811
812
			if (!ret) {
813
				fprintf(stderr,
814
					"InnoDB: Error in opening %s\n", name);
815
				os_file_get_last_error(TRUE);
816
817
				return(DB_ERROR);
818
			}
819
820
			if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
821
822
				goto skip_size_check;
823
			}
824
825
			ret = os_file_get_size(files[i], &size, &size_high);
826
			ut_a(ret);
827
			/* Round size downward to megabytes */
828
829
			rounded_size_pages
830
				= (size / (1024 * 1024) + 4096 * size_high)
831
					<< (20 - UNIV_PAGE_SIZE_SHIFT);
832
833
			if (i == srv_n_data_files - 1
834
			    && srv_auto_extend_last_data_file) {
835
836
				if (srv_data_file_sizes[i] > rounded_size_pages
837
				    || (srv_last_file_size_max > 0
838
					&& srv_last_file_size_max
839
					< rounded_size_pages)) {
840
841
					fprintf(stderr,
842
						"InnoDB: Error: auto-extending"
843
						" data file %s is"
844
						" of a different size\n"
845
						"InnoDB: %lu pages (rounded"
846
						" down to MB) than specified"
847
						" in the .cnf file:\n"
848
						"InnoDB: initial %lu pages,"
849
						" max %lu (relevant if"
850
						" non-zero) pages!\n",
851
						name,
852
						(ulong) rounded_size_pages,
853
						(ulong) srv_data_file_sizes[i],
854
						(ulong)
855
						srv_last_file_size_max);
856
857
					return(DB_ERROR);
858
				}
859
860
				srv_data_file_sizes[i] = rounded_size_pages;
861
			}
862
863
			if (rounded_size_pages != srv_data_file_sizes[i]) {
864
865
				fprintf(stderr,
866
					"InnoDB: Error: data file %s"
867
					" is of a different size\n"
868
					"InnoDB: %lu pages"
869
					" (rounded down to MB)\n"
870
					"InnoDB: than specified"
871
					" in the .cnf file %lu pages!\n",
872
					name,
873
					(ulong) rounded_size_pages,
874
					(ulong) srv_data_file_sizes[i]);
875
876
				return(DB_ERROR);
877
			}
878
skip_size_check:
879
			fil_read_flushed_lsn_and_arch_log_no(
880
				files[i], one_opened,
881
#ifdef UNIV_LOG_ARCHIVE
882
				min_arch_log_no, max_arch_log_no,
883
#endif /* UNIV_LOG_ARCHIVE */
884
				min_flushed_lsn, max_flushed_lsn);
885
			one_opened = TRUE;
886
		} else {
887
			/* We created the data file and now write it full of
888
			zeros */
889
890
			one_created = TRUE;
891
892
			if (i > 0) {
893
				ut_print_timestamp(stderr);
894
				fprintf(stderr,
895
					"  InnoDB: Data file %s did not"
896
					" exist: new to be created\n",
897
					name);
898
			} else {
899
				fprintf(stderr,
900
					"InnoDB: The first specified"
901
					" data file %s did not exist:\n"
902
					"InnoDB: a new database"
903
					" to be created!\n", name);
904
				*create_new_db = TRUE;
905
			}
906
907
			ut_print_timestamp(stderr);
908
			fprintf(stderr,
909
				"  InnoDB: Setting file %s size to %lu MB\n",
910
				name,
911
				(ulong) (srv_data_file_sizes[i]
912
					 >> (20 - UNIV_PAGE_SIZE_SHIFT)));
913
914
			fprintf(stderr,
915
				"InnoDB: Database physically writes the"
916
				" file full: wait...\n");
917
918
			ret = os_file_set_size(
919
				name, files[i],
920
				srv_calc_low32(srv_data_file_sizes[i]),
921
				srv_calc_high32(srv_data_file_sizes[i]));
922
923
			if (!ret) {
924
				fprintf(stderr,
925
					"InnoDB: Error in creating %s:"
926
					" probably out of disk space\n", name);
927
928
				return(DB_ERROR);
929
			}
930
931
			*sum_of_new_sizes = *sum_of_new_sizes
932
				+ srv_data_file_sizes[i];
933
		}
934
935
		ret = os_file_close(files[i]);
936
		ut_a(ret);
937
938
		if (i == 0) {
939
			fil_space_create(name, 0, FIL_TABLESPACE);
940
		}
941
942
		ut_a(fil_validate());
943
944
		if (srv_data_file_is_raw_partition[i]) {
945
946
			fil_node_create(name, srv_data_file_sizes[i], 0, TRUE);
947
		} else {
948
			fil_node_create(name, srv_data_file_sizes[i], 0,
949
					FALSE);
950
		}
951
	}
952
953
	ios = 0;
954
955
	mutex_create(&ios_mutex, SYNC_NO_ORDER_CHECK);
956
957
	return(DB_SUCCESS);
958
}
959
960
/********************************************************************
961
Starts InnoDB and creates a new database if database files
962
are not found and the user wants. Server parameters are
963
read from a file of name "srv_init" in the ib_home directory. */
964
965
int
966
innobase_start_or_create_for_mysql(void)
967
/*====================================*/
968
				/* out: DB_SUCCESS or error code */
969
{
970
	buf_pool_t*	ret;
971
	ibool	create_new_db;
972
	ibool	log_file_created;
973
	ibool	log_created	= FALSE;
974
	ibool	log_opened	= FALSE;
975
	dulint	min_flushed_lsn;
976
	dulint	max_flushed_lsn;
977
#ifdef UNIV_LOG_ARCHIVE
978
	ulint	min_arch_log_no;
979
	ulint	max_arch_log_no;
980
#endif /* UNIV_LOG_ARCHIVE */
981
	ulint	sum_of_new_sizes;
982
	ulint	sum_of_data_file_sizes;
983
	ulint	tablespace_size_in_header;
984
	ulint	err;
985
	ulint	i;
986
	ibool	srv_file_per_table_original_value  = srv_file_per_table;
987
	mtr_t	mtr;
988
#ifdef HAVE_DARWIN_THREADS
989
# ifdef F_FULLFSYNC
990
	/* This executable has been compiled on Mac OS X 10.3 or later.
991
	Assume that F_FULLFSYNC is available at run-time. */
992
	srv_have_fullfsync = TRUE;
993
# else /* F_FULLFSYNC */
994
	/* This executable has been compiled on Mac OS X 10.2
995
	or earlier.  Determine if the executable is running
996
	on Mac OS X 10.3 or later. */
997
	struct utsname utsname;
998
	if (uname(&utsname)) {
999
		fputs("InnoDB: cannot determine Mac OS X version!\n", stderr);
1000
	} else {
1001
		srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0;
1002
	}
1003
	if (!srv_have_fullfsync) {
1004
		fputs("InnoDB: On Mac OS X, fsync() may be"
1005
		      " broken on internal drives,\n"
1006
		      "InnoDB: making transactions unsafe!\n", stderr);
1007
	}
1008
# endif /* F_FULLFSYNC */
1009
#endif /* HAVE_DARWIN_THREADS */
1010
1011
	if (sizeof(ulint) != sizeof(void*)) {
1012
		fprintf(stderr,
1013
			"InnoDB: Error: size of InnoDB's ulint is %lu,"
1014
			" but size of void* is %lu.\n"
1015
			"InnoDB: The sizes should be the same"
1016
			" so that on a 64-bit platform you can\n"
1017
			"InnoDB: allocate more than 4 GB of memory.",
1018
			(ulong)sizeof(ulint), (ulong)sizeof(void*));
1019
	}
1020
1021
	srv_file_per_table = FALSE; /* system tables are created in tablespace
1022
				    0 */
1023
#ifdef UNIV_DEBUG
1024
	fprintf(stderr,
1025
		"InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n");
1026
#endif
1027
1028
#ifdef UNIV_IBUF_DEBUG
1029
	fprintf(stderr,
1030
		"InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n"
1031
		"InnoDB: Crash recovery will fail with UNIV_IBUF_DEBUG\n");
1032
#endif
1033
1034
#ifdef UNIV_SYNC_DEBUG
1035
	fprintf(stderr,
1036
		"InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n");
1037
#endif
1038
1039
#ifdef UNIV_SEARCH_DEBUG
1040
	fprintf(stderr,
1041
		"InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!\n");
1042
#endif
1043
1044
#ifdef UNIV_MEM_DEBUG
1045
	fprintf(stderr,
1046
		"InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n");
1047
#endif
1048
1049
#ifdef UNIV_SIMULATE_AWE
1050
	fprintf(stderr,
1051
		"InnoDB: !!!!!!!! UNIV_SIMULATE_AWE switched on !!!!!!!!!\n");
1052
#endif
1053
	if (srv_sizeof_trx_t_in_ha_innodb_cc != (ulint)sizeof(trx_t)) {
1054
		fprintf(stderr,
1055
			"InnoDB: Error: trx_t size is %lu in ha_innodb.cc"
1056
			" but %lu in srv0start.c\n"
1057
			"InnoDB: Check that pthread_mutex_t is defined"
1058
			" in the same way in these\n"
1059
			"InnoDB: compilation modules. Cannot continue.\n",
1060
			(ulong)  srv_sizeof_trx_t_in_ha_innodb_cc,
1061
			(ulong) sizeof(trx_t));
1062
		return(DB_ERROR);
1063
	}
1064
1065
	/* Since InnoDB does not currently clean up all its internal data
1066
	structures in MySQL Embedded Server Library server_end(), we
1067
	print an error message if someone tries to start up InnoDB a
1068
	second time during the process lifetime. */
1069
1070
	if (srv_start_has_been_called) {
1071
		fprintf(stderr,
1072
			"InnoDB: Error:startup called second time"
1073
			" during the process lifetime.\n"
1074
			"InnoDB: In the MySQL Embedded Server Library"
1075
			" you cannot call server_init()\n"
1076
			"InnoDB: more than once during"
1077
			" the process lifetime.\n");
1078
	}
1079
1080
	srv_start_has_been_called = TRUE;
1081
1082
#ifdef UNIV_DEBUG
1083
	log_do_write = TRUE;
1084
#endif /* UNIV_DEBUG */
1085
	/*	yydebug = TRUE; */
1086
1087
	srv_is_being_started = TRUE;
1088
	srv_startup_is_before_trx_rollback_phase = TRUE;
1089
	os_aio_use_native_aio = FALSE;
1090
1091
#if !defined(__WIN2000__) && !defined(UNIV_SIMULATE_AWE)
1092
	if (srv_use_awe) {
1093
1094
		fprintf(stderr,
1095
			"InnoDB: Error: You have specified"
1096
			" innodb_buffer_pool_awe_mem_mb\n"
1097
			"InnoDB: in my.cnf, but AWE can only"
1098
			" be used in Windows 2000 and later.\n"
1099
			"InnoDB: To use AWE, InnoDB must"
1100
			" be compiled with __WIN2000__ defined.\n");
1101
1102
		return(DB_ERROR);
1103
	}
1104
#endif
1105
1106
#ifdef __WIN__
1107
	if (os_get_os_version() == OS_WIN95
1108
	    || os_get_os_version() == OS_WIN31
1109
	    || os_get_os_version() == OS_WINNT) {
1110
1111
		/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
1112
		and NT use simulated aio. In NT Windows provides async i/o,
1113
		but when run in conjunction with InnoDB Hot Backup, it seemed
1114
		to corrupt the data files. */
1115
1116
		os_aio_use_native_aio = FALSE;
1117
	} else {
1118
		/* On Win 2000 and XP use async i/o */
1119
		os_aio_use_native_aio = TRUE;
1120
	}
1121
#endif
1122
	if (srv_file_flush_method_str == NULL) {
1123
		/* These are the default options */
1124
1125
		srv_unix_file_flush_method = SRV_UNIX_FSYNC;
1126
1127
		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1128
#ifndef __WIN__
1129
	} else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
1130
		srv_unix_file_flush_method = SRV_UNIX_FSYNC;
1131
1132
	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
1133
		srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
1134
1135
	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
1136
		srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
1137
1138
	} else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
1139
		srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
1140
1141
	} else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
1142
		srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
1143
#else
1144
	} else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
1145
		srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
1146
		os_aio_use_native_aio = FALSE;
1147
1148
	} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
1149
		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1150
		os_aio_use_native_aio = FALSE;
1151
1152
	} else if (0 == ut_strcmp(srv_file_flush_method_str,
1153
				  "async_unbuffered")) {
1154
		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1155
#endif
1156
	} else {
1157
		fprintf(stderr,
1158
			"InnoDB: Unrecognized value %s for"
1159
			" innodb_flush_method\n",
1160
			srv_file_flush_method_str);
1161
		return(DB_ERROR);
1162
	}
1163
1164
	/* Note that the call srv_boot() also changes the values of
1165
	srv_pool_size etc. to the units used by InnoDB internally */
1166
1167
	/* Set the maximum number of threads which can wait for a semaphore
1168
	inside InnoDB: this is the 'sync wait array' size, as well as the
1169
	maximum number of threads that can wait in the 'srv_conc array' for
1170
	their time to enter InnoDB. */
1171
1172
#if defined(__NETWARE__)
1173
1174
	/* Create less event semaphores because Win 98/ME had
1175
	difficulty creating 40000 event semaphores.  Comment from
1176
	Novell, Inc.: also, these just take a lot of memory on
1177
	NetWare. */
1178
	srv_max_n_threads = 1000;
1179
#else
1180
	if (srv_pool_size >= 1000 * 1024) {
1181
		/* Here we still have srv_pool_size counted
1182
		in kilobytes (in 4.0 this was in bytes)
1183
		srv_boot() converts the value to
1184
		pages; if buffer pool is less than 1000 MB,
1185
		assume fewer threads. */
1186
		srv_max_n_threads = 50000;
1187
1188
	} else if (srv_pool_size >= 8 * 1024) {
1189
1190
		srv_max_n_threads = 10000;
1191
	} else {
1192
		srv_max_n_threads = 1000;	/* saves several MB of memory,
1193
						especially in 64-bit
1194
						computers */
1195
	}
1196
#endif
1197
	err = srv_boot(); /* This changes srv_pool_size to units of a page */
1198
1199
	if (err != DB_SUCCESS) {
1200
1201
		return((int) err);
1202
	}
1203
1204
	mutex_create(&srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
1205
1206
	if (srv_innodb_status) {
1207
		srv_monitor_file_name = mem_alloc(
1208
			strlen(fil_path_to_mysql_datadir)
1209
			+ 20 + sizeof "/innodb_status.");
1210
		sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
1211
			fil_path_to_mysql_datadir, os_proc_get_number());
1212
		srv_monitor_file = fopen(srv_monitor_file_name, "w+");
1213
		if (!srv_monitor_file) {
1214
			fprintf(stderr, "InnoDB: unable to create %s: %s\n",
1215
				srv_monitor_file_name, strerror(errno));
1216
			return(DB_ERROR);
1217
		}
1218
	} else {
1219
		srv_monitor_file_name = NULL;
1220
		srv_monitor_file = os_file_create_tmpfile();
1221
		if (!srv_monitor_file) {
1222
			return(DB_ERROR);
1223
		}
1224
	}
1225
1226
	mutex_create(&srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
1227
1228
	srv_dict_tmpfile = os_file_create_tmpfile();
1229
	if (!srv_dict_tmpfile) {
1230
		return(DB_ERROR);
1231
	}
1232
1233
	mutex_create(&srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
1234
1235
	srv_misc_tmpfile = os_file_create_tmpfile();
1236
	if (!srv_misc_tmpfile) {
1237
		return(DB_ERROR);
1238
	}
1239
1240
	/* Restrict the maximum number of file i/o threads */
1241
	if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) {
1242
1243
		srv_n_file_io_threads = SRV_MAX_N_IO_THREADS;
1244
	}
1245
1246
	if (!os_aio_use_native_aio) {
1247
		/* In simulated aio we currently have use only for 4 threads */
1248
		srv_n_file_io_threads = 4;
1249
1250
		os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
1251
			    * srv_n_file_io_threads,
1252
			    srv_n_file_io_threads,
1253
			    SRV_MAX_N_PENDING_SYNC_IOS);
1254
	} else {
1255
		os_aio_init(SRV_N_PENDING_IOS_PER_THREAD
1256
			    * srv_n_file_io_threads,
1257
			    srv_n_file_io_threads,
1258
			    SRV_MAX_N_PENDING_SYNC_IOS);
1259
	}
1260
1261
	fil_init(srv_max_n_open_files);
1262
1263
	if (srv_use_awe) {
1264
		fprintf(stderr,
1265
			"InnoDB: Using AWE: Memory window is %lu MB"
1266
			" and AWE memory is %lu MB\n",
1267
			(ulong) (srv_awe_window_size / ((1024 * 1024)
1268
							/ UNIV_PAGE_SIZE)),
1269
			(ulong) (srv_pool_size / ((1024 * 1024)
1270
						  / UNIV_PAGE_SIZE)));
1271
1272
		/* We must disable adaptive hash indexes because they do not
1273
		tolerate remapping of pages in AWE */
1274
1275
		srv_use_adaptive_hash_indexes = FALSE;
1276
		ret = buf_pool_init(srv_pool_size, srv_pool_size,
1277
				    srv_awe_window_size);
1278
	} else {
1279
		ret = buf_pool_init(srv_pool_size, srv_pool_size,
1280
				    srv_pool_size);
1281
	}
1282
1283
	if (ret == NULL) {
1284
		fprintf(stderr,
1285
			"InnoDB: Fatal error: cannot allocate the memory"
1286
			" for the buffer pool\n");
1287
1288
		return(DB_ERROR);
1289
	}
1290
1291
	fsp_init();
1292
	log_init();
1293
1294
	lock_sys_create(srv_lock_table_size);
1295
1296
	/* Create i/o-handler threads: */
1297
1298
	for (i = 0; i < srv_n_file_io_threads; i++) {
1299
		n[i] = i;
1300
1301
		os_thread_create(io_handler_thread, n + i, thread_ids + i);
1302
	}
1303
1304
#ifdef UNIV_LOG_ARCHIVE
1305
	if (0 != ut_strcmp(srv_log_group_home_dirs[0], srv_arch_dir)) {
1306
		fprintf(stderr,
1307
			"InnoDB: Error: you must set the log group"
1308
			" home dir in my.cnf the\n"
1309
			"InnoDB: same as log arch dir.\n");
1310
1311
		return(DB_ERROR);
1312
	}
1313
#endif /* UNIV_LOG_ARCHIVE */
1314
1315
	if (srv_n_log_files * srv_log_file_size >= 262144) {
1316
		fprintf(stderr,
1317
			"InnoDB: Error: combined size of log files"
1318
			" must be < 4 GB\n");
1319
1320
		return(DB_ERROR);
1321
	}
1322
1323
	sum_of_new_sizes = 0;
1324
1325
	for (i = 0; i < srv_n_data_files; i++) {
1326
#ifndef __WIN__
1327
		if (sizeof(off_t) < 5 && srv_data_file_sizes[i] >= 262144) {
1328
			fprintf(stderr,
1329
				"InnoDB: Error: file size must be < 4 GB"
1330
				" with this MySQL binary\n"
1331
				"InnoDB: and operating system combination,"
1332
				" in some OS's < 2 GB\n");
1333
1334
			return(DB_ERROR);
1335
		}
1336
#endif
1337
		sum_of_new_sizes += srv_data_file_sizes[i];
1338
	}
1339
1340
	if (sum_of_new_sizes < 640) {
1341
		fprintf(stderr,
1342
			"InnoDB: Error: tablespace size must be"
1343
			" at least 10 MB\n");
1344
1345
		return(DB_ERROR);
1346
	}
1347
1348
	err = open_or_create_data_files(&create_new_db,
1349
#ifdef UNIV_LOG_ARCHIVE
1350
					&min_arch_log_no, &max_arch_log_no,
1351
#endif /* UNIV_LOG_ARCHIVE */
1352
					&min_flushed_lsn, &max_flushed_lsn,
1353
					&sum_of_new_sizes);
1354
	if (err != DB_SUCCESS) {
1355
		fprintf(stderr,
1356
			"InnoDB: Could not open or create data files.\n"
1357
			"InnoDB: If you tried to add new data files,"
1358
			" and it failed here,\n"
1359
			"InnoDB: you should now edit innodb_data_file_path"
1360
			" in my.cnf back\n"
1361
			"InnoDB: to what it was, and remove the"
1362
			" new ibdata files InnoDB created\n"
1363
			"InnoDB: in this failed attempt. InnoDB only wrote"
1364
			" those files full of\n"
1365
			"InnoDB: zeros, but did not yet use them in any way."
1366
			" But be careful: do not\n"
1367
			"InnoDB: remove old data files"
1368
			" which contain your precious data!\n");
1369
1370
		return((int) err);
1371
	}
1372
1373
#ifdef UNIV_LOG_ARCHIVE
1374
	srv_normalize_path_for_win(srv_arch_dir);
1375
	srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir);
1376
#endif /* UNIV_LOG_ARCHIVE */
1377
1378
	for (i = 0; i < srv_n_log_files; i++) {
1379
		err = open_or_create_log_file(create_new_db, &log_file_created,
1380
					      log_opened, 0, i);
1381
		if (err != DB_SUCCESS) {
1382
1383
			return((int) err);
1384
		}
1385
1386
		if (log_file_created) {
1387
			log_created = TRUE;
1388
		} else {
1389
			log_opened = TRUE;
1390
		}
1391
		if ((log_opened && create_new_db)
1392
		    || (log_opened && log_created)) {
1393
			fprintf(stderr,
1394
				"InnoDB: Error: all log files must be"
1395
				" created at the same time.\n"
1396
				"InnoDB: All log files must be"
1397
				" created also in database creation.\n"
1398
				"InnoDB: If you want bigger or smaller"
1399
				" log files, shut down the\n"
1400
				"InnoDB: database and make sure there"
1401
				" were no errors in shutdown.\n"
1402
				"InnoDB: Then delete the existing log files."
1403
				" Edit the .cnf file\n"
1404
				"InnoDB: and start the database again.\n");
1405
1406
			return(DB_ERROR);
1407
		}
1408
	}
1409
1410
	/* Open all log files and data files in the system tablespace: we
1411
	keep them open until database shutdown */
1412
1413
	fil_open_log_and_system_tablespace_files();
1414
1415
	if (log_created && !create_new_db
1416
#ifdef UNIV_LOG_ARCHIVE
1417
	    && !srv_archive_recovery
1418
#endif /* UNIV_LOG_ARCHIVE */
1419
	    ) {
1420
		if (ut_dulint_cmp(max_flushed_lsn, min_flushed_lsn) != 0
1421
#ifdef UNIV_LOG_ARCHIVE
1422
		    || max_arch_log_no != min_arch_log_no
1423
#endif /* UNIV_LOG_ARCHIVE */
1424
		    ) {
1425
			fprintf(stderr,
1426
				"InnoDB: Cannot initialize created"
1427
				" log files because\n"
1428
				"InnoDB: data files were not in sync"
1429
				" with each other\n"
1430
				"InnoDB: or the data files are corrupt.\n");
1431
1432
			return(DB_ERROR);
1433
		}
1434
1435
		if (ut_dulint_cmp(max_flushed_lsn, ut_dulint_create(0, 1000))
1436
		    < 0) {
1437
			fprintf(stderr,
1438
				"InnoDB: Cannot initialize created"
1439
				" log files because\n"
1440
				"InnoDB: data files are corrupt,"
1441
				" or new data files were\n"
1442
				"InnoDB: created when the database"
1443
				" was started previous\n"
1444
				"InnoDB: time but the database"
1445
				" was not shut down\n"
1446
				"InnoDB: normally after that.\n");
1447
1448
			return(DB_ERROR);
1449
		}
1450
1451
		mutex_enter(&(log_sys->mutex));
1452
1453
#ifdef UNIV_LOG_ARCHIVE
1454
		/* Do not + 1 arch_log_no because we do not use log
1455
		archiving */
1456
		recv_reset_logs(max_flushed_lsn, max_arch_log_no, TRUE);
1457
#else
1458
		recv_reset_logs(max_flushed_lsn, TRUE);
1459
#endif /* UNIV_LOG_ARCHIVE */
1460
1461
		mutex_exit(&(log_sys->mutex));
1462
	}
1463
1464
	if (create_new_db) {
1465
		mtr_start(&mtr);
1466
1467
		fsp_header_init(0, sum_of_new_sizes, &mtr);
1468
1469
		mtr_commit(&mtr);
1470
1471
		trx_sys_create();
1472
		dict_create();
1473
		srv_startup_is_before_trx_rollback_phase = FALSE;
1474
1475
#ifdef UNIV_LOG_ARCHIVE
1476
	} else if (srv_archive_recovery) {
1477
		fprintf(stderr,
1478
			"InnoDB: Starting archive"
1479
			" recovery from a backup...\n");
1480
		err = recv_recovery_from_archive_start(
1481
			min_flushed_lsn, srv_archive_recovery_limit_lsn,
1482
			min_arch_log_no);
1483
		if (err != DB_SUCCESS) {
1484
1485
			return(DB_ERROR);
1486
		}
1487
		/* Since ibuf init is in dict_boot, and ibuf is needed
1488
		in any disk i/o, first call dict_boot */
1489
1490
		dict_boot();
1491
		trx_sys_init_at_db_start();
1492
		srv_startup_is_before_trx_rollback_phase = FALSE;
1493
1494
		/* Initialize the fsp free limit global variable in the log
1495
		system */
1496
		fsp_header_get_free_limit(0);
1497
1498
		recv_recovery_from_archive_finish();
1499
#endif /* UNIV_LOG_ARCHIVE */
1500
	} else {
1501
		/* We always try to do a recovery, even if the database had
1502
		been shut down normally: this is the normal startup path */
1503
1504
		err = recv_recovery_from_checkpoint_start(LOG_CHECKPOINT,
1505
							  ut_dulint_max,
1506
							  min_flushed_lsn,
1507
							  max_flushed_lsn);
1508
		if (err != DB_SUCCESS) {
1509
1510
			return(DB_ERROR);
1511
		}
1512
1513
		/* Since the insert buffer init is in dict_boot, and the
1514
		insert buffer is needed in any disk i/o, first we call
1515
		dict_boot(). Note that trx_sys_init_at_db_start() only needs
1516
		to access space 0, and the insert buffer at this stage already
1517
		works for space 0. */
1518
1519
		dict_boot();
1520
		trx_sys_init_at_db_start();
1521
1522
		if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
1523
			/* The following call is necessary for the insert
1524
			buffer to work with multiple tablespaces. We must
1525
			know the mapping between space id's and .ibd file
1526
			names.
1527
1528
			In a crash recovery, we check that the info in data
1529
			dictionary is consistent with what we already know
1530
			about space id's from the call of
1531
			fil_load_single_table_tablespaces().
1532
1533
			In a normal startup, we create the space objects for
1534
			every table in the InnoDB data dictionary that has
1535
			an .ibd file.
1536
1537
			We also determine the maximum tablespace id used.
1538
1539
			TODO: We may have incomplete transactions in the
1540
			data dictionary tables. Does that harm the scanning of
1541
			the data dictionary below? */
1542
1543
			dict_check_tablespaces_and_store_max_id(
1544
				recv_needed_recovery);
1545
		}
1546
1547
		srv_startup_is_before_trx_rollback_phase = FALSE;
1548
1549
		/* Initialize the fsp free limit global variable in the log
1550
		system */
1551
		fsp_header_get_free_limit(0);
1552
1553
		/* recv_recovery_from_checkpoint_finish needs trx lists which
1554
		are initialized in trx_sys_init_at_db_start(). */
1555
1556
		recv_recovery_from_checkpoint_finish();
1557
	}
1558
1559
	if (!create_new_db && sum_of_new_sizes > 0) {
1560
		/* New data file(s) were added */
1561
		mtr_start(&mtr);
1562
1563
		fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
1564
1565
		mtr_commit(&mtr);
1566
1567
		/* Immediately write the log record about increased tablespace
1568
		size to disk, so that it is durable even if mysqld would crash
1569
		quickly */
1570
1571
		log_buffer_flush_to_disk();
1572
	}
1573
1574
#ifdef UNIV_LOG_ARCHIVE
1575
	/* Archiving is always off under MySQL */
1576
	if (!srv_log_archive_on) {
1577
		ut_a(DB_SUCCESS == log_archive_noarchivelog());
1578
	} else {
1579
		mutex_enter(&(log_sys->mutex));
1580
1581
		start_archive = FALSE;
1582
1583
		if (log_sys->archiving_state == LOG_ARCH_OFF) {
1584
			start_archive = TRUE;
1585
		}
1586
1587
		mutex_exit(&(log_sys->mutex));
1588
1589
		if (start_archive) {
1590
			ut_a(DB_SUCCESS == log_archive_archivelog());
1591
		}
1592
	}
1593
#endif /* UNIV_LOG_ARCHIVE */
1594
1595
	/* fprintf(stderr, "Max allowed record size %lu\n",
1596
	page_get_free_space_of_empty() / 2); */
1597
1598
	/* Create the thread which watches the timeouts for lock waits
1599
	and prints InnoDB monitor info */
1600
1601
	os_thread_create(&srv_lock_timeout_and_monitor_thread, NULL,
1602
			 thread_ids + 2 + SRV_MAX_N_IO_THREADS);
1603
1604
	/* Create the thread which warns of long semaphore waits */
1605
	os_thread_create(&srv_error_monitor_thread, NULL,
1606
			 thread_ids + 3 + SRV_MAX_N_IO_THREADS);
1607
	srv_was_started = TRUE;
1608
	srv_is_being_started = FALSE;
1609
1610
	if (trx_doublewrite == NULL) {
1611
		/* Create the doublewrite buffer to a new tablespace */
1612
1613
		trx_sys_create_doublewrite_buf();
1614
	}
1615
1616
	err = dict_create_or_check_foreign_constraint_tables();
1617
1618
	if (err != DB_SUCCESS) {
1619
		return((int)DB_ERROR);
1620
	}
1621
1622
	/* Create the master thread which does purge and other utility
1623
	operations */
1624
1625
	os_thread_create(&srv_master_thread, NULL, thread_ids
1626
			 + (1 + SRV_MAX_N_IO_THREADS));
1627
#ifdef UNIV_DEBUG
1628
	/* buf_debug_prints = TRUE; */
1629
#endif /* UNIV_DEBUG */
1630
	sum_of_data_file_sizes = 0;
1631
1632
	for (i = 0; i < srv_n_data_files; i++) {
1633
		sum_of_data_file_sizes += srv_data_file_sizes[i];
1634
	}
1635
1636
	tablespace_size_in_header = fsp_header_get_tablespace_size(0);
1637
1638
	if (!srv_auto_extend_last_data_file
1639
	    && sum_of_data_file_sizes != tablespace_size_in_header) {
1640
1641
		fprintf(stderr,
1642
			"InnoDB: Error: tablespace size"
1643
			" stored in header is %lu pages, but\n"
1644
			"InnoDB: the sum of data file sizes is %lu pages\n",
1645
			(ulong) tablespace_size_in_header,
1646
			(ulong) sum_of_data_file_sizes);
1647
1648
		if (srv_force_recovery == 0
1649
		    && sum_of_data_file_sizes < tablespace_size_in_header) {
1650
			/* This is a fatal error, the tail of a tablespace is
1651
			missing */
1652
1653
			fprintf(stderr,
1654
				"InnoDB: Cannot start InnoDB."
1655
				" The tail of the system tablespace is\n"
1656
				"InnoDB: missing. Have you edited"
1657
				" innodb_data_file_path in my.cnf in an\n"
1658
				"InnoDB: inappropriate way, removing"
1659
				" ibdata files from there?\n"
1660
				"InnoDB: You can set innodb_force_recovery=1"
1661
				" in my.cnf to force\n"
1662
				"InnoDB: a startup if you are trying"
1663
				" to recover a badly corrupt database.\n");
1664
1665
			return(DB_ERROR);
1666
		}
1667
	}
1668
1669
	if (srv_auto_extend_last_data_file
1670
	    && sum_of_data_file_sizes < tablespace_size_in_header) {
1671
1672
		fprintf(stderr,
1673
			"InnoDB: Error: tablespace size stored in header"
1674
			" is %lu pages, but\n"
1675
			"InnoDB: the sum of data file sizes"
1676
			" is only %lu pages\n",
1677
			(ulong) tablespace_size_in_header,
1678
			(ulong) sum_of_data_file_sizes);
1679
1680
		if (srv_force_recovery == 0) {
1681
1682
			fprintf(stderr,
1683
				"InnoDB: Cannot start InnoDB. The tail of"
1684
				" the system tablespace is\n"
1685
				"InnoDB: missing. Have you edited"
1686
				" innodb_data_file_path in my.cnf in an\n"
1687
				"InnoDB: inappropriate way, removing"
1688
				" ibdata files from there?\n"
1689
				"InnoDB: You can set innodb_force_recovery=1"
1690
				" in my.cnf to force\n"
1691
				"InnoDB: a startup if you are trying to"
1692
				" recover a badly corrupt database.\n");
1693
1694
			return(DB_ERROR);
1695
		}
1696
	}
1697
1698
	/* Check that os_fast_mutexes work as expected */
1699
	os_fast_mutex_init(&srv_os_test_mutex);
1700
1701
	if (0 != os_fast_mutex_trylock(&srv_os_test_mutex)) {
1702
		fprintf(stderr,
1703
			"InnoDB: Error: pthread_mutex_trylock returns"
1704
			" an unexpected value on\n"
1705
			"InnoDB: success! Cannot continue.\n");
1706
		exit(1);
1707
	}
1708
1709
	os_fast_mutex_unlock(&srv_os_test_mutex);
1710
1711
	os_fast_mutex_lock(&srv_os_test_mutex);
1712
1713
	os_fast_mutex_unlock(&srv_os_test_mutex);
1714
1715
	os_fast_mutex_free(&srv_os_test_mutex);
1716
1717
	if (srv_print_verbose_log) {
1718
		ut_print_timestamp(stderr);
1719
		fprintf(stderr,
1720
			"  InnoDB: Started; log sequence number %lu %lu\n",
1721
			(ulong) ut_dulint_get_high(srv_start_lsn),
1722
			(ulong) ut_dulint_get_low(srv_start_lsn));
1723
	}
1724
1725
	if (srv_force_recovery > 0) {
1726
		fprintf(stderr,
1727
			"InnoDB: !!! innodb_force_recovery"
1728
			" is set to %lu !!!\n",
1729
			(ulong) srv_force_recovery);
1730
	}
1731
1732
	fflush(stderr);
1733
1734
	if (trx_doublewrite_must_reset_space_ids) {
1735
		/* Actually, we did not change the undo log format between
1736
		4.0 and 4.1.1, and we would not need to run purge to
1737
		completion. Note also that the purge algorithm in 4.1.1
1738
		can process the history list again even after a full
1739
		purge, because our algorithm does not cut the end of the
1740
		history list in all cases so that it would become empty
1741
		after a full purge. That means that we may purge 4.0 type
1742
		undo log even after this phase.
1743
1744
		The insert buffer record format changed between 4.0 and
1745
		4.1.1. It is essential that the insert buffer is emptied
1746
		here! */
1747
1748
		fprintf(stderr,
1749
			"InnoDB: You are upgrading to an"
1750
			" InnoDB version which allows multiple\n"
1751
			"InnoDB: tablespaces. Wait that purge"
1752
			" and insert buffer merge run to\n"
1753
			"InnoDB: completion...\n");
1754
		for (;;) {
1755
			os_thread_sleep(1000000);
1756
1757
			if (0 == strcmp(srv_main_thread_op_info,
1758
					"waiting for server activity")) {
1759
1760
				ut_a(ibuf_is_empty());
1761
1762
				break;
1763
			}
1764
		}
1765
		fprintf(stderr,
1766
			"InnoDB: Full purge and insert buffer merge"
1767
			" completed.\n");
1768
1769
		trx_sys_mark_upgraded_to_multiple_tablespaces();
1770
1771
		fprintf(stderr,
1772
			"InnoDB: You have now successfully upgraded"
1773
			" to the multiple tablespaces\n"
1774
			"InnoDB: format. You should NOT DOWNGRADE"
1775
			" to an earlier version of\n"
1776
			"InnoDB: InnoDB! But if you absolutely need to"
1777
			" downgrade, see\n"
1778
			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
1779
			"multiple-tablespaces.html\n"
1780
			"InnoDB: for instructions.\n");
1781
	}
1782
1783
	if (srv_force_recovery == 0) {
1784
		/* In the insert buffer we may have even bigger tablespace
1785
		id's, because we may have dropped those tablespaces, but
1786
		insert buffer merge has not had time to clean the records from
1787
		the ibuf tree. */
1788
1789
		ibuf_update_max_tablespace_id();
1790
	}
1791
1792
	srv_file_per_table = srv_file_per_table_original_value;
1793
1794
	return((int) DB_SUCCESS);
1795
}
1796
1797
/********************************************************************
1798
Shuts down the InnoDB database. */
1799
1800
int
1801
innobase_shutdown_for_mysql(void)
1802
/*=============================*/
1803
				/* out: DB_SUCCESS or error code */
1804
{
1805
	ulint	i;
1806
#ifdef __NETWARE__
1807
	extern ibool panic_shutdown;
1808
#endif
1809
	if (!srv_was_started) {
1810
		if (srv_is_being_started) {
1811
			ut_print_timestamp(stderr);
1812
			fprintf(stderr,
1813
				"  InnoDB: Warning: shutting down"
1814
				" a not properly started\n"
1815
				"InnoDB: or created database!\n");
1816
		}
1817
1818
		return(DB_SUCCESS);
1819
	}
1820
1821
	/* 1. Flush the buffer pool to disk, write the current lsn to
1822
	the tablespace header(s), and copy all log data to archive.
1823
	The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
1824
	just free data structures after the shutdown. */
1825
1826
1827
	if (srv_fast_shutdown == 2) {
1828
		ut_print_timestamp(stderr);
1829
		fprintf(stderr,
1830
			"  InnoDB: MySQL has requested a very fast shutdown"
1831
			" without flushing "
1832
			"the InnoDB buffer pool to data files."
1833
			" At the next mysqld startup "
1834
			"InnoDB will do a crash recovery!\n");
1835
	}
1836
1837
#ifdef __NETWARE__
1838
	if(!panic_shutdown)
1839
#endif
1840
		logs_empty_and_mark_files_at_shutdown();
1841
1842
	if (srv_conc_n_threads != 0) {
1843
		fprintf(stderr,
1844
			"InnoDB: Warning: query counter shows %ld queries"
1845
			" still\n"
1846
			"InnoDB: inside InnoDB at shutdown\n",
1847
			srv_conc_n_threads);
1848
	}
1849
1850
	/* 2. Make all threads created by InnoDB to exit */
1851
1852
	srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
1853
1854
	/* In a 'very fast' shutdown, we do not need to wait for these threads
1855
	to die; all which counts is that we flushed the log; a 'very fast'
1856
	shutdown is essentially a crash. */
1857
1858
	if (srv_fast_shutdown == 2) {
1859
		return(DB_SUCCESS);
1860
	}
1861
1862
	/* All threads end up waiting for certain events. Put those events
1863
	to the signaled state. Then the threads will exit themselves in
1864
	os_thread_event_wait(). */
1865
1866
	for (i = 0; i < 1000; i++) {
1867
		/* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
1868
		HERE OR EARLIER */
1869
1870
		/* a. Let the lock timeout thread exit */
1871
		os_event_set(srv_lock_timeout_thread_event);
1872
1873
		/* b. srv error monitor thread exits automatically, no need
1874
		to do anything here */
1875
1876
		/* c. We wake the master thread so that it exits */
1877
		srv_wake_master_thread();
1878
1879
		/* d. Exit the i/o threads */
1880
1881
		os_aio_wake_all_threads_at_shutdown();
1882
1883
		os_mutex_enter(os_sync_mutex);
1884
1885
		if (os_thread_count == 0) {
1886
			/* All the threads have exited or are just exiting;
1887
			NOTE that the threads may not have completed their
1888
			exit yet. Should we use pthread_join() to make sure
1889
			they have exited? Now we just sleep 0.1 seconds and
1890
			hope that is enough! */
1891
1892
			os_mutex_exit(os_sync_mutex);
1893
1894
			os_thread_sleep(100000);
1895
1896
			break;
1897
		}
1898
1899
		os_mutex_exit(os_sync_mutex);
1900
1901
		os_thread_sleep(100000);
1902
	}
1903
1904
	if (i == 1000) {
1905
		fprintf(stderr,
1906
			"InnoDB: Warning: %lu threads created by InnoDB"
1907
			" had not exited at shutdown!\n",
1908
			(ulong) os_thread_count);
1909
	}
1910
1911
	if (srv_monitor_file) {
1912
		fclose(srv_monitor_file);
1913
		srv_monitor_file = 0;
1914
		if (srv_monitor_file_name) {
1915
			unlink(srv_monitor_file_name);
1916
			mem_free(srv_monitor_file_name);
1917
		}
1918
	}
1919
	if (srv_dict_tmpfile) {
1920
		fclose(srv_dict_tmpfile);
1921
		srv_dict_tmpfile = 0;
1922
	}
1923
1924
	if (srv_misc_tmpfile) {
1925
		fclose(srv_misc_tmpfile);
1926
		srv_misc_tmpfile = 0;
1927
	}
1928
1929
	mutex_free(&srv_monitor_file_mutex);
1930
	mutex_free(&srv_dict_tmpfile_mutex);
1931
	mutex_free(&srv_misc_tmpfile_mutex);
1932
1933
	/* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
1934
	them */
1935
	sync_close();
1936
1937
	/* 4. Free the os_conc_mutex and all os_events and os_mutexes */
1938
1939
	srv_free();
1940
	os_sync_free();
1941
1942
	/* Check that all read views are closed except read view owned
1943
	by a purge. */
1944
1945
	if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) {
1946
		fprintf(stderr,
1947
			"InnoDB: Error: all read views were not closed"
1948
			" before shutdown:\n"
1949
			"InnoDB: %lu read views open \n",
1950
			UT_LIST_GET_LEN(trx_sys->view_list) - 1);
1951
	}
1952
1953
	/* 5. Free all allocated memory and the os_fast_mutex created in
1954
	ut0mem.c */
1955
1956
	ut_free_all_mem();
1957
1958
	if (os_thread_count != 0
1959
	    || os_event_count != 0
1960
	    || os_mutex_count != 0
1961
	    || os_fast_mutex_count != 0) {
1962
		fprintf(stderr,
1963
			"InnoDB: Warning: some resources were not"
1964
			" cleaned up in shutdown:\n"
1965
			"InnoDB: threads %lu, events %lu,"
1966
			" os_mutexes %lu, os_fast_mutexes %lu\n",
1967
			(ulong) os_thread_count, (ulong) os_event_count,
1968
			(ulong) os_mutex_count, (ulong) os_fast_mutex_count);
1969
	}
1970
1971
	if (dict_foreign_err_file) {
1972
		fclose(dict_foreign_err_file);
1973
	}
1974
	if (lock_latest_err_file) {
1975
		fclose(lock_latest_err_file);
1976
	}
1977
1978
	if (srv_print_verbose_log) {
1979
		ut_print_timestamp(stderr);
1980
		fprintf(stderr,
1981
			"  InnoDB: Shutdown completed;"
1982
			" log sequence number %lu %lu\n",
1983
			(ulong) ut_dulint_get_high(srv_shutdown_lsn),
1984
			(ulong) ut_dulint_get_low(srv_shutdown_lsn));
1985
	}
1986
1987
	return((int) DB_SUCCESS);
1988
}
1989
1990
#ifdef __NETWARE__
1991
void set_panic_flag_for_netware()
1992
{
1993
	extern ibool panic_shutdown;
1994
	panic_shutdown = TRUE;
1995
}
1996
#endif /* __NETWARE__ */
1997
#endif /* !UNIV_HOTBACKUP */