~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/******************************************************
2
The tablespace memory cache
3
4
(c) 1995 Innobase Oy
5
6
Created 10/25/1995 Heikki Tuuri
7
*******************************************************/
8
9
#include "fil0fil.h"
10
11
#include "mem0mem.h"
12
#include "sync0sync.h"
13
#include "hash0hash.h"
14
#include "os0file.h"
15
#include "os0sync.h"
16
#include "mach0data.h"
17
#include "ibuf0ibuf.h"
18
#include "buf0buf.h"
19
#include "buf0flu.h"
20
#include "buf0lru.h"
21
#include "log0log.h"
22
#include "log0recv.h"
23
#include "fsp0fsp.h"
24
#include "srv0srv.h"
25
#include "srv0start.h"
26
#include "mtr0mtr.h"
27
#include "mtr0log.h"
28
#include "dict0dict.h"
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
29
#include "page0zip.h"
1 by brian
clean slate
30
31
32
/*
33
		IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
34
		=============================================
35
36
The tablespace cache is responsible for providing fast read/write access to
37
tablespaces and logs of the database. File creation and deletion is done
38
in other modules which know more of the logic of the operation, however.
39
40
A tablespace consists of a chain of files. The size of the files does not
41
have to be divisible by the database block size, because we may just leave
42
the last incomplete block unused. When a new file is appended to the
43
tablespace, the maximum size of the file is also specified. At the moment,
44
we think that it is best to extend the file to its maximum size already at
45
the creation of the file, because then we can avoid dynamically extending
46
the file when more space is needed for the tablespace.
47
48
A block's position in the tablespace is specified with a 32-bit unsigned
49
integer. The files in the chain are thought to be catenated, and the block
50
corresponding to an address n is the nth block in the catenated file (where
51
the first block is named the 0th block, and the incomplete block fragments
52
at the end of files are not taken into account). A tablespace can be extended
53
by appending a new file at the end of the chain.
54
55
Our tablespace concept is similar to the one of Oracle.
56
57
To acquire more speed in disk transfers, a technique called disk striping is
58
sometimes used. This means that logical block addresses are divided in a
59
round-robin fashion across several disks. Windows NT supports disk striping,
60
so there we do not need to support it in the database. Disk striping is
61
implemented in hardware in RAID disks. We conclude that it is not necessary
62
to implement it in the database. Oracle 7 does not support disk striping,
63
either.
64
65
Another trick used at some database sites is replacing tablespace files by
66
raw disks, that is, the whole physical disk drive, or a partition of it, is
67
opened as a single file, and it is accessed through byte offsets calculated
68
from the start of the disk or the partition. This is recommended in some
69
books on database tuning to achieve more speed in i/o. Using raw disk
70
certainly prevents the OS from fragmenting disk space, but it is not clear
71
if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
72
system + EIDE Conner disk only a negligible difference in speed when reading
73
from a file, versus reading from a raw disk.
74
75
To have fast access to a tablespace or a log file, we put the data structures
76
to a hash table. Each tablespace and log file is given a unique 32-bit
77
identifier.
78
79
Some operating systems do not support many open files at the same time,
80
though NT seems to tolerate at least 900 open files. Therefore, we put the
81
open files in an LRU-list. If we need to open another file, we may close the
82
file at the end of the LRU-list. When an i/o-operation is pending on a file,
83
the file cannot be closed. We take the file nodes with pending i/o-operations
84
out of the LRU-list and keep a count of pending operations. When an operation
85
completes, we decrement the count and return the file node to the LRU-list if
86
the count drops to zero. */
87
88
/* When mysqld is run, the default directory "." is the mysqld datadir,
89
but in the MySQL Embedded Server Library and ibbackup it is not the default
90
directory, and we must set the base file path explicitly */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
91
UNIV_INTERN const char*	fil_path_to_mysql_datadir	= ".";
1 by brian
clean slate
92
93
/* The number of fsyncs done to the log */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
94
UNIV_INTERN ulint	fil_n_log_flushes			= 0;
1 by brian
clean slate
95
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
96
UNIV_INTERN ulint	fil_n_pending_log_flushes		= 0;
97
UNIV_INTERN ulint	fil_n_pending_tablespace_flushes	= 0;
1 by brian
clean slate
98
99
/* Null file address */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
100
UNIV_INTERN fil_addr_t	fil_addr_null = {FIL_NULL, 0};
1 by brian
clean slate
101
102
/* File node of a tablespace or the log data space */
103
struct fil_node_struct {
104
	fil_space_t*	space;	/* backpointer to the space where this node
105
				belongs */
106
	char*		name;	/* path to the file */
107
	ibool		open;	/* TRUE if file open */
108
	os_file_t	handle;	/* OS handle to the file, if file open */
109
	ibool		is_raw_disk;/* TRUE if the 'file' is actually a raw
110
				device or a raw disk partition */
111
	ulint		size;	/* size of the file in database pages, 0 if
112
				not known yet; the possible last incomplete
113
				megabyte may be ignored if space == 0 */
114
	ulint		n_pending;
115
				/* count of pending i/o's on this file;
116
				closing of the file is not allowed if
117
				this is > 0 */
118
	ulint		n_pending_flushes;
119
				/* count of pending flushes on this file;
120
				closing of the file is not allowed if
121
				this is > 0 */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
122
	ib_int64_t	modification_counter;/* when we write to the file we
1 by brian
clean slate
123
				increment this by one */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
124
	ib_int64_t	flush_counter;/* up to what modification_counter value
1 by brian
clean slate
125
				we have flushed the modifications to disk */
126
	UT_LIST_NODE_T(fil_node_t) chain;
127
				/* link field for the file chain */
128
	UT_LIST_NODE_T(fil_node_t) LRU;
129
				/* link field for the LRU list */
130
	ulint		magic_n;
131
};
132
133
#define	FIL_NODE_MAGIC_N	89389
134
135
/* Tablespace or log data space: let us call them by a common name space */
136
struct fil_space_struct {
137
	char*		name;	/* space name = the path to the first file in
138
				it */
139
	ulint		id;	/* space id */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
140
	ib_int64_t	tablespace_version;
1 by brian
clean slate
141
				/* in DISCARD/IMPORT this timestamp is used to
142
				check if we should ignore an insert buffer
143
				merge request for a page because it actually
144
				was for the previous incarnation of the
145
				space */
146
	ibool		mark;	/* this is set to TRUE at database startup if
147
				the space corresponds to a table in the InnoDB
148
				data dictionary; so we can print a warning of
149
				orphaned tablespaces */
150
	ibool		stop_ios;/* TRUE if we want to rename the .ibd file of
151
				tablespace and want to stop temporarily
152
				posting of new i/o requests on the file */
153
	ibool		stop_ibuf_merges;
154
				/* we set this TRUE when we start deleting a
155
				single-table tablespace */
156
	ibool		is_being_deleted;
157
				/* this is set to TRUE when we start
158
				deleting a single-table tablespace and its
159
				file; when this flag is set no further i/o
160
				or flush requests can be placed on this space,
161
				though there may be such requests still being
162
				processed on this space */
163
	ulint		purpose;/* FIL_TABLESPACE, FIL_LOG, or FIL_ARCH_LOG */
164
	UT_LIST_BASE_NODE_T(fil_node_t) chain;
165
				/* base node for the file chain */
166
	ulint		size;	/* space size in pages; 0 if a single-table
167
				tablespace whose size we do not know yet;
168
				last incomplete megabytes in data files may be
169
				ignored if space == 0 */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
170
	ulint		flags;	/* in: compressed page size
171
				and file format, or 0 */
1 by brian
clean slate
172
	ulint		n_reserved_extents;
173
				/* number of reserved free extents for
174
				ongoing operations like B-tree page split */
175
	ulint		n_pending_flushes; /* this is > 0 when flushing
176
				the tablespace to disk; dropping of the
177
				tablespace is forbidden if this is > 0 */
178
	ulint		n_pending_ibuf_merges;/* this is > 0 when merging
179
				insert buffer entries to a page so that we
180
				may need to access the ibuf bitmap page in the
181
				tablespade: dropping of the tablespace is
182
				forbidden if this is > 0 */
183
	hash_node_t	hash;	/* hash chain node */
184
	hash_node_t	name_hash;/* hash chain the name_hash table */
185
	rw_lock_t	latch;	/* latch protecting the file space storage
186
				allocation */
187
	UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
188
				/* list of spaces with at least one unflushed
189
				file we have written to */
190
	ibool		is_in_unflushed_spaces; /* TRUE if this space is
191
				currently in the list above */
192
	UT_LIST_NODE_T(fil_space_t) space_list;
193
				/* list of all spaces */
194
	ibuf_data_t*	ibuf_data;
195
				/* insert buffer data */
196
	ulint		magic_n;
197
};
198
199
#define	FIL_SPACE_MAGIC_N	89472
200
201
/* The tablespace memory cache; also the totality of logs = the log data space,
202
is stored here; below we talk about tablespaces, but also the ib_logfiles
203
form a 'space' and it is handled here */
204
205
typedef	struct fil_system_struct	fil_system_t;
206
struct fil_system_struct {
207
	mutex_t		mutex;		/* The mutex protecting the cache */
208
	hash_table_t*	spaces;		/* The hash table of spaces in the
209
					system; they are hashed on the space
210
					id */
211
	hash_table_t*	name_hash;	/* hash table based on the space
212
					name */
213
	UT_LIST_BASE_NODE_T(fil_node_t) LRU;
214
					/* base node for the LRU list of the
215
					most recently used open files with no
216
					pending i/o's; if we start an i/o on
217
					the file, we first remove it from this
218
					list, and return it to the start of
219
					the list when the i/o ends;
220
					log files and the system tablespace are
221
					not put to this list: they are opened
222
					after the startup, and kept open until
223
					shutdown */
224
	UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
225
					/* base node for the list of those
226
					tablespaces whose files contain
227
					unflushed writes; those spaces have
228
					at least one file node where
229
					modification_counter > flush_counter */
230
	ulint		n_open;		/* number of files currently open */
231
	ulint		max_n_open;	/* n_open is not allowed to exceed
232
					this */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
233
	ib_int64_t	modification_counter;/* when we write to a file we
1 by brian
clean slate
234
					increment this by one */
235
	ulint		max_assigned_id;/* maximum space id in the existing
236
					tables, or assigned during the time
237
					mysqld has been up; at an InnoDB
238
					startup we scan the data dictionary
239
					and set here the maximum of the
240
					space id's of the tables there */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
241
	ib_int64_t	tablespace_version;
1 by brian
clean slate
242
					/* a counter which is incremented for
243
					every space object memory creation;
244
					every space mem object gets a
245
					'timestamp' from this; in DISCARD/
246
					IMPORT this is used to check if we
247
					should ignore an insert buffer merge
248
					request */
249
	UT_LIST_BASE_NODE_T(fil_space_t) space_list;
250
					/* list of all file spaces */
251
};
252
253
/* The tablespace memory cache. This variable is NULL before the module is
254
initialized. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
255
UNIV_INTERN fil_system_t*	fil_system	= NULL;
1 by brian
clean slate
256
257
258
/************************************************************************
259
NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
260
261
Prepares a file node for i/o. Opens the file if it is closed. Updates the
262
pending i/o's field in the node and the system appropriately. Takes the node
263
off the LRU list if it is in the LRU list. The caller must hold the fil_sys
264
mutex. */
265
static
266
void
267
fil_node_prepare_for_io(
268
/*====================*/
269
	fil_node_t*	node,	/* in: file node */
270
	fil_system_t*	system,	/* in: tablespace memory cache */
271
	fil_space_t*	space);	/* in: space */
272
/************************************************************************
273
Updates the data structures when an i/o operation finishes. Updates the
274
pending i/o's field in the node appropriately. */
275
static
276
void
277
fil_node_complete_io(
278
/*=================*/
279
	fil_node_t*	node,	/* in: file node */
280
	fil_system_t*	system,	/* in: tablespace memory cache */
281
	ulint		type);	/* in: OS_FILE_WRITE or OS_FILE_READ; marks
282
				the node as modified if
283
				type == OS_FILE_WRITE */
284
/***********************************************************************
285
Checks if a single-table tablespace for a given table name exists in the
286
tablespace memory cache. */
287
static
288
ulint
289
fil_get_space_id_for_table(
290
/*=======================*/
291
				/* out: space id, ULINT_UNDEFINED if not
292
				found */
293
	const char*	name);	/* in: table name in the standard
294
				'databasename/tablename' format */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
295
/************************************************************************
296
Reads data from a space to a buffer. Remember that the possible incomplete
297
blocks at the end of file are ignored: they are not taken into account when
298
calculating the byte offset within a space. */
299
UNIV_INLINE
300
ulint
301
fil_read(
302
/*=====*/
303
				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
304
				if we are trying to do i/o on a tablespace
305
				which does not exist */
306
	ibool	sync,		/* in: TRUE if synchronous aio is desired */
307
	ulint	space_id,	/* in: space id */
308
	ulint	zip_size,	/* in: compressed page size in bytes;
309
				0 for uncompressed pages */
310
	ulint	block_offset,	/* in: offset in number of blocks */
311
	ulint	byte_offset,	/* in: remainder of offset in bytes; in aio
312
				this must be divisible by the OS block size */
313
	ulint	len,		/* in: how many bytes to read; this must not
314
				cross a file boundary; in aio this must be a
315
				block size multiple */
316
	void*	buf,		/* in/out: buffer where to store data read;
317
				in aio this must be appropriately aligned */
318
	void*	message)	/* in: message for aio handler if non-sync
319
				aio used, else ignored */
320
{
321
	return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset,
322
					  byte_offset, len, buf, message));
323
}
324
325
/************************************************************************
326
Writes data to a space from a buffer. Remember that the possible incomplete
327
blocks at the end of file are ignored: they are not taken into account when
328
calculating the byte offset within a space. */
329
UNIV_INLINE
330
ulint
331
fil_write(
332
/*======*/
333
				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
334
				if we are trying to do i/o on a tablespace
335
				which does not exist */
336
	ibool	sync,		/* in: TRUE if synchronous aio is desired */
337
	ulint	space_id,	/* in: space id */
338
	ulint	zip_size,	/* in: compressed page size in bytes;
339
				0 for uncompressed pages */
340
	ulint	block_offset,	/* in: offset in number of blocks */
341
	ulint	byte_offset,	/* in: remainder of offset in bytes; in aio
342
				this must be divisible by the OS block size */
343
	ulint	len,		/* in: how many bytes to write; this must
344
				not cross a file boundary; in aio this must
345
				be a block size multiple */
346
	void*	buf,		/* in: buffer from which to write; in aio
347
				this must be appropriately aligned */
348
	void*	message)	/* in: message for aio handler if non-sync
349
				aio used, else ignored */
350
{
351
	return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
352
					   byte_offset, len, buf, message));
353
}
354
355
/***********************************************************************
356
Returns the table space by a given id, NULL if not found. */
357
UNIV_INLINE
358
fil_space_t*
359
fil_space_get_by_id(
360
/*================*/
361
	ulint	id)	/* in: space id */
362
{
363
	fil_space_t*	space;
364
365
	ut_ad(mutex_own(&fil_system->mutex));
366
367
	HASH_SEARCH(hash, fil_system->spaces, id,
368
		    fil_space_t*, space, space->id == id);
369
370
	return(space);
371
}
372
373
/***********************************************************************
374
Returns the table space by a given name, NULL if not found. */
375
UNIV_INLINE
376
fil_space_t*
377
fil_space_get_by_name(
378
/*==================*/
379
	const char*	name)	/* in: space name */
380
{
381
	fil_space_t*	space;
382
	ulint		fold;
383
384
	ut_ad(mutex_own(&fil_system->mutex));
385
386
	fold = ut_fold_string(name);
387
388
	HASH_SEARCH(name_hash, fil_system->name_hash, fold,
389
		    fil_space_t*, space, !strcmp(name, space->name));
390
391
	return(space);
392
}
1 by brian
clean slate
393
394
/***********************************************************************
395
Returns the version number of a tablespace, -1 if not found. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
396
UNIV_INTERN
397
ib_int64_t
1 by brian
clean slate
398
fil_space_get_version(
399
/*==================*/
400
			/* out: version number, -1 if the tablespace does not
401
			exist in the memory cache */
402
	ulint	id)	/* in: space id */
403
{
404
	fil_system_t*	system		= fil_system;
405
	fil_space_t*	space;
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
406
	ib_int64_t	version		= -1;
1 by brian
clean slate
407
408
	ut_ad(system);
409
410
	mutex_enter(&(system->mutex));
411
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
412
	space = fil_space_get_by_id(id);
1 by brian
clean slate
413
414
	if (space) {
415
		version = space->tablespace_version;
416
	}
417
418
	mutex_exit(&(system->mutex));
419
420
	return(version);
421
}
422
423
/***********************************************************************
424
Returns the latch of a file space. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
425
UNIV_INTERN
1 by brian
clean slate
426
rw_lock_t*
427
fil_space_get_latch(
428
/*================*/
429
			/* out: latch protecting storage allocation */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
430
	ulint	id,	/* in: space id */
431
	ulint*	flags)	/* out: tablespace flags */
1 by brian
clean slate
432
{
433
	fil_system_t*	system		= fil_system;
434
	fil_space_t*	space;
435
436
	ut_ad(system);
437
438
	mutex_enter(&(system->mutex));
439
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
440
	space = fil_space_get_by_id(id);
1 by brian
clean slate
441
442
	ut_a(space);
443
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
444
	if (flags) {
445
		*flags = space->flags;
446
	}
447
1 by brian
clean slate
448
	mutex_exit(&(system->mutex));
449
450
	return(&(space->latch));
451
}
452
453
/***********************************************************************
454
Returns the type of a file space. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
455
UNIV_INTERN
1 by brian
clean slate
456
ulint
457
fil_space_get_type(
458
/*===============*/
459
			/* out: FIL_TABLESPACE or FIL_LOG */
460
	ulint	id)	/* in: space id */
461
{
462
	fil_system_t*	system		= fil_system;
463
	fil_space_t*	space;
464
465
	ut_ad(system);
466
467
	mutex_enter(&(system->mutex));
468
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
469
	space = fil_space_get_by_id(id);
1 by brian
clean slate
470
471
	ut_a(space);
472
473
	mutex_exit(&(system->mutex));
474
475
	return(space->purpose);
476
}
477
478
/***********************************************************************
479
Returns the ibuf data of a file space. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
480
UNIV_INTERN
1 by brian
clean slate
481
ibuf_data_t*
482
fil_space_get_ibuf_data(
483
/*====================*/
484
			/* out: ibuf data for this space */
485
	ulint	id)	/* in: space id */
486
{
487
	fil_system_t*	system		= fil_system;
488
	fil_space_t*	space;
489
490
	ut_ad(system);
491
492
	ut_a(id == 0);
493
494
	mutex_enter(&(system->mutex));
495
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
496
	space = fil_space_get_by_id(id);
1 by brian
clean slate
497
498
	mutex_exit(&(system->mutex));
499
500
	ut_a(space);
501
502
	return(space->ibuf_data);
503
}
504
505
/**************************************************************************
506
Checks if all the file nodes in a space are flushed. The caller must hold
507
the fil_system mutex. */
508
static
509
ibool
510
fil_space_is_flushed(
511
/*=================*/
512
				/* out: TRUE if all are flushed */
513
	fil_space_t*	space)	/* in: space */
514
{
515
	fil_node_t*	node;
516
517
	ut_ad(mutex_own(&(fil_system->mutex)));
518
519
	node = UT_LIST_GET_FIRST(space->chain);
520
521
	while (node) {
522
		if (node->modification_counter > node->flush_counter) {
523
524
			return(FALSE);
525
		}
526
527
		node = UT_LIST_GET_NEXT(chain, node);
528
	}
529
530
	return(TRUE);
531
}
532
533
/***********************************************************************
534
Appends a new file to the chain of files of a space. File must be closed. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
535
UNIV_INTERN
1 by brian
clean slate
536
void
537
fil_node_create(
538
/*============*/
539
	const char*	name,	/* in: file name (file must be closed) */
540
	ulint		size,	/* in: file size in database blocks, rounded
541
				downwards to an integer */
542
	ulint		id,	/* in: space id where to append */
543
	ibool		is_raw)	/* in: TRUE if a raw device or
544
				a raw disk partition */
545
{
546
	fil_system_t*	system	= fil_system;
547
	fil_node_t*	node;
548
	fil_space_t*	space;
549
550
	ut_a(system);
551
	ut_a(name);
552
553
	mutex_enter(&(system->mutex));
554
555
	node = mem_alloc(sizeof(fil_node_t));
556
557
	node->name = mem_strdup(name);
558
	node->open = FALSE;
559
560
	ut_a(!is_raw || srv_start_raw_disk_in_use);
561
562
	node->is_raw_disk = is_raw;
563
	node->size = size;
564
	node->magic_n = FIL_NODE_MAGIC_N;
565
	node->n_pending = 0;
566
	node->n_pending_flushes = 0;
567
568
	node->modification_counter = 0;
569
	node->flush_counter = 0;
570
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
571
	space = fil_space_get_by_id(id);
1 by brian
clean slate
572
573
	if (!space) {
574
		ut_print_timestamp(stderr);
575
		fprintf(stderr,
576
			"  InnoDB: Error: Could not find tablespace %lu for\n"
577
			"InnoDB: file ", (ulong) id);
578
		ut_print_filename(stderr, name);
579
		fputs(" in the tablespace memory cache.\n", stderr);
580
		mem_free(node->name);
581
582
		mem_free(node);
583
584
		mutex_exit(&(system->mutex));
585
586
		return;
587
	}
588
589
	space->size += size;
590
591
	node->space = space;
592
593
	UT_LIST_ADD_LAST(chain, space->chain, node);
594
595
	mutex_exit(&(system->mutex));
596
}
597
598
/************************************************************************
599
Opens a the file of a node of a tablespace. The caller must own the fil_system
600
mutex. */
601
static
602
void
603
fil_node_open_file(
604
/*===============*/
605
	fil_node_t*	node,	/* in: file node */
606
	fil_system_t*	system,	/* in: tablespace memory cache */
607
	fil_space_t*	space)	/* in: space */
608
{
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
609
	ib_int64_t	size_bytes;
1 by brian
clean slate
610
	ulint		size_low;
611
	ulint		size_high;
612
	ibool		ret;
613
	ibool		success;
614
#ifndef UNIV_HOTBACKUP
615
	byte*		buf2;
616
	byte*		page;
617
	ulint		space_id;
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
618
	ulint		flags;
1 by brian
clean slate
619
#endif /* !UNIV_HOTBACKUP */
620
621
	ut_ad(mutex_own(&(system->mutex)));
622
	ut_a(node->n_pending == 0);
623
	ut_a(node->open == FALSE);
624
625
	if (node->size == 0) {
626
		/* It must be a single-table tablespace and we do not know the
627
		size of the file yet. First we open the file in the normal
628
		mode, no async I/O here, for simplicity. Then do some checks,
629
		and close the file again.
630
		NOTE that we could not use the simple file read function
631
		os_file_read() in Windows to read from a file opened for
632
		async I/O! */
633
634
		node->handle = os_file_create_simple_no_error_handling(
635
			node->name, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
636
		if (!success) {
637
			/* The following call prints an error message */
638
			os_file_get_last_error(TRUE);
639
640
			ut_print_timestamp(stderr);
641
642
			fprintf(stderr,
643
				"  InnoDB: Fatal error: cannot open %s\n."
644
				"InnoDB: Have you deleted .ibd files"
645
				" under a running mysqld server?\n",
646
				node->name);
647
			ut_a(0);
648
		}
649
650
		os_file_get_size(node->handle, &size_low, &size_high);
651
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
652
		size_bytes = (((ib_int64_t)size_high) << 32)
653
			+ (ib_int64_t)size_low;
1 by brian
clean slate
654
#ifdef UNIV_HOTBACKUP
655
		node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
656
		/* TODO: adjust to zip_size, like below? */
1 by brian
clean slate
657
#else
658
		ut_a(space->purpose != FIL_LOG);
659
		ut_a(space->id != 0);
660
661
		if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
662
			fprintf(stderr,
663
				"InnoDB: Error: the size of single-table"
664
				" tablespace file %s\n"
665
				"InnoDB: is only %lu %lu,"
666
				" should be at least %lu!\n",
667
				node->name,
668
				(ulong) size_high,
669
				(ulong) size_low,
670
				(ulong) (FIL_IBD_FILE_INITIAL_SIZE
671
					 * UNIV_PAGE_SIZE));
672
673
			ut_a(0);
674
		}
675
676
		/* Read the first page of the tablespace */
677
678
		buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
679
		/* Align the memory for file i/o if we might have O_DIRECT
680
		set */
681
		page = ut_align(buf2, UNIV_PAGE_SIZE);
682
683
		success = os_file_read(node->handle, page, 0, 0,
684
				       UNIV_PAGE_SIZE);
685
		space_id = fsp_header_get_space_id(page);
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
686
		flags = fsp_header_get_flags(page);
1 by brian
clean slate
687
688
		ut_free(buf2);
689
690
		/* Close the file now that we have read the space id from it */
691
692
		os_file_close(node->handle);
693
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
694
		if (UNIV_UNLIKELY(space_id != space->id)) {
695
			fprintf(stderr,
696
				"InnoDB: Error: tablespace id is %lu"
697
				" in the data dictionary\n"
698
				"InnoDB: but in file %s it is %lu!\n",
699
				space->id, node->name, space_id);
700
701
			ut_error;
702
		}
703
704
		if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED
705
				  || space_id == 0)) {
1 by brian
clean slate
706
			fprintf(stderr,
707
				"InnoDB: Error: tablespace id %lu"
708
				" in file %s is not sensible\n",
709
				(ulong) space_id, node->name);
710
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
711
			ut_error;
1 by brian
clean slate
712
		}
713
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
714
		if (UNIV_UNLIKELY(space->flags != flags)) {
1 by brian
clean slate
715
			fprintf(stderr,
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
716
				"InnoDB: Error: table flags are %lx"
1 by brian
clean slate
717
				" in the data dictionary\n"
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
718
				"InnoDB: but the flags in file %s are %lx!\n",
719
				space->flags, node->name, flags);
720
721
			ut_error;
722
		}
723
724
		if (size_bytes >= 1024 * 1024) {
725
			/* Truncate the size to whole megabytes. */
726
			size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
727
		}
728
729
		if (!(flags & DICT_TF_ZSSIZE_MASK)) {
730
			node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
731
		} else {
1 by brian
clean slate
732
			node->size = (ulint)
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
733
				(size_bytes
734
				 / dict_table_flags_to_zip_size(flags));
1 by brian
clean slate
735
		}
736
#endif
737
		space->size += node->size;
738
	}
739
740
	/* printf("Opening file %s\n", node->name); */
741
742
	/* Open the file for reading and writing, in Windows normally in the
743
	unbuffered async I/O mode, though global variables may make
744
	os_file_create() to fall back to the normal file I/O mode. */
745
746
	if (space->purpose == FIL_LOG) {
747
		node->handle = os_file_create(node->name, OS_FILE_OPEN,
748
					      OS_FILE_AIO, OS_LOG_FILE, &ret);
749
	} else if (node->is_raw_disk) {
750
		node->handle = os_file_create(node->name,
751
					      OS_FILE_OPEN_RAW,
752
					      OS_FILE_AIO, OS_DATA_FILE, &ret);
753
	} else {
754
		node->handle = os_file_create(node->name, OS_FILE_OPEN,
755
					      OS_FILE_AIO, OS_DATA_FILE, &ret);
756
	}
757
758
	ut_a(ret);
759
760
	node->open = TRUE;
761
762
	system->n_open++;
763
764
	if (space->purpose == FIL_TABLESPACE && space->id != 0) {
765
		/* Put the node to the LRU list */
766
		UT_LIST_ADD_FIRST(LRU, system->LRU, node);
767
	}
768
}
769
770
/**************************************************************************
771
Closes a file. */
772
static
773
void
774
fil_node_close_file(
775
/*================*/
776
	fil_node_t*	node,	/* in: file node */
777
	fil_system_t*	system)	/* in: tablespace memory cache */
778
{
779
	ibool	ret;
780
781
	ut_ad(node && system);
782
	ut_ad(mutex_own(&(system->mutex)));
783
	ut_a(node->open);
784
	ut_a(node->n_pending == 0);
785
	ut_a(node->n_pending_flushes == 0);
786
	ut_a(node->modification_counter == node->flush_counter);
787
788
	ret = os_file_close(node->handle);
789
	ut_a(ret);
790
791
	/* printf("Closing file %s\n", node->name); */
792
793
	node->open = FALSE;
794
	ut_a(system->n_open > 0);
795
	system->n_open--;
796
797
	if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) {
798
		ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
799
800
		/* The node is in the LRU list, remove it */
801
		UT_LIST_REMOVE(LRU, system->LRU, node);
802
	}
803
}
804
805
/************************************************************************
806
Tries to close a file in the LRU list. The caller must hold the fil_sys
807
mutex. */
808
static
809
ibool
810
fil_try_to_close_file_in_LRU(
811
/*=========================*/
812
				/* out: TRUE if success, FALSE if should retry
813
				later; since i/o's generally complete in <
814
				100 ms, and as InnoDB writes at most 128 pages
815
				from the buffer pool in a batch, and then
816
				immediately flushes the files, there is a good
817
				chance that the next time we find a suitable
818
				node from the LRU list */
819
	ibool	print_info)	/* in: if TRUE, prints information why it
820
				cannot close a file */
821
{
822
	fil_system_t*	system		= fil_system;
823
	fil_node_t*	node;
824
825
	ut_ad(mutex_own(&(system->mutex)));
826
827
	node = UT_LIST_GET_LAST(system->LRU);
828
829
	if (print_info) {
830
		fprintf(stderr,
831
			"InnoDB: fil_sys open file LRU len %lu\n",
832
			(ulong) UT_LIST_GET_LEN(system->LRU));
833
	}
834
835
	while (node != NULL) {
836
		if (node->modification_counter == node->flush_counter
837
		    && node->n_pending_flushes == 0) {
838
839
			fil_node_close_file(node, system);
840
841
			return(TRUE);
842
		}
843
844
		if (print_info && node->n_pending_flushes > 0) {
845
			fputs("InnoDB: cannot close file ", stderr);
846
			ut_print_filename(stderr, node->name);
847
			fprintf(stderr, ", because n_pending_flushes %lu\n",
848
				(ulong) node->n_pending_flushes);
849
		}
850
851
		if (print_info
852
		    && node->modification_counter != node->flush_counter) {
853
			fputs("InnoDB: cannot close file ", stderr);
854
			ut_print_filename(stderr, node->name);
855
			fprintf(stderr,
856
				", because mod_count %ld != fl_count %ld\n",
857
				(long) node->modification_counter,
858
				(long) node->flush_counter);
859
		}
860
861
		node = UT_LIST_GET_PREV(LRU, node);
862
	}
863
864
	return(FALSE);
865
}
866
867
/***********************************************************************
868
Reserves the fil_system mutex and tries to make sure we can open at least one
869
file while holding it. This should be called before calling
870
fil_node_prepare_for_io(), because that function may need to open a file. */
871
static
872
void
873
fil_mutex_enter_and_prepare_for_io(
874
/*===============================*/
875
	ulint	space_id)	/* in: space id */
876
{
877
	fil_system_t*	system		= fil_system;
878
	fil_space_t*	space;
879
	ibool		success;
880
	ibool		print_info	= FALSE;
881
	ulint		count		= 0;
882
	ulint		count2		= 0;
883
884
	ut_ad(!mutex_own(&(system->mutex)));
885
retry:
886
	mutex_enter(&(system->mutex));
887
888
	if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
889
		/* We keep log files and system tablespace files always open;
890
		this is important in preventing deadlocks in this module, as
891
		a page read completion often performs another read from the
892
		insert buffer. The insert buffer is in tablespace 0, and we
893
		cannot end up waiting in this function. */
894
895
		return;
896
	}
897
898
	if (system->n_open < system->max_n_open) {
899
900
		return;
901
	}
902
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
903
	space = fil_space_get_by_id(space_id);
904
1 by brian
clean slate
905
	if (space != NULL && space->stop_ios) {
906
		/* We are going to do a rename file and want to stop new i/o's
907
		for a while */
908
909
		if (count2 > 20000) {
910
			fputs("InnoDB: Warning: tablespace ", stderr);
911
			ut_print_filename(stderr, space->name);
912
			fprintf(stderr,
913
				" has i/o ops stopped for a long time %lu\n",
914
				(ulong) count2);
915
		}
916
917
		mutex_exit(&(system->mutex));
918
919
		os_thread_sleep(20000);
920
921
		count2++;
922
923
		goto retry;
924
	}
925
926
	/* If the file is already open, no need to do anything; if the space
927
	does not exist, we handle the situation in the function which called
928
	this function */
929
930
	if (!space || UT_LIST_GET_FIRST(space->chain)->open) {
931
932
		return;
933
	}
934
935
	if (count > 1) {
936
		print_info = TRUE;
937
	}
938
939
	/* Too many files are open, try to close some */
940
close_more:
941
	success = fil_try_to_close_file_in_LRU(print_info);
942
943
	if (success && system->n_open >= system->max_n_open) {
944
945
		goto close_more;
946
	}
947
948
	if (system->n_open < system->max_n_open) {
949
		/* Ok */
950
951
		return;
952
	}
953
954
	if (count >= 2) {
955
		ut_print_timestamp(stderr);
956
		fprintf(stderr,
957
			"  InnoDB: Warning: too many (%lu) files stay open"
958
			" while the maximum\n"
959
			"InnoDB: allowed value would be %lu.\n"
960
			"InnoDB: You may need to raise the value of"
961
			" innodb_max_files_open in\n"
962
			"InnoDB: my.cnf.\n",
963
			(ulong) system->n_open, (ulong) system->max_n_open);
964
965
		return;
966
	}
967
968
	mutex_exit(&(system->mutex));
969
970
#ifndef UNIV_HOTBACKUP
971
	/* Wake the i/o-handler threads to make sure pending i/o's are
972
	performed */
973
	os_aio_simulated_wake_handler_threads();
974
975
	os_thread_sleep(20000);
976
#endif
977
	/* Flush tablespaces so that we can close modified files in the LRU
978
	list */
979
980
	fil_flush_file_spaces(FIL_TABLESPACE);
981
982
	count++;
983
984
	goto retry;
985
}
986
987
/***********************************************************************
988
Frees a file node object from a tablespace memory cache. */
989
static
990
void
991
fil_node_free(
992
/*==========*/
993
	fil_node_t*	node,	/* in, own: file node */
994
	fil_system_t*	system,	/* in: tablespace memory cache */
995
	fil_space_t*	space)	/* in: space where the file node is chained */
996
{
997
	ut_ad(node && system && space);
998
	ut_ad(mutex_own(&(system->mutex)));
999
	ut_a(node->magic_n == FIL_NODE_MAGIC_N);
1000
	ut_a(node->n_pending == 0);
1001
1002
	if (node->open) {
1003
		/* We fool the assertion in fil_node_close_file() to think
1004
		there are no unflushed modifications in the file */
1005
1006
		node->modification_counter = node->flush_counter;
1007
1008
		if (space->is_in_unflushed_spaces
1009
		    && fil_space_is_flushed(space)) {
1010
1011
			space->is_in_unflushed_spaces = FALSE;
1012
1013
			UT_LIST_REMOVE(unflushed_spaces,
1014
				       system->unflushed_spaces,
1015
				       space);
1016
		}
1017
1018
		fil_node_close_file(node, system);
1019
	}
1020
1021
	space->size -= node->size;
1022
1023
	UT_LIST_REMOVE(chain, space->chain, node);
1024
1025
	mem_free(node->name);
1026
	mem_free(node);
1027
}
1028
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1029
#ifdef UNIV_LOG_ARCHIVE
/********************************************************************
Drops files from the start of a file space, so that its size is cut by
the amount given. Whole file nodes are freed from the head of the chain
until the requested number of bytes has been removed. The space must exist
in the tablespace memory cache (asserted). */
UNIV_INTERN
void
fil_space_truncate_start(
/*=====================*/
	ulint	id,		/* in: space id */
	ulint	trunc_len)	/* in: truncate by this many bytes; it is an
				error if this does not equal to the combined
				size of some initial files in the space */
{
	fil_system_t*	system		= fil_system;
	fil_node_t*	node;
	fil_space_t*	space;

	mutex_enter(&(system->mutex));

	space = fil_space_get_by_id(id);

	ut_a(space);

	while (trunc_len > 0) {
		ulint	node_bytes;

		node = UT_LIST_GET_FIRST(space->chain);

		node_bytes = node->size * UNIV_PAGE_SIZE;

		/* Only whole files can be dropped */
		ut_a(node_bytes <= trunc_len);

		trunc_len -= node_bytes;

		fil_node_free(node, system, space);
	}

	mutex_exit(&(system->mutex));
}
#endif /* UNIV_LOG_ARCHIVE */
1 by brian
clean slate
1065
1066
/***********************************************************************
1067
Creates a space memory object and puts it to the tablespace memory cache. If
1068
there is an error, prints an error message to the .err log. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1069
UNIV_INTERN
1 by brian
clean slate
1070
ibool
1071
fil_space_create(
1072
/*=============*/
1073
				/* out: TRUE if success */
1074
	const char*	name,	/* in: space name */
1075
	ulint		id,	/* in: space id */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1076
	ulint		flags,	/* in: compressed page size
1077
				and file format, or 0 */
1 by brian
clean slate
1078
	ulint		purpose)/* in: FIL_TABLESPACE, or FIL_LOG if log */
1079
{
1080
	fil_system_t*	system		= fil_system;
1081
	fil_space_t*	space;
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1082
1083
	/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
1084
	ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
1085
	ROW_FORMAT=REDUNDANT (table->flags == 0).  For any other
1086
	format, the tablespace flags should equal table->flags. */
1087
	ut_a(flags != DICT_TF_COMPACT);
1088
1 by brian
clean slate
1089
try_again:
1090
	/*printf(
1091
	"InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name,
1092
	purpose);*/
1093
1094
	ut_a(system);
1095
	ut_a(name);
1096
1097
	mutex_enter(&(system->mutex));
1098
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1099
	space = fil_space_get_by_name(name);
1100
1101
	if (UNIV_LIKELY_NULL(space)) {
1102
		ulint	namesake_id;
1103
1 by brian
clean slate
1104
		ut_print_timestamp(stderr);
1105
		fprintf(stderr,
1106
			"  InnoDB: Warning: trying to init to the"
1107
			" tablespace memory cache\n"
1108
			"InnoDB: a tablespace %lu of name ", (ulong) id);
1109
		ut_print_filename(stderr, name);
1110
		fprintf(stderr, ",\n"
1111
			"InnoDB: but a tablespace %lu of the same name\n"
1112
			"InnoDB: already exists in the"
1113
			" tablespace memory cache!\n",
1114
			(ulong) space->id);
1115
1116
		if (id == 0 || purpose != FIL_TABLESPACE) {
1117
1118
			mutex_exit(&(system->mutex));
1119
1120
			return(FALSE);
1121
		}
1122
1123
		fprintf(stderr,
1124
			"InnoDB: We assume that InnoDB did a crash recovery,"
1125
			" and you had\n"
1126
			"InnoDB: an .ibd file for which the table"
1127
			" did not exist in the\n"
1128
			"InnoDB: InnoDB internal data dictionary in the"
1129
			" ibdata files.\n"
1130
			"InnoDB: We assume that you later removed the"
1131
			" .ibd and .frm files,\n"
1132
			"InnoDB: and are now trying to recreate the table."
1133
			" We now remove the\n"
1134
			"InnoDB: conflicting tablespace object"
1135
			" from the memory cache and try\n"
1136
			"InnoDB: the init again.\n");
1137
1138
		namesake_id = space->id;
1139
1140
		mutex_exit(&(system->mutex));
1141
1142
		fil_space_free(namesake_id);
1143
1144
		goto try_again;
1145
	}
1146
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1147
	space = fil_space_get_by_id(id);
1 by brian
clean slate
1148
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1149
	if (UNIV_LIKELY_NULL(space)) {
1 by brian
clean slate
1150
		fprintf(stderr,
1151
			"InnoDB: Error: trying to add tablespace %lu"
1152
			" of name ", (ulong) id);
1153
		ut_print_filename(stderr, name);
1154
		fprintf(stderr, "\n"
1155
			"InnoDB: to the tablespace memory cache,"
1156
			" but tablespace\n"
1157
			"InnoDB: %lu of name ", (ulong) space->id);
1158
		ut_print_filename(stderr, space->name);
1159
		fputs(" already exists in the tablespace\n"
1160
		      "InnoDB: memory cache!\n", stderr);
1161
1162
		mutex_exit(&(system->mutex));
1163
1164
		return(FALSE);
1165
	}
1166
1167
	space = mem_alloc(sizeof(fil_space_t));
1168
1169
	space->name = mem_strdup(name);
1170
	space->id = id;
1171
1172
	system->tablespace_version++;
1173
	space->tablespace_version = system->tablespace_version;
1174
	space->mark = FALSE;
1175
1176
	if (purpose == FIL_TABLESPACE && id > system->max_assigned_id) {
1177
		system->max_assigned_id = id;
1178
	}
1179
1180
	space->stop_ios = FALSE;
1181
	space->stop_ibuf_merges = FALSE;
1182
	space->is_being_deleted = FALSE;
1183
	space->purpose = purpose;
1184
	space->size = 0;
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1185
	space->flags = flags;
1 by brian
clean slate
1186
1187
	space->n_reserved_extents = 0;
1188
1189
	space->n_pending_flushes = 0;
1190
	space->n_pending_ibuf_merges = 0;
1191
1192
	UT_LIST_INIT(space->chain);
1193
	space->magic_n = FIL_SPACE_MAGIC_N;
1194
1195
	space->ibuf_data = NULL;
1196
1197
	rw_lock_create(&space->latch, SYNC_FSP);
1198
1199
	HASH_INSERT(fil_space_t, hash, system->spaces, id, space);
1200
1201
	HASH_INSERT(fil_space_t, name_hash, system->name_hash,
1202
		    ut_fold_string(name), space);
1203
	space->is_in_unflushed_spaces = FALSE;
1204
1205
	UT_LIST_ADD_LAST(space_list, system->space_list, space);
1206
1207
	mutex_exit(&(system->mutex));
1208
1209
	return(TRUE);
1210
}
1211
1212
/***********************************************************************
1213
Assigns a new space id for a new single-table tablespace. This works simply by
1214
incrementing the global counter. If 4 billion id's is not enough, we may need
1215
to recycle id's. */
1216
static
1217
ulint
1218
fil_assign_new_space_id(void)
1219
/*=========================*/
1220
			/* out: new tablespace id; ULINT_UNDEFINED if could
1221
			not assign an id */
1222
{
1223
	fil_system_t*	system = fil_system;
1224
	ulint		id;
1225
1226
	mutex_enter(&(system->mutex));
1227
1228
	system->max_assigned_id++;
1229
1230
	id = system->max_assigned_id;
1231
1232
	if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
1233
		ut_print_timestamp(stderr);
1234
		fprintf(stderr,
1235
			"InnoDB: Warning: you are running out of new"
1236
			" single-table tablespace id's.\n"
1237
			"InnoDB: Current counter is %lu and it"
1238
			" must not exceed %lu!\n"
1239
			"InnoDB: To reset the counter to zero"
1240
			" you have to dump all your tables and\n"
1241
			"InnoDB: recreate the whole InnoDB installation.\n",
1242
			(ulong) id,
1243
			(ulong) SRV_LOG_SPACE_FIRST_ID);
1244
	}
1245
1246
	if (id >= SRV_LOG_SPACE_FIRST_ID) {
1247
		ut_print_timestamp(stderr);
1248
		fprintf(stderr,
1249
			"InnoDB: You have run out of single-table"
1250
			" tablespace id's!\n"
1251
			"InnoDB: Current counter is %lu.\n"
1252
			"InnoDB: To reset the counter to zero you"
1253
			" have to dump all your tables and\n"
1254
			"InnoDB: recreate the whole InnoDB installation.\n",
1255
			(ulong) id);
1256
		system->max_assigned_id--;
1257
1258
		id = ULINT_UNDEFINED;
1259
	}
1260
1261
	mutex_exit(&(system->mutex));
1262
1263
	return(id);
1264
}
1265
1266
/***********************************************************************
1267
Frees a space object from the tablespace memory cache. Closes the files in
1268
the chain but does not delete them. There must not be any pending i/o's or
1269
flushes on the files. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1270
UNIV_INTERN
1 by brian
clean slate
1271
ibool
1272
fil_space_free(
1273
/*===========*/
1274
			/* out: TRUE if success */
1275
	ulint	id)	/* in: space id */
1276
{
1277
	fil_system_t*	system = fil_system;
1278
	fil_space_t*	space;
1279
	fil_space_t*	namespace;
1280
	fil_node_t*	fil_node;
1281
1282
	mutex_enter(&(system->mutex));
1283
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1284
	space = fil_space_get_by_id(id);
1 by brian
clean slate
1285
1286
	if (!space) {
1287
		ut_print_timestamp(stderr);
1288
		fprintf(stderr,
1289
			"  InnoDB: Error: trying to remove tablespace %lu"
1290
			" from the cache but\n"
1291
			"InnoDB: it is not there.\n", (ulong) id);
1292
1293
		mutex_exit(&(system->mutex));
1294
1295
		return(FALSE);
1296
	}
1297
1298
	HASH_DELETE(fil_space_t, hash, system->spaces, id, space);
1299
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1300
	namespace = fil_space_get_by_name(space->name);
1 by brian
clean slate
1301
	ut_a(namespace);
1302
	ut_a(space == namespace);
1303
1304
	HASH_DELETE(fil_space_t, name_hash, system->name_hash,
1305
		    ut_fold_string(space->name), space);
1306
1307
	if (space->is_in_unflushed_spaces) {
1308
		space->is_in_unflushed_spaces = FALSE;
1309
1310
		UT_LIST_REMOVE(unflushed_spaces, system->unflushed_spaces,
1311
			       space);
1312
	}
1313
1314
	UT_LIST_REMOVE(space_list, system->space_list, space);
1315
1316
	ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
1317
	ut_a(0 == space->n_pending_flushes);
1318
1319
	fil_node = UT_LIST_GET_FIRST(space->chain);
1320
1321
	while (fil_node != NULL) {
1322
		fil_node_free(fil_node, system, space);
1323
1324
		fil_node = UT_LIST_GET_FIRST(space->chain);
1325
	}
1326
1327
	ut_a(0 == UT_LIST_GET_LEN(space->chain));
1328
1329
	mutex_exit(&(system->mutex));
1330
1331
	rw_lock_free(&(space->latch));
1332
1333
	mem_free(space->name);
1334
	mem_free(space);
1335
1336
	return(TRUE);
1337
}
1338
1339
#ifdef UNIV_HOTBACKUP
/***********************************************************************
Looks up the tablespace object of a given id in the tablespace memory
cache. This is a thin wrapper around fil_space_get_by_id(). */
static
fil_space_t*
fil_get_space_for_id_low(
/*=====================*/
			/* out: tablespace object or NULL if not found;
			NOTE that you must own &(fil_system->mutex) to call
			this function! */
	ulint	id)	/* in: space id */
{
	ut_ad(fil_system);

	return(fil_space_get_by_id(id));
}
#endif
1361
1362
/***********************************************************************
1363
Returns the size of the space in pages. The tablespace must be cached in the
1364
memory cache. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1365
UNIV_INTERN
1 by brian
clean slate
1366
ulint
1367
fil_space_get_size(
1368
/*===============*/
1369
			/* out: space size, 0 if space not found */
1370
	ulint	id)	/* in: space id */
1371
{
1372
	fil_system_t*	system		= fil_system;
1373
	fil_node_t*	node;
1374
	fil_space_t*	space;
1375
	ulint		size;
1376
1377
	ut_ad(system);
1378
1379
	fil_mutex_enter_and_prepare_for_io(id);
1380
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1381
	space = fil_space_get_by_id(id);
1 by brian
clean slate
1382
1383
	if (space == NULL) {
1384
		mutex_exit(&(system->mutex));
1385
1386
		return(0);
1387
	}
1388
1389
	if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
1390
		ut_a(id != 0);
1391
1392
		ut_a(1 == UT_LIST_GET_LEN(space->chain));
1393
1394
		node = UT_LIST_GET_FIRST(space->chain);
1395
1396
		/* It must be a single-table tablespace and we have not opened
1397
		the file yet; the following calls will open it and update the
1398
		size fields */
1399
1400
		fil_node_prepare_for_io(node, system, space);
1401
		fil_node_complete_io(node, system, OS_FILE_READ);
1402
	}
1403
1404
	size = space->size;
1405
1406
	mutex_exit(&(system->mutex));
1407
1408
	return(size);
1409
}
1410
1411
/***********************************************************************
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1412
Returns the flags of the space. The tablespace must be cached
1413
in the memory cache. */
1414
UNIV_INTERN
1415
ulint
1416
fil_space_get_flags(
1417
/*================*/
1418
			/* out: flags, ULINT_UNDEFINED if space not found */
1419
	ulint	id)	/* in: space id */
1420
{
1421
	fil_system_t*	system		= fil_system;
1422
	fil_node_t*	node;
1423
	fil_space_t*	space;
1424
	ulint		flags;
1425
1426
	ut_ad(system);
1427
1428
	if (UNIV_UNLIKELY(!id)) {
1429
		return(0);
1430
	}
1431
1432
	fil_mutex_enter_and_prepare_for_io(id);
1433
1434
	space = fil_space_get_by_id(id);
1435
1436
	if (space == NULL) {
1437
		mutex_exit(&(system->mutex));
1438
1439
		return(ULINT_UNDEFINED);
1440
	}
1441
1442
	if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
1443
		ut_a(id != 0);
1444
1445
		ut_a(1 == UT_LIST_GET_LEN(space->chain));
1446
1447
		node = UT_LIST_GET_FIRST(space->chain);
1448
1449
		/* It must be a single-table tablespace and we have not opened
1450
		the file yet; the following calls will open it and update the
1451
		size fields */
1452
1453
		fil_node_prepare_for_io(node, system, space);
1454
		fil_node_complete_io(node, system, OS_FILE_READ);
1455
	}
1456
1457
	flags = space->flags;
1458
1459
	mutex_exit(&(system->mutex));
1460
1461
	return(flags);
1462
}
1463
1464
/***********************************************************************
1465
Returns the compressed page size of the space, or 0 if the space
1466
is not compressed. The tablespace must be cached in the memory cache. */
1467
UNIV_INTERN
1468
ulint
1469
fil_space_get_zip_size(
1470
/*===================*/
1471
			/* out: compressed page size, ULINT_UNDEFINED
1472
			if space not found */
1473
	ulint	id)	/* in: space id */
1474
{
1475
	ulint	flags;
1476
1477
	flags = fil_space_get_flags(id);
1478
1479
	if (flags && flags != ULINT_UNDEFINED) {
1480
1481
		return(dict_table_flags_to_zip_size(flags));
1482
	}
1483
1484
	return(flags);
1485
}
1486
1487
/***********************************************************************
1 by brian
clean slate
1488
Checks if the pair space, page_no refers to an existing page in a tablespace
1489
file space. The tablespace must be cached in the memory cache. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1490
UNIV_INTERN
1 by brian
clean slate
1491
ibool
1492
fil_check_adress_in_tablespace(
1493
/*===========================*/
1494
			/* out: TRUE if the address is meaningful */
1495
	ulint	id,	/* in: space id */
1496
	ulint	page_no)/* in: page number */
1497
{
1498
	if (fil_space_get_size(id) > page_no) {
1499
1500
		return(TRUE);
1501
	}
1502
1503
	return(FALSE);
1504
}
1505
1506
/********************************************************************
1507
Creates a the tablespace memory cache. */
1508
static
1509
fil_system_t*
1510
fil_system_create(
1511
/*==============*/
1512
				/* out, own: tablespace memory cache */
1513
	ulint	hash_size,	/* in: hash table size */
1514
	ulint	max_n_open)	/* in: maximum number of open files; must be
1515
				> 10 */
1516
{
1517
	fil_system_t*	system;
1518
1519
	ut_a(hash_size > 0);
1520
	ut_a(max_n_open > 0);
1521
1522
	system = mem_alloc(sizeof(fil_system_t));
1523
1524
	mutex_create(&system->mutex, SYNC_ANY_LATCH);
1525
1526
	system->spaces = hash_create(hash_size);
1527
	system->name_hash = hash_create(hash_size);
1528
1529
	UT_LIST_INIT(system->LRU);
1530
1531
	system->n_open = 0;
1532
	system->max_n_open = max_n_open;
1533
1534
	system->modification_counter = 0;
1535
	system->max_assigned_id = 0;
1536
1537
	system->tablespace_version = 0;
1538
1539
	UT_LIST_INIT(system->unflushed_spaces);
1540
	UT_LIST_INIT(system->space_list);
1541
1542
	return(system);
1543
}
1544
1545
/********************************************************************
1546
Initializes the tablespace memory cache. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1547
UNIV_INTERN
1 by brian
clean slate
1548
void
1549
fil_init(
1550
/*=====*/
1551
	ulint	max_n_open)	/* in: max number of open files */
1552
{
1553
	ulint	hash_size;
1554
1555
	ut_a(fil_system == NULL);
1556
1557
	if (srv_file_per_table) {
1558
		hash_size = 50000;
1559
	} else {
1560
		hash_size = 5000;
1561
	}
1562
1563
	fil_system = fil_system_create(hash_size, max_n_open);
1564
}
1565
1566
/***********************************************************************
1567
Opens all log files and system tablespace data files. They stay open until the
1568
database server shutdown. This should be called at a server startup after the
1569
space objects for the log and the system tablespace have been created. The
1570
purpose of this operation is to make sure we never run out of file descriptors
1571
if we need to read from the insert buffer or to write to the log. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1572
UNIV_INTERN
1 by brian
clean slate
1573
void
1574
fil_open_log_and_system_tablespace_files(void)
1575
/*==========================================*/
1576
{
1577
	fil_system_t*	system = fil_system;
1578
	fil_space_t*	space;
1579
	fil_node_t*	node;
1580
1581
	mutex_enter(&(system->mutex));
1582
1583
	space = UT_LIST_GET_FIRST(system->space_list);
1584
1585
	while (space != NULL) {
1586
		if (space->purpose != FIL_TABLESPACE || space->id == 0) {
1587
			node = UT_LIST_GET_FIRST(space->chain);
1588
1589
			while (node != NULL) {
1590
				if (!node->open) {
1591
					fil_node_open_file(node, system,
1592
							   space);
1593
				}
1594
				if (system->max_n_open < 10 + system->n_open) {
1595
					fprintf(stderr,
1596
						"InnoDB: Warning: you must"
1597
						" raise the value of"
1598
						" innodb_max_open_files in\n"
1599
						"InnoDB: my.cnf! Remember that"
1600
						" InnoDB keeps all log files"
1601
						" and all system\n"
1602
						"InnoDB: tablespace files open"
1603
						" for the whole time mysqld is"
1604
						" running, and\n"
1605
						"InnoDB: needs to open also"
1606
						" some .ibd files if the"
1607
						" file-per-table storage\n"
1608
						"InnoDB: model is used."
1609
						" Current open files %lu,"
1610
						" max allowed"
1611
						" open files %lu.\n",
1612
						(ulong) system->n_open,
1613
						(ulong) system->max_n_open);
1614
				}
1615
				node = UT_LIST_GET_NEXT(chain, node);
1616
			}
1617
		}
1618
		space = UT_LIST_GET_NEXT(space_list, space);
1619
	}
1620
1621
	mutex_exit(&(system->mutex));
1622
}
1623
1624
/***********************************************************************
1625
Closes all open files. There must not be any pending i/o's or not flushed
1626
modifications in the files. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1627
UNIV_INTERN
1 by brian
clean slate
1628
void
1629
fil_close_all_files(void)
1630
/*=====================*/
1631
{
1632
	fil_system_t*	system = fil_system;
1633
	fil_space_t*	space;
1634
	fil_node_t*	node;
1635
1636
	mutex_enter(&(system->mutex));
1637
1638
	space = UT_LIST_GET_FIRST(system->space_list);
1639
1640
	while (space != NULL) {
1641
		node = UT_LIST_GET_FIRST(space->chain);
1642
1643
		while (node != NULL) {
1644
			if (node->open) {
1645
				fil_node_close_file(node, system);
1646
			}
1647
			node = UT_LIST_GET_NEXT(chain, node);
1648
		}
1649
		space = UT_LIST_GET_NEXT(space_list, space);
1650
	}
1651
1652
	mutex_exit(&(system->mutex));
1653
}
1654
1655
/***********************************************************************
1656
Sets the max tablespace id counter if the given number is bigger than the
1657
previous value. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1658
UNIV_INTERN
1 by brian
clean slate
1659
void
1660
fil_set_max_space_id_if_bigger(
1661
/*===========================*/
1662
	ulint	max_id)	/* in: maximum known id */
1663
{
1664
	fil_system_t*	system = fil_system;
1665
1666
	if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
1667
		fprintf(stderr,
1668
			"InnoDB: Fatal error: max tablespace id"
1669
			" is too high, %lu\n", (ulong) max_id);
1670
		ut_a(0);
1671
	}
1672
1673
	mutex_enter(&(system->mutex));
1674
1675
	if (system->max_assigned_id < max_id) {
1676
1677
		system->max_assigned_id = max_id;
1678
	}
1679
1680
	mutex_exit(&(system->mutex));
1681
}
1682
1683
/********************************************************************
1684
Initializes the ibuf data structure for space 0 == the system tablespace.
1685
This can be called after the file space headers have been created and the
1686
dictionary system has been initialized. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1687
UNIV_INTERN
1 by brian
clean slate
1688
void
1689
fil_ibuf_init_at_db_start(void)
1690
/*===========================*/
1691
{
1692
	fil_space_t*	space;
1693
1694
	space = UT_LIST_GET_FIRST(fil_system->space_list);
1695
1696
	ut_a(space);
1697
	ut_a(space->purpose == FIL_TABLESPACE);
1698
1699
	space->ibuf_data = ibuf_data_init_for_space(space->id);
1700
}
1701
1702
/********************************************************************
1703
Writes the flushed lsn and the latest archived log number to the page header
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1704
of the first page of a data file of the system tablespace (space 0),
1705
which is uncompressed. */
1 by brian
clean slate
1706
static
1707
ulint
1708
fil_write_lsn_and_arch_no_to_file(
1709
/*==============================*/
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1710
	ulint		sum_of_sizes,	/* in: combined size of previous files
1711
					in space, in database pages */
1712
	ib_uint64_t	lsn,		/* in: lsn to write */
1713
	ulint		arch_log_no	/* in: archived log number to write */
1 by brian
clean slate
1714
	__attribute__((unused)))
1715
{
1716
	byte*	buf1;
1717
	byte*	buf;
1718
1719
	buf1 = mem_alloc(2 * UNIV_PAGE_SIZE);
1720
	buf = ut_align(buf1, UNIV_PAGE_SIZE);
1721
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1722
	fil_read(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
1723
1724
	mach_write_ull(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
1725
1726
	fil_write(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
1 by brian
clean slate
1727
1728
	return(DB_SUCCESS);
1729
}
1730
1731
/********************************************************************
1732
Writes the flushed lsn and the latest archived log number to the page
1733
header of the first page of each data file in the system tablespace. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1734
UNIV_INTERN
1 by brian
clean slate
1735
ulint
1736
fil_write_flushed_lsn_to_data_files(
1737
/*================================*/
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1738
					/* out: DB_SUCCESS or error number */
1739
	ib_uint64_t	lsn,		/* in: lsn to write */
1740
	ulint		arch_log_no)	/* in: latest archived log
1741
					file number */
1 by brian
clean slate
1742
{
1743
	fil_space_t*	space;
1744
	fil_node_t*	node;
1745
	ulint		sum_of_sizes;
1746
	ulint		err;
1747
1748
	mutex_enter(&(fil_system->mutex));
1749
1750
	space = UT_LIST_GET_FIRST(fil_system->space_list);
1751
1752
	while (space) {
1753
		/* We only write the lsn to all existing data files which have
1754
		been open during the lifetime of the mysqld process; they are
1755
		represented by the space objects in the tablespace memory
1756
		cache. Note that all data files in the system tablespace 0 are
1757
		always open. */
1758
1759
		if (space->purpose == FIL_TABLESPACE
1760
		    && space->id == 0) {
1761
			sum_of_sizes = 0;
1762
1763
			node = UT_LIST_GET_FIRST(space->chain);
1764
			while (node) {
1765
				mutex_exit(&(fil_system->mutex));
1766
1767
				err = fil_write_lsn_and_arch_no_to_file(
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1768
					sum_of_sizes, lsn, arch_log_no);
1 by brian
clean slate
1769
				if (err != DB_SUCCESS) {
1770
1771
					return(err);
1772
				}
1773
1774
				mutex_enter(&(fil_system->mutex));
1775
1776
				sum_of_sizes += node->size;
1777
				node = UT_LIST_GET_NEXT(chain, node);
1778
			}
1779
		}
1780
		space = UT_LIST_GET_NEXT(space_list, space);
1781
	}
1782
1783
	mutex_exit(&(fil_system->mutex));
1784
1785
	return(DB_SUCCESS);
1786
}
1787
1788
/***********************************************************************
1789
Reads the flushed lsn and arch no fields from a data file at database
1790
startup. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1791
UNIV_INTERN
1 by brian
clean slate
1792
void
1793
fil_read_flushed_lsn_and_arch_log_no(
1794
/*=================================*/
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1795
	os_file_t	data_file,		/* in: open data file */
1796
	ibool		one_read_already,	/* in: TRUE if min and max
1797
						parameters below already
1798
						contain sensible data */
1 by brian
clean slate
1799
#ifdef UNIV_LOG_ARCHIVE
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1800
	ulint*		min_arch_log_no,	/* in/out: */
1801
	ulint*		max_arch_log_no,	/* in/out: */
1 by brian
clean slate
1802
#endif /* UNIV_LOG_ARCHIVE */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1803
	ib_uint64_t*	min_flushed_lsn,	/* in/out: */
1804
	ib_uint64_t*	max_flushed_lsn)	/* in/out: */
1 by brian
clean slate
1805
{
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1806
	byte*		buf;
1807
	byte*		buf2;
1808
	ib_uint64_t	flushed_lsn;
1 by brian
clean slate
1809
1810
	buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
1811
	/* Align the memory for a possible read from a raw device */
1812
	buf = ut_align(buf2, UNIV_PAGE_SIZE);
1813
1814
	os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE);
1815
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1816
	flushed_lsn = mach_read_ull(buf + FIL_PAGE_FILE_FLUSH_LSN);
1 by brian
clean slate
1817
1818
	ut_free(buf2);
1819
1820
	if (!one_read_already) {
1821
		*min_flushed_lsn = flushed_lsn;
1822
		*max_flushed_lsn = flushed_lsn;
1823
#ifdef UNIV_LOG_ARCHIVE
1824
		*min_arch_log_no = arch_log_no;
1825
		*max_arch_log_no = arch_log_no;
1826
#endif /* UNIV_LOG_ARCHIVE */
1827
		return;
1828
	}
1829
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1830
	if (*min_flushed_lsn > flushed_lsn) {
1 by brian
clean slate
1831
		*min_flushed_lsn = flushed_lsn;
1832
	}
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1833
	if (*max_flushed_lsn < flushed_lsn) {
1 by brian
clean slate
1834
		*max_flushed_lsn = flushed_lsn;
1835
	}
1836
#ifdef UNIV_LOG_ARCHIVE
1837
	if (*min_arch_log_no > arch_log_no) {
1838
		*min_arch_log_no = arch_log_no;
1839
	}
1840
	if (*max_arch_log_no < arch_log_no) {
1841
		*max_arch_log_no = arch_log_no;
1842
	}
1843
#endif /* UNIV_LOG_ARCHIVE */
1844
}
1845
1846
/*================ SINGLE-TABLE TABLESPACES ==========================*/
1847
1848
/***********************************************************************
1849
Increments the count of pending insert buffer page merges, if space is not
1850
being deleted. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1851
UNIV_INTERN
1 by brian
clean slate
1852
ibool
1853
fil_inc_pending_ibuf_merges(
1854
/*========================*/
1855
			/* out: TRUE if being deleted, and ibuf merges should
1856
			be skipped */
1857
	ulint	id)	/* in: space id */
1858
{
1859
	fil_system_t*	system		= fil_system;
1860
	fil_space_t*	space;
1861
1862
	mutex_enter(&(system->mutex));
1863
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1864
	space = fil_space_get_by_id(id);
1 by brian
clean slate
1865
1866
	if (space == NULL) {
1867
		fprintf(stderr,
1868
			"InnoDB: Error: trying to do ibuf merge to a"
1869
			" dropped tablespace %lu\n",
1870
			(ulong) id);
1871
	}
1872
1873
	if (space == NULL || space->stop_ibuf_merges) {
1874
		mutex_exit(&(system->mutex));
1875
1876
		return(TRUE);
1877
	}
1878
1879
	space->n_pending_ibuf_merges++;
1880
1881
	mutex_exit(&(system->mutex));
1882
1883
	return(FALSE);
1884
}
1885
1886
/***********************************************************************
1887
Decrements the count of pending insert buffer page merges. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1888
UNIV_INTERN
1 by brian
clean slate
1889
void
1890
fil_decr_pending_ibuf_merges(
1891
/*=========================*/
1892
	ulint	id)	/* in: space id */
1893
{
1894
	fil_system_t*	system		= fil_system;
1895
	fil_space_t*	space;
1896
1897
	mutex_enter(&(system->mutex));
1898
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1899
	space = fil_space_get_by_id(id);
1 by brian
clean slate
1900
1901
	if (space == NULL) {
1902
		fprintf(stderr,
1903
			"InnoDB: Error: decrementing ibuf merge of a"
1904
			" dropped tablespace %lu\n",
1905
			(ulong) id);
1906
	}
1907
1908
	if (space != NULL) {
1909
		space->n_pending_ibuf_merges--;
1910
	}
1911
1912
	mutex_exit(&(system->mutex));
1913
}
1914
1915
/************************************************************
1916
Creates the database directory for a table if it does not exist yet. */
1917
static
1918
void
1919
fil_create_directory_for_tablename(
1920
/*===============================*/
1921
	const char*	name)	/* in: name in the standard
1922
				'databasename/tablename' format */
1923
{
1924
	const char*	namend;
1925
	char*		path;
1926
	ulint		len;
1927
1928
	len = strlen(fil_path_to_mysql_datadir);
1929
	namend = strchr(name, '/');
1930
	ut_a(namend);
1931
	path = mem_alloc(len + (namend - name) + 2);
1932
1933
	memcpy(path, fil_path_to_mysql_datadir, len);
1934
	path[len] = '/';
1935
	memcpy(path + len + 1, name, namend - name);
1936
	path[len + (namend - name) + 1] = 0;
1937
1938
	srv_normalize_path_for_win(path);
1939
1940
	ut_a(os_file_create_directory(path, FALSE));
1941
	mem_free(path);
1942
}
1943
1944
#ifndef UNIV_HOTBACKUP
1945
/************************************************************
1946
Writes a log record about an .ibd file create/rename/delete. */
1947
static
1948
void
1949
fil_op_write_log(
1950
/*=============*/
1951
	ulint		type,		/* in: MLOG_FILE_CREATE,
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1952
					MLOG_FILE_CREATE2,
1 by brian
clean slate
1953
					MLOG_FILE_DELETE, or
1954
					MLOG_FILE_RENAME */
1955
	ulint		space_id,	/* in: space id */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1956
	ulint		flags,		/* in: compressed page size
1957
					and file format
1958
					if type==MLOG_FILE_CREATE2, or 0 */
1 by brian
clean slate
1959
	const char*	name,		/* in: table name in the familiar
1960
					'databasename/tablename' format, or
1961
					the file path in the case of
1962
					MLOG_FILE_DELETE */
1963
	const char*	new_name,	/* in: if type is MLOG_FILE_RENAME,
1964
					the new table name in the
1965
					'databasename/tablename' format */
1966
	mtr_t*		mtr)		/* in: mini-transaction handle */
1967
{
1968
	byte*	log_ptr;
1969
	ulint	len;
1970
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1971
	log_ptr = mlog_open(mtr, 11 + 2 + 1);
1 by brian
clean slate
1972
1973
	if (!log_ptr) {
1974
		/* Logging in mtr is switched off during crash recovery:
1975
		in that case mlog_open returns NULL */
1976
		return;
1977
	}
1978
1979
	log_ptr = mlog_write_initial_log_record_for_file_op(type, space_id, 0,
1980
							    log_ptr, mtr);
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1981
	if (type == MLOG_FILE_CREATE2) {
1982
		mach_write_to_4(log_ptr, flags);
1983
		log_ptr += 4;
1984
	}
1 by brian
clean slate
1985
	/* Let us store the strings as null-terminated for easier readability
1986
	and handling */
1987
1988
	len = strlen(name) + 1;
1989
1990
	mach_write_to_2(log_ptr, len);
1991
	log_ptr += 2;
1992
	mlog_close(mtr, log_ptr);
1993
1994
	mlog_catenate_string(mtr, (byte*) name, len);
1995
1996
	if (type == MLOG_FILE_RENAME) {
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
1997
		len = strlen(new_name) + 1;
1 by brian
clean slate
1998
		log_ptr = mlog_open(mtr, 2 + len);
1999
		ut_a(log_ptr);
2000
		mach_write_to_2(log_ptr, len);
2001
		log_ptr += 2;
2002
		mlog_close(mtr, log_ptr);
2003
2004
		mlog_catenate_string(mtr, (byte*) new_name, len);
2005
	}
2006
}
2007
#endif
2008
2009
/***********************************************************************
2010
Parses the body of a log record written about an .ibd file operation. That is,
2011
the log record part after the standard (type, space id, page no) header of the
2012
log record.
2013
2014
If desired, also replays the delete or rename operation if the .ibd file
2015
exists and the space id in it matches. Replays the create operation if a file
2016
at that path does not exist yet. If the database directory for the file to be
2017
created does not exist, then we create the directory, too.
2018
2019
Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
2020
datadir that we should use in replaying the file operations. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2021
UNIV_INTERN
1 by brian
clean slate
2022
byte*
2023
fil_op_log_parse_or_replay(
2024
/*=======================*/
2025
				/* out: end of log record, or NULL if the
2026
				record was not completely contained between
2027
				ptr and end_ptr */
2028
	byte*	ptr,		/* in: buffer containing the log record body,
2029
				or an initial segment of it, if the record does
2030
				not fir completely between ptr and end_ptr */
2031
	byte*	end_ptr,	/* in: buffer end */
2032
	ulint	type,		/* in: the type of this log record */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2033
	ulint	space_id)	/* in: the space id of the tablespace in
2034
				question, or 0 if the log record should
2035
				only be parsed but not replayed */
1 by brian
clean slate
2036
{
2037
	ulint		name_len;
2038
	ulint		new_name_len;
2039
	const char*	name;
2040
	const char*	new_name	= NULL;
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2041
	ulint		flags		= 0;
2042
2043
	if (type == MLOG_FILE_CREATE2) {
2044
		if (end_ptr < ptr + 4) {
2045
2046
			return(NULL);
2047
		}
2048
2049
		flags = mach_read_from_4(ptr);
2050
		ptr += 4;
2051
	}
1 by brian
clean slate
2052
2053
	if (end_ptr < ptr + 2) {
2054
2055
		return(NULL);
2056
	}
2057
2058
	name_len = mach_read_from_2(ptr);
2059
2060
	ptr += 2;
2061
2062
	if (end_ptr < ptr + name_len) {
2063
2064
		return(NULL);
2065
	}
2066
2067
	name = (const char*) ptr;
2068
2069
	ptr += name_len;
2070
2071
	if (type == MLOG_FILE_RENAME) {
2072
		if (end_ptr < ptr + 2) {
2073
2074
			return(NULL);
2075
		}
2076
2077
		new_name_len = mach_read_from_2(ptr);
2078
2079
		ptr += 2;
2080
2081
		if (end_ptr < ptr + new_name_len) {
2082
2083
			return(NULL);
2084
		}
2085
2086
		new_name = (const char*) ptr;
2087
2088
		ptr += new_name_len;
2089
	}
2090
2091
	/* We managed to parse a full log record body */
2092
	/*
2093
	printf("Parsed log rec of type %lu space %lu\n"
2094
	"name %s\n", type, space_id, name);
2095
2096
	if (type == MLOG_FILE_RENAME) {
2097
	printf("new name %s\n", new_name);
2098
	}
2099
	*/
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2100
	if (!space_id) {
1 by brian
clean slate
2101
2102
		return(ptr);
2103
	}
2104
2105
	/* Let us try to perform the file operation, if sensible. Note that
2106
	ibbackup has at this stage already read in all space id info to the
2107
	fil0fil.c data structures.
2108
2109
	NOTE that our algorithm is not guaranteed to work correctly if there
2110
	were renames of tables during the backup. See ibbackup code for more
2111
	on the problem. */
2112
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2113
	switch (type) {
2114
	case MLOG_FILE_DELETE:
1 by brian
clean slate
2115
		if (fil_tablespace_exists_in_mem(space_id)) {
2116
			ut_a(fil_delete_tablespace(space_id));
2117
		}
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2118
2119
		break;
2120
2121
	case MLOG_FILE_RENAME:
1 by brian
clean slate
2122
		/* We do the rename based on space id, not old file name;
2123
		this should guarantee that after the log replay each .ibd file
2124
		has the correct name for the latest log sequence number; the
2125
		proof is left as an exercise :) */
2126
2127
		if (fil_tablespace_exists_in_mem(space_id)) {
2128
			/* Create the database directory for the new name, if
2129
			it does not exist yet */
2130
			fil_create_directory_for_tablename(new_name);
2131
2132
			/* Rename the table if there is not yet a tablespace
2133
			with the same name */
2134
2135
			if (fil_get_space_id_for_table(new_name)
2136
			    == ULINT_UNDEFINED) {
2137
				/* We do not care of the old name, that is
2138
				why we pass NULL as the first argument */
2139
				if (!fil_rename_tablespace(NULL, space_id,
2140
							   new_name)) {
2141
					ut_error;
2142
				}
2143
			}
2144
		}
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2145
2146
		break;
2147
2148
	case MLOG_FILE_CREATE:
2149
	case MLOG_FILE_CREATE2:
1 by brian
clean slate
2150
		if (fil_tablespace_exists_in_mem(space_id)) {
2151
			/* Do nothing */
2152
		} else if (fil_get_space_id_for_table(name)
2153
			   != ULINT_UNDEFINED) {
2154
			/* Do nothing */
2155
		} else {
2156
			/* Create the database directory for name, if it does
2157
			not exist yet */
2158
			fil_create_directory_for_tablename(name);
2159
2160
			if (fil_create_new_single_table_tablespace(
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2161
				    &space_id, name, FALSE, flags,
1 by brian
clean slate
2162
				    FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
2163
				ut_error;
2164
			}
2165
		}
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2166
2167
		break;
2168
2169
	default:
2170
		ut_error;
1 by brian
clean slate
2171
	}
2172
2173
	return(ptr);
2174
}
2175
2176
/***********************************************************************
2177
Deletes a single-table tablespace. The tablespace must be cached in the
2178
memory cache. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2179
UNIV_INTERN
1 by brian
clean slate
2180
ibool
2181
fil_delete_tablespace(
2182
/*==================*/
2183
			/* out: TRUE if success */
2184
	ulint	id)	/* in: space id */
2185
{
2186
	fil_system_t*	system		= fil_system;
2187
	ibool		success;
2188
	fil_space_t*	space;
2189
	fil_node_t*	node;
2190
	ulint		count		= 0;
2191
	char*		path;
2192
2193
	ut_a(id != 0);
2194
stop_ibuf_merges:
2195
	mutex_enter(&(system->mutex));
2196
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2197
	space = fil_space_get_by_id(id);
1 by brian
clean slate
2198
2199
	if (space != NULL) {
2200
		space->stop_ibuf_merges = TRUE;
2201
2202
		if (space->n_pending_ibuf_merges == 0) {
2203
			mutex_exit(&(system->mutex));
2204
2205
			count = 0;
2206
2207
			goto try_again;
2208
		} else {
2209
			if (count > 5000) {
2210
				ut_print_timestamp(stderr);
2211
				fputs("  InnoDB: Warning: trying to"
2212
				      " delete tablespace ", stderr);
2213
				ut_print_filename(stderr, space->name);
2214
				fprintf(stderr, ",\n"
2215
					"InnoDB: but there are %lu pending"
2216
					" ibuf merges on it.\n"
2217
					"InnoDB: Loop %lu.\n",
2218
					(ulong) space->n_pending_ibuf_merges,
2219
					(ulong) count);
2220
			}
2221
2222
			mutex_exit(&(system->mutex));
2223
2224
			os_thread_sleep(20000);
2225
			count++;
2226
2227
			goto stop_ibuf_merges;
2228
		}
2229
	}
2230
2231
	mutex_exit(&(system->mutex));
2232
	count = 0;
2233
2234
try_again:
2235
	mutex_enter(&(system->mutex));
2236
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2237
	space = fil_space_get_by_id(id);
1 by brian
clean slate
2238
2239
	if (space == NULL) {
2240
		ut_print_timestamp(stderr);
2241
		fprintf(stderr,
2242
			"  InnoDB: Error: cannot delete tablespace %lu\n"
2243
			"InnoDB: because it is not found in the"
2244
			" tablespace memory cache.\n",
2245
			(ulong) id);
2246
2247
		mutex_exit(&(system->mutex));
2248
2249
		return(FALSE);
2250
	}
2251
2252
	ut_a(space);
2253
	ut_a(space->n_pending_ibuf_merges == 0);
2254
2255
	space->is_being_deleted = TRUE;
2256
2257
	ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2258
	node = UT_LIST_GET_FIRST(space->chain);
2259
2260
	if (space->n_pending_flushes > 0 || node->n_pending > 0) {
2261
		if (count > 1000) {
2262
			ut_print_timestamp(stderr);
2263
			fputs("  InnoDB: Warning: trying to"
2264
			      " delete tablespace ", stderr);
2265
			ut_print_filename(stderr, space->name);
2266
			fprintf(stderr, ",\n"
2267
				"InnoDB: but there are %lu flushes"
2268
				" and %lu pending i/o's on it\n"
2269
				"InnoDB: Loop %lu.\n",
2270
				(ulong) space->n_pending_flushes,
2271
				(ulong) node->n_pending,
2272
				(ulong) count);
2273
		}
2274
		mutex_exit(&(system->mutex));
2275
		os_thread_sleep(20000);
2276
2277
		count++;
2278
2279
		goto try_again;
2280
	}
2281
2282
	path = mem_strdup(space->name);
2283
2284
	mutex_exit(&(system->mutex));
2285
#ifndef UNIV_HOTBACKUP
2286
	/* Invalidate in the buffer pool all pages belonging to the
2287
	tablespace. Since we have set space->is_being_deleted = TRUE, readahead
2288
	or ibuf merge can no longer read more pages of this tablespace to the
2289
	buffer pool. Thus we can clean the tablespace out of the buffer pool
2290
	completely and permanently. The flag is_being_deleted also prevents
2291
	fil_flush() from being applied to this tablespace. */
2292
2293
	buf_LRU_invalidate_tablespace(id);
2294
#endif
2295
	/* printf("Deleting tablespace %s id %lu\n", space->name, id); */
2296
2297
	success = fil_space_free(id);
2298
2299
	if (success) {
2300
		success = os_file_delete(path);
2301
2302
		if (!success) {
2303
			success = os_file_delete_if_exists(path);
2304
		}
2305
	}
2306
2307
	if (success) {
2308
#ifndef UNIV_HOTBACKUP
2309
		/* Write a log record about the deletion of the .ibd
2310
		file, so that ibbackup can replay it in the
2311
		--apply-log phase. We use a dummy mtr and the familiar
2312
		log write mechanism. */
2313
		mtr_t		mtr;
2314
2315
		/* When replaying the operation in ibbackup, do not try
2316
		to write any log record */
2317
		mtr_start(&mtr);
2318
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2319
		fil_op_write_log(MLOG_FILE_DELETE, id, 0, path, NULL, &mtr);
1 by brian
clean slate
2320
		mtr_commit(&mtr);
2321
#endif
2322
		mem_free(path);
2323
2324
		return(TRUE);
2325
	}
2326
2327
	mem_free(path);
2328
2329
	return(FALSE);
2330
}
2331
2332
/***********************************************************************
2333
Discards a single-table tablespace. The tablespace must be cached in the
2334
memory cache. Discarding is like deleting a tablespace, but
2335
1) we do not drop the table from the data dictionary;
2336
2) we remove all insert buffer entries for the tablespace immediately; in DROP
2337
TABLE they are only removed gradually in the background;
2338
3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
2339
as it originally had. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2340
UNIV_INTERN
1 by brian
clean slate
2341
ibool
2342
fil_discard_tablespace(
2343
/*===================*/
2344
			/* out: TRUE if success */
2345
	ulint	id)	/* in: space id */
2346
{
2347
	ibool	success;
2348
2349
	success = fil_delete_tablespace(id);
2350
2351
	if (!success) {
2352
		fprintf(stderr,
2353
			"InnoDB: Warning: cannot delete tablespace %lu"
2354
			" in DISCARD TABLESPACE.\n"
2355
			"InnoDB: But let us remove the"
2356
			" insert buffer entries for this tablespace.\n",
2357
			(ulong) id);
2358
	}
2359
2360
	/* Remove all insert buffer entries for the tablespace */
2361
2362
	ibuf_delete_for_discarded_space(id);
2363
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2364
	return(success);
1 by brian
clean slate
2365
}
2366
2367
/***********************************************************************
2368
Renames the memory cache structures of a single-table tablespace. */
2369
static
2370
ibool
2371
fil_rename_tablespace_in_mem(
2372
/*=========================*/
2373
				/* out: TRUE if success */
2374
	fil_space_t*	space,	/* in: tablespace memory object */
2375
	fil_node_t*	node,	/* in: file node of that tablespace */
2376
	const char*	path)	/* in: new name */
2377
{
2378
	fil_system_t*	system		= fil_system;
2379
	fil_space_t*	space2;
2380
	const char*	old_name	= space->name;
2381
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2382
	space2 = fil_space_get_by_name(old_name);
1 by brian
clean slate
2383
	if (space != space2) {
2384
		fputs("InnoDB: Error: cannot find ", stderr);
2385
		ut_print_filename(stderr, old_name);
2386
		fputs(" in tablespace memory cache\n", stderr);
2387
2388
		return(FALSE);
2389
	}
2390
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2391
	space2 = fil_space_get_by_name(path);
1 by brian
clean slate
2392
	if (space2 != NULL) {
2393
		fputs("InnoDB: Error: ", stderr);
2394
		ut_print_filename(stderr, path);
2395
		fputs(" is already in tablespace memory cache\n", stderr);
2396
2397
		return(FALSE);
2398
	}
2399
2400
	HASH_DELETE(fil_space_t, name_hash, system->name_hash,
2401
		    ut_fold_string(space->name), space);
2402
	mem_free(space->name);
2403
	mem_free(node->name);
2404
2405
	space->name = mem_strdup(path);
2406
	node->name = mem_strdup(path);
2407
2408
	HASH_INSERT(fil_space_t, name_hash, system->name_hash,
2409
		    ut_fold_string(path), space);
2410
	return(TRUE);
2411
}
2412
2413
/***********************************************************************
2414
Allocates a file name for a single-table tablespace. The string must be freed
2415
by caller with mem_free(). */
2416
static
2417
char*
2418
fil_make_ibd_name(
2419
/*==============*/
2420
					/* out, own: file name */
2421
	const char*	name,		/* in: table name or a dir path of a
2422
					TEMPORARY table */
2423
	ibool		is_temp)	/* in: TRUE if it is a dir path */
2424
{
2425
	ulint	namelen		= strlen(name);
2426
	ulint	dirlen		= strlen(fil_path_to_mysql_datadir);
2427
	char*	filename	= mem_alloc(namelen + dirlen + sizeof "/.ibd");
2428
2429
	if (is_temp) {
2430
		memcpy(filename, name, namelen);
2431
		memcpy(filename + namelen, ".ibd", sizeof ".ibd");
2432
	} else {
2433
		memcpy(filename, fil_path_to_mysql_datadir, dirlen);
2434
		filename[dirlen] = '/';
2435
2436
		memcpy(filename + dirlen + 1, name, namelen);
2437
		memcpy(filename + dirlen + namelen + 1, ".ibd", sizeof ".ibd");
2438
	}
2439
2440
	srv_normalize_path_for_win(filename);
2441
2442
	return(filename);
2443
}
2444
2445
/***********************************************************************
2446
Renames a single-table tablespace. The tablespace must be cached in the
2447
tablespace memory cache. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2448
UNIV_INTERN
1 by brian
clean slate
2449
ibool
2450
fil_rename_tablespace(
2451
/*==================*/
2452
					/* out: TRUE if success */
2453
	const char*	old_name,	/* in: old table name in the standard
2454
					databasename/tablename format of
2455
					InnoDB, or NULL if we do the rename
2456
					based on the space id only */
2457
	ulint		id,		/* in: space id */
2458
	const char*	new_name)	/* in: new table name in the standard
2459
					databasename/tablename format
2460
					of InnoDB */
2461
{
2462
	fil_system_t*	system		= fil_system;
2463
	ibool		success;
2464
	fil_space_t*	space;
2465
	fil_node_t*	node;
2466
	ulint		count		= 0;
2467
	char*		path;
2468
	ibool		old_name_was_specified		= TRUE;
2469
	char*		old_path;
2470
2471
	ut_a(id != 0);
2472
2473
	if (old_name == NULL) {
2474
		old_name = "(name not specified)";
2475
		old_name_was_specified = FALSE;
2476
	}
2477
retry:
2478
	count++;
2479
2480
	if (count > 1000) {
2481
		ut_print_timestamp(stderr);
2482
		fputs("  InnoDB: Warning: problems renaming ", stderr);
2483
		ut_print_filename(stderr, old_name);
2484
		fputs(" to ", stderr);
2485
		ut_print_filename(stderr, new_name);
2486
		fprintf(stderr, ", %lu iterations\n", (ulong) count);
2487
	}
2488
2489
	mutex_enter(&(system->mutex));
2490
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2491
	space = fil_space_get_by_id(id);
1 by brian
clean slate
2492
2493
	if (space == NULL) {
2494
		fprintf(stderr,
2495
			"InnoDB: Error: cannot find space id %lu"
2496
			" in the tablespace memory cache\n"
2497
			"InnoDB: though the table ", (ulong) id);
2498
		ut_print_filename(stderr, old_name);
2499
		fputs(" in a rename operation should have that id\n", stderr);
2500
		mutex_exit(&(system->mutex));
2501
2502
		return(FALSE);
2503
	}
2504
2505
	if (count > 25000) {
2506
		space->stop_ios = FALSE;
2507
		mutex_exit(&(system->mutex));
2508
2509
		return(FALSE);
2510
	}
2511
2512
	/* We temporarily close the .ibd file because we do not trust that
2513
	operating systems can rename an open file. For the closing we have to
2514
	wait until there are no pending i/o's or flushes on the file. */
2515
2516
	space->stop_ios = TRUE;
2517
2518
	ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2519
	node = UT_LIST_GET_FIRST(space->chain);
2520
2521
	if (node->n_pending > 0 || node->n_pending_flushes > 0) {
2522
		/* There are pending i/o's or flushes, sleep for a while and
2523
		retry */
2524
2525
		mutex_exit(&(system->mutex));
2526
2527
		os_thread_sleep(20000);
2528
2529
		goto retry;
2530
2531
	} else if (node->modification_counter > node->flush_counter) {
2532
		/* Flush the space */
2533
2534
		mutex_exit(&(system->mutex));
2535
2536
		os_thread_sleep(20000);
2537
2538
		fil_flush(id);
2539
2540
		goto retry;
2541
2542
	} else if (node->open) {
2543
		/* Close the file */
2544
2545
		fil_node_close_file(node, system);
2546
	}
2547
2548
	/* Check that the old name in the space is right */
2549
2550
	if (old_name_was_specified) {
2551
		old_path = fil_make_ibd_name(old_name, FALSE);
2552
2553
		ut_a(strcmp(space->name, old_path) == 0);
2554
		ut_a(strcmp(node->name, old_path) == 0);
2555
	} else {
2556
		old_path = mem_strdup(space->name);
2557
	}
2558
2559
	/* Rename the tablespace and the node in the memory cache */
2560
	path = fil_make_ibd_name(new_name, FALSE);
2561
	success = fil_rename_tablespace_in_mem(space, node, path);
2562
2563
	if (success) {
2564
		success = os_file_rename(old_path, path);
2565
2566
		if (!success) {
2567
			/* We have to revert the changes we made
2568
			to the tablespace memory cache */
2569
2570
			ut_a(fil_rename_tablespace_in_mem(space, node,
2571
							  old_path));
2572
		}
2573
	}
2574
2575
	mem_free(path);
2576
	mem_free(old_path);
2577
2578
	space->stop_ios = FALSE;
2579
2580
	mutex_exit(&(system->mutex));
2581
2582
#ifndef UNIV_HOTBACKUP
2583
	if (success) {
2584
		mtr_t		mtr;
2585
2586
		mtr_start(&mtr);
2587
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2588
		fil_op_write_log(MLOG_FILE_RENAME, id, 0, old_name, new_name,
1 by brian
clean slate
2589
				 &mtr);
2590
		mtr_commit(&mtr);
2591
	}
2592
#endif
2593
	return(success);
2594
}
2595
2596
/***********************************************************************
2597
Creates a new single-table tablespace to a database directory of MySQL.
2598
Database directories are under the 'datadir' of MySQL. The datadir is the
2599
directory of a running mysqld program. We can refer to it by simply the
2600
path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
2601
dir of the mysqld server. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2602
UNIV_INTERN
1 by brian
clean slate
2603
ulint
2604
fil_create_new_single_table_tablespace(
2605
/*===================================*/
2606
					/* out: DB_SUCCESS or error code */
2607
	ulint*		space_id,	/* in/out: space id; if this is != 0,
2608
					then this is an input parameter,
2609
					otherwise output */
2610
	const char*	tablename,	/* in: the table name in the usual
2611
					databasename/tablename format
2612
					of InnoDB, or a dir path to a temp
2613
					table */
2614
	ibool		is_temp,	/* in: TRUE if a table created with
2615
					CREATE TEMPORARY TABLE */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2616
	ulint		flags,		/* in: tablespace flags */
1 by brian
clean slate
2617
	ulint		size)		/* in: the initial size of the
2618
					tablespace file in pages,
2619
					must be >= FIL_IBD_FILE_INITIAL_SIZE */
2620
{
2621
	os_file_t	file;
2622
	ibool		ret;
2623
	ulint		err;
2624
	byte*		buf2;
2625
	byte*		page;
2626
	ibool		success;
2627
	char*		path;
2628
2629
	ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2630
	/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
2631
	ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
2632
	ROW_FORMAT=REDUNDANT (table->flags == 0).  For any other
2633
	format, the tablespace flags should equal table->flags. */
2634
	ut_a(flags != DICT_TF_COMPACT);
1 by brian
clean slate
2635
2636
	path = fil_make_ibd_name(tablename, is_temp);
2637
2638
	file = os_file_create(path, OS_FILE_CREATE, OS_FILE_NORMAL,
2639
			      OS_DATA_FILE, &ret);
2640
	if (ret == FALSE) {
2641
		ut_print_timestamp(stderr);
2642
		fputs("  InnoDB: Error creating file ", stderr);
2643
		ut_print_filename(stderr, path);
2644
		fputs(".\n", stderr);
2645
2646
		/* The following call will print an error message */
2647
2648
		err = os_file_get_last_error(TRUE);
2649
2650
		if (err == OS_FILE_ALREADY_EXISTS) {
2651
			fputs("InnoDB: The file already exists though"
2652
			      " the corresponding table did not\n"
2653
			      "InnoDB: exist in the InnoDB data dictionary."
2654
			      " Have you moved InnoDB\n"
2655
			      "InnoDB: .ibd files around without using the"
2656
			      " SQL commands\n"
2657
			      "InnoDB: DISCARD TABLESPACE and"
2658
			      " IMPORT TABLESPACE, or did\n"
2659
			      "InnoDB: mysqld crash in the middle of"
2660
			      " CREATE TABLE? You can\n"
2661
			      "InnoDB: resolve the problem by"
2662
			      " removing the file ", stderr);
2663
			ut_print_filename(stderr, path);
2664
			fputs("\n"
2665
			      "InnoDB: under the 'datadir' of MySQL.\n",
2666
			      stderr);
2667
2668
			mem_free(path);
2669
			return(DB_TABLESPACE_ALREADY_EXISTS);
2670
		}
2671
2672
		if (err == OS_FILE_DISK_FULL) {
2673
2674
			mem_free(path);
2675
			return(DB_OUT_OF_FILE_SPACE);
2676
		}
2677
2678
		mem_free(path);
2679
		return(DB_ERROR);
2680
	}
2681
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2682
	buf2 = ut_malloc(3 * UNIV_PAGE_SIZE);
1 by brian
clean slate
2683
	/* Align the memory for file i/o if we might have O_DIRECT set */
2684
	page = ut_align(buf2, UNIV_PAGE_SIZE);
2685
2686
	ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0);
2687
2688
	if (!ret) {
2689
		ut_free(buf2);
2690
		os_file_close(file);
2691
		os_file_delete(path);
2692
2693
		mem_free(path);
2694
		return(DB_OUT_OF_FILE_SPACE);
2695
	}
2696
2697
	if (*space_id == 0) {
2698
		*space_id = fil_assign_new_space_id();
2699
	}
2700
2701
	/* printf("Creating tablespace %s id %lu\n", path, *space_id); */
2702
2703
	if (*space_id == ULINT_UNDEFINED) {
2704
		ut_free(buf2);
2705
error_exit:
2706
		os_file_close(file);
2707
error_exit2:
2708
		os_file_delete(path);
2709
2710
		mem_free(path);
2711
		return(DB_ERROR);
2712
	}
2713
2714
	/* We have to write the space id to the file immediately and flush the
2715
	file to disk. This is because in crash recovery we must be aware what
2716
	tablespaces exist and what are their space id's, so that we can apply
2717
	the log records to the right file. It may take quite a while until
2718
	buffer pool flush algorithms write anything to the file and flush it to
2719
	disk. If we would not write here anything, the file would be filled
2720
	with zeros from the call of os_file_set_size(), until a buffer pool
2721
	flush would write to it. */
2722
2723
	memset(page, '\0', UNIV_PAGE_SIZE);
2724
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2725
	fsp_header_init_fields(page, *space_id, flags);
2726
	mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, *space_id);
2727
2728
	if (!(flags & DICT_TF_ZSSIZE_MASK)) {
2729
		buf_flush_init_for_writing(page, NULL, 0);
2730
		ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE);
2731
	} else {
2732
		page_zip_des_t	page_zip;
2733
		ulint		zip_size;
2734
2735
		zip_size = ((PAGE_ZIP_MIN_SIZE >> 1)
2736
			    << ((flags & DICT_TF_ZSSIZE_MASK)
2737
				>> DICT_TF_ZSSIZE_SHIFT));
2738
2739
		page_zip_set_size(&page_zip, zip_size);
2740
		page_zip.data = page + UNIV_PAGE_SIZE;
2741
#ifdef UNIV_DEBUG
2742
		page_zip.m_start =
2743
#endif /* UNIV_DEBUG */
2744
			page_zip.m_end = page_zip.m_nonempty =
2745
			page_zip.n_blobs = 0;
2746
		buf_flush_init_for_writing(page, &page_zip, 0);
2747
		ret = os_file_write(path, file, page_zip.data, 0, 0, zip_size);
2748
	}
1 by brian
clean slate
2749
2750
	ut_free(buf2);
2751
2752
	if (!ret) {
2753
		fputs("InnoDB: Error: could not write the first page"
2754
		      " to tablespace ", stderr);
2755
		ut_print_filename(stderr, path);
2756
		putc('\n', stderr);
2757
		goto error_exit;
2758
	}
2759
2760
	ret = os_file_flush(file);
2761
2762
	if (!ret) {
2763
		fputs("InnoDB: Error: file flush of tablespace ", stderr);
2764
		ut_print_filename(stderr, path);
2765
		fputs(" failed\n", stderr);
2766
		goto error_exit;
2767
	}
2768
2769
	os_file_close(file);
2770
2771
	if (*space_id == ULINT_UNDEFINED) {
2772
		goto error_exit2;
2773
	}
2774
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2775
	success = fil_space_create(path, *space_id, flags, FIL_TABLESPACE);
1 by brian
clean slate
2776
2777
	if (!success) {
2778
		goto error_exit2;
2779
	}
2780
2781
	fil_node_create(path, size, *space_id, FALSE);
2782
2783
#ifndef UNIV_HOTBACKUP
2784
	{
2785
		mtr_t		mtr;
2786
2787
		mtr_start(&mtr);
2788
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2789
		fil_op_write_log(flags
2790
				 ? MLOG_FILE_CREATE2
2791
				 : MLOG_FILE_CREATE,
2792
				 *space_id, flags,
2793
				 tablename, NULL, &mtr);
1 by brian
clean slate
2794
2795
		mtr_commit(&mtr);
2796
	}
2797
#endif
2798
	mem_free(path);
2799
	return(DB_SUCCESS);
2800
}
2801
2802
/************************************************************************
2803
It is possible, though very improbable, that the lsn's in the tablespace to be
2804
imported have risen above the current system lsn, if a lengthy purge, ibuf
2805
merge, or rollback was performed on a backup taken with ibbackup. If that is
2806
the case, reset page lsn's in the file. We assume that mysqld was shut down
2807
after it performed these cleanup operations on the .ibd file, so that it at
2808
the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
2809
first page of the .ibd file, and we can determine whether we need to reset the
2810
lsn's just by looking at that flush lsn. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2811
UNIV_INTERN
1 by brian
clean slate
2812
ibool
2813
fil_reset_too_high_lsns(
2814
/*====================*/
2815
					/* out: TRUE if success */
2816
	const char*	name,		/* in: table name in the
2817
					databasename/tablename format */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2818
	ib_uint64_t	current_lsn)	/* in: reset lsn's if the lsn stamped
1 by brian
clean slate
2819
					to FIL_PAGE_FILE_FLUSH_LSN in the
2820
					first page is too high */
2821
{
2822
	os_file_t	file;
2823
	char*		filepath;
2824
	byte*		page;
2825
	byte*		buf2;
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2826
	ib_uint64_t	flush_lsn;
1 by brian
clean slate
2827
	ulint		space_id;
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2828
	ib_int64_t	file_size;
2829
	ib_int64_t	offset;
2830
	ulint		zip_size;
1 by brian
clean slate
2831
	ibool		success;
2832
2833
	filepath = fil_make_ibd_name(name, FALSE);
2834
2835
	file = os_file_create_simple_no_error_handling(
2836
		filepath, OS_FILE_OPEN, OS_FILE_READ_WRITE, &success);
2837
	if (!success) {
2838
		/* The following call prints an error message */
2839
		os_file_get_last_error(TRUE);
2840
2841
		ut_print_timestamp(stderr);
2842
2843
		fputs("  InnoDB: Error: trying to open a table,"
2844
		      " but could not\n"
2845
		      "InnoDB: open the tablespace file ", stderr);
2846
		ut_print_filename(stderr, filepath);
2847
		fputs("!\n", stderr);
2848
		mem_free(filepath);
2849
2850
		return(FALSE);
2851
	}
2852
2853
	/* Read the first page of the tablespace */
2854
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2855
	buf2 = ut_malloc(3 * UNIV_PAGE_SIZE);
1 by brian
clean slate
2856
	/* Align the memory for file i/o if we might have O_DIRECT set */
2857
	page = ut_align(buf2, UNIV_PAGE_SIZE);
2858
2859
	success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
2860
	if (!success) {
2861
2862
		goto func_exit;
2863
	}
2864
2865
	/* We have to read the file flush lsn from the header of the file */
2866
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2867
	flush_lsn = mach_read_ull(page + FIL_PAGE_FILE_FLUSH_LSN);
1 by brian
clean slate
2868
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2869
	if (current_lsn >= flush_lsn) {
1 by brian
clean slate
2870
		/* Ok */
2871
		success = TRUE;
2872
2873
		goto func_exit;
2874
	}
2875
2876
	space_id = fsp_header_get_space_id(page);
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2877
	zip_size = fsp_header_get_zip_size(page);
1 by brian
clean slate
2878
2879
	ut_print_timestamp(stderr);
2880
	fprintf(stderr,
2881
		"  InnoDB: Flush lsn in the tablespace file %lu"
2882
		" to be imported\n"
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2883
		"InnoDB: is %"PRIu64", which exceeds current"
2884
		" system lsn %"PRIu64".\n"
1 by brian
clean slate
2885
		"InnoDB: We reset the lsn's in the file ",
2886
		(ulong) space_id,
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2887
		flush_lsn, current_lsn);
1 by brian
clean slate
2888
	ut_print_filename(stderr, filepath);
2889
	fputs(".\n", stderr);
2890
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2891
	ut_a(ut_is_2pow(zip_size));
2892
	ut_a(zip_size <= UNIV_PAGE_SIZE);
2893
1 by brian
clean slate
2894
	/* Loop through all the pages in the tablespace and reset the lsn and
2895
	the page checksum if necessary */
2896
2897
	file_size = os_file_get_size_as_iblonglong(file);
2898
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2899
	for (offset = 0; offset < file_size;
2900
	     offset += zip_size ? zip_size : UNIV_PAGE_SIZE) {
1 by brian
clean slate
2901
		success = os_file_read(file, page,
2902
				       (ulint)(offset & 0xFFFFFFFFUL),
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2903
				       (ulint)(offset >> 32),
2904
				       zip_size ? zip_size : UNIV_PAGE_SIZE);
1 by brian
clean slate
2905
		if (!success) {
2906
2907
			goto func_exit;
2908
		}
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2909
		if (mach_read_ull(page + FIL_PAGE_LSN) > current_lsn) {
1 by brian
clean slate
2910
			/* We have to reset the lsn */
2911
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2912
			if (zip_size) {
2913
				memcpy(page + UNIV_PAGE_SIZE, page, zip_size);
2914
				buf_flush_init_for_writing(
2915
					page, page + UNIV_PAGE_SIZE,
2916
					current_lsn);
2917
			} else {
2918
				buf_flush_init_for_writing(
2919
					page, NULL, current_lsn);
2920
			}
1 by brian
clean slate
2921
			success = os_file_write(filepath, file, page,
2922
						(ulint)(offset & 0xFFFFFFFFUL),
2923
						(ulint)(offset >> 32),
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2924
						zip_size
2925
						? zip_size
2926
						: UNIV_PAGE_SIZE);
1 by brian
clean slate
2927
			if (!success) {
2928
2929
				goto func_exit;
2930
			}
2931
		}
2932
	}
2933
2934
	success = os_file_flush(file);
2935
	if (!success) {
2936
2937
		goto func_exit;
2938
	}
2939
2940
	/* We now update the flush_lsn stamp at the start of the file */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2941
	success = os_file_read(file, page, 0, 0,
2942
			       zip_size ? zip_size : UNIV_PAGE_SIZE);
1 by brian
clean slate
2943
	if (!success) {
2944
2945
		goto func_exit;
2946
	}
2947
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2948
	mach_write_ull(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
1 by brian
clean slate
2949
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2950
	success = os_file_write(filepath, file, page, 0, 0,
2951
				zip_size ? zip_size : UNIV_PAGE_SIZE);
1 by brian
clean slate
2952
	if (!success) {
2953
2954
		goto func_exit;
2955
	}
2956
	success = os_file_flush(file);
2957
func_exit:
2958
	os_file_close(file);
2959
	ut_free(buf2);
2960
	mem_free(filepath);
2961
2962
	return(success);
2963
}
2964
2965
/************************************************************************
2966
Tries to open a single-table tablespace and optionally checks the space id is
2967
right in it. If does not succeed, prints an error message to the .err log. This
2968
function is used to open a tablespace when we start up mysqld, and also in
2969
IMPORT TABLESPACE.
2970
NOTE that we assume this operation is used either at the database startup
2971
or under the protection of the dictionary mutex, so that two users cannot
2972
race here. This operation does not leave the file associated with the
2973
tablespace open, but closes it after we have looked at the space id in it. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2974
UNIV_INTERN
1 by brian
clean slate
2975
ibool
2976
fil_open_single_table_tablespace(
2977
/*=============================*/
2978
					/* out: TRUE if success */
2979
	ibool		check_space_id,	/* in: should we check that the space
2980
					id in the file is right; we assume
2981
					that this function runs much faster
2982
					if no check is made, since accessing
2983
					the file inode probably is much
2984
					faster (the OS caches them) than
2985
					accessing the first page of the file */
2986
	ulint		id,		/* in: space id */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2987
	ulint		flags,		/* in: tablespace flags */
1 by brian
clean slate
2988
	const char*	name)		/* in: table name in the
2989
					databasename/tablename format */
2990
{
2991
	os_file_t	file;
2992
	char*		filepath;
2993
	ibool		success;
2994
	byte*		buf2;
2995
	byte*		page;
2996
	ulint		space_id;
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
2997
	ulint		space_flags;
1 by brian
clean slate
2998
	ibool		ret		= TRUE;
2999
3000
	filepath = fil_make_ibd_name(name, FALSE);
3001
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3002
	/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
3003
	ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
3004
	ROW_FORMAT=REDUNDANT (table->flags == 0).  For any other
3005
	format, the tablespace flags should equal table->flags. */
3006
	ut_a(flags != DICT_TF_COMPACT);
3007
1 by brian
clean slate
3008
	file = os_file_create_simple_no_error_handling(
3009
		filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
3010
	if (!success) {
3011
		/* The following call prints an error message */
3012
		os_file_get_last_error(TRUE);
3013
3014
		ut_print_timestamp(stderr);
3015
3016
		fputs("  InnoDB: Error: trying to open a table,"
3017
		      " but could not\n"
3018
		      "InnoDB: open the tablespace file ", stderr);
3019
		ut_print_filename(stderr, filepath);
3020
		fputs("!\n"
3021
		      "InnoDB: Have you moved InnoDB .ibd files around"
3022
		      " without using the\n"
3023
		      "InnoDB: commands DISCARD TABLESPACE and"
3024
		      " IMPORT TABLESPACE?\n"
3025
		      "InnoDB: It is also possible that this is"
3026
		      " a temporary table #sql...,\n"
3027
		      "InnoDB: and MySQL removed the .ibd file for this.\n"
3028
		      "InnoDB: Please refer to\n"
3029
		      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
3030
		      "innodb-troubleshooting.html\n"
3031
		      "InnoDB: for how to resolve the issue.\n", stderr);
3032
3033
		mem_free(filepath);
3034
3035
		return(FALSE);
3036
	}
3037
3038
	if (!check_space_id) {
3039
		space_id = id;
3040
3041
		goto skip_check;
3042
	}
3043
3044
	/* Read the first page of the tablespace */
3045
3046
	buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
3047
	/* Align the memory for file i/o if we might have O_DIRECT set */
3048
	page = ut_align(buf2, UNIV_PAGE_SIZE);
3049
3050
	success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
3051
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3052
	/* We have to read the tablespace id and flags from the file. */
1 by brian
clean slate
3053
3054
	space_id = fsp_header_get_space_id(page);
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3055
	space_flags = fsp_header_get_flags(page);
1 by brian
clean slate
3056
3057
	ut_free(buf2);
3058
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3059
	if (UNIV_UNLIKELY(space_id != id || space_flags != flags)) {
1 by brian
clean slate
3060
		ut_print_timestamp(stderr);
3061
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3062
		fputs("  InnoDB: Error: tablespace id and flags in file ",
3063
		      stderr);
1 by brian
clean slate
3064
		ut_print_filename(stderr, filepath);
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3065
		fprintf(stderr, " are %lu and %lu, but in the InnoDB\n"
3066
			"InnoDB: data dictionary they are %lu and %lu.\n"
1 by brian
clean slate
3067
			"InnoDB: Have you moved InnoDB .ibd files"
3068
			" around without using the\n"
3069
			"InnoDB: commands DISCARD TABLESPACE and"
3070
			" IMPORT TABLESPACE?\n"
3071
			"InnoDB: Please refer to\n"
3072
			"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
3073
			"innodb-troubleshooting.html\n"
3074
			"InnoDB: for how to resolve the issue.\n",
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3075
			(ulong) space_id, (ulong) space_flags,
3076
			(ulong) id, (ulong) flags);
1 by brian
clean slate
3077
3078
		ret = FALSE;
3079
3080
		goto func_exit;
3081
	}
3082
3083
skip_check:
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3084
	success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE);
1 by brian
clean slate
3085
3086
	if (!success) {
3087
		goto func_exit;
3088
	}
3089
3090
	/* We do not measure the size of the file, that is why we pass the 0
3091
	below */
3092
3093
	fil_node_create(filepath, 0, space_id, FALSE);
3094
func_exit:
3095
	os_file_close(file);
3096
	mem_free(filepath);
3097
3098
	return(ret);
3099
}
3100
3101
#ifdef UNIV_HOTBACKUP
/***********************************************************************
Allocates a file name for an old version of a single-table tablespace.
The result is the original name followed by "_ibbackup_old_vers_" and a
timestamp. The string must be freed by caller with mem_free()! */
static
char*
fil_make_ibbackup_old_name(
/*=======================*/
					/* out, own: file name */
	const char*	name)		/* in: original file name */
{
	static const char suffix[] = "_ibbackup_old_vers_";
	ulint	len	= strlen(name);
	/* sizeof suffix counts the terminating NUL; the extra 15 bytes are
	presumably the length of the timestamp text written below (TODO
	confirm against ut_sprintf_timestamp_without_extra_chars()). */
	char*	path	= mem_alloc(len + (15 + sizeof suffix));

	memcpy(path, name, len);
	/* Copy the suffix without its terminating NUL ... */
	memcpy(path + len, suffix, (sizeof suffix) - 1);
	/* ... and start the timestamp directly after the last suffix
	character. Starting it at len + sizeof suffix would leave one
	uninitialized byte inside the name and place the end of the
	timestamp one byte past the allocated buffer. */
	ut_sprintf_timestamp_without_extra_chars(
		path + len + ((sizeof suffix) - 1));
	return(path);
}
#endif /* UNIV_HOTBACKUP */
3122
3123
/************************************************************************
3124
Opens an .ibd file and adds the associated single-table tablespace to the
3125
InnoDB fil0fil.c data structures. */
3126
static
3127
void
3128
fil_load_single_table_tablespace(
3129
/*=============================*/
3130
	const char*	dbname,		/* in: database name */
3131
	const char*	filename)	/* in: file name (not a path),
3132
					including the .ibd extension */
3133
{
3134
	os_file_t	file;
3135
	char*		filepath;
3136
	ibool		success;
3137
	byte*		buf2;
3138
	byte*		page;
3139
	ulint		space_id;
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3140
	ulint		flags;
1 by brian
clean slate
3141
	ulint		size_low;
3142
	ulint		size_high;
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3143
	ib_int64_t	size;
1 by brian
clean slate
3144
#ifdef UNIV_HOTBACKUP
3145
	fil_space_t*	space;
3146
#endif
3147
	filepath = mem_alloc(strlen(dbname) + strlen(filename)
3148
			     + strlen(fil_path_to_mysql_datadir) + 3);
3149
3150
	sprintf(filepath, "%s/%s/%s", fil_path_to_mysql_datadir, dbname,
3151
		filename);
3152
	srv_normalize_path_for_win(filepath);
3153
#ifdef __WIN__
3154
# ifndef UNIV_HOTBACKUP
3155
	/* If lower_case_table_names is 0 or 2, then MySQL allows database
3156
	directory names with upper case letters. On Windows, all table and
3157
	database names in InnoDB are internally always in lower case. Put the
3158
	file path to lower case, so that we are consistent with InnoDB's
3159
	internal data dictionary. */
3160
3161
	dict_casedn_str(filepath);
3162
# endif /* !UNIV_HOTBACKUP */
3163
#endif
3164
	file = os_file_create_simple_no_error_handling(
3165
		filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
3166
	if (!success) {
3167
		/* The following call prints an error message */
3168
		os_file_get_last_error(TRUE);
3169
3170
		fprintf(stderr,
3171
			"InnoDB: Error: could not open single-table tablespace"
3172
			" file\n"
3173
			"InnoDB: %s!\n"
3174
			"InnoDB: We do not continue the crash recovery,"
3175
			" because the table may become\n"
3176
			"InnoDB: corrupt if we cannot apply the log records"
3177
			" in the InnoDB log to it.\n"
3178
			"InnoDB: To fix the problem and start mysqld:\n"
3179
			"InnoDB: 1) If there is a permission problem"
3180
			" in the file and mysqld cannot\n"
3181
			"InnoDB: open the file, you should"
3182
			" modify the permissions.\n"
3183
			"InnoDB: 2) If the table is not needed, or you can"
3184
			" restore it from a backup,\n"
3185
			"InnoDB: then you can remove the .ibd file,"
3186
			" and InnoDB will do a normal\n"
3187
			"InnoDB: crash recovery and ignore that table.\n"
3188
			"InnoDB: 3) If the file system or the"
3189
			" disk is broken, and you cannot remove\n"
3190
			"InnoDB: the .ibd file, you can set"
3191
			" innodb_force_recovery > 0 in my.cnf\n"
3192
			"InnoDB: and force InnoDB to continue crash"
3193
			" recovery here.\n", filepath);
3194
3195
		mem_free(filepath);
3196
3197
		if (srv_force_recovery > 0) {
3198
			fprintf(stderr,
3199
				"InnoDB: innodb_force_recovery"
3200
				" was set to %lu. Continuing crash recovery\n"
3201
				"InnoDB: even though we cannot access"
3202
				" the .ibd file of this table.\n",
3203
				srv_force_recovery);
3204
			return;
3205
		}
3206
3207
		exit(1);
3208
	}
3209
3210
	success = os_file_get_size(file, &size_low, &size_high);
3211
3212
	if (!success) {
3213
		/* The following call prints an error message */
3214
		os_file_get_last_error(TRUE);
3215
3216
		fprintf(stderr,
3217
			"InnoDB: Error: could not measure the size"
3218
			" of single-table tablespace file\n"
3219
			"InnoDB: %s!\n"
3220
			"InnoDB: We do not continue crash recovery,"
3221
			" because the table will become\n"
3222
			"InnoDB: corrupt if we cannot apply the log records"
3223
			" in the InnoDB log to it.\n"
3224
			"InnoDB: To fix the problem and start mysqld:\n"
3225
			"InnoDB: 1) If there is a permission problem"
3226
			" in the file and mysqld cannot\n"
3227
			"InnoDB: access the file, you should"
3228
			" modify the permissions.\n"
3229
			"InnoDB: 2) If the table is not needed,"
3230
			" or you can restore it from a backup,\n"
3231
			"InnoDB: then you can remove the .ibd file,"
3232
			" and InnoDB will do a normal\n"
3233
			"InnoDB: crash recovery and ignore that table.\n"
3234
			"InnoDB: 3) If the file system or the disk is broken,"
3235
			" and you cannot remove\n"
3236
			"InnoDB: the .ibd file, you can set"
3237
			" innodb_force_recovery > 0 in my.cnf\n"
3238
			"InnoDB: and force InnoDB to continue"
3239
			" crash recovery here.\n", filepath);
3240
3241
		os_file_close(file);
3242
		mem_free(filepath);
3243
3244
		if (srv_force_recovery > 0) {
3245
			fprintf(stderr,
3246
				"InnoDB: innodb_force_recovery"
3247
				" was set to %lu. Continuing crash recovery\n"
3248
				"InnoDB: even though we cannot access"
3249
				" the .ibd file of this table.\n",
3250
				srv_force_recovery);
3251
			return;
3252
		}
3253
3254
		exit(1);
3255
	}
3256
3257
	/* TODO: What to do in other cases where we cannot access an .ibd
3258
	file during a crash recovery? */
3259
3260
	/* Every .ibd file is created >= 4 pages in size. Smaller files
3261
	cannot be ok. */
3262
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3263
	size = (((ib_int64_t)size_high) << 32) + (ib_int64_t)size_low;
1 by brian
clean slate
3264
#ifndef UNIV_HOTBACKUP
3265
	if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
3266
		fprintf(stderr,
3267
			"InnoDB: Error: the size of single-table tablespace"
3268
			" file %s\n"
3269
			"InnoDB: is only %lu %lu, should be at least %lu!",
3270
			filepath,
3271
			(ulong) size_high,
3272
			(ulong) size_low, (ulong) (4 * UNIV_PAGE_SIZE));
3273
		os_file_close(file);
3274
		mem_free(filepath);
3275
3276
		return;
3277
	}
3278
#endif
3279
	/* Read the first page of the tablespace if the size big enough */
3280
3281
	buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
3282
	/* Align the memory for file i/o if we might have O_DIRECT set */
3283
	page = ut_align(buf2, UNIV_PAGE_SIZE);
3284
3285
	if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
3286
		success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
3287
3288
		/* We have to read the tablespace id from the file */
3289
3290
		space_id = fsp_header_get_space_id(page);
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3291
		flags = fsp_header_get_flags(page);
1 by brian
clean slate
3292
	} else {
3293
		space_id = ULINT_UNDEFINED;
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3294
		flags = 0;
1 by brian
clean slate
3295
	}
3296
3297
#ifndef UNIV_HOTBACKUP
3298
	if (space_id == ULINT_UNDEFINED || space_id == 0) {
3299
		fprintf(stderr,
3300
			"InnoDB: Error: tablespace id %lu in file %s"
3301
			" is not sensible\n",
3302
			(ulong) space_id,
3303
			filepath);
3304
		goto func_exit;
3305
	}
3306
#else
3307
	if (space_id == ULINT_UNDEFINED || space_id == 0) {
3308
		char*	new_path;
3309
3310
		fprintf(stderr,
3311
			"InnoDB: Renaming tablespace %s of id %lu,\n"
3312
			"InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
3313
			"InnoDB: because its size %lld is too small"
3314
			" (< 4 pages 16 kB each),\n"
3315
			"InnoDB: or the space id in the file header"
3316
			" is not sensible.\n"
3317
			"InnoDB: This can happen in an ibbackup run,"
3318
			" and is not dangerous.\n",
3319
			filepath, space_id, filepath, size);
3320
		os_file_close(file);
3321
3322
		new_path = fil_make_ibbackup_old_name(filepath);
3323
		ut_a(os_file_rename(filepath, new_path));
3324
3325
		ut_free(buf2);
3326
		mem_free(filepath);
3327
		mem_free(new_path);
3328
3329
		return;
3330
	}
3331
3332
	/* A backup may contain the same space several times, if the space got
3333
	renamed at a sensitive time. Since it is enough to have one version of
3334
	the space, we rename the file if a space with the same space id
3335
	already exists in the tablespace memory cache. We rather rename the
3336
	file than delete it, because if there is a bug, we do not want to
3337
	destroy valuable data. */
3338
3339
	mutex_enter(&(fil_system->mutex));
3340
3341
	space = fil_get_space_for_id_low(space_id);
3342
3343
	if (space) {
3344
		char*	new_path;
3345
3346
		fprintf(stderr,
3347
			"InnoDB: Renaming tablespace %s of id %lu,\n"
3348
			"InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
3349
			"InnoDB: because space %s with the same id\n"
3350
			"InnoDB: was scanned earlier. This can happen"
3351
			" if you have renamed tables\n"
3352
			"InnoDB: during an ibbackup run.\n",
3353
			filepath, space_id, filepath,
3354
			space->name);
3355
		os_file_close(file);
3356
3357
		new_path = fil_make_ibbackup_old_name(filepath);
3358
3359
		mutex_exit(&(fil_system->mutex));
3360
3361
		ut_a(os_file_rename(filepath, new_path));
3362
3363
		ut_free(buf2);
3364
		mem_free(filepath);
3365
		mem_free(new_path);
3366
3367
		return;
3368
	}
3369
	mutex_exit(&(fil_system->mutex));
3370
#endif
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3371
	success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE);
1 by brian
clean slate
3372
3373
	if (!success) {
3374
3375
		goto func_exit;
3376
	}
3377
3378
	/* We do not use the size information we have about the file, because
3379
	the rounding formula for extents and pages is somewhat complex; we
3380
	let fil_node_open() do that task. */
3381
3382
	fil_node_create(filepath, 0, space_id, FALSE);
3383
func_exit:
3384
	os_file_close(file);
3385
	ut_free(buf2);
3386
	mem_free(filepath);
3387
}
3388
3389
/***************************************************************************
3390
A fault-tolerant function that tries to read the next file name in the
3391
directory. We retry 100 times if os_file_readdir_next_file() returns -1. The
3392
idea is to read as much good data as we can and jump over bad data. */
3393
static
3394
int
3395
fil_file_readdir_next_file(
3396
/*=======================*/
3397
				/* out: 0 if ok, -1 if error even after the
3398
				retries, 1 if at the end of the directory */
3399
	ulint*		err,	/* out: this is set to DB_ERROR if an error
3400
				was encountered, otherwise not changed */
3401
	const char*	dirname,/* in: directory name or path */
3402
	os_file_dir_t	dir,	/* in: directory stream */
3403
	os_file_stat_t*	info)	/* in/out: buffer where the info is returned */
3404
{
3405
	ulint	i;
3406
	int	ret;
3407
3408
	for (i = 0; i < 100; i++) {
3409
		ret = os_file_readdir_next_file(dirname, dir, info);
3410
3411
		if (ret != -1) {
3412
3413
			return(ret);
3414
		}
3415
3416
		fprintf(stderr,
3417
			"InnoDB: Error: os_file_readdir_next_file()"
3418
			" returned -1 in\n"
3419
			"InnoDB: directory %s\n"
3420
			"InnoDB: Crash recovery may have failed"
3421
			" for some .ibd files!\n", dirname);
3422
3423
		*err = DB_ERROR;
3424
	}
3425
3426
	return(-1);
3427
}
3428
3429
/************************************************************************
3430
At the server startup, if we need crash recovery, scans the database
3431
directories under the MySQL datadir, looking for .ibd files. Those files are
3432
single-table tablespaces. We need to know the space id in each of them so that
3433
we know into which file we should look to check the contents of a page stored
3434
in the doublewrite buffer, also to know where to apply log records where the
3435
space id is != 0. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3436
UNIV_INTERN
1 by brian
clean slate
3437
ulint
3438
fil_load_single_table_tablespaces(void)
3439
/*===================================*/
3440
			/* out: DB_SUCCESS or error number */
3441
{
3442
	int		ret;
3443
	char*		dbpath		= NULL;
3444
	ulint		dbpath_len	= 100;
3445
	os_file_dir_t	dir;
3446
	os_file_dir_t	dbdir;
3447
	os_file_stat_t	dbinfo;
3448
	os_file_stat_t	fileinfo;
3449
	ulint		err		= DB_SUCCESS;
3450
3451
	/* The datadir of MySQL is always the default directory of mysqld */
3452
3453
	dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE);
3454
3455
	if (dir == NULL) {
3456
3457
		return(DB_ERROR);
3458
	}
3459
3460
	dbpath = mem_alloc(dbpath_len);
3461
3462
	/* Scan all directories under the datadir. They are the database
3463
	directories of MySQL. */
3464
3465
	ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir,
3466
					 &dbinfo);
3467
	while (ret == 0) {
3468
		ulint len;
3469
		/* printf("Looking at %s in datadir\n", dbinfo.name); */
3470
3471
		if (dbinfo.type == OS_FILE_TYPE_FILE
3472
		    || dbinfo.type == OS_FILE_TYPE_UNKNOWN) {
3473
3474
			goto next_datadir_item;
3475
		}
3476
3477
		/* We found a symlink or a directory; try opening it to see
3478
		if a symlink is a directory */
3479
3480
		len = strlen(fil_path_to_mysql_datadir)
3481
			+ strlen (dbinfo.name) + 2;
3482
		if (len > dbpath_len) {
3483
			dbpath_len = len;
3484
3485
			if (dbpath) {
3486
				mem_free(dbpath);
3487
			}
3488
3489
			dbpath = mem_alloc(dbpath_len);
3490
		}
3491
		sprintf(dbpath, "%s/%s", fil_path_to_mysql_datadir,
3492
			dbinfo.name);
3493
		srv_normalize_path_for_win(dbpath);
3494
3495
		dbdir = os_file_opendir(dbpath, FALSE);
3496
3497
		if (dbdir != NULL) {
3498
			/* printf("Opened dir %s\n", dbinfo.name); */
3499
3500
			/* We found a database directory; loop through it,
3501
			looking for possible .ibd files in it */
3502
3503
			ret = fil_file_readdir_next_file(&err, dbpath, dbdir,
3504
							 &fileinfo);
3505
			while (ret == 0) {
3506
				/* printf(
3507
				"     Looking at file %s\n", fileinfo.name); */
3508
3509
				if (fileinfo.type == OS_FILE_TYPE_DIR) {
3510
3511
					goto next_file_item;
3512
				}
3513
3514
				/* We found a symlink or a file */
3515
				if (strlen(fileinfo.name) > 4
3516
				    && 0 == strcmp(fileinfo.name
3517
						   + strlen(fileinfo.name) - 4,
3518
						   ".ibd")) {
3519
					/* The name ends in .ibd; try opening
3520
					the file */
3521
					fil_load_single_table_tablespace(
3522
						dbinfo.name, fileinfo.name);
3523
				}
3524
next_file_item:
3525
				ret = fil_file_readdir_next_file(&err,
3526
								 dbpath, dbdir,
3527
								 &fileinfo);
3528
			}
3529
3530
			if (0 != os_file_closedir(dbdir)) {
3531
				fputs("InnoDB: Warning: could not"
3532
				      " close database directory ", stderr);
3533
				ut_print_filename(stderr, dbpath);
3534
				putc('\n', stderr);
3535
3536
				err = DB_ERROR;
3537
			}
3538
		}
3539
3540
next_datadir_item:
3541
		ret = fil_file_readdir_next_file(&err,
3542
						 fil_path_to_mysql_datadir,
3543
						 dir, &dbinfo);
3544
	}
3545
3546
	mem_free(dbpath);
3547
3548
	if (0 != os_file_closedir(dir)) {
3549
		fprintf(stderr,
3550
			"InnoDB: Error: could not close MySQL datadir\n");
3551
3552
		return(DB_ERROR);
3553
	}
3554
3555
	return(err);
3556
}
3557
3558
/************************************************************************
3559
If we need crash recovery, and we have called
3560
fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
3561
we can call this function to print an error message of orphaned .ibd files
3562
for which there is not a data dictionary entry with a matching table name
3563
and space id. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3564
UNIV_INTERN
1 by brian
clean slate
3565
void
3566
fil_print_orphaned_tablespaces(void)
3567
/*================================*/
3568
{
3569
	fil_system_t*	system		= fil_system;
3570
	fil_space_t*	space;
3571
3572
	mutex_enter(&(system->mutex));
3573
3574
	space = UT_LIST_GET_FIRST(system->space_list);
3575
3576
	while (space) {
3577
		if (space->purpose == FIL_TABLESPACE && space->id != 0
3578
		    && !space->mark) {
3579
			fputs("InnoDB: Warning: tablespace ", stderr);
3580
			ut_print_filename(stderr, space->name);
3581
			fprintf(stderr, " of id %lu has no matching table in\n"
3582
				"InnoDB: the InnoDB data dictionary.\n",
3583
				(ulong) space->id);
3584
		}
3585
3586
		space = UT_LIST_GET_NEXT(space_list, space);
3587
	}
3588
3589
	mutex_exit(&(system->mutex));
3590
}
3591
3592
/***********************************************************************
3593
Returns TRUE if a single-table tablespace does not exist in the memory cache,
3594
or is being deleted there. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3595
UNIV_INTERN
1 by brian
clean slate
3596
ibool
3597
fil_tablespace_deleted_or_being_deleted_in_mem(
3598
/*===========================================*/
3599
				/* out: TRUE if does not exist or is being\
3600
				deleted */
3601
	ulint		id,	/* in: space id */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3602
	ib_int64_t	version)/* in: tablespace_version should be this; if
1 by brian
clean slate
3603
				you pass -1 as the value of this, then this
3604
				parameter is ignored */
3605
{
3606
	fil_system_t*	system	= fil_system;
3607
	fil_space_t*	space;
3608
3609
	ut_ad(system);
3610
3611
	mutex_enter(&(system->mutex));
3612
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3613
	space = fil_space_get_by_id(id);
1 by brian
clean slate
3614
3615
	if (space == NULL || space->is_being_deleted) {
3616
		mutex_exit(&(system->mutex));
3617
3618
		return(TRUE);
3619
	}
3620
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3621
	if (version != ((ib_int64_t)-1)
1 by brian
clean slate
3622
	    && space->tablespace_version != version) {
3623
		mutex_exit(&(system->mutex));
3624
3625
		return(TRUE);
3626
	}
3627
3628
	mutex_exit(&(system->mutex));
3629
3630
	return(FALSE);
3631
}
3632
3633
/***********************************************************************
3634
Returns TRUE if a single-table tablespace exists in the memory cache. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3635
UNIV_INTERN
1 by brian
clean slate
3636
ibool
3637
fil_tablespace_exists_in_mem(
3638
/*=========================*/
3639
			/* out: TRUE if exists */
3640
	ulint	id)	/* in: space id */
3641
{
3642
	fil_system_t*	system		= fil_system;
3643
	fil_space_t*	space;
3644
3645
	ut_ad(system);
3646
3647
	mutex_enter(&(system->mutex));
3648
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3649
	space = fil_space_get_by_id(id);
1 by brian
clean slate
3650
3651
	if (space == NULL) {
3652
		mutex_exit(&(system->mutex));
3653
3654
		return(FALSE);
3655
	}
3656
3657
	mutex_exit(&(system->mutex));
3658
3659
	return(TRUE);
3660
}
3661
3662
/***********************************************************************
3663
Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
3664
cache. Note that if we have not done a crash recovery at the database startup,
3665
there may be many tablespaces which are not yet in the memory cache. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3666
UNIV_INTERN
1 by brian
clean slate
3667
ibool
3668
fil_space_for_table_exists_in_mem(
3669
/*==============================*/
3670
					/* out: TRUE if a matching tablespace
3671
					exists in the memory cache */
3672
	ulint		id,		/* in: space id */
3673
	const char*	name,		/* in: table name in the standard
3674
					'databasename/tablename' format or
3675
					the dir path to a temp table */
3676
	ibool		is_temp,	/* in: TRUE if created with CREATE
3677
					TEMPORARY TABLE */
3678
	ibool		mark_space,	/* in: in crash recovery, at database
3679
					startup we mark all spaces which have
3680
					an associated table in the InnoDB
3681
					data dictionary, so that
3682
					we can print a warning about orphaned
3683
					tablespaces */
3684
	ibool		print_error_if_does_not_exist)
3685
					/* in: print detailed error
3686
					information to the .err log if a
3687
					matching tablespace is not found from
3688
					memory */
3689
{
3690
	fil_system_t*	system		= fil_system;
3691
	fil_space_t*	namespace;
3692
	fil_space_t*	space;
3693
	char*		path;
3694
3695
	ut_ad(system);
3696
3697
	mutex_enter(&(system->mutex));
3698
3699
	path = fil_make_ibd_name(name, is_temp);
3700
3701
	/* Look if there is a space with the same id */
3702
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3703
	space = fil_space_get_by_id(id);
1 by brian
clean slate
3704
3705
	/* Look if there is a space with the same name; the name is the
3706
	directory path from the datadir to the file */
3707
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3708
	namespace = fil_space_get_by_name(path);
1 by brian
clean slate
3709
	if (space && space == namespace) {
3710
		/* Found */
3711
3712
		if (mark_space) {
3713
			space->mark = TRUE;
3714
		}
3715
3716
		mem_free(path);
3717
		mutex_exit(&(system->mutex));
3718
3719
		return(TRUE);
3720
	}
3721
3722
	if (!print_error_if_does_not_exist) {
3723
3724
		mem_free(path);
3725
		mutex_exit(&(system->mutex));
3726
3727
		return(FALSE);
3728
	}
3729
3730
	if (space == NULL) {
3731
		if (namespace == NULL) {
3732
			ut_print_timestamp(stderr);
3733
			fputs("  InnoDB: Error: table ", stderr);
3734
			ut_print_filename(stderr, name);
3735
			fprintf(stderr, "\n"
3736
				"InnoDB: in InnoDB data dictionary"
3737
				" has tablespace id %lu,\n"
3738
				"InnoDB: but tablespace with that id"
3739
				" or name does not exist. Have\n"
3740
				"InnoDB: you deleted or moved .ibd files?\n"
3741
				"InnoDB: This may also be a table created with"
3742
				" CREATE TEMPORARY TABLE\n"
3743
				"InnoDB: whose .ibd and .frm files"
3744
				" MySQL automatically removed, but the\n"
3745
				"InnoDB: table still exists in the"
3746
				" InnoDB internal data dictionary.\n",
3747
				(ulong) id);
3748
		} else {
3749
			ut_print_timestamp(stderr);
3750
			fputs("  InnoDB: Error: table ", stderr);
3751
			ut_print_filename(stderr, name);
3752
			fprintf(stderr, "\n"
3753
				"InnoDB: in InnoDB data dictionary has"
3754
				" tablespace id %lu,\n"
3755
				"InnoDB: but a tablespace with that id"
3756
				" does not exist. There is\n"
3757
				"InnoDB: a tablespace of name %s and id %lu,"
3758
				" though. Have\n"
3759
				"InnoDB: you deleted or moved .ibd files?\n",
3760
				(ulong) id, namespace->name,
3761
				(ulong) namespace->id);
3762
		}
3763
error_exit:
3764
		fputs("InnoDB: Please refer to\n"
3765
		      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
3766
		      "innodb-troubleshooting.html\n"
3767
		      "InnoDB: for how to resolve the issue.\n", stderr);
3768
3769
		mem_free(path);
3770
		mutex_exit(&(system->mutex));
3771
3772
		return(FALSE);
3773
	}
3774
3775
	if (0 != strcmp(space->name, path)) {
3776
		ut_print_timestamp(stderr);
3777
		fputs("  InnoDB: Error: table ", stderr);
3778
		ut_print_filename(stderr, name);
3779
		fprintf(stderr, "\n"
3780
			"InnoDB: in InnoDB data dictionary has"
3781
			" tablespace id %lu,\n"
3782
			"InnoDB: but the tablespace with that id"
3783
			" has name %s.\n"
3784
			"InnoDB: Have you deleted or moved .ibd files?\n",
3785
			(ulong) id, space->name);
3786
3787
		if (namespace != NULL) {
3788
			fputs("InnoDB: There is a tablespace"
3789
			      " with the right name\n"
3790
			      "InnoDB: ", stderr);
3791
			ut_print_filename(stderr, namespace->name);
3792
			fprintf(stderr, ", but its id is %lu.\n",
3793
				(ulong) namespace->id);
3794
		}
3795
3796
		goto error_exit;
3797
	}
3798
3799
	mem_free(path);
3800
	mutex_exit(&(system->mutex));
3801
3802
	return(FALSE);
3803
}
3804
3805
/***********************************************************************
3806
Checks if a single-table tablespace for a given table name exists in the
3807
tablespace memory cache. */
3808
static
3809
ulint
3810
fil_get_space_id_for_table(
3811
/*=======================*/
3812
				/* out: space id, ULINT_UNDEFINED if not
3813
				found */
3814
	const char*	name)	/* in: table name in the standard
3815
				'databasename/tablename' format */
3816
{
3817
	fil_system_t*	system		= fil_system;
3818
	fil_space_t*	namespace;
3819
	ulint		id		= ULINT_UNDEFINED;
3820
	char*		path;
3821
3822
	ut_ad(system);
3823
3824
	mutex_enter(&(system->mutex));
3825
3826
	path = fil_make_ibd_name(name, FALSE);
3827
3828
	/* Look if there is a space with the same name; the name is the
3829
	directory path to the file */
3830
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3831
	namespace = fil_space_get_by_name(path);
3832
1 by brian
clean slate
3833
	if (namespace) {
3834
		id = namespace->id;
3835
	}
3836
3837
	mem_free(path);
3838
3839
	mutex_exit(&(system->mutex));
3840
3841
	return(id);
3842
}
3843
3844
/**************************************************************************
3845
Tries to extend a data file so that it would accommodate the number of pages
3846
given. The tablespace must be cached in the memory cache. If the space is big
3847
enough already, does nothing. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3848
UNIV_INTERN
1 by brian
clean slate
3849
ibool
3850
fil_extend_space_to_desired_size(
3851
/*=============================*/
3852
				/* out: TRUE if success */
3853
	ulint*	actual_size,	/* out: size of the space after extension;
3854
				if we ran out of disk space this may be lower
3855
				than the desired size */
3856
	ulint	space_id,	/* in: space id */
3857
	ulint	size_after_extend)/* in: desired size in pages after the
3858
				extension; if the current space size is bigger
3859
				than this already, the function does nothing */
3860
{
3861
	fil_system_t*	system		= fil_system;
3862
	fil_node_t*	node;
3863
	fil_space_t*	space;
3864
	byte*		buf2;
3865
	byte*		buf;
3866
	ulint		buf_size;
3867
	ulint		start_page_no;
3868
	ulint		file_start_page_no;
3869
	ulint		offset_high;
3870
	ulint		offset_low;
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3871
	ulint		page_size;
1 by brian
clean slate
3872
	ibool		success		= TRUE;
3873
3874
	fil_mutex_enter_and_prepare_for_io(space_id);
3875
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3876
	space = fil_space_get_by_id(space_id);
1 by brian
clean slate
3877
	ut_a(space);
3878
3879
	if (space->size >= size_after_extend) {
3880
		/* Space already big enough */
3881
3882
		*actual_size = space->size;
3883
3884
		mutex_exit(&(system->mutex));
3885
3886
		return(TRUE);
3887
	}
3888
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3889
	page_size = dict_table_flags_to_zip_size(space->flags);
3890
	if (!page_size) {
3891
		page_size = UNIV_PAGE_SIZE;
3892
	}
3893
1 by brian
clean slate
3894
	node = UT_LIST_GET_LAST(space->chain);
3895
3896
	fil_node_prepare_for_io(node, system, space);
3897
3898
	start_page_no = space->size;
3899
	file_start_page_no = space->size - node->size;
3900
3901
	/* Extend at most 64 pages at a time */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3902
	buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
3903
	buf2 = mem_alloc(buf_size + page_size);
3904
	buf = ut_align(buf2, page_size);
1 by brian
clean slate
3905
3906
	memset(buf, 0, buf_size);
3907
3908
	while (start_page_no < size_after_extend) {
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3909
		ulint	n_pages = ut_min(buf_size / page_size,
1 by brian
clean slate
3910
					 size_after_extend - start_page_no);
3911
3912
		offset_high = (start_page_no - file_start_page_no)
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3913
			/ (4096 * ((1024 * 1024) / page_size));
1 by brian
clean slate
3914
		offset_low  = ((start_page_no - file_start_page_no)
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3915
			       % (4096 * ((1024 * 1024) / page_size)))
3916
			* page_size;
1 by brian
clean slate
3917
#ifdef UNIV_HOTBACKUP
3918
		success = os_file_write(node->name, node->handle, buf,
3919
					offset_low, offset_high,
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3920
					page_size * n_pages);
1 by brian
clean slate
3921
#else
3922
		success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
3923
				 node->name, node->handle, buf,
3924
				 offset_low, offset_high,
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3925
				 page_size * n_pages,
1 by brian
clean slate
3926
				 NULL, NULL);
3927
#endif
3928
		if (success) {
3929
			node->size += n_pages;
3930
			space->size += n_pages;
3931
3932
			os_has_said_disk_full = FALSE;
3933
		} else {
3934
			/* Let us measure the size of the file to determine
3935
			how much we were able to extend it */
3936
3937
			n_pages = ((ulint)
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3938
				   (os_file_get_size_as_iblonglong(
3939
					   node->handle)
3940
				    / page_size)) - node->size;
1 by brian
clean slate
3941
3942
			node->size += n_pages;
3943
			space->size += n_pages;
3944
3945
			break;
3946
		}
3947
3948
		start_page_no += n_pages;
3949
	}
3950
3951
	mem_free(buf2);
3952
3953
	fil_node_complete_io(node, system, OS_FILE_WRITE);
3954
3955
	*actual_size = space->size;
3956
3957
#ifndef UNIV_HOTBACKUP
3958
	if (space_id == 0) {
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
3959
		ulint pages_per_mb = (1024 * 1024) / page_size;
1 by brian
clean slate
3960
3961
		/* Keep the last data file size info up to date, rounded to
3962
		full megabytes */
3963
3964
		srv_data_file_sizes[srv_n_data_files - 1]
3965
			= (node->size / pages_per_mb) * pages_per_mb;
3966
	}
3967
#endif /* !UNIV_HOTBACKUP */
3968
3969
	/*
3970
	printf("Extended %s to %lu, actual size %lu pages\n", space->name,
3971
	size_after_extend, *actual_size); */
3972
	mutex_exit(&(system->mutex));
3973
3974
	fil_flush(space_id);
3975
3976
	return(success);
3977
}
3978
3979
#ifdef UNIV_HOTBACKUP
/************************************************************************
Extends all tablespaces to the size stored in the space header. During the
ibbackup --apply-log phase we extended the spaces on-demand so that log records
could be applied, but that may have left spaces still too small compared to
the size stored in the space header. For every space in the space list the
first page is read, the size is taken from it with fsp_get_size_low(), and
the space is extended to that size. If an extension fails, an error is
printed to stderr and the process is terminated with exit(1). */
UNIV_INTERN
void
fil_extend_tablespaces_to_stored_len(void)
/*======================================*/
{
	fil_system_t*	system		= fil_system;
	fil_space_t*	space;
	byte*		buf;
	ulint		actual_size;
	ulint		size_in_header;
	ulint		error;
	ibool		success;

	/* Scratch buffer that receives the first page of each space */
	buf = mem_alloc(UNIV_PAGE_SIZE);

	mutex_enter(&(system->mutex));

	space = UT_LIST_GET_FIRST(system->space_list);

	while (space) {
		ut_a(space->purpose == FIL_TABLESPACE);

		mutex_exit(&(system->mutex)); /* no need to protect with a
					      mutex, because this is a
					      single-threaded operation */
		error = fil_read(TRUE, space->id, space->zip_size,
				 0, 0, UNIV_PAGE_SIZE, buf, NULL);
		ut_a(error == DB_SUCCESS);

		size_in_header = fsp_get_size_low(buf);

		success = fil_extend_space_to_desired_size(
			&actual_size, space->id, size_in_header);
		if (!success) {
			/* The ulint values are cast to ulong so that they
			match the %lu conversions, as is done in the other
			diagnostics of this file */
			fprintf(stderr,
				"InnoDB: Error: could not extend the"
				" tablespace of %s\n"
				"InnoDB: to the size stored in header,"
				" %lu pages;\n"
				"InnoDB: size after extension %lu pages\n"
				"InnoDB: Check that you have free disk space"
				" and retry!\n",
				space->name, (ulong) size_in_header,
				(ulong) actual_size);
			exit(1);
		}

		mutex_enter(&(system->mutex));

		space = UT_LIST_GET_NEXT(space_list, space);
	}

	mutex_exit(&(system->mutex));

	mem_free(buf);
}
#endif
4041
4042
/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
4043
4044
/***********************************************************************
4045
Tries to reserve free extents in a file space. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4046
UNIV_INTERN
1 by brian
clean slate
4047
ibool
4048
fil_space_reserve_free_extents(
4049
/*===========================*/
4050
				/* out: TRUE if succeed */
4051
	ulint	id,		/* in: space id */
4052
	ulint	n_free_now,	/* in: number of free extents now */
4053
	ulint	n_to_reserve)	/* in: how many one wants to reserve */
4054
{
4055
	fil_system_t*	system		= fil_system;
4056
	fil_space_t*	space;
4057
	ibool		success;
4058
4059
	ut_ad(system);
4060
4061
	mutex_enter(&(system->mutex));
4062
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4063
	space = fil_space_get_by_id(id);
1 by brian
clean slate
4064
4065
	ut_a(space);
4066
4067
	if (space->n_reserved_extents + n_to_reserve > n_free_now) {
4068
		success = FALSE;
4069
	} else {
4070
		space->n_reserved_extents += n_to_reserve;
4071
		success = TRUE;
4072
	}
4073
4074
	mutex_exit(&(system->mutex));
4075
4076
	return(success);
4077
}
4078
4079
/***********************************************************************
4080
Releases free extents in a file space. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4081
UNIV_INTERN
1 by brian
clean slate
4082
void
4083
fil_space_release_free_extents(
4084
/*===========================*/
4085
	ulint	id,		/* in: space id */
4086
	ulint	n_reserved)	/* in: how many one reserved */
4087
{
4088
	fil_system_t*	system		= fil_system;
4089
	fil_space_t*	space;
4090
4091
	ut_ad(system);
4092
4093
	mutex_enter(&(system->mutex));
4094
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4095
	space = fil_space_get_by_id(id);
1 by brian
clean slate
4096
4097
	ut_a(space);
4098
	ut_a(space->n_reserved_extents >= n_reserved);
4099
4100
	space->n_reserved_extents -= n_reserved;
4101
4102
	mutex_exit(&(system->mutex));
4103
}
4104
4105
/***********************************************************************
4106
Gets the number of reserved extents. If the database is silent, this number
4107
should be zero. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4108
UNIV_INTERN
1 by brian
clean slate
4109
ulint
4110
fil_space_get_n_reserved_extents(
4111
/*=============================*/
4112
	ulint	id)		/* in: space id */
4113
{
4114
	fil_system_t*	system		= fil_system;
4115
	fil_space_t*	space;
4116
	ulint		n;
4117
4118
	ut_ad(system);
4119
4120
	mutex_enter(&(system->mutex));
4121
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4122
	space = fil_space_get_by_id(id);
1 by brian
clean slate
4123
4124
	ut_a(space);
4125
4126
	n = space->n_reserved_extents;
4127
4128
	mutex_exit(&(system->mutex));
4129
4130
	return(n);
4131
}
4132
4133
/*============================ FILE I/O ================================*/
4134
4135
/************************************************************************
4136
NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
4137
4138
Prepares a file node for i/o. Opens the file if it is closed. Updates the
4139
pending i/o's field in the node and the system appropriately. Takes the node
4140
off the LRU list if it is in the LRU list. The caller must hold the fil_sys
4141
mutex. */
4142
static
4143
void
4144
fil_node_prepare_for_io(
4145
/*====================*/
4146
	fil_node_t*	node,	/* in: file node */
4147
	fil_system_t*	system,	/* in: tablespace memory cache */
4148
	fil_space_t*	space)	/* in: space */
4149
{
4150
	ut_ad(node && system && space);
4151
	ut_ad(mutex_own(&(system->mutex)));
4152
4153
	if (system->n_open > system->max_n_open + 5) {
4154
		ut_print_timestamp(stderr);
4155
		fprintf(stderr,
4156
			"  InnoDB: Warning: open files %lu"
4157
			" exceeds the limit %lu\n",
4158
			(ulong) system->n_open,
4159
			(ulong) system->max_n_open);
4160
	}
4161
4162
	if (node->open == FALSE) {
4163
		/* File is closed: open it */
4164
		ut_a(node->n_pending == 0);
4165
4166
		fil_node_open_file(node, system, space);
4167
	}
4168
4169
	if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE
4170
	    && space->id != 0) {
4171
		/* The node is in the LRU list, remove it */
4172
4173
		ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
4174
4175
		UT_LIST_REMOVE(LRU, system->LRU, node);
4176
	}
4177
4178
	node->n_pending++;
4179
}
4180
4181
/************************************************************************
4182
Updates the data structures when an i/o operation finishes. Updates the
4183
pending i/o's field in the node appropriately. */
4184
static
4185
void
4186
fil_node_complete_io(
4187
/*=================*/
4188
	fil_node_t*	node,	/* in: file node */
4189
	fil_system_t*	system,	/* in: tablespace memory cache */
4190
	ulint		type)	/* in: OS_FILE_WRITE or OS_FILE_READ; marks
4191
				the node as modified if
4192
				type == OS_FILE_WRITE */
4193
{
4194
	ut_ad(node);
4195
	ut_ad(system);
4196
	ut_ad(mutex_own(&(system->mutex)));
4197
4198
	ut_a(node->n_pending > 0);
4199
4200
	node->n_pending--;
4201
4202
	if (type == OS_FILE_WRITE) {
4203
		system->modification_counter++;
4204
		node->modification_counter = system->modification_counter;
4205
4206
		if (!node->space->is_in_unflushed_spaces) {
4207
4208
			node->space->is_in_unflushed_spaces = TRUE;
4209
			UT_LIST_ADD_FIRST(unflushed_spaces,
4210
					  system->unflushed_spaces,
4211
					  node->space);
4212
		}
4213
	}
4214
4215
	if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE
4216
	    && node->space->id != 0) {
4217
		/* The node must be put back to the LRU list */
4218
		UT_LIST_ADD_FIRST(LRU, system->LRU, node);
4219
	}
4220
}
4221
4222
/************************************************************************
4223
Report information about an invalid page access. */
4224
static
4225
void
4226
fil_report_invalid_page_access(
4227
/*===========================*/
4228
	ulint		block_offset,	/* in: block offset */
4229
	ulint		space_id,	/* in: space id */
4230
	const char*	space_name,	/* in: space name */
4231
	ulint		byte_offset,	/* in: byte offset */
4232
	ulint		len,		/* in: I/O length */
4233
	ulint		type)		/* in: I/O type */
4234
{
4235
	fprintf(stderr,
4236
		"InnoDB: Error: trying to access page number %lu"
4237
		" in space %lu,\n"
4238
		"InnoDB: space name %s,\n"
4239
		"InnoDB: which is outside the tablespace bounds.\n"
4240
		"InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n"
4241
		"InnoDB: If you get this error at mysqld startup,"
4242
		" please check that\n"
4243
		"InnoDB: your my.cnf matches the ibdata files"
4244
		" that you have in the\n"
4245
		"InnoDB: MySQL server.\n",
4246
		(ulong) block_offset, (ulong) space_id, space_name,
4247
		(ulong) byte_offset, (ulong) len, (ulong) type);
4248
}
4249
4250
/************************************************************************
4251
Reads or writes data. This operation is asynchronous (aio). */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4252
UNIV_INTERN
1 by brian
clean slate
4253
ulint
4254
fil_io(
4255
/*===*/
4256
				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
4257
				if we are trying to do i/o on a tablespace
4258
				which does not exist */
4259
	ulint	type,		/* in: OS_FILE_READ or OS_FILE_WRITE,
4260
				ORed to OS_FILE_LOG, if a log i/o
4261
				and ORed to OS_AIO_SIMULATED_WAKE_LATER
4262
				if simulated aio and we want to post a
4263
				batch of i/os; NOTE that a simulated batch
4264
				may introduce hidden chances of deadlocks,
4265
				because i/os are not actually handled until
4266
				all have been posted: use with great
4267
				caution! */
4268
	ibool	sync,		/* in: TRUE if synchronous aio is desired */
4269
	ulint	space_id,	/* in: space id */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4270
	ulint	zip_size,	/* in: compressed page size in bytes;
4271
				0 for uncompressed pages */
1 by brian
clean slate
4272
	ulint	block_offset,	/* in: offset in number of blocks */
4273
	ulint	byte_offset,	/* in: remainder of offset in bytes; in
4274
				aio this must be divisible by the OS block
4275
				size */
4276
	ulint	len,		/* in: how many bytes to read or write; this
4277
				must not cross a file boundary; in aio this
4278
				must be a block size multiple */
4279
	void*	buf,		/* in/out: buffer where to store read data
4280
				or from where to write; in aio this must be
4281
				appropriately aligned */
4282
	void*	message)	/* in: message for aio handler if non-sync
4283
				aio used, else ignored */
4284
{
4285
	fil_system_t*	system		= fil_system;
4286
	ulint		mode;
4287
	fil_space_t*	space;
4288
	fil_node_t*	node;
4289
	ulint		offset_high;
4290
	ulint		offset_low;
4291
	ibool		ret;
4292
	ulint		is_log;
4293
	ulint		wake_later;
4294
4295
	is_log = type & OS_FILE_LOG;
4296
	type = type & ~OS_FILE_LOG;
4297
4298
	wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
4299
	type = type & ~OS_AIO_SIMULATED_WAKE_LATER;
4300
4301
	ut_ad(byte_offset < UNIV_PAGE_SIZE);
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4302
	ut_ad(!zip_size || !byte_offset);
4303
	ut_ad(ut_is_2pow(zip_size));
1 by brian
clean slate
4304
	ut_ad(buf);
4305
	ut_ad(len > 0);
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4306
#if (1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE
4307
# error "(1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE"
4308
#endif
1 by brian
clean slate
4309
	ut_ad(fil_validate());
4310
#ifndef UNIV_LOG_DEBUG
4311
	/* ibuf bitmap pages must be read in the sync aio mode: */
4312
	ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4313
	      || !ibuf_bitmap_page(zip_size, block_offset)
4314
	      || sync || is_log);
1 by brian
clean slate
4315
#ifdef UNIV_SYNC_DEBUG
4316
	ut_ad(!ibuf_inside() || is_log || (type == OS_FILE_WRITE)
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4317
	      || ibuf_page(space_id, zip_size, block_offset));
1 by brian
clean slate
4318
#endif
4319
#endif
4320
	if (sync) {
4321
		mode = OS_AIO_SYNC;
4322
	} else if (type == OS_FILE_READ && !is_log
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4323
		   && ibuf_page(space_id, zip_size, block_offset)) {
1 by brian
clean slate
4324
		mode = OS_AIO_IBUF;
4325
	} else if (is_log) {
4326
		mode = OS_AIO_LOG;
4327
	} else {
4328
		mode = OS_AIO_NORMAL;
4329
	}
4330
4331
	if (type == OS_FILE_READ) {
4332
		srv_data_read+= len;
4333
	} else if (type == OS_FILE_WRITE) {
4334
		srv_data_written+= len;
4335
	}
4336
4337
	/* Reserve the fil_system mutex and make sure that we can open at
4338
	least one file while holding it, if the file is not already open */
4339
4340
	fil_mutex_enter_and_prepare_for_io(space_id);
4341
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4342
	space = fil_space_get_by_id(space_id);
4343
1 by brian
clean slate
4344
	if (!space) {
4345
		mutex_exit(&(system->mutex));
4346
4347
		ut_print_timestamp(stderr);
4348
		fprintf(stderr,
4349
			"  InnoDB: Error: trying to do i/o"
4350
			" to a tablespace which does not exist.\n"
4351
			"InnoDB: i/o type %lu, space id %lu,"
4352
			" page no. %lu, i/o length %lu bytes\n",
4353
			(ulong) type, (ulong) space_id, (ulong) block_offset,
4354
			(ulong) len);
4355
4356
		return(DB_TABLESPACE_DELETED);
4357
	}
4358
4359
	ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));
4360
4361
	node = UT_LIST_GET_FIRST(space->chain);
4362
4363
	for (;;) {
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4364
		if (UNIV_UNLIKELY(node == NULL)) {
1 by brian
clean slate
4365
			fil_report_invalid_page_access(
4366
				block_offset, space_id, space->name,
4367
				byte_offset, len, type);
4368
4369
			ut_error;
4370
		}
4371
4372
		if (space->id != 0 && node->size == 0) {
4373
			/* We do not know the size of a single-table tablespace
4374
			before we open the file */
4375
4376
			break;
4377
		}
4378
4379
		if (node->size > block_offset) {
4380
			/* Found! */
4381
			break;
4382
		} else {
4383
			block_offset -= node->size;
4384
			node = UT_LIST_GET_NEXT(chain, node);
4385
		}
4386
	}
4387
4388
	/* Open file if closed */
4389
	fil_node_prepare_for_io(node, system, space);
4390
4391
	/* Check that at least the start offset is within the bounds of a
4392
	single-table tablespace */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4393
	if (UNIV_UNLIKELY(node->size <= block_offset)
4394
	    && space->id != 0 && space->purpose == FIL_TABLESPACE) {
1 by brian
clean slate
4395
4396
		fil_report_invalid_page_access(
4397
			block_offset, space_id, space->name, byte_offset,
4398
			len, type);
4399
4400
		ut_error;
4401
	}
4402
4403
	/* Now we have made the changes in the data structures of system */
4404
	mutex_exit(&(system->mutex));
4405
4406
	/* Calculate the low 32 bits and the high 32 bits of the file offset */
4407
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4408
	if (!zip_size) {
4409
		offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
4410
		offset_low  = ((block_offset << UNIV_PAGE_SIZE_SHIFT)
4411
			       & 0xFFFFFFFFUL) + byte_offset;
1 by brian
clean slate
4412
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4413
		ut_a(node->size - block_offset
4414
		     >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1))
4415
			 / UNIV_PAGE_SIZE));
4416
	} else {
4417
		ulint	zip_size_shift;
4418
		switch (zip_size) {
4419
		case 1024: zip_size_shift = 10; break;
4420
		case 2048: zip_size_shift = 11; break;
4421
		case 4096: zip_size_shift = 12; break;
4422
		case 8192: zip_size_shift = 13; break;
4423
		case 16384: zip_size_shift = 14; break;
4424
		default: ut_error;
4425
		}
4426
		offset_high = block_offset >> (32 - zip_size_shift);
4427
		offset_low = (block_offset << zip_size_shift & 0xFFFFFFFFUL)
4428
			+ byte_offset;
4429
		ut_a(node->size - block_offset
4430
		     >= (len + (zip_size - 1)) / zip_size);
4431
	}
1 by brian
clean slate
4432
4433
	/* Do aio */
4434
4435
	ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
4436
	ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
4437
4438
#ifdef UNIV_HOTBACKUP
4439
	/* In ibbackup do normal i/o, not aio */
4440
	if (type == OS_FILE_READ) {
4441
		ret = os_file_read(node->handle, buf, offset_low, offset_high,
4442
				   len);
4443
	} else {
4444
		ret = os_file_write(node->name, node->handle, buf,
4445
				    offset_low, offset_high, len);
4446
	}
4447
#else
4448
	/* Queue the aio request */
4449
	ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
4450
		     offset_low, offset_high, len, node, message);
4451
#endif
4452
	ut_a(ret);
4453
4454
	if (mode == OS_AIO_SYNC) {
4455
		/* The i/o operation is already completed when we return from
4456
		os_aio: */
4457
4458
		mutex_enter(&(system->mutex));
4459
4460
		fil_node_complete_io(node, system, type);
4461
4462
		mutex_exit(&(system->mutex));
4463
4464
		ut_ad(fil_validate());
4465
	}
4466
4467
	return(DB_SUCCESS);
4468
}
4469
4470
/**************************************************************************
4471
Waits for an aio operation to complete. This function is used to write the
4472
handler for completed requests. The aio array of pending requests is divided
4473
into segments (see os0file.c for more info). The thread specifies which
4474
segment it wants to wait for. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4475
UNIV_INTERN
1 by brian
clean slate
4476
void
4477
fil_aio_wait(
4478
/*=========*/
4479
	ulint	segment)	/* in: the number of the segment in the aio
4480
				array to wait for */
4481
{
4482
	fil_system_t*	system		= fil_system;
4483
	ibool		ret;
4484
	fil_node_t*	fil_node;
4485
	void*		message;
4486
	ulint		type;
4487
4488
	ut_ad(fil_validate());
4489
4490
	if (os_aio_use_native_aio) {
4491
		srv_set_io_thread_op_info(segment, "native aio handle");
4492
#ifdef WIN_ASYNC_IO
4493
		ret = os_aio_windows_handle(segment, 0, &fil_node,
4494
					    &message, &type);
4495
#elif defined(POSIX_ASYNC_IO)
4496
		ret = os_aio_posix_handle(segment, &fil_node, &message);
4497
#else
4498
		ret = 0; /* Eliminate compiler warning */
4499
		ut_error;
4500
#endif
4501
	} else {
4502
		srv_set_io_thread_op_info(segment, "simulated aio handle");
4503
4504
		ret = os_aio_simulated_handle(segment, &fil_node,
4505
					      &message, &type);
4506
	}
4507
4508
	ut_a(ret);
4509
4510
	srv_set_io_thread_op_info(segment, "complete io for fil node");
4511
4512
	mutex_enter(&(system->mutex));
4513
4514
	fil_node_complete_io(fil_node, fil_system, type);
4515
4516
	mutex_exit(&(system->mutex));
4517
4518
	ut_ad(fil_validate());
4519
4520
	/* Do the i/o handling */
4521
	/* IMPORTANT: since i/o handling for reads will read also the insert
4522
	buffer in tablespace 0, you have to be very careful not to introduce
4523
	deadlocks in the i/o system. We keep tablespace 0 data files always
4524
	open, and use a special i/o thread to serve insert buffer requests. */
4525
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4526
	if (fil_node->space->purpose == FIL_TABLESPACE) {
1 by brian
clean slate
4527
		srv_set_io_thread_op_info(segment, "complete io for buf page");
4528
		buf_page_io_complete(message);
4529
	} else {
4530
		srv_set_io_thread_op_info(segment, "complete io for log");
4531
		log_io_complete(message);
4532
	}
4533
}
4534
4535
/**************************************************************************
4536
Flushes to disk possible writes cached by the OS. If the space does not exist
4537
or is being dropped, does not do anything. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4538
UNIV_INTERN
1 by brian
clean slate
4539
void
4540
fil_flush(
4541
/*======*/
4542
	ulint	space_id)	/* in: file space id (this can be a group of
4543
				log files or a tablespace of the database) */
4544
{
4545
	fil_system_t*	system	= fil_system;
4546
	fil_space_t*	space;
4547
	fil_node_t*	node;
4548
	os_file_t	file;
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4549
	ib_int64_t	old_mod_counter;
1 by brian
clean slate
4550
4551
	mutex_enter(&(system->mutex));
4552
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4553
	space = fil_space_get_by_id(space_id);
4554
1 by brian
clean slate
4555
	if (!space || space->is_being_deleted) {
4556
		mutex_exit(&(system->mutex));
4557
4558
		return;
4559
	}
4560
4561
	space->n_pending_flushes++;	/* prevent dropping of the space while
4562
					we are flushing */
4563
	node = UT_LIST_GET_FIRST(space->chain);
4564
4565
	while (node) {
4566
		if (node->modification_counter > node->flush_counter) {
4567
			ut_a(node->open);
4568
4569
			/* We want to flush the changes at least up to
4570
			old_mod_counter */
4571
			old_mod_counter = node->modification_counter;
4572
4573
			if (space->purpose == FIL_TABLESPACE) {
4574
				fil_n_pending_tablespace_flushes++;
4575
			} else {
4576
				fil_n_pending_log_flushes++;
4577
				fil_n_log_flushes++;
4578
			}
4579
#ifdef __WIN__
4580
			if (node->is_raw_disk) {
4581
4582
				goto skip_flush;
4583
			}
4584
#endif
4585
retry:
4586
			if (node->n_pending_flushes > 0) {
4587
				/* We want to avoid calling os_file_flush() on
4588
				the file twice at the same time, because we do
4589
				not know what bugs OS's may contain in file
4590
				i/o; sleep for a while */
4591
4592
				mutex_exit(&(system->mutex));
4593
4594
				os_thread_sleep(20000);
4595
4596
				mutex_enter(&(system->mutex));
4597
4598
				if (node->flush_counter >= old_mod_counter) {
4599
4600
					goto skip_flush;
4601
				}
4602
4603
				goto retry;
4604
			}
4605
4606
			ut_a(node->open);
4607
			file = node->handle;
4608
			node->n_pending_flushes++;
4609
4610
			mutex_exit(&(system->mutex));
4611
4612
			/* fprintf(stderr, "Flushing to file %s\n",
4613
			node->name); */
4614
4615
			os_file_flush(file);
4616
4617
			mutex_enter(&(system->mutex));
4618
4619
			node->n_pending_flushes--;
4620
skip_flush:
4621
			if (node->flush_counter < old_mod_counter) {
4622
				node->flush_counter = old_mod_counter;
4623
4624
				if (space->is_in_unflushed_spaces
4625
				    && fil_space_is_flushed(space)) {
4626
4627
					space->is_in_unflushed_spaces = FALSE;
4628
4629
					UT_LIST_REMOVE(
4630
						unflushed_spaces,
4631
						system->unflushed_spaces,
4632
						space);
4633
				}
4634
			}
4635
4636
			if (space->purpose == FIL_TABLESPACE) {
4637
				fil_n_pending_tablespace_flushes--;
4638
			} else {
4639
				fil_n_pending_log_flushes--;
4640
			}
4641
		}
4642
4643
		node = UT_LIST_GET_NEXT(chain, node);
4644
	}
4645
4646
	space->n_pending_flushes--;
4647
4648
	mutex_exit(&(system->mutex));
4649
}
4650
4651
/**************************************************************************
4652
Flushes to disk the writes in file spaces of the given type possibly cached by
4653
the OS. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4654
UNIV_INTERN
1 by brian
clean slate
4655
void
4656
fil_flush_file_spaces(
4657
/*==================*/
4658
	ulint	purpose)	/* in: FIL_TABLESPACE, FIL_LOG */
4659
{
4660
	fil_system_t*	system	= fil_system;
4661
	fil_space_t*	space;
4662
	ulint*		space_ids;
4663
	ulint		n_space_ids;
4664
	ulint		i;
4665
4666
	mutex_enter(&(system->mutex));
4667
4668
	n_space_ids = UT_LIST_GET_LEN(system->unflushed_spaces);
4669
	if (n_space_ids == 0) {
4670
4671
		mutex_exit(&system->mutex);
4672
		return;
4673
	}
4674
4675
	/* Assemble a list of space ids to flush.  Previously, we
4676
	traversed system->unflushed_spaces and called UT_LIST_GET_NEXT()
4677
	on a space that was just removed from the list by fil_flush().
4678
	Thus, the space could be dropped and the memory overwritten. */
4679
	space_ids = mem_alloc(n_space_ids * sizeof *space_ids);
4680
4681
	n_space_ids = 0;
4682
4683
	for (space = UT_LIST_GET_FIRST(system->unflushed_spaces);
4684
	     space;
4685
	     space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
4686
4687
		if (space->purpose == purpose && !space->is_being_deleted) {
4688
4689
			space_ids[n_space_ids++] = space->id;
4690
		}
4691
	}
4692
4693
	mutex_exit(&system->mutex);
4694
4695
	/* Flush the spaces.  It will not hurt to call fil_flush() on
4696
	a non-existing space id. */
4697
	for (i = 0; i < n_space_ids; i++) {
4698
4699
		fil_flush(space_ids[i]);
4700
	}
4701
4702
	mem_free(space_ids);
4703
}
4704
4705
/**********************************************************************
4706
Checks the consistency of the tablespace cache. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4707
UNIV_INTERN
1 by brian
clean slate
4708
ibool
4709
fil_validate(void)
4710
/*==============*/
4711
			/* out: TRUE if ok */
4712
{
4713
	fil_system_t*	system		= fil_system;
4714
	fil_space_t*	space;
4715
	fil_node_t*	fil_node;
4716
	ulint		n_open		= 0;
4717
	ulint		i;
4718
4719
	mutex_enter(&(system->mutex));
4720
4721
	/* Look for spaces in the hash table */
4722
4723
	for (i = 0; i < hash_get_n_cells(system->spaces); i++) {
4724
4725
		space = HASH_GET_FIRST(system->spaces, i);
4726
4727
		while (space != NULL) {
4728
			UT_LIST_VALIDATE(chain, fil_node_t, space->chain);
4729
4730
			fil_node = UT_LIST_GET_FIRST(space->chain);
4731
4732
			while (fil_node != NULL) {
4733
				if (fil_node->n_pending > 0) {
4734
					ut_a(fil_node->open);
4735
				}
4736
4737
				if (fil_node->open) {
4738
					n_open++;
4739
				}
4740
				fil_node = UT_LIST_GET_NEXT(chain, fil_node);
4741
			}
4742
			space = HASH_GET_NEXT(hash, space);
4743
		}
4744
	}
4745
4746
	ut_a(system->n_open == n_open);
4747
4748
	UT_LIST_VALIDATE(LRU, fil_node_t, system->LRU);
4749
4750
	fil_node = UT_LIST_GET_FIRST(system->LRU);
4751
4752
	while (fil_node != NULL) {
4753
		ut_a(fil_node->n_pending == 0);
4754
		ut_a(fil_node->open);
4755
		ut_a(fil_node->space->purpose == FIL_TABLESPACE);
4756
		ut_a(fil_node->space->id != 0);
4757
4758
		fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
4759
	}
4760
4761
	mutex_exit(&(system->mutex));
4762
4763
	return(TRUE);
4764
}
4765
4766
/************************************************************************
4767
Returns TRUE if file address is undefined. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4768
UNIV_INTERN
1 by brian
clean slate
4769
ibool
4770
fil_addr_is_null(
4771
/*=============*/
4772
				/* out: TRUE if undefined */
4773
	fil_addr_t	addr)	/* in: address */
4774
{
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4775
	return(addr.page == FIL_NULL);
1 by brian
clean slate
4776
}
4777
4778
/************************************************************************
4779
Accessor functions for a file page */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4780
UNIV_INTERN
1 by brian
clean slate
4781
ulint
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4782
fil_page_get_prev(const byte*	page)
1 by brian
clean slate
4783
{
4784
	return(mach_read_from_4(page + FIL_PAGE_PREV));
4785
}
4786
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4787
UNIV_INTERN
1 by brian
clean slate
4788
ulint
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4789
fil_page_get_next(const byte*	page)
1 by brian
clean slate
4790
{
4791
	return(mach_read_from_4(page + FIL_PAGE_NEXT));
4792
}
4793
4794
/*************************************************************************
4795
Sets the file page type. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4796
UNIV_INTERN
1 by brian
clean slate
4797
void
4798
fil_page_set_type(
4799
/*==============*/
4800
	byte*	page,	/* in: file page */
4801
	ulint	type)	/* in: type */
4802
{
4803
	ut_ad(page);
4804
4805
	mach_write_to_2(page + FIL_PAGE_TYPE, type);
4806
}
4807
4808
/*************************************************************************
4809
Gets the file page type. */
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4810
UNIV_INTERN
1 by brian
clean slate
4811
ulint
4812
fil_page_get_type(
4813
/*==============*/
520.4.1 by Monty Taylor
Imported InnoDB plugin with changes.
4814
				/* out: type; NOTE that if the type
4815
				has not been written to page, the return value
4816
				not defined */
4817
	const byte*	page)	/* in: file page */
1 by brian
clean slate
4818
{
4819
	ut_ad(page);
4820
4821
	return(mach_read_from_2(page + FIL_PAGE_TYPE));
4822
}