1
/******************************************************
2
The tablespace memory cache
6
Created 10/25/1995 Heikki Tuuri
7
*******************************************************/
12
#include "sync0sync.h"
13
#include "hash0hash.h"
16
#include "mach0data.h"
17
#include "ibuf0ibuf.h"
25
#include "srv0start.h"
28
#include "dict0dict.h"
32
IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
33
=============================================
35
The tablespace cache is responsible for providing fast read/write access to
36
tablespaces and logs of the database. File creation and deletion is done
37
in other modules which know more of the logic of the operation, however.
39
A tablespace consists of a chain of files. The size of the files does not
40
have to be divisible by the database block size, because we may just leave
41
the last incomplete block unused. When a new file is appended to the
42
tablespace, the maximum size of the file is also specified. At the moment,
43
we think that it is best to extend the file to its maximum size already at
44
the creation of the file, because then we can avoid dynamically extending
45
the file when more space is needed for the tablespace.
47
A block's position in the tablespace is specified with a 32-bit unsigned
48
integer. The files in the chain are thought to be catenated, and the block
49
corresponding to an address n is the nth block in the catenated file (where
50
the first block is named the 0th block, and the incomplete block fragments
51
at the end of files are not taken into account). A tablespace can be extended
52
by appending a new file at the end of the chain.
54
Our tablespace concept is similar to the one of Oracle.
56
To acquire more speed in disk transfers, a technique called disk striping is
57
sometimes used. This means that logical block addresses are divided in a
58
round-robin fashion across several disks. Windows NT supports disk striping,
59
so there we do not need to support it in the database. Disk striping is
60
implemented in hardware in RAID disks. We conclude that it is not necessary
61
to implement it in the database. Oracle 7 does not support disk striping either.
64
Another trick used at some database sites is replacing tablespace files by
65
raw disks, that is, the whole physical disk drive, or a partition of it, is
66
opened as a single file, and it is accessed through byte offsets calculated
67
from the start of the disk or the partition. This is recommended in some
68
books on database tuning to achieve more speed in i/o. Using raw disk
69
certainly prevents the OS from fragmenting disk space, but it is not clear
70
if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
71
system + EIDE Conner disk only a negligible difference in speed when reading
72
from a file, versus reading from a raw disk.
74
To have fast access to a tablespace or a log file, we put the data structures
75
to a hash table. Each tablespace and log file is given a unique 32-bit identifier.
78
Some operating systems do not support many open files at the same time,
79
though NT seems to tolerate at least 900 open files. Therefore, we put the
80
open files in an LRU-list. If we need to open another file, we may close the
81
file at the end of the LRU-list. When an i/o-operation is pending on a file,
82
the file cannot be closed. We take the file nodes with pending i/o-operations
83
out of the LRU-list and keep a count of pending operations. When an operation
84
completes, we decrement the count and return the file node to the LRU-list if
85
the count drops to zero. */
87
/* When mysqld is run, the default directory "." is the mysqld datadir,
88
but in the MySQL Embedded Server Library and ibbackup it is not the default
89
directory, and we must set the base file path explicitly */
90
const char* fil_path_to_mysql_datadir = ".";
92
/* The number of fsyncs done to the log */
93
ulint fil_n_log_flushes = 0;
95
/* NOTE(review): presumably the number of log flushes currently in progress;
nothing in this part of the file updates it -- confirm against its users */
ulint fil_n_pending_log_flushes = 0;
96
/* NOTE(review): presumably the number of tablespace flushes currently in
progress; nothing in this part of the file updates it -- confirm against its
users */
ulint fil_n_pending_tablespace_flushes = 0;
98
/* Null file address */
99
fil_addr_t fil_addr_null = {FIL_NULL, 0};
101
/* File node of a tablespace or the log data space */
102
struct fil_node_struct {
103
fil_space_t* space; /* backpointer to the space where this node belongs */
105
char* name; /* path to the file */
106
ibool open; /* TRUE if file open */
107
os_file_t handle; /* OS handle to the file, if file open */
108
ibool is_raw_disk;/* TRUE if the 'file' is actually a raw
109
device or a raw disk partition */
110
ulint size; /* size of the file in database pages, 0 if
111
not known yet; the possible last incomplete
112
megabyte may be ignored if space == 0 */
114
/* count of pending i/o's on this file;
115
closing of the file is not allowed if
117
ulint n_pending_flushes;
118
/* count of pending flushes on this file;
119
closing of the file is not allowed if
121
ib_longlong modification_counter;/* when we write to the file we
122
increment this by one */
123
ib_longlong flush_counter;/* up to what modification_counter value
124
we have flushed the modifications to disk */
125
UT_LIST_NODE_T(fil_node_t) chain;
126
/* link field for the file chain */
127
UT_LIST_NODE_T(fil_node_t) LRU;
128
/* link field for the LRU list */
132
/* Magic number stored in the magic_n field of a file node when the node is
created, and checked with ut_a() when the node is freed */
#define FIL_NODE_MAGIC_N 89389
134
/* Tablespace or log data space: let us call them by a common name space */
135
struct fil_space_struct {
136
char* name; /* space name = the path to the first file in
138
ulint id; /* space id */
139
ib_longlong tablespace_version;
140
/* in DISCARD/IMPORT this timestamp is used to
141
check if we should ignore an insert buffer
142
merge request for a page because it actually
143
was for the previous incarnation of the
145
ibool mark; /* this is set to TRUE at database startup if
146
the space corresponds to a table in the InnoDB
147
data dictionary; so we can print a warning of
148
orphaned tablespaces */
149
ibool stop_ios;/* TRUE if we want to rename the .ibd file of
150
tablespace and want to stop temporarily
151
posting of new i/o requests on the file */
152
ibool stop_ibuf_merges;
153
/* we set this TRUE when we start deleting a
154
single-table tablespace */
155
ibool is_being_deleted;
156
/* this is set to TRUE when we start
157
deleting a single-table tablespace and its
158
file; when this flag is set no further i/o
159
or flush requests can be placed on this space,
160
though there may be such requests still being
161
processed on this space */
162
ulint purpose;/* FIL_TABLESPACE, FIL_LOG, or FIL_ARCH_LOG */
163
UT_LIST_BASE_NODE_T(fil_node_t) chain;
164
/* base node for the file chain */
165
ulint size; /* space size in pages; 0 if a single-table
166
tablespace whose size we do not know yet;
167
last incomplete megabytes in data files may be
168
ignored if space == 0 */
169
ulint n_reserved_extents;
170
/* number of reserved free extents for
171
ongoing operations like B-tree page split */
172
ulint n_pending_flushes; /* this is > 0 when flushing
173
the tablespace to disk; dropping of the
174
tablespace is forbidden if this is > 0 */
175
ulint n_pending_ibuf_merges;/* this is > 0 when merging
176
insert buffer entries to a page so that we
177
may need to access the ibuf bitmap page in the
178
tablespace: dropping of the tablespace is
179
forbidden if this is > 0 */
180
hash_node_t hash; /* hash chain node */
181
hash_node_t name_hash;/* hash chain node for the name_hash table */
182
rw_lock_t latch; /* latch protecting the file space storage
184
UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
185
/* list of spaces with at least one unflushed
186
file we have written to */
187
ibool is_in_unflushed_spaces; /* TRUE if this space is
188
currently in the list above */
189
UT_LIST_NODE_T(fil_space_t) space_list;
190
/* list of all spaces */
191
ibuf_data_t* ibuf_data;
192
/* insert buffer data */
196
/* Magic number stored in the magic_n field of a space object when the object
is created, and checked with ut_a() when the space object is freed */
#define FIL_SPACE_MAGIC_N 89472
198
/* The tablespace memory cache; also the totality of logs = the log data space,
199
is stored here; below we talk about tablespaces, but also the ib_logfiles
200
form a 'space' and it is handled here */
202
typedef struct fil_system_struct fil_system_t;
203
struct fil_system_struct {
204
mutex_t mutex; /* The mutex protecting the cache */
205
hash_table_t* spaces; /* The hash table of spaces in the
206
system; they are hashed on the space
208
hash_table_t* name_hash; /* hash table based on the space
210
UT_LIST_BASE_NODE_T(fil_node_t) LRU;
211
/* base node for the LRU list of the
212
most recently used open files with no
213
pending i/o's; if we start an i/o on
214
the file, we first remove it from this
215
list, and return it to the start of
216
the list when the i/o ends;
217
log files and the system tablespace are
218
not put to this list: they are opened
219
after the startup, and kept open until
221
UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
222
/* base node for the list of those
223
tablespaces whose files contain
224
unflushed writes; those spaces have
225
at least one file node where
226
modification_counter > flush_counter */
227
ulint n_open; /* number of files currently open */
228
ulint max_n_open; /* n_open is not allowed to exceed
230
ib_longlong modification_counter;/* when we write to a file we
231
increment this by one */
232
ulint max_assigned_id;/* maximum space id in the existing
233
tables, or assigned during the time
234
mysqld has been up; at an InnoDB
235
startup we scan the data dictionary
236
and set here the maximum of the
237
space id's of the tables there */
238
ib_longlong tablespace_version;
239
/* a counter which is incremented for
240
every space object memory creation;
241
every space mem object gets a
242
'timestamp' from this; in DISCARD/
243
IMPORT this is used to check if we
244
should ignore an insert buffer merge
246
UT_LIST_BASE_NODE_T(fil_space_t) space_list;
247
/* list of all file spaces */
250
/* The tablespace memory cache. This variable is NULL before the module is initialized. */
252
fil_system_t* fil_system = NULL;
255
/************************************************************************
256
NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
258
Prepares a file node for i/o. Opens the file if it is closed. Updates the
259
pending i/o's field in the node and the system appropriately. Takes the node
260
off the LRU list if it is in the LRU list. The caller must hold the fil_sys
264
fil_node_prepare_for_io(
265
/*====================*/
266
fil_node_t* node, /* in: file node */
267
fil_system_t* system, /* in: tablespace memory cache */
268
fil_space_t* space); /* in: space */
269
/************************************************************************
270
Updates the data structures when an i/o operation finishes. Updates the
271
pending i/o's field in the node appropriately. */
274
fil_node_complete_io(
275
/*=================*/
276
fil_node_t* node, /* in: file node */
277
fil_system_t* system, /* in: tablespace memory cache */
278
ulint type); /* in: OS_FILE_WRITE or OS_FILE_READ; marks
279
the node as modified if
280
type == OS_FILE_WRITE */
281
/***********************************************************************
282
Checks if a single-table tablespace for a given table name exists in the
283
tablespace memory cache. */
286
fil_get_space_id_for_table(
287
/*=======================*/
288
/* out: space id, ULINT_UNDEFINED if not
290
const char* name); /* in: table name in the standard
291
'databasename/tablename' format */
294
/***********************************************************************
295
Returns the version number of a tablespace, -1 if not found. */
298
fil_space_get_version(
299
/*==================*/
300
/* out: version number, -1 if the tablespace does not
301
exist in the memory cache */
302
ulint id) /* in: space id */
304
fil_system_t* system = fil_system;
306
ib_longlong version = -1;
310
mutex_enter(&(system->mutex));
312
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
315
version = space->tablespace_version;
318
mutex_exit(&(system->mutex));
323
/***********************************************************************
324
Returns the latch of a file space. */
329
/* out: latch protecting storage allocation */
330
ulint id) /* in: space id */
332
fil_system_t* system = fil_system;
337
mutex_enter(&(system->mutex));
339
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
343
mutex_exit(&(system->mutex));
345
return(&(space->latch));
348
/***********************************************************************
349
Returns the type of a file space. */
354
/* out: FIL_TABLESPACE or FIL_LOG */
355
ulint id) /* in: space id */
357
fil_system_t* system = fil_system;
362
mutex_enter(&(system->mutex));
364
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
368
mutex_exit(&(system->mutex));
370
return(space->purpose);
373
/***********************************************************************
374
Returns the ibuf data of a file space. */
377
fil_space_get_ibuf_data(
378
/*====================*/
379
/* out: ibuf data for this space */
380
ulint id) /* in: space id */
382
fil_system_t* system = fil_system;
389
mutex_enter(&(system->mutex));
391
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
393
mutex_exit(&(system->mutex));
397
return(space->ibuf_data);
400
/**************************************************************************
401
Checks if all the file nodes in a space are flushed. The caller must hold
402
the fil_system mutex. */
405
fil_space_is_flushed(
406
/*=================*/
407
/* out: TRUE if all are flushed */
408
fil_space_t* space) /* in: space */
412
ut_ad(mutex_own(&(fil_system->mutex)));
414
node = UT_LIST_GET_FIRST(space->chain);
417
if (node->modification_counter > node->flush_counter) {
422
node = UT_LIST_GET_NEXT(chain, node);
428
/***********************************************************************
429
Appends a new file to the chain of files of a space. File must be closed. */
434
const char* name, /* in: file name (file must be closed) */
435
ulint size, /* in: file size in database blocks, rounded
436
downwards to an integer */
437
ulint id, /* in: space id where to append */
438
ibool is_raw) /* in: TRUE if a raw device or
439
a raw disk partition */
441
fil_system_t* system = fil_system;
448
mutex_enter(&(system->mutex));
450
node = mem_alloc(sizeof(fil_node_t));
452
node->name = mem_strdup(name);
455
ut_a(!is_raw || srv_start_raw_disk_in_use);
457
node->is_raw_disk = is_raw;
459
node->magic_n = FIL_NODE_MAGIC_N;
461
node->n_pending_flushes = 0;
463
node->modification_counter = 0;
464
node->flush_counter = 0;
466
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
469
ut_print_timestamp(stderr);
471
" InnoDB: Error: Could not find tablespace %lu for\n"
472
"InnoDB: file ", (ulong) id);
473
ut_print_filename(stderr, name);
474
fputs(" in the tablespace memory cache.\n", stderr);
475
mem_free(node->name);
479
mutex_exit(&(system->mutex));
488
UT_LIST_ADD_LAST(chain, space->chain, node);
490
mutex_exit(&(system->mutex));
493
/************************************************************************
494
Opens the file of a node of a tablespace. The caller must own the fil_system
500
fil_node_t* node, /* in: file node */
501
fil_system_t* system, /* in: tablespace memory cache */
502
fil_space_t* space) /* in: space */
504
ib_longlong size_bytes;
509
#ifndef UNIV_HOTBACKUP
513
#endif /* !UNIV_HOTBACKUP */
515
ut_ad(mutex_own(&(system->mutex)));
516
ut_a(node->n_pending == 0);
517
ut_a(node->open == FALSE);
519
if (node->size == 0) {
520
/* It must be a single-table tablespace and we do not know the
521
size of the file yet. First we open the file in the normal
522
mode, no async I/O here, for simplicity. Then do some checks,
523
and close the file again.
524
NOTE that we could not use the simple file read function
525
os_file_read() in Windows to read from a file opened for
528
node->handle = os_file_create_simple_no_error_handling(
529
node->name, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
531
/* The following call prints an error message */
532
os_file_get_last_error(TRUE);
534
ut_print_timestamp(stderr);
537
" InnoDB: Fatal error: cannot open %s\n."
538
"InnoDB: Have you deleted .ibd files"
539
" under a running mysqld server?\n",
544
os_file_get_size(node->handle, &size_low, &size_high);
546
size_bytes = (((ib_longlong)size_high) << 32)
547
+ (ib_longlong)size_low;
548
#ifdef UNIV_HOTBACKUP
549
node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
552
ut_a(space->purpose != FIL_LOG);
553
ut_a(space->id != 0);
555
if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
557
"InnoDB: Error: the size of single-table"
558
" tablespace file %s\n"
559
"InnoDB: is only %lu %lu,"
560
" should be at least %lu!\n",
564
(ulong) (FIL_IBD_FILE_INITIAL_SIZE
570
/* Read the first page of the tablespace */
572
buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
573
/* Align the memory for file i/o if we might have O_DIRECT
575
page = ut_align(buf2, UNIV_PAGE_SIZE);
577
success = os_file_read(node->handle, page, 0, 0,
579
space_id = fsp_header_get_space_id(page);
583
/* Close the file now that we have read the space id from it */
585
os_file_close(node->handle);
587
if (space_id == ULINT_UNDEFINED || space_id == 0) {
589
"InnoDB: Error: tablespace id %lu"
590
" in file %s is not sensible\n",
591
(ulong) space_id, node->name);
596
if (space_id != space->id) {
598
"InnoDB: Error: tablespace id is %lu"
599
" in the data dictionary\n"
600
"InnoDB: but in file %s it is %lu!\n",
601
space->id, node->name, space_id);
606
if (size_bytes >= FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) {
608
((size_bytes / (1024 * 1024))
609
* ((1024 * 1024) / UNIV_PAGE_SIZE));
611
node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
614
space->size += node->size;
617
/* printf("Opening file %s\n", node->name); */
619
/* Open the file for reading and writing, in Windows normally in the
620
unbuffered async I/O mode, though global variables may make
621
os_file_create() to fall back to the normal file I/O mode. */
623
if (space->purpose == FIL_LOG) {
624
node->handle = os_file_create(node->name, OS_FILE_OPEN,
625
OS_FILE_AIO, OS_LOG_FILE, &ret);
626
} else if (node->is_raw_disk) {
627
node->handle = os_file_create(node->name,
629
OS_FILE_AIO, OS_DATA_FILE, &ret);
631
node->handle = os_file_create(node->name, OS_FILE_OPEN,
632
OS_FILE_AIO, OS_DATA_FILE, &ret);
641
if (space->purpose == FIL_TABLESPACE && space->id != 0) {
642
/* Put the node to the LRU list */
643
UT_LIST_ADD_FIRST(LRU, system->LRU, node);
647
/**************************************************************************
653
fil_node_t* node, /* in: file node */
654
fil_system_t* system) /* in: tablespace memory cache */
658
ut_ad(node && system);
659
ut_ad(mutex_own(&(system->mutex)));
661
ut_a(node->n_pending == 0);
662
ut_a(node->n_pending_flushes == 0);
663
ut_a(node->modification_counter == node->flush_counter);
665
ret = os_file_close(node->handle);
668
/* printf("Closing file %s\n", node->name); */
671
ut_a(system->n_open > 0);
674
if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) {
675
ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
677
/* The node is in the LRU list, remove it */
678
UT_LIST_REMOVE(LRU, system->LRU, node);
682
/************************************************************************
683
Tries to close a file in the LRU list. The caller must hold the fil_sys
687
fil_try_to_close_file_in_LRU(
688
/*=========================*/
689
/* out: TRUE if success, FALSE if should retry
690
later; since i/o's generally complete in <
691
100 ms, and as InnoDB writes at most 128 pages
692
from the buffer pool in a batch, and then
693
immediately flushes the files, there is a good
694
chance that the next time we find a suitable
695
node from the LRU list */
696
ibool print_info) /* in: if TRUE, prints information why it
697
cannot close a file */
699
fil_system_t* system = fil_system;
702
ut_ad(mutex_own(&(system->mutex)));
704
node = UT_LIST_GET_LAST(system->LRU);
708
"InnoDB: fil_sys open file LRU len %lu\n",
709
(ulong) UT_LIST_GET_LEN(system->LRU));
712
while (node != NULL) {
713
if (node->modification_counter == node->flush_counter
714
&& node->n_pending_flushes == 0) {
716
fil_node_close_file(node, system);
721
if (print_info && node->n_pending_flushes > 0) {
722
fputs("InnoDB: cannot close file ", stderr);
723
ut_print_filename(stderr, node->name);
724
fprintf(stderr, ", because n_pending_flushes %lu\n",
725
(ulong) node->n_pending_flushes);
729
&& node->modification_counter != node->flush_counter) {
730
fputs("InnoDB: cannot close file ", stderr);
731
ut_print_filename(stderr, node->name);
733
", because mod_count %ld != fl_count %ld\n",
734
(long) node->modification_counter,
735
(long) node->flush_counter);
738
node = UT_LIST_GET_PREV(LRU, node);
744
/***********************************************************************
745
Reserves the fil_system mutex and tries to make sure we can open at least one
746
file while holding it. This should be called before calling
747
fil_node_prepare_for_io(), because that function may need to open a file. */
750
fil_mutex_enter_and_prepare_for_io(
751
/*===============================*/
752
ulint space_id) /* in: space id */
754
fil_system_t* system = fil_system;
757
ibool print_info = FALSE;
761
ut_ad(!mutex_own(&(system->mutex)));
763
mutex_enter(&(system->mutex));
765
if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
766
/* We keep log files and system tablespace files always open;
767
this is important in preventing deadlocks in this module, as
768
a page read completion often performs another read from the
769
insert buffer. The insert buffer is in tablespace 0, and we
770
cannot end up waiting in this function. */
775
if (system->n_open < system->max_n_open) {
780
HASH_SEARCH(hash, system->spaces, space_id, space,
781
space->id == space_id);
782
if (space != NULL && space->stop_ios) {
783
/* We are going to do a rename file and want to stop new i/o's
786
if (count2 > 20000) {
787
fputs("InnoDB: Warning: tablespace ", stderr);
788
ut_print_filename(stderr, space->name);
790
" has i/o ops stopped for a long time %lu\n",
794
mutex_exit(&(system->mutex));
796
os_thread_sleep(20000);
803
/* If the file is already open, no need to do anything; if the space
804
does not exist, we handle the situation in the function which called
807
if (!space || UT_LIST_GET_FIRST(space->chain)->open) {
816
/* Too many files are open, try to close some */
818
success = fil_try_to_close_file_in_LRU(print_info);
820
if (success && system->n_open >= system->max_n_open) {
825
if (system->n_open < system->max_n_open) {
832
ut_print_timestamp(stderr);
834
" InnoDB: Warning: too many (%lu) files stay open"
835
" while the maximum\n"
836
"InnoDB: allowed value would be %lu.\n"
837
"InnoDB: You may need to raise the value of"
838
" innodb_max_files_open in\n"
840
(ulong) system->n_open, (ulong) system->max_n_open);
845
mutex_exit(&(system->mutex));
847
#ifndef UNIV_HOTBACKUP
848
/* Wake the i/o-handler threads to make sure pending i/o's are
850
os_aio_simulated_wake_handler_threads();
852
os_thread_sleep(20000);
854
/* Flush tablespaces so that we can close modified files in the LRU
857
fil_flush_file_spaces(FIL_TABLESPACE);
864
/***********************************************************************
865
Frees a file node object from a tablespace memory cache. */
870
fil_node_t* node, /* in, own: file node */
871
fil_system_t* system, /* in: tablespace memory cache */
872
fil_space_t* space) /* in: space where the file node is chained */
874
ut_ad(node && system && space);
875
ut_ad(mutex_own(&(system->mutex)));
876
ut_a(node->magic_n == FIL_NODE_MAGIC_N);
877
ut_a(node->n_pending == 0);
880
/* We fool the assertion in fil_node_close_file() to think
881
there are no unflushed modifications in the file */
883
node->modification_counter = node->flush_counter;
885
if (space->is_in_unflushed_spaces
886
&& fil_space_is_flushed(space)) {
888
space->is_in_unflushed_spaces = FALSE;
890
UT_LIST_REMOVE(unflushed_spaces,
891
system->unflushed_spaces,
895
fil_node_close_file(node, system);
898
space->size -= node->size;
900
UT_LIST_REMOVE(chain, space->chain, node);
902
mem_free(node->name);
906
/********************************************************************
907
Drops files from the start of a file space, so that its size is cut by
911
fil_space_truncate_start(
912
/*=====================*/
913
ulint id, /* in: space id */
914
ulint trunc_len) /* in: truncate by this much; it is an error
915
if this does not equal the combined size of
916
some initial files in the space */
918
fil_system_t* system = fil_system;
922
mutex_enter(&(system->mutex));
924
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
928
while (trunc_len > 0) {
929
node = UT_LIST_GET_FIRST(space->chain);
931
ut_a(node->size * UNIV_PAGE_SIZE >= trunc_len);
933
trunc_len -= node->size * UNIV_PAGE_SIZE;
935
fil_node_free(node, system, space);
938
mutex_exit(&(system->mutex));
941
/***********************************************************************
942
Creates a space memory object and puts it to the tablespace memory cache. If
943
there is an error, prints an error message to the .err log. */
948
/* out: TRUE if success */
949
const char* name, /* in: space name */
950
ulint id, /* in: space id */
951
ulint purpose)/* in: FIL_TABLESPACE, or FIL_LOG if log */
953
fil_system_t* system = fil_system;
958
"InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name,
964
mutex_enter(&(system->mutex));
966
HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(name), space,
967
0 == strcmp(name, space->name));
969
ut_print_timestamp(stderr);
971
" InnoDB: Warning: trying to init to the"
972
" tablespace memory cache\n"
973
"InnoDB: a tablespace %lu of name ", (ulong) id);
974
ut_print_filename(stderr, name);
975
fprintf(stderr, ",\n"
976
"InnoDB: but a tablespace %lu of the same name\n"
977
"InnoDB: already exists in the"
978
" tablespace memory cache!\n",
981
if (id == 0 || purpose != FIL_TABLESPACE) {
983
mutex_exit(&(system->mutex));
989
"InnoDB: We assume that InnoDB did a crash recovery,"
991
"InnoDB: an .ibd file for which the table"
992
" did not exist in the\n"
993
"InnoDB: InnoDB internal data dictionary in the"
995
"InnoDB: We assume that you later removed the"
996
" .ibd and .frm files,\n"
997
"InnoDB: and are now trying to recreate the table."
998
" We now remove the\n"
999
"InnoDB: conflicting tablespace object"
1000
" from the memory cache and try\n"
1001
"InnoDB: the init again.\n");
1003
namesake_id = space->id;
1005
mutex_exit(&(system->mutex));
1007
fil_space_free(namesake_id);
1012
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
1014
if (space != NULL) {
1016
"InnoDB: Error: trying to add tablespace %lu"
1017
" of name ", (ulong) id);
1018
ut_print_filename(stderr, name);
1019
fprintf(stderr, "\n"
1020
"InnoDB: to the tablespace memory cache,"
1022
"InnoDB: %lu of name ", (ulong) space->id);
1023
ut_print_filename(stderr, space->name);
1024
fputs(" already exists in the tablespace\n"
1025
"InnoDB: memory cache!\n", stderr);
1027
mutex_exit(&(system->mutex));
1032
space = mem_alloc(sizeof(fil_space_t));
1034
space->name = mem_strdup(name);
1037
system->tablespace_version++;
1038
space->tablespace_version = system->tablespace_version;
1039
space->mark = FALSE;
1041
if (purpose == FIL_TABLESPACE && id > system->max_assigned_id) {
1042
system->max_assigned_id = id;
1045
space->stop_ios = FALSE;
1046
space->stop_ibuf_merges = FALSE;
1047
space->is_being_deleted = FALSE;
1048
space->purpose = purpose;
1051
space->n_reserved_extents = 0;
1053
space->n_pending_flushes = 0;
1054
space->n_pending_ibuf_merges = 0;
1056
UT_LIST_INIT(space->chain);
1057
space->magic_n = FIL_SPACE_MAGIC_N;
1059
space->ibuf_data = NULL;
1061
rw_lock_create(&space->latch, SYNC_FSP);
1063
HASH_INSERT(fil_space_t, hash, system->spaces, id, space);
1065
HASH_INSERT(fil_space_t, name_hash, system->name_hash,
1066
ut_fold_string(name), space);
1067
space->is_in_unflushed_spaces = FALSE;
1069
UT_LIST_ADD_LAST(space_list, system->space_list, space);
1071
mutex_exit(&(system->mutex));
1076
/***********************************************************************
1077
Assigns a new space id for a new single-table tablespace. This works simply by
1078
incrementing the global counter. If 4 billion id's are not enough, we may need
1082
fil_assign_new_space_id(void)
1083
/*=========================*/
1084
/* out: new tablespace id; ULINT_UNDEFINED if could
1087
fil_system_t* system = fil_system;
1090
mutex_enter(&(system->mutex));
1092
system->max_assigned_id++;
1094
id = system->max_assigned_id;
1096
if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
1097
ut_print_timestamp(stderr);
1099
"InnoDB: Warning: you are running out of new"
1100
" single-table tablespace id's.\n"
1101
"InnoDB: Current counter is %lu and it"
1102
" must not exceed %lu!\n"
1103
"InnoDB: To reset the counter to zero"
1104
" you have to dump all your tables and\n"
1105
"InnoDB: recreate the whole InnoDB installation.\n",
1107
(ulong) SRV_LOG_SPACE_FIRST_ID);
1110
if (id >= SRV_LOG_SPACE_FIRST_ID) {
1111
ut_print_timestamp(stderr);
1113
"InnoDB: You have run out of single-table"
1114
" tablespace id's!\n"
1115
"InnoDB: Current counter is %lu.\n"
1116
"InnoDB: To reset the counter to zero you"
1117
" have to dump all your tables and\n"
1118
"InnoDB: recreate the whole InnoDB installation.\n",
1120
system->max_assigned_id--;
1122
id = ULINT_UNDEFINED;
1125
mutex_exit(&(system->mutex));
1130
/***********************************************************************
1131
Frees a space object from the tablespace memory cache. Closes the files in
1132
the chain but does not delete them. There must not be any pending i/o's or
1133
flushes on the files. */
1138
/* out: TRUE if success */
1139
ulint id) /* in: space id */
1141
fil_system_t* system = fil_system;
1143
fil_space_t* namespace;
1144
fil_node_t* fil_node;
1146
mutex_enter(&(system->mutex));
1148
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
1151
ut_print_timestamp(stderr);
1153
" InnoDB: Error: trying to remove tablespace %lu"
1154
" from the cache but\n"
1155
"InnoDB: it is not there.\n", (ulong) id);
1157
mutex_exit(&(system->mutex));
1162
HASH_DELETE(fil_space_t, hash, system->spaces, id, space);
1164
HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(space->name),
1165
namespace, 0 == strcmp(space->name, namespace->name));
1167
ut_a(space == namespace);
1169
HASH_DELETE(fil_space_t, name_hash, system->name_hash,
1170
ut_fold_string(space->name), space);
1172
if (space->is_in_unflushed_spaces) {
1173
space->is_in_unflushed_spaces = FALSE;
1175
UT_LIST_REMOVE(unflushed_spaces, system->unflushed_spaces,
1179
UT_LIST_REMOVE(space_list, system->space_list, space);
1181
ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
1182
ut_a(0 == space->n_pending_flushes);
1184
fil_node = UT_LIST_GET_FIRST(space->chain);
1186
while (fil_node != NULL) {
1187
fil_node_free(fil_node, system, space);
1189
fil_node = UT_LIST_GET_FIRST(space->chain);
1192
ut_a(0 == UT_LIST_GET_LEN(space->chain));
1194
mutex_exit(&(system->mutex));
1196
rw_lock_free(&(space->latch));
1198
mem_free(space->name);
1204
#ifdef UNIV_HOTBACKUP
1205
/***********************************************************************
1206
Returns the tablespace object for a given id, or NULL if not found from the
1207
tablespace memory cache. The lookup is a search of the id hash
system->spaces. */
1210
fil_get_space_for_id_low(
1211
/*=====================*/
1212
/* out: tablespace object or NULL; NOTE that you must
1213
own &(fil_system->mutex) to call this function! */
1214
ulint id) /* in: space id */
1216
fil_system_t* system = fil_system;
1221
/* Search the id hash for the entry whose id field equals the requested
id; the macro stores its result in 'space'. */
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
1227
/***********************************************************************
1228
Returns the size of the space in pages. The tablespace must be cached in the
1234
/* out: space size, 0 if space not found */
1235
ulint id) /* in: space id */
1237
fil_system_t* system = fil_system;
1244
fil_mutex_enter_and_prepare_for_io(id);
1246
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
1248
if (space == NULL) {
1249
mutex_exit(&(system->mutex));
1254
if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
1257
ut_a(1 == UT_LIST_GET_LEN(space->chain));
1259
node = UT_LIST_GET_FIRST(space->chain);
1261
/* It must be a single-table tablespace and we have not opened
1262
the file yet; the following calls will open it and update the
1265
fil_node_prepare_for_io(node, system, space);
1266
fil_node_complete_io(node, system, OS_FILE_READ);
1271
mutex_exit(&(system->mutex));
1276
/***********************************************************************
1277
Checks if the pair space, page_no refers to an existing page in a tablespace
1278
file space. The tablespace must be cached in the memory cache. */
1281
fil_check_adress_in_tablespace(
1282
/*===========================*/
1283
/* out: TRUE if the address is meaningful */
1284
ulint id, /* in: space id */
1285
ulint page_no)/* in: page number */
1287
/* A page number is accepted when it is strictly below the size that
fil_space_get_size() reports for this space id. That function is
documented to return 0 for a space that is not found, in which case this
comparison cannot be true for any page_no. */
if (fil_space_get_size(id) > page_no) {
1295
/********************************************************************
1296
Creates the tablespace memory cache. */
1301
/* out, own: tablespace memory cache */
1302
ulint hash_size, /* in: hash table size */
1303
ulint max_n_open) /* in: maximum number of open files; must be
1306
fil_system_t* system;
1308
ut_a(hash_size > 0);
1309
ut_a(max_n_open > 0);
1311
system = mem_alloc(sizeof(fil_system_t));
1313
mutex_create(&system->mutex, SYNC_ANY_LATCH);
1315
system->spaces = hash_create(hash_size);
1316
system->name_hash = hash_create(hash_size);
1318
UT_LIST_INIT(system->LRU);
1321
system->max_n_open = max_n_open;
1323
system->modification_counter = 0;
1324
system->max_assigned_id = 0;
1326
system->tablespace_version = 0;
1328
UT_LIST_INIT(system->unflushed_spaces);
1329
UT_LIST_INIT(system->space_list);
1334
/********************************************************************
1335
Initializes the tablespace memory cache. */
1340
ulint max_n_open) /* in: max number of open files */
1344
/* The cache is created once: it must not exist yet when this runs. */
ut_a(fil_system == NULL);
1346
/* NOTE(review): hash_size is presumably chosen in the branches of this
test on srv_file_per_table; the assignments are not visible here --
confirm. */
if (srv_file_per_table) {
1352
/* Build the cache and publish it through the global fil_system. */
fil_system = fil_system_create(hash_size, max_n_open);
1355
/***********************************************************************
1356
Opens all log files and system tablespace data files. They stay open until the
1357
database server shutdown. This should be called at a server startup after the
1358
space objects for the log and the system tablespace have been created. The
1359
purpose of this operation is to make sure we never run out of file descriptors
1360
if we need to read from the insert buffer or to write to the log. */
1363
fil_open_log_and_system_tablespace_files(void)
1364
/*==========================================*/
1366
fil_system_t* system = fil_system;
1370
/* The whole traversal runs under the fil_system mutex. */
mutex_enter(&(system->mutex));
1372
/* Visit every space on system->space_list. */
space = UT_LIST_GET_FIRST(system->space_list);
1374
while (space != NULL) {
1375
/* Select every space whose purpose is not FIL_TABLESPACE, plus the
space with id 0: per the function header these are the log files and
the system tablespace. */
if (space->purpose != FIL_TABLESPACE || space->id == 0) {
1376
node = UT_LIST_GET_FIRST(space->chain);
1378
/* Walk the chain of file nodes of the selected space. */
while (node != NULL) {
1380
fil_node_open_file(node, system,
1383
/* Warn when fewer than 10 slots are left between the number of open
files (n_open) and the configured limit (max_n_open). */
if (system->max_n_open < 10 + system->n_open) {
1385
"InnoDB: Warning: you must"
1386
" raise the value of"
1387
" innodb_max_open_files in\n"
1388
"InnoDB: my.cnf! Remember that"
1389
" InnoDB keeps all log files"
1391
"InnoDB: tablespace files open"
1392
" for the whole time mysqld is"
1394
"InnoDB: needs to open also"
1395
" some .ibd files if the"
1396
" file-per-table storage\n"
1397
"InnoDB: model is used."
1398
" Current open files %lu,"
1400
" open files %lu.\n",
1401
(ulong) system->n_open,
1402
(ulong) system->max_n_open);
1404
node = UT_LIST_GET_NEXT(chain, node);
1407
space = UT_LIST_GET_NEXT(space_list, space);
1410
mutex_exit(&(system->mutex));
1413
/***********************************************************************
1414
Closes all open files. There must not be any pending i/o's or not flushed
1415
modifications in the files. The function walks every space on the space
list and, within each space, every node of its chain. */
1418
fil_close_all_files(void)
1419
/*=====================*/
1421
fil_system_t* system = fil_system;
1425
/* The traversal and the closing are done under the fil_system mutex. */
mutex_enter(&(system->mutex));
1427
space = UT_LIST_GET_FIRST(system->space_list);
1429
/* Outer loop: spaces; inner loop: the file nodes of one space. */
while (space != NULL) {
1430
node = UT_LIST_GET_FIRST(space->chain);
1432
while (node != NULL) {
1434
/* Close the file behind this node. */
fil_node_close_file(node, system);
1436
node = UT_LIST_GET_NEXT(chain, node);
1438
space = UT_LIST_GET_NEXT(space_list, space);
1441
mutex_exit(&(system->mutex));
1444
/***********************************************************************
1445
Sets the max tablespace id counter if the given number is bigger than the
1449
fil_set_max_space_id_if_bigger(
1450
/*===========================*/
1451
ulint max_id) /* in: maximum known id */
1453
/* Raises system->max_assigned_id to max_id when max_id is larger; the
counter is never lowered here. */
fil_system_t* system = fil_system;
1455
/* Ids at or above SRV_LOG_SPACE_FIRST_ID are rejected; the message
below classifies this as a fatal error. */
if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
1457
"InnoDB: Fatal error: max tablespace id"
1458
" is too high, %lu\n", (ulong) max_id);
1462
/* The compare-and-update of the counter is done under the fil_system
mutex. */
mutex_enter(&(system->mutex));
1464
if (system->max_assigned_id < max_id) {
1466
system->max_assigned_id = max_id;
1469
mutex_exit(&(system->mutex));
1472
/********************************************************************
1473
Initializes the ibuf data structure for space 0 == the system tablespace.
1474
This can be called after the file space headers have been created and the
1475
dictionary system has been initialized. */
1478
fil_ibuf_init_at_db_start(void)
1479
/*===========================*/
1483
/* Take the first entry of the space list.
NOTE(review): this relies on the system tablespace being the first
element of fil_system->space_list -- confirm against the code that
builds the list. */
space = UT_LIST_GET_FIRST(fil_system->space_list);
1486
/* The entry must be a data tablespace. */
ut_a(space->purpose == FIL_TABLESPACE);
1488
/* Attach the ibuf data returned by ibuf_data_init_for_space() to the
space object. */
space->ibuf_data = ibuf_data_init_for_space(space->id);
1491
/********************************************************************
1492
Writes the flushed lsn and the latest archived log number to the page header
1493
of the first page of a data file. In the code below only the lsn is stored
(at FIL_PAGE_FILE_FLUSH_LSN); the arch_log_no parameter is marked unused. */
1496
fil_write_lsn_and_arch_no_to_file(
1497
/*==============================*/
1498
ulint space_id, /* in: space number */
1499
ulint sum_of_sizes, /* in: combined size of previous files in
1500
space, in database pages */
1501
dulint lsn, /* in: lsn to write */
1502
ulint arch_log_no /* in: archived log number to write */
1503
__attribute__((unused)))
1508
/* Allocate two pages' worth of memory so that a UNIV_PAGE_SIZE-aligned
window of one full page fits inside the allocation; buf is that aligned
window, buf1 is the raw allocation. */
buf1 = mem_alloc(2 * UNIV_PAGE_SIZE);
1509
buf = ut_align(buf1, UNIV_PAGE_SIZE);
1511
/* Read-modify-write of one page: sum_of_sizes is passed as the page
offset within the space, which per the parameter description is where
this data file begins. */
fil_read(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
1513
/* Patch the 8-byte flush lsn field in the page image. */
mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
1515
fil_write(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
1520
/********************************************************************
1521
Writes the flushed lsn and the latest archived log number to the page
1522
header of the first page of each data file in the system tablespace. */
1525
fil_write_flushed_lsn_to_data_files(
1526
/*================================*/
1527
/* out: DB_SUCCESS or error number */
1528
dulint lsn, /* in: lsn to write */
1529
ulint arch_log_no) /* in: latest archived log file number */
1536
mutex_enter(&(fil_system->mutex));
1538
space = UT_LIST_GET_FIRST(fil_system->space_list);
1541
/* We only write the lsn to all existing data files which have
1542
been open during the lifetime of the mysqld process; they are
1543
represented by the space objects in the tablespace memory
1544
cache. Note that all data files in the system tablespace 0 are
1547
if (space->purpose == FIL_TABLESPACE
1548
&& space->id == 0) {
1551
node = UT_LIST_GET_FIRST(space->chain);
1553
mutex_exit(&(fil_system->mutex));
1555
err = fil_write_lsn_and_arch_no_to_file(
1556
space->id, sum_of_sizes, lsn,
1558
if (err != DB_SUCCESS) {
1563
mutex_enter(&(fil_system->mutex));
1565
sum_of_sizes += node->size;
1566
node = UT_LIST_GET_NEXT(chain, node);
1569
space = UT_LIST_GET_NEXT(space_list, space);
1572
mutex_exit(&(fil_system->mutex));
1577
/***********************************************************************
1578
Reads the flushed lsn and arch no fields from a data file at database
1582
fil_read_flushed_lsn_and_arch_log_no(
1583
/*=================================*/
1584
os_file_t data_file, /* in: open data file */
1585
ibool one_read_already, /* in: TRUE if min and max parameters
1586
below already contain sensible data */
1587
#ifdef UNIV_LOG_ARCHIVE
1588
ulint* min_arch_log_no, /* in/out: */
1589
ulint* max_arch_log_no, /* in/out: */
1590
#endif /* UNIV_LOG_ARCHIVE */
1591
dulint* min_flushed_lsn, /* in/out: */
1592
dulint* max_flushed_lsn) /* in/out: */
1598
/* Reads the first UNIV_PAGE_SIZE bytes of data_file, extracts the flush
lsn stored at FIL_PAGE_FILE_FLUSH_LSN and folds it into the caller's
running minimum and maximum. The archived log number is folded in the
same way when UNIV_LOG_ARCHIVE is defined.

The raw allocation is twice the page size so that a page-aligned window
of one full page fits inside it. */
buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
1599
/* Align the memory for a possible read from a raw device */
1600
buf = ut_align(buf2, UNIV_PAGE_SIZE);
1602
/* NOTE(review): the result of os_file_read() is not examined by this
statement. */
os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE);
1604
flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN);
1608
/* First file seen by the caller: seed both bounds with this file's
values. */
if (!one_read_already) {
1609
*min_flushed_lsn = flushed_lsn;
1610
*max_flushed_lsn = flushed_lsn;
1611
#ifdef UNIV_LOG_ARCHIVE
1612
*min_arch_log_no = arch_log_no;
1613
*max_arch_log_no = arch_log_no;
1614
#endif /* UNIV_LOG_ARCHIVE */
1618
/* Later files: widen the [min, max] range only when this file's value
falls outside it. */
if (ut_dulint_cmp(*min_flushed_lsn, flushed_lsn) > 0) {
1619
*min_flushed_lsn = flushed_lsn;
1621
if (ut_dulint_cmp(*max_flushed_lsn, flushed_lsn) < 0) {
1622
*max_flushed_lsn = flushed_lsn;
1624
#ifdef UNIV_LOG_ARCHIVE
1625
if (*min_arch_log_no > arch_log_no) {
1626
*min_arch_log_no = arch_log_no;
1628
if (*max_arch_log_no < arch_log_no) {
1629
*max_arch_log_no = arch_log_no;
1631
#endif /* UNIV_LOG_ARCHIVE */
1634
/*================ SINGLE-TABLE TABLESPACES ==========================*/
1636
/***********************************************************************
1637
Increments the count of pending insert buffer page merges, if space is not
1641
fil_inc_pending_ibuf_merges(
1642
/*========================*/
1643
/* out: TRUE if being deleted, and ibuf merges should
1645
ulint id) /* in: space id */
1647
/* Registers one more pending insert buffer merge on the space with the
given id, unless the space is missing from the cache or has its
stop_ibuf_merges flag set. */
fil_system_t* system = fil_system;
1650
/* Lookup, test and increment all happen under the fil_system mutex. */
mutex_enter(&(system->mutex));
1652
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
1654
/* A missing space is reported as an attempt to merge into a dropped
tablespace. */
if (space == NULL) {
1656
"InnoDB: Error: trying to do ibuf merge to a"
1657
" dropped tablespace %lu\n",
1661
/* Refusal path: the mutex is released without touching the counter.
stop_ibuf_merges is the flag fil_delete_tablespace() raises before it
waits for n_pending_ibuf_merges to reach zero. */
if (space == NULL || space->stop_ibuf_merges) {
1662
mutex_exit(&(system->mutex));
1667
/* Accepted: count the merge while still holding the mutex. */
space->n_pending_ibuf_merges++;
1669
mutex_exit(&(system->mutex));
1674
/***********************************************************************
1675
Decrements the count of pending insert buffer page merges. A space that is
no longer in the cache is reported and otherwise left alone. */
1678
fil_decr_pending_ibuf_merges(
1679
/*=========================*/
1680
ulint id) /* in: space id */
1682
fil_system_t* system = fil_system;
1685
/* Lookup and decrement happen under the fil_system mutex. */
mutex_enter(&(system->mutex));
1687
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
1689
/* The space has disappeared from the cache: only print a diagnostic. */
if (space == NULL) {
1691
"InnoDB: Error: decrementing ibuf merge of a"
1692
" dropped tablespace %lu\n",
1696
/* The counter is touched only when the space still exists. */
if (space != NULL) {
1697
space->n_pending_ibuf_merges--;
1700
mutex_exit(&(system->mutex));
1703
/************************************************************
1704
Creates the database directory for a table if it does not exist yet. The
directory path is built from fil_path_to_mysql_datadir followed by the
database-name part of the given name. */
1707
fil_create_directory_for_tablename(
1708
/*===============================*/
1709
const char* name) /* in: name in the standard
1710
'databasename/tablename' format */
1716
len = strlen(fil_path_to_mysql_datadir);
1717
/* namend points at the '/' that separates the database name from the
table name, so (namend - name) is the length of the database name. */
namend = strchr(name, '/');
1719
/* Room for the datadir (len bytes), one byte between the two parts,
the database name and the terminating NUL. */
path = mem_alloc(len + (namend - name) + 2);
1721
memcpy(path, fil_path_to_mysql_datadir, len);
1723
/* The database name is copied starting at path[len + 1], which leaves
path[len] for the byte between the two parts (presumably the path
separator). */
memcpy(path + len + 1, name, namend - name);
1724
path[len + (namend - name) + 1] = 0;
1726
srv_normalize_path_for_win(path);
1728
/* Failure to create the directory is treated as fatal: the result is
asserted with ut_a(). */
ut_a(os_file_create_directory(path, FALSE));
1732
#ifndef UNIV_HOTBACKUP
1733
/************************************************************
1734
Writes a log record about an .ibd file create/rename/delete. */
1739
ulint type, /* in: MLOG_FILE_CREATE,
1740
MLOG_FILE_DELETE, or
1742
ulint space_id, /* in: space id */
1743
const char* name, /* in: table name in the familiar
1744
'databasename/tablename' format, or
1745
the file path in the case of
1747
const char* new_name, /* in: if type is MLOG_FILE_RENAME,
1748
the new table name in the
1749
'databasename/tablename' format */
1750
mtr_t* mtr) /* in: mini-transaction handle */
1755
log_ptr = mlog_open(mtr, 11 + 2);
1758
/* Logging in mtr is switched off during crash recovery:
1759
in that case mlog_open returns NULL */
1763
log_ptr = mlog_write_initial_log_record_for_file_op(type, space_id, 0,
1765
/* Let us store the strings as null-terminated for easier readability
1768
len = strlen(name) + 1;
1770
mach_write_to_2(log_ptr, len);
1772
mlog_close(mtr, log_ptr);
1774
mlog_catenate_string(mtr, (byte*) name, len);
1776
if (type == MLOG_FILE_RENAME) {
1777
ulint len = strlen(new_name) + 1;
1778
log_ptr = mlog_open(mtr, 2 + len);
1780
mach_write_to_2(log_ptr, len);
1782
mlog_close(mtr, log_ptr);
1784
mlog_catenate_string(mtr, (byte*) new_name, len);
1789
/***********************************************************************
1790
Parses the body of a log record written about an .ibd file operation. That is,
1791
the log record part after the standard (type, space id, page no) header of the
1794
If desired, also replays the delete or rename operation if the .ibd file
1795
exists and the space id in it matches. Replays the create operation if a file
1796
at that path does not exist yet. If the database directory for the file to be
1797
created does not exist, then we create the directory, too.
1799
Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
1800
datadir that we should use in replaying the file operations. */
1803
fil_op_log_parse_or_replay(
1804
/*=======================*/
1805
/* out: end of log record, or NULL if the
1806
record was not completely contained between
1808
byte* ptr, /* in: buffer containing the log record body,
1809
or an initial segment of it, if the record does
1810
not fit completely between ptr and end_ptr */
1811
byte* end_ptr, /* in: buffer end */
1812
ulint type, /* in: the type of this log record */
1813
ibool do_replay, /* in: TRUE if we want to replay the
1814
operation, and not just parse the log record */
1815
ulint space_id) /* in: if do_replay is TRUE, the space id of
1816
the tablespace in question; otherwise
1822
const char* new_name = NULL;
1824
if (end_ptr < ptr + 2) {
1829
name_len = mach_read_from_2(ptr);
1833
if (end_ptr < ptr + name_len) {
1838
name = (const char*) ptr;
1842
if (type == MLOG_FILE_RENAME) {
1843
if (end_ptr < ptr + 2) {
1848
new_name_len = mach_read_from_2(ptr);
1852
if (end_ptr < ptr + new_name_len) {
1857
new_name = (const char*) ptr;
1859
ptr += new_name_len;
1862
/* We managed to parse a full log record body */
1864
printf("Parsed log rec of type %lu space %lu\n"
1865
"name %s\n", type, space_id, name);
1867
if (type == MLOG_FILE_RENAME) {
1868
printf("new name %s\n", new_name);
1871
if (do_replay == FALSE) {
1876
/* Let us try to perform the file operation, if sensible. Note that
1877
ibbackup has at this stage already read in all space id info to the
1878
fil0fil.c data structures.
1880
NOTE that our algorithm is not guaranteed to work correctly if there
1881
were renames of tables during the backup. See ibbackup code for more
1884
if (type == MLOG_FILE_DELETE) {
1885
if (fil_tablespace_exists_in_mem(space_id)) {
1886
ut_a(fil_delete_tablespace(space_id));
1888
} else if (type == MLOG_FILE_RENAME) {
1889
/* We do the rename based on space id, not old file name;
1890
this should guarantee that after the log replay each .ibd file
1891
has the correct name for the latest log sequence number; the
1892
proof is left as an exercise :) */
1894
if (fil_tablespace_exists_in_mem(space_id)) {
1895
/* Create the database directory for the new name, if
1896
it does not exist yet */
1897
fil_create_directory_for_tablename(new_name);
1899
/* Rename the table if there is not yet a tablespace
1900
with the same name */
1902
if (fil_get_space_id_for_table(new_name)
1903
== ULINT_UNDEFINED) {
1904
/* We do not care of the old name, that is
1905
why we pass NULL as the first argument */
1906
if (!fil_rename_tablespace(NULL, space_id,
1913
ut_a(type == MLOG_FILE_CREATE);
1915
if (fil_tablespace_exists_in_mem(space_id)) {
1917
} else if (fil_get_space_id_for_table(name)
1918
!= ULINT_UNDEFINED) {
1921
/* Create the database directory for name, if it does
1923
fil_create_directory_for_tablename(name);
1925
ut_a(space_id != 0);
1927
if (fil_create_new_single_table_tablespace(
1928
&space_id, name, FALSE,
1929
FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
1938
/***********************************************************************
1939
Deletes a single-table tablespace. The tablespace must be cached in the
1943
fil_delete_tablespace(
1944
/*==================*/
1945
/* out: TRUE if success */
1946
ulint id) /* in: space id */
1948
/* Outline of the steps below:
1) raise stop_ibuf_merges on the space and wait until no insert buffer
merges are pending on it;
2) mark the space as being deleted and wait until it has no pending
flushes and its single file node has no pending i/o;
3) copy the file name, invalidate the pages of the space in the buffer
pool, free the space object from the cache, delete the file, and write
a log record about the deletion. */
fil_system_t* system = fil_system;
1957
mutex_enter(&(system->mutex));
1959
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
1961
if (space != NULL) {
1962
/* Once this flag is set, fil_inc_pending_ibuf_merges() refuses to
register new merges on the space. */
space->stop_ibuf_merges = TRUE;
1964
/* No merges pending: this wait is over. */
if (space->n_pending_ibuf_merges == 0) {
1965
mutex_exit(&(system->mutex));
1972
/* Merges are still pending: report it, release the mutex, sleep and
retry from the stop_ibuf_merges label. */
ut_print_timestamp(stderr);
1973
fputs(" InnoDB: Warning: trying to"
1974
" delete tablespace ", stderr);
1975
ut_print_filename(stderr, space->name);
1976
fprintf(stderr, ",\n"
1977
"InnoDB: but there are %lu pending"
1978
" ibuf merges on it.\n"
1979
"InnoDB: Loop %lu.\n",
1980
(ulong) space->n_pending_ibuf_merges,
1984
mutex_exit(&(system->mutex));
1986
/* NOTE(review): the unit of the argument is presumably microseconds --
confirm against os_thread_sleep(). */
os_thread_sleep(20000);
1989
goto stop_ibuf_merges;
1993
mutex_exit(&(system->mutex));
1997
/* Second phase: the space is looked up again because the mutex was
released in between. */
mutex_enter(&(system->mutex));
1999
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
2001
if (space == NULL) {
2002
ut_print_timestamp(stderr);
2004
" InnoDB: Error: cannot delete tablespace %lu\n"
2005
"InnoDB: because it is not found in the"
2006
" tablespace memory cache.\n",
2009
mutex_exit(&(system->mutex));
2015
ut_a(space->n_pending_ibuf_merges == 0);
2017
space->is_being_deleted = TRUE;
2019
/* A single-table tablespace consists of exactly one file node. */
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2020
node = UT_LIST_GET_FIRST(space->chain);
2022
/* Flushes or i/o still in progress: report it, release the mutex and
sleep before trying again. */
if (space->n_pending_flushes > 0 || node->n_pending > 0) {
2024
ut_print_timestamp(stderr);
2025
fputs(" InnoDB: Warning: trying to"
2026
" delete tablespace ", stderr);
2027
ut_print_filename(stderr, space->name);
2028
fprintf(stderr, ",\n"
2029
"InnoDB: but there are %lu flushes"
2030
" and %lu pending i/o's on it\n"
2031
"InnoDB: Loop %lu.\n",
2032
(ulong) space->n_pending_flushes,
2033
(ulong) node->n_pending,
2036
mutex_exit(&(system->mutex));
2037
os_thread_sleep(20000);
2044
/* Keep a private copy of the name: the space object is released through
fil_space_free() below, while the path is still needed afterwards for
deleting the file and for the log record. */
path = mem_strdup(space->name);
2046
mutex_exit(&(system->mutex));
2047
#ifndef UNIV_HOTBACKUP
2048
/* Invalidate in the buffer pool all pages belonging to the
2049
tablespace. Since we have set space->is_being_deleted = TRUE, readahead
2050
or ibuf merge can no longer read more pages of this tablespace to the
2051
buffer pool. Thus we can clean the tablespace out of the buffer pool
2052
completely and permanently. The flag is_being_deleted also prevents
2053
fil_flush() from being applied to this tablespace. */
2055
buf_LRU_invalidate_tablespace(id);
2057
/* printf("Deleting tablespace %s id %lu\n", space->name, id); */
2059
/* Remove the space object from the memory cache, then remove the file
itself using the saved path. */
success = fil_space_free(id);
2062
success = os_file_delete(path);
2065
success = os_file_delete_if_exists(path);
2070
#ifndef UNIV_HOTBACKUP
2071
/* Write a log record about the deletion of the .ibd
2072
file, so that ibbackup can replay it in the
2073
--apply-log phase. We use a dummy mtr and the familiar
2074
log write mechanism. */
2077
/* When replaying the operation in ibbackup, do not try
2078
to write any log record */
2081
fil_op_write_log(MLOG_FILE_DELETE, id, path, NULL, &mtr);
2094
/***********************************************************************
2095
Discards a single-table tablespace. The tablespace must be cached in the
2096
memory cache. Discarding is like deleting a tablespace, but
2097
1) we do not drop the table from the data dictionary;
2098
2) we remove all insert buffer entries for the tablespace immediately; in DROP
2099
TABLE they are only removed gradually in the background;
2100
3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
2101
as it originally had. */
2104
fil_discard_tablespace(
2105
/*===================*/
2106
/* out: TRUE if success */
2107
ulint id) /* in: space id */
2111
/* The removal of the tablespace itself is delegated to
fil_delete_tablespace(). */
success = fil_delete_tablespace(id);
2115
/* As the warning text says, a failed delete does not stop the insert
buffer cleanup that follows. */
"InnoDB: Warning: cannot delete tablespace %lu"
2116
" in DISCARD TABLESPACE.\n"
2117
"InnoDB: But let us remove the"
2118
" insert buffer entries for this tablespace.\n",
2122
/* Remove all insert buffer entries for the tablespace */
2124
ibuf_delete_for_discarded_space(id);
2129
/***********************************************************************
2130
Renames the memory cache structures of a single-table tablespace. Both the
space object and its file node receive their own copy of the new name, and
the space is moved to the new name's position in the name hash. */
2133
fil_rename_tablespace_in_mem(
2134
/*=========================*/
2135
/* out: TRUE if success */
2136
fil_space_t* space, /* in: tablespace memory object */
2137
fil_node_t* node, /* in: file node of that tablespace */
2138
const char* path) /* in: new name */
2140
fil_system_t* system = fil_system;
2141
fil_space_t* space2;
2142
const char* old_name = space->name;
2144
/* Precondition 1: the current name must resolve, through the name hash,
to this very space object. */
HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(old_name),
2145
space2, 0 == strcmp(old_name, space2->name));
2146
if (space != space2) {
2147
fputs("InnoDB: Error: cannot find ", stderr);
2148
ut_print_filename(stderr, old_name);
2149
fputs(" in tablespace memory cache\n", stderr);
2154
/* Precondition 2: no cached space may already carry the new name. */
HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(path),
2155
space2, 0 == strcmp(path, space2->name));
2156
if (space2 != NULL) {
2157
fputs("InnoDB: Error: ", stderr);
2158
ut_print_filename(stderr, path);
2159
fputs(" is already in tablespace memory cache\n", stderr);
2164
/* Re-key the space in the name hash. The removal has to come first
because it folds space->name, which is freed right afterwards. */
HASH_DELETE(fil_space_t, name_hash, system->name_hash,
2165
ut_fold_string(space->name), space);
2166
mem_free(space->name);
2167
mem_free(node->name);
2169
/* The space and the node each own a separate duplicate of path. */
space->name = mem_strdup(path);
2170
node->name = mem_strdup(path);
2172
HASH_INSERT(fil_space_t, name_hash, system->name_hash,
2173
ut_fold_string(path), space);
2177
/***********************************************************************
2178
Allocates a file name for a single-table tablespace. The string must be freed
2179
by caller with mem_free(). Two layouts are built below: the given name
followed by ".ibd", or the datadir path, a '/', the given name and ".ibd". */
2184
/* out, own: file name */
2185
const char* name, /* in: table name or a dir path of a
2187
ibool is_temp) /* in: TRUE if it is a dir path */
2189
ulint namelen = strlen(name);
2190
ulint dirlen = strlen(fil_path_to_mysql_datadir);
2191
/* sizeof "/.ibd" includes the terminating NUL, so the buffer is large
enough for the longer layout: datadir + '/' + name + ".ibd" + NUL. */
char* filename = mem_alloc(namelen + dirlen + sizeof "/.ibd");
2194
/* Layout without the datadir prefix: name followed by ".ibd".
NOTE(review): the condition choosing between the two layouts is not
visible here; presumably it is is_temp -- confirm. */
memcpy(filename, name, namelen);
2195
/* sizeof ".ibd" copies the suffix together with its NUL terminator. */
memcpy(filename + namelen, ".ibd", sizeof ".ibd");
2197
/* Layout with the datadir prefix: datadir, '/', name, ".ibd". */
memcpy(filename, fil_path_to_mysql_datadir, dirlen);
2198
filename[dirlen] = '/';
2200
memcpy(filename + dirlen + 1, name, namelen);
2201
memcpy(filename + dirlen + namelen + 1, ".ibd", sizeof ".ibd");
2204
srv_normalize_path_for_win(filename);
2209
/***********************************************************************
2210
Renames a single-table tablespace. The tablespace must be cached in the
2211
tablespace memory cache. */
2214
fil_rename_tablespace(
2215
/*==================*/
2216
/* out: TRUE if success */
2217
const char* old_name, /* in: old table name in the standard
2218
databasename/tablename format of
2219
InnoDB, or NULL if we do the rename
2220
based on the space id only */
2221
ulint id, /* in: space id */
2222
const char* new_name) /* in: new table name in the standard
2223
databasename/tablename format
2226
fil_system_t* system = fil_system;
2232
ibool old_name_was_specified = TRUE;
2237
if (old_name == NULL) {
2238
old_name = "(name not specified)";
2239
old_name_was_specified = FALSE;
2245
ut_print_timestamp(stderr);
2246
fputs(" InnoDB: Warning: problems renaming ", stderr);
2247
ut_print_filename(stderr, old_name);
2248
fputs(" to ", stderr);
2249
ut_print_filename(stderr, new_name);
2250
fprintf(stderr, ", %lu iterations\n", (ulong) count);
2253
mutex_enter(&(system->mutex));
2255
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
2257
if (space == NULL) {
2259
"InnoDB: Error: cannot find space id %lu"
2260
" in the tablespace memory cache\n"
2261
"InnoDB: though the table ", (ulong) id);
2262
ut_print_filename(stderr, old_name);
2263
fputs(" in a rename operation should have that id\n", stderr);
2264
mutex_exit(&(system->mutex));
2269
if (count > 25000) {
2270
space->stop_ios = FALSE;
2271
mutex_exit(&(system->mutex));
2276
/* We temporarily close the .ibd file because we do not trust that
2277
operating systems can rename an open file. For the closing we have to
2278
wait until there are no pending i/o's or flushes on the file. */
2280
space->stop_ios = TRUE;
2282
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2283
node = UT_LIST_GET_FIRST(space->chain);
2285
if (node->n_pending > 0 || node->n_pending_flushes > 0) {
2286
/* There are pending i/o's or flushes, sleep for a while and
2289
mutex_exit(&(system->mutex));
2291
os_thread_sleep(20000);
2295
} else if (node->modification_counter > node->flush_counter) {
2296
/* Flush the space */
2298
mutex_exit(&(system->mutex));
2300
os_thread_sleep(20000);
2306
} else if (node->open) {
2307
/* Close the file */
2309
fil_node_close_file(node, system);
2312
/* Check that the old name in the space is right */
2314
if (old_name_was_specified) {
2315
old_path = fil_make_ibd_name(old_name, FALSE);
2317
ut_a(strcmp(space->name, old_path) == 0);
2318
ut_a(strcmp(node->name, old_path) == 0);
2320
old_path = mem_strdup(space->name);
2323
/* Rename the tablespace and the node in the memory cache */
2324
path = fil_make_ibd_name(new_name, FALSE);
2325
success = fil_rename_tablespace_in_mem(space, node, path);
2328
success = os_file_rename(old_path, path);
2331
/* We have to revert the changes we made
2332
to the tablespace memory cache */
2334
ut_a(fil_rename_tablespace_in_mem(space, node,
2342
space->stop_ios = FALSE;
2344
mutex_exit(&(system->mutex));
2346
#ifndef UNIV_HOTBACKUP
2352
fil_op_write_log(MLOG_FILE_RENAME, id, old_name, new_name,
2360
/***********************************************************************
2361
Creates a new single-table tablespace to a database directory of MySQL.
2362
Database directories are under the 'datadir' of MySQL. The datadir is the
2363
directory of a running mysqld program. We can refer to it by simply the
2364
path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
2365
dir of the mysqld server. */
2368
fil_create_new_single_table_tablespace(
2369
/*===================================*/
2370
/* out: DB_SUCCESS or error code */
2371
ulint* space_id, /* in/out: space id; if this is != 0,
2372
then this is an input parameter,
2374
const char* tablename, /* in: the table name in the usual
2375
databasename/tablename format
2376
of InnoDB, or a dir path to a temp
2378
ibool is_temp, /* in: TRUE if a table created with
2379
CREATE TEMPORARY TABLE */
2380
ulint size) /* in: the initial size of the
2381
tablespace file in pages,
2382
must be >= FIL_IBD_FILE_INITIAL_SIZE */
2392
ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
2394
path = fil_make_ibd_name(tablename, is_temp);
2396
file = os_file_create(path, OS_FILE_CREATE, OS_FILE_NORMAL,
2397
OS_DATA_FILE, &ret);
2399
ut_print_timestamp(stderr);
2400
fputs(" InnoDB: Error creating file ", stderr);
2401
ut_print_filename(stderr, path);
2402
fputs(".\n", stderr);
2404
/* The following call will print an error message */
2406
err = os_file_get_last_error(TRUE);
2408
if (err == OS_FILE_ALREADY_EXISTS) {
2409
fputs("InnoDB: The file already exists though"
2410
" the corresponding table did not\n"
2411
"InnoDB: exist in the InnoDB data dictionary."
2412
" Have you moved InnoDB\n"
2413
"InnoDB: .ibd files around without using the"
2415
"InnoDB: DISCARD TABLESPACE and"
2416
" IMPORT TABLESPACE, or did\n"
2417
"InnoDB: mysqld crash in the middle of"
2418
" CREATE TABLE? You can\n"
2419
"InnoDB: resolve the problem by"
2420
" removing the file ", stderr);
2421
ut_print_filename(stderr, path);
2423
"InnoDB: under the 'datadir' of MySQL.\n",
2427
return(DB_TABLESPACE_ALREADY_EXISTS);
2430
if (err == OS_FILE_DISK_FULL) {
2433
return(DB_OUT_OF_FILE_SPACE);
2440
buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
2441
/* Align the memory for file i/o if we might have O_DIRECT set */
2442
page = ut_align(buf2, UNIV_PAGE_SIZE);
2444
ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0);
2448
os_file_close(file);
2449
os_file_delete(path);
2452
return(DB_OUT_OF_FILE_SPACE);
2455
if (*space_id == 0) {
2456
*space_id = fil_assign_new_space_id();
2459
/* printf("Creating tablespace %s id %lu\n", path, *space_id); */
2461
if (*space_id == ULINT_UNDEFINED) {
2464
os_file_close(file);
2466
os_file_delete(path);
2472
/* We have to write the space id to the file immediately and flush the
2473
file to disk. This is because in crash recovery we must be aware what
2474
tablespaces exist and what are their space id's, so that we can apply
2475
the log records to the right file. It may take quite a while until
2476
buffer pool flush algorithms write anything to the file and flush it to
2477
disk. If we would not write here anything, the file would be filled
2478
with zeros from the call of os_file_set_size(), until a buffer pool
2479
flush would write to it. */
2481
memset(page, '\0', UNIV_PAGE_SIZE);
2483
fsp_header_write_space_id(page, *space_id);
2485
buf_flush_init_for_writing(page, ut_dulint_zero, *space_id, 0);
2487
ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE);
2492
fputs("InnoDB: Error: could not write the first page"
2493
" to tablespace ", stderr);
2494
ut_print_filename(stderr, path);
2499
ret = os_file_flush(file);
2502
fputs("InnoDB: Error: file flush of tablespace ", stderr);
2503
ut_print_filename(stderr, path);
2504
fputs(" failed\n", stderr);
2508
os_file_close(file);
2510
if (*space_id == ULINT_UNDEFINED) {
2514
success = fil_space_create(path, *space_id, FIL_TABLESPACE);
2520
fil_node_create(path, size, *space_id, FALSE);
2522
#ifndef UNIV_HOTBACKUP
2528
fil_op_write_log(MLOG_FILE_CREATE, *space_id, tablename,
2538
/* Purpose: if the flush lsn stamped in the first page of the .ibd file of
the given table exceeds current_lsn, rewrites the pages whose lsn has to be
reset and finally stamps current_lsn as the file flush lsn.
NOTE(review): several lines of this function (return type, braces,
declarations, error branches) are not visible in this listing; the added
comments describe only the statements that are shown. */
/************************************************************************
2539
It is possible, though very improbable, that the lsn's in the tablespace to be
2540
imported have risen above the current system lsn, if a lengthy purge, ibuf
2541
merge, or rollback was performed on a backup taken with ibbackup. If that is
2542
the case, reset page lsn's in the file. We assume that mysqld was shut down
2543
after it performed these cleanup operations on the .ibd file, so that it at
2544
the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
2545
first page of the .ibd file, and we can determine whether we need to reset the
2546
lsn's just by looking at that flush lsn. */
2549
fil_reset_too_high_lsns(
2550
/*====================*/
2551
/* out: TRUE if success */
2552
const char* name, /* in: table name in the
2553
databasename/tablename format */
2554
dulint current_lsn) /* in: reset lsn's if the lsn stamped
2555
to FIL_PAGE_FILE_FLUSH_LSN in the
2556
first page is too high */
2564
ib_longlong file_size;
2569
/* Derive the .ibd path from the table name and open the file read-write. */
filepath = fil_make_ibd_name(name, FALSE);
2571
file = os_file_create_simple_no_error_handling(
2572
filepath, OS_FILE_OPEN, OS_FILE_READ_WRITE, &success);
2574
/* The following call prints an error message */
2575
os_file_get_last_error(TRUE);
2577
ut_print_timestamp(stderr);
2579
fputs(" InnoDB: Error: trying to open a table,"
2581
"InnoDB: open the tablespace file ", stderr);
2582
ut_print_filename(stderr, filepath);
2583
fputs("!\n", stderr);
2589
/* Read the first page of the tablespace */
2591
/* Twice the page size is allocated so that a page-aligned pointer of
UNIV_PAGE_SIZE bytes can be carved out of the buffer below. */
buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
2592
/* Align the memory for file i/o if we might have O_DIRECT set */
2593
page = ut_align(buf2, UNIV_PAGE_SIZE);
2595
success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
2601
/* We have to read the file flush lsn from the header of the file */
2603
flush_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
2605
/* current_lsn is not below the stamped flush lsn. The body of this branch
is not visible in this listing; presumably nothing has to be reset in that
case -- TODO confirm. */
if (ut_dulint_cmp(current_lsn, flush_lsn) >= 0) {
2612
space_id = fsp_header_get_space_id(page);
2614
ut_print_timestamp(stderr);
2616
" InnoDB: Flush lsn in the tablespace file %lu"
2618
"InnoDB: is %lu %lu, which exceeds current"
2619
" system lsn %lu %lu.\n"
2620
"InnoDB: We reset the lsn's in the file ",
2622
(ulong) ut_dulint_get_high(flush_lsn),
2623
(ulong) ut_dulint_get_low(flush_lsn),
2624
(ulong) ut_dulint_get_high(current_lsn),
2625
(ulong) ut_dulint_get_low(current_lsn));
2626
ut_print_filename(stderr, filepath);
2627
fputs(".\n", stderr);
2629
/* Loop through all the pages in the tablespace and reset the lsn and
2630
the page checksum if necessary */
2632
file_size = os_file_get_size_as_iblonglong(file);
2634
/* offset is a byte offset into the file, advanced one page at a time; the
read and write calls below receive it split into its low 32 bits and its
high 32 bits. */
for (offset = 0; offset < file_size; offset += UNIV_PAGE_SIZE) {
2635
success = os_file_read(file, page,
2636
(ulint)(offset & 0xFFFFFFFFUL),
2637
(ulint)(offset >> 32), UNIV_PAGE_SIZE);
2642
if (ut_dulint_cmp(mach_read_from_8(page + FIL_PAGE_LSN),
2644
/* We have to reset the lsn */
2645
space_id = mach_read_from_4(
2646
page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
2647
page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
2649
/* The space id and page number passed on come from the page itself,
read just above; the lsn passed is current_lsn. */
buf_flush_init_for_writing(page, current_lsn, space_id,
2651
success = os_file_write(filepath, file, page,
2652
(ulint)(offset & 0xFFFFFFFFUL),
2653
(ulint)(offset >> 32),
2662
/* The rewritten pages are flushed before the header stamp is updated. */
success = os_file_flush(file);
2668
/* We now update the flush_lsn stamp at the start of the file */
2669
success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
2675
mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
2677
success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE);
2682
success = os_file_flush(file);
2684
os_file_close(file);
2691
2691
/* Purpose: opens the .ibd file of a table read-only, optionally compares the
space id stored in its first page with the id given by the caller, and then
registers the space and one file node for it.
NOTE(review): several lines of this function (return type, braces,
declarations, early exits) are not visible in this listing; the added
comments describe only the statements that are shown. */
/************************************************************************
2692
Tries to open a single-table tablespace and optionally checks the space id is
2693
right in it. If does not succeed, prints an error message to the .err log. This
2694
function is used to open a tablespace when we start up mysqld, and also in
2696
NOTE that we assume this operation is used either at the database startup
2697
or under the protection of the dictionary mutex, so that two users cannot
2698
race here. This operation does not leave the file associated with the
2699
tablespace open, but closes it after we have looked at the space id in it. */
2702
fil_open_single_table_tablespace(
2703
/*=============================*/
2704
/* out: TRUE if success */
2705
ibool check_space_id, /* in: should we check that the space
2706
id in the file is right; we assume
2707
that this function runs much faster
2708
if no check is made, since accessing
2709
the file inode probably is much
2710
faster (the OS caches them) than
2711
accessing the first page of the file */
2712
ulint id, /* in: space id */
2713
const char* name) /* in: table name in the
2714
databasename/tablename format */
2724
/* Derive the .ibd path from the table name and open the file read-only. */
filepath = fil_make_ibd_name(name, FALSE);
2726
file = os_file_create_simple_no_error_handling(
2727
filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
2729
/* The following call prints an error message */
2730
os_file_get_last_error(TRUE);
2732
ut_print_timestamp(stderr);
2734
fputs(" InnoDB: Error: trying to open a table,"
2736
"InnoDB: open the tablespace file ", stderr);
2737
ut_print_filename(stderr, filepath);
2739
"InnoDB: Have you moved InnoDB .ibd files around"
2740
" without using the\n"
2741
"InnoDB: commands DISCARD TABLESPACE and"
2742
" IMPORT TABLESPACE?\n"
2743
"InnoDB: It is also possible that this is"
2744
" a temporary table #sql...,\n"
2745
"InnoDB: and MySQL removed the .ibd file for this.\n"
2746
"InnoDB: Please refer to\n"
2747
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
2748
"innodb-troubleshooting.html\n"
2749
"InnoDB: for how to resolve the issue.\n", stderr);
2756
/* The caller did not ask for verification. The body of this branch is not
visible in this listing; presumably the page read below is skipped and the
caller's id is used as is -- TODO confirm. */
if (!check_space_id) {
2762
/* Read the first page of the tablespace */
2764
buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
2765
/* Align the memory for file i/o if we might have O_DIRECT set */
2766
page = ut_align(buf2, UNIV_PAGE_SIZE);
2768
success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
2770
/* We have to read the tablespace id from the file */
2772
space_id = fsp_header_get_space_id(page);
2776
/* The id stored in the file differs from the id the caller passed in (the
message below calls the latter the data dictionary id): report both. */
if (space_id != id) {
2777
ut_print_timestamp(stderr);
2779
fputs(" InnoDB: Error: tablespace id in file ", stderr);
2780
ut_print_filename(stderr, filepath);
2781
fprintf(stderr, " is %lu, but in the InnoDB\n"
2782
"InnoDB: data dictionary it is %lu.\n"
2783
"InnoDB: Have you moved InnoDB .ibd files"
2784
" around without using the\n"
2785
"InnoDB: commands DISCARD TABLESPACE and"
2786
" IMPORT TABLESPACE?\n"
2787
"InnoDB: Please refer to\n"
2788
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
2789
"innodb-troubleshooting.html\n"
2790
"InnoDB: for how to resolve the issue.\n",
2791
(ulong) space_id, (ulong) id);
2799
/* Register the space under the file path and space id, as a
FIL_TABLESPACE. */
success = fil_space_create(filepath, space_id, FIL_TABLESPACE);
2805
/* We do not measure the size of the file, that is why we pass the 0
2808
fil_node_create(filepath, 0, space_id, FALSE);
2810
os_file_close(file);
2816
2816
#ifdef UNIV_HOTBACKUP
/***********************************************************************
Allocates a file name for an old version of a single-table tablespace.
The result is the original name followed by "_ibbackup_old_vers_" and the
text written by ut_sprintf_timestamp_without_extra_chars().
The string must be freed by caller with mem_free()! */
static
char*
fil_make_ibbackup_old_name(
/*=======================*/
				/* out, own: file name */
	const char*	name)	/* in: original file name */
{
	static const char	suffix[] = "_ibbackup_old_vers_";
	/* sizeof suffix counts the terminating NUL, so the number of
	characters actually copied is one less. */
	ulint			suffix_len = (sizeof suffix) - 1;
	ulint			len = strlen(name);
	/* Room for name + suffix + 15 more characters + NUL. The 15 is
	presumably the width of the timestamp text -- confirm against
	ut_sprintf_timestamp_without_extra_chars(). */
	char*			path = mem_alloc(len + (15 + sizeof suffix));

	memcpy(path, name, len);
	memcpy(path + len, suffix, suffix_len);

	/* Write the timestamp directly after the last suffix character.
	Using sizeof suffix as the offset here would skip one byte, leaving
	it uninitialized between suffix and timestamp, and would push the
	end of the timestamp one byte past the allocation. */
	ut_sprintf_timestamp_without_extra_chars(path + len + suffix_len);

	return(path);
}
#endif /* UNIV_HOTBACKUP */
2838
/* Purpose: builds the path of one .ibd file from the datadir, database name
and file name, opens and measures the file, reads the space id from its
first page and registers the space and a file node for it.
NOTE(review): many lines of this function (return type, braces, declarations,
preprocessor branches, exits) are not visible in this listing; the added
comments describe only the statements that are shown. */
/************************************************************************
2839
Opens an .ibd file and adds the associated single-table tablespace to the
2840
InnoDB fil0fil.c data structures. */
2843
fil_load_single_table_tablespace(
2844
/*=============================*/
2845
const char* dbname, /* in: database name */
2846
const char* filename) /* in: file name (not a path),
2847
including the .ibd extension */
2858
#ifdef UNIV_HOTBACKUP
2861
/* The + 3 covers the two '/' separators and the terminating NUL of the
"%s/%s/%s" path formatted below. */
filepath = mem_alloc(strlen(dbname) + strlen(filename)
2862
+ strlen(fil_path_to_mysql_datadir) + 3);
2864
sprintf(filepath, "%s/%s/%s", fil_path_to_mysql_datadir, dbname,
2866
srv_normalize_path_for_win(filepath);
2868
# ifndef UNIV_HOTBACKUP
2869
/* If lower_case_table_names is 0 or 2, then MySQL allows database
2870
directory names with upper case letters. On Windows, all table and
2871
database names in InnoDB are internally always in lower case. Put the
2872
file path to lower case, so that we are consistent with InnoDB's
2873
internal data dictionary. */
2875
dict_casedn_str(filepath);
2876
# endif /* !UNIV_HOTBACKUP */
2878
/* The file is opened read-only. */
file = os_file_create_simple_no_error_handling(
2879
filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
2881
/* The following call prints an error message */
2882
os_file_get_last_error(TRUE);
2885
"InnoDB: Error: could not open single-table tablespace"
2888
"InnoDB: We do not continue the crash recovery,"
2889
" because the table may become\n"
2890
"InnoDB: corrupt if we cannot apply the log records"
2891
" in the InnoDB log to it.\n"
2892
"InnoDB: To fix the problem and start mysqld:\n"
2893
"InnoDB: 1) If there is a permission problem"
2894
" in the file and mysqld cannot\n"
2895
"InnoDB: open the file, you should"
2896
" modify the permissions.\n"
2897
"InnoDB: 2) If the table is not needed, or you can"
2898
" restore it from a backup,\n"
2899
"InnoDB: then you can remove the .ibd file,"
2900
" and InnoDB will do a normal\n"
2901
"InnoDB: crash recovery and ignore that table.\n"
2902
"InnoDB: 3) If the file system or the"
2903
" disk is broken, and you cannot remove\n"
2904
"InnoDB: the .ibd file, you can set"
2905
" innodb_force_recovery > 0 in my.cnf\n"
2906
"InnoDB: and force InnoDB to continue crash"
2907
" recovery here.\n", filepath);
2911
/* With innodb_force_recovery set, the failure to open the file is reported
as tolerated; only the message of this branch is visible here. */
if (srv_force_recovery > 0) {
2913
"InnoDB: innodb_force_recovery"
2914
" was set to %lu. Continuing crash recovery\n"
2915
"InnoDB: even though we cannot access"
2916
" the .ibd file of this table.\n",
2917
srv_force_recovery);
2924
/* The file size is obtained as two halves, combined into size further
below as (size_high << 32) + size_low. */
success = os_file_get_size(file, &size_low, &size_high);
2927
/* The following call prints an error message */
2928
os_file_get_last_error(TRUE);
2931
"InnoDB: Error: could not measure the size"
2932
" of single-table tablespace file\n"
2934
"InnoDB: We do not continue crash recovery,"
2935
" because the table will become\n"
2936
"InnoDB: corrupt if we cannot apply the log records"
2937
" in the InnoDB log to it.\n"
2938
"InnoDB: To fix the problem and start mysqld:\n"
2939
"InnoDB: 1) If there is a permission problem"
2940
" in the file and mysqld cannot\n"
2941
"InnoDB: access the file, you should"
2942
" modify the permissions.\n"
2943
"InnoDB: 2) If the table is not needed,"
2944
" or you can restore it from a backup,\n"
2945
"InnoDB: then you can remove the .ibd file,"
2946
" and InnoDB will do a normal\n"
2947
"InnoDB: crash recovery and ignore that table.\n"
2948
"InnoDB: 3) If the file system or the disk is broken,"
2949
" and you cannot remove\n"
2950
"InnoDB: the .ibd file, you can set"
2951
" innodb_force_recovery > 0 in my.cnf\n"
2952
"InnoDB: and force InnoDB to continue"
2953
" crash recovery here.\n", filepath);
2955
os_file_close(file);
2958
if (srv_force_recovery > 0) {
2960
"InnoDB: innodb_force_recovery"
2961
" was set to %lu. Continuing crash recovery\n"
2962
"InnoDB: even though we cannot access"
2963
" the .ibd file of this table.\n",
2964
srv_force_recovery);
2971
/* TODO: What to do in other cases where we cannot access an .ibd
2972
file during a crash recovery? */
2974
/* Every .ibd file is created >= 4 pages in size. Smaller files
2977
size = (((ib_longlong)size_high) << 32) + (ib_longlong)size_low;
2978
#ifndef UNIV_HOTBACKUP
2979
if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
2981
"InnoDB: Error: the size of single-table tablespace"
2983
"InnoDB: is only %lu %lu, should be at least %lu!",
2986
(ulong) size_low, (ulong) (4 * UNIV_PAGE_SIZE));
2987
os_file_close(file);
2993
/* Read the first page of the tablespace if the size big enough */
2995
buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
2996
/* Align the memory for file i/o if we might have O_DIRECT set */
2997
page = ut_align(buf2, UNIV_PAGE_SIZE);
2999
/* The header page is read only when the file holds at least
FIL_IBD_FILE_INITIAL_SIZE pages. The assignment of ULINT_UNDEFINED further
below looks like the alternative branch, whose else line is not visible in
this listing -- TODO confirm. */
if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
3000
success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
3002
/* We have to read the tablespace id from the file */
3004
space_id = fsp_header_get_space_id(page);
3006
space_id = ULINT_UNDEFINED;
3009
/* A space id of ULINT_UNDEFINED or 0 is tested twice below. The first test
sits under the preprocessor condition that follows and prints an error; the
second one prints an ibbackup renaming message, so it presumably belongs to
the hot backup build, although its preprocessor line is not visible here. */
#ifndef UNIV_HOTBACKUP
3010
if (space_id == ULINT_UNDEFINED || space_id == 0) {
3012
"InnoDB: Error: tablespace id %lu in file %s"
3013
" is not sensible\n",
3019
if (space_id == ULINT_UNDEFINED || space_id == 0) {
3023
"InnoDB: Renaming tablespace %s of id %lu,\n"
3024
"InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
3025
"InnoDB: because its size %lld is too small"
3026
" (< 4 pages 16 kB each),\n"
3027
"InnoDB: or the space id in the file header"
3028
" is not sensible.\n"
3029
"InnoDB: This can happen in an ibbackup run,"
3030
" and is not dangerous.\n",
3031
filepath, space_id, filepath, size);
3032
os_file_close(file);
3034
/* The rename must succeed: a failure trips the ut_a() assertion. */
new_path = fil_make_ibbackup_old_name(filepath);
3035
ut_a(os_file_rename(filepath, new_path));
3044
/* A backup may contain the same space several times, if the space got
3045
renamed at a sensitive time. Since it is enough to have one version of
3046
the space, we rename the file if a space with the same space id
3047
already exists in the tablespace memory cache. We rather rename the
3048
file than delete it, because if there is a bug, we do not want to
3049
destroy valuable data. */
3051
/* The lookup by space id is done while holding the fil_system mutex. */
mutex_enter(&(fil_system->mutex));
3053
space = fil_get_space_for_id_low(space_id);
3059
"InnoDB: Renaming tablespace %s of id %lu,\n"
3060
"InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
3061
"InnoDB: because space %s with the same id\n"
3062
"InnoDB: was scanned earlier. This can happen"
3063
" if you have renamed tables\n"
3064
"InnoDB: during an ibbackup run.\n",
3065
filepath, space_id, filepath,
3067
os_file_close(file);
3069
new_path = fil_make_ibbackup_old_name(filepath);
3071
/* The mutex is released before the file is renamed. */
mutex_exit(&(fil_system->mutex));
3073
ut_a(os_file_rename(filepath, new_path));
3081
mutex_exit(&(fil_system->mutex));
3083
/* Register the space under the file path and space id, as a
FIL_TABLESPACE. */
success = fil_space_create(filepath, space_id, FIL_TABLESPACE);
3090
/* We do not use the size information we have about the file, because
3091
the rounding formula for extents and pages is somewhat complex; we
3092
let fil_node_open() do that task. */
3094
fil_node_create(filepath, 0, space_id, FALSE);
3096
os_file_close(file);
3101
3101
/* NOTE(review): the return type, braces, declarations and the handling of
ret inside the loop are not visible in this listing; the added comments
describe only the statements that are shown. */
/***************************************************************************
3102
A fault-tolerant function that tries to read the next file name in the
3103
directory. We retry 100 times if os_file_readdir_next_file() returns -1. The
3104
idea is to read as much good data as we can and jump over bad data. */
3107
fil_file_readdir_next_file(
3108
/*=======================*/
3109
/* out: 0 if ok, -1 if error even after the
3110
retries, 1 if at the end of the directory */
3111
ulint* err, /* out: this is set to DB_ERROR if an error
3112
was encountered, otherwise not changed */
3113
const char* dirname,/* in: directory name or path */
3114
os_file_dir_t dir, /* in: directory stream */
3115
os_file_stat_t* info) /* in/out: buffer where the info is returned */
3120
/* At most 100 attempts; each one asks the OS layer for the next entry of
the directory stream and stores the result in info. */
for (i = 0; i < 100; i++) {
3121
ret = os_file_readdir_next_file(dirname, dir, info);
3129
/* Message reporting a failed read of the directory named by dirname. */
"InnoDB: Error: os_file_readdir_next_file()"
3131
"InnoDB: directory %s\n"
3132
"InnoDB: Crash recovery may have failed"
3133
" for some .ibd files!\n", dirname);
3141
3141
/* Purpose: walks the entries of the MySQL datadir, opens each one that may
be a database directory, and calls fil_load_single_table_tablespace() for
the files found in it whose name passes the suffix test below.
NOTE(review): many lines of this function (return type, braces, loop
headers, labels, declarations) are not visible in this listing; the added
comments describe only the statements that are shown. */
/************************************************************************
3142
At the server startup, if we need crash recovery, scans the database
3143
directories under the MySQL datadir, looking for .ibd files. Those files are
3144
single-table tablespaces. We need to know the space id in each of them so that
3145
we know into which file we should look to check the contents of a page stored
3146
in the doublewrite buffer, also to know where to apply log records where the
3147
space id is != 0. */
3150
fil_load_single_table_tablespaces(void)
3151
/*===================================*/
3152
/* out: DB_SUCCESS or error number */
3155
char* dbpath = NULL;
3156
ulint dbpath_len = 100;
3158
os_file_dir_t dbdir;
3159
os_file_stat_t dbinfo;
3160
os_file_stat_t fileinfo;
3161
ulint err = DB_SUCCESS;
3163
/* The datadir of MySQL is always the default directory of mysqld */
3165
dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE);
3172
/* dbpath is the buffer that receives each "datadir/dbname" path; it starts
at dbpath_len (100) bytes and is allocated again further below when a path
does not fit. */
dbpath = mem_alloc(dbpath_len);
3174
/* Scan all directories under the datadir. They are the database
3175
directories of MySQL. */
3177
ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir,
3181
/* printf("Looking at %s in datadir\n", dbinfo.name); */
3183
/* Regular files and entries of unknown type are skipped. */
if (dbinfo.type == OS_FILE_TYPE_FILE
3184
|| dbinfo.type == OS_FILE_TYPE_UNKNOWN) {
3186
goto next_datadir_item;
3189
/* We found a symlink or a directory; try opening it to see
3190
if a symlink is a directory */
3192
/* The + 2 covers the '/' separator and the terminating NUL of the "%s/%s"
path formatted below. */
len = strlen(fil_path_to_mysql_datadir)
3193
+ strlen (dbinfo.name) + 2;
3194
if (len > dbpath_len) {
3201
dbpath = mem_alloc(dbpath_len);
3203
sprintf(dbpath, "%s/%s", fil_path_to_mysql_datadir,
3205
srv_normalize_path_for_win(dbpath);
3207
dbdir = os_file_opendir(dbpath, FALSE);
3209
if (dbdir != NULL) {
3210
/* printf("Opened dir %s\n", dbinfo.name); */
3212
/* We found a database directory; loop through it,
3213
looking for possible .ibd files in it */
3215
ret = fil_file_readdir_next_file(&err, dbpath, dbdir,
3219
" Looking at file %s\n", fileinfo.name); */
3221
/* Subdirectories of a database directory are skipped. */
if (fileinfo.type == OS_FILE_TYPE_DIR) {
3223
goto next_file_item;
3226
/* We found a symlink or a file */
3227
/* Names longer than four characters are tested on their last four
characters; the string compared against is on a line not visible in this
listing. */
if (strlen(fileinfo.name) > 4
3228
&& 0 == strcmp(fileinfo.name
3229
+ strlen(fileinfo.name) - 4,
3231
/* The name ends in .ibd; try opening
3233
fil_load_single_table_tablespace(
3234
dbinfo.name, fileinfo.name);
3237
ret = fil_file_readdir_next_file(&err,
3242
if (0 != os_file_closedir(dbdir)) {
3243
fputs("InnoDB: Warning: could not"
3244
" close database directory ", stderr);
3245
ut_print_filename(stderr, dbpath);
3253
ret = fil_file_readdir_next_file(&err,
3254
fil_path_to_mysql_datadir,
3260
if (0 != os_file_closedir(dir)) {
3262
"InnoDB: Error: could not close MySQL datadir\n");
3270
3270
/* Purpose: walks system->space_list under the system mutex and prints a
warning for each space that satisfies the condition tested in the loop.
NOTE(review): the end of the header comment, the return type, braces, the
loop header and part of the condition are not visible in this listing; the
added comments describe only the statements that are shown. */
/************************************************************************
3271
If we need crash recovery, and we have called
3272
fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
3273
we can call this function to print an error message of orphaned .ibd files
3274
for which there is not a data dictionary entry with a matching table name
3278
fil_print_orphaned_tablespaces(void)
3279
/*================================*/
3281
fil_system_t* system = fil_system;
3284
/* The list of spaces is traversed while holding the system mutex. */
mutex_enter(&(system->mutex));
3286
space = UT_LIST_GET_FIRST(system->space_list);
3289
/* Only spaces of purpose FIL_TABLESPACE with a non-zero id are candidates;
the remainder of this condition is on a line not visible here. */
if (space->purpose == FIL_TABLESPACE && space->id != 0
3291
fputs("InnoDB: Warning: tablespace ", stderr);
3292
ut_print_filename(stderr, space->name);
3293
fprintf(stderr, " of id %lu has no matching table in\n"
3294
"InnoDB: the InnoDB data dictionary.\n",
3298
space = UT_LIST_GET_NEXT(space_list, space);
3301
mutex_exit(&(system->mutex));
3304
3304
/* NOTE(review): the return type, braces, declarations and return statements
of this function are not visible in this listing; the added comments
describe only the statements that are shown. */
/***********************************************************************
3305
Returns TRUE if a single-table tablespace does not exist in the memory cache,
3306
or is being deleted there. */
3309
fil_tablespace_deleted_or_being_deleted_in_mem(
3310
/*===========================================*/
3311
/* out: TRUE if does not exist or is being\
3313
ulint id, /* in: space id */
3314
ib_longlong version)/* in: tablespace_version should be this; if
3315
you pass -1 as the value of this, then this
3316
parameter is ignored */
3318
fil_system_t* system = fil_system;
3323
/* The lookup and the field checks are done under the system mutex; every
path shown below releases it again. */
mutex_enter(&(system->mutex));
3325
/* Look the space up by id in the system->spaces hash table. */
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
3327
/* No such space, or the space is flagged as being deleted. */
if (space == NULL || space->is_being_deleted) {
3328
mutex_exit(&(system->mutex));
3333
/* A version of -1 disables this check; otherwise a tablespace_version that
differs from the expected one takes its own exit path. */
if (version != ((ib_longlong)-1)
3334
&& space->tablespace_version != version) {
3335
mutex_exit(&(system->mutex));
3340
mutex_exit(&(system->mutex));
3345
3345
/* NOTE(review): the return type, braces, declarations and return statements
of this function are not visible in this listing; the added comments
describe only the statements that are shown. */
/***********************************************************************
3346
Returns TRUE if a single-table tablespace exists in the memory cache. */
3349
fil_tablespace_exists_in_mem(
3350
/*=========================*/
3351
/* out: TRUE if exists */
3352
ulint id) /* in: space id */
3354
fil_system_t* system = fil_system;
3359
/* The lookup is done under the system mutex. */
mutex_enter(&(system->mutex));
3361
/* Look the space up by id in the system->spaces hash table. */
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
3363
/* Not found: the mutex is released on this path as well as on the path
after the test. */
if (space == NULL) {
3364
mutex_exit(&(system->mutex));
3369
mutex_exit(&(system->mutex));
3374
3374
/* Purpose: looks a tablespace up twice, by space id and by the path derived
from the table name, and treats the table as matched when both lookups find
the same space object; otherwise it can print a diagnosis of which of the
two lookups failed.
NOTE(review): several lines of this function (return type, braces, parts of
the parameter comments, declarations, return statements) are not visible in
this listing; the added comments describe only the statements that are
shown. */
/***********************************************************************
3375
Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
3376
cache. Note that if we have not done a crash recovery at the database startup,
3377
there may be many tablespaces which are not yet in the memory cache. */
3380
fil_space_for_table_exists_in_mem(
3381
/*==============================*/
3382
/* out: TRUE if a matching tablespace
3383
exists in the memory cache */
3384
ulint id, /* in: space id */
3385
const char* name, /* in: table name in the standard
3386
'databasename/tablename' format or
3387
the dir path to a temp table */
3388
ibool is_temp, /* in: TRUE if created with CREATE
3390
ibool mark_space, /* in: in crash recovery, at database
3391
startup we mark all spaces which have
3392
an associated table in the InnoDB
3393
data dictionary, so that
3394
we can print a warning about orphaned
3396
ibool print_error_if_does_not_exist)
3397
/* in: print detailed error
3398
information to the .err log if a
3399
matching tablespace is not found from
3402
fil_system_t* system = fil_system;
3403
fil_space_t* namespace;
3409
mutex_enter(&(system->mutex));
3411
path = fil_make_ibd_name(name, is_temp);
3413
/* Look if there is a space with the same id */
3415
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
3417
/* Look if there is a space with the same name; the name is the
3418
directory path from the datadir to the file */
3420
HASH_SEARCH(name_hash, system->name_hash,
3421
ut_fold_string(path), namespace,
3422
0 == strcmp(namespace->name, path));
3423
/* Both lookups found something and it is the same space object. */
if (space && space == namespace) {
3431
mutex_exit(&(system->mutex));
3436
/* The caller does not want a diagnosis printed: the mutex is released
without printing anything. */
if (!print_error_if_does_not_exist) {
3439
mutex_exit(&(system->mutex));
3444
/* Nothing is registered under the requested id. The two messages below
differ in whether a space was found under the requested name. */
if (space == NULL) {
3445
if (namespace == NULL) {
3446
ut_print_timestamp(stderr);
3447
fputs(" InnoDB: Error: table ", stderr);
3448
ut_print_filename(stderr, name);
3449
fprintf(stderr, "\n"
3450
"InnoDB: in InnoDB data dictionary"
3451
" has tablespace id %lu,\n"
3452
"InnoDB: but tablespace with that id"
3453
" or name does not exist. Have\n"
3454
"InnoDB: you deleted or moved .ibd files?\n"
3455
"InnoDB: This may also be a table created with"
3456
" CREATE TEMPORARY TABLE\n"
3457
"InnoDB: whose .ibd and .frm files"
3458
" MySQL automatically removed, but the\n"
3459
"InnoDB: table still exists in the"
3460
" InnoDB internal data dictionary.\n",
3463
ut_print_timestamp(stderr);
3464
fputs(" InnoDB: Error: table ", stderr);
3465
ut_print_filename(stderr, name);
3466
fprintf(stderr, "\n"
3467
"InnoDB: in InnoDB data dictionary has"
3468
" tablespace id %lu,\n"
3469
"InnoDB: but a tablespace with that id"
3470
" does not exist. There is\n"
3471
"InnoDB: a tablespace of name %s and id %lu,"
3473
"InnoDB: you deleted or moved .ibd files?\n",
3474
(ulong) id, namespace->name,
3475
(ulong) namespace->id);
3478
fputs("InnoDB: Please refer to\n"
3479
"InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
3480
"innodb-troubleshooting.html\n"
3481
"InnoDB: for how to resolve the issue.\n", stderr);
3484
mutex_exit(&(system->mutex));
3489
/* A space is registered under the requested id, but its name differs from
the path derived from the table name. */
if (0 != strcmp(space->name, path)) {
3490
ut_print_timestamp(stderr);
3491
fputs(" InnoDB: Error: table ", stderr);
3492
ut_print_filename(stderr, name);
3493
fprintf(stderr, "\n"
3494
"InnoDB: in InnoDB data dictionary has"
3495
" tablespace id %lu,\n"
3496
"InnoDB: but the tablespace with that id"
3498
"InnoDB: Have you deleted or moved .ibd files?\n",
3499
(ulong) id, space->name);
3501
/* Additionally report the space that does carry the requested name, if
there is one. */
if (namespace != NULL) {
3502
fputs("InnoDB: There is a tablespace"
3503
" with the right name\n"
3504
"InnoDB: ", stderr);
3505
ut_print_filename(stderr, namespace->name);
3506
fprintf(stderr, ", but its id is %lu.\n",
3507
(ulong) namespace->id);
3514
mutex_exit(&(system->mutex));
3519
3519
/* NOTE(review): the return type, braces, part of the return-value comment,
declarations, the use of the lookup result and the return statement are not
visible in this listing; the added comments describe only the statements
that are shown. */
/***********************************************************************
3520
Checks if a single-table tablespace for a given table name exists in the
3521
tablespace memory cache. */
3524
fil_get_space_id_for_table(
3525
/*=======================*/
3526
/* out: space id, ULINT_UNDEFINED if not
3528
const char* name) /* in: table name in the standard
3529
'databasename/tablename' format */
3531
fil_system_t* system = fil_system;
3532
fil_space_t* namespace;
3533
/* id starts out as ULINT_UNDEFINED. */
ulint id = ULINT_UNDEFINED;
3538
/* The path is built and the name lookup is done while holding the system
mutex. */
mutex_enter(&(system->mutex));
3540
path = fil_make_ibd_name(name, FALSE);
3542
/* Look if there is a space with the same name; the name is the
3543
directory path to the file */
3545
HASH_SEARCH(name_hash, system->name_hash,
3546
ut_fold_string(path), namespace,
3547
0 == strcmp(namespace->name, path));
3554
mutex_exit(&(system->mutex));
3559
3559
/* Purpose: grows the last file node of a cached space by writing zero-filled
pages after its current end, in chunks, until the space holds
size_after_extend pages, and reports the resulting size through actual_size.
NOTE(review): several lines of this function (return type, braces,
declarations, branch headers, the return statement) are not visible in this
listing; the added comments describe only the statements that are shown. */
/**************************************************************************
3560
Tries to extend a data file so that it would accommodate the number of pages
3561
given. The tablespace must be cached in the memory cache. If the space is big
3562
enough already, does nothing. */
3565
fil_extend_space_to_desired_size(
3566
/*=============================*/
3567
/* out: TRUE if success */
3568
ulint* actual_size, /* out: size of the space after extension;
3569
if we ran out of disk space this may be lower
3570
than the desired size */
3571
ulint space_id, /* in: space id */
3572
ulint size_after_extend)/* in: desired size in pages after the
3573
extension; if the current space size is bigger
3574
than this already, the function does nothing */
3576
fil_system_t* system = fil_system;
3582
ulint start_page_no;
3583
ulint file_start_page_no;
3586
ibool success = TRUE;
3588
fil_mutex_enter_and_prepare_for_io(space_id);
3590
/* Look the space up by id in the system->spaces hash table. */
HASH_SEARCH(hash, system->spaces, space_id, space,
3591
space->id == space_id);
3594
if (space->size >= size_after_extend) {
3595
/* Space already big enough */
3597
*actual_size = space->size;
3599
mutex_exit(&(system->mutex));
3604
/* Only the last file node in the chain of the space is extended. */
node = UT_LIST_GET_LAST(space->chain);
3606
fil_node_prepare_for_io(node, system, space);
3608
/* start_page_no is the first page number to be added, counted within the
whole space; file_start_page_no is the space-wide page number at which the
last node begins, so their difference is a page offset inside that file. */
start_page_no = space->size;
3609
file_start_page_no = space->size - node->size;
3611
/* Extend at most 64 pages at a time */
3612
buf_size = ut_min(64, size_after_extend - start_page_no)
3614
/* One extra page is allocated so that a page-aligned pointer covering
buf_size bytes can be carved out of the buffer; the aligned part is
zero-filled. */
buf2 = mem_alloc(buf_size + UNIV_PAGE_SIZE);
3615
buf = ut_align(buf2, UNIV_PAGE_SIZE);
3617
memset(buf, 0, buf_size);
3619
while (start_page_no < size_after_extend) {
3620
ulint n_pages = ut_min(buf_size / UNIV_PAGE_SIZE,
3621
size_after_extend - start_page_no);
3623
/* 4096 * (pages per megabyte) is the number of pages in 4 GB, so the
quotient is the high 32-bit word of the byte offset in the file and the
remainder (in pages) is the basis of the low word. */
offset_high = (start_page_no - file_start_page_no)
3624
/ (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE));
3625
offset_low = ((start_page_no - file_start_page_no)
3626
% (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE)))
3628
#ifdef UNIV_HOTBACKUP
3629
success = os_file_write(node->name, node->handle, buf,
3630
offset_low, offset_high,
3631
UNIV_PAGE_SIZE * n_pages);
3633
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
3634
node->name, node->handle, buf,
3635
offset_low, offset_high,
3636
UNIV_PAGE_SIZE * n_pages,
3640
node->size += n_pages;
3641
space->size += n_pages;
3643
os_has_said_disk_full = FALSE;
3645
/* Let us measure the size of the file to determine
3646
how much we were able to extend it */
3649
(os_file_get_size_as_iblonglong
3651
/ UNIV_PAGE_SIZE)) - node->size;
3653
node->size += n_pages;
3654
space->size += n_pages;
3659
start_page_no += n_pages;
3664
fil_node_complete_io(node, system, OS_FILE_WRITE);
3666
*actual_size = space->size;
3668
#ifndef UNIV_HOTBACKUP
3669
/* For space id 0 the size of the last configured data file is kept in
step, rounded down to a whole number of megabytes. */
if (space_id == 0) {
3670
ulint pages_per_mb = (1024 * 1024) / UNIV_PAGE_SIZE;
3672
/* Keep the last data file size info up to date, rounded to
3675
srv_data_file_sizes[srv_n_data_files - 1]
3676
= (node->size / pages_per_mb) * pages_per_mb;
3678
#endif /* !UNIV_HOTBACKUP */
3681
printf("Extended %s to %lu, actual size %lu pages\n", space->name,
3682
size_after_extend, *actual_size); */
3683
mutex_exit(&(system->mutex));
3685
/* The space is flushed after the system mutex has been released. */
fil_flush(space_id);
3690
3690
/* NOTE(review): the return type, braces, declarations, the loop header and
the closing preprocessor line of this function are not visible in this
listing; the added comments describe only the statements that are shown. */
#ifdef UNIV_HOTBACKUP
3691
/************************************************************************
3692
Extends all tablespaces to the size stored in the space header. During the
3693
ibbackup --apply-log phase we extended the spaces on-demand so that log records
3694
could be applied, but that may have left spaces still too small compared to
3695
the size stored in the space header. */
3698
fil_extend_tablespaces_to_stored_len(void)
3699
/*======================================*/
3701
fil_system_t* system = fil_system;
3705
ulint size_in_header;
3709
/* One-page buffer that receives the page read from each space. */
buf = mem_alloc(UNIV_PAGE_SIZE);
3711
mutex_enter(&(system->mutex));
3713
space = UT_LIST_GET_FIRST(system->space_list);
3716
/* Every space visited is asserted to be of purpose FIL_TABLESPACE. */
ut_a(space->purpose == FIL_TABLESPACE);
3718
mutex_exit(&(system->mutex)); /* no need to protect with a
3719
mutex, because this is a
3720
single-threaded operation */
3721
error = fil_read(TRUE, space->id, 0, 0, UNIV_PAGE_SIZE, buf,
3723
ut_a(error == DB_SUCCESS);
3725
/* The desired size is taken from the page just read. */
size_in_header = fsp_get_size_low(buf);
3727
success = fil_extend_space_to_desired_size(
3728
&actual_size, space->id, size_in_header);
3731
"InnoDB: Error: could not extend the"
3732
" tablespace of %s\n"
3733
"InnoDB: to the size stored in header,"
3735
"InnoDB: size after extension %lu pages\n"
3736
"InnoDB: Check that you have free disk space"
3738
space->name, size_in_header, actual_size);
3742
/* The mutex is taken again before moving to the next space in the list. */
mutex_enter(&(system->mutex));
3744
space = UT_LIST_GET_NEXT(space_list, space);
3747
mutex_exit(&(system->mutex));
3753
3753
/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
3755
/* NOTE(review): the return type, braces, declarations, branch bodies other
than the increment, and the return statement are not visible in this
listing; the added comments describe only the statements that are shown. */
/***********************************************************************
3756
Tries to reserve free extents in a file space. */
3759
fil_space_reserve_free_extents(
3760
/*===========================*/
3761
/* out: TRUE if succeed */
3762
ulint id, /* in: space id */
3763
ulint n_free_now, /* in: number of free extents now */
3764
ulint n_to_reserve) /* in: how many one wants to reserve */
3766
fil_system_t* system = fil_system;
3772
/* The reservation counter of the space is read and updated under the
system mutex. */
mutex_enter(&(system->mutex));
3774
/* Look the space up by id in the system->spaces hash table. */
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
3778
/* The request does not fit if the extents already reserved plus the new
request exceed the number of extents that are free now. */
if (space->n_reserved_extents + n_to_reserve > n_free_now) {
3781
space->n_reserved_extents += n_to_reserve;
3785
mutex_exit(&(system->mutex));
3790
3790
/* NOTE(review): the return type, braces and declarations of this function
are not visible in this listing; the added comments describe only the
statements that are shown. */
/***********************************************************************
3791
Releases free extents in a file space. */
3794
fil_space_release_free_extents(
3795
/*===========================*/
3796
ulint id, /* in: space id */
3797
ulint n_reserved) /* in: how many one reserved */
3799
fil_system_t* system = fil_system;
3804
/* The reservation counter of the space is updated under the system
mutex. */
mutex_enter(&(system->mutex));
3806
/* Look the space up by id in the system->spaces hash table. */
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
3809
/* Releasing more extents than are currently reserved trips this
assertion instead of letting the counter wrap below zero. */
ut_a(space->n_reserved_extents >= n_reserved);
3811
space->n_reserved_extents -= n_reserved;
3813
mutex_exit(&(system->mutex));
3816
3816
/* Purpose: reads the n_reserved_extents counter of the space with the given
id while holding the system mutex.
NOTE(review): the end of the header comment, the return type, braces,
declarations and the return statement are not visible in this listing; the
added comments describe only the statements that are shown. */
/***********************************************************************
3817
Gets the number of reserved extents. If the database is silent, this number
3821
fil_space_get_n_reserved_extents(
3822
/*=============================*/
3823
ulint id) /* in: space id */
3825
fil_system_t* system = fil_system;
3831
mutex_enter(&(system->mutex));
3833
/* Look the space up by id in the system->spaces hash table. */
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
3837
/* The counter is copied into n before the mutex is released. */
n = space->n_reserved_extents;
3839
mutex_exit(&(system->mutex));
3844
3844
/*============================ FILE I/O ================================*/
3846
/* NOTE(review): the end of the header comment, the return type, braces and
some statements of this function (for example any update of the pending
counters) are not visible in this listing; the added comments describe only
the statements that are shown. */
/************************************************************************
3847
NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
3849
Prepares a file node for i/o. Opens the file if it is closed. Updates the
3850
pending i/o's field in the node and the system appropriately. Takes the node
3851
off the LRU list if it is in the LRU list. The caller must hold the fil_sys
3855
fil_node_prepare_for_io(
3856
/*====================*/
3857
fil_node_t* node, /* in: file node */
3858
fil_system_t* system, /* in: tablespace memory cache */
3859
fil_space_t* space) /* in: space */
3861
ut_ad(node && system && space);
3862
/* Debug assertion: the calling thread owns the system mutex. */
ut_ad(mutex_own(&(system->mutex)));
3864
/* Having more than max_n_open + 5 files open only produces the warning
below. */
if (system->n_open > system->max_n_open + 5) {
3865
ut_print_timestamp(stderr);
3867
" InnoDB: Warning: open files %lu"
3868
" exceeds the limit %lu\n",
3869
(ulong) system->n_open,
3870
(ulong) system->max_n_open);
3873
if (node->open == FALSE) {
3874
/* File is closed: open it */
3875
/* A closed file is asserted to have no pending i/o. */
ut_a(node->n_pending == 0);
3877
fil_node_open_file(node, system, space);
3880
/* A node with no pending i/o that belongs to a FIL_TABLESPACE space with a
non-zero id is taken off system->LRU here; the list is asserted to be
non-empty first. */
if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE
3881
&& space->id != 0) {
3882
/* The node is in the LRU list, remove it */
3884
ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
3886
UT_LIST_REMOVE(LRU, system->LRU, node);
3892
3892
/* NOTE(review): the return type, braces and some statements of this function
(for example any update of node->n_pending) are not visible in this listing;
the added comments describe only the statements that are shown. */
/************************************************************************
3893
Updates the data structures when an i/o operation finishes. Updates the
3894
pending i/o's field in the node appropriately. */
3897
fil_node_complete_io(
3898
/*=================*/
3899
fil_node_t* node, /* in: file node */
3900
fil_system_t* system, /* in: tablespace memory cache */
3901
ulint type) /* in: OS_FILE_WRITE or OS_FILE_READ; marks
3902
the node as modified if
3903
type == OS_FILE_WRITE */
3907
/* Debug assertion: the calling thread owns the system mutex. */
ut_ad(mutex_own(&(system->mutex)));
3909
/* There must be at least one pending i/o on the node to complete. */
ut_a(node->n_pending > 0);
3913
/* A write advances the system-wide modification counter and records the new
value in the node. */
if (type == OS_FILE_WRITE) {
3914
system->modification_counter++;
3915
node->modification_counter = system->modification_counter;
3917
/* The is_in_unflushed_spaces flag ensures the space is added to
system->unflushed_spaces only when it is not flagged as being there. */
if (!node->space->is_in_unflushed_spaces) {
3919
node->space->is_in_unflushed_spaces = TRUE;
3920
UT_LIST_ADD_FIRST(unflushed_spaces,
3921
system->unflushed_spaces,
3926
/* A node with no pending i/o that belongs to a FIL_TABLESPACE space with a
non-zero id goes to the head of system->LRU. */
if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE
3927
&& node->space->id != 0) {
3928
/* The node must be put back to the LRU list */
3929
UT_LIST_ADD_FIRST(LRU, system->LRU, node);
3933
3933
/************************************************************************
3934
Report information about an invalid page access. */
3937
fil_report_invalid_page_access(
3938
/*===========================*/
3939
ulint block_offset, /* in: block offset */
3940
ulint space_id, /* in: space id */
3941
const char* space_name, /* in: space name */
3942
ulint byte_offset, /* in: byte offset */
3943
ulint len, /* in: I/O length */
3944
ulint type) /* in: I/O type */
3947
/* All six parameters are passed to the message, in the order block offset,
space id, space name, byte offset, length, i/o type; the numeric ones are
cast to ulong to match the %lu conversions. */
"InnoDB: Error: trying to access page number %lu"
3949
"InnoDB: space name %s,\n"
3950
"InnoDB: which is outside the tablespace bounds.\n"
3951
"InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n"
3952
"InnoDB: If you get this error at mysqld startup,"
3953
" please check that\n"
3954
"InnoDB: your my.cnf matches the ibdata files"
3955
" that you have in the\n"
3956
"InnoDB: MySQL server.\n",
3957
(ulong) block_offset, (ulong) space_id, space_name,
3958
(ulong) byte_offset, (ulong) len, (ulong) type);
3961
/************************************************************************
3962
Reads or writes data. This operation is asynchronous (aio). */
3967
/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
3968
if we are trying to do i/o on a tablespace
3969
which does not exist */
3970
ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE,
3971
ORed to OS_FILE_LOG, if a log i/o
3972
and ORed to OS_AIO_SIMULATED_WAKE_LATER
3973
if simulated aio and we want to post a
3974
batch of i/os; NOTE that a simulated batch
3975
may introduce hidden chances of deadlocks,
3976
because i/os are not actually handled until
3977
all have been posted: use with great
3979
ibool sync, /* in: TRUE if synchronous aio is desired */
3980
ulint space_id, /* in: space id */
3981
ulint block_offset, /* in: offset in number of blocks */
3982
ulint byte_offset, /* in: remainder of offset in bytes; in
3983
aio this must be divisible by the OS block
3985
ulint len, /* in: how many bytes to read or write; this
3986
must not cross a file boundary; in aio this
3987
must be a block size multiple */
3988
void* buf, /* in/out: buffer where to store read data
3989
or from where to write; in aio this must be
3990
appropriately aligned */
3991
void* message) /* in: message for aio handler if non-sync
3992
aio used, else ignored */
3994
fil_system_t* system = fil_system;
4004
/* Split the two modifier flags off type: each is remembered in its own
variable and then cleared, so that the comparisons of type against
OS_FILE_READ and OS_FILE_WRITE further down see the bare i/o direction. */
is_log = type & OS_FILE_LOG;
4005
type = type & ~OS_FILE_LOG;
4007
wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
4008
type = type & ~OS_AIO_SIMULATED_WAKE_LATER;
4010
ut_ad(byte_offset < UNIV_PAGE_SIZE);
4013
/* The file offset computed further down shifts by UNIV_PAGE_SIZE_SHIFT,
so the shift and the page size have to agree. */
ut_a((1 << UNIV_PAGE_SIZE_SHIFT) == UNIV_PAGE_SIZE);
4014
ut_ad(fil_validate());
4015
#ifndef UNIV_LOG_DEBUG
4016
/* ibuf bitmap pages must be read in the sync aio mode: */
4017
ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
4018
|| !ibuf_bitmap_page(block_offset) || sync || is_log);
4019
#ifdef UNIV_SYNC_DEBUG
4020
ut_ad(!ibuf_inside() || is_log || (type == OS_FILE_WRITE)
4021
|| ibuf_page(space_id, block_offset));
4026
} else if (type == OS_FILE_READ && !is_log
4027
&& ibuf_page(space_id, block_offset)) {
4029
} else if (is_log) {
4032
mode = OS_AIO_NORMAL;
4035
/* Add the request length to the read or the written byte counter,
depending on the i/o direction. */
if (type == OS_FILE_READ) {
4036
srv_data_read+= len;
4037
} else if (type == OS_FILE_WRITE) {
4038
srv_data_written+= len;
4041
/* Reserve the fil_system mutex and make sure that we can open at
4042
least one file while holding it, if the file is not already open */
4044
fil_mutex_enter_and_prepare_for_io(space_id);
4046
HASH_SEARCH(hash, system->spaces, space_id, space,
4047
space->id == space_id);
4049
mutex_exit(&(system->mutex));
4051
ut_print_timestamp(stderr);
4053
" InnoDB: Error: trying to do i/o"
4054
" to a tablespace which does not exist.\n"
4055
"InnoDB: i/o type %lu, space id %lu,"
4056
" page no. %lu, i/o length %lu bytes\n",
4057
(ulong) type, (ulong) space_id, (ulong) block_offset,
4060
return(DB_TABLESPACE_DELETED);
4063
ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));
4065
/* Start from the first file of the space; block_offset is made relative
to a later file by subtracting the sizes of the files stepped over. */
node = UT_LIST_GET_FIRST(space->chain);
4069
fil_report_invalid_page_access(
4070
block_offset, space_id, space->name,
4071
byte_offset, len, type);
4076
if (space->id != 0 && node->size == 0) {
4077
/* We do not know the size of a single-table tablespace
4078
before we open the file */
4083
if (node->size > block_offset) {
4087
block_offset -= node->size;
4088
node = UT_LIST_GET_NEXT(chain, node);
4092
/* Open file if closed */
4093
fil_node_prepare_for_io(node, system, space);
4095
/* Check that at least the start offset is within the bounds of a
4096
single-table tablespace */
4097
if (space->purpose == FIL_TABLESPACE && space->id != 0
4098
&& node->size <= block_offset) {
4100
fil_report_invalid_page_access(
4101
block_offset, space_id, space->name, byte_offset,
4107
/* Now we have made the changes in the data structures of system */
4108
mutex_exit(&(system->mutex));
4110
/* Calculate the low 32 bits and the high 32 bits of the file offset */
4112
offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
4113
offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL)
4116
/* The request must fit in the pages that remain in this file after
block_offset; the right-hand side is byte_offset + len rounded up to
whole pages. */
ut_a(node->size - block_offset
4117
>= (byte_offset + len + (UNIV_PAGE_SIZE - 1)) / UNIV_PAGE_SIZE);
4121
/* Both the byte offset and the length must be multiples of
OS_FILE_LOG_BLOCK_SIZE. */
ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
4122
ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
4124
#ifdef UNIV_HOTBACKUP
4125
/* In ibbackup do normal i/o, not aio */
4126
if (type == OS_FILE_READ) {
4127
ret = os_file_read(node->handle, buf, offset_low, offset_high,
4130
ret = os_file_write(node->name, node->handle, buf,
4131
offset_low, offset_high, len);
4134
/* Queue the aio request */
4135
ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
4136
offset_low, offset_high, len, node, message);
4140
if (mode == OS_AIO_SYNC) {
4141
/* The i/o operation is already completed when we return from
4144
mutex_enter(&(system->mutex));
4146
fil_node_complete_io(node, system, type);
4148
mutex_exit(&(system->mutex));
4150
ut_ad(fil_validate());
4156
/************************************************************************
4157
Reads data from a space to a buffer. Remember that the possible incomplete
4158
blocks at the end of file are ignored: they are not taken into account when
4159
calculating the byte offset within a space. */
4164
/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
4165
if we are trying to do i/o on a tablespace
4166
which does not exist */
4167
ibool sync, /* in: TRUE if synchronous aio is desired */
4168
ulint space_id, /* in: space id */
4169
ulint block_offset, /* in: offset in number of blocks */
4170
ulint byte_offset, /* in: remainder of offset in bytes; in aio
4171
this must be divisible by the OS block size */
4172
ulint len, /* in: how many bytes to read; this must not
4173
cross a file boundary; in aio this must be a
4174
block size multiple */
4175
void* buf, /* in/out: buffer where to store data read;
4176
in aio this must be appropriately aligned */
4177
void* message) /* in: message for aio handler if non-sync
4178
aio used, else ignored */
4180
/* Delegates to fil_io() with the i/o type fixed to OS_FILE_READ; every
other argument is passed through unchanged and the result of fil_io() is
returned as is. */
return(fil_io(OS_FILE_READ, sync, space_id, block_offset,
4181
byte_offset, len, buf, message));
4184
/************************************************************************
4185
Writes data to a space from a buffer. Remember that the possible incomplete
4186
blocks at the end of file are ignored: they are not taken into account when
4187
calculating the byte offset within a space. */
4192
/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
4193
if we are trying to do i/o on a tablespace
4194
which does not exist */
4195
ibool sync, /* in: TRUE if synchronous aio is desired */
4196
ulint space_id, /* in: space id */
4197
ulint block_offset, /* in: offset in number of blocks */
4198
ulint byte_offset, /* in: remainder of offset in bytes; in aio
4199
this must be divisible by the OS block size */
4200
ulint len, /* in: how many bytes to write; this must
4201
not cross a file boundary; in aio this must
4202
be a block size multiple */
4203
void* buf, /* in: buffer from which to write; in aio
4204
this must be appropriately aligned */
4205
void* message) /* in: message for aio handler if non-sync
4206
aio used, else ignored */
4208
/* Delegates to fil_io() with the i/o type fixed to OS_FILE_WRITE; every
other argument is passed through unchanged and the result of fil_io() is
returned as is. */
return(fil_io(OS_FILE_WRITE, sync, space_id, block_offset,
4209
byte_offset, len, buf, message));
4212
/**************************************************************************
4213
Waits for an aio operation to complete. This function is used to write the
4214
handler for completed requests. The aio array of pending requests is divided
4215
into segments (see os0file.c for more info). The thread specifies which
4216
segment it wants to wait for. */
4221
ulint segment) /* in: the number of the segment in the aio
4222
array to wait for */
4224
fil_system_t* system = fil_system;
4226
fil_node_t* fil_node;
4230
ut_ad(fil_validate());
4232
/* Wait on the given segment with the handler that matches the aio flavour
in use. The handler calls receive the address of fil_node, which is used
below as the file node of the finished request. The current activity is
published through srv_set_io_thread_op_info() before each step. */
if (os_aio_use_native_aio) {
4233
srv_set_io_thread_op_info(segment, "native aio handle");
4235
ret = os_aio_windows_handle(segment, 0, &fil_node,
4237
#elif defined(POSIX_ASYNC_IO)
4238
ret = os_aio_posix_handle(segment, &fil_node, &message);
4240
ret = 0; /* Eliminate compiler warning */
4244
srv_set_io_thread_op_info(segment, "simulated aio handle");
4246
ret = os_aio_simulated_handle(segment, &fil_node,
4252
/* The file node bookkeeping is updated first, under the fil_system mutex;
the page or log completion routine runs after the mutex is released. */
srv_set_io_thread_op_info(segment, "complete io for fil node");
4254
mutex_enter(&(system->mutex));
4256
fil_node_complete_io(fil_node, fil_system, type);
4258
mutex_exit(&(system->mutex));
4260
ut_ad(fil_validate());
4262
/* Do the i/o handling */
4263
/* IMPORTANT: since i/o handling for reads will read also the insert
4264
buffer in tablespace 0, you have to be very careful not to introduce
4265
deadlocks in the i/o system. We keep tablespace 0 data files always
4266
open, and use a special i/o thread to serve insert buffer requests. */
4268
/* A message that is a buffer pool block is completed as a page i/o;
NOTE(review): the log completion below presumably sits in the else
branch of this test - confirm. */
if (buf_pool_is_block(message)) {
4269
srv_set_io_thread_op_info(segment, "complete io for buf page");
4270
buf_page_io_complete(message);
4272
srv_set_io_thread_op_info(segment, "complete io for log");
4273
log_io_complete(message);
4277
/**************************************************************************
4278
Flushes to disk possible writes cached by the OS. If the space does not exist
4279
or is being dropped, does not do anything. */
4284
ulint space_id) /* in: file space id (this can be a group of
4285
log files or a tablespace of the database) */
4287
fil_system_t* system = fil_system;
4291
ib_longlong old_mod_counter;
4293
mutex_enter(&(system->mutex));
4295
/* Look the space up by its id while holding the fil_system mutex */
HASH_SEARCH(hash, system->spaces, space_id, space,
4296
space->id == space_id);
4297
/* A space that was not found, or that is being deleted, is not flushed */
if (!space || space->is_being_deleted) {
4298
mutex_exit(&(system->mutex));
4303
space->n_pending_flushes++; /* prevent dropping of the space while
4305
node = UT_LIST_GET_FIRST(space->chain);
4308
if (node->modification_counter > node->flush_counter) {
4311
/* We want to flush the changes at least up to
4313
old_mod_counter = node->modification_counter;
4315
if (space->purpose == FIL_TABLESPACE) {
4316
fil_n_pending_tablespace_flushes++;
4318
fil_n_pending_log_flushes++;
4319
fil_n_log_flushes++;
4322
if (node->is_raw_disk) {
4328
if (node->n_pending_flushes > 0) {
4329
/* We want to avoid calling os_file_flush() on
4330
the file twice at the same time, because we do
4331
not know what bugs OS's may contain in file
4332
i/o; sleep for a while */
4334
/* The mutex is released for the duration of the sleep and taken again
afterwards. */
mutex_exit(&(system->mutex));
4336
os_thread_sleep(20000);
4338
mutex_enter(&(system->mutex));
4340
/* Re-check after sleeping: flush_counter may have reached the value we
were after in the meantime. */
if (node->flush_counter >= old_mod_counter) {
4349
/* The file handle is copied and the flush is counted as pending on the
node before the mutex is released. */
file = node->handle;
4350
node->n_pending_flushes++;
4352
mutex_exit(&(system->mutex));
4354
/* fprintf(stderr, "Flushing to file %s\n",
4357
os_file_flush(file);
4359
mutex_enter(&(system->mutex));
4361
node->n_pending_flushes--;
4363
if (node->flush_counter < old_mod_counter) {
4364
node->flush_counter = old_mod_counter;
4366
if (space->is_in_unflushed_spaces
4367
&& fil_space_is_flushed(space)) {
4369
space->is_in_unflushed_spaces = FALSE;
4373
system->unflushed_spaces,
4378
if (space->purpose == FIL_TABLESPACE) {
4379
fil_n_pending_tablespace_flushes--;
4381
fil_n_pending_log_flushes--;
4385
node = UT_LIST_GET_NEXT(chain, node);
4388
space->n_pending_flushes--;
4390
mutex_exit(&(system->mutex));
4393
/**************************************************************************
4394
Flushes to disk the writes in file spaces of the given type possibly cached by
4398
fil_flush_file_spaces(
4399
/*==================*/
4400
ulint purpose) /* in: FIL_TABLESPACE, FIL_LOG */
4402
fil_system_t* system = fil_system;
4408
mutex_enter(&(system->mutex));
4410
/* The length of the unflushed list is used below as the capacity of the
id array. */
n_space_ids = UT_LIST_GET_LEN(system->unflushed_spaces);
4411
/* An empty unflushed list: the mutex is released at once */
if (n_space_ids == 0) {
4413
mutex_exit(&system->mutex);
4417
/* Assemble a list of space ids to flush. Previously, we
4418
traversed system->unflushed_spaces and called UT_LIST_GET_NEXT()
4419
on a space that was just removed from the list by fil_flush().
4420
Thus, the space could be dropped and the memory overwritten. */
4421
space_ids = mem_alloc(n_space_ids * sizeof *space_ids);
4425
for (space = UT_LIST_GET_FIRST(system->unflushed_spaces);
4427
space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
4429
/* Only spaces of the requested purpose that are not being deleted
have their id stored. */
if (space->purpose == purpose && !space->is_being_deleted) {
4431
space_ids[n_space_ids++] = space->id;
4435
/* The mutex is released before the fil_flush() calls are made */
mutex_exit(&system->mutex);
4437
/* Flush the spaces. It will not hurt to call fil_flush() on
4438
a non-existing space id. */
4439
for (i = 0; i < n_space_ids; i++) {
4441
fil_flush(space_ids[i]);
4444
/* Release the id array obtained from mem_alloc() above */
mem_free(space_ids);
4447
/**********************************************************************
4448
Checks the consistency of the tablespace cache. */
4453
/* out: TRUE if ok */
4455
fil_system_t* system = fil_system;
4457
fil_node_t* fil_node;
4461
mutex_enter(&(system->mutex));
4463
/* Look for spaces in the hash table */
4465
for (i = 0; i < hash_get_n_cells(system->spaces); i++) {
4467
/* Spaces sharing a hash cell are visited one after another through
HASH_GET_NEXT(). */
space = HASH_GET_FIRST(system->spaces, i);
4469
while (space != NULL) {
4470
UT_LIST_VALIDATE(chain, fil_node_t, space->chain);
4472
fil_node = UT_LIST_GET_FIRST(space->chain);
4474
while (fil_node != NULL) {
4475
/* A node with pending i/o must be open */
if (fil_node->n_pending > 0) {
4476
ut_a(fil_node->open);
4479
if (fil_node->open) {
4482
fil_node = UT_LIST_GET_NEXT(chain, fil_node);
4484
space = HASH_GET_NEXT(hash, space);
4488
/* system->n_open must equal the local n_open value */
ut_a(system->n_open == n_open);
4490
UT_LIST_VALIDATE(LRU, fil_node_t, system->LRU);
4492
fil_node = UT_LIST_GET_FIRST(system->LRU);
4494
/* Every node on the LRU list must have no pending i/o, be open, and belong
to a tablespace other than space 0. */
while (fil_node != NULL) {
4495
ut_a(fil_node->n_pending == 0);
4496
ut_a(fil_node->open);
4497
ut_a(fil_node->space->purpose == FIL_TABLESPACE);
4498
ut_a(fil_node->space->id != 0);
4500
fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
4503
mutex_exit(&(system->mutex));
4508
/************************************************************************
4509
Returns TRUE if file address is undefined. */
4513
/* out: TRUE if undefined */
4514
fil_addr_t addr) /* in: address */
4516
/* The test looks at the page field of the address and compares it with
FIL_NULL. */
if (addr.page == FIL_NULL) {
4524
/************************************************************************
4525
Accessor functions for a file page */
4528
fil_page_get_prev(byte* page)
4530
/* Returns what mach_read_from_4() reads at offset FIL_PAGE_PREV of the page */
return(mach_read_from_4(page + FIL_PAGE_PREV));
4534
fil_page_get_next(byte* page)
4536
/* Returns what mach_read_from_4() reads at offset FIL_PAGE_NEXT of the page */
return(mach_read_from_4(page + FIL_PAGE_NEXT));
4539
/*************************************************************************
4540
Sets the file page type. */
4545
byte* page, /* in: file page */
4546
ulint type) /* in: type */
4550
/* The type is stored with mach_write_to_2() at offset FIL_PAGE_TYPE of the
page. */
mach_write_to_2(page + FIL_PAGE_TYPE, type);
4553
/*************************************************************************
4554
Gets the file page type. */
4559
/* out: type; NOTE that if the type has not been
4560
written to page, the return value not defined */
4561
byte* page) /* in: file page */
4565
/* Returns what mach_read_from_2() reads at offset FIL_PAGE_TYPE of the page */
return(mach_read_from_2(page + FIL_PAGE_TYPE));