1
/*****************************************************************************
3
Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15
St, Fifth Floor, Boston, MA 02110-1301 USA
17
*****************************************************************************/
19
/**************************************************//**
21
The tablespace memory cache
23
Created 10/25/1995 Heikki Tuuri
24
*******************************************************/
29
#include "hash0hash.h"
31
#include "mach0data.h"
37
#include "srv0start.h"
40
#include "dict0dict.h"
41
#include "page0page.h"
43
#ifndef UNIV_HOTBACKUP
45
# include "ibuf0ibuf.h"
46
# include "sync0sync.h"
48
#else /* !UNIV_HOTBACKUP */
49
static ulint srv_data_read, srv_data_written;
50
#endif /* !UNIV_HOTBACKUP */
53
/*
IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
54
=============================================
56
The tablespace cache is responsible for providing fast read/write access to
57
tablespaces and logs of the database. File creation and deletion is done
58
in other modules which know more of the logic of the operation, however.
60
A tablespace consists of a chain of files. The size of the files does not
61
have to be divisible by the database block size, because we may just leave
62
the last incomplete block unused. When a new file is appended to the
63
tablespace, the maximum size of the file is also specified. At the moment,
64
we think that it is best to extend the file to its maximum size already at
65
the creation of the file, because then we can avoid dynamically extending
66
the file when more space is needed for the tablespace.
68
A block's position in the tablespace is specified with a 32-bit unsigned
69
integer. The files in the chain are thought to be catenated, and the block
70
corresponding to an address n is the nth block in the catenated file (where
71
the first block is named the 0th block, and the incomplete block fragments
72
at the end of files are not taken into account). A tablespace can be extended
73
by appending a new file at the end of the chain.
75
Our tablespace concept is similar to the one of Oracle.
77
To acquire more speed in disk transfers, a technique called disk striping is
78
sometimes used. This means that logical block addresses are divided in a
79
round-robin fashion across several disks. Windows NT supports disk striping,
80
so there we do not need to support it in the database. Disk striping is
81
implemented in hardware in RAID disks. We conclude that it is not necessary
82
to implement it in the database. Oracle 7 does not support disk striping,
85
Another trick used at some database sites is replacing tablespace files by
86
raw disks, that is, the whole physical disk drive, or a partition of it, is
87
opened as a single file, and it is accessed through byte offsets calculated
88
from the start of the disk or the partition. This is recommended in some
89
books on database tuning to achieve more speed in i/o. Using raw disk
90
certainly prevents the OS from fragmenting disk space, but it is not clear
91
if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
92
system + EIDE Conner disk only a negligible difference in speed when reading
93
from a file, versus reading from a raw disk.
95
To have fast access to a tablespace or a log file, we put the data structures
96
to a hash table. Each tablespace and log file is given a unique 32-bit
99
Some operating systems do not support many open files at the same time,
100
though NT seems to tolerate at least 900 open files. Therefore, we put the
101
open files in an LRU-list. If we need to open another file, we may close the
102
file at the end of the LRU-list. When an i/o-operation is pending on a file,
103
the file cannot be closed. We take the file nodes with pending i/o-operations
104
out of the LRU-list and keep a count of pending operations. When an operation
105
completes, we decrement the count and return the file node to the LRU-list if
106
the count drops to zero. */
108
/** When mysqld is run, the default directory "." is the mysqld datadir,
109
but in the MySQL Embedded Server Library and ibbackup it is not the default
110
directory, and we must set the base file path explicitly */
111
UNIV_INTERN const char* fil_path_to_mysql_datadir = ".";
113
/** The number of fsyncs done to the log */
114
UNIV_INTERN ulint fil_n_log_flushes = 0;
116
/** Number of pending redo log flushes */
117
UNIV_INTERN ulint fil_n_pending_log_flushes = 0;
118
/** Number of pending tablespace flushes */
119
UNIV_INTERN ulint fil_n_pending_tablespace_flushes = 0;
121
/** The null file address */
122
UNIV_INTERN fil_addr_t fil_addr_null = {FIL_NULL, 0};
124
#ifdef UNIV_PFS_MUTEX
125
/* Key to register fil_system_mutex with performance schema */
126
UNIV_INTERN mysql_pfs_key_t fil_system_mutex_key;
127
#endif /* UNIV_PFS_MUTEX */
129
#ifdef UNIV_PFS_RWLOCK
130
/* Key to register file space latch with performance schema */
131
UNIV_INTERN mysql_pfs_key_t fil_space_latch_key;
132
#endif /* UNIV_PFS_RWLOCK */
134
/** File node of a tablespace or the log data space */
135
struct fil_node_struct {
136
fil_space_t* space; /*!< backpointer to the space where this node
138
char* name; /*!< path to the file */
139
ibool open; /*!< TRUE if file open */
140
os_file_t handle; /*!< OS handle to the file, if file open */
141
ibool is_raw_disk;/*!< TRUE if the 'file' is actually a raw
142
device or a raw disk partition */
143
ulint size; /*!< size of the file in database pages, 0 if
144
not known yet; the possible last incomplete
145
megabyte may be ignored if space == 0 */
147
/*!< count of pending i/o's on this file;
148
closing of the file is not allowed if
150
ulint n_pending_flushes;
151
/*!< count of pending flushes on this file;
152
closing of the file is not allowed if
154
ib_int64_t modification_counter;/*!< when we write to the file we
155
increment this by one */
156
ib_int64_t flush_counter;/*!< up to what
157
modification_counter value we have
158
flushed the modifications to disk */
159
UT_LIST_NODE_T(fil_node_t) chain;
160
/*!< link field for the file chain */
161
UT_LIST_NODE_T(fil_node_t) LRU;
162
/*!< link field for the LRU list */
163
ulint magic_n;/*!< FIL_NODE_MAGIC_N */
166
/** Value of fil_node_struct::magic_n */
167
#define FIL_NODE_MAGIC_N 89389
169
/** Tablespace or log data space: let us call them by a common name space */
170
struct fil_space_struct {
171
char* name; /*!< space name = the path to the first file in
173
ulint id; /*!< space id */
174
ib_int64_t tablespace_version;
175
/*!< in DISCARD/IMPORT this timestamp
176
is used to check if we should ignore
177
an insert buffer merge request for a
178
page because it actually was for the
179
previous incarnation of the space */
180
ibool mark; /*!< this is set to TRUE at database startup if
181
the space corresponds to a table in the InnoDB
182
data dictionary; so we can print a warning of
183
orphaned tablespaces */
184
ibool stop_ios;/*!< TRUE if we want to rename the
185
.ibd file of tablespace and want to
186
stop temporarily posting of new i/o
187
requests on the file */
188
ibool stop_ibuf_merges;
189
/*!< we set this TRUE when we start
190
deleting a single-table tablespace */
191
ibool is_being_deleted;
192
/*!< this is set to TRUE when we start
193
deleting a single-table tablespace and its
194
file; when this flag is set no further i/o
195
or flush requests can be placed on this space,
196
though there may be such requests still being
197
processed on this space */
198
ulint purpose;/*!< FIL_TABLESPACE, FIL_LOG, or
200
UT_LIST_BASE_NODE_T(fil_node_t) chain;
201
/*!< base node for the file chain */
202
ulint size; /*!< space size in pages; 0 if a single-table
203
tablespace whose size we do not know yet;
204
last incomplete megabytes in data files may be
205
ignored if space == 0 */
206
ulint flags; /*!< compressed page size and file format, or 0 */
207
ulint n_reserved_extents;
208
/*!< number of reserved free extents for
209
ongoing operations like B-tree page split */
210
ulint n_pending_flushes; /*!< this is positive when flushing
211
the tablespace to disk; dropping of the
212
tablespace is forbidden if this is positive */
213
ulint n_pending_ibuf_merges;/*!< this is positive
214
when merging insert buffer entries to
215
a page so that we may need to access
216
the ibuf bitmap page in the
217
tablespace: dropping of the tablespace
218
is forbidden if this is positive */
219
hash_node_t hash; /*!< hash chain node */
220
hash_node_t name_hash;/*!< hash chain the name_hash table */
221
#ifndef UNIV_HOTBACKUP
222
rw_lock_t latch; /*!< latch protecting the file space storage
224
#endif /* !UNIV_HOTBACKUP */
225
UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
226
/*!< list of spaces with at least one unflushed
227
file we have written to */
228
ibool is_in_unflushed_spaces; /*!< TRUE if this space is
229
currently in unflushed_spaces */
230
UT_LIST_NODE_T(fil_space_t) space_list;
231
/*!< list of all spaces */
232
ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
235
/** Value of fil_space_struct::magic_n */
236
#define FIL_SPACE_MAGIC_N 89472
238
/** The tablespace memory cache */
239
typedef struct fil_system_struct fil_system_t;
241
/** The tablespace memory cache; also the totality of logs (the log
242
data space) is stored here; below we talk about tablespaces, but also
243
the ib_logfiles form a 'space' and it is handled here */
245
struct fil_system_struct {
246
#ifndef UNIV_HOTBACKUP
247
mutex_t mutex; /*!< The mutex protecting the cache */
248
#endif /* !UNIV_HOTBACKUP */
249
hash_table_t* spaces; /*!< The hash table of spaces in the
250
system; they are hashed on the space
252
hash_table_t* name_hash; /*!< hash table based on the space
254
UT_LIST_BASE_NODE_T(fil_node_t) LRU;
255
/*!< base node for the LRU list of the
256
most recently used open files with no
257
pending i/o's; if we start an i/o on
258
the file, we first remove it from this
259
list, and return it to the start of
260
the list when the i/o ends;
261
log files and the system tablespace are
262
not put to this list: they are opened
263
after the startup, and kept open until
265
UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
266
/*!< base node for the list of those
267
tablespaces whose files contain
268
unflushed writes; those spaces have
269
at least one file node where
270
modification_counter > flush_counter */
271
ulint n_open; /*!< number of files currently open */
272
ulint max_n_open; /*!< n_open is not allowed to exceed
274
ib_int64_t modification_counter;/*!< when we write to a file we
275
increment this by one */
276
ulint max_assigned_id;/*!< maximum space id in the existing
277
tables, or assigned during the time
278
mysqld has been up; at an InnoDB
279
startup we scan the data dictionary
280
and set here the maximum of the
281
space id's of the tables there */
282
ib_int64_t tablespace_version;
283
/*!< a counter which is incremented for
284
every space object memory creation;
285
every space mem object gets a
286
'timestamp' from this; in DISCARD/
287
IMPORT this is used to check if we
288
should ignore an insert buffer merge
290
UT_LIST_BASE_NODE_T(fil_space_t) space_list;
291
/*!< list of all file spaces */
292
ibool space_id_reuse_warned;
293
/*!< TRUE if fil_space_create()
294
has issued a warning about
295
potential space_id reuse */
298
/** The tablespace memory cache. This variable is NULL before the module is
initialized. */
300
static fil_system_t* fil_system = NULL;
303
/********************************************************************//**
304
NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
306
Prepares a file node for i/o. Opens the file if it is closed. Updates the
307
pending i/o's field in the node and the system appropriately. Takes the node
308
off the LRU list if it is in the LRU list. The caller must hold the fil_sys
312
fil_node_prepare_for_io(
313
/*====================*/
314
fil_node_t* node, /*!< in: file node */
315
fil_system_t* system, /*!< in: tablespace memory cache */
316
fil_space_t* space); /*!< in: space */
317
/********************************************************************//**
318
Updates the data structures when an i/o operation finishes. Updates the
319
pending i/o's field in the node appropriately. */
322
fil_node_complete_io(
323
/*=================*/
324
fil_node_t* node, /*!< in: file node */
325
fil_system_t* system, /*!< in: tablespace memory cache */
326
ulint type); /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
327
the node as modified if
328
type == OS_FILE_WRITE */
329
/*******************************************************************//**
330
Checks if a single-table tablespace for a given table name exists in the
331
tablespace memory cache.
332
@return space id, ULINT_UNDEFINED if not found */
335
fil_get_space_id_for_table(
336
/*=======================*/
337
const char* name); /*!< in: table name in the standard
338
'databasename/tablename' format */
339
/*******************************************************************//**
340
Frees a space object from the tablespace memory cache. Closes the files in
341
the chain but does not delete them. There must not be any pending i/o's or
342
flushes on the files.
343
@return TRUE on success */
348
ulint id, /* in: space id */
349
ibool x_latched); /* in: TRUE if caller has space->latch
351
/********************************************************************//**
352
Reads data from a space to a buffer. Remember that the possible incomplete
353
blocks at the end of file are ignored: they are not taken into account when
354
calculating the byte offset within a space.
355
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
356
i/o on a tablespace which does not exist */
361
ibool sync, /*!< in: TRUE if synchronous aio is desired */
362
ulint space_id, /*!< in: space id */
363
ulint zip_size, /*!< in: compressed page size in bytes;
364
0 for uncompressed pages */
365
ulint block_offset, /*!< in: offset in number of blocks */
366
ulint byte_offset, /*!< in: remainder of offset in bytes; in aio
367
this must be divisible by the OS block size */
368
ulint len, /*!< in: how many bytes to read; this must not
369
cross a file boundary; in aio this must be a
370
block size multiple */
371
void* buf, /*!< in/out: buffer where to store data read;
372
in aio this must be appropriately aligned */
373
void* message) /*!< in: message for aio handler if non-sync
374
aio used, else ignored */
376
return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset,
377
byte_offset, len, buf, message));
380
/********************************************************************//**
381
Writes data to a space from a buffer. Remember that the possible incomplete
382
blocks at the end of file are ignored: they are not taken into account when
383
calculating the byte offset within a space.
384
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
385
i/o on a tablespace which does not exist */
390
ibool sync, /*!< in: TRUE if synchronous aio is desired */
391
ulint space_id, /*!< in: space id */
392
ulint zip_size, /*!< in: compressed page size in bytes;
393
0 for uncompressed pages */
394
ulint block_offset, /*!< in: offset in number of blocks */
395
ulint byte_offset, /*!< in: remainder of offset in bytes; in aio
396
this must be divisible by the OS block size */
397
ulint len, /*!< in: how many bytes to write; this must
398
not cross a file boundary; in aio this must
399
be a block size multiple */
400
void* buf, /*!< in: buffer from which to write; in aio
401
this must be appropriately aligned */
402
void* message) /*!< in: message for aio handler if non-sync
403
aio used, else ignored */
405
return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
406
byte_offset, len, buf, message));
409
/*******************************************************************//**
410
Returns the table space by a given id, NULL if not found. */
415
ulint id) /*!< in: space id */
419
ut_ad(mutex_own(&fil_system->mutex));
421
HASH_SEARCH(hash, fil_system->spaces, id,
423
ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
429
/*******************************************************************//**
430
Returns the table space by a given name, NULL if not found. */
433
fil_space_get_by_name(
434
/*==================*/
435
const char* name) /*!< in: space name */
440
ut_ad(mutex_own(&fil_system->mutex));
442
fold = ut_fold_string(name);
444
HASH_SEARCH(name_hash, fil_system->name_hash, fold,
446
ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
447
!strcmp(name, space->name));
452
#ifndef UNIV_HOTBACKUP
453
/*******************************************************************//**
454
Returns the version number of a tablespace, -1 if not found.
455
@return version number, -1 if the tablespace does not exist in the
459
fil_space_get_version(
460
/*==================*/
461
ulint id) /*!< in: space id */
464
ib_int64_t version = -1;
468
mutex_enter(&fil_system->mutex);
470
space = fil_space_get_by_id(id);
473
version = space->tablespace_version;
476
mutex_exit(&fil_system->mutex);
481
/*******************************************************************//**
482
Returns the latch of a file space.
483
@return latch protecting storage allocation */
488
ulint id, /*!< in: space id */
489
ulint* flags) /*!< out: tablespace flags */
495
mutex_enter(&fil_system->mutex);
497
space = fil_space_get_by_id(id);
502
*flags = space->flags;
505
mutex_exit(&fil_system->mutex);
507
return(&(space->latch));
510
/*******************************************************************//**
511
Returns the type of a file space.
512
@return FIL_TABLESPACE or FIL_LOG */
517
ulint id) /*!< in: space id */
523
mutex_enter(&fil_system->mutex);
525
space = fil_space_get_by_id(id);
529
mutex_exit(&fil_system->mutex);
531
return(space->purpose);
533
#endif /* !UNIV_HOTBACKUP */
535
/**********************************************************************//**
536
Checks if all the file nodes in a space are flushed. The caller must hold
537
the fil_system mutex.
538
@return TRUE if all are flushed */
541
fil_space_is_flushed(
542
/*=================*/
543
fil_space_t* space) /*!< in: space */
547
ut_ad(mutex_own(&fil_system->mutex));
549
node = UT_LIST_GET_FIRST(space->chain);
552
if (node->modification_counter > node->flush_counter) {
557
node = UT_LIST_GET_NEXT(chain, node);
563
/*******************************************************************//**
564
Appends a new file to the chain of files of a space. File must be closed. */
569
const char* name, /*!< in: file name (file must be closed) */
570
ulint size, /*!< in: file size in database blocks, rounded
571
downwards to an integer */
572
ulint id, /*!< in: space id where to append */
573
ibool is_raw) /*!< in: TRUE if a raw device or
574
a raw disk partition */
582
mutex_enter(&fil_system->mutex);
584
node = static_cast<fil_node_t *>(mem_alloc(sizeof(fil_node_t)));
586
node->name = mem_strdup(name);
589
ut_a(!is_raw || srv_start_raw_disk_in_use);
591
node->is_raw_disk = is_raw;
593
node->magic_n = FIL_NODE_MAGIC_N;
595
node->n_pending_flushes = 0;
597
node->modification_counter = 0;
598
node->flush_counter = 0;
600
space = fil_space_get_by_id(id);
603
ut_print_timestamp(stderr);
605
" InnoDB: Error: Could not find tablespace %lu for\n"
606
"InnoDB: file ", (ulong) id);
607
ut_print_filename(stderr, name);
608
fputs(" in the tablespace memory cache.\n", stderr);
609
mem_free(node->name);
613
mutex_exit(&fil_system->mutex);
622
UT_LIST_ADD_LAST(chain, space->chain, node);
624
if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
626
fil_system->max_assigned_id = id;
629
mutex_exit(&fil_system->mutex);
632
/********************************************************************//**
633
Opens the file of a node of a tablespace. The caller must own the fil_system
mutex. */
639
fil_node_t* node, /*!< in: file node */
640
fil_system_t* system, /*!< in: tablespace memory cache */
641
fil_space_t* space) /*!< in: space */
643
ib_int64_t size_bytes;
653
ut_ad(mutex_own(&(system->mutex)));
654
ut_a(node->n_pending == 0);
655
ut_a(node->open == FALSE);
657
if (node->size == 0) {
658
/* It must be a single-table tablespace and we do not know the
659
size of the file yet. First we open the file in the normal
660
mode, no async I/O here, for simplicity. Then do some checks,
661
and close the file again.
662
NOTE that we could not use the simple file read function
663
os_file_read() in Windows to read from a file opened for
666
node->handle = os_file_create_simple_no_error_handling(
667
innodb_file_data_key, node->name, OS_FILE_OPEN,
668
OS_FILE_READ_ONLY, &success);
670
/* The following call prints an error message */
671
os_file_get_last_error(TRUE);
673
ut_print_timestamp(stderr);
676
" InnoDB: Fatal error: cannot open %s\n."
677
"InnoDB: Have you deleted .ibd files"
678
" under a running mysqld server?\n",
683
os_file_get_size(node->handle, &size_low, &size_high);
685
size_bytes = (((ib_int64_t)size_high) << 32)
686
+ (ib_int64_t)size_low;
687
#ifdef UNIV_HOTBACKUP
688
if (space->id == 0) {
689
node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
690
os_file_close(node->handle);
693
#endif /* UNIV_HOTBACKUP */
694
ut_a(space->purpose != FIL_LOG);
695
ut_a(space->id != 0);
697
if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
699
"InnoDB: Error: the size of single-table"
700
" tablespace file %s\n"
701
"InnoDB: is only %lu %lu,"
702
" should be at least %lu!\n",
706
(ulong) (FIL_IBD_FILE_INITIAL_SIZE
712
/* Read the first page of the tablespace */
714
buf2 = static_cast<unsigned char *>(ut_malloc(2 * UNIV_PAGE_SIZE));
715
/* Align the memory for file i/o if we might have O_DIRECT
717
page = static_cast<unsigned char *>(ut_align(buf2, UNIV_PAGE_SIZE));
719
success = os_file_read(node->handle, page, 0, 0,
721
space_id = fsp_header_get_space_id(page);
722
flags = fsp_header_get_flags(page);
726
/* Close the file now that we have read the space id from it */
728
os_file_close(node->handle);
730
if (UNIV_UNLIKELY(space_id != space->id)) {
732
"InnoDB: Error: tablespace id is %lu"
733
" in the data dictionary\n"
734
"InnoDB: but in file %s it is %lu!\n",
735
space->id, node->name, space_id);
740
if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED
743
"InnoDB: Error: tablespace id %lu"
744
" in file %s is not sensible\n",
745
(ulong) space_id, node->name);
750
if (UNIV_UNLIKELY(space->flags != flags)) {
752
"InnoDB: Error: table flags are %lx"
753
" in the data dictionary\n"
754
"InnoDB: but the flags in file %s are %lx!\n",
755
space->flags, node->name, flags);
760
if (size_bytes >= 1024 * 1024) {
761
/* Truncate the size to whole megabytes. */
762
size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
765
if (!(flags & DICT_TF_ZSSIZE_MASK)) {
766
node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
770
/ dict_table_flags_to_zip_size(flags));
773
#ifdef UNIV_HOTBACKUP
775
#endif /* UNIV_HOTBACKUP */
776
space->size += node->size;
779
/* printf("Opening file %s\n", node->name); */
781
/* Open the file for reading and writing, in Windows normally in the
782
unbuffered async I/O mode, though global variables may make
783
os_file_create() to fall back to the normal file I/O mode. */
785
if (space->purpose == FIL_LOG) {
786
node->handle = os_file_create(innodb_file_log_key,
787
node->name, OS_FILE_OPEN,
788
OS_FILE_AIO, OS_LOG_FILE,
790
} else if (node->is_raw_disk) {
791
node->handle = os_file_create(innodb_file_data_key,
794
OS_FILE_AIO, OS_DATA_FILE,
797
node->handle = os_file_create(innodb_file_data_key,
798
node->name, OS_FILE_OPEN,
799
OS_FILE_AIO, OS_DATA_FILE,
809
if (space->purpose == FIL_TABLESPACE && space->id != 0) {
810
/* Put the node to the LRU list */
811
UT_LIST_ADD_FIRST(LRU, system->LRU, node);
815
/**********************************************************************//**
821
fil_node_t* node, /*!< in: file node */
822
fil_system_t* system) /*!< in: tablespace memory cache */
826
ut_ad(node && system);
827
ut_ad(mutex_own(&(system->mutex)));
829
ut_a(node->n_pending == 0);
830
ut_a(node->n_pending_flushes == 0);
831
ut_a(node->modification_counter == node->flush_counter);
833
ret = os_file_close(node->handle);
836
/* printf("Closing file %s\n", node->name); */
839
ut_a(system->n_open > 0);
842
if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) {
843
ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
845
/* The node is in the LRU list, remove it */
846
UT_LIST_REMOVE(LRU, system->LRU, node);
850
/********************************************************************//**
851
Tries to close a file in the LRU list. The caller must hold the fil_sys
mutex.
853
@return TRUE if success, FALSE if should retry later; since i/o's
854
generally complete in < 100 ms, and as InnoDB writes at most 128 pages
855
from the buffer pool in a batch, and then immediately flushes the
856
files, there is a good chance that the next time we find a suitable
857
node from the LRU list */
860
fil_try_to_close_file_in_LRU(
861
/*=========================*/
862
ibool print_info) /*!< in: if TRUE, prints information why it
863
cannot close a file */
867
ut_ad(mutex_own(&fil_system->mutex));
869
node = UT_LIST_GET_LAST(fil_system->LRU);
873
"InnoDB: fil_sys open file LRU len %lu\n",
874
(ulong) UT_LIST_GET_LEN(fil_system->LRU));
877
while (node != NULL) {
878
if (node->modification_counter == node->flush_counter
879
&& node->n_pending_flushes == 0) {
881
fil_node_close_file(node, fil_system);
886
if (print_info && node->n_pending_flushes > 0) {
887
fputs("InnoDB: cannot close file ", stderr);
888
ut_print_filename(stderr, node->name);
889
fprintf(stderr, ", because n_pending_flushes %lu\n",
890
(ulong) node->n_pending_flushes);
894
&& node->modification_counter != node->flush_counter) {
895
fputs("InnoDB: cannot close file ", stderr);
896
ut_print_filename(stderr, node->name);
898
", because mod_count %ld != fl_count %ld\n",
899
(long) node->modification_counter,
900
(long) node->flush_counter);
903
node = UT_LIST_GET_PREV(LRU, node);
909
/*******************************************************************//**
910
Reserves the fil_system mutex and tries to make sure we can open at least one
911
file while holding it. This should be called before calling
912
fil_node_prepare_for_io(), because that function may need to open a file. */
915
fil_mutex_enter_and_prepare_for_io(
916
/*===============================*/
917
ulint space_id) /*!< in: space id */
921
ibool print_info = FALSE;
926
mutex_enter(&fil_system->mutex);
928
if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
929
/* We keep log files and system tablespace files always open;
930
this is important in preventing deadlocks in this module, as
931
a page read completion often performs another read from the
932
insert buffer. The insert buffer is in tablespace 0, and we
933
cannot end up waiting in this function. */
938
if (fil_system->n_open < fil_system->max_n_open) {
943
space = fil_space_get_by_id(space_id);
945
if (space != NULL && space->stop_ios) {
946
/* We are going to do a rename file and want to stop new i/o's
949
if (count2 > 20000) {
950
fputs("InnoDB: Warning: tablespace ", stderr);
951
ut_print_filename(stderr, space->name);
953
" has i/o ops stopped for a long time %lu\n",
957
mutex_exit(&fil_system->mutex);
959
os_thread_sleep(20000);
966
/* If the file is already open, no need to do anything; if the space
967
does not exist, we handle the situation in the function which called
970
if (!space || UT_LIST_GET_FIRST(space->chain)->open) {
979
/* Too many files are open, try to close some */
981
success = fil_try_to_close_file_in_LRU(print_info);
983
if (success && fil_system->n_open >= fil_system->max_n_open) {
988
if (fil_system->n_open < fil_system->max_n_open) {
995
ut_print_timestamp(stderr);
997
" InnoDB: Warning: too many (%lu) files stay open"
998
" while the maximum\n"
999
"InnoDB: allowed value would be %lu.\n"
1000
"InnoDB: You may need to raise the value of"
1001
" innodb_open_files in\n"
1002
"InnoDB: my.cnf.\n",
1003
(ulong) fil_system->n_open,
1004
(ulong) fil_system->max_n_open);
1009
mutex_exit(&fil_system->mutex);
1011
#ifndef UNIV_HOTBACKUP
1012
/* Wake the i/o-handler threads to make sure pending i/o's are
1014
os_aio_simulated_wake_handler_threads();
1016
os_thread_sleep(20000);
1018
/* Flush tablespaces so that we can close modified files in the LRU
1021
fil_flush_file_spaces(FIL_TABLESPACE);
1028
/*******************************************************************//**
1029
Frees a file node object from a tablespace memory cache. */
1034
fil_node_t* node, /*!< in, own: file node */
1035
fil_system_t* system, /*!< in: tablespace memory cache */
1036
fil_space_t* space) /*!< in: space where the file node is chained */
1038
ut_ad(node && system && space);
1039
ut_ad(mutex_own(&(system->mutex)));
1040
ut_a(node->magic_n == FIL_NODE_MAGIC_N);
1041
ut_a(node->n_pending == 0);
1044
/* We fool the assertion in fil_node_close_file() to think
1045
there are no unflushed modifications in the file */
1047
node->modification_counter = node->flush_counter;
1049
if (space->is_in_unflushed_spaces
1050
&& fil_space_is_flushed(space)) {
1052
space->is_in_unflushed_spaces = FALSE;
1054
UT_LIST_REMOVE(unflushed_spaces,
1055
system->unflushed_spaces,
1059
fil_node_close_file(node, system);
1062
space->size -= node->size;
1064
UT_LIST_REMOVE(chain, space->chain, node);
1066
mem_free(node->name);
1070
#ifdef UNIV_LOG_ARCHIVE
1071
/****************************************************************//**
1072
Drops files from the start of a file space, so that its size is cut by
1073
the amount given. */
1076
fil_space_truncate_start(
1077
/*=====================*/
1078
ulint id, /*!< in: space id */
1079
ulint trunc_len) /*!< in: truncate by this much; it is an error
1080
if this does not equal the combined size of
1081
some initial files in the space */
1086
mutex_enter(&fil_system->mutex);
1088
space = fil_space_get_by_id(id);
1092
while (trunc_len > 0) {
1093
node = UT_LIST_GET_FIRST(space->chain);
1095
ut_a(node->size * UNIV_PAGE_SIZE <= trunc_len);
1097
trunc_len -= node->size * UNIV_PAGE_SIZE;
1099
fil_node_free(node, fil_system, space);
1102
mutex_exit(&fil_system->mutex);
1104
#endif /* UNIV_LOG_ARCHIVE */
1106
/*******************************************************************//**
1107
Creates a space memory object and puts it into the tablespace memory cache. If
1108
there is an error, prints an error message to the .err log. On success the
space is inserted into both hash tables (by id and by folded name) and
appended to fil_system->space_list.
1109
@return TRUE if success */
1114
const char* name, /*!< in: space name */
1115
ulint id, /*!< in: space id */
1116
ulint flags, /*!< in: compressed page size
1117
and file format, or 0 */
1118
ulint purpose)/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
1122
/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
ROW_FORMAT=COMPACT
1124
((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
1125
ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
1126
format, the tablespace flags should equal
1127
(table->flags & ~(~0 << DICT_TF_BITS)). */
1128
ut_a(flags != DICT_TF_COMPACT);
1129
ut_a(!(flags & (~0UL << DICT_TF_BITS)));
1133
"InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name,
1139
mutex_enter(&fil_system->mutex);
1141
/* First look for a clash on the name */
space = fil_space_get_by_name(name);
1143
if (UNIV_LIKELY_NULL(space)) {
1147
ut_print_timestamp(stderr);
1149
" InnoDB: Warning: trying to init to the"
1150
" tablespace memory cache\n"
1151
"InnoDB: a tablespace %lu of name ", (ulong) id);
1152
ut_print_filename(stderr, name);
1153
fprintf(stderr, ",\n"
1154
"InnoDB: but a tablespace %lu of the same name\n"
1155
"InnoDB: already exists in the"
1156
" tablespace memory cache!\n",
1159
/* A namesake is tolerated only for a single-table tablespace
(nonzero id, purpose FIL_TABLESPACE); in the other cases the mutex
is released here */
if (id == 0 || purpose != FIL_TABLESPACE) {
1161
mutex_exit(&fil_system->mutex);
1167
"InnoDB: We assume that InnoDB did a crash recovery,"
1169
"InnoDB: an .ibd file for which the table"
1170
" did not exist in the\n"
1171
"InnoDB: InnoDB internal data dictionary in the"
1173
"InnoDB: We assume that you later removed the"
1174
" .ibd and .frm files,\n"
1175
"InnoDB: and are now trying to recreate the table."
1176
" We now remove the\n"
1177
"InnoDB: conflicting tablespace object"
1178
" from the memory cache and try\n"
1179
"InnoDB: the init again.\n");
1181
namesake_id = space->id;
1183
/* FALSE: this caller does not hold the latch of the namesake */
success = fil_space_free(namesake_id, FALSE);
1186
mutex_exit(&fil_system->mutex);
1191
/* Then look for a clash on the space id */
space = fil_space_get_by_id(id);
1193
if (UNIV_LIKELY_NULL(space)) {
1195
"InnoDB: Error: trying to add tablespace %lu"
1196
" of name ", (ulong) id);
1197
ut_print_filename(stderr, name);
1198
fprintf(stderr, "\n"
1199
"InnoDB: to the tablespace memory cache,"
1201
"InnoDB: %lu of name ", (ulong) space->id);
1202
ut_print_filename(stderr, space->name);
1203
fputs(" already exists in the tablespace\n"
1204
"InnoDB: memory cache!\n", stderr);
1206
mutex_exit(&fil_system->mutex);
1211
space = static_cast<fil_space_t *>(mem_alloc(sizeof(fil_space_t)));
1213
space->name = mem_strdup(name);
1216
/* Stamp the space with the next value of the system-wide version
counter */
fil_system->tablespace_version++;
1217
space->tablespace_version = fil_system->tablespace_version;
1218
space->mark = FALSE;
1220
/* Outside recovery, a tablespace id above the known maximum is noted;
the warning below is guarded by space_id_reuse_warned so that it is
printed only once */
if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on)
1221
&& UNIV_UNLIKELY(id > fil_system->max_assigned_id)) {
1222
if (!fil_system->space_id_reuse_warned) {
1223
fil_system->space_id_reuse_warned = TRUE;
1225
ut_print_timestamp(stderr);
1227
" InnoDB: Warning: allocated tablespace %lu,"
1228
" old maximum was %lu\n",
1230
(ulong) fil_system->max_assigned_id);
1233
fil_system->max_assigned_id = id;
1236
space->stop_ios = FALSE;
1237
space->stop_ibuf_merges = FALSE;
1238
space->is_being_deleted = FALSE;
1239
space->purpose = purpose;
1241
space->flags = flags;
1243
space->n_reserved_extents = 0;
1245
space->n_pending_flushes = 0;
1246
space->n_pending_ibuf_merges = 0;
1248
UT_LIST_INIT(space->chain);
1249
space->magic_n = FIL_SPACE_MAGIC_N;
1251
rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);
1253
/* Make the space reachable by id and by folded name */
HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
1255
HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
1256
ut_fold_string(name), space);
1257
space->is_in_unflushed_spaces = FALSE;
1259
UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
1261
mutex_exit(&fil_system->mutex);
1266
/*******************************************************************//**
1267
Assigns a new space id for a new single-table tablespace. This works simply by
1268
incrementing the global counter. If 4 billion ids are not enough, we may need
to recycle ids. An id at or above SRV_LOG_SPACE_FIRST_ID is refused, and
ULINT_UNDEFINED is then stored in *space_id.
1270
@return TRUE if assigned, FALSE if not */
1273
fil_assign_new_space_id(
1274
/*====================*/
1275
ulint* space_id) /*!< in/out: space id */
1280
mutex_enter(&fil_system->mutex);
1284
/* Start from the system-wide maximum if the candidate is below it */
if (id < fil_system->max_assigned_id) {
1285
id = fil_system->max_assigned_id;
1290
/* Warn at every millionth id once more than half of the range below
SRV_LOG_SPACE_FIRST_ID has been used */
if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
1291
ut_print_timestamp(stderr);
1293
"InnoDB: Warning: you are running out of new"
1294
" single-table tablespace id's.\n"
1295
"InnoDB: Current counter is %lu and it"
1296
" must not exceed %lu!\n"
1297
"InnoDB: To reset the counter to zero"
1298
" you have to dump all your tables and\n"
1299
"InnoDB: recreate the whole InnoDB installation.\n",
1301
(ulong) SRV_LOG_SPACE_FIRST_ID);
1304
/* Only ids strictly below SRV_LOG_SPACE_FIRST_ID can be assigned */
success = (id < SRV_LOG_SPACE_FIRST_ID);
1307
/* The assigned id also becomes the new system-wide maximum */
*space_id = fil_system->max_assigned_id = id;
1309
ut_print_timestamp(stderr);
1311
"InnoDB: You have run out of single-table"
1312
" tablespace id's!\n"
1313
"InnoDB: Current counter is %lu.\n"
1314
"InnoDB: To reset the counter to zero you"
1315
" have to dump all your tables and\n"
1316
"InnoDB: recreate the whole InnoDB installation.\n",
1318
/* Out of ids: hand ULINT_UNDEFINED back through the in/out
parameter */
*space_id = ULINT_UNDEFINED;
1321
mutex_exit(&fil_system->mutex);
1326
/*******************************************************************//**
1327
Frees a space object from the tablespace memory cache. Closes the files in
1328
the chain but does not delete them. There must not be any pending i/o's or
1329
flushes on the files. The caller must hold fil_system->mutex (asserted
below). The space is removed from both hash tables, from the
unflushed_spaces list if it is on it, and from space_list; every file node
of its chain is then released with fil_node_free(), and finally the latch
and the name are freed.
1330
@return TRUE if success */
1335
/* out: TRUE if success */
1336
ulint id, /* in: space id */
1337
ibool x_latched) /* in: TRUE if caller has space->latch
1341
fil_space_t* tablespace;
1342
fil_node_t* fil_node;
1344
ut_ad(mutex_own(&fil_system->mutex));
1346
space = fil_space_get_by_id(id);
1349
ut_print_timestamp(stderr);
1351
" InnoDB: Error: trying to remove tablespace %lu"
1352
" from the cache but\n"
1353
"InnoDB: it is not there.\n", (ulong) id);
1358
HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space);
1360
tablespace = fil_space_get_by_name(space->name);
1362
ut_a(space == tablespace);
1364
HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
1365
ut_fold_string(space->name), space);
1367
if (space->is_in_unflushed_spaces) {
1368
space->is_in_unflushed_spaces = FALSE;
1370
UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces,
1374
UT_LIST_REMOVE(space_list, fil_system->space_list, space);
1376
ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
1377
ut_a(0 == space->n_pending_flushes);
1379
fil_node = UT_LIST_GET_FIRST(space->chain);
1381
while (fil_node != NULL) {
1382
fil_node_free(fil_node, fil_system, space);
1384
fil_node = UT_LIST_GET_FIRST(space->chain);
1387
ut_a(0 == UT_LIST_GET_LEN(space->chain));
1390
rw_lock_x_unlock(&space->latch);
1393
rw_lock_free(&(space->latch));
1395
mem_free(space->name);
1401
/*******************************************************************//**
1402
Returns the size of the space in pages. The tablespace must be cached in the
memory cache. If the recorded size is still 0 for a FIL_TABLESPACE space,
its single file node is first passed through fil_node_prepare_for_io() and
fil_node_complete_io().
1404
@return space size, 0 if space not found */
1409
ulint id) /*!< in: space id */
1417
/* Presumably this also acquires fil_system->mutex: every path shown
below ends with a mutex_exit() on it */
fil_mutex_enter_and_prepare_for_io(id);
1419
space = fil_space_get_by_id(id);
1421
if (space == NULL) {
1422
mutex_exit(&fil_system->mutex);
1427
if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
1430
/* Such a space is expected to consist of exactly one file */
ut_a(1 == UT_LIST_GET_LEN(space->chain));
1432
node = UT_LIST_GET_FIRST(space->chain);
1434
/* It must be a single-table tablespace and we have not opened
1435
the file yet; the following calls will open it and update the
1438
fil_node_prepare_for_io(node, fil_system, space);
1439
fil_node_complete_io(node, fil_system, OS_FILE_READ);
1444
mutex_exit(&fil_system->mutex);
1449
/*******************************************************************//**
1450
Returns the flags of the space. The tablespace must be cached
1451
in the memory cache. Space id 0 is special-cased before the cache lookup.
As in fil_space_get_size(), a FIL_TABLESPACE space whose size is still 0
has its single file node prepared for i/o before the flags are read.
1452
@return flags, ULINT_UNDEFINED if space not found */
1455
fil_space_get_flags(
1456
/*================*/
1457
ulint id) /*!< in: space id */
1465
/* Space id 0 (the system tablespace) is handled up front */
if (UNIV_UNLIKELY(!id)) {
1469
/* Presumably this also acquires fil_system->mutex: every path shown
below ends with a mutex_exit() on it */
fil_mutex_enter_and_prepare_for_io(id);
1471
space = fil_space_get_by_id(id);
1473
if (space == NULL) {
1474
mutex_exit(&fil_system->mutex);
1476
return(ULINT_UNDEFINED);
1479
if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
1482
/* Such a space is expected to consist of exactly one file */
ut_a(1 == UT_LIST_GET_LEN(space->chain));
1484
node = UT_LIST_GET_FIRST(space->chain);
1486
/* It must be a single-table tablespace and we have not opened
1487
the file yet; the following calls will open it and update the
1490
fil_node_prepare_for_io(node, fil_system, space);
1491
fil_node_complete_io(node, fil_system, OS_FILE_READ);
1494
flags = space->flags;
1496
mutex_exit(&fil_system->mutex);
1501
/*******************************************************************//**
1502
Returns the compressed page size of the space, or 0 if the space
1503
is not compressed. The tablespace must be cached in the memory cache.
The flags are obtained with fil_space_get_flags() and converted with
dict_table_flags_to_zip_size().
1504
@return compressed page size, ULINT_UNDEFINED if space not found */
1507
fil_space_get_zip_size(
1508
/*===================*/
1509
ulint id) /*!< in: space id */
1513
flags = fil_space_get_flags(id);
1515
/* Neither flags == 0 (not compressed) nor ULINT_UNDEFINED (space not
found) is passed to the conversion */
if (flags && flags != ULINT_UNDEFINED) {
1517
return(dict_table_flags_to_zip_size(flags));
1523
/*******************************************************************//**
1524
Checks if the pair space, page_no refers to an existing page in a tablespace
1525
file space. The tablespace must be cached in the memory cache.
1526
@return TRUE if the address is meaningful */
1529
fil_check_adress_in_tablespace(
1530
/*===========================*/
1531
ulint id, /*!< in: space id */
1532
ulint page_no)/*!< in: page number */
1534
/* The address is meaningful when page_no is strictly below the space
size in pages; a space that is not found reports size 0 and therefore
fails this test */
if (fil_space_get_size(id) > page_no) {
1542
/****************************************************************//**
1543
Initializes the tablespace memory cache: allocates the global fil_system
object, creates its mutex and the two hash tables (spaces and name_hash),
initializes the LRU list and records max_n_open. */
1548
ulint hash_size, /*!< in: hash table size */
1549
ulint max_n_open) /*!< in: max number of open files */
1551
/* The cache must not have been initialized already */
ut_a(fil_system == NULL);
1553
ut_a(hash_size > 0);
1554
ut_a(max_n_open > 0);
1556
/* NOTE(review): mem_zalloc presumably returns zero-filled memory, so
fields not assigned below would start out as 0; confirm */
void *fil_system_ptr= mem_zalloc(sizeof(fil_system_t));
1557
fil_system = static_cast<fil_system_t *>(fil_system_ptr);
1559
mutex_create(fil_system_mutex_key,
1560
&fil_system->mutex, SYNC_ANY_LATCH);
1562
/* Both lookup tables are created with the same size */
fil_system->spaces = hash_create(hash_size);
1563
fil_system->name_hash = hash_create(hash_size);
1565
UT_LIST_INIT(fil_system->LRU);
1567
fil_system->max_n_open = max_n_open;
1570
/*******************************************************************//**
1571
Opens all log files and system tablespace data files. They stay open until the
1572
database server shutdown. This should be called at a server startup after the
1573
space objects for the log and the system tablespace have been created. The
1574
purpose of this operation is to make sure we never run out of file descriptors
1575
if we need to read from the insert buffer or to write to the log. The whole
walk over fil_system->space_list is done while holding fil_system->mutex. */
1578
fil_open_log_and_system_tablespace_files(void)
1579
/*==========================================*/
1584
mutex_enter(&fil_system->mutex);
1586
space = UT_LIST_GET_FIRST(fil_system->space_list);
1588
while (space != NULL) {
1589
/* Select the non-tablespace (log) spaces and the system tablespace,
which has id 0 */
if (space->purpose != FIL_TABLESPACE || space->id == 0) {
1590
node = UT_LIST_GET_FIRST(space->chain);
1592
while (node != NULL) {
1594
fil_node_open_file(node, fil_system,
1597
/* Warn when fewer than 10 more files could be opened before
reaching the configured limit */
if (fil_system->max_n_open
1598
< 10 + fil_system->n_open) {
1600
"InnoDB: Warning: you must"
1601
" raise the value of"
1602
" innodb_open_files in\n"
1603
"InnoDB: my.cnf! Remember that"
1604
" InnoDB keeps all log files"
1606
"InnoDB: tablespace files open"
1607
" for the whole time mysqld is"
1609
"InnoDB: needs to open also"
1610
" some .ibd files if the"
1611
" file-per-table storage\n"
1612
"InnoDB: model is used."
1613
" Current open files %lu,"
1615
" open files %lu.\n",
1616
(ulong) fil_system->n_open,
1617
(ulong) fil_system->max_n_open);
1619
node = UT_LIST_GET_NEXT(chain, node);
1622
space = UT_LIST_GET_NEXT(space_list, space);
1625
mutex_exit(&fil_system->mutex);
1628
/*******************************************************************//**
1629
Closes all open files. There must not be any pending i/o's or not flushed
1630
modifications in the files. Each space on fil_system->space_list is also
removed from the cache with fil_space_free() after its files are closed. */
1633
fil_close_all_files(void)
1634
/*=====================*/
1638
mutex_enter(&fil_system->mutex);
1640
space = UT_LIST_GET_FIRST(fil_system->space_list);
1642
while (space != NULL) {
1644
/* Remember the current space so that it can be freed after the
iterator has moved on */
fil_space_t* prev_space = space;
1646
for (node = UT_LIST_GET_FIRST(space->chain);
1648
node = UT_LIST_GET_NEXT(chain, node)) {
1651
fil_node_close_file(node, fil_system);
1655
/* Advance first: fil_space_free() unlinks prev_space from
space_list */
space = UT_LIST_GET_NEXT(space_list, space);
1657
/* FALSE: the latch of the space is not held by this caller */
fil_space_free(prev_space->id, FALSE);
1660
mutex_exit(&fil_system->mutex);
1663
/*******************************************************************//**
1664
Sets the max tablespace id counter if the given number is bigger than the
current value of fil_system->max_assigned_id. The counter is never lowered.
1668
fil_set_max_space_id_if_bigger(
1669
/*===========================*/
1670
ulint max_id) /*!< in: maximum known id */
1672
/* Ids at or above SRV_LOG_SPACE_FIRST_ID are reported as a fatal
error */
if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
1674
"InnoDB: Fatal error: max tablespace id"
1675
" is too high, %lu\n", (ulong) max_id);
1679
mutex_enter(&fil_system->mutex);
1681
/* Only ever raise the counter */
if (fil_system->max_assigned_id < max_id) {
1683
fil_system->max_assigned_id = max_id;
1686
mutex_exit(&fil_system->mutex);
1689
/****************************************************************//**
1690
Writes the flushed lsn and the latest archived log number to the page header
1691
of the first page of a data file of the system tablespace (space 0),
1692
which is uncompressed. In the code shown here only the lsn is stored, in
the FIL_PAGE_FILE_FLUSH_LSN field; the archived log number parameter is
unnamed and unused. */
1695
fil_write_lsn_and_arch_no_to_file(
1696
/*==============================*/
1697
ulint sum_of_sizes, /*!< in: combined size of previous files
1698
in space, in database pages */
1699
ib_uint64_t lsn, /*!< in: lsn to write */
1700
ulint /*arch_log_no __attribute__((unused))*/)
1701
/*!< in: archived log number to write */
1706
/* Two pages are allocated so that a buffer of UNIV_PAGE_SIZE bytes
aligned to UNIV_PAGE_SIZE fits inside the allocation */
buf1 = static_cast<byte *>(mem_alloc(2 * UNIV_PAGE_SIZE));
1707
buf = static_cast<byte *>(ut_align(buf1, UNIV_PAGE_SIZE));
1709
/* Read-modify-write: the page is read, only the flush lsn field is
overwritten, and the page is written back with the same arguments */
fil_read(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
1711
mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
1713
fil_write(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
1720
/****************************************************************//**
1721
Writes the flushed lsn and the latest archived log number to the page
1722
header of the first page of each data file in the system tablespace.
The space list is walked under fil_system->mutex; the mutex is released
around each call of fil_write_lsn_and_arch_no_to_file() and taken again
afterwards. sum_of_sizes accumulates node->size so that each file is
addressed by the combined size of the files before it.
1723
@return DB_SUCCESS or error number */
1726
fil_write_flushed_lsn_to_data_files(
1727
/*================================*/
1728
ib_uint64_t lsn, /*!< in: lsn to write */
1729
ulint arch_log_no) /*!< in: latest archived log
1737
mutex_enter(&fil_system->mutex);
1739
space = UT_LIST_GET_FIRST(fil_system->space_list);
1742
/* We only write the lsn to all existing data files which have
1743
been open during the lifetime of the mysqld process; they are
1744
represented by the space objects in the tablespace memory
1745
cache. Note that all data files in the system tablespace 0 are
1748
if (space->purpose == FIL_TABLESPACE
1749
&& space->id == 0) {
1752
node = UT_LIST_GET_FIRST(space->chain);
1754
mutex_exit(&fil_system->mutex);
1756
err = fil_write_lsn_and_arch_no_to_file(
1757
sum_of_sizes, lsn, arch_log_no);
1758
if (err != DB_SUCCESS) {
1763
mutex_enter(&fil_system->mutex);
1765
sum_of_sizes += node->size;
1766
node = UT_LIST_GET_NEXT(chain, node);
1769
space = UT_LIST_GET_NEXT(space_list, space);
1772
mutex_exit(&fil_system->mutex);
1777
/*******************************************************************//**
1778
Reads the flushed lsn and arch no fields from the first page of a data
file and folds them into the running minimum and maximum kept by the
caller. When one_read_already is FALSE the bounds are simply set to the
values read; otherwise they are only widened.
1782
fil_read_flushed_lsn_and_arch_log_no(
1783
/*=================================*/
1784
os_file_t data_file, /*!< in: open data file */
1785
ibool one_read_already, /*!< in: TRUE if min and max
1786
parameters below already
1787
contain sensible data */
1788
#ifdef UNIV_LOG_ARCHIVE
1789
ulint* min_arch_log_no, /*!< in/out: smallest archived log number so far */
1790
ulint* max_arch_log_no, /*!< in/out: largest archived log number so far */
1791
#endif /* UNIV_LOG_ARCHIVE */
1792
ib_uint64_t* min_flushed_lsn, /*!< in/out: smallest flushed lsn so far */
1793
ib_uint64_t* max_flushed_lsn) /*!< in/out: largest flushed lsn so far */
1797
ib_uint64_t flushed_lsn;
1799
buf2 = static_cast<byte *>(ut_malloc(2 * UNIV_PAGE_SIZE));
1800
/* Align the memory for a possible read from a raw device */
1801
buf = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
1803
/* Read UNIV_PAGE_SIZE bytes from offset 0 of the file */
os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE);
1805
flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN);
1809
/* First file read: seed both bounds with the values of this file */
if (!one_read_already) {
1810
*min_flushed_lsn = flushed_lsn;
1811
*max_flushed_lsn = flushed_lsn;
1812
#ifdef UNIV_LOG_ARCHIVE
1813
*min_arch_log_no = arch_log_no;
1814
*max_arch_log_no = arch_log_no;
1815
#endif /* UNIV_LOG_ARCHIVE */
1819
/* Later reads: widen the range only where this file lies outside it */
if (*min_flushed_lsn > flushed_lsn) {
1820
*min_flushed_lsn = flushed_lsn;
1822
if (*max_flushed_lsn < flushed_lsn) {
1823
*max_flushed_lsn = flushed_lsn;
1825
#ifdef UNIV_LOG_ARCHIVE
1826
if (*min_arch_log_no > arch_log_no) {
1827
*min_arch_log_no = arch_log_no;
1829
if (*max_arch_log_no < arch_log_no) {
1830
*max_arch_log_no = arch_log_no;
1832
#endif /* UNIV_LOG_ARCHIVE */
1835
/*================ SINGLE-TABLE TABLESPACES ==========================*/
1837
#ifndef UNIV_HOTBACKUP
1838
/*******************************************************************//**
1839
Increments the count of pending insert buffer page merges, if space is not
being deleted. The counter is left alone when the space is not in the cache
or has stop_ibuf_merges set.
1841
@return TRUE if being deleted, and ibuf merges should be skipped */
1844
fil_inc_pending_ibuf_merges(
1845
/*========================*/
1846
ulint id) /*!< in: space id */
1850
mutex_enter(&fil_system->mutex);
1852
space = fil_space_get_by_id(id);
1854
if (space == NULL) {
1856
"InnoDB: Error: trying to do ibuf merge to a"
1857
" dropped tablespace %lu\n",
1861
/* A missing space, or one that has been told to stop ibuf merges,
must not get new merges: release the mutex without counting */
if (space == NULL || space->stop_ibuf_merges) {
1862
mutex_exit(&fil_system->mutex);
1867
space->n_pending_ibuf_merges++;
1869
mutex_exit(&fil_system->mutex);
1874
/*******************************************************************//**
1875
Decrements the count of pending insert buffer page merges. If the space is
no longer in the cache, an error is printed and nothing is decremented. */
1878
fil_decr_pending_ibuf_merges(
1879
/*=========================*/
1880
ulint id) /*!< in: space id */
1884
mutex_enter(&fil_system->mutex);
1886
space = fil_space_get_by_id(id);
1888
if (space == NULL) {
1890
"InnoDB: Error: decrementing ibuf merge of a"
1891
" dropped tablespace %lu\n",
1895
/* Only a space that is still cached has a counter to decrement */
if (space != NULL) {
1896
space->n_pending_ibuf_merges--;
1899
mutex_exit(&fil_system->mutex);
1901
#endif /* !UNIV_HOTBACKUP */
1903
/********************************************************//**
1904
Creates the database directory for a table if it does not exist yet. The
directory path is built from fil_path_to_mysql_datadir and the part of name
that precedes the first slash. */
1907
fil_create_directory_for_tablename(
1908
/*===============================*/
1909
const char* name) /*!< in: name in the standard
1910
'databasename/tablename' format */
1916
len = strlen(fil_path_to_mysql_datadir);
1917
/* namend marks the end of the database name part */
namend = strchr(name, '/');
1919
/* Room for the datadir, one byte at path[len] (presumably for a slash
separator), the database name and the terminating NUL */
path = static_cast<char *>(mem_alloc(len + (namend - name) + 2));
1921
memcpy(path, fil_path_to_mysql_datadir, len);
1923
memcpy(path + len + 1, name, namend - name);
1924
path[len + (namend - name) + 1] = 0;
1926
srv_normalize_path_for_win(path);
1928
/* NOTE(review): the FALSE argument presumably means that an already
existing directory is not an error; confirm against
os_file_create_directory() */
ut_a(os_file_create_directory(path, FALSE));
1932
#ifndef UNIV_HOTBACKUP
1933
/********************************************************//**
1934
Writes a log record about an .ibd file create/rename/delete. The record
consists of the initial file operation record, the 4-byte flags when type
is MLOG_FILE_CREATE2, then the name as a 2-byte length followed by the
NUL-terminated string, and for MLOG_FILE_RENAME the new name in the same
form. */
1939
ulint type, /*!< in: MLOG_FILE_CREATE,
1941
MLOG_FILE_DELETE, or
1943
ulint space_id, /*!< in: space id */
1944
ulint log_flags, /*!< in: redo log flags (stored
1945
in the page number field) */
1946
ulint flags, /*!< in: compressed page size
1948
if type==MLOG_FILE_CREATE2, or 0 */
1949
const char* name, /*!< in: table name in the familiar
1950
'databasename/tablename' format, or
1951
the file path in the case of
1953
const char* new_name, /*!< in: if type is MLOG_FILE_RENAME,
1954
the new table name in the
1955
'databasename/tablename' format */
1956
mtr_t* mtr) /*!< in: mini-transaction handle */
1961
log_ptr = mlog_open(mtr, 11 + 2 + 1);
1964
/* Logging in mtr is switched off during crash recovery:
1965
in that case mlog_open returns NULL */
1969
log_ptr = mlog_write_initial_log_record_for_file_op(
1970
type, space_id, log_flags, log_ptr, mtr);
1971
/* Only MLOG_FILE_CREATE2 records carry the tablespace flags */
if (type == MLOG_FILE_CREATE2) {
1972
mach_write_to_4(log_ptr, flags);
1975
/* Let us store the strings as null-terminated for easier readability
1978
len = strlen(name) + 1;
1980
mach_write_to_2(log_ptr, len);
1982
mlog_close(mtr, log_ptr);
1984
mlog_catenate_string(mtr, (byte*) name, len);
1986
if (type == MLOG_FILE_RENAME) {
1987
len = strlen(new_name) + 1;
1988
log_ptr = mlog_open(mtr, 2 + len);
1990
mach_write_to_2(log_ptr, len);
1992
mlog_close(mtr, log_ptr);
1994
mlog_catenate_string(mtr, (byte*) new_name, len);
1999
/*******************************************************************//**
2000
Parses the body of a log record written about an .ibd file operation. That is,
2001
the log record part after the standard (type, space id, page no) header of the
log record. The body holds the 4-byte flags (MLOG_FILE_CREATE2 only), a
2-byte name length and the name, and for MLOG_FILE_RENAME a second 2-byte
length and the new name.
2004
If desired, also replays the delete or rename operation if the .ibd file
2005
exists and the space id in it matches. Replays the create operation if a file
2006
at that path does not exist yet. If the database directory for the file to be
2007
created does not exist, then we create the directory, too.
2009
Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
2010
datadir that we should use in replaying the file operations.
2011
@return end of log record, or NULL if the record was not completely
2012
contained between ptr and end_ptr */
2015
fil_op_log_parse_or_replay(
2016
/*=======================*/
2017
byte* ptr, /*!< in: buffer containing the log record body,
2018
or an initial segment of it, if the record does
2019
not fit completely between ptr and end_ptr */
2020
byte* end_ptr, /*!< in: buffer end */
2021
ulint type, /*!< in: the type of this log record */
2022
ulint space_id, /*!< in: the space id of the tablespace in
2023
question, or 0 if the log record should
2024
only be parsed but not replayed */
2025
ulint log_flags) /*!< in: redo log flags
2026
(stored in the page number parameter) */
2031
const char* new_name = NULL;
2034
/* Every field is bounds-checked against end_ptr before it is read */
if (type == MLOG_FILE_CREATE2) {
2035
if (end_ptr < ptr + 4) {
2040
flags = mach_read_from_4(ptr);
2044
if (end_ptr < ptr + 2) {
2049
name_len = mach_read_from_2(ptr);
2053
if (end_ptr < ptr + name_len) {
2058
/* The name is used in place inside the log buffer, not copied */
name = (const char*) ptr;
2062
if (type == MLOG_FILE_RENAME) {
2063
if (end_ptr < ptr + 2) {
2068
new_name_len = mach_read_from_2(ptr);
2072
if (end_ptr < ptr + new_name_len) {
2077
new_name = (const char*) ptr;
2079
ptr += new_name_len;
2082
/* We managed to parse a full log record body */
2084
/* NOTE(review): the two printf calls below look like debugging output;
confirm whether they are meant to be active */
printf("Parsed log rec of type %lu space %lu\n"
2085
"name %s\n", type, space_id, name);
2087
if (type == MLOG_FILE_RENAME) {
2088
printf("new name %s\n", new_name);
2096
/* Let us try to perform the file operation, if sensible. Note that
2097
ibbackup has at this stage already read in all space id info to the
2098
fil0fil.c data structures.
2100
NOTE that our algorithm is not guaranteed to work correctly if there
2101
were renames of tables during the backup. See ibbackup code for more
2105
case MLOG_FILE_DELETE:
2106
if (fil_tablespace_exists_in_mem(space_id)) {
2107
ut_a(fil_delete_tablespace(space_id));
2112
case MLOG_FILE_RENAME:
2113
/* We do the rename based on space id, not old file name;
2114
this should guarantee that after the log replay each .ibd file
2115
has the correct name for the latest log sequence number; the
2116
proof is left as an exercise :) */
2118
if (fil_tablespace_exists_in_mem(space_id)) {
2119
/* Create the database directory for the new name, if
2120
it does not exist yet */
2121
fil_create_directory_for_tablename(new_name);
2123
/* Rename the table if there is not yet a tablespace
2124
with the same name */
2126
if (fil_get_space_id_for_table(new_name)
2127
== ULINT_UNDEFINED) {
2128
/* We do not care of the old name, that is
2129
why we pass NULL as the first argument */
2130
if (!fil_rename_tablespace(NULL, space_id,
2139
case MLOG_FILE_CREATE:
2140
case MLOG_FILE_CREATE2:
2141
/* The create is replayed only when neither the space id nor the
name is already known to the cache and the table is not temporary */
if (fil_tablespace_exists_in_mem(space_id)) {
2143
} else if (fil_get_space_id_for_table(name)
2144
!= ULINT_UNDEFINED) {
2146
} else if (log_flags & MLOG_FILE_FLAG_TEMP) {
2147
/* Temporary table, do nothing */
2149
/* Create the database directory for name, if it does
2151
fil_create_directory_for_tablename(name);
2153
if (fil_create_new_single_table_tablespace(
2154
space_id, name, FALSE, flags,
2155
FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
2169
/*******************************************************************//**
2169
Deletes a single-table tablespace. The tablespace must be cached in the
memory cache. The steps visible here are: stop new insert buffer merges and
wait for the pending ones, mark the space as being deleted and wait for
pending flushes and i/o, take space->latch in X mode, invalidate the pages
of the space in the buffer pool, free the space from the cache, delete the
file, and write an MLOG_FILE_DELETE log record.
2172
@return TRUE if success */
2175
fil_delete_tablespace(
2176
/*==================*/
2177
ulint id) /*!< in: space id */
2187
mutex_enter(&fil_system->mutex);
2189
space = fil_space_get_by_id(id);
2191
if (space != NULL) {
2192
/* Stop new merges first, then wait until the pending ones have
drained */
space->stop_ibuf_merges = TRUE;
2194
if (space->n_pending_ibuf_merges == 0) {
2195
mutex_exit(&fil_system->mutex);
2202
ut_print_timestamp(stderr);
2203
fputs(" InnoDB: Warning: trying to"
2204
" delete tablespace ", stderr);
2205
ut_print_filename(stderr, space->name);
2206
fprintf(stderr, ",\n"
2207
"InnoDB: but there are %lu pending"
2208
" ibuf merges on it.\n"
2209
"InnoDB: Loop %lu.\n",
2210
(ulong) space->n_pending_ibuf_merges,
2214
mutex_exit(&fil_system->mutex);
2216
/* Sleep without the mutex, then retry from the label */
os_thread_sleep(20000);
2219
goto stop_ibuf_merges;
2223
mutex_exit(&fil_system->mutex);
2227
mutex_enter(&fil_system->mutex);
2229
/* The mutex was released in between, so the space is looked up again */
space = fil_space_get_by_id(id);
2231
if (space == NULL) {
2232
ut_print_timestamp(stderr);
2234
" InnoDB: Error: cannot delete tablespace %lu\n"
2235
"InnoDB: because it is not found in the"
2236
" tablespace memory cache.\n",
2239
mutex_exit(&fil_system->mutex);
2245
ut_a(space->n_pending_ibuf_merges == 0);
2247
space->is_being_deleted = TRUE;
2249
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2250
node = UT_LIST_GET_FIRST(space->chain);
2252
/* Pending flushes or i/o on the single file must finish first */
if (space->n_pending_flushes > 0 || node->n_pending > 0) {
2254
ut_print_timestamp(stderr);
2255
fputs(" InnoDB: Warning: trying to"
2256
" delete tablespace ", stderr);
2257
ut_print_filename(stderr, space->name);
2258
fprintf(stderr, ",\n"
2259
"InnoDB: but there are %lu flushes"
2260
" and %lu pending i/o's on it\n"
2261
"InnoDB: Loop %lu.\n",
2262
(ulong) space->n_pending_flushes,
2263
(ulong) node->n_pending,
2266
mutex_exit(&fil_system->mutex);
2267
os_thread_sleep(20000);
2274
/* Keep a private copy of the name: the space object is freed below,
but the path is still needed to delete the file and to write the log
record */
path = mem_strdup(space->name);
2276
mutex_exit(&fil_system->mutex);
2278
/* Important: We rely on the data dictionary mutex to ensure
2279
that a race is not possible here. It should serialize the tablespace
2280
drop/free. We acquire an X latch only to avoid a race condition
2281
when accessing the tablespace instance via:
2283
fsp_get_available_space_in_free_extents().
2285
There our main motivation is to reduce the contention on the
2286
dictionary mutex. */
2288
rw_lock_x_lock(&space->latch);
2290
#ifndef UNIV_HOTBACKUP
2291
/* Invalidate in the buffer pool all pages belonging to the
2292
tablespace. Since we have set space->is_being_deleted = TRUE, readahead
2293
or ibuf merge can no longer read more pages of this tablespace to the
2294
buffer pool. Thus we can clean the tablespace out of the buffer pool
2295
completely and permanently. The flag is_being_deleted also prevents
2296
fil_flush() from being applied to this tablespace. */
2298
buf_LRU_invalidate_tablespace(id);
2300
/* printf("Deleting tablespace %s id %lu\n", space->name, id); */
2302
mutex_enter(&fil_system->mutex);
2304
/* TRUE: this thread holds space->latch, taken in X mode above */
success = fil_space_free(id, TRUE);
2306
mutex_exit(&fil_system->mutex);
2309
success = os_file_delete(path);
2312
success = os_file_delete_if_exists(path);
2315
rw_lock_x_unlock(&space->latch);
2319
#ifndef UNIV_HOTBACKUP
2320
/* Write a log record about the deletion of the .ibd
2321
file, so that ibbackup can replay it in the
2322
--apply-log phase. We use a dummy mtr and the familiar
2323
log write mechanism. */
2326
/* When replaying the operation in ibbackup, do not try
2327
to write any log record */
2330
fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr);
2343
/*******************************************************************//**
2344
Returns TRUE if a single-table tablespace is being deleted. The flag is
read under fil_system->mutex, and the space must exist in the cache
(asserted).
2345
@return TRUE if being deleted */
2348
fil_tablespace_is_being_deleted(
2349
/*============================*/
2350
ulint id) /*!< in: space id */
2353
ibool is_being_deleted;
2355
mutex_enter(&fil_system->mutex);
2357
space = fil_space_get_by_id(id);
2359
/* Asking about a space that is not cached is a caller error */
ut_a(space != NULL);
2361
/* Copy the flag while the mutex is held; the copy is what is returned */
is_being_deleted = space->is_being_deleted;
2363
mutex_exit(&fil_system->mutex);
2365
return(is_being_deleted);
2368
#ifndef UNIV_HOTBACKUP
2369
/*******************************************************************//**
2370
Discards a single-table tablespace. The tablespace must be cached in the
2371
memory cache. Discarding is like deleting a tablespace, but
2372
1) we do not drop the table from the data dictionary;
2373
2) we remove all insert buffer entries for the tablespace immediately; in DROP
2374
TABLE they are only removed gradually in the background;
2375
3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
2376
as it originally had.
The deletion itself is delegated to fil_delete_tablespace().
2377
@return TRUE if success */
2380
fil_discard_tablespace(
2381
/*===================*/
2382
ulint id) /*!< in: space id */
2386
success = fil_delete_tablespace(id);
2390
"InnoDB: Warning: cannot delete tablespace %lu"
2391
" in DISCARD TABLESPACE.\n"
2392
"InnoDB: But let us remove the"
2393
" insert buffer entries for this tablespace.\n",
2397
/* Remove all insert buffer entries for the tablespace */
2399
/* As the warning text above says, this is done even when the delete
did not succeed */
ibuf_delete_for_discarded_space(id);
2403
#endif /* !UNIV_HOTBACKUP */
2405
/*******************************************************************//**
2406
Renames the memory cache structures of a single-table tablespace: the space
is re-keyed in the name hash, and both space->name and node->name are
replaced with fresh copies of path. The caller must hold fil_system->mutex
(asserted).
2407
@return TRUE if success */
2410
fil_rename_tablespace_in_mem(
2411
/*=========================*/
2412
fil_space_t* space, /*!< in: tablespace memory object */
2413
fil_node_t* node, /*!< in: file node of that tablespace */
2414
const char* path) /*!< in: new name */
2416
fil_space_t* space2;
2417
const char* old_name = space->name;
2419
ut_ad(mutex_own(&fil_system->mutex));
2421
/* Sanity check: the space must be the one registered under its
current name */
space2 = fil_space_get_by_name(old_name);
2422
if (space != space2) {
2423
fputs("InnoDB: Error: cannot find ", stderr);
2424
ut_print_filename(stderr, old_name);
2425
fputs(" in tablespace memory cache\n", stderr);
2430
/* The new name must not be taken by any cached space */
space2 = fil_space_get_by_name(path);
2431
if (space2 != NULL) {
2432
fputs("InnoDB: Error: ", stderr);
2433
ut_print_filename(stderr, path);
2434
fputs(" is already in tablespace memory cache\n", stderr);
2439
/* Re-key the name hash: remove the entry under the fold value of the
old name, swap in the new names, then insert under the fold value of
the new name */
HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
2440
ut_fold_string(space->name), space);
2441
mem_free(space->name);
2442
mem_free(node->name);
2444
space->name = mem_strdup(path);
2445
node->name = mem_strdup(path);
2447
HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
2448
ut_fold_string(path), space);
2452
/*******************************************************************//**
2453
Allocates a file name for a single-table tablespace. The string must be freed
2454
by caller with mem_free(). Two forms are built below: name followed by
the .ibd suffix, or fil_path_to_mysql_datadir, a slash, name and the .ibd
suffix; the result is passed through srv_normalize_path_for_win().
2455
@return own: file name */
2460
const char* name, /*!< in: table name or a dir path of a
2462
ibool is_temp) /*!< in: TRUE if it is a dir path */
2464
ulint namelen = strlen(name);
2465
ulint dirlen = strlen(fil_path_to_mysql_datadir);
2466
/* The sizeof of the /.ibd literal accounts for the separator, the
suffix and the terminating NUL, which is enough for the longer form */
char* filename = static_cast<char *>(mem_alloc(namelen + dirlen + sizeof "/.ibd"));
2469
/* Short form, presumably taken when is_temp is TRUE: name is already
a path and only the suffix (with its NUL) is appended */
memcpy(filename, name, namelen);
2470
memcpy(filename + namelen, ".ibd", sizeof ".ibd");
2472
/* Long form: datadir, slash, name, suffix (with its NUL) */
memcpy(filename, fil_path_to_mysql_datadir, dirlen);
2473
filename[dirlen] = '/';
2475
memcpy(filename + dirlen + 1, name, namelen);
2476
memcpy(filename + dirlen + namelen + 1, ".ibd", sizeof ".ibd");
2479
srv_normalize_path_for_win(filename);
2484
/*******************************************************************//**
2485
Renames a single-table tablespace. The tablespace must be cached in the
2486
tablespace memory cache.
2487
@return TRUE if success */
2490
fil_rename_tablespace(
2491
/*==================*/
2492
const char* old_name, /*!< in: old table name in the standard
2493
databasename/tablename format of
2494
InnoDB, or NULL if we do the rename
2495
based on the space id only */
2496
ulint id, /*!< in: space id */
2497
const char* new_name) /*!< in: new table name in the standard
2498
databasename/tablename format
2506
ibool old_name_was_specified = TRUE;
2511
if (old_name == NULL) {
2512
old_name = "(name not specified)";
2513
old_name_was_specified = FALSE;
2519
ut_print_timestamp(stderr);
2520
fputs(" InnoDB: Warning: problems renaming ", stderr);
2521
ut_print_filename(stderr, old_name);
2522
fputs(" to ", stderr);
2523
ut_print_filename(stderr, new_name);
2524
fprintf(stderr, ", %lu iterations\n", (ulong) count);
2527
mutex_enter(&fil_system->mutex);
2529
space = fil_space_get_by_id(id);
2531
if (space == NULL) {
2533
"InnoDB: Error: cannot find space id %lu"
2534
" in the tablespace memory cache\n"
2535
"InnoDB: though the table ", (ulong) id);
2536
ut_print_filename(stderr, old_name);
2537
fputs(" in a rename operation should have that id\n", stderr);
2538
mutex_exit(&fil_system->mutex);
2543
if (count > 25000) {
2544
space->stop_ios = FALSE;
2545
mutex_exit(&fil_system->mutex);
2550
/* We temporarily close the .ibd file because we do not trust that
2551
operating systems can rename an open file. For the closing we have to
2552
wait until there are no pending i/o's or flushes on the file. */
2554
space->stop_ios = TRUE;
2556
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2557
node = UT_LIST_GET_FIRST(space->chain);
2559
if (node->n_pending > 0 || node->n_pending_flushes > 0) {
2560
/* There are pending i/o's or flushes, sleep for a while and
2563
mutex_exit(&fil_system->mutex);
2565
os_thread_sleep(20000);
2569
} else if (node->modification_counter > node->flush_counter) {
2570
/* Flush the space */
2572
mutex_exit(&fil_system->mutex);
2574
os_thread_sleep(20000);
2580
} else if (node->open) {
2581
/* Close the file */
2583
fil_node_close_file(node, fil_system);
2586
/* Check that the old name in the space is right */
2588
if (old_name_was_specified) {
2589
old_path = fil_make_ibd_name(old_name, FALSE);
2591
ut_a(strcmp(space->name, old_path) == 0);
2592
ut_a(strcmp(node->name, old_path) == 0);
2594
old_path = mem_strdup(space->name);
2597
/* Rename the tablespace and the node in the memory cache */
2598
path = fil_make_ibd_name(new_name, FALSE);
2599
success = fil_rename_tablespace_in_mem(space, node, path);
2602
success = os_file_rename(innodb_file_data_key, old_path, path);
2605
/* We have to revert the changes we made
2606
to the tablespace memory cache */
2608
ut_a(fil_rename_tablespace_in_mem(space, node,
2616
space->stop_ios = FALSE;
2618
mutex_exit(&fil_system->mutex);
2620
#ifndef UNIV_HOTBACKUP
2626
fil_op_write_log(MLOG_FILE_RENAME, id, 0, 0, old_name, new_name,
2634
/*******************************************************************//**
2635
Creates a new single-table tablespace to a database directory of MySQL.
2636
Database directories are under the 'datadir' of MySQL. The datadir is the
2637
directory of a running mysqld program. We can refer to it by simply the
2638
path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
2639
dir of the mysqld server.
2640
@return DB_SUCCESS or error code */
2643
fil_create_new_single_table_tablespace(
2644
/*===================================*/
2645
ulint space_id, /*!< in: space id */
2646
const char* tablename, /*!< in: the table name in the usual
2647
databasename/tablename format
2648
of InnoDB, or a dir path to a temp
2650
ibool is_temp, /*!< in: TRUE if a table created with
2651
CREATE TEMPORARY TABLE */
2652
ulint flags, /*!< in: tablespace flags */
2653
ulint size) /*!< in: the initial size of the
2654
tablespace file in pages,
2655
must be >= FIL_IBD_FILE_INITIAL_SIZE */
2666
ut_a(space_id < SRV_LOG_SPACE_FIRST_ID);
2667
ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
2668
/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
2670
((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
2671
ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
2672
format, the tablespace flags should equal
2673
(table->flags & ~(~0 << DICT_TF_BITS)). */
2674
ut_a(flags != DICT_TF_COMPACT);
2675
ut_a(!(flags & (~0UL << DICT_TF_BITS)));
2677
path = fil_make_ibd_name(tablename, is_temp);
2679
file = os_file_create(innodb_file_data_key, path,
2680
OS_FILE_CREATE, OS_FILE_NORMAL,
2681
OS_DATA_FILE, &ret);
2683
ut_print_timestamp(stderr);
2684
fputs(" InnoDB: Error creating file ", stderr);
2685
ut_print_filename(stderr, path);
2686
fputs(".\n", stderr);
2688
/* The following call will print an error message */
2690
err = os_file_get_last_error(TRUE);
2692
if (err == OS_FILE_ALREADY_EXISTS) {
2693
fputs("InnoDB: The file already exists though"
2694
" the corresponding table did not\n"
2695
"InnoDB: exist in the InnoDB data dictionary."
2696
" Have you moved InnoDB\n"
2697
"InnoDB: .ibd files around without using the"
2699
"InnoDB: DISCARD TABLESPACE and"
2700
" IMPORT TABLESPACE, or did\n"
2701
"InnoDB: mysqld crash in the middle of"
2702
" CREATE TABLE? You can\n"
2703
"InnoDB: resolve the problem by"
2704
" removing the file ", stderr);
2705
ut_print_filename(stderr, path);
2707
"InnoDB: under the 'datadir' of MySQL.\n",
2711
return(DB_TABLESPACE_ALREADY_EXISTS);
2714
if (err == OS_FILE_DISK_FULL) {
2717
return(DB_OUT_OF_FILE_SPACE);
2724
ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0);
2727
err = DB_OUT_OF_FILE_SPACE;
2729
os_file_close(file);
2731
os_file_delete(path);
2737
/* printf("Creating tablespace %s id %lu\n", path, space_id); */
2739
/* We have to write the space id to the file immediately and flush the
2740
file to disk. This is because in crash recovery we must be aware what
2741
tablespaces exist and what are their space id's, so that we can apply
2742
the log records to the right file. It may take quite a while until
2743
buffer pool flush algorithms write anything to the file and flush it to
2744
disk. If we would not write here anything, the file would be filled
2745
with zeros from the call of os_file_set_size(), until a buffer pool
2746
flush would write to it. */
2748
buf2 = static_cast<byte *>(ut_malloc(3 * UNIV_PAGE_SIZE));
2749
/* Align the memory for file i/o if we might have O_DIRECT set */
2750
page = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
2752
memset(page, '\0', UNIV_PAGE_SIZE);
2754
fsp_header_init_fields(page, space_id, flags);
2755
mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
2757
if (!(flags & DICT_TF_ZSSIZE_MASK)) {
2758
buf_flush_init_for_writing(page, NULL, 0);
2759
ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE);
2761
page_zip_des_t page_zip;
2764
zip_size = ((PAGE_ZIP_MIN_SIZE >> 1)
2765
<< ((flags & DICT_TF_ZSSIZE_MASK)
2766
>> DICT_TF_ZSSIZE_SHIFT));
2768
page_zip_set_size(&page_zip, zip_size);
2769
page_zip.data = page + UNIV_PAGE_SIZE;
2772
#endif /* UNIV_DEBUG */
2773
page_zip.m_end = page_zip.m_nonempty =
2774
page_zip.n_blobs = 0;
2775
buf_flush_init_for_writing(page, &page_zip, 0);
2776
ret = os_file_write(path, file, page_zip.data, 0, 0, zip_size);
2782
fputs("InnoDB: Error: could not write the first page"
2783
" to tablespace ", stderr);
2784
ut_print_filename(stderr, path);
2790
ret = os_file_flush(file);
2793
fputs("InnoDB: Error: file flush of tablespace ", stderr);
2794
ut_print_filename(stderr, path);
2795
fputs(" failed\n", stderr);
2800
os_file_close(file);
2802
success = fil_space_create(path, space_id, flags, FIL_TABLESPACE);
2809
fil_node_create(path, size, space_id, FALSE);
2811
#ifndef UNIV_HOTBACKUP
2817
fil_op_write_log(flags
2821
is_temp ? MLOG_FILE_FLAG_TEMP : 0,
2823
tablename, NULL, &mtr);
2832
#ifndef UNIV_HOTBACKUP
2833
/********************************************************************//**
2834
It is possible, though very improbable, that the lsn's in the tablespace to be
2835
imported have risen above the current system lsn, if a lengthy purge, ibuf
2836
merge, or rollback was performed on a backup taken with ibbackup. If that is
2837
the case, reset page lsn's in the file. We assume that mysqld was shut down
2838
after it performed these cleanup operations on the .ibd file, so that it at
2839
the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
2840
first page of the .ibd file, and we can determine whether we need to reset the
2841
lsn's just by looking at that flush lsn.
2842
@return TRUE if success */
2845
fil_reset_too_high_lsns(
2846
/*====================*/
2847
const char* name, /*!< in: table name in the
2848
databasename/tablename format */
2849
ib_uint64_t current_lsn) /*!< in: reset lsn's if the lsn stamped
2850
to FIL_PAGE_FILE_FLUSH_LSN in the
2851
first page is too high */
2857
ib_uint64_t flush_lsn;
2859
ib_int64_t file_size;
2863
page_zip_des_t page_zip;
2865
filepath = fil_make_ibd_name(name, FALSE);
2867
file = os_file_create_simple_no_error_handling(
2868
innodb_file_data_key, filepath, OS_FILE_OPEN,
2869
OS_FILE_READ_WRITE, &success);
2871
/* The following call prints an error message */
2872
os_file_get_last_error(TRUE);
2874
ut_print_timestamp(stderr);
2876
fputs(" InnoDB: Error: trying to open a table,"
2878
"InnoDB: open the tablespace file ", stderr);
2879
ut_print_filename(stderr, filepath);
2880
fputs("!\n", stderr);
2886
/* Read the first page of the tablespace */
2888
buf2 = static_cast<byte *>(ut_malloc(3 * UNIV_PAGE_SIZE));
2889
/* Align the memory for file i/o if we might have O_DIRECT set */
2890
page = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
2892
success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
2898
/* We have to read the file flush lsn from the header of the file */
2900
flush_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
2902
if (current_lsn >= flush_lsn) {
2909
space_id = fsp_header_get_space_id(page);
2910
zip_size = fsp_header_get_zip_size(page);
2912
page_zip_des_init(&page_zip);
2913
page_zip_set_size(&page_zip, zip_size);
2915
page_zip.data = page + UNIV_PAGE_SIZE;
2918
ut_print_timestamp(stderr);
2920
" InnoDB: Flush lsn in the tablespace file %lu"
2922
"InnoDB: is %"PRIu64", which exceeds current"
2923
" system lsn %"PRIu64".\n"
2924
"InnoDB: We reset the lsn's in the file ",
2926
flush_lsn, current_lsn);
2927
ut_print_filename(stderr, filepath);
2928
fputs(".\n", stderr);
2930
ut_a(ut_is_2pow(zip_size));
2931
ut_a(zip_size <= UNIV_PAGE_SIZE);
2933
/* Loop through all the pages in the tablespace and reset the lsn and
2934
the page checksum if necessary */
2936
file_size = os_file_get_size_as_iblonglong(file);
2938
for (offset = 0; offset < file_size;
2939
offset += zip_size ? zip_size : UNIV_PAGE_SIZE) {
2940
success = os_file_read(file, page,
2941
(ulint)(offset & 0xFFFFFFFFUL),
2942
(ulint)(offset >> 32),
2943
zip_size ? zip_size : UNIV_PAGE_SIZE);
2948
if (mach_read_from_8(page + FIL_PAGE_LSN) > current_lsn) {
2949
/* We have to reset the lsn */
2952
memcpy(page_zip.data, page, zip_size);
2953
buf_flush_init_for_writing(
2954
page, &page_zip, current_lsn);
2955
success = os_file_write(
2956
filepath, file, page_zip.data,
2957
(ulint) offset & 0xFFFFFFFFUL,
2958
(ulint) (offset >> 32), zip_size);
2960
buf_flush_init_for_writing(
2961
page, NULL, current_lsn);
2962
success = os_file_write(
2963
filepath, file, page,
2964
(ulint)(offset & 0xFFFFFFFFUL),
2965
(ulint)(offset >> 32),
2976
success = os_file_flush(file);
2982
/* We now update the flush_lsn stamp at the start of the file */
2983
success = os_file_read(file, page, 0, 0,
2984
zip_size ? zip_size : UNIV_PAGE_SIZE);
2990
mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
2992
success = os_file_write(filepath, file, page, 0, 0,
2993
zip_size ? zip_size : UNIV_PAGE_SIZE);
2998
success = os_file_flush(file);
3000
os_file_close(file);
3007
/********************************************************************//**
3008
Tries to open a single-table tablespace and optionally checks the space id is
3009
right in it. If it does not succeed, prints an error message to the .err log. This
3010
function is used to open a tablespace when we start up mysqld, and also in
3012
NOTE that we assume this operation is used either at the database startup
3013
or under the protection of the dictionary mutex, so that two users cannot
3014
race here. This operation does not leave the file associated with the
3015
tablespace open, but closes it after we have looked at the space id in it.
3016
@return TRUE if success */
3019
fil_open_single_table_tablespace(
3020
/*=============================*/
3021
ibool check_space_id, /*!< in: should we check that the space
3022
id in the file is right; we assume
3023
that this function runs much faster
3024
if no check is made, since accessing
3025
the file inode probably is much
3026
faster (the OS caches them) than
3027
accessing the first page of the file */
3028
ulint id, /*!< in: space id */
3029
ulint flags, /*!< in: tablespace flags */
3030
const char* name) /*!< in: table name in the
3031
databasename/tablename format */
3041
filepath = fil_make_ibd_name(name, FALSE);
3043
/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
3045
((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
3046
ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
3047
format, the tablespace flags should equal
3048
(table->flags & ~(~0 << DICT_TF_BITS)). */
3049
ut_a(flags != DICT_TF_COMPACT);
3050
ut_a(!(flags & (~0UL << DICT_TF_BITS)));
3052
file = os_file_create_simple_no_error_handling(
3053
innodb_file_data_key, filepath, OS_FILE_OPEN,
3054
OS_FILE_READ_ONLY, &success);
3056
/* The following call prints an error message */
3057
os_file_get_last_error(TRUE);
3059
ut_print_timestamp(stderr);
3061
fputs(" InnoDB: Error: trying to open a table,"
3063
"InnoDB: open the tablespace file ", stderr);
3064
ut_print_filename(stderr, filepath);
3066
"InnoDB: Have you moved InnoDB .ibd files around"
3067
" without using the\n"
3068
"InnoDB: commands DISCARD TABLESPACE and"
3069
" IMPORT TABLESPACE?\n"
3070
"InnoDB: It is also possible that this is"
3071
" a temporary table #sql...,\n"
3072
"InnoDB: and MySQL removed the .ibd file for this.\n"
3073
"InnoDB: Please refer to\n"
3074
"InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
3075
"InnoDB: for how to resolve the issue.\n", stderr);
3082
if (!check_space_id) {
3088
/* Read the first page of the tablespace */
3090
buf2 = static_cast<byte *>(ut_malloc(2 * UNIV_PAGE_SIZE));
3091
/* Align the memory for file i/o if we might have O_DIRECT set */
3092
page = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
3094
success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
3096
/* We have to read the tablespace id and flags from the file. */
3098
space_id = fsp_header_get_space_id(page);
3099
space_flags = fsp_header_get_flags(page);
3103
if (UNIV_UNLIKELY(space_id != id
3104
|| space_flags != (flags & ~(~0 << DICT_TF_BITS)))) {
3105
ut_print_timestamp(stderr);
3107
fputs(" InnoDB: Error: tablespace id and flags in file ",
3109
ut_print_filename(stderr, filepath);
3110
fprintf(stderr, " are %lu and %lu, but in the InnoDB\n"
3111
"InnoDB: data dictionary they are %lu and %lu.\n"
3112
"InnoDB: Have you moved InnoDB .ibd files"
3113
" around without using the\n"
3114
"InnoDB: commands DISCARD TABLESPACE and"
3115
" IMPORT TABLESPACE?\n"
3116
"InnoDB: Please refer to\n"
3117
"InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
3118
"InnoDB: for how to resolve the issue.\n",
3119
(ulong) space_id, (ulong) space_flags,
3120
(ulong) id, (ulong) flags);
3128
success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE);
3134
/* We do not measure the size of the file, that is why we pass the 0
3137
fil_node_create(filepath, 0, space_id, FALSE);
3139
os_file_close(file);
3144
#endif /* !UNIV_HOTBACKUP */
3146
#ifdef UNIV_HOTBACKUP
3147
/*******************************************************************//**
3148
Allocates a file name for an old version of a single-table tablespace.
3149
The string must be freed by caller with mem_free()!
3150
@return own: file name */
3153
fil_make_ibbackup_old_name(
3154
/*=======================*/
3155
const char* name) /*!< in: original file name */
3157
static const char suffix[] = "_ibbackup_old_vers_";
3158
ulint len = strlen(name);
3159
char* path = mem_alloc(len + (15 + sizeof suffix));
3161
memcpy(path, name, len);
3162
memcpy(path + len, suffix, (sizeof suffix) - 1);
3163
ut_sprintf_timestamp_without_extra_chars(path + len + sizeof suffix);
3166
#endif /* UNIV_HOTBACKUP */
3168
/********************************************************************//**
3169
Opens an .ibd file and adds the associated single-table tablespace to the
3170
InnoDB fil0fil.c data structures. */
3173
fil_load_single_table_tablespace(
3174
/*=============================*/
3175
const char* dbname, /*!< in: database name */
3176
const char* filename) /*!< in: file name (not a path),
3177
including the .ibd extension */
3189
#ifdef UNIV_HOTBACKUP
3192
filepath = static_cast<char *>(mem_alloc(strlen(dbname) + strlen(filename)
3193
+ strlen(fil_path_to_mysql_datadir) + 3));
3195
sprintf(filepath, "%s/%s/%s", fil_path_to_mysql_datadir, dbname,
3197
srv_normalize_path_for_win(filepath);
3199
# ifndef UNIV_HOTBACKUP
3200
/* If lower_case_table_names is 0 or 2, then MySQL allows database
3201
directory names with upper case letters. On Windows, all table and
3202
database names in InnoDB are internally always in lower case. Put the
3203
file path to lower case, so that we are consistent with InnoDB's
3204
internal data dictionary. */
3206
dict_casedn_str(filepath);
3207
# endif /* !UNIV_HOTBACKUP */
3209
file = os_file_create_simple_no_error_handling(
3210
innodb_file_data_key, filepath, OS_FILE_OPEN,
3211
OS_FILE_READ_ONLY, &success);
3213
/* The following call prints an error message */
3214
os_file_get_last_error(TRUE);
3217
"InnoDB: Error: could not open single-table tablespace"
3220
"InnoDB: We do not continue the crash recovery,"
3221
" because the table may become\n"
3222
"InnoDB: corrupt if we cannot apply the log records"
3223
" in the InnoDB log to it.\n"
3224
"InnoDB: To fix the problem and start mysqld:\n"
3225
"InnoDB: 1) If there is a permission problem"
3226
" in the file and mysqld cannot\n"
3227
"InnoDB: open the file, you should"
3228
" modify the permissions.\n"
3229
"InnoDB: 2) If the table is not needed, or you can"
3230
" restore it from a backup,\n"
3231
"InnoDB: then you can remove the .ibd file,"
3232
" and InnoDB will do a normal\n"
3233
"InnoDB: crash recovery and ignore that table.\n"
3234
"InnoDB: 3) If the file system or the"
3235
" disk is broken, and you cannot remove\n"
3236
"InnoDB: the .ibd file, you can set"
3237
" innodb_force_recovery > 0 in my.cnf\n"
3238
"InnoDB: and force InnoDB to continue crash"
3239
" recovery here.\n", filepath);
3243
if (srv_force_recovery > 0) {
3245
"InnoDB: innodb_force_recovery"
3246
" was set to %lu. Continuing crash recovery\n"
3247
"InnoDB: even though we cannot access"
3248
" the .ibd file of this table.\n",
3249
srv_force_recovery);
3256
success = os_file_get_size(file, &size_low, &size_high);
3259
/* The following call prints an error message */
3260
os_file_get_last_error(TRUE);
3263
"InnoDB: Error: could not measure the size"
3264
" of single-table tablespace file\n"
3266
"InnoDB: We do not continue crash recovery,"
3267
" because the table will become\n"
3268
"InnoDB: corrupt if we cannot apply the log records"
3269
" in the InnoDB log to it.\n"
3270
"InnoDB: To fix the problem and start mysqld:\n"
3271
"InnoDB: 1) If there is a permission problem"
3272
" in the file and mysqld cannot\n"
3273
"InnoDB: access the file, you should"
3274
" modify the permissions.\n"
3275
"InnoDB: 2) If the table is not needed,"
3276
" or you can restore it from a backup,\n"
3277
"InnoDB: then you can remove the .ibd file,"
3278
" and InnoDB will do a normal\n"
3279
"InnoDB: crash recovery and ignore that table.\n"
3280
"InnoDB: 3) If the file system or the disk is broken,"
3281
" and you cannot remove\n"
3282
"InnoDB: the .ibd file, you can set"
3283
" innodb_force_recovery > 0 in my.cnf\n"
3284
"InnoDB: and force InnoDB to continue"
3285
" crash recovery here.\n", filepath);
3287
os_file_close(file);
3290
if (srv_force_recovery > 0) {
3292
"InnoDB: innodb_force_recovery"
3293
" was set to %lu. Continuing crash recovery\n"
3294
"InnoDB: even though we cannot access"
3295
" the .ibd file of this table.\n",
3296
srv_force_recovery);
3303
/* TODO: What to do in other cases where we cannot access an .ibd
3304
file during a crash recovery? */
3306
/* Every .ibd file is created >= 4 pages in size. Smaller files
3309
size = (((ib_int64_t)size_high) << 32) + (ib_int64_t)size_low;
3310
#ifndef UNIV_HOTBACKUP
3311
if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
3313
"InnoDB: Error: the size of single-table tablespace"
3315
"InnoDB: is only %lu %lu, should be at least %lu!",
3318
(ulong) size_low, (ulong) (4 * UNIV_PAGE_SIZE));
3319
os_file_close(file);
3325
/* Read the first page of the tablespace if the size big enough */
3327
buf2 = static_cast<byte *>(ut_malloc(2 * UNIV_PAGE_SIZE));
3328
/* Align the memory for file i/o if we might have O_DIRECT set */
3329
page = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
3331
if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
3332
success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
3334
/* We have to read the tablespace id from the file */
3336
space_id = fsp_header_get_space_id(page);
3337
flags = fsp_header_get_flags(page);
3339
space_id = ULINT_UNDEFINED;
3343
#ifndef UNIV_HOTBACKUP
3344
if (space_id == ULINT_UNDEFINED || space_id == 0) {
3346
"InnoDB: Error: tablespace id %lu in file %s"
3347
" is not sensible\n",
3353
if (space_id == ULINT_UNDEFINED || space_id == 0) {
3357
"InnoDB: Renaming tablespace %s of id %lu,\n"
3358
"InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
3359
"InnoDB: because its size %" PRId64 " is too small"
3360
" (< 4 pages 16 kB each),\n"
3361
"InnoDB: or the space id in the file header"
3362
" is not sensible.\n"
3363
"InnoDB: This can happen in an ibbackup run,"
3364
" and is not dangerous.\n",
3365
filepath, space_id, filepath, size);
3366
os_file_close(file);
3368
new_path = fil_make_ibbackup_old_name(filepath);
3369
ut_a(os_file_rename(innodb_file_data_key, filepath, new_path));
3378
/* A backup may contain the same space several times, if the space got
3379
renamed at a sensitive time. Since it is enough to have one version of
3380
the space, we rename the file if a space with the same space id
3381
already exists in the tablespace memory cache. We rather rename the
3382
file than delete it, because if there is a bug, we do not want to
3383
destroy valuable data. */
3385
mutex_enter(&fil_system->mutex);
3387
space = fil_space_get_by_id(space_id);
3393
"InnoDB: Renaming tablespace %s of id %lu,\n"
3394
"InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
3395
"InnoDB: because space %s with the same id\n"
3396
"InnoDB: was scanned earlier. This can happen"
3397
" if you have renamed tables\n"
3398
"InnoDB: during an ibbackup run.\n",
3399
filepath, space_id, filepath,
3401
os_file_close(file);
3403
new_path = fil_make_ibbackup_old_name(filepath);
3405
mutex_exit(&fil_system->mutex);
3407
ut_a(os_file_rename(innodb_file_data_key, filepath, new_path));
3415
mutex_exit(&fil_system->mutex);
3417
success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE);
3421
if (srv_force_recovery > 0) {
3423
"InnoDB: innodb_force_recovery"
3424
" was set to %lu. Continuing crash recovery\n"
3425
"InnoDB: even though the tablespace creation"
3426
" of this table failed.\n",
3427
srv_force_recovery);
3434
/* We do not use the size information we have about the file, because
3435
the rounding formula for extents and pages is somewhat complex; we
3436
let fil_node_open() do that task. */
3438
fil_node_create(filepath, 0, space_id, FALSE);
3440
os_file_close(file);
3445
/***********************************************************************//**
3446
A fault-tolerant function that tries to read the next file name in the
3447
directory. We retry 100 times if os_file_readdir_next_file() returns -1. The
3448
idea is to read as much good data as we can and jump over bad data.
3449
@return 0 if ok, -1 if error even after the retries, 1 if at the end
3453
fil_file_readdir_next_file(
3454
/*=======================*/
3455
ulint* err, /*!< out: this is set to DB_ERROR if an error
3456
was encountered, otherwise not changed */
3457
const char* dirname,/*!< in: directory name or path */
3458
os_file_dir_t dir, /*!< in: directory stream */
3459
os_file_stat_t* info) /*!< in/out: buffer where the info is returned */
3464
for (i = 0; i < 100; i++) {
3465
ret = os_file_readdir_next_file(dirname, dir, info);
3473
"InnoDB: Error: os_file_readdir_next_file()"
3475
"InnoDB: directory %s\n"
3476
"InnoDB: Crash recovery may have failed"
3477
" for some .ibd files!\n", dirname);
3485
/********************************************************************//**
3486
At the server startup, if we need crash recovery, scans the database
3487
directories under the MySQL datadir, looking for .ibd files. Those files are
3488
single-table tablespaces. We need to know the space id in each of them so that
3489
we know into which file we should look to check the contents of a page stored
3490
in the doublewrite buffer, also to know where to apply log records where the
3492
@return DB_SUCCESS or error number */
3495
fil_load_single_table_tablespaces(void)
3496
/*===================================*/
3499
char* dbpath = NULL;
3500
ulint dbpath_len = 100;
3502
os_file_dir_t dbdir;
3503
os_file_stat_t dbinfo;
3504
os_file_stat_t fileinfo;
3505
ulint err = DB_SUCCESS;
3507
/* The datadir of MySQL is always the default directory of mysqld */
3509
dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE);
3516
dbpath = static_cast<char *>(mem_alloc(dbpath_len));
3518
/* Scan all directories under the datadir. They are the database
3519
directories of MySQL. */
3521
ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir,
3525
/* printf("Looking at %s in datadir\n", dbinfo.name); */
3527
if (dbinfo.type == OS_FILE_TYPE_FILE
3528
|| dbinfo.type == OS_FILE_TYPE_UNKNOWN) {
3530
goto next_datadir_item;
3533
/* We found a symlink or a directory; try opening it to see
3534
if a symlink is a directory */
3536
len = strlen(fil_path_to_mysql_datadir)
3537
+ strlen (dbinfo.name) + 2;
3538
if (len > dbpath_len) {
3545
dbpath = static_cast<char *>(mem_alloc(dbpath_len));
3547
sprintf(dbpath, "%s/%s", fil_path_to_mysql_datadir,
3549
srv_normalize_path_for_win(dbpath);
3551
dbdir = os_file_opendir(dbpath, FALSE);
3553
if (dbdir != NULL) {
3554
/* printf("Opened dir %s\n", dbinfo.name); */
3556
/* We found a database directory; loop through it,
3557
looking for possible .ibd files in it */
3559
ret = fil_file_readdir_next_file(&err, dbpath, dbdir,
3563
" Looking at file %s\n", fileinfo.name); */
3565
if (fileinfo.type == OS_FILE_TYPE_DIR) {
3567
goto next_file_item;
3570
/* We found a symlink or a file */
3571
if (strlen(fileinfo.name) > 4
3572
&& 0 == strcmp(fileinfo.name
3573
+ strlen(fileinfo.name) - 4,
3575
/* The name ends in .ibd; try opening
3577
fil_load_single_table_tablespace(
3578
dbinfo.name, fileinfo.name);
3581
ret = fil_file_readdir_next_file(&err,
3586
if (0 != os_file_closedir(dbdir)) {
3587
fputs("InnoDB: Warning: could not"
3588
" close database directory ", stderr);
3589
ut_print_filename(stderr, dbpath);
3597
ret = fil_file_readdir_next_file(&err,
3598
fil_path_to_mysql_datadir,
3604
if (0 != os_file_closedir(dir)) {
3606
"InnoDB: Error: could not close MySQL datadir\n");
3614
/*******************************************************************//**
3615
Returns TRUE if a single-table tablespace does not exist in the memory cache,
3616
or is being deleted there.
3617
@return TRUE if does not exist or is being\ deleted */
3620
fil_tablespace_deleted_or_being_deleted_in_mem(
3621
/*===========================================*/
3622
ulint id, /*!< in: space id */
3623
ib_int64_t version)/*!< in: tablespace_version should be this; if
3624
you pass -1 as the value of this, then this
3625
parameter is ignored */
3631
mutex_enter(&fil_system->mutex);
3633
space = fil_space_get_by_id(id);
3635
if (space == NULL || space->is_being_deleted) {
3636
mutex_exit(&fil_system->mutex);
3641
if (version != ((ib_int64_t)-1)
3642
&& space->tablespace_version != version) {
3643
mutex_exit(&fil_system->mutex);
3648
mutex_exit(&fil_system->mutex);
3653
/*******************************************************************//**
3654
Returns TRUE if a single-table tablespace exists in the memory cache.
3655
@return TRUE if exists */
3658
fil_tablespace_exists_in_mem(
3659
/*=========================*/
3660
ulint id) /*!< in: space id */
3666
mutex_enter(&fil_system->mutex);
3668
space = fil_space_get_by_id(id);
3670
mutex_exit(&fil_system->mutex);
3672
return(space != NULL);
3675
/*******************************************************************//**
3676
Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
3677
cache. Note that if we have not done a crash recovery at the database startup,
3678
there may be many tablespaces which are not yet in the memory cache.
3679
@return TRUE if a matching tablespace exists in the memory cache */
3682
fil_space_for_table_exists_in_mem(
3683
/*==============================*/
3684
ulint id, /*!< in: space id */
3685
const char* name, /*!< in: table name in the standard
3686
'databasename/tablename' format or
3687
the dir path to a temp table */
3688
ibool is_temp, /*!< in: TRUE if created with CREATE
3690
ibool mark_space, /*!< in: in crash recovery, at database
3691
startup we mark all spaces which have
3692
an associated table in the InnoDB
3693
data dictionary, so that
3694
we can print a warning about orphaned
3696
ibool print_error_if_does_not_exist)
3697
/*!< in: print detailed error
3698
information to the .err log if a
3699
matching tablespace is not found from
3702
fil_space_t* tablespace;
3708
mutex_enter(&fil_system->mutex);
3710
path = fil_make_ibd_name(name, is_temp);
3712
/* Look if there is a space with the same id */
3714
space = fil_space_get_by_id(id);
3716
/* Look if there is a space with the same name; the name is the
3717
directory path from the datadir to the file */
3719
tablespace = fil_space_get_by_name(path);
3720
if (space && space == tablespace) {
3728
mutex_exit(&fil_system->mutex);
3733
if (!print_error_if_does_not_exist) {
3736
mutex_exit(&fil_system->mutex);
3741
if (space == NULL) {
3742
if (tablespace == NULL) {
3743
ut_print_timestamp(stderr);
3744
fputs(" InnoDB: Error: table ", stderr);
3745
ut_print_filename(stderr, name);
3746
fprintf(stderr, "\n"
3747
"InnoDB: in InnoDB data dictionary"
3748
" has tablespace id %lu,\n"
3749
"InnoDB: but tablespace with that id"
3750
" or name does not exist. Have\n"
3751
"InnoDB: you deleted or moved .ibd files?\n"
3752
"InnoDB: This may also be a table created with"
3753
" CREATE TEMPORARY TABLE\n"
3754
"InnoDB: whose .ibd and .frm files"
3755
" MySQL automatically removed, but the\n"
3756
"InnoDB: table still exists in the"
3757
" InnoDB internal data dictionary.\n",
3760
ut_print_timestamp(stderr);
3761
fputs(" InnoDB: Error: table ", stderr);
3762
ut_print_filename(stderr, name);
3763
fprintf(stderr, "\n"
3764
"InnoDB: in InnoDB data dictionary has"
3765
" tablespace id %lu,\n"
3766
"InnoDB: but a tablespace with that id"
3767
" does not exist. There is\n"
3768
"InnoDB: a tablespace of name %s and id %lu,"
3770
"InnoDB: you deleted or moved .ibd files?\n",
3771
(ulong) id, tablespace->name,
3772
(ulong) tablespace->id);
3775
fputs("InnoDB: Please refer to\n"
3776
"InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
3777
"InnoDB: for how to resolve the issue.\n", stderr);
3780
mutex_exit(&fil_system->mutex);
3785
if (0 != strcmp(space->name, path)) {
3786
ut_print_timestamp(stderr);
3787
fputs(" InnoDB: Error: table ", stderr);
3788
ut_print_filename(stderr, name);
3789
fprintf(stderr, "\n"
3790
"InnoDB: in InnoDB data dictionary has"
3791
" tablespace id %lu,\n"
3792
"InnoDB: but the tablespace with that id"
3794
"InnoDB: Have you deleted or moved .ibd files?\n",
3795
(ulong) id, space->name);
3797
if (tablespace != NULL) {
3798
fputs("InnoDB: There is a tablespace"
3799
" with the right name\n"
3800
"InnoDB: ", stderr);
3801
ut_print_filename(stderr, tablespace->name);
3802
fprintf(stderr, ", but its id is %lu.\n",
3803
(ulong) tablespace->id);
3810
mutex_exit(&fil_system->mutex);
3815
/*******************************************************************//**
3816
Checks if a single-table tablespace for a given table name exists in the
3817
tablespace memory cache.
3818
@return space id, ULINT_UNDEFINED if not found */
3821
fil_get_space_id_for_table(
3822
/*=======================*/
3823
const char* name) /*!< in: table name in the standard
3824
'databasename/tablename' format */
3826
fil_space_t* tablespace;
3827
ulint id = ULINT_UNDEFINED;
3832
mutex_enter(&fil_system->mutex);
3834
path = fil_make_ibd_name(name, FALSE);
3836
/* Look if there is a space with the same name; the name is the
3837
directory path to the file */
3839
tablespace = fil_space_get_by_name(path);
3842
id = tablespace->id;
3847
mutex_exit(&fil_system->mutex);
3852
/**********************************************************************//**
3853
Tries to extend a data file so that it would accommodate the number of pages
3854
given. The tablespace must be cached in the memory cache. If the space is big
3855
enough already, does nothing.
3856
@return TRUE on success */
3859
fil_extend_space_to_desired_size(
3860
/*=============================*/
3861
ulint* actual_size, /*!< out: size of the space after extension;
3862
if we ran out of disk space this may be lower
3863
than the desired size */
3864
ulint space_id, /*!< in: space id */
3865
ulint size_after_extend)/*!< in: desired size in pages after the
3866
extension; if the current space size is bigger
3867
than this already, the function does nothing */
3874
ulint start_page_no;
3875
ulint file_start_page_no;
3879
ibool success = TRUE;
3881
fil_mutex_enter_and_prepare_for_io(space_id);
3883
space = fil_space_get_by_id(space_id);
3886
if (space->size >= size_after_extend) {
3887
/* Space already big enough */
3889
*actual_size = space->size;
3891
mutex_exit(&fil_system->mutex);
3896
page_size = dict_table_flags_to_zip_size(space->flags);
3898
page_size = UNIV_PAGE_SIZE;
3901
node = UT_LIST_GET_LAST(space->chain);
3903
fil_node_prepare_for_io(node, fil_system, space);
3905
start_page_no = space->size;
3906
file_start_page_no = space->size - node->size;
3908
/* Extend at most 64 pages at a time */
3909
buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
3910
buf2 = static_cast<byte *>(mem_alloc(buf_size + page_size));
3911
buf = static_cast<byte *>(ut_align(buf2, page_size));
3913
memset(buf, 0, buf_size);
3915
while (start_page_no < size_after_extend) {
3916
ulint n_pages = ut_min(buf_size / page_size,
3917
size_after_extend - start_page_no);
3919
offset_high = (start_page_no - file_start_page_no)
3920
/ (4096 * ((1024 * 1024) / page_size));
3921
offset_low = ((start_page_no - file_start_page_no)
3922
% (4096 * ((1024 * 1024) / page_size)))
3924
#ifdef UNIV_HOTBACKUP
3925
success = os_file_write(node->name, node->handle, buf,
3926
offset_low, offset_high,
3927
page_size * n_pages);
3929
success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
3930
node->name, node->handle, buf,
3931
offset_low, offset_high,
3932
page_size * n_pages,
3936
node->size += n_pages;
3937
space->size += n_pages;
3939
os_has_said_disk_full = FALSE;
3941
/* Let us measure the size of the file to determine
3942
how much we were able to extend it */
3945
(os_file_get_size_as_iblonglong(
3947
/ page_size)) - node->size;
3949
node->size += n_pages;
3950
space->size += n_pages;
3955
start_page_no += n_pages;
3960
fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
3962
*actual_size = space->size;
3964
#ifndef UNIV_HOTBACKUP
3965
if (space_id == 0) {
3966
ulint pages_per_mb = (1024 * 1024) / page_size;
3968
/* Keep the last data file size info up to date, rounded to
3971
srv_data_file_sizes[srv_n_data_files - 1]
3972
= (node->size / pages_per_mb) * pages_per_mb;
3974
#endif /* !UNIV_HOTBACKUP */
3977
printf("Extended %s to %lu, actual size %lu pages\n", space->name,
3978
size_after_extend, *actual_size); */
3979
mutex_exit(&fil_system->mutex);
3981
fil_flush(space_id);
3986
#ifdef UNIV_HOTBACKUP
3987
/********************************************************************//**
3988
Extends all tablespaces to the size stored in the space header. During the
3989
ibbackup --apply-log phase we extended the spaces on-demand so that log records
3990
could be applied, but that may have left spaces still too small compared to
3991
the size stored in the space header. */
3994
fil_extend_tablespaces_to_stored_len(void)
3995
/*======================================*/
4000
ulint size_in_header;
4004
buf = mem_alloc(UNIV_PAGE_SIZE);
4006
mutex_enter(&fil_system->mutex);
4008
space = UT_LIST_GET_FIRST(fil_system->space_list);
4011
ut_a(space->purpose == FIL_TABLESPACE);
4013
mutex_exit(&fil_system->mutex); /* no need to protect with a
4014
mutex, because this is a
4015
single-threaded operation */
4016
error = fil_read(TRUE, space->id,
4017
dict_table_flags_to_zip_size(space->flags),
4018
0, 0, UNIV_PAGE_SIZE, buf, NULL);
4019
ut_a(error == DB_SUCCESS);
4021
size_in_header = fsp_get_size_low(buf);
4023
success = fil_extend_space_to_desired_size(
4024
&actual_size, space->id, size_in_header);
4027
"InnoDB: Error: could not extend the"
4028
" tablespace of %s\n"
4029
"InnoDB: to the size stored in header,"
4031
"InnoDB: size after extension %lu pages\n"
4032
"InnoDB: Check that you have free disk space"
4034
space->name, size_in_header, actual_size);
4038
mutex_enter(&fil_system->mutex);
4040
space = UT_LIST_GET_NEXT(space_list, space);
4043
mutex_exit(&fil_system->mutex);
4049
/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
4051
/*******************************************************************//**
4052
Tries to reserve free extents in a file space.
4053
@return TRUE if the reservation succeeded */
4056
fil_space_reserve_free_extents(
4057
/*===========================*/
4058
ulint id, /*!< in: space id */
4059
ulint n_free_now, /*!< in: number of free extents now */
4060
ulint n_to_reserve) /*!< in: how many one wants to reserve */
4067
mutex_enter(&fil_system->mutex);
4069
space = fil_space_get_by_id(id);
4073
if (space->n_reserved_extents + n_to_reserve > n_free_now) {
4076
space->n_reserved_extents += n_to_reserve;
4080
mutex_exit(&fil_system->mutex);
4085
/*******************************************************************//**
4086
Releases free extents in a file space. */
4089
fil_space_release_free_extents(
4090
/*===========================*/
4091
ulint id, /*!< in: space id */
4092
ulint n_reserved) /*!< in: how many one reserved */
4098
mutex_enter(&fil_system->mutex);
4100
space = fil_space_get_by_id(id);
4103
ut_a(space->n_reserved_extents >= n_reserved);
4105
space->n_reserved_extents -= n_reserved;
4107
mutex_exit(&fil_system->mutex);
4110
/*******************************************************************//**
4111
Gets the number of reserved extents. If the database is silent, this number
4115
fil_space_get_n_reserved_extents(
4116
/*=============================*/
4117
ulint id) /*!< in: space id */
4124
mutex_enter(&fil_system->mutex);
4126
space = fil_space_get_by_id(id);
4130
n = space->n_reserved_extents;
4132
mutex_exit(&fil_system->mutex);
4137
/*============================ FILE I/O ================================*/
4139
/********************************************************************//**
4140
NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
4142
Prepares a file node for i/o. Opens the file if it is closed. Updates the
4143
pending i/o's field in the node and the system appropriately. Takes the node
4144
off the LRU list if it is in the LRU list. The caller must hold the fil_sys
4148
fil_node_prepare_for_io(
4149
/*====================*/
4150
fil_node_t* node, /*!< in: file node */
4151
fil_system_t* system, /*!< in: tablespace memory cache */
4152
fil_space_t* space) /*!< in: space */
4154
ut_ad(node && system && space);
4155
ut_ad(mutex_own(&(system->mutex)));
4157
if (system->n_open > system->max_n_open + 5) {
4158
ut_print_timestamp(stderr);
4160
" InnoDB: Warning: open files %lu"
4161
" exceeds the limit %lu\n",
4162
(ulong) system->n_open,
4163
(ulong) system->max_n_open);
4166
if (node->open == FALSE) {
4167
/* File is closed: open it */
4168
ut_a(node->n_pending == 0);
4170
fil_node_open_file(node, system, space);
4173
if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE
4174
&& space->id != 0) {
4175
/* The node is in the LRU list, remove it */
4177
ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
4179
UT_LIST_REMOVE(LRU, system->LRU, node);
4185
/********************************************************************//**
4186
Updates the data structures when an i/o operation finishes. Updates the
4187
pending i/o's field in the node appropriately. */
4190
fil_node_complete_io(
4191
/*=================*/
4192
fil_node_t* node, /*!< in: file node */
4193
fil_system_t* system, /*!< in: tablespace memory cache */
4194
ulint type) /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
4195
the node as modified if
4196
type == OS_FILE_WRITE */
4200
ut_ad(mutex_own(&(system->mutex)));
4202
ut_a(node->n_pending > 0);
4206
if (type == OS_FILE_WRITE) {
4207
system->modification_counter++;
4208
node->modification_counter = system->modification_counter;
4210
if (!node->space->is_in_unflushed_spaces) {
4212
node->space->is_in_unflushed_spaces = TRUE;
4213
UT_LIST_ADD_FIRST(unflushed_spaces,
4214
system->unflushed_spaces,
4219
if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE
4220
&& node->space->id != 0) {
4221
/* The node must be put back to the LRU list */
4222
UT_LIST_ADD_FIRST(LRU, system->LRU, node);
4226
/********************************************************************//**
4227
Report information about an invalid page access. */
4230
fil_report_invalid_page_access(
4231
/*===========================*/
4232
ulint block_offset, /*!< in: block offset */
4233
ulint space_id, /*!< in: space id */
4234
const char* space_name, /*!< in: space name */
4235
ulint byte_offset, /*!< in: byte offset */
4236
ulint len, /*!< in: I/O length */
4237
ulint type) /*!< in: I/O type */
4240
"InnoDB: Error: trying to access page number %lu"
4242
"InnoDB: space name %s,\n"
4243
"InnoDB: which is outside the tablespace bounds.\n"
4244
"InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n"
4245
"InnoDB: If you get this error at mysqld startup,"
4246
" please check that\n"
4247
"InnoDB: your my.cnf matches the ibdata files"
4248
" that you have in the\n"
4249
"InnoDB: MySQL server.\n",
4250
(ulong) block_offset, (ulong) space_id, space_name,
4251
(ulong) byte_offset, (ulong) len, (ulong) type);
4254
/********************************************************************//**
4255
Reads or writes data. The i/o is asynchronous (aio) unless the caller asks for
synchronous aio with the sync parameter.
4256
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
4257
i/o on a tablespace which does not exist */
4262
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
4263
ORed to OS_FILE_LOG, if a log i/o
4264
and ORed to OS_AIO_SIMULATED_WAKE_LATER
4265
if simulated aio and we want to post a
4266
batch of i/os; NOTE that a simulated batch
4267
may introduce hidden chances of deadlocks,
4268
because i/os are not actually handled until
4269
all have been posted: use with great
4271
ibool sync, /*!< in: TRUE if synchronous aio is desired */
4272
ulint space_id, /*!< in: space id */
4273
ulint zip_size, /*!< in: compressed page size in bytes;
4274
0 for uncompressed pages */
4275
ulint block_offset, /*!< in: offset in number of blocks */
4276
ulint byte_offset, /*!< in: remainder of offset in bytes; in
4277
aio this must be divisible by the OS block
4279
ulint len, /*!< in: how many bytes to read or write; this
4280
must not cross a file boundary; in aio this
4281
must be a block size multiple */
4282
void* buf, /*!< in/out: buffer where to store read data
4283
or from where to write; in aio this must be
4284
appropriately aligned */
4285
void* message) /*!< in: message for aio handler if non-sync
4286
aio used, else ignored */
4297
is_log = type & OS_FILE_LOG;
4298
type = type & ~OS_FILE_LOG;
4300
wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
4301
type = type & ~OS_AIO_SIMULATED_WAKE_LATER;
4303
ut_ad(byte_offset < UNIV_PAGE_SIZE);
4304
ut_ad(!zip_size || !byte_offset);
4305
ut_ad(ut_is_2pow(zip_size));
4308
#if (1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE
4309
# error "(1 << UNIV_PAGE_SIZE_SHIFT) != UNIV_PAGE_SIZE"
4311
ut_ad(fil_validate());
4312
#ifndef UNIV_HOTBACKUP
4313
# ifndef UNIV_LOG_DEBUG
4314
/* ibuf bitmap pages must be read in the sync aio mode: */
4315
ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
4316
|| !ibuf_bitmap_page(zip_size, block_offset)
4318
ut_ad(!ibuf_inside() || is_log || (type == OS_FILE_WRITE)
4319
|| ibuf_page(space_id, zip_size, block_offset, NULL));
4320
# endif /* UNIV_LOG_DEBUG */
4323
} else if (is_log) {
4325
} else if (type == OS_FILE_READ
4326
&& !recv_no_ibuf_operations
4327
&& ibuf_page(space_id, zip_size, block_offset, NULL)) {
4330
mode = OS_AIO_NORMAL;
4332
#else /* !UNIV_HOTBACKUP */
4335
#endif /* !UNIV_HOTBACKUP */
4337
if (type == OS_FILE_READ) {
4338
srv_data_read+= len;
4339
} else if (type == OS_FILE_WRITE) {
4340
srv_data_written+= len;
4343
/* Reserve the fil_system mutex and make sure that we can open at
4344
least one file while holding it, if the file is not already open */
4346
fil_mutex_enter_and_prepare_for_io(space_id);
4348
space = fil_space_get_by_id(space_id);
4351
mutex_exit(&fil_system->mutex);
4353
ut_print_timestamp(stderr);
4355
" InnoDB: Error: trying to do i/o"
4356
" to a tablespace which does not exist.\n"
4357
"InnoDB: i/o type %lu, space id %lu,"
4358
" page no. %lu, i/o length %lu bytes\n",
4359
(ulong) type, (ulong) space_id, (ulong) block_offset,
4362
return(DB_TABLESPACE_DELETED);
4365
ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));
4367
node = UT_LIST_GET_FIRST(space->chain);
4370
if (UNIV_UNLIKELY(node == NULL)) {
4371
fil_report_invalid_page_access(
4372
block_offset, space_id, space->name,
4373
byte_offset, len, type);
4378
if (space->id != 0 && node->size == 0) {
4379
/* We do not know the size of a single-table tablespace
4380
before we open the file */
4385
if (node->size > block_offset) {
4389
block_offset -= node->size;
4390
node = UT_LIST_GET_NEXT(chain, node);
4394
/* Open file if closed */
4395
fil_node_prepare_for_io(node, fil_system, space);
4397
/* Check that at least the start offset is within the bounds of a
4398
single-table tablespace */
4399
if (UNIV_UNLIKELY(node->size <= block_offset)
4400
&& space->id != 0 && space->purpose == FIL_TABLESPACE) {
4402
fil_report_invalid_page_access(
4403
block_offset, space_id, space->name, byte_offset,
4409
/* Now we have made the changes in the data structures of fil_system */
4410
mutex_exit(&fil_system->mutex);
4412
/* Calculate the low 32 bits and the high 32 bits of the file offset */
4415
offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
4416
offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT)
4417
& 0xFFFFFFFFUL) + byte_offset;
4419
ut_a(node->size - block_offset
4420
>= ((byte_offset + len + (UNIV_PAGE_SIZE - 1))
4423
ulint zip_size_shift;
4425
case 1024: zip_size_shift = 10; break;
4426
case 2048: zip_size_shift = 11; break;
4427
case 4096: zip_size_shift = 12; break;
4428
case 8192: zip_size_shift = 13; break;
4429
case 16384: zip_size_shift = 14; break;
4432
offset_high = block_offset >> (32 - zip_size_shift);
4433
offset_low = (block_offset << zip_size_shift & 0xFFFFFFFFUL)
4435
ut_a(node->size - block_offset
4436
>= (len + (zip_size - 1)) / zip_size);
4441
ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
4442
ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
4444
#ifdef UNIV_HOTBACKUP
4445
/* In ibbackup do normal i/o, not aio */
4446
if (type == OS_FILE_READ) {
4447
ret = os_file_read(node->handle, buf, offset_low, offset_high,
4450
ret = os_file_write(node->name, node->handle, buf,
4451
offset_low, offset_high, len);
4454
/* Queue the aio request */
4455
ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
4456
offset_low, offset_high, len, node, message);
4460
if (mode == OS_AIO_SYNC) {
4461
/* The i/o operation is already completed when we return from
4464
mutex_enter(&fil_system->mutex);
4466
fil_node_complete_io(node, fil_system, type);
4468
mutex_exit(&fil_system->mutex);
4470
ut_ad(fil_validate());
4476
#ifndef UNIV_HOTBACKUP
4477
/**********************************************************************//**
4478
Waits for an aio operation to complete. This function is used to write the
4479
handler for completed requests. The aio array of pending requests is divided
4480
into segments (see os0file.c for more info). The thread specifies which
4481
segment it wants to wait for. */
4486
ulint segment) /*!< in: the number of the segment in the aio
4487
array to wait for */
4490
fil_node_t* fil_node;
4494
ut_ad(fil_validate());
4496
if (srv_use_native_aio) {
4497
srv_set_io_thread_op_info(segment, "native aio handle");
4499
ret = os_aio_windows_handle(segment, 0, &fil_node,
4501
#elif defined(LINUX_NATIVE_AIO)
4502
ret = os_aio_linux_handle(segment, &fil_node,
4505
ret = 0; /* Eliminate compiler warning */
4509
srv_set_io_thread_op_info(segment, "simulated aio handle");
4511
ret = os_aio_simulated_handle(segment, &fil_node,
4517
srv_set_io_thread_op_info(segment, "complete io for fil node");
4519
mutex_enter(&fil_system->mutex);
4521
fil_node_complete_io(fil_node, fil_system, type);
4523
mutex_exit(&fil_system->mutex);
4525
ut_ad(fil_validate());
4527
/* Do the i/o handling */
4528
/* IMPORTANT: since i/o handling for reads will read also the insert
4529
buffer in tablespace 0, you have to be very careful not to introduce
4530
deadlocks in the i/o system. We keep tablespace 0 data files always
4531
open, and use a special i/o thread to serve insert buffer requests. */
4533
if (fil_node->space->purpose == FIL_TABLESPACE) {
4534
srv_set_io_thread_op_info(segment, "complete io for buf page");
4535
buf_page_io_complete(static_cast<buf_page_t *>(message));
4537
srv_set_io_thread_op_info(segment, "complete io for log");
4538
log_io_complete(static_cast<log_group_t *>(message));
4541
#endif /* UNIV_HOTBACKUP */
4543
/**********************************************************************//**
4544
Flushes to disk possible writes cached by the OS. If the space does not exist
4545
or is being dropped, does not do anything. */
4550
ulint space_id) /*!< in: file space id (this can be a group of
4551
log files or a tablespace of the database) */
4556
ib_int64_t old_mod_counter;
4558
mutex_enter(&fil_system->mutex);
4560
space = fil_space_get_by_id(space_id);
4562
if (!space || space->is_being_deleted) {
4563
mutex_exit(&fil_system->mutex);
4568
space->n_pending_flushes++; /*!< prevent dropping of the space while
4570
node = UT_LIST_GET_FIRST(space->chain);
4573
if (node->modification_counter > node->flush_counter) {
4576
/* We want to flush the changes at least up to
4578
old_mod_counter = node->modification_counter;
4580
if (space->purpose == FIL_TABLESPACE) {
4581
fil_n_pending_tablespace_flushes++;
4583
fil_n_pending_log_flushes++;
4584
fil_n_log_flushes++;
4587
if (node->is_raw_disk) {
4593
if (node->n_pending_flushes > 0) {
4594
/* We want to avoid calling os_file_flush() on
4595
the file twice at the same time, because we do
4596
not know what bugs OS's may contain in file
4597
i/o; sleep for a while */
4599
mutex_exit(&fil_system->mutex);
4601
os_thread_sleep(20000);
4603
mutex_enter(&fil_system->mutex);
4605
if (node->flush_counter >= old_mod_counter) {
4614
file = node->handle;
4615
node->n_pending_flushes++;
4617
mutex_exit(&fil_system->mutex);
4619
/* fprintf(stderr, "Flushing to file %s\n",
4622
os_file_flush(file);
4624
mutex_enter(&fil_system->mutex);
4626
node->n_pending_flushes--;
4628
if (node->flush_counter < old_mod_counter) {
4629
node->flush_counter = old_mod_counter;
4631
if (space->is_in_unflushed_spaces
4632
&& fil_space_is_flushed(space)) {
4634
space->is_in_unflushed_spaces = FALSE;
4638
fil_system->unflushed_spaces,
4643
if (space->purpose == FIL_TABLESPACE) {
4644
fil_n_pending_tablespace_flushes--;
4646
fil_n_pending_log_flushes--;
4650
node = UT_LIST_GET_NEXT(chain, node);
4653
space->n_pending_flushes--;
4655
mutex_exit(&fil_system->mutex);
4658
/**********************************************************************//**
4659
Flushes to disk the writes in file spaces of the given type possibly cached by
4663
fil_flush_file_spaces(
4664
/*==================*/
4665
ulint purpose) /*!< in: FIL_TABLESPACE, FIL_LOG */
4672
mutex_enter(&fil_system->mutex);
4674
n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces);
4675
if (n_space_ids == 0) {
4677
mutex_exit(&fil_system->mutex);
4681
/* Assemble a list of space ids to flush. Previously, we
4682
traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT()
4683
on a space that was just removed from the list by fil_flush().
4684
Thus, the space could be dropped and the memory overwritten. */
4685
space_ids = static_cast<unsigned long *>(mem_alloc(n_space_ids * sizeof *space_ids));
4689
for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces);
4691
space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
4693
if (space->purpose == purpose && !space->is_being_deleted) {
4695
space_ids[n_space_ids++] = space->id;
4699
mutex_exit(&fil_system->mutex);
4701
/* Flush the spaces. It will not hurt to call fil_flush() on
4702
a non-existing space id. */
4703
for (i = 0; i < n_space_ids; i++) {
4705
fil_flush(space_ids[i]);
4708
mem_free(space_ids);
4711
/******************************************************************//**
4712
Checks the consistency of the tablespace cache.
4713
@return TRUE if ok */
4720
fil_node_t* fil_node;
4724
mutex_enter(&fil_system->mutex);
4726
/* Look for spaces in the hash table */
4728
for (i = 0; i < hash_get_n_cells(fil_system->spaces); i++) {
4730
space = static_cast<fil_space_t *>(HASH_GET_FIRST(fil_system->spaces, i));
4732
while (space != NULL) {
4733
UT_LIST_VALIDATE(chain, fil_node_t, space->chain,
4734
ut_a(ut_list_node_313->open
4735
|| !ut_list_node_313->n_pending));
4737
fil_node = UT_LIST_GET_FIRST(space->chain);
4739
while (fil_node != NULL) {
4740
if (fil_node->n_pending > 0) {
4741
ut_a(fil_node->open);
4744
if (fil_node->open) {
4747
fil_node = UT_LIST_GET_NEXT(chain, fil_node);
4749
space = static_cast<fil_space_t *>(HASH_GET_NEXT(hash, space));
4753
ut_a(fil_system->n_open == n_open);
4755
UT_LIST_VALIDATE(LRU, fil_node_t, fil_system->LRU, (void) 0);
4757
fil_node = UT_LIST_GET_FIRST(fil_system->LRU);
4759
while (fil_node != NULL) {
4760
ut_a(fil_node->n_pending == 0);
4761
ut_a(fil_node->open);
4762
ut_a(fil_node->space->purpose == FIL_TABLESPACE);
4763
ut_a(fil_node->space->id != 0);
4765
fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
4768
mutex_exit(&fil_system->mutex);
4773
/********************************************************************//**
4774
Returns TRUE if file address is undefined.
4775
@return TRUE if undefined */
4780
fil_addr_t addr) /*!< in: address */
4782
return(addr.page == FIL_NULL);
4785
/********************************************************************//**
4786
Get the predecessor of a file page.
4787
@return FIL_PAGE_PREV */
4792
const byte* page) /*!< in: file page */
4794
return(mach_read_from_4(page + FIL_PAGE_PREV));
4797
/********************************************************************//**
4798
Get the successor of a file page.
4799
@return FIL_PAGE_NEXT */
4804
const byte* page) /*!< in: file page */
4806
return(mach_read_from_4(page + FIL_PAGE_NEXT));
4809
/*********************************************************************//**
4810
Sets the file page type. */
4815
byte* page, /*!< in/out: file page */
4816
ulint type) /*!< in: type */
4820
mach_write_to_2(page + FIL_PAGE_TYPE, type);
4823
/*********************************************************************//**
4824
Gets the file page type.
4825
@return type; NOTE that if the type has not been written to page, the
4826
return value is not defined */
4831
const byte* page) /*!< in: file page */
4835
return(mach_read_from_2(page + FIL_PAGE_TYPE));
4838
/****************************************************************//**
4839
Frees the tablespace memory cache. */
4845
#ifndef UNIV_HOTBACKUP
4846
/* The mutex should already have been freed. */
4847
ut_ad(fil_system->mutex.magic_n == 0);
4848
#endif /* !UNIV_HOTBACKUP */
4850
hash_table_free(fil_system->spaces);
4852
hash_table_free(fil_system->name_hash);
4854
ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0);
4855
ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0);
4856
ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0);
4858
mem_free(fil_system);