1
/******************************************************
2
The low-level file system
6
Created 10/25/1995 Heikki Tuuri
7
*******************************************************/
14
#include "dict0types.h"
15
#include "ibuf0types.h"
19
/* When mysqld is run, the default directory "." is the mysqld datadir, but in
20
ibbackup we must set it explicitly; the path must NOT contain the trailing
22
extern const char* fil_path_to_mysql_datadir;
24
/* Initial size of a single-table tablespace in pages */
25
#define FIL_IBD_FILE_INITIAL_SIZE 4
27
/* 'null' (undefined) page offset in the context of file spaces */
28
#define FIL_NULL ULINT32_UNDEFINED
30
/* Space address data type; this is intended to be used when
31
addresses accurate to a byte are stored in file pages. If the page part
32
of the address is FIL_NULL, the address is considered undefined. */
34
typedef byte fil_faddr_t; /* 'type' definition in C: an address
35
stored in a file page is a string of bytes */
36
#define FIL_ADDR_PAGE 0 /* first in address is the page offset */
37
#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/
39
#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
41
/* A struct for storing a space address FIL_ADDR, when it is used
42
in C program data structures. */
44
typedef struct fil_addr_struct fil_addr_t;
45
struct fil_addr_struct{
46
ulint page; /* page number within a space */
47
ulint boffset; /* byte offset within the page */
50
/* Null file address */
51
extern fil_addr_t fil_addr_null;
53
/* The byte offsets on a file page for various variables */
54
#define FIL_PAGE_SPACE_OR_CHKSUM 0 /* in < MySQL-4.0.14 space id the
55
page belongs to (== 0) but in later
56
versions the 'new' checksum of the
58
#define FIL_PAGE_OFFSET 4 /* page offset inside space */
59
#define FIL_PAGE_PREV 8 /* if there is a 'natural' predecessor
60
of the page, its offset.
62
This field is not set on BLOB pages,
63
which are stored as a singly-linked
64
list. See also FIL_PAGE_NEXT. */
65
#define FIL_PAGE_NEXT 12 /* if there is a 'natural' successor
66
of the page, its offset.
69
(FIL_PAGE_TYPE contains FIL_PAGE_INDEX)
70
on the same PAGE_LEVEL are maintained
71
as a doubly linked list via
72
FIL_PAGE_PREV and FIL_PAGE_NEXT
73
in the collation order of the
74
smallest user record on each page. */
75
#define FIL_PAGE_LSN 16 /* lsn of the end of the newest
76
modification log record to the page */
77
#define FIL_PAGE_TYPE 24 /* file page type: FIL_PAGE_INDEX,...,
80
The contents of this field can only
81
be trusted in the following case:
82
if the page is an uncompressed
83
B-tree index page, then it is
84
guaranteed that the value is
86
The opposite does not hold.
88
In tablespaces created by
89
MySQL/InnoDB 5.1.7 or later, the
90
contents of this field are valid
91
for all uncompressed pages. */
92
#define FIL_PAGE_FILE_FLUSH_LSN 26 /* this is only defined for the
93
first page in a data file: the file
94
has been flushed to disk at least up
96
#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /* starting from 4.1.x this
97
contains the space id of the page */
98
#define FIL_PAGE_DATA 38 /* start of the data on the page */
100
/* File page trailer */
101
#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /* the low 4 bytes of this are used
102
to store the page checksum, the
103
last 4 bytes should be identical
104
to the last 4 bytes of FIL_PAGE_LSN */
105
/* NOTE(review): has the same value as FIL_PAGE_END_LSN_OLD_CHKSUM above;
presumably the size in bytes of the file page trailer -- confirm */
#define FIL_PAGE_DATA_END 8
107
/* File page types (values of FIL_PAGE_TYPE) */
108
#define FIL_PAGE_INDEX 17855 /* B-tree node */
109
#define FIL_PAGE_UNDO_LOG 2 /* Undo log page */
110
#define FIL_PAGE_INODE 3 /* Index node */
111
#define FIL_PAGE_IBUF_FREE_LIST 4 /* Insert buffer free list */
112
/* File page types introduced in MySQL/InnoDB 5.1.7 */
113
#define FIL_PAGE_TYPE_ALLOCATED 0 /* Freshly allocated page */
114
#define FIL_PAGE_IBUF_BITMAP 5 /* Insert buffer bitmap */
115
#define FIL_PAGE_TYPE_SYS 6 /* System page */
116
#define FIL_PAGE_TYPE_TRX_SYS 7 /* Transaction system data */
117
#define FIL_PAGE_TYPE_FSP_HDR 8 /* File space header */
118
#define FIL_PAGE_TYPE_XDES 9 /* Extent descriptor page */
119
#define FIL_PAGE_TYPE_BLOB 10 /* Uncompressed BLOB page */
122
/* File space type value denoting a tablespace. Comments in this header name
FIL_TABLESPACE and FIL_LOG as the possible types/purposes of a file space. */
#define FIL_TABLESPACE 501
125
/* Counters exported by the file system module; they are only declared here.
NOTE(review): judging by the names alone, these are the number of log flushes
performed and the numbers of log / tablespace flushes currently pending --
confirm against the defining source file. */
extern ulint fil_n_log_flushes;
127
extern ulint fil_n_pending_log_flushes;
128
extern ulint fil_n_pending_tablespace_flushes;
131
/***********************************************************************
132
Returns the version number of a tablespace, -1 if not found. */
135
fil_space_get_version(
136
/*==================*/
137
/* out: version number, -1 if the tablespace does not
138
exist in the memory cache */
139
ulint id); /* in: space id */
140
/***********************************************************************
141
Returns the latch of a file space. */
146
/* out: latch protecting storage allocation */
147
ulint id); /* in: space id */
148
/***********************************************************************
149
Returns the type of a file space. */
154
/* out: FIL_TABLESPACE or FIL_LOG */
155
ulint id); /* in: space id */
156
/***********************************************************************
157
Returns the ibuf data of a file space. */
160
fil_space_get_ibuf_data(
161
/*====================*/
162
/* out: ibuf data for this space */
163
ulint id); /* in: space id */
164
/***********************************************************************
165
Appends a new file to the chain of files of a space. File must be closed. */
170
const char* name, /* in: file name (file must be closed) */
171
ulint size, /* in: file size in database blocks, rounded
172
downwards to an integer */
173
ulint id, /* in: space id where to append */
174
ibool is_raw);/* in: TRUE if a raw device or
175
a raw disk partition */
176
/********************************************************************
177
Drops files from the start of a file space, so that its size is cut by
181
fil_space_truncate_start(
182
/*=====================*/
183
ulint id, /* in: space id */
184
ulint trunc_len); /* in: truncate by this much; it is an error
185
if this is not equal to the combined size of
186
some initial files in the space */
187
/***********************************************************************
188
Creates a space memory object and puts it to the 'fil system' hash table. If
189
there is an error, prints an error message to the .err log. */
194
/* out: TRUE if success */
195
const char* name, /* in: space name */
196
ulint id, /* in: space id */
197
ulint purpose);/* in: FIL_TABLESPACE, or FIL_LOG if log */
198
/***********************************************************************
199
Frees a space object from the tablespace memory cache. Closes the files in
200
the chain but does not delete them. */
205
/* out: TRUE if success */
206
ulint id); /* in: space id */
207
/***********************************************************************
208
Returns the size of the space in pages. The tablespace must be cached in the
214
/* out: space size, 0 if space not found */
215
ulint id); /* in: space id */
216
/***********************************************************************
217
Checks if the pair space, page_no refers to an existing page in a tablespace
218
file space. The tablespace must be cached in the memory cache. */
221
fil_check_adress_in_tablespace(
222
/*===========================*/
223
/* out: TRUE if the address is meaningful */
224
ulint id, /* in: space id */
225
ulint page_no);/* in: page number */
226
/********************************************************************
227
Initializes the tablespace memory cache. */
232
ulint max_n_open); /* in: max number of open files */
233
/***********************************************************************
234
Opens all log files and system tablespace data files. They stay open until the
235
database server shutdown. This should be called at a server startup after the
236
space objects for the log and the system tablespace have been created. The
237
purpose of this operation is to make sure we never run out of file descriptors
238
if we need to read from the insert buffer or to write to the log. */
241
fil_open_log_and_system_tablespace_files(void);
242
/*==========================================*/
243
/***********************************************************************
244
Closes all open files. There must not be any pending i/o's or not flushed
245
modifications in the files. */
248
fil_close_all_files(void);
249
/*=====================*/
250
/***********************************************************************
251
Sets the max tablespace id counter if the given number is bigger than the
255
fil_set_max_space_id_if_bigger(
256
/*===========================*/
257
ulint max_id);/* in: maximum known id */
258
/********************************************************************
259
Initializes the ibuf data structure for space 0 == the system tablespace.
260
This can be called after the file space headers have been created and the
261
dictionary system has been initialized. */
264
fil_ibuf_init_at_db_start(void);
265
/*===========================*/
266
/********************************************************************
267
Writes the flushed lsn and the latest archived log number to the page
268
header of the first page of each data file in the system tablespace. */
271
fil_write_flushed_lsn_to_data_files(
272
/*================================*/
273
/* out: DB_SUCCESS or error number */
274
dulint lsn, /* in: lsn to write */
275
ulint arch_log_no); /* in: latest archived log file number */
276
/***********************************************************************
277
Reads the flushed lsn and arch no fields from a data file at database
281
fil_read_flushed_lsn_and_arch_log_no(
282
/*=================================*/
283
os_file_t data_file, /* in: open data file */
284
ibool one_read_already, /* in: TRUE if min and max parameters
285
below already contain sensible data */
286
#ifdef UNIV_LOG_ARCHIVE
287
ulint* min_arch_log_no, /* in/out: */
288
ulint* max_arch_log_no, /* in/out: */
289
#endif /* UNIV_LOG_ARCHIVE */
290
dulint* min_flushed_lsn, /* in/out: */
291
dulint* max_flushed_lsn); /* in/out: */
292
/***********************************************************************
293
Increments the count of pending insert buffer page merges, if space is not
297
fil_inc_pending_ibuf_merges(
298
/*========================*/
299
/* out: TRUE if being deleted, and ibuf merges should
301
ulint id); /* in: space id */
302
/***********************************************************************
303
Decrements the count of pending insert buffer page merges. */
306
fil_decr_pending_ibuf_merges(
307
/*=========================*/
308
ulint id); /* in: space id */
309
/***********************************************************************
310
Parses the body of a log record written about an .ibd file operation. That is,
311
the log record part after the standard (type, space id, page no) header of the
314
If desired, also replays the delete or rename operation if the .ibd file
315
exists and the space id in it matches. Replays the create operation if a file
316
at that path does not exist yet. If the database directory for the file to be
317
created does not exist, then we create the directory, too.
319
Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
320
datadir that we should use in replaying the file operations. */
323
fil_op_log_parse_or_replay(
324
/*=======================*/
325
/* out: end of log record, or NULL if the
326
record was not completely contained between
328
byte* ptr, /* in: buffer containing the log record body,
329
or an initial segment of it, if the record does
330
not fit completely between ptr and end_ptr */
331
byte* end_ptr, /* in: buffer end */
332
ulint type, /* in: the type of this log record */
333
ibool do_replay, /* in: TRUE if we want to replay the
334
operation, and not just parse the log record */
335
ulint space_id); /* in: if do_replay is TRUE, the space id of
336
the tablespace in question; otherwise
338
/***********************************************************************
339
Deletes a single-table tablespace. The tablespace must be cached in the
343
fil_delete_tablespace(
344
/*==================*/
345
/* out: TRUE if success */
346
ulint id); /* in: space id */
347
/***********************************************************************
348
Discards a single-table tablespace. The tablespace must be cached in the
349
memory cache. Discarding is like deleting a tablespace, but
350
1) we do not drop the table from the data dictionary;
351
2) we remove all insert buffer entries for the tablespace immediately; in DROP
352
TABLE they are only removed gradually in the background;
353
3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
354
as it originally had. */
357
fil_discard_tablespace(
358
/*===================*/
359
/* out: TRUE if success */
360
ulint id); /* in: space id */
361
/***********************************************************************
362
Renames a single-table tablespace. The tablespace must be cached in the
363
tablespace memory cache. */
366
fil_rename_tablespace(
367
/*==================*/
368
/* out: TRUE if success */
369
const char* old_name, /* in: old table name in the standard
370
databasename/tablename format of
371
InnoDB, or NULL if we do the rename
372
based on the space id only */
373
ulint id, /* in: space id */
374
const char* new_name); /* in: new table name in the standard
375
databasename/tablename format
378
/***********************************************************************
379
Creates a new single-table tablespace to a database directory of MySQL.
380
Database directories are under the 'datadir' of MySQL. The datadir is the
381
directory of a running mysqld program. We can refer to it by simply the
382
path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
383
dir of the mysqld server. */
386
fil_create_new_single_table_tablespace(
387
/*===================================*/
388
/* out: DB_SUCCESS or error code */
389
ulint* space_id, /* in/out: space id; if this is != 0,
390
then this is an input parameter,
392
const char* tablename, /* in: the table name in the usual
393
databasename/tablename format
394
of InnoDB, or a dir path to a temp
396
ibool is_temp, /* in: TRUE if a table created with
397
CREATE TEMPORARY TABLE */
398
ulint size); /* in: the initial size of the
399
tablespace file in pages,
400
must be >= FIL_IBD_FILE_INITIAL_SIZE */
401
/************************************************************************
402
Tries to open a single-table tablespace and optionally checks the space id is
403
right in it. If it does not succeed, prints an error message to the .err log. This
404
function is used to open a tablespace when we start up mysqld, and also in
406
NOTE that we assume this operation is used either at the database startup
407
or under the protection of the dictionary mutex, so that two users cannot
408
race here. This operation does not leave the file associated with the
409
tablespace open, but closes it after we have looked at the space id in it. */
412
fil_open_single_table_tablespace(
413
/*=============================*/
414
/* out: TRUE if success */
415
ibool check_space_id, /* in: should we check that the space
416
id in the file is right; we assume
417
that this function runs much faster
418
if no check is made, since accessing
419
the file inode probably is much
420
faster (the OS caches them) than
421
accessing the first page of the file */
422
ulint id, /* in: space id */
423
const char* name); /* in: table name in the
424
databasename/tablename format */
425
/************************************************************************
426
It is possible, though very improbable, that the lsn's in the tablespace to be
427
imported have risen above the current system lsn, if a lengthy purge, ibuf
428
merge, or rollback was performed on a backup taken with ibbackup. If that is
429
the case, reset page lsn's in the file. We assume that mysqld was shut down
430
after it performed these cleanup operations on the .ibd file, so that it at
431
the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
432
first page of the .ibd file, and we can determine whether we need to reset the
433
lsn's just by looking at that flush lsn. */
436
fil_reset_too_high_lsns(
437
/*====================*/
438
/* out: TRUE if success */
439
const char* name, /* in: table name in the
440
databasename/tablename format */
441
dulint current_lsn); /* in: reset lsn's if the lsn stamped
442
to FIL_PAGE_FILE_FLUSH_LSN in the
443
first page is too high */
444
/************************************************************************
445
At the server startup, if we need crash recovery, scans the database
446
directories under the MySQL datadir, looking for .ibd files. Those files are
447
single-table tablespaces. We need to know the space id in each of them so that
448
we know into which file we should look to check the contents of a page stored
449
in the doublewrite buffer, also to know where to apply log records where the
453
fil_load_single_table_tablespaces(void);
454
/*===================================*/
455
/* out: DB_SUCCESS or error number */
456
/************************************************************************
457
If we need crash recovery, and we have called
458
fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
459
we can call this function to print an error message of orphaned .ibd files
460
for which there is not a data dictionary entry with a matching table name
464
fil_print_orphaned_tablespaces(void);
465
/*================================*/
466
/***********************************************************************
467
Returns TRUE if a single-table tablespace does not exist in the memory cache,
468
or is being deleted there. */
471
fil_tablespace_deleted_or_being_deleted_in_mem(
472
/*===========================================*/
473
/* out: TRUE if does not exist or is being
475
ulint id, /* in: space id */
476
ib_longlong version);/* in: tablespace_version should be this; if
477
you pass -1 as the value of this, then this
478
parameter is ignored */
479
/***********************************************************************
480
Returns TRUE if a single-table tablespace exists in the memory cache. */
483
fil_tablespace_exists_in_mem(
484
/*=========================*/
485
/* out: TRUE if exists */
486
ulint id); /* in: space id */
487
/***********************************************************************
488
Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
489
cache. Note that if we have not done a crash recovery at the database startup,
490
there may be many tablespaces which are not yet in the memory cache. */
493
fil_space_for_table_exists_in_mem(
494
/*==============================*/
495
/* out: TRUE if a matching tablespace
496
exists in the memory cache */
497
ulint id, /* in: space id */
498
const char* name, /* in: table name in the standard
499
'databasename/tablename' format or
500
the dir path to a temp table */
501
ibool is_temp, /* in: TRUE if created with CREATE
503
ibool mark_space, /* in: in crash recovery, at database
504
startup we mark all spaces which have
505
an associated table in the InnoDB
506
data dictionary, so that
507
we can print a warning about orphaned
509
ibool print_error_if_does_not_exist);
510
/* in: print detailed error
511
information to the .err log if a
512
matching tablespace is not found from
514
/**************************************************************************
515
Tries to extend a data file so that it would accommodate the number of pages
516
given. The tablespace must be cached in the memory cache. If the space is big
517
enough already, does nothing. */
520
fil_extend_space_to_desired_size(
521
/*=============================*/
522
/* out: TRUE if success */
523
ulint* actual_size, /* out: size of the space after extension;
524
if we ran out of disk space this may be lower
525
than the desired size */
526
ulint space_id, /* in: space id */
527
ulint size_after_extend);/* in: desired size in pages after the
528
extension; if the current space size is bigger
529
than this already, the function does nothing */
530
#ifdef UNIV_HOTBACKUP
531
/************************************************************************
532
Extends all tablespaces to the size stored in the space header. During the
533
ibbackup --apply-log phase we extended the spaces on-demand so that log records
534
could be applied, but that may have left spaces still too small compared to
535
the size stored in the space header. */
538
fil_extend_tablespaces_to_stored_len(void);
539
/*======================================*/
541
/***********************************************************************
542
Tries to reserve free extents in a file space. */
545
fil_space_reserve_free_extents(
546
/*===========================*/
547
/* out: TRUE if succeed */
548
ulint id, /* in: space id */
549
ulint n_free_now, /* in: number of free extents now */
550
ulint n_to_reserve); /* in: how many one wants to reserve */
551
/***********************************************************************
552
Releases free extents in a file space. */
555
fil_space_release_free_extents(
556
/*===========================*/
557
ulint id, /* in: space id */
558
ulint n_reserved); /* in: how many one reserved */
559
/***********************************************************************
560
Gets the number of reserved extents. If the database is silent, this number
564
fil_space_get_n_reserved_extents(
565
/*=============================*/
566
ulint id); /* in: space id */
567
/************************************************************************
568
Reads or writes data. This operation is asynchronous (aio). */
573
/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
574
if we are trying to do i/o on a tablespace
575
which does not exist */
576
ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE,
577
ORed to OS_FILE_LOG, if a log i/o
578
and ORed to OS_AIO_SIMULATED_WAKE_LATER
579
if simulated aio and we want to post a
580
batch of i/os; NOTE that a simulated batch
581
may introduce hidden chances of deadlocks,
582
because i/os are not actually handled until
583
all have been posted: use with great
585
ibool sync, /* in: TRUE if synchronous aio is desired */
586
ulint space_id, /* in: space id */
587
ulint block_offset, /* in: offset in number of blocks */
588
ulint byte_offset, /* in: remainder of offset in bytes; in
589
aio this must be divisible by the OS block
591
ulint len, /* in: how many bytes to read or write; this
592
must not cross a file boundary; in aio this
593
must be a block size multiple */
594
void* buf, /* in/out: buffer where to store read data
595
or from where to write; in aio this must be
596
appropriately aligned */
597
void* message); /* in: message for aio handler if non-sync
598
aio used, else ignored */
599
/************************************************************************
600
Reads data from a space to a buffer. Remember that the possible incomplete
601
blocks at the end of file are ignored: they are not taken into account when
602
calculating the byte offset within a space. */
607
/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
608
if we are trying to do i/o on a tablespace
609
which does not exist */
610
ibool sync, /* in: TRUE if synchronous aio is desired */
611
ulint space_id, /* in: space id */
612
ulint block_offset, /* in: offset in number of blocks */
613
ulint byte_offset, /* in: remainder of offset in bytes; in aio
614
this must be divisible by the OS block size */
615
ulint len, /* in: how many bytes to read; this must not
616
cross a file boundary; in aio this must be a
617
block size multiple */
618
void* buf, /* in/out: buffer where to store data read;
619
in aio this must be appropriately aligned */
620
void* message); /* in: message for aio handler if non-sync
621
aio used, else ignored */
622
/************************************************************************
623
Writes data to a space from a buffer. Remember that the possible incomplete
624
blocks at the end of file are ignored: they are not taken into account when
625
calculating the byte offset within a space. */
630
/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
631
if we are trying to do i/o on a tablespace
632
which does not exist */
633
ibool sync, /* in: TRUE if synchronous aio is desired */
634
ulint space_id, /* in: space id */
635
ulint block_offset, /* in: offset in number of blocks */
636
ulint byte_offset, /* in: remainder of offset in bytes; in aio
637
this must be divisible by the OS block size */
638
ulint len, /* in: how many bytes to write; this must
639
not cross a file boundary; in aio this must
640
be a block size multiple */
641
void* buf, /* in: buffer from which to write; in aio
642
this must be appropriately aligned */
643
void* message); /* in: message for aio handler if non-sync
644
aio used, else ignored */
645
/**************************************************************************
646
Waits for an aio operation to complete. This function is used to write the
647
handler for completed requests. The aio array of pending requests is divided
648
into segments (see os0file.c for more info). The thread specifies which
649
segment it wants to wait for. */
654
ulint segment); /* in: the number of the segment in the aio
656
/**************************************************************************
657
Flushes to disk possible writes cached by the OS. If the space does not exist
658
or is being dropped, does not do anything. */
663
ulint space_id); /* in: file space id (this can be a group of
664
log files or a tablespace of the database) */
665
/**************************************************************************
666
Flushes to disk writes in file spaces of the given type possibly cached by
670
fil_flush_file_spaces(
671
/*==================*/
672
ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */
673
/**********************************************************************
674
Checks the consistency of the tablespace cache. */
679
/* out: TRUE if ok */
680
/************************************************************************
681
Returns TRUE if file address is undefined. */
686
/* out: TRUE if undefined */
687
fil_addr_t addr); /* in: address */
688
/************************************************************************
689
Accessor functions for a file page */
692
/* Accessor for the predecessor link of a file page; takes a pointer to the
page. NOTE(review): presumably reads the FIL_PAGE_PREV header field; the
return type line is not visible in this part of the file -- confirm. */
fil_page_get_prev(byte* page);
694
/* Accessor for the successor link of a file page; takes a pointer to the
page. NOTE(review): presumably reads the FIL_PAGE_NEXT header field; the
return type line is not visible in this part of the file -- confirm. */
fil_page_get_next(byte* page);
695
/*************************************************************************
696
Sets the file page type. */
701
byte* page, /* in: file page */
702
ulint type); /* in: type */
703
/*************************************************************************
704
Gets the file page type. */
709
/* out: type; NOTE that if the type has not been
710
written to page, the return value is not defined */
711
byte* page); /* in: file page */
714
/* Short type name for struct fil_space_struct. The struct body is not
visible in this part of the file. */
typedef struct fil_space_struct fil_space_t;