1
/******************************************************
2
The low-level file system
6
Created 10/25/1995 Heikki Tuuri
7
*******************************************************/
14
#include "dict0types.h"
15
#include "ibuf0types.h"
19
/* When mysqld is run, the default directory "." is the mysqld datadir, but in
20
ibbackup we must set it explicitly; the path must NOT contain the trailing
22
extern const char* fil_path_to_mysql_datadir;
24
/* Initial size of a single-table tablespace in pages */
25
#define FIL_IBD_FILE_INITIAL_SIZE 4
27
/* 'null' (undefined) page offset in the context of file spaces */
28
#define FIL_NULL ULINT32_UNDEFINED
30
/* Space address data type; this is intended to be used when
31
addresses accurate to a byte are stored in file pages. If the page part
32
of the address is FIL_NULL, the address is considered undefined. */
34
typedef byte fil_faddr_t; /* 'type' definition in C: an address
35
stored in a file page is a string of bytes */
36
#define FIL_ADDR_PAGE 0 /* first in address is the page offset */
37
#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/
39
#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
41
/* A struct for storing a space address FIL_ADDR, when it is used
42
in C program data structures. */
44
typedef struct fil_addr_struct fil_addr_t;
45
struct fil_addr_struct{
46
ulint page; /* page number within a space */
47
ulint boffset; /* byte offset within the page */
50
/* Null file address */
51
extern fil_addr_t fil_addr_null;
53
/* The byte offsets on a file page for various variables */
54
#define FIL_PAGE_SPACE_OR_CHKSUM 0 /* in < MySQL-4.0.14 space id the
55
page belongs to (== 0) but in later
56
versions the 'new' checksum of the
58
#define FIL_PAGE_OFFSET 4 /* page offset inside space */
59
#define FIL_PAGE_PREV 8 /* if there is a 'natural' predecessor
60
of the page, its offset.
62
This field is not set on BLOB pages,
63
which are stored as a singly-linked
64
list. See also FIL_PAGE_NEXT. */
65
#define FIL_PAGE_NEXT 12 /* if there is a 'natural' successor
66
of the page, its offset.
69
(FIL_PAGE_TYPE contains FIL_PAGE_INDEX)
70
on the same PAGE_LEVEL are maintained
71
as a doubly linked list via
72
FIL_PAGE_PREV and FIL_PAGE_NEXT
73
in the collation order of the
74
smallest user record on each page. */
75
#define FIL_PAGE_LSN 16 /* lsn of the end of the newest
76
modification log record to the page */
77
#define FIL_PAGE_TYPE 24 /* file page type: FIL_PAGE_INDEX,...,
80
The contents of this field can only
81
be trusted in the following case:
82
if the page is an uncompressed
83
B-tree index page, then it is
84
guaranteed that the value is
86
The opposite does not hold.
88
In tablespaces created by
89
MySQL/InnoDB 5.1.7 or later, the
90
contents of this field are valid
91
for all uncompressed pages. */
92
#define FIL_PAGE_FILE_FLUSH_LSN 26 /* this is only defined for the
93
first page in a data file: the file
94
has been flushed to disk at least up
96
#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /* starting from 4.1.x this
97
contains the space id of the page */
98
#define FIL_PAGE_DATA 38 /* start of the data on the page */
100
/* File page trailer */
101
#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /* the low 4 bytes of this are used
102
to store the page checksum, the
103
last 4 bytes should be identical
104
to the last 4 bytes of FIL_PAGE_LSN */
105
#define FIL_PAGE_DATA_END 8 /* NOTE(review): presumably the size in bytes of the file page trailer, i.e. where page data ends counted from the page end -- confirm */
107
/* File page types (values of FIL_PAGE_TYPE) */
108
#define FIL_PAGE_INDEX 17855 /* B-tree node */
109
#define FIL_PAGE_UNDO_LOG 2 /* Undo log page */
110
#define FIL_PAGE_INODE 3 /* Index node */
111
#define FIL_PAGE_IBUF_FREE_LIST 4 /* Insert buffer free list */
112
/* File page types introduced in MySQL/InnoDB 5.1.7 */
113
#define FIL_PAGE_TYPE_ALLOCATED 0 /* Freshly allocated page */
114
#define FIL_PAGE_IBUF_BITMAP 5 /* Insert buffer bitmap */
115
#define FIL_PAGE_TYPE_SYS 6 /* System page */
116
#define FIL_PAGE_TYPE_TRX_SYS 7 /* Transaction system data */
117
#define FIL_PAGE_TYPE_FSP_HDR 8 /* File space header */
118
#define FIL_PAGE_TYPE_XDES 9 /* Extent descriptor page */
119
#define FIL_PAGE_TYPE_BLOB 10 /* Uncompressed BLOB page */
120
#define FIL_PAGE_TYPE_ZBLOB 11 /* First compressed BLOB page */
121
#define FIL_PAGE_TYPE_ZBLOB2 12 /* Subsequent compressed BLOB page */
124
#define FIL_TABLESPACE 501 /* file space type/purpose: a tablespace (as opposed to FIL_LOG, a log) */
127
extern ulint fil_n_log_flushes;
129
extern ulint fil_n_pending_log_flushes;
130
extern ulint fil_n_pending_tablespace_flushes;
133
/***********************************************************************
134
Returns the version number of a tablespace, -1 if not found. */
137
fil_space_get_version(
138
/*==================*/
139
/* out: version number, -1 if the tablespace does not
140
exist in the memory cache */
141
ulint id); /* in: space id */
142
/***********************************************************************
143
Returns the latch of a file space. */
148
/* out: latch protecting storage allocation */
149
ulint id, /* in: space id */
150
ulint* zip_size);/* out: compressed page size, or
151
0 for uncompressed tablespaces */
152
/***********************************************************************
153
Returns the type of a file space. */
158
/* out: FIL_TABLESPACE or FIL_LOG */
159
ulint id); /* in: space id */
160
/***********************************************************************
161
Returns the ibuf data of a file space. */
164
fil_space_get_ibuf_data(
165
/*====================*/
166
/* out: ibuf data for this space */
167
ulint id); /* in: space id */
168
/***********************************************************************
169
Appends a new file to the chain of files of a space. File must be closed. */
174
const char* name, /* in: file name (file must be closed) */
175
ulint size, /* in: file size in database blocks, rounded
176
downwards to an integer */
177
ulint id, /* in: space id where to append */
178
ibool is_raw);/* in: TRUE if a raw device or
179
a raw disk partition */
180
#ifdef UNIV_LOG_ARCHIVE
181
/********************************************************************
182
Drops files from the start of a file space, so that its size is cut by
186
fil_space_truncate_start(
187
/*=====================*/
188
ulint id, /* in: space id */
189
ulint trunc_len); /* in: truncate by this much; it is an error
190
if this is not equal to the combined size of
191
some initial files in the space */
192
#endif /* UNIV_LOG_ARCHIVE */
193
/***********************************************************************
194
Creates a space memory object and puts it to the 'fil system' hash table. If
195
there is an error, prints an error message to the .err log. */
200
/* out: TRUE if success */
201
const char* name, /* in: space name */
202
ulint id, /* in: space id */
203
ulint zip_size,/* in: compressed page size, or
204
0 for uncompressed tablespaces */
205
ulint purpose);/* in: FIL_TABLESPACE, or FIL_LOG if log */
206
/***********************************************************************
207
Frees a space object from the tablespace memory cache. Closes the files in
208
the chain but does not delete them. */
213
/* out: TRUE if success */
214
ulint id); /* in: space id */
215
/***********************************************************************
216
Returns the size of the space in pages. The tablespace must be cached in the
222
/* out: space size, 0 if space not found */
223
ulint id); /* in: space id */
224
/***********************************************************************
225
Returns the flags of the space. The tablespace must be cached
226
in the memory cache. */
231
/* out: flags, ULINT_UNDEFINED if space not found */
232
ulint id); /* in: space id */
233
/***********************************************************************
234
Returns the compressed page size of the space, or 0 if the space
235
is not compressed. The tablespace must be cached in the memory cache. */
238
fil_space_get_zip_size(
239
/*===================*/
240
/* out: compressed page size, ULINT_UNDEFINED
241
if space not found */
242
ulint id); /* in: space id */
243
/***********************************************************************
244
Checks if the pair space, page_no refers to an existing page in a tablespace
245
file space. The tablespace must be cached in the memory cache. */
248
fil_check_adress_in_tablespace(
249
/*===========================*/
250
/* out: TRUE if the address is meaningful */
251
ulint id, /* in: space id */
252
ulint page_no);/* in: page number */
253
/********************************************************************
254
Initializes the tablespace memory cache. */
259
ulint max_n_open); /* in: max number of open files */
260
/***********************************************************************
261
Opens all log files and system tablespace data files. They stay open until the
262
database server shutdown. This should be called at a server startup after the
263
space objects for the log and the system tablespace have been created. The
264
purpose of this operation is to make sure we never run out of file descriptors
265
if we need to read from the insert buffer or to write to the log. */
268
fil_open_log_and_system_tablespace_files(void);
269
/*==========================================*/
270
/***********************************************************************
271
Closes all open files. There must not be any pending i/o's or not flushed
272
modifications in the files. */
275
fil_close_all_files(void);
276
/*=====================*/
277
/***********************************************************************
278
Sets the max tablespace id counter if the given number is bigger than the
282
fil_set_max_space_id_if_bigger(
283
/*===========================*/
284
ulint max_id);/* in: maximum known id */
285
/********************************************************************
286
Initializes the ibuf data structure for space 0 == the system tablespace.
287
This can be called after the file space headers have been created and the
288
dictionary system has been initialized. */
291
fil_ibuf_init_at_db_start(void);
292
/*===========================*/
293
/********************************************************************
294
Writes the flushed lsn and the latest archived log number to the page
295
header of the first page of each data file in the system tablespace. */
298
fil_write_flushed_lsn_to_data_files(
299
/*================================*/
300
/* out: DB_SUCCESS or error number */
301
ib_uint64_t lsn, /* in: lsn to write */
302
ulint arch_log_no); /* in: latest archived log
304
/***********************************************************************
305
Reads the flushed lsn and arch no fields from a data file at database
309
fil_read_flushed_lsn_and_arch_log_no(
310
/*=================================*/
311
os_file_t data_file, /* in: open data file */
312
ibool one_read_already, /* in: TRUE if min and max
313
parameters below already
314
contain sensible data */
315
#ifdef UNIV_LOG_ARCHIVE
316
ulint* min_arch_log_no, /* in/out: */
317
ulint* max_arch_log_no, /* in/out: */
318
#endif /* UNIV_LOG_ARCHIVE */
319
ib_uint64_t* min_flushed_lsn, /* in/out: */
320
ib_uint64_t* max_flushed_lsn); /* in/out: */
321
/***********************************************************************
322
Increments the count of pending insert buffer page merges, if space is not
326
fil_inc_pending_ibuf_merges(
327
/*========================*/
328
/* out: TRUE if being deleted, and ibuf merges should
330
ulint id); /* in: space id */
331
/***********************************************************************
332
Decrements the count of pending insert buffer page merges. */
335
fil_decr_pending_ibuf_merges(
336
/*=========================*/
337
ulint id); /* in: space id */
338
/***********************************************************************
339
Parses the body of a log record written about an .ibd file operation. That is,
340
the log record part after the standard (type, space id, page no) header of the
343
If desired, also replays the delete or rename operation if the .ibd file
344
exists and the space id in it matches. Replays the create operation if a file
345
at that path does not exist yet. If the database directory for the file to be
346
created does not exist, then we create the directory, too.
348
Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
349
datadir that we should use in replaying the file operations. */
352
fil_op_log_parse_or_replay(
353
/*=======================*/
354
/* out: end of log record, or NULL if the
355
record was not completely contained between
357
byte* ptr, /* in: buffer containing the log record body,
358
or an initial segment of it, if the record does
359
not fit completely between ptr and end_ptr */
360
byte* end_ptr, /* in: buffer end */
361
ulint type, /* in: the type of this log record */
362
ulint space_id); /* in: the space id of the tablespace in
363
question, or 0 if the log record should
364
only be parsed but not replayed */
365
/***********************************************************************
366
Deletes a single-table tablespace. The tablespace must be cached in the
370
fil_delete_tablespace(
371
/*==================*/
372
/* out: TRUE if success */
373
ulint id); /* in: space id */
374
/***********************************************************************
375
Discards a single-table tablespace. The tablespace must be cached in the
376
memory cache. Discarding is like deleting a tablespace, but
377
1) we do not drop the table from the data dictionary;
378
2) we remove all insert buffer entries for the tablespace immediately; in DROP
379
TABLE they are only removed gradually in the background;
380
3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
381
as it originally had. */
384
fil_discard_tablespace(
385
/*===================*/
386
/* out: TRUE if success */
387
ulint id); /* in: space id */
388
/***********************************************************************
389
Renames a single-table tablespace. The tablespace must be cached in the
390
tablespace memory cache. */
393
fil_rename_tablespace(
394
/*==================*/
395
/* out: TRUE if success */
396
const char* old_name, /* in: old table name in the standard
397
databasename/tablename format of
398
InnoDB, or NULL if we do the rename
399
based on the space id only */
400
ulint id, /* in: space id */
401
const char* new_name); /* in: new table name in the standard
402
databasename/tablename format
405
/***********************************************************************
406
Creates a new single-table tablespace to a database directory of MySQL.
407
Database directories are under the 'datadir' of MySQL. The datadir is the
408
directory of a running mysqld program. We can refer to it by simply the
409
path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
410
dir of the mysqld server. */
413
fil_create_new_single_table_tablespace(
414
/*===================================*/
415
/* out: DB_SUCCESS or error code */
416
ulint* space_id, /* in/out: space id; if this is != 0,
417
then this is an input parameter,
419
const char* tablename, /* in: the table name in the usual
420
databasename/tablename format
421
of InnoDB, or a dir path to a temp
423
ibool is_temp, /* in: TRUE if a table created with
424
CREATE TEMPORARY TABLE */
425
ulint flags, /* in: tablespace flags */
426
ulint size); /* in: the initial size of the
427
tablespace file in pages,
428
must be >= FIL_IBD_FILE_INITIAL_SIZE */
429
/************************************************************************
430
Tries to open a single-table tablespace and optionally checks the space id is
431
right in it. If it does not succeed, prints an error message to the .err log. This
432
function is used to open a tablespace when we start up mysqld, and also in
434
NOTE that we assume this operation is used either at the database startup
435
or under the protection of the dictionary mutex, so that two users cannot
436
race here. This operation does not leave the file associated with the
437
tablespace open, but closes it after we have looked at the space id in it. */
440
fil_open_single_table_tablespace(
441
/*=============================*/
442
/* out: TRUE if success */
443
ibool check_space_id, /* in: should we check that the space
444
id in the file is right; we assume
445
that this function runs much faster
446
if no check is made, since accessing
447
the file inode probably is much
448
faster (the OS caches them) than
449
accessing the first page of the file */
450
ulint id, /* in: space id */
451
ulint flags, /* in: tablespace flags */
452
const char* name); /* in: table name in the
453
databasename/tablename format */
454
/************************************************************************
455
It is possible, though very improbable, that the lsn's in the tablespace to be
456
imported have risen above the current system lsn, if a lengthy purge, ibuf
457
merge, or rollback was performed on a backup taken with ibbackup. If that is
458
the case, reset page lsn's in the file. We assume that mysqld was shut down
459
after it performed these cleanup operations on the .ibd file, so that it at
460
the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
461
first page of the .ibd file, and we can determine whether we need to reset the
462
lsn's just by looking at that flush lsn. */
465
fil_reset_too_high_lsns(
466
/*====================*/
467
/* out: TRUE if success */
468
const char* name, /* in: table name in the
469
databasename/tablename format */
470
ib_uint64_t current_lsn); /* in: reset lsn's if the lsn stamped
471
to FIL_PAGE_FILE_FLUSH_LSN in the
472
first page is too high */
473
/************************************************************************
474
At the server startup, if we need crash recovery, scans the database
475
directories under the MySQL datadir, looking for .ibd files. Those files are
476
single-table tablespaces. We need to know the space id in each of them so that
477
we know into which file we should look to check the contents of a page stored
478
in the doublewrite buffer, also to know where to apply log records where the
482
fil_load_single_table_tablespaces(void);
483
/*===================================*/
484
/* out: DB_SUCCESS or error number */
485
/************************************************************************
486
If we need crash recovery, and we have called
487
fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
488
we can call this function to print an error message of orphaned .ibd files
489
for which there is not a data dictionary entry with a matching table name
493
fil_print_orphaned_tablespaces(void);
494
/*================================*/
495
/***********************************************************************
496
Returns TRUE if a single-table tablespace does not exist in the memory cache,
497
or is being deleted there. */
500
fil_tablespace_deleted_or_being_deleted_in_mem(
501
/*===========================================*/
502
/* out: TRUE if does not exist or is being
504
ulint id, /* in: space id */
505
ib_int64_t version);/* in: tablespace_version should be this; if
506
you pass -1 as the value of this, then this
507
parameter is ignored */
508
/***********************************************************************
509
Returns TRUE if a single-table tablespace exists in the memory cache. */
512
fil_tablespace_exists_in_mem(
513
/*=========================*/
514
/* out: TRUE if exists */
515
ulint id); /* in: space id */
516
/***********************************************************************
517
Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
518
cache. Note that if we have not done a crash recovery at the database startup,
519
there may be many tablespaces which are not yet in the memory cache. */
522
fil_space_for_table_exists_in_mem(
523
/*==============================*/
524
/* out: TRUE if a matching tablespace
525
exists in the memory cache */
526
ulint id, /* in: space id */
527
const char* name, /* in: table name in the standard
528
'databasename/tablename' format or
529
the dir path to a temp table */
530
ibool is_temp, /* in: TRUE if created with CREATE
532
ibool mark_space, /* in: in crash recovery, at database
533
startup we mark all spaces which have
534
an associated table in the InnoDB
535
data dictionary, so that
536
we can print a warning about orphaned
538
ibool print_error_if_does_not_exist);
539
/* in: print detailed error
540
information to the .err log if a
541
matching tablespace is not found from
543
/**************************************************************************
544
Tries to extend a data file so that it would accommodate the number of pages
545
given. The tablespace must be cached in the memory cache. If the space is big
546
enough already, does nothing. */
549
fil_extend_space_to_desired_size(
550
/*=============================*/
551
/* out: TRUE if success */
552
ulint* actual_size, /* out: size of the space after extension;
553
if we ran out of disk space this may be lower
554
than the desired size */
555
ulint space_id, /* in: space id */
556
ulint size_after_extend);/* in: desired size in pages after the
557
extension; if the current space size is bigger
558
than this already, the function does nothing */
559
#ifdef UNIV_HOTBACKUP
560
/************************************************************************
561
Extends all tablespaces to the size stored in the space header. During the
562
ibbackup --apply-log phase we extended the spaces on-demand so that log records
563
could be applied, but that may have left spaces still too small compared to
564
the size stored in the space header. */
567
fil_extend_tablespaces_to_stored_len(void);
568
/*======================================*/
570
/***********************************************************************
571
Tries to reserve free extents in a file space. */
574
fil_space_reserve_free_extents(
575
/*===========================*/
576
/* out: TRUE if succeed */
577
ulint id, /* in: space id */
578
ulint n_free_now, /* in: number of free extents now */
579
ulint n_to_reserve); /* in: how many one wants to reserve */
580
/***********************************************************************
581
Releases free extents in a file space. */
584
fil_space_release_free_extents(
585
/*===========================*/
586
ulint id, /* in: space id */
587
ulint n_reserved); /* in: how many one reserved */
588
/***********************************************************************
589
Gets the number of reserved extents. If the database is silent, this number
593
fil_space_get_n_reserved_extents(
594
/*=============================*/
595
ulint id); /* in: space id */
596
/************************************************************************
597
Reads or writes data. This operation is asynchronous (aio). */
602
/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
603
if we are trying to do i/o on a tablespace
604
which does not exist */
605
ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE,
606
ORed to OS_FILE_LOG, if a log i/o
607
and ORed to OS_AIO_SIMULATED_WAKE_LATER
608
if simulated aio and we want to post a
609
batch of i/os; NOTE that a simulated batch
610
may introduce hidden chances of deadlocks,
611
because i/os are not actually handled until
612
all have been posted: use with great
614
ibool sync, /* in: TRUE if synchronous aio is desired */
615
ulint space_id, /* in: space id */
616
ulint zip_size, /* in: compressed page size in bytes;
617
0 for uncompressed pages */
618
ulint block_offset, /* in: offset in number of blocks */
619
ulint byte_offset, /* in: remainder of offset in bytes; in
620
aio this must be divisible by the OS block
622
ulint len, /* in: how many bytes to read or write; this
623
must not cross a file boundary; in aio this
624
must be a block size multiple */
625
void* buf, /* in/out: buffer where to store read data
626
or from where to write; in aio this must be
627
appropriately aligned */
628
void* message); /* in: message for aio handler if non-sync
629
aio used, else ignored */
630
/**************************************************************************
631
Waits for an aio operation to complete. This function is used to write the
632
handler for completed requests. The aio array of pending requests is divided
633
into segments (see os0file.c for more info). The thread specifies which
634
segment it wants to wait for. */
639
ulint segment); /* in: the number of the segment in the aio
641
/**************************************************************************
642
Flushes to disk possible writes cached by the OS. If the space does not exist
643
or is being dropped, does not do anything. */
648
ulint space_id); /* in: file space id (this can be a group of
649
log files or a tablespace of the database) */
650
/**************************************************************************
651
Flushes to disk writes in file spaces of the given type possibly cached by
655
fil_flush_file_spaces(
656
/*==================*/
657
ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */
658
/**********************************************************************
659
Checks the consistency of the tablespace cache. */
664
/* out: TRUE if ok */
665
/************************************************************************
666
Returns TRUE if file address is undefined. */
671
/* out: TRUE if undefined */
672
fil_addr_t addr); /* in: address */
673
/************************************************************************
674
Accessor functions for a file page */
677
fil_page_get_prev(const byte* page);
679
fil_page_get_next(const byte* page);
680
/*************************************************************************
681
Sets the file page type. */
686
byte* page, /* in: file page */
687
ulint type); /* in: type */
688
/*************************************************************************
689
Gets the file page type. */
694
/* out: type; NOTE that if the type
695
has not been written to page, the
696
return value is not defined */
697
const byte* page); /* in: file page */
700
typedef struct fil_space_struct fil_space_t;