1
/******************************************************
2
The interface to the operating system file io
6
Created 10/21/1995 Heikki Tuuri
7
*******************************************************/
20
typedef struct fil_node_struct fil_node_t;
23
extern ibool os_do_not_call_flush_at_each_write;
24
#endif /* UNIV_DO_FLUSH */
25
extern ibool os_has_said_disk_full;
26
extern ibool os_aio_print_debug;
28
extern ulint os_file_n_pending_preads;
29
extern ulint os_file_n_pending_pwrites;
31
extern ulint os_n_pending_reads;
32
extern ulint os_n_pending_writes;
36
/* We always define WIN_ASYNC_IO, and check at run-time whether
37
the OS actually supports it: Win 95 does not, NT does. */
40
#define UNIV_NON_BUFFERED_IO
45
#define os_file_t HANDLE
47
typedef int os_file_t;
50
extern ulint os_innodb_umask;
52
/* If this flag is TRUE, then we will use the native aio of the
53
OS (provided we compiled Innobase with it in), otherwise we will
54
use simulated aio we build below with threads */
56
extern ibool os_aio_use_native_aio;
58
#define OS_FILE_SECTOR_SIZE 512
60
/* The next value should be smaller than or equal to the smallest sector size used
61
on any disk. A log block is required to be a portion of disk which is written
62
so that if the start and the end of a block get written to disk, then the
63
whole block gets written. This should be true even in most cases of a crash:
64
if this fails for a log block, then it is equivalent to a media failure in the log. */
67
#define OS_FILE_LOG_BLOCK_SIZE 512
69
/* Options for file_create */
70
#define OS_FILE_OPEN 51
71
#define OS_FILE_CREATE 52
72
#define OS_FILE_OVERWRITE 53
73
#define OS_FILE_OPEN_RAW 54
74
#define OS_FILE_CREATE_PATH 55
75
#define OS_FILE_OPEN_RETRY 56 /* for os_file_create() on
76
the first ibdata file */
78
#define OS_FILE_READ_ONLY 333
79
#define OS_FILE_READ_WRITE 444
80
#define OS_FILE_READ_ALLOW_DELETE 555 /* for ibbackup */
82
/* Options for file_create: values of the 'purpose' argument */
83
#define OS_FILE_AIO 61
84
#define OS_FILE_NORMAL 62
86
/* Types for file create */
87
#define OS_DATA_FILE 100
88
#define OS_LOG_FILE 101
90
/* Error codes from os_file_get_last_error */
91
#define OS_FILE_NOT_FOUND 71
92
#define OS_FILE_DISK_FULL 72
93
#define OS_FILE_ALREADY_EXISTS 73
94
#define OS_FILE_PATH_ERROR 74
95
#define OS_FILE_AIO_RESOURCES_RESERVED 75 /* wait for OS aio resources
96
to become available again */
97
#define OS_FILE_SHARING_VIOLATION 76
98
#define OS_FILE_ERROR_NOT_SPECIFIED 77
100
/* Types for aio operations */
101
#define OS_FILE_READ 10
102
#define OS_FILE_WRITE 11
104
#define OS_FILE_LOG 256 /* This can be ORed to type */
106
#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /* Win NT does not allow more
109
/* Modes for aio operations */
110
#define OS_AIO_NORMAL 21 /* Normal asynchronous i/o not for ibuf
111
pages or ibuf bitmap pages */
112
#define OS_AIO_IBUF 22 /* Asynchronous i/o for ibuf pages or ibuf bitmap pages */
114
#define OS_AIO_LOG 23 /* Asynchronous i/o for the log */
115
#define OS_AIO_SYNC 24 /* Asynchronous i/o where the calling thread
116
will itself wait for the i/o to complete,
117
doing also the job of the i/o-handler thread;
118
can be used for any pages, ibuf or non-ibuf.
119
This is used to save CPU time, as we can do
120
with fewer thread switches. Plain synchronous
121
i/o is not as good, because it must serialize
122
the file seek and read or write, causing a
123
bottleneck for parallelism. */
125
#define OS_AIO_SIMULATED_WAKE_LATER 512 /* This can be ORed to mode
126
in the call of os_aio(...),
127
if the caller wants to post several i/o
128
requests in a batch, and only after that
129
wake the i/o-handler thread; this has
130
effect only in simulated aio */
136
extern ulint os_n_file_reads;
137
extern ulint os_n_file_writes;
138
extern ulint os_n_fsyncs;
140
/* File types for directory entry data type */
142
/* Kind of file system object that a directory entry refers to. Through the
os_file_type_t typedef this is the type of the 'type' field of
os_file_stat_struct. */
enum os_file_type_enum{
143
OS_FILE_TYPE_UNKNOWN = 0, /* presumably: none of the kinds below could be
established -- TODO confirm against the
implementation */
144
OS_FILE_TYPE_FILE, /* regular file */
145
OS_FILE_TYPE_DIR, /* directory */
146
OS_FILE_TYPE_LINK /* symbolic link */
148
typedef enum os_file_type_enum os_file_type_t;
150
/* Maximum path string length in bytes when referring to tables within the
151
'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers
152
of this size from the thread stack; that is why this should not be made much
153
bigger than 4000 bytes */
154
#define OS_FILE_MAX_PATH 4000
156
/* Struct used in fetching information of a file in a directory */
157
/* Description of one file. os_file_readdir_next_file() returns one of these
through its 'info' parameter. NOTE: the embedded name buffer makes every
instance at least OS_FILE_MAX_PATH bytes large, which matters when the struct
is placed on a thread stack. */
struct os_file_stat_struct{
158
char name[OS_FILE_MAX_PATH]; /* path to a file; the buffer holds at most
OS_FILE_MAX_PATH bytes */
159
os_file_type_t type; /* file type, one of the OS_FILE_TYPE_... values */
160
ib_longlong size; /* file size; presumably in bytes -- TODO confirm */
161
time_t ctime; /* creation time; NOTE(review): if this is filled
from stat(), POSIX st_ctime is the time of the
last status change, not of creation -- confirm */
162
time_t mtime; /* modification time */
163
time_t atime; /* access time */
165
typedef struct os_file_stat_struct os_file_stat_t;
168
typedef HANDLE os_file_dir_t; /* directory stream */
170
typedef DIR* os_file_dir_t; /* directory stream */
173
/***************************************************************************
174
Gets the operating system version. Currently works only on Windows. */
177
os_get_os_version(void);
178
/*===================*/
179
/* out: OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */
180
/********************************************************************
181
Creates the seek mutexes used in positioned reads and writes. */
184
os_io_init_simple(void);
185
/*===================*/
186
/***************************************************************************
187
Creates a temporary file. This function is like tmpfile(3), but
188
the temporary file is created in the MySQL temporary directory.
189
On Netware, this function is like tmpfile(3), because the C run-time
190
library of Netware does not expose the delete-on-close flag. */
193
os_file_create_tmpfile(void);
194
/*========================*/
195
/* out: temporary file handle, or NULL on error */
196
/***************************************************************************
197
The os_file_opendir() function opens a directory stream corresponding to the
198
directory named by the dirname argument. The directory stream is positioned
199
at the first entry. In both Unix and Windows we automatically skip the '.'
200
and '..' items at the start of the directory listing. */
205
/* out: directory stream, NULL if
207
const char* dirname, /* in: directory name; it must not
208
contain a trailing '\' or '/' */
209
ibool error_is_fatal);/* in: TRUE if we should treat an
210
error as a fatal error; if we try to
211
open symlinks then we do not wish a
212
fatal error if it happens not to be
214
/***************************************************************************
215
Closes a directory stream. */
220
/* out: 0 if success, -1 if failure */
221
os_file_dir_t dir); /* in: directory stream */
222
/***************************************************************************
223
This function returns information of the next file in the directory. We jump
224
over the '.' and '..' entries in the directory. */
227
os_file_readdir_next_file(
228
/*======================*/
229
/* out: 0 if ok, -1 if error, 1 if at the end
231
const char* dirname,/* in: directory name or path */
232
os_file_dir_t dir, /* in: directory stream */
233
os_file_stat_t* info); /* in/out: buffer where the info is returned */
234
/*********************************************************************
235
This function attempts to create a directory named pathname. The new directory
236
gets default permissions. On Unix, the permissions are (0770 & ~umask). If the
237
directory exists already, nothing is done and the call succeeds, unless the
238
fail_if_exists argument is true. */
241
os_file_create_directory(
242
/*=====================*/
243
/* out: TRUE if call succeeds,
245
const char* pathname, /* in: directory name as
246
null-terminated string */
247
ibool fail_if_exists);/* in: if TRUE, pre-existing directory
248
is treated as an error. */
249
/********************************************************************
250
A simple function to open or create a file. */
253
os_file_create_simple(
254
/*==================*/
255
/* out, own: handle to the file, not defined
256
if error, error number can be retrieved with
257
os_file_get_last_error */
258
const char* name, /* in: name of the file or path as a
259
null-terminated string */
260
ulint create_mode,/* in: OS_FILE_OPEN if an existing file is
261
opened (if does not exist, error), or
262
OS_FILE_CREATE if a new file is created
263
(if exists, error), or
264
OS_FILE_CREATE_PATH if new file
265
(if exists, error) and subdirectories along
266
its path are created (if needed)*/
267
ulint access_type,/* in: OS_FILE_READ_ONLY or
268
OS_FILE_READ_WRITE */
269
ibool* success);/* out: TRUE if succeed, FALSE if error */
270
/********************************************************************
271
A simple function to open or create a file. */
274
os_file_create_simple_no_error_handling(
275
/*====================================*/
276
/* out, own: handle to the file, not defined
277
if error, error number can be retrieved with
278
os_file_get_last_error */
279
const char* name, /* in: name of the file or path as a
280
null-terminated string */
281
ulint create_mode,/* in: OS_FILE_OPEN if an existing file
282
is opened (if does not exist, error), or
283
OS_FILE_CREATE if a new file is created
284
(if exists, error) */
285
ulint access_type,/* in: OS_FILE_READ_ONLY,
286
OS_FILE_READ_WRITE, or
287
OS_FILE_READ_ALLOW_DELETE; the last option is
288
used by a backup program reading the file */
289
ibool* success);/* out: TRUE if succeed, FALSE if error */
290
/********************************************************************
291
Opens an existing file or creates a new one. */
296
/* out, own: handle to the file, not defined
297
if error, error number can be retrieved with
298
os_file_get_last_error */
299
const char* name, /* in: name of the file or path as a
300
null-terminated string */
301
ulint create_mode,/* in: OS_FILE_OPEN if an existing file
302
is opened (if does not exist, error), or
303
OS_FILE_CREATE if a new file is created
305
OS_FILE_OVERWRITE if a new file is created
306
or an old overwritten;
307
OS_FILE_OPEN_RAW, if a raw device or disk
308
partition should be opened */
309
ulint purpose,/* in: OS_FILE_AIO, if asynchronous,
310
non-buffered i/o is desired,
311
OS_FILE_NORMAL, if any normal file;
312
NOTE that it also depends on type, os_aio_..
313
and srv_.. variables whether we really use
314
async i/o or unbuffered i/o: look in the
315
function source code for the exact rules */
316
ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
317
ibool* success);/* out: TRUE if succeed, FALSE if error */
318
/***************************************************************************
319
Deletes a file. The file has to be closed before calling this. */
324
/* out: TRUE if success */
325
const char* name); /* in: file path as a null-terminated string */
327
/***************************************************************************
328
Deletes a file if it exists. The file has to be closed before calling this. */
331
os_file_delete_if_exists(
332
/*=====================*/
333
/* out: TRUE if success */
334
const char* name); /* in: file path as a null-terminated string */
335
/***************************************************************************
336
Renames a file (can also move it to another directory). It is safest that the
337
file is closed before calling this function. */
342
/* out: TRUE if success */
343
const char* oldpath, /* in: old file path as a
344
null-terminated string */
345
const char* newpath); /* in: new file path */
346
/***************************************************************************
347
Closes a file handle. In case of error, error number can be retrieved with
348
os_file_get_last_error. */
353
/* out: TRUE if success */
354
os_file_t file); /* in, own: handle to a file */
355
/***************************************************************************
356
Closes a file handle. */
359
os_file_close_no_error_handling(
360
/*============================*/
361
/* out: TRUE if success */
362
os_file_t file); /* in, own: handle to a file */
363
/***************************************************************************
369
/* out: TRUE if success */
370
os_file_t file, /* in: handle to a file */
371
ulint* size, /* out: least significant 32 bits of file
373
ulint* size_high);/* out: most significant 32 bits of size */
374
/***************************************************************************
375
Gets file size as a 64-bit integer ib_longlong. */
378
os_file_get_size_as_iblonglong(
379
/*===========================*/
380
/* out: size in bytes, -1 if error */
381
os_file_t file); /* in: handle to a file */
382
/***************************************************************************
383
Write the specified number of zeros to a newly created file. */
388
/* out: TRUE if success */
389
const char* name, /* in: name of the file or path as a
390
null-terminated string */
391
os_file_t file, /* in: handle to a file */
392
ulint size, /* in: least significant 32 bits of file
394
ulint size_high);/* in: most significant 32 bits of size */
395
/***************************************************************************
396
Truncates a file at its current position. */
401
/* out: TRUE if success */
402
FILE* file); /* in: file to be truncated */
403
/***************************************************************************
404
Flushes the write buffers of a given file to the disk. */
409
/* out: TRUE if success */
410
os_file_t file); /* in, own: handle to a file */
411
/***************************************************************************
412
Retrieves the last error number if an error occurs in a file io function.
413
The number should be retrieved before any other OS calls (because they may
414
overwrite the error number). If the number is not known to this program,
415
the OS error number + 100 is returned. */
418
os_file_get_last_error(
419
/*===================*/
420
/* out: error number, or OS error
422
ibool report_all_errors); /* in: TRUE if we want an error message
423
printed of all errors */
424
/***********************************************************************
425
Requests a synchronous read operation. */
430
/* out: TRUE if request was
431
successful, FALSE if fail */
432
os_file_t file, /* in: handle to a file */
433
void* buf, /* in: buffer where to read */
434
ulint offset, /* in: least significant 32 bits of file
435
offset where to read */
436
ulint offset_high,/* in: most significant 32 bits of
438
ulint n); /* in: number of bytes to read */
439
/***********************************************************************
440
Rewind file to its start, read at most size - 1 bytes from it to str, and
441
NUL-terminate str. All errors are silently ignored. This function is
442
mostly meant to be used with temporary files. */
447
FILE* file, /* in: file to read from */
448
char* str, /* in: buffer where to read */
449
ulint size); /* in: size of buffer */
450
/***********************************************************************
451
Requests a synchronous positioned read operation. This function does not do
452
any error handling. In case of error it returns FALSE. */
455
os_file_read_no_error_handling(
456
/*===========================*/
457
/* out: TRUE if request was
458
successful, FALSE if fail */
459
os_file_t file, /* in: handle to a file */
460
void* buf, /* in: buffer where to read */
461
ulint offset, /* in: least significant 32 bits of file
462
offset where to read */
463
ulint offset_high,/* in: most significant 32 bits of
465
ulint n); /* in: number of bytes to read */
467
/***********************************************************************
468
Requests a synchronous write operation. */
473
/* out: TRUE if request was
474
successful, FALSE if fail */
475
const char* name, /* in: name of the file or path as a
476
null-terminated string */
477
os_file_t file, /* in: handle to a file */
478
const void* buf, /* in: buffer from which to write */
479
ulint offset, /* in: least significant 32 bits of file
480
offset where to write */
481
ulint offset_high,/* in: most significant 32 bits of
483
ulint n); /* in: number of bytes to write */
484
/***********************************************************************
485
Check the existence and type of the given file. */
490
/* out: TRUE if call succeeded */
491
const char* path, /* in: pathname of the file */
492
ibool* exists, /* out: TRUE if file exists */
493
os_file_type_t* type); /* out: type of the file (if it exists) */
494
/********************************************************************
495
The function os_file_dirname returns a directory component of a
496
null-terminated pathname string. In the usual case, dirname returns
497
the string up to, but not including, the final '/', and basename
498
is the component following the final '/'. Trailing '/' characters
499
are not counted as part of the pathname.
501
If path does not contain a slash, dirname returns the string ".".
503
Concatenating the string returned by dirname, a "/", and the basename
504
yields a complete pathname.
506
The return value is a copy of the directory component of the pathname.
507
The copy is allocated from heap. It is the caller's responsibility
508
to free it after it is no longer needed.
510
The following list of examples (taken from SUSv2) shows the strings
511
returned by dirname and basename for different paths:
513
path dirname basename
514
"/usr/lib" "/usr" "lib"
525
/* out, own: directory component of the
527
const char* path); /* in: pathname */
528
/********************************************************************
529
Creates all missing subdirectories along the given path. */
532
os_file_create_subdirs_if_needed(
533
/*=============================*/
534
/* out: TRUE if call succeeded
536
const char* path); /* in: path name */
537
/****************************************************************************
538
Initializes the asynchronous io system. Creates separate aio array for
539
non-ibuf read and write, a third aio array for the ibuf i/o, with just one
540
segment, two aio arrays for log reads and writes with one segment, and a
541
synchronous aio array of the specified size. The combined number of segments
542
in the four first aio arrays is the parameter n_segments given to the
543
function. The caller must create an i/o handler thread for each segment in
544
the four first arrays, but not for the sync aio array. */
549
ulint n, /* in: maximum number of pending aio operations
550
allowed; n must be divisible by n_segments */
551
ulint n_segments, /* in: combined number of segments in the four
552
first aio arrays; must be >= 4 */
553
ulint n_slots_sync); /* in: number of slots in the sync aio array */
554
/***********************************************************************
555
Requests an asynchronous i/o operation. */
560
/* out: TRUE if request was queued
561
successfully, FALSE if fail */
562
ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
563
ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed
564
to OS_AIO_SIMULATED_WAKE_LATER: the
565
last flag advises this function not to wake
566
i/o-handler threads, but the caller will
567
do the waking explicitly later, in this
568
way the caller can post several requests in
569
a batch; NOTE that the batch must not be
570
so big that it exhausts the slots in aio
571
arrays! NOTE that a simulated batch
572
may introduce hidden chances of deadlocks,
573
because i/os are not actually handled until
574
all have been posted: use with great
576
const char* name, /* in: name of the file or path as a
577
null-terminated string */
578
os_file_t file, /* in: handle to a file */
579
void* buf, /* in: buffer where to read or from which
581
ulint offset, /* in: least significant 32 bits of file
582
offset where to read or write */
583
ulint offset_high, /* in: most significant 32 bits of
585
ulint n, /* in: number of bytes to read or write */
586
fil_node_t* message1,/* in: messages for the aio handler (these
587
can be used to identify a completed aio
588
operation); if mode is OS_AIO_SYNC, these
591
/****************************************************************************
592
Wakes up all async i/o threads so that they know to exit themselves in shutdown. */
596
os_aio_wake_all_threads_at_shutdown(void);
597
/*=====================================*/
598
/****************************************************************************
599
Waits until there are no pending writes in os_aio_write_array. There can
600
be other, synchronous, pending writes. */
603
os_aio_wait_until_no_pending_writes(void);
604
/*=====================================*/
605
/**************************************************************************
606
Wakes up simulated aio i/o-handler threads if they have something to do. */
609
os_aio_simulated_wake_handler_threads(void);
610
/*=======================================*/
611
/**************************************************************************
612
This function can be called if one wants to post a batch of reads and
613
prefers an i/o-handler thread to handle them all at once later. You must
614
call os_aio_simulated_wake_handler_threads later to ensure the threads
615
are not left sleeping! */
618
os_aio_simulated_put_read_threads_to_sleep(void);
619
/*============================================*/
622
/**************************************************************************
623
This function is only used in Windows asynchronous i/o.
624
Waits for an aio operation to complete. This function is used to wait
625
for completed requests. The aio array of pending requests is divided
626
into segments. The thread specifies which segment or slot it wants to wait
627
for. NOTE: this function will also take care of freeing the aio slot,
628
therefore no other thread is allowed to do the freeing! */
631
os_aio_windows_handle(
632
/*==================*/
633
/* out: TRUE if the aio operation succeeded */
634
ulint segment, /* in: the number of the segment in the aio
635
arrays to wait for; segment 0 is the ibuf
636
i/o thread, segment 1 the log i/o thread,
637
then follow the non-ibuf read threads, and as
638
the last are the non-ibuf write threads; if
639
this is ULINT_UNDEFINED, then it means that
640
sync aio is used, and this parameter is
642
ulint pos, /* this parameter is used only in sync aio:
643
wait for the aio slot at this position */
644
fil_node_t**message1, /* out: the messages passed with the aio
645
request; note that also in the case where
646
the aio operation failed, these output
647
parameters are valid and can be used to
648
restart the operation, for example */
650
ulint* type); /* out: OS_FILE_WRITE or ..._READ */
653
/* Currently we do not use Posix async i/o */
654
#ifdef POSIX_ASYNC_IO
655
/**************************************************************************
656
This function is only used in Posix asynchronous i/o. Waits for an aio
657
operation to complete. */
662
/* out: TRUE if the aio operation succeeded */
663
ulint array_no, /* in: array number 0 - 3 */
664
fil_node_t**message1, /* out: the messages passed with the aio
665
request; note that also in the case where
666
the aio operation failed, these output
667
parameters are valid and can be used to
668
restart the operation, for example */
671
/**************************************************************************
672
Does simulated aio. This function should be called by an i/o-handler thread. */
676
os_aio_simulated_handle(
677
/*====================*/
678
/* out: TRUE if the aio operation succeeded */
679
ulint segment, /* in: the number of the segment in the aio
680
arrays to wait for; segment 0 is the ibuf
681
i/o thread, segment 1 the log i/o thread,
682
then follow the non-ibuf read threads, and as
683
the last are the non-ibuf write threads */
684
fil_node_t**message1, /* out: the messages passed with the aio
685
request; note that also in the case where
686
the aio operation failed, these output
687
parameters are valid and can be used to
688
restart the operation, for example */
690
ulint* type); /* out: OS_FILE_WRITE or ..._READ */
691
/**************************************************************************
692
Validates the consistency of the aio system. */
695
os_aio_validate(void);
696
/*=================*/
697
/* out: TRUE if ok */
698
/**************************************************************************
699
Prints info of the aio arrays. */
704
FILE* file); /* in: file where to print */
705
/**************************************************************************
706
Refreshes the statistics used to print per-second averages. */
709
os_aio_refresh_stats(void);
710
/*======================*/
713
/**************************************************************************
714
Checks that all slots in the system have been freed, that is, there are
715
no pending io operations. */
718
os_aio_all_slots_free(void);
719
/*=======================*/
720
#endif /* UNIV_DEBUG */
722
/***********************************************************************
723
This function returns information about the specified file */
729
const char* path, /* in: pathname of the file */
730
os_file_stat_t* stat_info); /* information of a file in a