1
/******************************************************
2
The interface to the operating system file io
6
Created 10/21/1995 Heikki Tuuri
7
*******************************************************/
20
typedef struct fil_node_struct fil_node_t;
23
extern ibool os_do_not_call_flush_at_each_write;
24
#endif /* UNIV_DO_FLUSH */
25
extern ibool os_has_said_disk_full;
26
extern ibool os_aio_print_debug;
28
extern ulint os_file_n_pending_preads;
29
extern ulint os_file_n_pending_pwrites;
31
extern ulint os_n_pending_reads;
32
extern ulint os_n_pending_writes;
36
/* We define always WIN_ASYNC_IO, and check at run-time whether
37
the OS actually supports it: Win 95 does not, NT does. */
40
#define UNIV_NON_BUFFERED_IO
45
#define os_file_t HANDLE
46
#define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd)
48
typedef int os_file_t;
49
#define OS_FILE_FROM_FD(fd) fd
52
extern ulint os_innodb_umask;
54
/* If this flag is TRUE, then we will use the native aio of the
55
OS (provided we compiled Innobase with it in), otherwise we will
56
use simulated aio we build below with threads */
58
extern ibool os_aio_use_native_aio;
60
#define OS_FILE_SECTOR_SIZE 512
62
/* The next value should be smaller than or equal to the smallest sector size used
63
on any disk. A log block is required to be a portion of disk which is written
64
so that if the start and the end of a block get written to disk, then the
65
whole block gets written. This should be true even in most cases of a crash:
66
if this fails for a log block, then it is equivalent to a media failure in the log. */
69
#define OS_FILE_LOG_BLOCK_SIZE 512
71
/* Options for file_create */
72
#define OS_FILE_OPEN 51
73
#define OS_FILE_CREATE 52
74
#define OS_FILE_OVERWRITE 53
75
#define OS_FILE_OPEN_RAW 54
76
#define OS_FILE_CREATE_PATH 55
77
#define OS_FILE_OPEN_RETRY 56 /* for os_file_create() on
78
the first ibdata file */
80
#define OS_FILE_READ_ONLY 333
81
#define OS_FILE_READ_WRITE 444
82
#define OS_FILE_READ_ALLOW_DELETE 555 /* for ibbackup */
84
/* Options for the purpose argument of file_create */
85
#define OS_FILE_AIO 61
86
#define OS_FILE_NORMAL 62
88
/* Types for file create */
89
#define OS_DATA_FILE 100
90
#define OS_LOG_FILE 101
92
/* Error codes from os_file_get_last_error */
93
#define OS_FILE_NOT_FOUND 71
94
#define OS_FILE_DISK_FULL 72
95
#define OS_FILE_ALREADY_EXISTS 73
96
#define OS_FILE_PATH_ERROR 74
97
#define OS_FILE_AIO_RESOURCES_RESERVED 75 /* wait for OS aio resources
98
to become available again */
99
#define OS_FILE_SHARING_VIOLATION 76
100
#define OS_FILE_ERROR_NOT_SPECIFIED 77
102
/* Types for aio operations */
103
#define OS_FILE_READ 10
104
#define OS_FILE_WRITE 11
106
#define OS_FILE_LOG 256 /* This can be ORed to type */
108
#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /* Win NT does not allow more
111
/* Modes for aio operations */
112
#define OS_AIO_NORMAL 21 /* Normal asynchronous i/o not for ibuf
113
pages or ibuf bitmap pages */
114
#define OS_AIO_IBUF 22 /* Asynchronous i/o for ibuf pages or ibuf bitmap pages */
116
#define OS_AIO_LOG 23 /* Asynchronous i/o for the log */
117
#define OS_AIO_SYNC 24 /* Asynchronous i/o where the calling thread
118
will itself wait for the i/o to complete,
119
doing also the job of the i/o-handler thread;
120
can be used for any pages, ibuf or non-ibuf.
121
This is used to save CPU time, as we can do
122
with fewer thread switches. Plain synchronous
123
i/o is not as good, because it must serialize
124
the file seek and read or write, causing a
125
bottleneck for parallelism. */
127
#define OS_AIO_SIMULATED_WAKE_LATER 512 /* This can be ORed to mode
128
in the call of os_aio(...),
129
if the caller wants to post several i/o
130
requests in a batch, and only after that
131
wake the i/o-handler thread; this has
132
effect only in simulated aio */
138
extern ulint os_n_file_reads;
139
extern ulint os_n_file_writes;
140
extern ulint os_n_fsyncs;
142
/* File types for directory entry data type */
144
/* Kind of file system object a directory entry refers to. The enumerators
are consecutive integers starting from 0 (OS_FILE_TYPE_UNKNOWN). */
enum os_file_type_enum{
	OS_FILE_TYPE_UNKNOWN = 0,	/* type could not be determined */
	OS_FILE_TYPE_FILE,		/* regular file */
	OS_FILE_TYPE_DIR,		/* directory */
	OS_FILE_TYPE_LINK		/* symbolic link */
};
typedef enum os_file_type_enum	os_file_type_t;
152
/* Maximum path string length in bytes when referring to tables in the
153
'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers
154
of this size from the thread stack; that is why this should not be made much
155
bigger than 4000 bytes */
156
#define OS_FILE_MAX_PATH 4000
158
/* Struct used in fetching information of a file in a directory */
159
struct os_file_stat_struct{
160
char name[OS_FILE_MAX_PATH]; /* path to a file */
161
os_file_type_t type; /* file type */
162
ib_int64_t size; /* file size */
163
time_t ctime; /* creation time */
164
time_t mtime; /* modification time */
165
time_t atime; /* access time */
167
typedef struct os_file_stat_struct os_file_stat_t;
170
typedef HANDLE os_file_dir_t; /* directory stream */
172
typedef DIR* os_file_dir_t; /* directory stream */
175
/***************************************************************************
176
Gets the operating system version. Currently works only on Windows. */
179
os_get_os_version(void);
180
/*===================*/
181
/* out: OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */
182
/********************************************************************
183
Creates the seek mutexes used in positioned reads and writes. */
186
os_io_init_simple(void);
187
/*===================*/
188
/***************************************************************************
189
Creates a temporary file. This function is like tmpfile(3), but
190
the temporary file is created in the MySQL temporary directory.
191
On Netware, this function is like tmpfile(3), because the C run-time
192
library of Netware does not expose the delete-on-close flag. */
195
os_file_create_tmpfile(void);
196
/*========================*/
197
/* out: temporary file handle, or NULL on error */
198
/***************************************************************************
199
The os_file_opendir() function opens a directory stream corresponding to the
200
directory named by the dirname argument. The directory stream is positioned
201
at the first entry. In both Unix and Windows we automatically skip the '.'
202
and '..' items at the start of the directory listing. */
207
/* out: directory stream, NULL if
209
const char* dirname, /* in: directory name; it must not
210
contain a trailing '\' or '/' */
211
ibool error_is_fatal);/* in: TRUE if we should treat an
212
error as a fatal error; if we try to
213
open symlinks then we do not wish a
214
fatal error if it happens not to be
216
/***************************************************************************
217
Closes a directory stream. */
222
/* out: 0 if success, -1 if failure */
223
os_file_dir_t dir); /* in: directory stream */
224
/***************************************************************************
225
This function returns information of the next file in the directory. We jump
226
over the '.' and '..' entries in the directory. */
229
os_file_readdir_next_file(
230
/*======================*/
231
/* out: 0 if ok, -1 if error, 1 if at the end
233
const char* dirname,/* in: directory name or path */
234
os_file_dir_t dir, /* in: directory stream */
235
os_file_stat_t* info); /* in/out: buffer where the info is returned */
236
/*********************************************************************
237
This function attempts to create a directory named pathname. The new directory
238
gets default permissions. On Unix, the permissions are (0770 & ~umask). If the
239
directory exists already, nothing is done and the call succeeds, unless the
240
fail_if_exists argument is true. */
243
os_file_create_directory(
244
/*=====================*/
245
/* out: TRUE if call succeeds,
247
const char* pathname, /* in: directory name as
248
null-terminated string */
249
ibool fail_if_exists);/* in: if TRUE, pre-existing directory
250
is treated as an error. */
251
/********************************************************************
252
A simple function to open or create a file. */
255
os_file_create_simple(
256
/*==================*/
257
/* out, own: handle to the file, not defined
258
if error, error number can be retrieved with
259
os_file_get_last_error */
260
const char* name, /* in: name of the file or path as a
261
null-terminated string */
262
ulint create_mode,/* in: OS_FILE_OPEN if an existing file is
263
opened (if does not exist, error), or
264
OS_FILE_CREATE if a new file is created
265
(if exists, error), or
266
OS_FILE_CREATE_PATH if new file
267
(if exists, error) and subdirectories along
268
its path are created (if needed)*/
269
ulint access_type,/* in: OS_FILE_READ_ONLY or
270
OS_FILE_READ_WRITE */
271
ibool* success);/* out: TRUE if succeed, FALSE if error */
272
/********************************************************************
273
A simple function to open or create a file. */
276
os_file_create_simple_no_error_handling(
277
/*====================================*/
278
/* out, own: handle to the file, not defined
279
if error, error number can be retrieved with
280
os_file_get_last_error */
281
const char* name, /* in: name of the file or path as a
282
null-terminated string */
283
ulint create_mode,/* in: OS_FILE_OPEN if an existing file
284
is opened (if does not exist, error), or
285
OS_FILE_CREATE if a new file is created
286
(if exists, error) */
287
ulint access_type,/* in: OS_FILE_READ_ONLY,
288
OS_FILE_READ_WRITE, or
289
OS_FILE_READ_ALLOW_DELETE; the last option is
290
used by a backup program reading the file */
291
ibool* success);/* out: TRUE if succeed, FALSE if error */
292
/********************************************************************
293
Tries to disable OS caching on an opened file descriptor. */
298
int fd, /* in: file descriptor to alter */
299
const char* file_name, /* in: file name, used in the
300
diagnostic message */
301
const char* operation_name);/* in: "open" or "create"; used in the
302
diagnostic message */
303
/********************************************************************
304
Opens an existing file or creates a new one. */
309
/* out, own: handle to the file, not defined
310
if error, error number can be retrieved with
311
os_file_get_last_error */
312
const char* name, /* in: name of the file or path as a
313
null-terminated string */
314
ulint create_mode,/* in: OS_FILE_OPEN if an existing file
315
is opened (if does not exist, error), or
316
OS_FILE_CREATE if a new file is created
318
OS_FILE_OVERWRITE if a new file is created
319
or an old overwritten;
320
OS_FILE_OPEN_RAW, if a raw device or disk
321
partition should be opened */
322
ulint purpose,/* in: OS_FILE_AIO, if asynchronous,
323
non-buffered i/o is desired,
324
OS_FILE_NORMAL, if any normal file;
325
NOTE that it also depends on type, os_aio_..
326
and srv_.. variables whether we really use
327
async i/o or unbuffered i/o: look in the
328
function source code for the exact rules */
329
ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
330
ibool* success);/* out: TRUE if succeed, FALSE if error */
331
/***************************************************************************
332
Deletes a file. The file has to be closed before calling this. */
337
/* out: TRUE if success */
338
const char* name); /* in: file path as a null-terminated string */
340
/***************************************************************************
341
Deletes a file if it exists. The file has to be closed before calling this. */
344
os_file_delete_if_exists(
345
/*=====================*/
346
/* out: TRUE if success */
347
const char* name); /* in: file path as a null-terminated string */
348
/***************************************************************************
349
Renames a file (can also move it to another directory). It is safest that the
350
file is closed before calling this function. */
355
/* out: TRUE if success */
356
const char* oldpath, /* in: old file path as a
357
null-terminated string */
358
const char* newpath); /* in: new file path */
359
/***************************************************************************
360
Closes a file handle. In case of error, error number can be retrieved with
361
os_file_get_last_error. */
366
/* out: TRUE if success */
367
os_file_t file); /* in, own: handle to a file */
368
/***************************************************************************
369
Closes a file handle. */
372
os_file_close_no_error_handling(
373
/*============================*/
374
/* out: TRUE if success */
375
os_file_t file); /* in, own: handle to a file */
376
/***************************************************************************
382
/* out: TRUE if success */
383
os_file_t file, /* in: handle to a file */
384
ulint* size, /* out: least significant 32 bits of file
386
ulint* size_high);/* out: most significant 32 bits of size */
387
/***************************************************************************
388
Gets file size as a 64-bit integer ib_int64_t. */
391
os_file_get_size_as_iblonglong(
392
/*===========================*/
393
/* out: size in bytes, -1 if error */
394
os_file_t file); /* in: handle to a file */
395
/***************************************************************************
396
Write the specified number of zeros to a newly created file. */
401
/* out: TRUE if success */
402
const char* name, /* in: name of the file or path as a
403
null-terminated string */
404
os_file_t file, /* in: handle to a file */
405
ulint size, /* in: least significant 32 bits of file
407
ulint size_high);/* in: most significant 32 bits of size */
408
/***************************************************************************
409
Truncates a file at its current position. */
414
/* out: TRUE if success */
415
FILE* file); /* in: file to be truncated */
416
/***************************************************************************
417
Flushes the write buffers of a given file to the disk. */
422
/* out: TRUE if success */
423
os_file_t file); /* in, own: handle to a file */
424
/***************************************************************************
425
Retrieves the last error number if an error occurs in a file io function.
426
The number should be retrieved before any other OS calls (because they may
427
overwrite the error number). If the number is not known to this program,
428
the OS error number + 100 is returned. */
431
os_file_get_last_error(
432
/*===================*/
433
/* out: error number, or OS error
435
ibool report_all_errors); /* in: TRUE if we want an error message
436
printed of all errors */
437
/***********************************************************************
438
Requests a synchronous read operation. */
443
/* out: TRUE if request was
444
successful, FALSE if fail */
445
os_file_t file, /* in: handle to a file */
446
void* buf, /* in: buffer where to read */
447
ulint offset, /* in: least significant 32 bits of file
448
offset where to read */
449
ulint offset_high,/* in: most significant 32 bits of
451
ulint n); /* in: number of bytes to read */
452
/***********************************************************************
453
Rewind file to its start, read at most size - 1 bytes from it to str, and
454
NUL-terminate str. All errors are silently ignored. This function is
455
mostly meant to be used with temporary files. */
460
FILE* file, /* in: file to read from */
461
char* str, /* in: buffer where to read */
462
ulint size); /* in: size of buffer */
463
/***********************************************************************
464
Requests a synchronous positioned read operation. This function does not do
465
any error handling. In case of error it returns FALSE. */
468
os_file_read_no_error_handling(
469
/*===========================*/
470
/* out: TRUE if request was
471
successful, FALSE if fail */
472
os_file_t file, /* in: handle to a file */
473
void* buf, /* in: buffer where to read */
474
ulint offset, /* in: least significant 32 bits of file
475
offset where to read */
476
ulint offset_high,/* in: most significant 32 bits of
478
ulint n); /* in: number of bytes to read */
480
/***********************************************************************
481
Requests a synchronous write operation. */
486
/* out: TRUE if request was
487
successful, FALSE if fail */
488
const char* name, /* in: name of the file or path as a
489
null-terminated string */
490
os_file_t file, /* in: handle to a file */
491
const void* buf, /* in: buffer from which to write */
492
ulint offset, /* in: least significant 32 bits of file
493
offset where to write */
494
ulint offset_high,/* in: most significant 32 bits of
496
ulint n); /* in: number of bytes to write */
497
/***********************************************************************
498
Check the existence and type of the given file. */
503
/* out: TRUE if call succeeded */
504
const char* path, /* in: pathname of the file */
505
ibool* exists, /* out: TRUE if file exists */
506
os_file_type_t* type); /* out: type of the file (if it exists) */
507
/********************************************************************
508
The function os_file_dirname returns a directory component of a
509
null-terminated pathname string. In the usual case, dirname returns
510
the string up to, but not including, the final '/', and basename
511
is the component following the final '/'. Trailing '/' charac-
512
ters are not counted as part of the pathname.
514
If path does not contain a slash, dirname returns the string ".".
516
Concatenating the string returned by dirname, a "/", and the basename
517
yields a complete pathname.
519
The return value is a copy of the directory component of the pathname.
520
The copy is allocated from heap. It is the caller's responsibility
521
to free it after it is no longer needed.
523
The following list of examples (taken from SUSv2) shows the strings
524
returned by dirname and basename for different paths:
526
path dirname basename
527
"/usr/lib" "/usr" "lib"
538
/* out, own: directory component of the
540
const char* path); /* in: pathname */
541
/********************************************************************
542
Creates all missing subdirectories along the given path. */
545
os_file_create_subdirs_if_needed(
546
/*=============================*/
547
/* out: TRUE if call succeeded
549
const char* path); /* in: path name */
550
/****************************************************************************
551
Initializes the asynchronous io system. Creates separate aio array for
552
non-ibuf read and write, a third aio array for the ibuf i/o, with just one
553
segment, two aio arrays for log reads and writes with one segment, and a
554
synchronous aio array of the specified size. The combined number of segments
555
in the three first aio arrays is the parameter n_segments given to the
556
function. The caller must create an i/o handler thread for each segment in
557
the four first arrays, but not for the sync aio array. */
562
ulint n, /* in: maximum number of pending aio operations
563
allowed; n must be divisible by n_segments */
564
ulint n_segments, /* in: combined number of segments in the four
565
first aio arrays; must be >= 4 */
566
ulint n_slots_sync); /* in: number of slots in the sync aio array */
567
/***********************************************************************
568
Requests an asynchronous i/o operation. */
573
/* out: TRUE if request was queued
574
successfully, FALSE if fail */
575
ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */
576
ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed
577
to OS_AIO_SIMULATED_WAKE_LATER: the
578
last flag advises this function not to wake
579
i/o-handler threads, but the caller will
580
do the waking explicitly later, in this
581
way the caller can post several requests in
582
a batch; NOTE that the batch must not be
583
so big that it exhausts the slots in aio
584
arrays! NOTE that a simulated batch
585
may introduce hidden chances of deadlocks,
586
because i/os are not actually handled until
587
all have been posted: use with great
589
const char* name, /* in: name of the file or path as a
590
null-terminated string */
591
os_file_t file, /* in: handle to a file */
592
void* buf, /* in: buffer where to read or from which
594
ulint offset, /* in: least significant 32 bits of file
595
offset where to read or write */
596
ulint offset_high, /* in: most significant 32 bits of
598
ulint n, /* in: number of bytes to read or write */
599
fil_node_t* message1,/* in: messages for the aio handler (these
600
can be used to identify a completed aio
601
operation); if mode is OS_AIO_SYNC, these
604
/****************************************************************************
605
Wakes up all async i/o threads so that they know to exit themselves in shutdown. */
609
os_aio_wake_all_threads_at_shutdown(void);
610
/*=====================================*/
611
/****************************************************************************
612
Waits until there are no pending writes in os_aio_write_array. There can
613
be other, synchronous, pending writes. */
616
os_aio_wait_until_no_pending_writes(void);
617
/*=====================================*/
618
/**************************************************************************
619
Wakes up simulated aio i/o-handler threads if they have something to do. */
622
os_aio_simulated_wake_handler_threads(void);
623
/*=======================================*/
624
/**************************************************************************
625
This function can be called if one wants to post a batch of reads and
626
prefers an i/o-handler thread to handle them all at once later. You must
627
call os_aio_simulated_wake_handler_threads later to ensure the threads
628
are not left sleeping! */
631
os_aio_simulated_put_read_threads_to_sleep(void);
632
/*============================================*/
635
/**************************************************************************
636
This function is only used in Windows asynchronous i/o.
637
Waits for an aio operation to complete. This function is used to wait
638
for completed requests. The aio array of pending requests is divided
639
into segments. The thread specifies which segment or slot it wants to wait
640
for. NOTE: this function will also take care of freeing the aio slot,
641
therefore no other thread is allowed to do the freeing! */
644
os_aio_windows_handle(
645
/*==================*/
646
/* out: TRUE if the aio operation succeeded */
647
ulint segment, /* in: the number of the segment in the aio
648
arrays to wait for; segment 0 is the ibuf
649
i/o thread, segment 1 the log i/o thread,
650
then follow the non-ibuf read threads, and as
651
the last are the non-ibuf write threads; if
652
this is ULINT_UNDEFINED, then it means that
653
sync aio is used, and this parameter is
655
ulint pos, /* this parameter is used only in sync aio:
656
wait for the aio slot at this position */
657
fil_node_t**message1, /* out: the messages passed with the aio
658
request; note that also in the case where
659
the aio operation failed, these output
660
parameters are valid and can be used to
661
restart the operation, for example */
663
ulint* type); /* out: OS_FILE_WRITE or ..._READ */
666
/* Currently we do not use Posix async i/o */
667
#ifdef POSIX_ASYNC_IO
668
/**************************************************************************
669
This function is only used in Posix asynchronous i/o. Waits for an aio
670
operation to complete. */
675
/* out: TRUE if the aio operation succeeded */
676
ulint array_no, /* in: array number 0 - 3 */
677
fil_node_t**message1, /* out: the messages passed with the aio
678
request; note that also in the case where
679
the aio operation failed, these output
680
parameters are valid and can be used to
681
restart the operation, for example */
684
/**************************************************************************
685
Does simulated aio. This function should be called by an i/o-handler thread. */
689
os_aio_simulated_handle(
690
/*====================*/
691
/* out: TRUE if the aio operation succeeded */
692
ulint segment, /* in: the number of the segment in the aio
693
arrays to wait for; segment 0 is the ibuf
694
i/o thread, segment 1 the log i/o thread,
695
then follow the non-ibuf read threads, and as
696
the last are the non-ibuf write threads */
697
fil_node_t**message1, /* out: the messages passed with the aio
698
request; note that also in the case where
699
the aio operation failed, these output
700
parameters are valid and can be used to
701
restart the operation, for example */
703
ulint* type); /* out: OS_FILE_WRITE or ..._READ */
704
/**************************************************************************
705
Validates the consistency of the aio system. */
708
os_aio_validate(void);
709
/*=================*/
710
/* out: TRUE if ok */
711
/**************************************************************************
712
Prints info of the aio arrays. */
717
FILE* file); /* in: file where to print */
718
/**************************************************************************
719
Refreshes the statistics used to print per-second averages. */
722
os_aio_refresh_stats(void);
723
/*======================*/
726
/**************************************************************************
727
Checks that all slots in the system have been freed, that is, there are
728
no pending io operations. */
731
os_aio_all_slots_free(void);
732
/*=======================*/
733
#endif /* UNIV_DEBUG */
735
/***********************************************************************
736
This function returns information about the specified file */
743
const char* path, /* in: pathname of the file */
744
os_file_stat_t* stat_info); /* information of a file in a
747
#if !defined(UNIV_HOTBACKUP) && !defined(__NETWARE__)
748
/*************************************************************************
749
Creates a temporary file that will be deleted on close.
750
This function is defined in ha_innodb.cc. */
753
innobase_mysql_tmpfile(void);
754
/*========================*/
755
/* out: temporary file descriptor, or < 0 on error */
756
#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */