1
/*****************************************************************************
3
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15
Place, Suite 330, Boston, MA 02111-1307 USA
17
*****************************************************************************/
18
/***********************************************************************
20
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
21
Copyright (c) 2009, Percona Inc.
23
Portions of this file contain modifications contributed and copyrighted
24
by Percona Inc.. Those modifications are
25
gratefully acknowledged and are described briefly in the InnoDB
26
documentation. The contributions by Percona Inc. are incorporated with
27
their permission, and subject to the conditions contained in the file
30
This program is free software; you can redistribute it and/or modify it
31
under the terms of the GNU General Public License as published by the
32
Free Software Foundation; version 2 of the License.
34
This program is distributed in the hope that it will be useful, but
35
WITHOUT ANY WARRANTY; without even the implied warranty of
36
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
37
Public License for more details.
39
You should have received a copy of the GNU General Public License along
40
with this program; if not, write to the Free Software Foundation, Inc.,
41
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
43
***********************************************************************/
45
/**************************************************//**
47
The interface to the operating system file i/o primitives
49
Created 10/21/1995 Heikki Tuuri
50
*******************************************************/
55
#include "srv0start.h"
62
#ifndef UNIV_HOTBACKUP
64
# include "os0thread.h"
65
#else /* !UNIV_HOTBACKUP */
67
/* Add includes for the _stat() call to compile on Windows */
68
# include <sys/types.h>
69
# include <sys/stat.h>
71
#endif /* !UNIV_HOTBACKUP */
73
/* This specifies the file permissions InnoDB uses when it creates files in
74
Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
78
/** Umask for creating files */
79
UNIV_INTERN ulint os_innodb_umask
80
= S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
82
/** Umask for creating files */
83
UNIV_INTERN ulint os_innodb_umask = 0;
87
/* If the following is set to TRUE, we do not call os_file_flush in every
88
os_file_write. We can set this TRUE when the doublewrite buffer is used. */
89
UNIV_INTERN ibool os_do_not_call_flush_at_each_write = FALSE;
91
/* We do not call os_file_flush in every os_file_write. */
92
#endif /* UNIV_DO_FLUSH */
94
#ifndef UNIV_HOTBACKUP
95
/* We use these mutexes to protect lseek + file i/o operation, if the
96
OS does not provide an atomic pread or pwrite, or similar */
97
#define OS_FILE_N_SEEK_MUTEXES 16
98
UNIV_INTERN os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
100
/* In simulated aio, merge at most this many consecutive i/os */
101
#define OS_AIO_MERGE_N_CONSECUTIVE 64
103
/** If this flag is TRUE, then we will use the native aio of the
104
OS (provided we compiled Innobase with it in), otherwise we will
105
use simulated aio we build below with threads */
107
UNIV_INTERN ibool os_aio_use_native_aio = FALSE;
109
/** Flag: enable debug printout for asynchronous i/o */
110
UNIV_INTERN ibool os_aio_print_debug = FALSE;
112
/** The asynchronous i/o array slot structure */
113
typedef struct os_aio_slot_struct os_aio_slot_t;
115
/** The asynchronous i/o array slot structure */
116
struct os_aio_slot_struct{
117
ibool is_read; /*!< TRUE if a read operation */
118
ulint pos; /*!< index of the slot in the aio
120
ibool reserved; /*!< TRUE if this slot is reserved */
121
time_t reservation_time;/*!< time when reserved */
122
ulint len; /*!< length of the block to read or
124
byte* buf; /*!< buffer used in i/o */
125
ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */
126
ulint offset; /*!< 32 low bits of file offset in
128
ulint offset_high; /*!< 32 high bits of file offset */
129
os_file_t file; /*!< file where to read or write */
130
const char* name; /*!< file name or path */
131
ibool io_already_done;/*!< used only in simulated aio:
132
TRUE if the physical i/o already
133
made and only the slot message
134
needs to be passed to the caller
135
of os_aio_simulated_handle */
136
fil_node_t* message1; /*!< message which is given by the */
137
void* message2; /*!< the requester of an aio operation
138
and which can be used to identify
139
which pending aio operation was
142
os_event_t event; /*!< event object we need in the
144
OVERLAPPED control; /*!< Windows control block for the
149
/** The asynchronous i/o array structure */
150
typedef struct os_aio_array_struct os_aio_array_t;
152
/** The asynchronous i/o array structure */
153
struct os_aio_array_struct{
154
os_mutex_t mutex; /*!< the mutex protecting the aio array */
156
/*!< The event which is set to the
157
signaled state when there is space in
158
the aio outside the ibuf segment */
160
/*!< The event which is set to the
161
signaled state when there are no
162
pending i/os in this array */
163
ulint n_slots;/*!< Total number of slots in the aio
164
array. This must be divisible by
167
/*!< Number of segments in the aio
168
array of pending aio requests. A
169
thread can wait separately for any one
172
/*!< Number of reserved slots in the
173
aio array outside the ibuf segment */
174
os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
176
os_native_event_t* native_events;
177
/*!< Pointer to an array of OS native
178
event handles where we copied the
179
handles from slots, in the same
180
order. This can be used in
181
WaitForMultipleObjects; used only in
186
/** Array of events used in simulated aio */
187
static os_event_t* os_aio_segment_wait_events = NULL;
189
/** The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
190
are NULL when the module has not yet been initialized. @{ */
191
static os_aio_array_t* os_aio_read_array = NULL; /*!< Reads */
192
static os_aio_array_t* os_aio_write_array = NULL; /*!< Writes */
193
static os_aio_array_t* os_aio_ibuf_array = NULL; /*!< Insert buffer */
194
static os_aio_array_t* os_aio_log_array = NULL; /*!< Redo log */
195
static os_aio_array_t* os_aio_sync_array = NULL; /*!< Synchronous I/O */
198
/** Number of asynchronous I/O segments. Set by os_aio_init(). */
199
static ulint os_aio_n_segments = ULINT_UNDEFINED;
201
/** If the following is TRUE, read i/o handler threads try to
202
wait until a batch of new read requests has been posted */
203
static ibool os_aio_recommend_sleep_for_read_threads = FALSE;
204
#endif /* !UNIV_HOTBACKUP */
206
UNIV_INTERN ulint os_n_file_reads = 0;
207
UNIV_INTERN ulint os_bytes_read_since_printout = 0;
208
UNIV_INTERN ulint os_n_file_writes = 0;
209
UNIV_INTERN ulint os_n_fsyncs = 0;
210
UNIV_INTERN ulint os_n_file_reads_old = 0;
211
UNIV_INTERN ulint os_n_file_writes_old = 0;
212
UNIV_INTERN ulint os_n_fsyncs_old = 0;
213
UNIV_INTERN time_t os_last_printout;
215
UNIV_INTERN ibool os_has_said_disk_full = FALSE;
217
#ifndef UNIV_HOTBACKUP
218
/** The mutex protecting the following counts of pending I/O operations */
219
static os_mutex_t os_file_count_mutex;
220
#endif /* !UNIV_HOTBACKUP */
221
/** Number of pending os_file_pread() operations */
222
UNIV_INTERN ulint os_file_n_pending_preads = 0;
223
/** Number of pending os_file_pwrite() operations */
224
UNIV_INTERN ulint os_file_n_pending_pwrites = 0;
225
/** Number of pending write operations */
226
UNIV_INTERN ulint os_n_pending_writes = 0;
227
/** Number of pending read operations */
228
UNIV_INTERN ulint os_n_pending_reads = 0;
230
/***********************************************************************//**
231
Gets the operating system version. Currently works only on Windows.
232
@return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */
235
os_get_os_version(void)
236
/*===================*/
239
OSVERSIONINFO os_info;
241
os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
243
ut_a(GetVersionEx(&os_info));
245
if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) {
247
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
249
} else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
250
if (os_info.dwMajorVersion <= 4) {
266
/***********************************************************************//**
267
Retrieves the last error number if an error occurs in a file io function.
268
The number should be retrieved before any other OS calls (because they may
269
overwrite the error number). If the number is not known to this program,
270
the OS error number + 100 is returned.
271
@return error number, or OS error number + 100 */
274
os_file_get_last_error(
275
/*===================*/
276
ibool report_all_errors) /*!< in: TRUE if we want an error message
277
printed for all errors */
283
err = (ulint) GetLastError();
285
if (report_all_errors
286
|| (err != ERROR_DISK_FULL && err != ERROR_FILE_EXISTS)) {
288
ut_print_timestamp(stderr);
290
" InnoDB: Operating system error number %lu"
291
" in a file operation.\n", (ulong) err);
293
if (err == ERROR_PATH_NOT_FOUND) {
295
"InnoDB: The error means the system"
296
" cannot find the path specified.\n");
298
if (srv_is_being_started) {
300
"InnoDB: If you are installing InnoDB,"
301
" remember that you must create\n"
302
"InnoDB: directories yourself, InnoDB"
303
" does not create them.\n");
305
} else if (err == ERROR_ACCESS_DENIED) {
307
"InnoDB: The error means mysqld does not have"
308
" the access rights to\n"
309
"InnoDB: the directory. It may also be"
310
" you have created a subdirectory\n"
311
"InnoDB: of the same name as a data file.\n");
312
} else if (err == ERROR_SHARING_VIOLATION
313
|| err == ERROR_LOCK_VIOLATION) {
315
"InnoDB: The error means that another program"
316
" is using InnoDB's files.\n"
317
"InnoDB: This might be a backup or antivirus"
318
" software or another instance\n"
320
" Please close it to get rid of this error.\n");
323
"InnoDB: Some operating system error numbers"
324
" are described at\n"
327
"operating-system-error-codes.html\n");
333
if (err == ERROR_FILE_NOT_FOUND) {
334
return(OS_FILE_NOT_FOUND);
335
} else if (err == ERROR_DISK_FULL) {
336
return(OS_FILE_DISK_FULL);
337
} else if (err == ERROR_FILE_EXISTS) {
338
return(OS_FILE_ALREADY_EXISTS);
339
} else if (err == ERROR_SHARING_VIOLATION
340
|| err == ERROR_LOCK_VIOLATION) {
341
return(OS_FILE_SHARING_VIOLATION);
348
if (report_all_errors
349
|| (err != ENOSPC && err != EEXIST)) {
351
ut_print_timestamp(stderr);
353
" InnoDB: Operating system error number %lu"
354
" in a file operation.\n", (ulong) err);
358
"InnoDB: The error means the system"
359
" cannot find the path specified.\n");
361
if (srv_is_being_started) {
363
"InnoDB: If you are installing InnoDB,"
364
" remember that you must create\n"
365
"InnoDB: directories yourself, InnoDB"
366
" does not create them.\n");
368
} else if (err == EACCES) {
370
"InnoDB: The error means mysqld does not have"
371
" the access rights to\n"
372
"InnoDB: the directory.\n");
374
if (strerror((int)err) != NULL) {
376
"InnoDB: Error number %lu"
378
err, strerror((int)err));
382
"InnoDB: Some operating system"
383
" error numbers are described at\n"
386
"operating-system-error-codes.html\n");
393
return(OS_FILE_DISK_FULL);
394
} else if (err == ENOENT) {
395
return(OS_FILE_NOT_FOUND);
396
} else if (err == EEXIST) {
397
return(OS_FILE_ALREADY_EXISTS);
398
} else if (err == EXDEV || err == ENOTDIR || err == EISDIR) {
399
return(OS_FILE_PATH_ERROR);
406
/****************************************************************//**
407
Does error handling when a file operation fails.
408
Conditionally exits (calling exit(3)) based on should_exit value and the
410
@return TRUE if we should retry the operation */
413
os_file_handle_error_cond_exit(
414
/*===========================*/
415
const char* name, /*!< in: name of a file or NULL */
416
const char* operation, /*!< in: operation */
417
ibool should_exit) /*!< in: call exit(3) if unknown error
418
and this parameter is TRUE */
422
err = os_file_get_last_error(FALSE);
424
if (err == OS_FILE_DISK_FULL) {
425
/* We only print a warning about disk full once */
427
if (os_has_said_disk_full) {
433
ut_print_timestamp(stderr);
435
" InnoDB: Encountered a problem with"
439
ut_print_timestamp(stderr);
441
" InnoDB: Disk is full. Try to clean the disk"
442
" to free space.\n");
444
os_has_said_disk_full = TRUE;
449
} else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
452
} else if (err == OS_FILE_ALREADY_EXISTS
453
|| err == OS_FILE_PATH_ERROR) {
456
} else if (err == OS_FILE_SHARING_VIOLATION) {
458
os_thread_sleep(10000000); /* 10 sec */
462
fprintf(stderr, "InnoDB: File name %s\n", name);
465
fprintf(stderr, "InnoDB: File operation call: '%s'.\n",
469
fprintf(stderr, "InnoDB: Cannot continue operation.\n");
480
/****************************************************************//**
481
Does error handling when a file operation fails. This is the variant that
passes should_exit = TRUE to os_file_handle_error_cond_exit(); that
function's parameter documentation states that exit(3) is then called on
an unknown error.
482
@return TRUE if we should retry the operation */
485
os_file_handle_error(
486
/*=================*/
487
const char* name, /*!< in: name of a file or NULL */
488
const char* operation)/*!< in: operation; forwarded unchanged to
os_file_handle_error_cond_exit() */
490
/* exit in case of unknown error: the third argument is should_exit */
491
return(os_file_handle_error_cond_exit(name, operation, TRUE));
494
/****************************************************************//**
495
Does error handling when a file operation fails. This is the variant that
passes should_exit = FALSE to os_file_handle_error_cond_exit(), so the
exit(3) path described for that parameter is not requested.
496
@return TRUE if we should retry the operation */
499
os_file_handle_error_no_exit(
500
/*=========================*/
501
const char* name, /*!< in: name of a file or NULL */
502
const char* operation)/*!< in: operation; forwarded unchanged to
os_file_handle_error_cond_exit() */
504
/* don't exit in case of unknown error: the third argument is should_exit */
505
return(os_file_handle_error_cond_exit(name, operation, FALSE));
509
#define USE_FILE_LOCK
510
#if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__NETWARE__)
511
/* InnoDB Hot Backup does not lock the data files.
512
* On Windows, mandatory locking is used.
514
# undef USE_FILE_LOCK
517
/****************************************************************//**
518
Obtain an exclusive lock on a file.
519
@return 0 on success */
524
int fd, /*!< in: file descriptor */
525
const char* name) /*!< in: file name */
529
lk.l_whence = SEEK_SET;
530
lk.l_start = lk.l_len = 0;
531
if (fcntl(fd, F_SETLK, &lk) == -1) {
533
"InnoDB: Unable to lock %s, error: %d\n", name, errno);
535
if (errno == EAGAIN || errno == EACCES) {
537
"InnoDB: Check that you do not already have"
538
" another mysqld process\n"
539
"InnoDB: using the same InnoDB data"
548
#endif /* USE_FILE_LOCK */
550
#ifndef UNIV_HOTBACKUP
551
/****************************************************************//**
551
Creates the seek mutexes used in positioned reads and writes, and also
os_file_count_mutex, which is declared earlier in this file as the mutex
protecting the counts of pending I/O operations. Every mutex is obtained
from os_mutex_create(NULL). */
555
os_io_init_simple(void)
556
/*===================*/
560
/* mutex for the pending-I/O counters */
os_file_count_mutex = os_mutex_create(NULL);
562
/* fill all OS_FILE_N_SEEK_MUTEXES entries of os_file_seek_mutexes[] */
for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
563
os_file_seek_mutexes[i] = os_mutex_create(NULL);
567
/***********************************************************************//**
568
Creates a temporary file. This function is like tmpfile(3), but
569
the temporary file is created in the MySQL temporary directory.
570
On Netware, this function is like tmpfile(3), because the C run-time
571
library of Netware does not expose the delete-on-close flag.
572
@return temporary file handle, or NULL on error */
575
os_file_create_tmpfile(void)
576
/*========================*/
579
/* Netware branch: the stream comes directly from tmpfile(3) */
FILE* file = tmpfile();
580
#else /* __NETWARE__ */
582
/* other platforms: start from the descriptor returned by
innobase_mysql_tmpfile() */
int fd = innobase_mysql_tmpfile();
585
/* wrap the descriptor in a stdio stream using mode "w+b" */
file = fdopen(fd, "w+b");
587
#endif /* __NETWARE__ */
590
/* failure diagnostic: timestamped message on stderr that includes errno */
ut_print_timestamp(stderr);
592
" InnoDB: Error: unable to create temporary file;"
593
" errno: %d\n", errno);
598
#endif /* !__NETWARE__ */
603
#endif /* !UNIV_HOTBACKUP */
605
/***********************************************************************//**
606
The os_file_opendir() function opens a directory stream corresponding to the
607
directory named by the dirname argument. The directory stream is positioned
608
at the first entry. In both Unix and Windows we automatically skip the '.'
609
and '..' items at the start of the directory listing.
610
@return directory stream, NULL if error */
615
const char* dirname, /*!< in: directory name; it must not
616
contain a trailing '\' or '/' */
617
ibool error_is_fatal) /*!< in: TRUE if we should treat an
618
error as a fatal error; if we try to
619
open symlinks then we do not wish a
620
fatal error if it happens not to be
625
LPWIN32_FIND_DATA lpFindFileData;
626
char path[OS_FILE_MAX_PATH + 3];
628
ut_a(strlen(dirname) < OS_FILE_MAX_PATH);
630
strcpy(path, dirname);
631
strcpy(path + strlen(path), "\\*");
633
/* Note that in Windows opening the 'directory stream' also retrieves
634
the first entry in the directory. Since it is '.', that is no problem,
635
as we will skip over the '.' and '..' entries anyway. */
637
lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
639
dir = FindFirstFile((LPCTSTR) path, lpFindFileData);
641
ut_free(lpFindFileData);
643
if (dir == INVALID_HANDLE_VALUE) {
645
if (error_is_fatal) {
646
os_file_handle_error(dirname, "opendir");
654
dir = opendir(dirname);
656
if (dir == NULL && error_is_fatal) {
657
os_file_handle_error(dirname, "opendir");
664
/***********************************************************************//**
665
Closes a directory stream.
666
@return 0 if success, -1 if failure */
671
os_file_dir_t dir) /*!< in: directory stream */
676
ret = FindClose(dir);
679
os_file_handle_error_no_exit(NULL, "closedir");
691
os_file_handle_error_no_exit(NULL, "closedir");
698
/***********************************************************************//**
699
This function returns information about the next file in the directory. We jump
700
over the '.' and '..' entries in the directory.
701
@return 0 if ok, -1 if error, 1 if at the end of the directory */
704
os_file_readdir_next_file(
705
/*======================*/
706
const char* dirname,/*!< in: directory name or path */
707
os_file_dir_t dir, /*!< in: directory stream */
708
os_file_stat_t* info) /*!< in/out: buffer where the info is returned */
711
LPWIN32_FIND_DATA lpFindFileData;
714
lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
716
ret = FindNextFile(dir, lpFindFileData);
719
ut_a(strlen((char *) lpFindFileData->cFileName)
722
if (strcmp((char *) lpFindFileData->cFileName, ".") == 0
723
|| strcmp((char *) lpFindFileData->cFileName, "..") == 0) {
728
strcpy(info->name, (char *) lpFindFileData->cFileName);
730
info->size = (ib_int64_t)(lpFindFileData->nFileSizeLow)
731
+ (((ib_int64_t)(lpFindFileData->nFileSizeHigh))
734
if (lpFindFileData->dwFileAttributes
735
& FILE_ATTRIBUTE_REPARSE_POINT) {
736
/* TODO: test Windows symlinks */
737
/* TODO: MySQL has apparently its own symlink
738
implementation in Windows, dbname.sym can
739
redirect a database directory:
740
REFMAN "windows-symbolic-links.html" */
741
info->type = OS_FILE_TYPE_LINK;
742
} else if (lpFindFileData->dwFileAttributes
743
& FILE_ATTRIBUTE_DIRECTORY) {
744
info->type = OS_FILE_TYPE_DIR;
746
/* It is probably safest to assume that all other
747
file types are normal. Better to check them rather
748
than blindly skip them. */
750
info->type = OS_FILE_TYPE_FILE;
754
ut_free(lpFindFileData);
758
} else if (GetLastError() == ERROR_NO_MORE_FILES) {
762
os_file_handle_error_no_exit(dirname,
763
"readdir_next_file");
770
struct stat statinfo;
771
#ifdef HAVE_READDIR_R
772
char dirent_buf[sizeof(struct dirent)
773
+ _POSIX_PATH_MAX + 100];
774
/* In /mysys/my_lib.c, _POSIX_PATH_MAX + 1 is used as
775
the max file name len; but in most standards, the
776
length is NAME_MAX; we add 100 to be even safer */
781
#ifdef HAVE_READDIR_R
782
ret = readdir_r(dir, (struct dirent*)dirent_buf, &ent);
786
"InnoDB: cannot read directory %s, error %lu\n",
787
dirname, (ulong)ret);
793
/* End of directory */
798
ut_a(strlen(ent->d_name) < _POSIX_PATH_MAX + 100 - 1);
807
ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);
809
if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
814
strcpy(info->name, ent->d_name);
816
full_path = ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10);
818
sprintf(full_path, "%s/%s", dirname, ent->d_name);
820
ret = stat(full_path, &statinfo);
823
os_file_handle_error_no_exit(full_path, "stat");
830
info->size = (ib_int64_t)statinfo.st_size;
832
if (S_ISDIR(statinfo.st_mode)) {
833
info->type = OS_FILE_TYPE_DIR;
834
} else if (S_ISLNK(statinfo.st_mode)) {
835
info->type = OS_FILE_TYPE_LINK;
836
} else if (S_ISREG(statinfo.st_mode)) {
837
info->type = OS_FILE_TYPE_FILE;
839
info->type = OS_FILE_TYPE_UNKNOWN;
848
/*****************************************************************//**
849
This function attempts to create a directory named pathname. The new directory
850
gets default permissions. On Unix the permissions are (0770 & ~umask). If the
851
directory exists already, nothing is done and the call succeeds, unless the
852
fail_if_exists argument is true.
853
@return TRUE if call succeeds, FALSE on error */
856
os_file_create_directory(
857
/*=====================*/
858
const char* pathname, /*!< in: directory name as
859
null-terminated string */
860
ibool fail_if_exists) /*!< in: if TRUE, pre-existing directory
861
is treated as an error. */
866
rcode = CreateDirectory((LPCTSTR) pathname, NULL);
868
|| (GetLastError() == ERROR_ALREADY_EXISTS
869
&& !fail_if_exists))) {
871
os_file_handle_error(pathname, "CreateDirectory");
880
rcode = mkdir(pathname, 0770);
882
if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
884
os_file_handle_error(pathname, "mkdir");
893
/****************************************************************//**
894
A simple function to open or create a file.
895
@return own: handle to the file, not defined if error, error number
896
can be retrieved with os_file_get_last_error */
899
os_file_create_simple(
900
/*==================*/
901
const char* name, /*!< in: name of the file or path as a
902
null-terminated string */
903
ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is
904
opened (if does not exist, error), or
905
OS_FILE_CREATE if a new file is created
906
(if exists, error), or
907
OS_FILE_CREATE_PATH if new file
908
(if exists, error) and subdirectories along
909
its path are created (if needed)*/
910
ulint access_type,/*!< in: OS_FILE_READ_ONLY or
911
OS_FILE_READ_WRITE */
912
ibool* success)/*!< out: TRUE if succeed, FALSE if error */
918
DWORD attributes = 0;
924
if (create_mode == OS_FILE_OPEN) {
925
create_flag = OPEN_EXISTING;
926
} else if (create_mode == OS_FILE_CREATE) {
927
create_flag = CREATE_NEW;
928
} else if (create_mode == OS_FILE_CREATE_PATH) {
929
/* create subdirs along the path if needed */
930
*success = os_file_create_subdirs_if_needed(name);
934
create_flag = CREATE_NEW;
935
create_mode = OS_FILE_CREATE;
941
if (access_type == OS_FILE_READ_ONLY) {
942
access = GENERIC_READ;
943
} else if (access_type == OS_FILE_READ_WRITE) {
944
access = GENERIC_READ | GENERIC_WRITE;
950
file = CreateFile((LPCTSTR) name,
952
FILE_SHARE_READ | FILE_SHARE_WRITE,
953
/* file can be read and written also
954
by other processes */
955
NULL, /* default security attributes */
958
NULL); /*!< no template file */
960
if (file == INVALID_HANDLE_VALUE) {
963
retry = os_file_handle_error(name,
964
create_mode == OS_FILE_OPEN ?
982
if (create_mode == OS_FILE_OPEN) {
983
if (access_type == OS_FILE_READ_ONLY) {
984
create_flag = O_RDONLY;
986
create_flag = O_RDWR;
988
} else if (create_mode == OS_FILE_CREATE) {
989
create_flag = O_RDWR | O_CREAT | O_EXCL;
990
} else if (create_mode == OS_FILE_CREATE_PATH) {
991
/* create subdirs along the path if needed */
992
*success = os_file_create_subdirs_if_needed(name);
996
create_flag = O_RDWR | O_CREAT | O_EXCL;
997
create_mode = OS_FILE_CREATE;
1003
if (create_mode == OS_FILE_CREATE) {
1004
file = open(name, create_flag, S_IRUSR | S_IWUSR
1005
| S_IRGRP | S_IWGRP);
1007
file = open(name, create_flag);
1013
retry = os_file_handle_error(name,
1014
create_mode == OS_FILE_OPEN ?
1019
#ifdef USE_FILE_LOCK
1020
} else if (access_type == OS_FILE_READ_WRITE
1021
&& os_file_lock(file, name)) {
1031
#endif /* __WIN__ */
1034
/****************************************************************//**
1035
A simple function to open or create a file.
1036
@return own: handle to the file, not defined if error, error number
1037
can be retrieved with os_file_get_last_error */
1040
os_file_create_simple_no_error_handling(
1041
/*====================================*/
1042
const char* name, /*!< in: name of the file or path as a
1043
null-terminated string */
1044
ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
1045
is opened (if does not exist, error), or
1046
OS_FILE_CREATE if a new file is created
1047
(if exists, error) */
1048
ulint access_type,/*!< in: OS_FILE_READ_ONLY,
1049
OS_FILE_READ_WRITE, or
1050
OS_FILE_READ_ALLOW_DELETE; the last option is
1051
used by a backup program reading the file */
1052
ibool* success)/*!< out: TRUE if succeed, FALSE if error */
1058
DWORD attributes = 0;
1059
DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
1063
if (create_mode == OS_FILE_OPEN) {
1064
create_flag = OPEN_EXISTING;
1065
} else if (create_mode == OS_FILE_CREATE) {
1066
create_flag = CREATE_NEW;
1072
if (access_type == OS_FILE_READ_ONLY) {
1073
access = GENERIC_READ;
1074
} else if (access_type == OS_FILE_READ_WRITE) {
1075
access = GENERIC_READ | GENERIC_WRITE;
1076
} else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
1077
access = GENERIC_READ;
1078
share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ
1079
| FILE_SHARE_WRITE; /*!< A backup program has to give
1080
mysqld the maximum freedom to
1081
do what it likes with the
1088
file = CreateFile((LPCTSTR) name,
1091
NULL, /* default security attributes */
1094
NULL); /*!< no template file */
1096
if (file == INVALID_HANDLE_VALUE) {
1109
if (create_mode == OS_FILE_OPEN) {
1110
if (access_type == OS_FILE_READ_ONLY) {
1111
create_flag = O_RDONLY;
1113
create_flag = O_RDWR;
1115
} else if (create_mode == OS_FILE_CREATE) {
1116
create_flag = O_RDWR | O_CREAT | O_EXCL;
1122
if (create_mode == OS_FILE_CREATE) {
1123
file = open(name, create_flag, S_IRUSR | S_IWUSR
1124
| S_IRGRP | S_IWGRP);
1126
file = open(name, create_flag);
1131
#ifdef USE_FILE_LOCK
1132
} else if (access_type == OS_FILE_READ_WRITE
1133
&& os_file_lock(file, name)) {
1143
#endif /* __WIN__ */
1146
/****************************************************************//**
1147
Tries to disable OS caching on an opened file descriptor. */
1150
os_file_set_nocache(
1151
/*================*/
1152
int fd, /*!< in: file descriptor to alter */
1153
const char* file_name, /*!< in: file name, used in the
1154
diagnostic message */
1155
const char* operation_name) /*!< in: "open" or "create"; used in the
1156
diagnostic message */
1158
/* some versions of Solaris may not have DIRECTIO_ON */
1159
#if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
1160
if (directio(fd, DIRECTIO_ON) == -1) {
1162
errno_save = (int)errno;
1163
ut_print_timestamp(stderr);
1165
" InnoDB: Failed to set DIRECTIO_ON "
1166
"on file %s: %s: %s, continuing anyway\n",
1167
file_name, operation_name, strerror(errno_save));
1169
#elif defined(O_DIRECT)
1170
if (fcntl(fd, F_SETFL, O_DIRECT) == -1) {
1172
errno_save = (int)errno;
1173
ut_print_timestamp(stderr);
1175
" InnoDB: Failed to set O_DIRECT "
1176
"on file %s: %s: %s, continuing anyway\n",
1177
file_name, operation_name, strerror(errno_save));
1178
if (errno_save == EINVAL) {
1179
ut_print_timestamp(stderr);
1181
" InnoDB: O_DIRECT is known to result in "
1182
"'Invalid argument' on Linux on tmpfs, "
1183
"see MySQL Bug#26662\n");
1186
#else /* Required for OSX */
1189
(void)operation_name;
1193
/****************************************************************//**
1194
Opens an existing file or creates a new.
1195
@return own: handle to the file, not defined if error, error number
1196
can be retrieved with os_file_get_last_error */
1201
const char* name, /*!< in: name of the file or path as a
1202
null-terminated string */
1203
ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
1204
is opened (if does not exist, error), or
1205
OS_FILE_CREATE if a new file is created
1207
OS_FILE_OVERWRITE if a new file is created
1208
or an old overwritten;
1209
OS_FILE_OPEN_RAW, if a raw device or disk
1210
partition should be opened */
1211
ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
1212
non-buffered i/o is desired,
1213
OS_FILE_NORMAL, if any normal file;
1214
NOTE that it also depends on type, os_aio_..
1215
and srv_.. variables whether we really use
1216
async i/o or unbuffered i/o: look in the
1217
function source code for the exact rules */
1218
ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
1219
ibool* success)/*!< out: TRUE if succeed, FALSE if error */
1223
DWORD share_mode = FILE_SHARE_READ;
1230
if (create_mode == OS_FILE_OPEN_RAW) {
1231
create_flag = OPEN_EXISTING;
1232
share_mode = FILE_SHARE_WRITE;
1233
} else if (create_mode == OS_FILE_OPEN
1234
|| create_mode == OS_FILE_OPEN_RETRY) {
1235
create_flag = OPEN_EXISTING;
1236
} else if (create_mode == OS_FILE_CREATE) {
1237
create_flag = CREATE_NEW;
1238
} else if (create_mode == OS_FILE_OVERWRITE) {
1239
create_flag = CREATE_ALWAYS;
1245
if (purpose == OS_FILE_AIO) {
1246
/* If specified, use asynchronous (overlapped) io and no
1247
buffering of writes in the OS */
1250
if (os_aio_use_native_aio) {
1251
attributes = attributes | FILE_FLAG_OVERLAPPED;
1254
#ifdef UNIV_NON_BUFFERED_IO
1255
if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
1256
/* Do not use unbuffered i/o to log files because
1257
value 2 denotes that we do not flush the log at every
1258
commit, but only once per second */
1259
} else if (srv_win_file_flush_method
1260
== SRV_WIN_IO_UNBUFFERED) {
1261
attributes = attributes | FILE_FLAG_NO_BUFFERING;
1264
} else if (purpose == OS_FILE_NORMAL) {
1266
#ifdef UNIV_NON_BUFFERED_IO
1267
if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
1268
/* Do not use unbuffered i/o to log files because
1269
value 2 denotes that we do not flush the log at every
1270
commit, but only once per second */
1271
} else if (srv_win_file_flush_method
1272
== SRV_WIN_IO_UNBUFFERED) {
1273
attributes = attributes | FILE_FLAG_NO_BUFFERING;
1281
file = CreateFile((LPCTSTR) name,
1282
GENERIC_READ | GENERIC_WRITE, /* read and write
1284
share_mode, /* File can be read also by other
1285
processes; we must give the read
1286
permission because of ibbackup. We do
1287
not give the write permission to
1288
others because if one would succeed to
1289
start 2 instances of mysqld on the
1290
SAME files, that could cause severe
1291
database corruption! When opening
1292
raw disk partitions, Microsoft manuals
1293
say that we must give also the write
1295
NULL, /* default security attributes */
1298
NULL); /*!< no template file */
1300
if (file == INVALID_HANDLE_VALUE) {
1303
/* When srv_file_per_table is on, file creation failure may not
1304
be critical to the whole instance. Do not crash the server in
1305
case of unknown errors. */
1306
if (srv_file_per_table) {
1307
retry = os_file_handle_error_no_exit(name,
1308
create_mode == OS_FILE_CREATE ?
1311
retry = os_file_handle_error(name,
1312
create_mode == OS_FILE_CREATE ?
1328
const char* mode_str = NULL;
1329
const char* type_str = NULL;
1330
const char* purpose_str = NULL;
1335
if (create_mode == OS_FILE_OPEN || create_mode == OS_FILE_OPEN_RAW
1336
|| create_mode == OS_FILE_OPEN_RETRY) {
1338
create_flag = O_RDWR;
1339
} else if (create_mode == OS_FILE_CREATE) {
1340
mode_str = "CREATE";
1341
create_flag = O_RDWR | O_CREAT | O_EXCL;
1342
} else if (create_mode == OS_FILE_OVERWRITE) {
1343
mode_str = "OVERWRITE";
1344
create_flag = O_RDWR | O_CREAT | O_TRUNC;
1350
if (type == OS_LOG_FILE) {
1352
} else if (type == OS_DATA_FILE) {
1358
if (purpose == OS_FILE_AIO) {
1359
purpose_str = "AIO";
1360
} else if (purpose == OS_FILE_NORMAL) {
1361
purpose_str = "NORMAL";
1367
fprintf(stderr, "Opening file %s, mode %s, type %s, purpose %s\n",
1368
name, mode_str, type_str, purpose_str);
1371
/* We let O_SYNC only affect log files; note that we map O_DSYNC to
1372
O_SYNC because the datasync options seemed to corrupt files in 2001
1373
in both Linux and Solaris */
1374
if (type == OS_LOG_FILE
1375
&& srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
1378
fprintf(stderr, "Using O_SYNC for file %s\n", name);
1381
create_flag = create_flag | O_SYNC;
1385
file = open(name, create_flag, os_innodb_umask);
1390
/* When srv_file_per_table is on, file creation failure may not
1391
be critical to the whole instance. Do not crash the server in
1392
case of unknown errors. */
1393
if (srv_file_per_table) {
1394
retry = os_file_handle_error_no_exit(name,
1395
create_mode == OS_FILE_CREATE ?
1398
retry = os_file_handle_error(name,
1399
create_mode == OS_FILE_CREATE ?
1406
return(file /* -1 */);
1413
/* We disable OS caching (O_DIRECT) only on data files */
1414
if (type != OS_LOG_FILE
1415
&& srv_unix_file_flush_method == SRV_UNIX_O_DIRECT) {
1417
os_file_set_nocache(file, name, mode_str);
1420
#ifdef USE_FILE_LOCK
1421
if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
1423
if (create_mode == OS_FILE_OPEN_RETRY) {
1425
ut_print_timestamp(stderr);
1426
fputs(" InnoDB: Retrying to lock"
1427
" the first data file\n",
1429
for (i = 0; i < 100; i++) {
1430
os_thread_sleep(1000000);
1431
if (!os_file_lock(file, name)) {
1436
ut_print_timestamp(stderr);
1437
fputs(" InnoDB: Unable to open the first data file\n",
1445
#endif /* USE_FILE_LOCK */
1448
#endif /* __WIN__ */
1451
/***********************************************************************//**
1452
Deletes a file if it exists. The file has to be closed before calling this.
1453
@return TRUE if success */
1456
os_file_delete_if_exists(
1457
/*=====================*/
1458
const char* name) /*!< in: file path as a null-terminated string */
1464
/* In Windows, deleting an .ibd file may fail if ibbackup is copying
1467
ret = DeleteFile((LPCTSTR)name);
1473
if (GetLastError() == ERROR_FILE_NOT_FOUND) {
1474
/* the file does not exist, this is not an error */
1481
if (count > 100 && 0 == (count % 10)) {
1483
"InnoDB: Warning: cannot delete file %s\n"
1484
"InnoDB: Are you running ibbackup"
1485
" to back up the file?\n", name);
1487
os_file_get_last_error(TRUE); /* print error information */
1490
os_thread_sleep(1000000); /* sleep for a second */
1503
if (ret != 0 && errno != ENOENT) {
1504
os_file_handle_error_no_exit(name, "delete");
1513
/***********************************************************************//**
1514
Deletes a file. The file has to be closed before calling this.
1515
@return TRUE if success */
1520
const char* name) /*!< in: file path as a null-terminated string */
1526
/* In Windows, deleting an .ibd file may fail if ibbackup is copying
1529
ret = DeleteFile((LPCTSTR)name);
1535
if (GetLastError() == ERROR_FILE_NOT_FOUND) {
1536
/* If the file does not exist, we classify this as a 'mild'
1544
if (count > 100 && 0 == (count % 10)) {
1546
"InnoDB: Warning: cannot delete file %s\n"
1547
"InnoDB: Are you running ibbackup"
1548
" to back up the file?\n", name);
1550
os_file_get_last_error(TRUE); /* print error information */
1553
os_thread_sleep(1000000); /* sleep for a second */
1567
os_file_handle_error_no_exit(name, "delete");
1576
/***********************************************************************//**
1577
Renames a file (can also move it to another directory). It is safest that the
1578
file is closed before calling this function.
1579
@return TRUE if success */
1584
const char* oldpath,/*!< in: old file path as a null-terminated
1586
const char* newpath)/*!< in: new file path */
1591
ret = MoveFile((LPCTSTR)oldpath, (LPCTSTR)newpath);
1597
os_file_handle_error_no_exit(oldpath, "rename");
1603
ret = rename(oldpath, newpath);
1606
os_file_handle_error_no_exit(oldpath, "rename");
1615
/***********************************************************************//**
1616
Closes a file handle. In case of error, error number can be retrieved with
1617
os_file_get_last_error.
1618
@return TRUE if success */
1623
os_file_t file) /*!< in, own: handle to a file */
1630
ret = CloseHandle(file);
1636
os_file_handle_error(NULL, "close");
1645
os_file_handle_error(NULL, "close");
1654
#ifdef UNIV_HOTBACKUP
1655
/***********************************************************************//**
1656
Closes a file handle.
1657
@return TRUE if success */
1660
os_file_close_no_error_handling(
1661
/*============================*/
1662
os_file_t file) /*!< in, own: handle to a file */
1669
ret = CloseHandle(file);
1689
#endif /* UNIV_HOTBACKUP */
1691
/***********************************************************************//**
1693
@return TRUE if success */
1698
os_file_t file, /*!< in: handle to a file */
1699
ulint* size, /*!< out: least significant 32 bits of file
1701
ulint* size_high)/*!< out: most significant 32 bits of size */
1707
low = GetFileSize(file, &high);
1709
if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) {
1720
offs = lseek(file, 0, SEEK_END);
1722
if (offs == ((off_t)-1)) {
1727
if (sizeof(off_t) > 4) {
1728
*size = (ulint)(offs & 0xFFFFFFFFUL);
1729
*size_high = (ulint)(offs >> 32);
1731
*size = (ulint) offs;
1739
/***********************************************************************//**
1740
Gets file size as a 64-bit integer ib_int64_t.
1741
@return size in bytes, -1 if error */
1744
os_file_get_size_as_iblonglong(
1745
/*===========================*/
1746
os_file_t file) /*!< in: handle to a file */
1752
success = os_file_get_size(file, &size, &size_high);
1759
return((((ib_int64_t)size_high) << 32) + (ib_int64_t)size);
1762
/***********************************************************************//**
1763
Write the specified number of zeros to a newly created file.
1764
@return TRUE if success */
1769
const char* name, /*!< in: name of the file or path as a
1770
null-terminated string */
1771
os_file_t file, /*!< in: handle to a file */
1772
ulint size, /*!< in: least significant 32 bits of file
1774
ulint size_high)/*!< in: most significant 32 bits of size */
1776
ib_int64_t current_size;
1777
ib_int64_t desired_size;
1783
ut_a(size == (size & 0xFFFFFFFF));
1786
desired_size = (ib_int64_t)size + (((ib_int64_t)size_high) << 32);
1788
/* Write up to 1 megabyte at a time. */
1789
buf_size = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE))
1791
buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE);
1793
/* Align the buffer for possible raw i/o */
1794
buf = ut_align(buf2, UNIV_PAGE_SIZE);
1796
/* Write buffer full of zeros */
1797
memset(buf, 0, buf_size);
1799
if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
1801
fprintf(stderr, "InnoDB: Progress in MB:");
1804
while (current_size < desired_size) {
1807
if (desired_size - current_size < (ib_int64_t) buf_size) {
1808
n_bytes = (ulint) (desired_size - current_size);
1813
ret = os_file_write(name, file, buf,
1814
(ulint)(current_size & 0xFFFFFFFF),
1815
(ulint)(current_size >> 32),
1819
goto error_handling;
1822
/* Print about progress for each 100 MB written */
1823
if ((ib_int64_t) (current_size + n_bytes) / (ib_int64_t)(100 * 1024 * 1024)
1824
!= current_size / (ib_int64_t)(100 * 1024 * 1024)) {
1826
fprintf(stderr, " %lu00",
1827
(ulong) ((current_size + n_bytes)
1828
/ (ib_int64_t)(100 * 1024 * 1024)));
1831
current_size += n_bytes;
1834
if (desired_size >= (ib_int64_t)(100 * 1024 * 1024)) {
1836
fprintf(stderr, "\n");
1841
ret = os_file_flush(file);
1851
/***********************************************************************//**
1852
Truncates a file at its current position.
1853
@return TRUE if success */
1858
FILE* file) /*!< in: file to be truncated */
1861
HANDLE h = (HANDLE) _get_osfhandle(fileno(file));
1862
return(SetEndOfFile(h));
1864
return(!ftruncate(fileno(file), ftell(file)));
1865
#endif /* __WIN__ */
1869
/***********************************************************************//**
1870
Wrapper to fsync(2) that retries the call on some errors.
1871
Returns the value 0 if successful; otherwise the value -1 is returned and
1872
the global variable errno is set to indicate the error.
1873
@return 0 if success, -1 otherwise */
1879
os_file_t file) /*!< in: handle to a file */
1892
if (ret == -1 && errno == ENOLCK) {
1894
if (failures % 100 == 0) {
1896
ut_print_timestamp(stderr);
1898
" InnoDB: fsync(): "
1899
"No locks available; retrying\n");
1902
os_thread_sleep(200000 /* 0.2 sec */);
1915
#endif /* !__WIN__ */
1917
/***********************************************************************//**
1918
Flushes the write buffers of a given file to the disk.
1919
@return TRUE if success */
1924
os_file_t file) /*!< in, own: handle to a file */
1933
ret = FlushFileBuffers(file);
1939
/* Since Windows returns ERROR_INVALID_FUNCTION if the 'file' is
1940
actually a raw device, we choose to ignore that error if we are using
1943
if (srv_start_raw_disk_in_use && GetLastError()
1944
== ERROR_INVALID_FUNCTION) {
1948
os_file_handle_error(NULL, "flush");
1950
/* It is a fatal error if a file flush does not succeed, because then
1951
the database can get corrupt on disk */
1958
#if defined(HAVE_DARWIN_THREADS)
1959
# ifndef F_FULLFSYNC
1960
/* The following definition is from the Mac OS X 10.3 <sys/fcntl.h> */
1961
# define F_FULLFSYNC 51 /* fsync + ask the drive to flush to the media */
1962
# elif F_FULLFSYNC != 51
1963
# error "F_FULLFSYNC != 51: ABI incompatibility with Mac OS X 10.3"
1965
/* Apple has disabled fsync() for internal disk drives in OS X. That
1966
caused corruption for a user when he tested a power outage. Let us in
1967
OS X use a nonstandard flush method recommended by an Apple
1970
if (!srv_have_fullfsync) {
1971
/* If we are not on an operating system that supports this,
1972
then fall back to a plain fsync. */
1974
ret = os_file_fsync(file);
1976
ret = fcntl(file, F_FULLFSYNC, NULL);
1979
/* If we are not on a file system that supports this,
1980
then fall back to a plain fsync. */
1981
ret = os_file_fsync(file);
1985
ret = os_file_fsync(file);
1992
/* Since Linux returns EINVAL if the 'file' is actually a raw device,
1993
we choose to ignore that error if we are using raw disks */
1995
if (srv_start_raw_disk_in_use && errno == EINVAL) {
2000
ut_print_timestamp(stderr);
2003
" InnoDB: Error: the OS said file flush did not succeed\n");
2005
os_file_handle_error(NULL, "flush");
2007
/* It is a fatal error if a file flush does not succeed, because then
2008
the database can get corrupt on disk */
2016
/*******************************************************************//**
2017
Does a synchronous read operation in Posix.
2018
@return number of bytes read, -1 if error */
2023
os_file_t file, /*!< in: handle to a file */
2024
void* buf, /*!< in: buffer where to read */
2025
ulint n, /*!< in: number of bytes to read */
2026
ulint offset, /*!< in: least significant 32 bits of file
2027
offset from where to read */
2028
ulint offset_high) /*!< in: most significant 32 bits of
2034
ut_a((offset & 0xFFFFFFFFUL) == offset);
2036
/* If off_t is > 4 bytes in size, then we assume we can pass a
2039
if (sizeof(off_t) > 4) {
2040
offs = (off_t)offset + (((off_t)offset_high) << 32);
2043
offs = (off_t)offset;
2045
if (offset_high > 0) {
2047
"InnoDB: Error: file read at offset > 4 GB\n");
2053
#if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
2054
os_mutex_enter(os_file_count_mutex);
2055
os_file_n_pending_preads++;
2056
os_n_pending_reads++;
2057
os_mutex_exit(os_file_count_mutex);
2059
n_bytes = pread(file, buf, (ssize_t)n, offs);
2061
os_mutex_enter(os_file_count_mutex);
2062
os_file_n_pending_preads--;
2063
os_n_pending_reads--;
2064
os_mutex_exit(os_file_count_mutex);
2073
os_mutex_enter(os_file_count_mutex);
2074
os_n_pending_reads++;
2075
os_mutex_exit(os_file_count_mutex);
2077
/* Protect the seek / read operation with a mutex */
2078
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2080
os_mutex_enter(os_file_seek_mutexes[i]);
2082
ret_offset = lseek(file, offs, SEEK_SET);
2084
if (ret_offset < 0) {
2087
ret = read(file, buf, (ssize_t)n);
2090
os_mutex_exit(os_file_seek_mutexes[i]);
2092
os_mutex_enter(os_file_count_mutex);
2093
os_n_pending_reads--;
2094
os_mutex_exit(os_file_count_mutex);
2101
/*******************************************************************//**
2102
Does a synchronous write operation in Posix.
2103
@return number of bytes written, -1 if error */
2108
os_file_t file, /*!< in: handle to a file */
2109
const void* buf, /*!< in: buffer from where to write */
2110
ulint n, /*!< in: number of bytes to write */
2111
ulint offset, /*!< in: least significant 32 bits of file
2112
offset where to write */
2113
ulint offset_high) /*!< in: most significant 32 bits of
2119
ut_a((offset & 0xFFFFFFFFUL) == offset);
2121
/* If off_t is > 4 bytes in size, then we assume we can pass a
2124
if (sizeof(off_t) > 4) {
2125
offs = (off_t)offset + (((off_t)offset_high) << 32);
2127
offs = (off_t)offset;
2129
if (offset_high > 0) {
2131
"InnoDB: Error: file write"
2132
" at offset > 4 GB\n");
2138
#if defined(HAVE_PWRITE) && !defined(HAVE_BROKEN_PREAD)
2139
os_mutex_enter(os_file_count_mutex);
2140
os_file_n_pending_pwrites++;
2141
os_n_pending_writes++;
2142
os_mutex_exit(os_file_count_mutex);
2144
ret = pwrite(file, buf, (ssize_t)n, offs);
2146
os_mutex_enter(os_file_count_mutex);
2147
os_file_n_pending_pwrites--;
2148
os_n_pending_writes--;
2149
os_mutex_exit(os_file_count_mutex);
2151
# ifdef UNIV_DO_FLUSH
2152
if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
2153
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
2154
&& !os_do_not_call_flush_at_each_write) {
2156
/* Always do fsync to reduce the probability that when
2157
the OS crashes, a database page is only partially
2158
physically written to disk. */
2160
ut_a(TRUE == os_file_flush(file));
2162
# endif /* UNIV_DO_FLUSH */
2170
os_mutex_enter(os_file_count_mutex);
2171
os_n_pending_writes++;
2172
os_mutex_exit(os_file_count_mutex);
2174
/* Protect the seek / write operation with a mutex */
2175
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2177
os_mutex_enter(os_file_seek_mutexes[i]);
2179
ret_offset = lseek(file, offs, SEEK_SET);
2181
if (ret_offset < 0) {
2187
ret = write(file, buf, (ssize_t)n);
2189
# ifdef UNIV_DO_FLUSH
2190
if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
2191
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
2192
&& !os_do_not_call_flush_at_each_write) {
2194
/* Always do fsync to reduce the probability that when
2195
the OS crashes, a database page is only partially
2196
physically written to disk. */
2198
ut_a(TRUE == os_file_flush(file));
2200
# endif /* UNIV_DO_FLUSH */
2203
os_mutex_exit(os_file_seek_mutexes[i]);
2205
os_mutex_enter(os_file_count_mutex);
2206
os_n_pending_writes--;
2207
os_mutex_exit(os_file_count_mutex);
2215
/*******************************************************************//**
2216
Requests a synchronous positioned read operation.
2217
@return TRUE if request was successful, FALSE if fail */
2222
os_file_t file, /*!< in: handle to a file */
2223
void* buf, /*!< in: buffer where to read */
2224
ulint offset, /*!< in: least significant 32 bits of file
2225
offset where to read */
2226
ulint offset_high, /*!< in: most significant 32 bits of
2228
ulint n) /*!< in: number of bytes to read */
2239
ut_a((offset & 0xFFFFFFFFUL) == offset);
2242
os_bytes_read_since_printout += n;
2249
low = (DWORD) offset;
2250
high = (DWORD) offset_high;
2252
os_mutex_enter(os_file_count_mutex);
2253
os_n_pending_reads++;
2254
os_mutex_exit(os_file_count_mutex);
2256
/* Protect the seek / read operation with a mutex */
2257
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2259
os_mutex_enter(os_file_seek_mutexes[i]);
2261
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
2263
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
2265
os_mutex_exit(os_file_seek_mutexes[i]);
2267
os_mutex_enter(os_file_count_mutex);
2268
os_n_pending_reads--;
2269
os_mutex_exit(os_file_count_mutex);
2271
goto error_handling;
2274
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
2276
os_mutex_exit(os_file_seek_mutexes[i]);
2278
os_mutex_enter(os_file_count_mutex);
2279
os_n_pending_reads--;
2280
os_mutex_exit(os_file_count_mutex);
2282
if (ret && len == n) {
2289
os_bytes_read_since_printout += n;
2292
ret = os_file_pread(file, buf, n, offset, offset_high);
2294
if ((ulint)ret == n) {
2300
"InnoDB: Error: tried to read %lu bytes at offset %lu %lu.\n"
2301
"InnoDB: Was only able to read %ld.\n",
2302
(ulong)n, (ulong)offset_high,
2303
(ulong)offset, (long)ret);
2308
retry = os_file_handle_error(NULL, "read");
2315
"InnoDB: Fatal error: cannot read from file."
2316
" OS error number %lu.\n",
2318
(ulong) GetLastError()
2330
/*******************************************************************//**
2331
Requests a synchronous positioned read operation. This function does not do
2332
any error handling. In case of error it returns FALSE.
2333
@return TRUE if request was successful, FALSE if fail */
2336
os_file_read_no_error_handling(
2337
/*===========================*/
2338
os_file_t file, /*!< in: handle to a file */
2339
void* buf, /*!< in: buffer where to read */
2340
ulint offset, /*!< in: least significant 32 bits of file
2341
offset where to read */
2342
ulint offset_high, /*!< in: most significant 32 bits of
2344
ulint n) /*!< in: number of bytes to read */
2355
ut_a((offset & 0xFFFFFFFFUL) == offset);
2358
os_bytes_read_since_printout += n;
2365
low = (DWORD) offset;
2366
high = (DWORD) offset_high;
2368
os_mutex_enter(os_file_count_mutex);
2369
os_n_pending_reads++;
2370
os_mutex_exit(os_file_count_mutex);
2372
/* Protect the seek / read operation with a mutex */
2373
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2375
os_mutex_enter(os_file_seek_mutexes[i]);
2377
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
2379
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
2381
os_mutex_exit(os_file_seek_mutexes[i]);
2383
os_mutex_enter(os_file_count_mutex);
2384
os_n_pending_reads--;
2385
os_mutex_exit(os_file_count_mutex);
2387
goto error_handling;
2390
ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
2392
os_mutex_exit(os_file_seek_mutexes[i]);
2394
os_mutex_enter(os_file_count_mutex);
2395
os_n_pending_reads--;
2396
os_mutex_exit(os_file_count_mutex);
2398
if (ret && len == n) {
2405
os_bytes_read_since_printout += n;
2408
ret = os_file_pread(file, buf, n, offset, offset_high);
2410
if ((ulint)ret == n) {
2418
retry = os_file_handle_error_no_exit(NULL, "read");
2427
/*******************************************************************//**
2428
Rewind file to its start, read at most size - 1 bytes from it to str, and
2429
NUL-terminate str. All errors are silently ignored. This function is
2430
mostly meant to be used with temporary files. */
2433
os_file_read_string(
2434
/*================*/
2435
FILE* file, /*!< in: file to read from */
2436
char* str, /*!< in: buffer where to read */
2437
ulint size) /*!< in: size of buffer */
2446
flen = fread(str, 1, size - 1, file);
2450
/*******************************************************************//**
2451
Requests a synchronous write operation.
2452
@return TRUE if request was successful, FALSE if fail */
2457
const char* name, /*!< in: name of the file or path as a
2458
null-terminated string */
2459
os_file_t file, /*!< in: handle to a file */
2460
const void* buf, /*!< in: buffer from which to write */
2461
ulint offset, /*!< in: least significant 32 bits of file
2462
offset where to write */
2463
ulint offset_high, /*!< in: most significant 32 bits of
2465
ulint n) /*!< in: number of bytes to write */
2474
ulint n_retries = 0;
2477
ut_a((offset & 0xFFFFFFFF) == offset);
2485
low = (DWORD) offset;
2486
high = (DWORD) offset_high;
2488
os_mutex_enter(os_file_count_mutex);
2489
os_n_pending_writes++;
2490
os_mutex_exit(os_file_count_mutex);
2492
/* Protect the seek / write operation with a mutex */
2493
i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2495
os_mutex_enter(os_file_seek_mutexes[i]);
2497
ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
2499
if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
2501
os_mutex_exit(os_file_seek_mutexes[i]);
2503
os_mutex_enter(os_file_count_mutex);
2504
os_n_pending_writes--;
2505
os_mutex_exit(os_file_count_mutex);
2507
ut_print_timestamp(stderr);
2510
" InnoDB: Error: File pointer positioning to"
2511
" file %s failed at\n"
2512
"InnoDB: offset %lu %lu. Operating system"
2513
" error number %lu.\n"
2514
"InnoDB: Some operating system error numbers"
2515
" are described at\n"
2517
REFMAN "operating-system-error-codes.html\n",
2518
name, (ulong) offset_high, (ulong) offset,
2519
(ulong) GetLastError());
2524
ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
2526
/* Always do fsync to reduce the probability that when the OS crashes,
2527
a database page is only partially physically written to disk. */
2529
# ifdef UNIV_DO_FLUSH
2530
if (!os_do_not_call_flush_at_each_write) {
2531
ut_a(TRUE == os_file_flush(file));
2533
# endif /* UNIV_DO_FLUSH */
2535
os_mutex_exit(os_file_seek_mutexes[i]);
2537
os_mutex_enter(os_file_count_mutex);
2538
os_n_pending_writes--;
2539
os_mutex_exit(os_file_count_mutex);
2541
if (ret && len == n) {
2546
/* If some background file system backup tool is running, then, at
2547
least in Windows 2000, we may get here a specific error. Let us
2548
retry the operation 100 times, with 1 second waits. */
2550
if (GetLastError() == ERROR_LOCK_VIOLATION && n_retries < 100) {
2552
os_thread_sleep(1000000);
2559
if (!os_has_said_disk_full) {
2561
err = (ulint)GetLastError();
2563
ut_print_timestamp(stderr);
2566
" InnoDB: Error: Write to file %s failed"
2567
" at offset %lu %lu.\n"
2568
"InnoDB: %lu bytes should have been written,"
2569
" only %lu were written.\n"
2570
"InnoDB: Operating system error number %lu.\n"
2571
"InnoDB: Check that your OS and file system"
2572
" support files of this size.\n"
2573
"InnoDB: Check also that the disk is not full"
2574
" or a disk quota exceeded.\n",
2575
name, (ulong) offset_high, (ulong) offset,
2576
(ulong) n, (ulong) len, (ulong) err);
2578
if (strerror((int)err) != NULL) {
2580
"InnoDB: Error number %lu means '%s'.\n",
2581
(ulong) err, strerror((int)err));
2585
"InnoDB: Some operating system error numbers"
2586
" are described at\n"
2588
REFMAN "operating-system-error-codes.html\n");
2590
os_has_said_disk_full = TRUE;
2597
ret = os_file_pwrite(file, buf, n, offset, offset_high);
2599
if ((ulint)ret == n) {
2604
if (!os_has_said_disk_full) {
2606
ut_print_timestamp(stderr);
2609
" InnoDB: Error: Write to file %s failed"
2610
" at offset %lu %lu.\n"
2611
"InnoDB: %lu bytes should have been written,"
2612
" only %ld were written.\n"
2613
"InnoDB: Operating system error number %lu.\n"
2614
"InnoDB: Check that your OS and file system"
2615
" support files of this size.\n"
2616
"InnoDB: Check also that the disk is not full"
2617
" or a disk quota exceeded.\n",
2618
name, offset_high, offset, n, (long int)ret,
2620
if (strerror(errno) != NULL) {
2622
"InnoDB: Error number %lu means '%s'.\n",
2623
(ulint)errno, strerror(errno));
2627
"InnoDB: Some operating system error numbers"
2628
" are described at\n"
2630
REFMAN "operating-system-error-codes.html\n");
2632
os_has_said_disk_full = TRUE;
2639
/*******************************************************************//**
2640
Check the existence and type of the given file.
2641
@return TRUE if call succeeded */
2646
const char* path, /*!< in: pathname of the file */
2647
ibool* exists, /*!< out: TRUE if file exists */
2648
os_file_type_t* type) /*!< out: type of the file (if it exists) */
2652
struct _stat statinfo;
2654
ret = _stat(path, &statinfo);
2655
if (ret && (errno == ENOENT || errno == ENOTDIR)) {
2656
/* file does not exist */
2660
/* file exists, but stat call failed */
2662
os_file_handle_error_no_exit(path, "stat");
2667
if (_S_IFDIR & statinfo.st_mode) {
2668
*type = OS_FILE_TYPE_DIR;
2669
} else if (_S_IFREG & statinfo.st_mode) {
2670
*type = OS_FILE_TYPE_FILE;
2672
*type = OS_FILE_TYPE_UNKNOWN;
2680
struct stat statinfo;
2682
ret = stat(path, &statinfo);
2683
if (ret && (errno == ENOENT || errno == ENOTDIR)) {
2684
/* file does not exist */
2688
/* file exists, but stat call failed */
2690
os_file_handle_error_no_exit(path, "stat");
2695
if (S_ISDIR(statinfo.st_mode)) {
2696
*type = OS_FILE_TYPE_DIR;
2697
} else if (S_ISLNK(statinfo.st_mode)) {
2698
*type = OS_FILE_TYPE_LINK;
2699
} else if (S_ISREG(statinfo.st_mode)) {
2700
*type = OS_FILE_TYPE_FILE;
2702
*type = OS_FILE_TYPE_UNKNOWN;
2711
/*******************************************************************//**
2712
This function returns information about the specified file
2713
@return TRUE if stat information found */
2718
const char* path, /*!< in: pathname of the file */
2719
os_file_stat_t* stat_info) /*!< information of a file in a
2724
struct _stat statinfo;
2726
ret = _stat(path, &statinfo);
2727
if (ret && (errno == ENOENT || errno == ENOTDIR)) {
2728
/* file does not exist */
2732
/* file exists, but stat call failed */
2734
os_file_handle_error_no_exit(path, "stat");
2738
if (_S_IFDIR & statinfo.st_mode) {
2739
stat_info->type = OS_FILE_TYPE_DIR;
2740
} else if (_S_IFREG & statinfo.st_mode) {
2741
stat_info->type = OS_FILE_TYPE_FILE;
2743
stat_info->type = OS_FILE_TYPE_UNKNOWN;
2746
stat_info->ctime = statinfo.st_ctime;
2747
stat_info->atime = statinfo.st_atime;
2748
stat_info->mtime = statinfo.st_mtime;
2749
stat_info->size = statinfo.st_size;
2754
struct stat statinfo;
2756
ret = stat(path, &statinfo);
2758
if (ret && (errno == ENOENT || errno == ENOTDIR)) {
2759
/* file does not exist */
2763
/* file exists, but stat call failed */
2765
os_file_handle_error_no_exit(path, "stat");
2770
if (S_ISDIR(statinfo.st_mode)) {
2771
stat_info->type = OS_FILE_TYPE_DIR;
2772
} else if (S_ISLNK(statinfo.st_mode)) {
2773
stat_info->type = OS_FILE_TYPE_LINK;
2774
} else if (S_ISREG(statinfo.st_mode)) {
2775
stat_info->type = OS_FILE_TYPE_FILE;
2777
stat_info->type = OS_FILE_TYPE_UNKNOWN;
2780
stat_info->ctime = statinfo.st_ctime;
2781
stat_info->atime = statinfo.st_atime;
2782
stat_info->mtime = statinfo.st_mtime;
2783
stat_info->size = statinfo.st_size;
2789
/* path name separator character */
2791
# define OS_FILE_PATH_SEPARATOR '\\'
2793
# define OS_FILE_PATH_SEPARATOR '/'
2796
/****************************************************************//**
2797
The function os_file_dirname returns a directory component of a
2798
null-terminated pathname string. In the usual case, dirname returns
2799
the string up to, but not including, the final '/', and basename
2800
is the component following the final '/'. Trailing '/' characters
2801
are not counted as part of the pathname.
2803
If path does not contain a slash, dirname returns the string ".".
2805
Concatenating the string returned by dirname, a "/", and the basename
2806
yields a complete pathname.
2808
The return value is a copy of the directory component of the pathname.
2809
The copy is allocated from the heap. It is the caller's responsibility
2810
to free it after it is no longer needed.
2812
The following list of examples (taken from SUSv2) shows the strings
2813
returned by dirname and basename for different paths:
2815
path dirname basename
2816
"/usr/lib" "/usr" "lib"
2823
@return own: directory component of the pathname */
2828
const char* path) /*!< in: pathname */
2830
/* Find the offset of the last slash */
2831
const char* last_slash = strrchr(path, OS_FILE_PATH_SEPARATOR);
2833
/* No slash in the path, return "." */
2835
return(mem_strdup("."));
2838
/* Ok, there is a slash */
2840
if (last_slash == path) {
2841
/* last slash is the first char of the path */
2843
return(mem_strdup("/"));
2846
/* Non-trivial directory component */
2848
return(mem_strdupl(path, last_slash - path));
2851
/****************************************************************//**
2852
Creates all missing subdirectories along the given path.
2853
@return TRUE if call succeeded, FALSE otherwise */
2856
os_file_create_subdirs_if_needed(
2857
/*=============================*/
2858
const char* path) /*!< in: path name */
2861
ibool success, subdir_exists;
2862
os_file_type_t type;
2864
subdir = os_file_dirname(path);
2865
if (strlen(subdir) == 1
2866
&& (*subdir == OS_FILE_PATH_SEPARATOR || *subdir == '.')) {
2867
/* subdir is root or cwd, nothing to do */
2873
/* Test if subdir exists */
2874
success = os_file_status(subdir, &subdir_exists, &type);
2875
if (success && !subdir_exists) {
2876
/* subdir does not exist, create it */
2877
success = os_file_create_subdirs_if_needed(subdir);
2883
success = os_file_create_directory(subdir, FALSE);
2891
#ifndef UNIV_HOTBACKUP
2892
/****************************************************************//**
2893
Returns a pointer to the nth slot in the aio array.
2894
@return pointer to slot */
2897
os_aio_array_get_nth_slot(
2898
/*======================*/
2899
os_aio_array_t* array, /*!< in: aio array */
2900
ulint index) /*!< in: index of the slot */
2902
ut_a(index < array->n_slots);
2904
return((array->slots) + index);
2907
/************************************************************************//**
2908
Creates an aio wait array.
The new array has no reserved slots and its is_empty event is set. Each
of the n slots is marked unreserved and gets its own event, whose handle
is also stored in the native_events array at the same index.
2909
@return own: aio array */
2912
os_aio_array_create(
2913
/*================*/
2914
ulint n, /*!< in: maximum number of pending aio operations
2915
allowed; n must be divisible by n_segments */
2916
ulint n_segments) /*!< in: number of segments in the aio array */
2918
os_aio_array_t* array;
2920
os_aio_slot_t* slot;
2925
ut_a(n_segments > 0);
2927
array = ut_malloc(sizeof(os_aio_array_t));
2929
array->mutex = os_mutex_create(NULL);
2930
array->not_full = os_event_create(NULL);
2931
array->is_empty = os_event_create(NULL);
2933
os_event_set(array->is_empty);
2936
array->n_segments = n_segments;
2937
array->n_reserved = 0;
2938
array->slots = ut_malloc(n * sizeof(os_aio_slot_t));
2940
array->native_events = ut_malloc(n * sizeof(os_native_event_t));
2942
for (i = 0; i < n; i++) {
2943
slot = os_aio_array_get_nth_slot(array, i);
2946
slot->reserved = FALSE;
2948
slot->event = os_event_create(NULL);
2950
over = &(slot->control);
2952
over->hEvent = slot->event->handle;
2954
*((array->native_events) + i) = over->hEvent;
2961
/***********************************************************************
2962
Initializes the asynchronous io system. Creates one array each for ibuf
2963
and log i/o. Also creates one array each for read and write where each
2964
array is divided logically into n_read_segs and n_write_segs
2965
respectively. The caller must create an i/o handler thread for each
2966
segment in these arrays. This function also creates the sync array.
2967
No i/o handler thread needs to be created for that.
The total number of segments is 2 + n_read_segs + n_write_segs. In
srv_io_thread_function, index 0 is labelled the insert buffer thread,
index 1 the log thread, then come the read threads and last the write
threads. One wait event is created per segment. */
2972
ulint n_per_seg, /*!< in: maximum number of pending aio
2973
operations allowed per segment */
2974
ulint n_read_segs, /*!< in: number of reader threads */
2975
ulint n_write_segs, /*!< in: number of writer threads */
2976
ulint n_slots_sync) /*!< in: number of slots in the sync aio
2980
ulint n_segments = 2 + n_read_segs + n_write_segs;
2982
ut_ad(n_segments >= 4);
2984
os_io_init_simple();
2986
for (i = 0; i < n_segments; i++) {
2987
srv_set_io_thread_op_info(i, "not started yet");
2991
/* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
2993
os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
2995
srv_io_thread_function[0] = "insert buffer thread";
2997
os_aio_log_array = os_aio_array_create(n_per_seg, 1);
2999
srv_io_thread_function[1] = "log thread";
3001
os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
3003
for (i = 2; i < 2 + n_read_segs; i++) {
3004
ut_a(i < SRV_MAX_N_IO_THREADS);
3005
srv_io_thread_function[i] = "read thread";
3008
os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
3010
for (i = 2 + n_read_segs; i < n_segments; i++) {
3011
ut_a(i < SRV_MAX_N_IO_THREADS);
3012
srv_io_thread_function[i] = "write thread";
3015
os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
3017
os_aio_n_segments = n_segments;
3021
os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*));
3023
for (i = 0; i < n_segments; i++) {
3024
os_aio_segment_wait_events[i] = os_event_create(NULL);
3027
os_last_printout = time(NULL);
3032
/************************************************************************//**
3033
Wakes up all async i/o threads in the array in Windows async i/o at
shutdown, by setting the event of every slot in the array.
3037
os_aio_array_wake_win_aio_at_shutdown(
3038
/*==================================*/
3039
os_aio_array_t* array) /*!< in: aio array */
3043
for (i = 0; i < array->n_slots; i++) {
3045
os_event_set((array->slots + i)->event);
3050
/************************************************************************//**
3051
Wakes up all async i/o threads so that they know to exit themselves in
shutdown. Sets the slot events of the read, write, ibuf and log arrays
through os_aio_array_wake_win_aio_at_shutdown() and then sets every
segment wait event.
3055
os_aio_wake_all_threads_at_shutdown(void)
3056
/*=====================================*/
3061
/* This code wakes up all aio threads in Windows native aio */
3062
os_aio_array_wake_win_aio_at_shutdown(os_aio_read_array);
3063
os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
3064
os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
3065
os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
3067
/* This loop wakes up all simulated aio threads */
3069
for (i = 0; i < os_aio_n_segments; i++) {
3071
os_event_set(os_aio_segment_wait_events[i]);
3075
/************************************************************************//**
3076
Waits until there are no pending writes in os_aio_write_array. There can
3077
be other, synchronous, pending writes.
The wait is done on the is_empty event of os_aio_write_array. */
3080
os_aio_wait_until_no_pending_writes(void)
3081
/*=====================================*/
3083
os_event_wait(os_aio_write_array->is_empty);
3086
/**********************************************************************//**
3087
Calculates segment number for a slot.
For the read and write arrays the local segment is slot->pos divided by
the number of slots per segment (n_slots / n_segments). Read segments
are numbered from 2 upwards and write segments follow directly after
the read segments. Any array other than ibuf, log or read is asserted
to be the write array.
3088
@return segment number (which is the number used by, for example,
3089
i/o-handler threads) */
3092
os_aio_get_segment_no_from_slot(
3093
/*============================*/
3094
os_aio_array_t* array, /*!< in: aio wait array */
3095
os_aio_slot_t* slot) /*!< in: slot in this array */
3100
if (array == os_aio_ibuf_array) {
3103
} else if (array == os_aio_log_array) {
3106
} else if (array == os_aio_read_array) {
3107
seg_len = os_aio_read_array->n_slots
3108
/ os_aio_read_array->n_segments;
3110
segment = 2 + slot->pos / seg_len;
3112
ut_a(array == os_aio_write_array);
3113
seg_len = os_aio_write_array->n_slots
3114
/ os_aio_write_array->n_segments;
3116
segment = os_aio_read_array->n_segments + 2
3117
+ slot->pos / seg_len;
3123
/**********************************************************************//**
3124
Calculates local segment number and aio array from global segment number.
Global segment 0 selects the ibuf array and 1 the log array. The next
os_aio_read_array->n_segments numbers select the read array, and all
remaining numbers select the write array; for these two arrays the
local segment is the global number minus the first number of the array.
The global number is asserted to be below os_aio_n_segments.
3125
@return local segment number within the aio array */
3128
os_aio_get_array_and_local_segment(
3129
/*===============================*/
3130
os_aio_array_t** array, /*!< out: aio wait array */
3131
ulint global_segment)/*!< in: global segment number */
3135
ut_a(global_segment < os_aio_n_segments);
3137
if (global_segment == 0) {
3138
*array = os_aio_ibuf_array;
3141
} else if (global_segment == 1) {
3142
*array = os_aio_log_array;
3145
} else if (global_segment < os_aio_read_array->n_segments + 2) {
3146
*array = os_aio_read_array;
3148
segment = global_segment - 2;
3150
*array = os_aio_write_array;
3152
segment = global_segment - (os_aio_read_array->n_segments + 2);
3158
/*******************************************************************//**
3159
Requests for a slot in the aio array. If no slot is available, waits until
3160
not_full-event becomes signaled.
The array mutex is released before waiting on not_full, and with
simulated aio the handler threads are woken first so that slots can be
freed. The search for a free slot starts at the local segment derived
from the file offset, so that adjacent blocks tend to share a segment.
Reserving the first slot resets is_empty, and reserving the last free
slot resets not_full.
3161
@return pointer to slot */
3164
os_aio_array_reserve_slot(
3165
/*======================*/
3166
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
3167
os_aio_array_t* array, /*!< in: aio array */
3168
fil_node_t* message1,/*!< in: message to be passed along with
3169
the aio operation */
3170
void* message2,/*!< in: message to be passed along with
3171
the aio operation */
3172
os_file_t file, /*!< in: file handle */
3173
const char* name, /*!< in: name of the file or path as a
3174
null-terminated string */
3175
void* buf, /*!< in: buffer where to read or from which
3177
ulint offset, /*!< in: least significant 32 bits of file
3179
ulint offset_high, /*!< in: most significant 32 bits of
3181
ulint len) /*!< in: length of the block to read or write */
3183
os_aio_slot_t* slot;
3185
OVERLAPPED* control;
3188
ulint slots_per_seg;
3191
/* No need of a mutex. Only reading constant fields */
3192
slots_per_seg = array->n_slots / array->n_segments;
3194
/* We attempt to keep adjacent blocks in the same local
3195
segment. This can help in merging IO requests when we are
3196
doing simulated AIO */
3197
local_seg = (offset >> (UNIV_PAGE_SIZE_SHIFT + 6))
3198
% array->n_segments;
3201
os_mutex_enter(array->mutex);
3203
if (array->n_reserved == array->n_slots) {
3204
os_mutex_exit(array->mutex);
3206
if (!os_aio_use_native_aio) {
3207
/* If the handler threads are suspended, wake them
3208
so that we get more slots */
3210
os_aio_simulated_wake_handler_threads();
3213
os_event_wait(array->not_full);
3218
/* First try to find a slot in the preferred local segment */
3219
for (i = local_seg * slots_per_seg; i < array->n_slots; i++) {
3220
slot = os_aio_array_get_nth_slot(array, i);
3222
if (slot->reserved == FALSE) {
3227
/* Fall back to a full scan. We are guaranteed to find a slot */
3229
slot = os_aio_array_get_nth_slot(array, i);
3231
if (slot->reserved == FALSE) {
3237
ut_a(slot->reserved == FALSE);
3238
array->n_reserved++;
3240
if (array->n_reserved == 1) {
3241
os_event_reset(array->is_empty);
3244
if (array->n_reserved == array->n_slots) {
3245
os_event_reset(array->not_full);
3248
slot->reserved = TRUE;
3249
slot->reservation_time = time(NULL);
3250
slot->message1 = message1;
3251
slot->message2 = message2;
3257
slot->offset = offset;
3258
slot->offset_high = offset_high;
3259
slot->io_already_done = FALSE;
3262
control = &(slot->control);
3263
control->Offset = (DWORD)offset;
3264
control->OffsetHigh = (DWORD)offset_high;
3265
os_event_reset(slot->event);
3268
os_mutex_exit(array->mutex);
3273
/*******************************************************************//**
3274
Frees a slot in the aio array.
Works under the array mutex: marks the slot unreserved and decrements
n_reserved. The not_full event is set when the array goes from full to
having exactly one free slot, and the is_empty event is set when the
last reserved slot is released. The slot event is reset. */
3277
os_aio_array_free_slot(
3278
/*===================*/
3279
os_aio_array_t* array, /*!< in: aio array */
3280
os_aio_slot_t* slot) /*!< in: pointer to slot */
3285
os_mutex_enter(array->mutex);
3287
ut_ad(slot->reserved);
3289
slot->reserved = FALSE;
3291
array->n_reserved--;
3293
if (array->n_reserved == array->n_slots - 1) {
3294
os_event_set(array->not_full);
3297
if (array->n_reserved == 0) {
3298
os_event_set(array->is_empty);
3302
os_event_reset(slot->event);
3304
os_mutex_exit(array->mutex);
3307
/**********************************************************************//**
3308
Wakes up a simulated aio i/o-handler thread if it has something to do.
The slots of the local segment are scanned under the array mutex for a
reserved slot; the thread is woken by setting the wait event of the
global segment. Must not be called when native aio is in use (debug
assertion). */
3311
os_aio_simulated_wake_handler_thread(
3312
/*=================================*/
3313
ulint global_segment) /*!< in: the number of the segment in the aio
3316
os_aio_array_t* array;
3317
os_aio_slot_t* slot;
3322
ut_ad(!os_aio_use_native_aio);
3324
segment = os_aio_get_array_and_local_segment(&array, global_segment);
3326
n = array->n_slots / array->n_segments;
3328
/* Look through n slots after the segment * n'th slot */
3330
os_mutex_enter(array->mutex);
3332
for (i = 0; i < n; i++) {
3333
slot = os_aio_array_get_nth_slot(array, i + segment * n);
3335
if (slot->reserved) {
3336
/* Found an i/o request */
3342
os_mutex_exit(array->mutex);
3345
os_event_set(os_aio_segment_wait_events[global_segment]);
3349
/**********************************************************************//**
3350
Wakes up simulated aio i/o-handler threads if they have something to do.
Clears os_aio_recommend_sleep_for_read_threads and then calls
os_aio_simulated_wake_handler_thread() for every global segment. */
3353
os_aio_simulated_wake_handler_threads(void)
3354
/*=======================================*/
3358
if (os_aio_use_native_aio) {
3359
/* We do not use simulated aio: do nothing */
3364
os_aio_recommend_sleep_for_read_threads = FALSE;
3366
for (i = 0; i < os_aio_n_segments; i++) {
3367
os_aio_simulated_wake_handler_thread(i);
3371
/**********************************************************************//**
3372
This function can be called if one wants to post a batch of reads and
3373
prefers an i/o-handler thread to handle them all at once later. You must
3374
call os_aio_simulated_wake_handler_threads later to ensure the threads
3375
are not left sleeping!
Sets os_aio_recommend_sleep_for_read_threads and resets the wait event
of every global segment that maps to os_aio_read_array. */
3378
os_aio_simulated_put_read_threads_to_sleep(void)
3379
/*============================================*/
3381
os_aio_array_t* array;
3384
os_aio_recommend_sleep_for_read_threads = TRUE;
3386
for (g = 0; g < os_aio_n_segments; g++) {
3387
os_aio_get_array_and_local_segment(&array, g);
3389
if (array == os_aio_read_array) {
3391
os_event_reset(os_aio_segment_wait_events[g]);
3396
/*******************************************************************//**
3397
Requests an asynchronous i/o operation.
The OS_AIO_SIMULATED_WAKE_LATER flag is stripped from mode before the
aio array is chosen: OS_AIO_NORMAL uses the read or the write array
depending on type, OS_AIO_IBUF the ibuf array, OS_AIO_LOG the log array
and OS_AIO_SYNC the sync array. A slot is then reserved in that array
with os_aio_array_reserve_slot(); if the request cannot be queued the
slot is released again with os_aio_array_free_slot().
3398
@return TRUE if request was queued successfully, FALSE if fail */
3403
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
3404
ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
3405
to OS_AIO_SIMULATED_WAKE_LATER: the
3406
last flag advises this function not to wake
3407
i/o-handler threads, but the caller will
3408
do the waking explicitly later, in this
3409
way the caller can post several requests in
3410
a batch; NOTE that the batch must not be
3411
so big that it exhausts the slots in aio
3412
arrays! NOTE that a simulated batch
3413
may introduce hidden chances of deadlocks,
3414
because i/os are not actually handled until
3415
all have been posted: use with great
3417
const char* name, /*!< in: name of the file or path as a
3418
null-terminated string */
3419
os_file_t file, /*!< in: handle to a file */
3420
void* buf, /*!< in: buffer where to read or from which
3422
ulint offset, /*!< in: least significant 32 bits of file
3423
offset where to read or write */
3424
ulint offset_high, /*!< in: most significant 32 bits of
3426
ulint n, /*!< in: number of bytes to read or write */
3427
fil_node_t* message1,/*!< in: message for the aio handler
3428
(can be used to identify a completed
3429
aio operation); ignored if mode is
3431
void* message2)/*!< in: message for the aio handler
3432
(can be used to identify a completed
3433
aio operation); ignored if mode is
3436
os_aio_array_t* array;
3437
os_aio_slot_t* slot;
3441
DWORD len = (DWORD) n;
3442
struct fil_node_struct * dummy_mess1;
3453
ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0);
3454
ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0);
3455
ut_ad(os_aio_validate());
3457
wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
3458
mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
3460
if (mode == OS_AIO_SYNC
3462
&& !os_aio_use_native_aio
3465
/* This is actually an ordinary synchronous read or write:
3466
no need to use an i/o-handler thread. NOTE that if we use
3467
Windows async i/o, Windows does not allow us to use
3468
ordinary synchronous os_file_read etc. on the same file,
3469
therefore we have built a special mechanism for synchronous
3470
wait in the Windows case. */
3472
if (type == OS_FILE_READ) {
3473
return(os_file_read(file, buf, offset,
3477
ut_a(type == OS_FILE_WRITE);
3479
return(os_file_write(name, file, buf, offset, offset_high, n));
3483
if (mode == OS_AIO_NORMAL) {
3484
if (type == OS_FILE_READ) {
3485
array = os_aio_read_array;
3487
array = os_aio_write_array;
3489
} else if (mode == OS_AIO_IBUF) {
3490
ut_ad(type == OS_FILE_READ);
3491
/* Reduce probability of deadlock bugs in connection with ibuf:
3492
do not let the ibuf i/o handler sleep */
3496
array = os_aio_ibuf_array;
3497
} else if (mode == OS_AIO_LOG) {
3499
array = os_aio_log_array;
3500
} else if (mode == OS_AIO_SYNC) {
3501
array = os_aio_sync_array;
3503
array = NULL; /* Eliminate compiler warning */
3507
slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
3508
name, buf, offset, offset_high, n);
3509
if (type == OS_FILE_READ) {
3510
if (os_aio_use_native_aio) {
3513
os_bytes_read_since_printout += len;
3515
ret = ReadFile(file, buf, (DWORD)n, &len,
3520
os_aio_simulated_wake_handler_thread(
3521
os_aio_get_segment_no_from_slot(
3525
} else if (type == OS_FILE_WRITE) {
3526
if (os_aio_use_native_aio) {
3529
ret = WriteFile(file, buf, (DWORD)n, &len,
3534
os_aio_simulated_wake_handler_thread(
3535
os_aio_get_segment_no_from_slot(
3544
if (os_aio_use_native_aio) {
3545
if ((ret && len == n)
3546
|| (!ret && GetLastError() == ERROR_IO_PENDING)) {
3547
/* aio was queued successfully! */
3549
if (mode == OS_AIO_SYNC) {
3550
/* We want a synchronous i/o operation on a
3551
file where we also use async i/o: in Windows
3552
we must use the same wait mechanism as for
3555
retval = os_aio_windows_handle(ULINT_UNDEFINED,
3567
err = 1; /* Fall through the next if */
3571
/* aio was queued successfully! */
3576
os_aio_array_free_slot(array, slot);
3578
retry = os_file_handle_error(name,
3579
type == OS_FILE_READ
3580
? "aio read" : "aio write");
3590
/**********************************************************************//**
3591
This function is only used in Windows asynchronous i/o.
3592
Waits for an aio operation to complete. This function is used to wait
3593
for completed requests. The aio array of pending requests is divided
3594
into segments. The thread specifies which segment or slot it wants to wait
3595
for. NOTE: this function will also take care of freeing the aio slot,
3596
therefore no other thread is allowed to do the freeing!
The result of the operation is fetched with GetOverlappedResult() and
the slot is released with os_aio_array_free_slot().
3597
@return TRUE if the aio operation succeeded */
3600
os_aio_windows_handle(
3601
/*==================*/
3602
ulint segment, /*!< in: the number of the segment in the aio
3603
arrays to wait for; segment 0 is the ibuf
3604
i/o thread, segment 1 the log i/o thread,
3605
then follow the non-ibuf read threads, and as
3606
the last are the non-ibuf write threads; if
3607
this is ULINT_UNDEFINED, then it means that
3608
sync aio is used, and this parameter is
3610
ulint pos, /*!< this parameter is used only in sync aio:
3611
wait for the aio slot at this position */
3612
fil_node_t**message1, /*!< out: the messages passed with the aio
3613
request; note that also in the case where
3614
the aio operation failed, these output
3615
parameters are valid and can be used to
3616
restart the operation, for example */
3618
ulint* type) /*!< out: OS_FILE_WRITE or ..._READ */
3620
ulint orig_seg = segment;
3621
os_aio_array_t* array;
3622
os_aio_slot_t* slot;
3629
if (segment == ULINT_UNDEFINED) {
3630
array = os_aio_sync_array;
3633
segment = os_aio_get_array_and_local_segment(&array, segment);
3636
/* NOTE! We only access constant fields in os_aio_array. Therefore
3637
we do not have to acquire the protecting mutex yet */
3639
ut_ad(os_aio_validate());
3640
ut_ad(segment < array->n_segments);
3642
n = array->n_slots / array->n_segments;
3644
if (array == os_aio_sync_array) {
3645
os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
3648
srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
3649
i = os_event_wait_multiple(n,
3650
(array->native_events)
3654
os_mutex_enter(array->mutex);
3656
slot = os_aio_array_get_nth_slot(array, i + segment * n);
3658
ut_a(slot->reserved);
3660
if (orig_seg != ULINT_UNDEFINED) {
3661
srv_set_io_thread_op_info(orig_seg,
3662
"get windows aio return value");
3665
ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE);
3667
*message1 = slot->message1;
3668
*message2 = slot->message2;
3672
if (ret && len == slot->len) {
3675
#ifdef UNIV_DO_FLUSH
3676
if (slot->type == OS_FILE_WRITE
3677
&& !os_do_not_call_flush_at_each_write) {
3678
ut_a(TRUE == os_file_flush(slot->file));
3680
#endif /* UNIV_DO_FLUSH */
3682
os_file_handle_error(slot->name, "Windows aio");
3687
os_mutex_exit(array->mutex);
3689
os_aio_array_free_slot(array, slot);
3695
/**********************************************************************//**
3696
Does simulated aio. This function should be called by an i/o-handler
thread. Reserved requests that are consecutive in the same file and of
the same type are merged and served with one synchronous os_file_read()
or os_file_write() call; a temporary combined buffer is allocated when
more than one request is merged. The array mutex is released for the
duration of the file i/o.
3698
@return TRUE if the aio operation succeeded */
3701
os_aio_simulated_handle(
3702
/*====================*/
3703
ulint global_segment, /*!< in: the number of the segment in the aio
3704
arrays to wait for; segment 0 is the ibuf
3705
i/o thread, segment 1 the log i/o thread,
3706
then follow the non-ibuf read threads, and as
3707
the last are the non-ibuf write threads */
3708
fil_node_t**message1, /*!< out: the messages passed with the aio
3709
request; note that also in the case where
3710
the aio operation failed, these output
3711
parameters are valid and can be used to
3712
restart the operation, for example */
3714
ulint* type) /*!< out: OS_FILE_WRITE or ..._READ */
3716
os_aio_array_t* array;
3718
os_aio_slot_t* slot;
3719
os_aio_slot_t* slot2;
3720
os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
3721
ulint n_consecutive;
3724
ulint lowest_offset;
3728
byte* combined_buf2;
3733
memset(consecutive_ios, 0, sizeof(os_aio_slot_t*) * OS_AIO_MERGE_N_CONSECUTIVE);
3734
segment = os_aio_get_array_and_local_segment(&array, global_segment);
3737
/* NOTE! We only access constant fields in os_aio_array. Therefore
3738
we do not have to acquire the protecting mutex yet */
3740
srv_set_io_thread_op_info(global_segment,
3741
"looking for i/o requests (a)");
3742
ut_ad(os_aio_validate());
3743
ut_ad(segment < array->n_segments);
3745
n = array->n_slots / array->n_segments;
3747
/* Look through n slots after the segment * n'th slot */
3749
if (array == os_aio_read_array
3750
&& os_aio_recommend_sleep_for_read_threads) {
3752
/* Give other threads chance to add several i/os to the array
3755
goto recommended_sleep;
3758
os_mutex_enter(array->mutex);
3760
srv_set_io_thread_op_info(global_segment,
3761
"looking for i/o requests (b)");
3763
/* Check if there is a slot for which the i/o has already been
3766
for (i = 0; i < n; i++) {
3767
slot = os_aio_array_get_nth_slot(array, i + segment * n);
3769
if (slot->reserved && slot->io_already_done) {
3771
if (os_aio_print_debug) {
3773
"InnoDB: i/o for slot %lu"
3774
" already done, returning\n",
3786
/* If there are requests that are at least 2 seconds old, then pick the oldest
3787
one to prevent starvation. If several requests have the same age,
3788
then pick the one at the lowest offset. */
3791
lowest_offset = ULINT_MAX;
3793
for (i = 0; i < n; i++) {
3794
slot = os_aio_array_get_nth_slot(array, i + segment * n);
3796
if (slot->reserved) {
3797
age = (ulint)difftime(time(NULL),
3798
slot->reservation_time);
3800
if ((age >= 2 && age > biggest_age)
3801
|| (age >= 2 && age == biggest_age
3802
&& slot->offset < lowest_offset)) {
3804
/* Found an i/o request */
3805
consecutive_ios[0] = slot;
3810
lowest_offset = slot->offset;
3815
if (n_consecutive == 0) {
3816
/* There were no old requests. Look for an i/o request at the
3817
lowest offset in the array (we ignore the high 32 bits of the
3818
offset in these heuristics) */
3820
lowest_offset = ULINT_MAX;
3822
for (i = 0; i < n; i++) {
3823
slot = os_aio_array_get_nth_slot(array,
3826
if (slot->reserved && slot->offset < lowest_offset) {
3828
/* Found an i/o request */
3829
consecutive_ios[0] = slot;
3833
lowest_offset = slot->offset;
3838
if (n_consecutive == 0) {
3840
/* No i/o requested at the moment */
3845
slot = consecutive_ios[0];
3847
/* Check if there are several consecutive blocks to read or write */
3850
for (i = 0; i < n; i++) {
3851
slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
3853
if (slot2->reserved && slot2 != slot
3854
&& slot2->offset == slot->offset + slot->len
3855
/* check that sum does not wrap over */
3856
&& slot->offset + slot->len > slot->offset
3857
&& slot2->offset_high == slot->offset_high
3858
&& slot2->type == slot->type
3859
&& slot2->file == slot->file) {
3861
/* Found a consecutive i/o request */
3863
consecutive_ios[n_consecutive] = slot2;
3868
if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
3870
goto consecutive_loop;
3877
srv_set_io_thread_op_info(global_segment, "consecutive i/o requests");
3879
/* We have now collected n_consecutive i/o requests in the array;
3880
allocate a single buffer which can hold all data, and perform the
3884
slot = consecutive_ios[0];
3886
for (i = 0; i < n_consecutive; i++) {
3887
total_len += consecutive_ios[i]->len;
3890
if (n_consecutive == 1) {
3891
/* We can use the buffer of the i/o request */
3892
combined_buf = slot->buf;
3893
combined_buf2 = NULL;
3895
combined_buf2 = ut_malloc(total_len + UNIV_PAGE_SIZE);
3897
ut_a(combined_buf2);
3899
combined_buf = ut_align(combined_buf2, UNIV_PAGE_SIZE);
3902
/* We release the array mutex for the time of the i/o: NOTE that
3903
this assumes that there is just one i/o-handler thread serving
3904
a single segment of slots! */
3906
os_mutex_exit(array->mutex);
3908
if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
3909
/* Copy the buffers to the combined buffer */
3912
for (i = 0; i < n_consecutive; i++) {
3914
ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
3915
consecutive_ios[i]->len);
3916
offs += consecutive_ios[i]->len;
3920
srv_set_io_thread_op_info(global_segment, "doing file i/o");
3922
if (os_aio_print_debug) {
3924
"InnoDB: doing i/o of type %lu at offset %lu %lu,"
3926
(ulong) slot->type, (ulong) slot->offset_high,
3927
(ulong) slot->offset, (ulong) total_len);
3930
/* Do the i/o with ordinary, synchronous i/o functions: */
3931
if (slot->type == OS_FILE_WRITE) {
3932
ret = os_file_write(slot->name, slot->file, combined_buf,
3933
slot->offset, slot->offset_high,
3936
ret = os_file_read(slot->file, combined_buf,
3937
slot->offset, slot->offset_high, total_len);
3941
srv_set_io_thread_op_info(global_segment, "file i/o done");
3945
"aio: %lu consecutive %lu:th segment, first offs %lu blocks\n",
3946
n_consecutive, global_segment, slot->offset / UNIV_PAGE_SIZE);
3949
if (slot->type == OS_FILE_READ && n_consecutive > 1) {
3950
/* Copy the combined buffer to individual buffers */
3953
for (i = 0; i < n_consecutive; i++) {
3955
ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
3956
consecutive_ios[i]->len);
3957
offs += consecutive_ios[i]->len;
3961
if (combined_buf2) {
3962
ut_free(combined_buf2);
3965
os_mutex_enter(array->mutex);
3967
/* Mark the i/os done in slots */
3969
for (i = 0; i < n_consecutive; i++) {
3970
consecutive_ios[i]->io_already_done = TRUE;
3973
/* We return the messages for the first slot now, and if there were
3974
several slots, the messages will be returned with subsequent calls
3979
ut_a(slot->reserved);
3981
*message1 = slot->message1;
3982
*message2 = slot->message2;
3986
os_mutex_exit(array->mutex);
3988
os_aio_array_free_slot(array, slot);
3993
srv_set_io_thread_op_info(global_segment, "resetting wait event");
3995
/* We wait here until there again can be i/os in the segment
3998
os_event_reset(os_aio_segment_wait_events[global_segment]);
4000
os_mutex_exit(array->mutex);
4003
srv_set_io_thread_op_info(global_segment, "waiting for i/o request");
4005
os_event_wait(os_aio_segment_wait_events[global_segment]);
4007
if (os_aio_print_debug) {
4009
"InnoDB: i/o handler thread for i/o"
4010
" segment %lu wakes up\n",
4011
(ulong) global_segment);
4017
/**********************************************************************//**
4018
Validates the consistency of an aio array.
Under the array mutex, asserts that n_slots and n_segments are positive,
that every reserved slot has a positive length, and that
array->n_reserved equals the local n_reserved counter.
4019
@return TRUE if ok */
4022
os_aio_array_validate(
4023
/*==================*/
4024
os_aio_array_t* array) /*!< in: aio wait array */
4026
os_aio_slot_t* slot;
4027
ulint n_reserved = 0;
4032
os_mutex_enter(array->mutex);
4034
ut_a(array->n_slots > 0);
4035
ut_a(array->n_segments > 0);
4037
for (i = 0; i < array->n_slots; i++) {
4038
slot = os_aio_array_get_nth_slot(array, i);
4040
if (slot->reserved) {
4042
ut_a(slot->len > 0);
4046
ut_a(array->n_reserved == n_reserved);
4048
os_mutex_exit(array->mutex);
4053
/**********************************************************************//**
4054
Validates the consistency of the aio system.
Runs os_aio_array_validate() on the read, write, ibuf, log and sync
arrays in that order.
4055
@return TRUE if ok */
4058
os_aio_validate(void)
4059
/*=================*/
4061
os_aio_array_validate(os_aio_read_array);
4062
os_aio_array_validate(os_aio_write_array);
4063
os_aio_array_validate(os_aio_ibuf_array);
4064
os_aio_array_validate(os_aio_log_array);
4065
os_aio_array_validate(os_aio_sync_array);
4070
/**********************************************************************//**
4071
Prints info of the aio arrays.
Before the arrays, one line is printed for each i/o thread with its
state and function. After the arrays, pending flush counts and the
totals of OS file reads, writes and fsyncs are printed, followed by
rates computed over the time elapsed since os_last_printout. At the end
the current counters and time are stored as the baseline for the next
printout. */
4076
FILE* file) /*!< in: file where to print */
4078
os_aio_array_t* array;
4079
os_aio_slot_t* slot;
4081
time_t current_time;
4082
double time_elapsed;
4083
double avg_bytes_read;
4086
for (i = 0; i < srv_n_file_io_threads; i++) {
4087
fprintf(file, "I/O thread %lu state: %s (%s)", (ulong) i,
4088
srv_io_thread_op_info[i],
4089
srv_io_thread_function[i]);
4092
if (os_aio_segment_wait_events[i]->is_set) {
4093
fprintf(file, " ev set");
4097
fprintf(file, "\n");
4100
fputs("Pending normal aio reads:", file);
4102
array = os_aio_read_array;
4106
os_mutex_enter(array->mutex);
4108
ut_a(array->n_slots > 0);
4109
ut_a(array->n_segments > 0);
4113
for (i = 0; i < array->n_slots; i++) {
4114
slot = os_aio_array_get_nth_slot(array, i);
4116
if (slot->reserved) {
4119
fprintf(stderr, "Reserved slot, messages %p %p\n",
4120
(void*) slot->message1,
4121
(void*) slot->message2);
4123
ut_a(slot->len > 0);
4127
ut_a(array->n_reserved == n_reserved);
4129
fprintf(file, " %lu", (ulong) n_reserved);
4131
os_mutex_exit(array->mutex);
4133
if (array == os_aio_read_array) {
4134
fputs(", aio writes:", file);
4136
array = os_aio_write_array;
4141
if (array == os_aio_write_array) {
4142
fputs(",\n ibuf aio reads:", file);
4143
array = os_aio_ibuf_array;
4148
if (array == os_aio_ibuf_array) {
4149
fputs(", log i/o's:", file);
4150
array = os_aio_log_array;
4155
if (array == os_aio_log_array) {
4156
fputs(", sync i/o's:", file);
4157
array = os_aio_sync_array;
4163
current_time = time(NULL);
4164
time_elapsed = 0.001 + difftime(current_time, os_last_printout);
4167
"Pending flushes (fsync) log: %lu; buffer pool: %lu\n"
4168
"%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
4169
(ulong) fil_n_pending_log_flushes,
4170
(ulong) fil_n_pending_tablespace_flushes,
4171
(ulong) os_n_file_reads, (ulong) os_n_file_writes,
4172
(ulong) os_n_fsyncs);
4174
if (os_file_n_pending_preads != 0 || os_file_n_pending_pwrites != 0) {
4176
"%lu pending preads, %lu pending pwrites\n",
4177
(ulong) os_file_n_pending_preads,
4178
(ulong) os_file_n_pending_pwrites);
4181
if (os_n_file_reads == os_n_file_reads_old) {
4182
avg_bytes_read = 0.0;
4184
avg_bytes_read = (double) os_bytes_read_since_printout
4185
/ (os_n_file_reads - os_n_file_reads_old);
4189
"%.2f reads/s, %lu avg bytes/read,"
4190
" %.2f writes/s, %.2f fsyncs/s\n",
4191
(os_n_file_reads - os_n_file_reads_old)
4193
(ulong)avg_bytes_read,
4194
(os_n_file_writes - os_n_file_writes_old)
4196
(os_n_fsyncs - os_n_fsyncs_old)
4199
os_n_file_reads_old = os_n_file_reads;
4200
os_n_file_writes_old = os_n_file_writes;
4201
os_n_fsyncs_old = os_n_fsyncs;
4202
os_bytes_read_since_printout = 0;
4204
os_last_printout = current_time;
4207
/**********************************************************************//**
4208
Refreshes the statistics used to print per-second averages.
Copies the current read, write and fsync counters to their _old
counterparts, zeroes os_bytes_read_since_printout and stores the current
time in os_last_printout. */
4211
os_aio_refresh_stats(void)
4212
/*======================*/
4214
os_n_file_reads_old = os_n_file_reads;
4215
os_n_file_writes_old = os_n_file_writes;
4216
os_n_fsyncs_old = os_n_fsyncs;
4217
os_bytes_read_since_printout = 0;
4219
os_last_printout = time(NULL);
4223
/**********************************************************************//**
4224
Checks that all slots in the system have been freed, that is, there are
4225
no pending io operations.
The n_reserved counts of the read, write, ibuf, log and sync arrays are
summed, each one read while holding the mutex of its own array.
4226
@return TRUE if all free */
4229
os_aio_all_slots_free(void)
4230
/*=======================*/
4232
os_aio_array_t* array;
4235
array = os_aio_read_array;
4237
os_mutex_enter(array->mutex);
4239
n_res += array->n_reserved;
4241
os_mutex_exit(array->mutex);
4243
array = os_aio_write_array;
4245
os_mutex_enter(array->mutex);
4247
n_res += array->n_reserved;
4249
os_mutex_exit(array->mutex);
4251
array = os_aio_ibuf_array;
4253
os_mutex_enter(array->mutex);
4255
n_res += array->n_reserved;
4257
os_mutex_exit(array->mutex);
4259
array = os_aio_log_array;
4261
os_mutex_enter(array->mutex);
4263
n_res += array->n_reserved;
4265
os_mutex_exit(array->mutex);
4267
array = os_aio_sync_array;
4269
os_mutex_enter(array->mutex);
4271
n_res += array->n_reserved;
4273
os_mutex_exit(array->mutex);
4282
#endif /* UNIV_DEBUG */
4284
#endif /* !UNIV_HOTBACKUP */