47 #ifndef UNIV_HOTBACKUP
53 # include <sys/types.h>
54 # include <sys/stat.h>
59 #if defined(LINUX_NATIVE_AIO)
64 static const ulint IO_IBUF_SEGMENT = 0;
67 static const ulint IO_LOG_SEGMENT = 1;
81 #ifndef UNIV_HOTBACKUP
84 #define OS_FILE_N_SEEK_MUTEXES 16
85 UNIV_INTERN
os_ib_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
88 #define OS_AIO_MERGE_N_CONSECUTIVE 64
149 UNIV_INTERN mysql_pfs_key_t innodb_file_data_key;
150 UNIV_INTERN mysql_pfs_key_t innodb_file_log_key;
151 UNIV_INTERN mysql_pfs_key_t innodb_file_temp_key;
183 #elif defined(LINUX_NATIVE_AIO)
227 #if defined(LINUX_NATIVE_AIO)
228 io_context_t* aio_ctx;
232 struct io_event* aio_events;
240 #if defined(LINUX_NATIVE_AIO)
242 #define OS_AIO_REAP_TIMEOUT (500000000UL)
245 #define OS_AIO_IO_SETUP_RETRY_SLEEP (500000UL)
248 #define OS_AIO_IO_SETUP_RETRY_ATTEMPTS 5
252 static os_event_t* os_aio_segment_wait_events = NULL;
264 static ulint os_aio_n_segments = ULINT_UNDEFINED;
268 static ibool os_aio_recommend_sleep_for_read_threads = FALSE;
271 UNIV_INTERN ulint os_n_file_reads = 0;
272 UNIV_INTERN ulint os_bytes_read_since_printout = 0;
273 UNIV_INTERN ulint os_n_file_writes = 0;
274 UNIV_INTERN ulint os_n_fsyncs = 0;
275 UNIV_INTERN ulint os_n_file_reads_old = 0;
276 UNIV_INTERN ulint os_n_file_writes_old = 0;
277 UNIV_INTERN ulint os_n_fsyncs_old = 0;
278 UNIV_INTERN time_t os_last_printout;
280 UNIV_INTERN ibool os_has_said_disk_full = FALSE;
282 #if !defined(UNIV_HOTBACKUP) \
283 && (!defined(HAVE_ATOMIC_BUILTINS) || UNIV_WORD_SIZE < 8)
298 # ifndef UNIV_HOTBACKUP
304 os_aio_validate_skip(
void)
308 # define OS_AIO_VALIDATE_SKIP 13
312 static int os_aio_validate_count = OS_AIO_VALIDATE_SKIP;
318 if (--os_aio_validate_count > 0) {
322 os_aio_validate_count = OS_AIO_VALIDATE_SKIP;
335 os_get_os_version(
void)
338 OSVERSIONINFO os_info;
340 os_info.dwOSVersionInfoSize =
sizeof(OSVERSIONINFO);
342 ut_a(GetVersionEx(&os_info));
344 if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) {
346 }
else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
348 }
else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
349 switch (os_info.dwMajorVersion) {
354 return (os_info.dwMinorVersion == 0)
357 return (os_info.dwMinorVersion == 0)
377 os_file_get_last_error_low(
379 bool report_all_errors,
381 bool on_error_silent)
386 ulint err = (ulint) GetLastError();
387 if (err == ERROR_SUCCESS) {
391 if (report_all_errors
393 && err != ERROR_DISK_FULL
394 && err != ERROR_FILE_EXISTS)) {
398 " InnoDB: Operating system error number %lu"
399 " in a file operation.\n", (ulong) err);
401 if (err == ERROR_PATH_NOT_FOUND) {
403 "InnoDB: The error means the system"
404 " cannot find the path specified.\n");
408 "InnoDB: If you are installing InnoDB,"
409 " remember that you must create\n"
410 "InnoDB: directories yourself, InnoDB"
411 " does not create them.\n");
413 }
else if (err == ERROR_ACCESS_DENIED) {
415 "InnoDB: The error means mysqld does not have"
416 " the access rights to\n"
417 "InnoDB: the directory. It may also be"
418 " you have created a subdirectory\n"
419 "InnoDB: of the same name as a data file.\n");
420 }
else if (err == ERROR_SHARING_VIOLATION
421 || err == ERROR_LOCK_VIOLATION) {
423 "InnoDB: The error means that another program"
424 " is using InnoDB's files.\n"
425 "InnoDB: This might be a backup or antivirus"
426 " software or another instance\n"
428 " Please close it to get rid of this error.\n");
429 }
else if (err == ERROR_WORKING_SET_QUOTA
430 || err == ERROR_NO_SYSTEM_RESOURCES) {
432 "InnoDB: The error means that there are no"
433 " sufficient system resources or quota to"
434 " complete the operation.\n");
435 }
else if (err == ERROR_OPERATION_ABORTED) {
437 "InnoDB: The error means that the I/O"
438 " operation has been aborted\n"
439 "InnoDB: because of either a thread exit"
440 " or an application request.\n"
441 "InnoDB: Retry attempt is made.\n");
444 "InnoDB: Some operating system error numbers"
445 " are described at\n"
448 "operating-system-error-codes.html\n");
454 if (err == ERROR_FILE_NOT_FOUND) {
456 }
else if (err == ERROR_DISK_FULL) {
457 return(OS_FILE_DISK_FULL);
458 }
else if (err == ERROR_FILE_EXISTS) {
459 return(OS_FILE_ALREADY_EXISTS);
460 }
else if (err == ERROR_SHARING_VIOLATION
461 || err == ERROR_LOCK_VIOLATION) {
462 return(OS_FILE_SHARING_VIOLATION);
463 }
else if (err == ERROR_WORKING_SET_QUOTA
464 || err == ERROR_NO_SYSTEM_RESOURCES) {
465 return(OS_FILE_INSUFFICIENT_RESOURCE);
466 }
else if (err == ERROR_OPERATION_ABORTED) {
467 return(OS_FILE_OPERATION_ABORTED);
477 if (report_all_errors
478 || (err != ENOSPC && err != EEXIST && !on_error_silent)) {
482 " InnoDB: Operating system error number %d"
483 " in a file operation.\n", err);
487 "InnoDB: The error means the system"
488 " cannot find the path specified.\n");
492 "InnoDB: If you are installing InnoDB,"
493 " remember that you must create\n"
494 "InnoDB: directories yourself, InnoDB"
495 " does not create them.\n");
497 }
else if (err == EACCES) {
499 "InnoDB: The error means mysqld does not have"
500 " the access rights to\n"
501 "InnoDB: the directory.\n");
503 if (strerror(err) != NULL) {
505 "InnoDB: Error number %d"
512 "InnoDB: Some operating system"
513 " error numbers are described at\n"
516 "operating-system-error-codes.html\n");
524 return(OS_FILE_DISK_FULL);
528 return(OS_FILE_ALREADY_EXISTS);
532 return(OS_FILE_PATH_ERROR);
534 if (srv_use_native_aio) {
535 return(OS_FILE_AIO_RESOURCES_RESERVED);
539 if (srv_use_native_aio) {
540 return(OS_FILE_AIO_INTERRUPTED);
558 bool report_all_errors)
561 return(os_file_get_last_error_low(report_all_errors,
false));
571 os_file_handle_error_cond_exit(
574 const char* operation,
577 ibool on_error_silent)
583 err = os_file_get_last_error_low(
false, on_error_silent);
586 case OS_FILE_DISK_FULL:
589 if (os_has_said_disk_full) {
600 " InnoDB: Encountered a problem with"
606 " InnoDB: Disk is full. Try to clean the disk"
607 " to free space.\n");
609 os_has_said_disk_full = TRUE;
615 case OS_FILE_AIO_RESOURCES_RESERVED:
616 case OS_FILE_AIO_INTERRUPTED:
620 case OS_FILE_PATH_ERROR:
621 case OS_FILE_ALREADY_EXISTS:
625 case OS_FILE_SHARING_VIOLATION:
630 case OS_FILE_OPERATION_ABORTED:
631 case OS_FILE_INSUFFICIENT_RESOURCE:
642 if (should_exit || !on_error_silent) {
643 ib_logf(IB_LOG_LEVEL_ERROR,
"File %s: '%s' returned OS "
644 "error " ULINTPF
".%s", name ? name :
"(unknown)",
645 operation, err, should_exit
646 ?
" Cannot continue operation" :
"");
662 os_file_handle_error(
665 const char* operation)
668 return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE));
676 os_file_handle_error_no_exit(
679 const char* operation,
680 ibool on_error_silent)
684 return(os_file_handle_error_cond_exit(
685 name, operation, FALSE, on_error_silent));
689 #define USE_FILE_LOCK
690 #if defined(UNIV_HOTBACKUP) || defined(__WIN__)
694 # undef USE_FILE_LOCK
712 lk.l_whence = SEEK_SET;
713 lk.l_start = lk.l_len = 0;
715 if (fcntl(fd, F_SETLK, &lk) == -1) {
718 "Unable to lock %s, error: %d", name, errno);
720 if (errno == EAGAIN || errno == EACCES) {
722 "Check that you do not already have "
723 "another mysqld process using the "
724 "same InnoDB data or log files.");
734 #ifndef UNIV_HOTBACKUP
742 #if !defined(HAVE_ATOMIC_BUILTINS) || UNIV_WORD_SIZE < 8
746 for (ulint
i = 0;
i < OS_FILE_N_SEEK_MUTEXES;
i++) {
766 file = fdopen(fd,
"w+b");
772 " InnoDB: Error: unable to create temporary file;"
773 " errno: %d\n", errno);
795 ibool error_is_fatal)
803 LPWIN32_FIND_DATA lpFindFileData;
804 char path[OS_FILE_MAX_PATH + 3];
806 ut_a(strlen(dirname) < OS_FILE_MAX_PATH);
808 strcpy(path, dirname);
809 strcpy(path + strlen(path),
"\\*");
815 lpFindFileData =
static_cast<LPWIN32_FIND_DATA
>(
818 dir = FindFirstFile((LPCTSTR) path, lpFindFileData);
822 if (dir == INVALID_HANDLE_VALUE) {
824 if (error_is_fatal) {
825 os_file_handle_error(dirname,
"opendir");
833 dir = opendir(dirname);
835 if (dir == NULL && error_is_fatal) {
836 os_file_handle_error(dirname,
"opendir");
855 ret = FindClose(dir);
858 os_file_handle_error_no_exit(NULL,
"closedir", FALSE);
870 os_file_handle_error_no_exit(NULL,
"closedir", FALSE);
890 LPWIN32_FIND_DATA lpFindFileData;
893 lpFindFileData =
static_cast<LPWIN32_FIND_DATA
>(
896 ret = FindNextFile(dir, lpFindFileData);
899 ut_a(strlen((
char*) lpFindFileData->cFileName)
902 if (strcmp((
char*) lpFindFileData->cFileName,
".") == 0
903 || strcmp((
char*) lpFindFileData->cFileName,
"..") == 0) {
908 strcpy(info->
name, (
char*) lpFindFileData->cFileName);
910 info->
size = (ib_int64_t)(lpFindFileData->nFileSizeLow)
911 + (((ib_int64_t)(lpFindFileData->nFileSizeHigh))
914 if (lpFindFileData->dwFileAttributes
915 & FILE_ATTRIBUTE_REPARSE_POINT) {
921 info->
type = OS_FILE_TYPE_LINK;
922 }
else if (lpFindFileData->dwFileAttributes
923 & FILE_ATTRIBUTE_DIRECTORY) {
924 info->
type = OS_FILE_TYPE_DIR;
930 info->
type = OS_FILE_TYPE_FILE;
938 }
else if (GetLastError() == ERROR_NO_MORE_FILES) {
942 os_file_handle_error_no_exit(NULL,
"readdir_next_file", FALSE);
949 struct stat statinfo;
950 #ifdef HAVE_READDIR_R
951 char dirent_buf[
sizeof(
struct dirent)
952 + _POSIX_PATH_MAX + 100];
960 #ifdef HAVE_READDIR_R
961 ret = readdir_r(dir, (
struct dirent*) dirent_buf, &ent);
973 "InnoDB: cannot read directory %s, error %lu\n",
974 dirname, (ulong) ret);
985 ut_a(strlen(ent->d_name) < _POSIX_PATH_MAX + 100 - 1);
994 ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);
996 if (strcmp(ent->d_name,
".") == 0 || strcmp(ent->d_name,
"..") == 0) {
1001 strcpy(info->
name, ent->d_name);
1003 full_path =
static_cast<char*
>(
1004 ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10));
1006 sprintf(full_path,
"%s/%s", dirname, ent->d_name);
1008 ret = stat(full_path, &statinfo);
1012 if (errno == ENOENT) {
1028 os_file_handle_error_no_exit(full_path,
"stat", FALSE);
1035 info->
size = (ib_int64_t) statinfo.st_size;
1037 if (S_ISDIR(statinfo.st_mode)) {
1038 info->
type = OS_FILE_TYPE_DIR;
1039 }
else if (S_ISLNK(statinfo.st_mode)) {
1040 info->
type = OS_FILE_TYPE_LINK;
1041 }
else if (S_ISREG(statinfo.st_mode)) {
1042 info->
type = OS_FILE_TYPE_FILE;
1044 info->
type = OS_FILE_TYPE_UNKNOWN;
1065 const char* pathname,
1067 ibool fail_if_exists)
1073 rcode = CreateDirectory((LPCTSTR) pathname, NULL);
1075 || (GetLastError() == ERROR_ALREADY_EXISTS
1076 && !fail_if_exists))) {
1078 os_file_handle_error_no_exit(
1079 pathname,
"CreateDirectory", FALSE);
1088 rcode = mkdir(pathname, 0770);
1090 if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
1092 os_file_handle_error_no_exit(pathname,
"mkdir", FALSE);
1125 DWORD attributes = 0;
1132 create_flag = OPEN_EXISTING;
1136 create_flag = OPEN_EXISTING;
1140 create_flag = CREATE_NEW;
1152 "Unable to create subdirectories '%s'",
1158 create_flag = CREATE_NEW;
1163 "Unknown file create mode (%lu) for file '%s'",
1169 if (access_type == OS_FILE_READ_ONLY) {
1170 access = GENERIC_READ;
1174 "read only mode set. Unable to "
1175 "open file '%s' in RW mode, trying RO mode", name);
1177 access = GENERIC_READ;
1179 }
else if (access_type == OS_FILE_READ_WRITE) {
1180 access = GENERIC_READ | GENERIC_WRITE;
1183 "Unknown file access type (%lu) for file '%s'",
1193 (LPCTSTR) name, access, FILE_SHARE_READ, NULL,
1194 create_flag, attributes, NULL);
1196 if (file == INVALID_HANDLE_VALUE) {
1200 retry = os_file_handle_error(
1214 ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
1215 ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
1219 if (access_type == OS_FILE_READ_ONLY) {
1220 create_flag = O_RDONLY;
1222 create_flag = O_RDONLY;
1224 create_flag = O_RDWR;
1229 create_flag = O_RDONLY;
1233 create_flag = O_RDWR | O_CREAT | O_EXCL;
1244 "Unable to create subdirectories '%s'",
1250 create_flag = O_RDWR | O_CREAT | O_EXCL;
1255 "Unknown file create mode (%lu) for file '%s'",
1267 retry = os_file_handle_error(
1270 ?
"open" :
"create");
1278 #ifdef USE_FILE_LOCK
1281 && access_type == OS_FILE_READ_WRITE
1282 && os_file_lock(file, name)) {
1320 DWORD attributes = 0;
1321 DWORD share_mode = FILE_SHARE_READ;
1329 create_flag = OPEN_EXISTING;
1331 create_flag = OPEN_EXISTING;
1333 create_flag = CREATE_NEW;
1337 "Unknown file create mode (%lu) for file '%s'",
1343 if (access_type == OS_FILE_READ_ONLY) {
1344 access = GENERIC_READ;
1346 access = GENERIC_READ;
1347 }
else if (access_type == OS_FILE_READ_WRITE) {
1348 access = GENERIC_READ | GENERIC_WRITE;
1349 }
else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
1353 access = GENERIC_READ;
1358 share_mode |= FILE_SHARE_DELETE | FILE_SHARE_WRITE;
1361 "Unknown file access type (%lu) for file '%s'",
1367 file = CreateFile((LPCTSTR) name,
1375 *success = (file != INVALID_HANDLE_VALUE);
1381 ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
1382 ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
1386 if (access_type == OS_FILE_READ_ONLY) {
1388 create_flag = O_RDONLY;
1392 create_flag = O_RDONLY;
1396 ut_a(access_type == OS_FILE_READ_WRITE
1397 || access_type == OS_FILE_READ_ALLOW_DELETE);
1399 create_flag = O_RDWR;
1404 create_flag = O_RDONLY;
1408 create_flag = O_RDWR | O_CREAT | O_EXCL;
1412 "Unknown file create mode (%lu) for file '%s'",
1420 *success = file == -1 ? FALSE : TRUE;
1422 #ifdef USE_FILE_LOCK
1425 && access_type == OS_FILE_READ_WRITE
1426 && os_file_lock(file, name)) {
1447 __attribute__((unused)),
1448 const char* file_name
1450 __attribute__((unused)),
1451 const char* operation_name __attribute__((unused)))
1456 #if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
1457 if (directio(fd, DIRECTIO_ON) == -1) {
1458 int errno_save = errno;
1461 "Failed to set DIRECTIO_ON on file %s: %s: %s, "
1462 "continuing anyway.",
1463 file_name, operation_name, strerror(errno_save));
1465 #elif defined(O_DIRECT)
1466 if (fcntl(fd, F_SETFL, O_DIRECT) == -1) {
1467 int errno_save = errno;
1470 "Failed to set O_DIRECT on file %s: %s: %s, "
1471 "continuing anyway",
1472 file_name, operation_name, strerror(errno_save));
1474 if (errno_save == EINVAL) {
1476 "O_DIRECT is known to result in 'Invalid "
1477 "argument' on Linux on tmpfs, see MySQL "
1509 ibool on_error_no_exit;
1510 ibool on_error_silent;
1514 "ib_create_table_fail_disk_full",
1516 SetLastError(ERROR_DISK_FULL);
1521 "ib_create_table_fail_disk_full",
1530 DWORD share_mode = FILE_SHARE_READ;
1545 create_flag = OPEN_EXISTING;
1551 share_mode |= FILE_SHARE_WRITE;
1556 create_flag = OPEN_EXISTING;
1560 create_flag = OPEN_EXISTING;
1564 create_flag = CREATE_NEW;
1568 create_flag = CREATE_ALWAYS;
1572 "Unknown file create mode (%lu) for file '%s'",
1578 DWORD attributes = 0;
1580 #ifdef UNIV_HOTBACKUP
1581 attributes |= FILE_FLAG_NO_BUFFERING;
1583 if (purpose == OS_FILE_AIO) {
1589 if (srv_use_native_aio) {
1590 attributes |= FILE_FLAG_OVERLAPPED;
1594 }
else if (purpose == OS_FILE_NORMAL) {
1598 "Unknown purpose flag (%lu) while opening file '%s'",
1604 #ifdef UNIV_NON_BUFFERED_IO
1607 if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
1615 attributes |= FILE_FLAG_NO_BUFFERING;
1620 DWORD access = GENERIC_READ;
1623 access |= GENERIC_WRITE;
1629 (LPCTSTR) name, access, share_mode, NULL,
1630 create_flag, attributes, NULL);
1632 if (file == INVALID_HANDLE_VALUE) {
1633 const char* operation;
1637 ?
"create" :
"open";
1641 if (on_error_no_exit) {
1642 retry = os_file_handle_error_no_exit(
1643 name, operation, on_error_silent);
1645 retry = os_file_handle_error(name, operation);
1656 const char* mode_str = NULL;
1678 create_flag = O_RDONLY;
1682 mode_str =
"CREATE";
1683 create_flag = O_RDWR | O_CREAT | O_EXCL;
1687 mode_str =
"OVERWRITE";
1688 create_flag = O_RDWR | O_CREAT | O_TRUNC;
1692 "Unknown file create mode (%lu) for file '%s'",
1699 ut_a(purpose == OS_FILE_AIO || purpose == OS_FILE_NORMAL);
1707 && type == OS_LOG_FILE
1710 create_flag |= O_SYNC;
1718 const char* operation;
1722 ?
"create" :
"open";
1726 if (on_error_no_exit) {
1727 retry = os_file_handle_error_no_exit(
1728 name, operation, on_error_silent);
1730 retry = os_file_handle_error(name, operation);
1743 && type != OS_LOG_FILE
1750 #ifdef USE_FILE_LOCK
1754 && os_file_lock(file, name)) {
1761 "Retrying to lock the first data file");
1763 for (
int i = 0;
i < 100;
i++) {
1766 if (!os_file_lock(file, name)) {
1773 "Unable to open the first data file");
1804 ret = DeleteFile((LPCTSTR) name);
1810 DWORD lasterr = GetLastError();
1811 if (lasterr == ERROR_FILE_NOT_FOUND
1812 || lasterr == ERROR_PATH_NOT_FOUND) {
1820 if (count > 100 && 0 == (count % 10)) {
1823 ib_logf(IB_LOG_LEVEL_WARN,
"Delete of file %s failed.", name);
1839 if (ret != 0 && errno != ENOENT) {
1840 os_file_handle_error_no_exit(name,
"delete", FALSE);
1866 ret = DeleteFile((LPCTSTR) name);
1872 if (GetLastError() == ERROR_FILE_NOT_FOUND) {
1881 if (count > 100 && 0 == (count % 10)) {
1885 "InnoDB: Warning: cannot delete file %s\n"
1886 "InnoDB: Are you running ibbackup"
1887 " to back up the file?\n", name);
1904 os_file_handle_error_no_exit(name,
"delete", FALSE);
1922 const char* oldpath,
1924 const char* newpath)
1927 os_file_type_t
type;
1942 ret = MoveFile((LPCTSTR) oldpath, (LPCTSTR) newpath);
1948 os_file_handle_error_no_exit(oldpath,
"rename", FALSE);
1954 ret = rename(oldpath, newpath);
1957 os_file_handle_error_no_exit(oldpath,
"rename", FALSE);
1982 ret = CloseHandle(file);
1988 os_file_handle_error(NULL,
"close");
1997 os_file_handle_error(NULL,
"close");
2006 #ifdef UNIV_HOTBACKUP
2012 os_file_close_no_error_handling(
2021 ret = CloseHandle(file);
2057 low = GetFileSize(file, &high);
2059 if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) {
2092 buf_size =
ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
2094 buf2 =
static_cast<byte*
>(
ut_malloc(buf_size + UNIV_PAGE_SIZE));
2097 buf =
static_cast<byte*
>(
ut_align(buf2, UNIV_PAGE_SIZE));
2100 memset(buf, 0, buf_size);
2104 fprintf(stderr,
"InnoDB: Progress in MB:");
2107 while (current_size < size) {
2110 if (size - current_size < (
os_offset_t) buf_size) {
2111 n_bytes = (ulint) (size - current_size);
2116 ret = os_file_write(name, file, buf, current_size, n_bytes);
2119 goto error_handling;
2123 if ((current_size + n_bytes) / (100 << 20)
2124 != current_size / (100 << 20)) {
2126 fprintf(stderr,
" %lu00",
2127 (ulong) ((current_size + n_bytes)
2131 current_size += n_bytes;
2136 fprintf(stderr,
"\n");
2141 ret = os_file_flush(file);
2161 HANDLE h = (HANDLE) _get_osfhandle(fileno(file));
2162 return(SetEndOfFile(h));
2164 return(!ftruncate(fileno(file), ftell(file)));
2192 if (ret == -1 && errno == ENOLCK) {
2194 if (failures % 100 == 0) {
2198 " InnoDB: fsync(): "
2199 "No locks available; retrying\n");
2234 ret = FlushFileBuffers(file);
2245 == ERROR_INVALID_FUNCTION) {
2249 os_file_handle_error(NULL,
"flush");
2259 #if defined(HAVE_DARWIN_THREADS)
2260 # ifndef F_FULLFSYNC
2262 # define F_FULLFSYNC 51
2263 # elif F_FULLFSYNC != 51
2264 # error "F_FULLFSYNC != 51: ABI incompatibility with Mac OS X 10.3"
2271 if (!srv_have_fullfsync) {
2275 ret = os_file_fsync(file);
2277 ret = fcntl(file, F_FULLFSYNC, NULL);
2282 ret = os_file_fsync(file);
2286 ret = os_file_fsync(file);
2301 ib_logf(IB_LOG_LEVEL_ERROR,
"The OS said file flush did not succeed");
2303 os_file_handle_error(NULL,
"flush");
2317 static __attribute__((nonnull, warn_unused_result))
2327 #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
2335 offs = (off_t) offset;
2337 if (
sizeof(off_t) <= 4) {
2340 "File read at offset > 4 GB");
2346 #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD)
2347 #if defined(HAVE_ATOMIC_BUILTINS) && UNIV_WORD_SIZE == 8
2359 n_bytes = pread(file, buf, n, offs);
2361 #if defined(HAVE_ATOMIC_BUILTINS) && UNIV_WORD_SIZE == 8
2369 MONITOR_DEC(MONITOR_OS_PENDING_READS);
2378 #ifndef UNIV_HOTBACKUP
2382 #if defined(HAVE_ATOMIC_BUILTINS) && UNIV_WORD_SIZE == 8
2391 #ifndef UNIV_HOTBACKUP
2393 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2398 ret_offset = lseek(file, offs, SEEK_SET);
2400 if (ret_offset < 0) {
2403 ret = read(file, buf, (ssize_t) n);
2406 #ifndef UNIV_HOTBACKUP
2410 #if defined(HAVE_ATOMIC_BUILTINS) && UNIV_WORD_SIZE == 8
2412 MONITOR_ATOMIC_DEC(MONITOR_OS_PENDING_READS); /* atomic-builtins branch: atomic counterpart of the MONITOR_DEC used in the other branch (macro name was misspelled MONITOR_ATOIC_DEC) */
2416 MONITOR_DEC(MONITOR_OS_PENDING_READS);
2428 static __attribute__((nonnull, warn_unused_result))
2445 offs = (off_t) offset;
2447 if (
sizeof(off_t) <= 4) {
2450 "File write at offset > 4 GB.");
2456 #if defined(HAVE_PWRITE) && !defined(HAVE_BROKEN_PREAD)
2457 #if !defined(HAVE_ATOMIC_BUILTINS) || UNIV_WORD_SIZE < 8
2469 ret = pwrite(file, buf, (ssize_t) n, offs);
2471 #if !defined(HAVE_ATOMIC_BUILTINS) || UNIV_WORD_SIZE < 8
2475 MONITOR_DEC(MONITOR_OS_PENDING_WRITES);
2487 # ifndef UNIV_HOTBACKUP
2496 # ifndef UNIV_HOTBACKUP
2498 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2503 ret_offset = lseek(file, offs, SEEK_SET);
2505 if (ret_offset < 0) {
2511 ret = write(file, buf, (ssize_t) n);
2514 # ifndef UNIV_HOTBACKUP
2520 MONITOR_DEC(MONITOR_OS_PENDING_WRITES);
2550 #ifndef UNIV_HOTBACKUP
2556 ut_a((n & 0xFFFFFFFFUL) == n);
2559 os_bytes_read_since_printout +=
n;
2566 low = (DWORD) offset & 0xFFFFFFFF;
2567 high = (DWORD) (offset >> 32);
2574 #ifndef UNIV_HOTBACKUP
2576 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2581 ret2 = SetFilePointer(
2582 file, low, reinterpret_cast<PLONG>(&high), FILE_BEGIN);
2584 if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
2586 #ifndef UNIV_HOTBACKUP
2592 MONITOR_DEC(MONITOR_OS_PENDING_READS);
2595 goto error_handling;
2598 ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
2600 #ifndef UNIV_HOTBACKUP
2606 MONITOR_DEC(MONITOR_OS_PENDING_READS);
2609 if (ret && len == n) {
2616 os_bytes_read_since_printout +=
n;
2619 ret = os_file_pread(file, buf, n, offset);
2621 if ((ulint) ret == n) {
2627 "Tried to read "ULINTPF
" bytes at offset " UINT64PF
". "
2628 "Was only able to read %ld.", n, offset, (lint) ret);
2633 retry = os_file_handle_error(NULL,
"read");
2640 "InnoDB: Fatal error: cannot read from file."
2641 " OS error number %lu.\n",
2643 (ulong) GetLastError()
2677 #ifndef UNIV_HOTBACKUP
2683 ut_a((n & 0xFFFFFFFFUL) == n);
2686 os_bytes_read_since_printout +=
n;
2693 low = (DWORD) offset & 0xFFFFFFFF;
2694 high = (DWORD) (offset >> 32);
2701 #ifndef UNIV_HOTBACKUP
2703 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2708 ret2 = SetFilePointer(
2709 file, low, reinterpret_cast<PLONG>(&high), FILE_BEGIN);
2711 if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
2713 #ifndef UNIV_HOTBACKUP
2719 MONITOR_DEC(MONITOR_OS_PENDING_READS);
2722 goto error_handling;
2725 ret = ReadFile(file, buf, (DWORD) n, &len, NULL);
2727 #ifndef UNIV_HOTBACKUP
2733 MONITOR_DEC(MONITOR_OS_PENDING_READS);
2736 if (ret && len == n) {
2743 os_bytes_read_since_printout +=
n;
2746 ret = os_file_pread(file, buf, n, offset);
2748 if ((ulint) ret == n) {
2756 retry = os_file_handle_error_no_exit(NULL,
"read", FALSE);
2784 flen = fread(str, 1, size - 1, file);
2812 ulint n_retries = 0;
2814 #ifndef UNIV_HOTBACKUP
2820 ut_a((n & 0xFFFFFFFFUL) == n);
2828 low = (DWORD) offset & 0xFFFFFFFF;
2829 high = (DWORD) (offset >> 32);
2836 #ifndef UNIV_HOTBACKUP
2838 i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
2843 ret2 = SetFilePointer(
2844 file, low, reinterpret_cast<PLONG>(&high), FILE_BEGIN);
2846 if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
2848 #ifndef UNIV_HOTBACKUP
2854 MONITOR_DEC(MONITOR_OS_PENDING_WRITES);
2860 " InnoDB: Error: File pointer positioning to"
2861 " file %s failed at\n"
2862 "InnoDB: offset %llu. Operating system"
2863 " error number %lu.\n"
2864 "InnoDB: Some operating system error numbers"
2865 " are described at\n"
2867 REFMAN
"operating-system-error-codes.html\n",
2868 name, offset, (ulong) GetLastError());
2873 ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
2875 #ifndef UNIV_HOTBACKUP
2881 MONITOR_DEC(MONITOR_OS_PENDING_WRITES);
2884 if (ret && len == n) {
2893 if (GetLastError() == ERROR_LOCK_VIOLATION && n_retries < 100) {
2902 if (!os_has_said_disk_full) {
2904 err = (ulint) GetLastError();
2909 " InnoDB: Error: Write to file %s failed"
2910 " at offset %llu.\n"
2911 "InnoDB: %lu bytes should have been written,"
2912 " only %lu were written.\n"
2913 "InnoDB: Operating system error number %lu.\n"
2914 "InnoDB: Check that your OS and file system"
2915 " support files of this size.\n"
2916 "InnoDB: Check also that the disk is not full"
2917 " or a disk quota exceeded.\n",
2919 (ulong) n, (ulong) len, (ulong) err);
2921 if (strerror((
int) err) != NULL) {
2923 "InnoDB: Error number %lu means '%s'.\n",
2924 (ulong) err, strerror((
int) err));
2928 "InnoDB: Some operating system error numbers"
2929 " are described at\n"
2931 REFMAN
"operating-system-error-codes.html\n");
2933 os_has_said_disk_full = TRUE;
2940 ret = os_file_pwrite(file, buf, n, offset);
2942 if ((ulint) ret == n) {
2947 if (!os_has_said_disk_full) {
2952 " InnoDB: Error: Write to file %s failed"
2953 " at offset "UINT64PF
".\n"
2954 "InnoDB: %lu bytes should have been written,"
2955 " only %ld were written.\n"
2956 "InnoDB: Operating system error number %lu.\n"
2957 "InnoDB: Check that your OS and file system"
2958 " support files of this size.\n"
2959 "InnoDB: Check also that the disk is not full"
2960 " or a disk quota exceeded.\n",
2961 name, offset, n, (lint) ret,
2963 if (strerror(errno) != NULL) {
2965 "InnoDB: Error number %d means '%s'.\n",
2966 errno, strerror(errno));
2970 "InnoDB: Some operating system error numbers"
2971 " are described at\n"
2973 REFMAN
"operating-system-error-codes.html\n");
2975 os_has_said_disk_full = TRUE;
2991 os_file_type_t*
type)
2995 struct _stat64 statinfo;
2997 ret = _stat64(path, &statinfo);
2998 if (ret && (errno == ENOENT || errno == ENOTDIR)) {
3005 os_file_handle_error_no_exit(path,
"stat", FALSE);
3010 if (_S_IFDIR & statinfo.st_mode) {
3011 *type = OS_FILE_TYPE_DIR;
3012 }
else if (_S_IFREG & statinfo.st_mode) {
3013 *type = OS_FILE_TYPE_FILE;
3015 *type = OS_FILE_TYPE_UNKNOWN;
3023 struct stat statinfo;
3025 ret = stat(path, &statinfo);
3026 if (ret && (errno == ENOENT || errno == ENOTDIR)) {
3033 os_file_handle_error_no_exit(path,
"stat", FALSE);
3038 if (S_ISDIR(statinfo.st_mode)) {
3039 *type = OS_FILE_TYPE_DIR;
3040 }
else if (S_ISLNK(statinfo.st_mode)) {
3041 *type = OS_FILE_TYPE_LINK;
3042 }
else if (S_ISREG(statinfo.st_mode)) {
3043 *type = OS_FILE_TYPE_FILE;
3045 *type = OS_FILE_TYPE_UNKNOWN;
3070 struct _stat64 statinfo;
3072 ret = _stat64(path, &statinfo);
3074 if (ret && (errno == ENOENT || errno == ENOTDIR)) {
3082 os_file_handle_error_no_exit(path,
"stat", FALSE);
3086 }
else if (_S_IFDIR & statinfo.st_mode) {
3087 stat_info->
type = OS_FILE_TYPE_DIR;
3088 }
else if (_S_IFREG & statinfo.st_mode) {
3090 DWORD access = GENERIC_READ;
3093 access |= GENERIC_WRITE;
3096 stat_info->
type = OS_FILE_TYPE_FILE;
3100 if (check_rw_perm) {
3109 FILE_ATTRIBUTE_NORMAL,
3112 if (fh == INVALID_HANDLE_VALUE) {
3120 stat_info->
type = OS_FILE_TYPE_UNKNOWN;
3123 struct stat statinfo;
3125 ret = stat(path, &statinfo);
3127 if (ret && (errno == ENOENT || errno == ENOTDIR)) {
3135 os_file_handle_error_no_exit(path,
"stat", FALSE);
3139 }
else if (S_ISDIR(statinfo.st_mode)) {
3140 stat_info->
type = OS_FILE_TYPE_DIR;
3141 }
else if (S_ISLNK(statinfo.st_mode)) {
3142 stat_info->
type = OS_FILE_TYPE_LINK;
3143 }
else if (S_ISREG(statinfo.st_mode)) {
3144 stat_info->
type = OS_FILE_TYPE_FILE;
3146 if (check_rw_perm) {
3162 stat_info->
type = OS_FILE_TYPE_UNKNOWN;
3167 stat_info->
ctime = statinfo.st_ctime;
3168 stat_info->
atime = statinfo.st_atime;
3169 stat_info->
mtime = statinfo.st_mtime;
3170 stat_info->
size = statinfo.st_size;
3177 # define OS_FILE_PATH_SEPARATOR '\\'
3179 # define OS_FILE_PATH_SEPARATOR '/'
/* Fragment of a path-building routine. From the visible lines: it takes the
directory part of `old_path` and the base name of `tablename`, and allocates a
new buffer sized for "<dir><separator><base_name>.ibd". The formatting call
that fills the buffer is only partly visible. */
3197 const char* old_path,
3198 const char* tablename)
/* The table name is split on '/' regardless of platform; only the part after
the last '/' is used as the base name. */
3208 last_slash = strrchr((
char*) tablename,
'/');
3209 base_name = last_slash ? last_slash + 1 : (
char*) tablename;
/* The old path is split on the platform separator. dir_len is the length of
everything before the last separator, or the whole string if there is none. */
3213 last_slash = strrchr((
char*) old_path, OS_FILE_PATH_SEPARATOR);
3214 dir_len = last_slash ? last_slash - old_path : strlen(old_path);
/* sizeof "/.ibd" is 6: one separator, the ".ibd" suffix and the terminating
NUL byte. */
3217 new_path_len = dir_len + strlen(base_name) +
sizeof "/.ibd";
3218 new_path =
static_cast<char*
>(mem_alloc(new_path_len));
/* Copy the directory prefix. The remaining new_path_len - dir_len bytes are
passed as the size of the next call, whose name is not visible here. */
3219 memcpy(new_path, old_path, dir_len);
3222 new_path_len - dir_len,
3224 OS_FILE_PATH_SEPARATOR,
/* Fragment of a path-building routine. From the visible lines: it takes
`data_dir_path` up to its last separator, and allocates a buffer sized for
"<dir><separator><tablename>.<extention>". The formatting call that fills the
buffer is only partly visible. */
3247 const char* data_dir_path,
3248 const char* tablename,
3249 const char* extention)
/* Debug-only check: the extension must be non-NULL and exactly 3 characters. */
3256 ut_ad(extention && strlen(extention) == 3);
/* data_dir_len is the length of everything before the last separator, or the
whole string if there is no separator. */
3260 last_slash = strrchr((
char*) data_dir_path, OS_FILE_PATH_SEPARATOR);
3261 data_dir_len = last_slash ? last_slash - data_dir_path : strlen(data_dir_path);
/* sizeof "/." is 3: one separator, one dot and the terminating NUL byte. */
3264 new_path_len = data_dir_len + strlen(tablename)
3265 +
sizeof "/." + strlen(extention);
3266 new_path =
static_cast<char*
>(mem_alloc(new_path_len));
/* Copy the directory prefix. The remaining new_path_len - data_dir_len bytes
are passed as the size of the next call, whose name is not visible here. */
3267 memcpy(new_path, data_dir_path, data_dir_len);
3269 new_path_len - data_dir_len,
3271 OS_FILE_PATH_SEPARATOR,
3296 char* data_dir_path)
3300 ulint tablename_len;
3303 ptr = strrchr((
char*) data_dir_path,
'.');
3310 ptr = strrchr((
char*) data_dir_path, OS_FILE_PATH_SEPARATOR);
3315 tablename = ptr + 1;
3318 ptr = strrchr((
char*) data_dir_path, OS_FILE_PATH_SEPARATOR);
3326 ptr[tablename_len] =
'\0';
3364 const char* last_slash = strrchr(path, OS_FILE_PATH_SEPARATOR);
3373 if (last_slash == path) {
3396 "read only mode set. Can't create subdirectories '%s'",
3405 if (strlen(subdir) == 1
3406 && (*subdir == OS_FILE_PATH_SEPARATOR || *subdir ==
'.')) {
3414 os_file_type_t
type;
3415 ibool subdir_exists;
3418 if (success && !subdir_exists) {
3437 #ifndef UNIV_HOTBACKUP
/* Returns a pointer to the slot at position `index` of `array`. */
3443 os_aio_array_get_nth_slot(
/* Hard assertion: the index must be inside the array's slot count. */
3448 ut_a(index < array->n_slots);
3450 return(&array->
slots[index]);
3453 #if defined(LINUX_NATIVE_AIO)
/* Fragment of the Linux native AIO context-creation helper. The visible lines
zero the caller's io_context_t, call io_setup() for `max_events` events and
contain the diagnostic messages for the failure cases. The control flow between
the messages (retry loop, return statements) is not visible in this fragment. */
3459 os_aio_linux_create_io_ctx(
3462 io_context_t* io_ctx)
/* The context is cleared before it is handed to io_setup(). */
3468 memset(io_ctx, 0x0,
sizeof(*io_ctx));
3473 ret = io_setup(max_events, io_ctx);
3475 #if defined(UNIV_AIO_DEBUG)
3477 "InnoDB: Linux native AIO:"
3478 " initialized io_ctx for segment\n");
/* EAGAIN case: the warning quotes OS_AIO_IO_SETUP_RETRY_ATTEMPTS as the number
of attempts that will be made. */
3492 " InnoDB: Warning: io_setup() failed"
3493 " with EAGAIN. Will make %d attempts"
3494 " before giving up.\n",
3495 OS_AIO_IO_SETUP_RETRY_ATTEMPTS);
/* `retries` is compared with the same limit to decide whether to try again. */
3498 if (retries < OS_AIO_IO_SETUP_RETRY_ATTEMPTS) {
3501 "InnoDB: Warning: io_setup() attempt"
3511 " InnoDB: Error: io_setup() failed"
3512 " with EAGAIN after %d attempts.\n",
3513 OS_AIO_IO_SETUP_RETRY_ATTEMPTS);
/* Message for a platform without native AIO support; the condition that
selects it is not visible here. */
3519 " InnoDB: Error: Linux Native AIO interface"
3520 " is not supported on this platform. Please"
3521 " check your OS documentation and install"
3522 " appropriate binary of InnoDB.\n");
/* Other errors: the return value is negated before it is printed. */
3529 " InnoDB: Error: Linux Native AIO setup"
3530 " returned following error[%d]\n", -ret);
3535 "InnoDB: You can disable Linux Native AIO by"
3536 " setting innodb_use_native_aio = 0 in my.cnf\n");
/* Fragment of the runtime probe for Linux native AIO. The visible lines create
a one-event AIO context, obtain a file descriptor, submit a single request
through io_submit() and wait for it with io_getevents(). Branch conditions and
cleanup between the visible lines are not shown in this fragment. */
3548 os_aio_native_aio_supported(
void)
3552 io_context_t io_ctx;
/* A context for a single event is enough for the probe. */
3555 if (!os_aio_linux_create_io_ctx(1, &io_ctx)) {
3564 "Unable to create temp file to check "
3565 "native AIO support.");
/* Build "<srv_log_group_home_dir><separator>ib_logfile0" in `name`. The
assertion checks that the directory name leaves room in the buffer. */
3573 ulint dirnamelen = strlen(srv_log_group_home_dir);
3574 ut_a(dirnamelen < (
sizeof name) - 10 -
sizeof "ib_logfile");
3575 memcpy(name, srv_log_group_home_dir, dirnamelen);
/* Append a separator only if the directory name does not already end in one. */
3578 if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
3579 name[dirnamelen++] = SRV_PATH_SEPARATOR;
3582 strcpy(name + dirnamelen,
"ib_logfile0");
/* The log file is opened read-only. */
3584 fd = ::open(name, O_RDONLY);
3589 "Unable to open \"%s\" to check "
3590 "native AIO read support.", name);
3596 struct io_event io_event;
3598 memset(&io_event, 0x0,
sizeof(io_event));
/* Allocate two pages so that a UNIV_PAGE_SIZE-aligned pointer inside the
allocation still has a full page behind it. */
3600 byte* buf =
static_cast<byte*
>(
ut_malloc(UNIV_PAGE_SIZE * 2));
3601 byte* ptr =
static_cast<byte*
>(
ut_align(buf, UNIV_PAGE_SIZE));
3606 memset(buf, 0x00, UNIV_PAGE_SIZE * 2);
3607 memset(&iocb, 0x0,
sizeof(iocb));
3609 struct iocb* p_iocb = &iocb;
/* Two request shapes are prepared: a full-page write at offset 0 and a
512-byte read at offset 0. The condition that chooses between them is not
visible in this fragment. */
3612 io_prep_pwrite(p_iocb, fd, ptr, UNIV_PAGE_SIZE, 0);
3614 ut_a(UNIV_PAGE_SIZE >= 512);
3615 io_prep_pread(p_iocb, fd, ptr, 512, 0);
/* Submit the single request, then wait for exactly one completion event. The
NULL timeout is passed straight to io_getevents(). */
3618 int err = io_submit(io_ctx, 1, &p_iocb);
3622 err = io_getevents(io_ctx, 1, 1, &io_event, NULL);
3635 "Linux Native AIO not supported. You can either "
3636 "move %s to a file system that supports native "
3637 "AIO or you can set innodb_use_native_aio to "
3638 "FALSE to avoid this message.",
3644 "Linux Native AIO check on %s returned error[%d]",
/* Fragment of the AIO array constructor. The visible lines clear the array
header and the slot storage, allocate the per-slot handle table, create one
Linux AIO context per segment when native AIO is enabled, and initialise the
platform-specific control block of each of the `n` slots. */
3659 os_aio_array_create(
3669 #elif defined(LINUX_NATIVE_AIO)
3670 struct io_event* io_event = NULL;
/* At least one segment is required; n / n_segments is computed below. */
3673 ut_a(n_segments > 0);
3676 memset(array, 0x0,
sizeof(*array));
/* Clear all n slots. The previous code wrapped the byte count in an extra
sizeof(), so only sizeof(size_t) bytes of the slot array were zeroed. */
3690 memset(array->slots, 0x0, n * sizeof(*array->slots));
/* One handle per slot. */
3692 array->handles =
static_cast<HANDLE*
>(
ut_malloc(n *
sizeof(HANDLE)));
3695 #if defined(LINUX_NATIVE_AIO)
3696 array->aio_ctx = NULL;
3697 array->aio_events = NULL;
/* Without native AIO the context and event arrays stay NULL. */
3701 if (!srv_use_native_aio) {
3702 goto skip_native_aio;
/* One io_context per segment, each sized for n / n_segments events. */
3708 array->aio_ctx =
static_cast<io_context**
>(
3709 ut_malloc(n_segments *
sizeof(*array->aio_ctx)));
3711 for (ulint i = 0; i < n_segments; ++
i) {
3712 if (!os_aio_linux_create_io_ctx(n/n_segments,
3713 &array->aio_ctx[i])) {
/* One zeroed io_event per slot, used when collecting completions. */
3725 io_event =
static_cast<struct io_event*
>(
3728 memset(io_event, 0x0,
sizeof(*io_event) * n);
3729 array->aio_events = io_event;
/* Per-slot initialisation. */
3733 for (ulint i = 0; i <
n; i++) {
3736 slot = os_aio_array_get_nth_slot(array, i);
/* Windows: an unnamed manual-reset event, initially non-signalled. It is stored
in the slot, in its OVERLAPPED block and in the array's handle table. */
3741 slot->handle = CreateEvent(NULL,TRUE, FALSE, NULL);
3743 over = &slot->control;
3745 over->hEvent = slot->handle;
3747 array->handles[
i] = over->hEvent;
3749 #elif defined(LINUX_NATIVE_AIO)
/* Linux: the slot's iocb starts out zeroed. */
3750 memset(&slot->control, 0x0,
sizeof(slot->control));
3770 for (i = 0; i < array->
n_slots; i++) {
3772 CloseHandle(slot->handle);
3783 #if defined(LINUX_NATIVE_AIO)
3784 if (srv_use_native_aio) {
/* Fragment of the AIO subsystem initialisation. The visible lines create the
read, log, insert buffer, write and sync arrays, label the I/O threads in
srv_io_thread_function[], and allocate one wait event per segment. The
definition of `start` and the failure handling after each NULL check are not
visible in this fragment. */
3816 #if defined(LINUX_NATIVE_AIO)
/* Fall back to non-native AIO if the runtime probe fails. */
3818 if (srv_use_native_aio && !os_aio_native_aio_supported()) {
3820 ib_logf(IB_LOG_LEVEL_WARN,
"Linux Native AIO disabled.");
3822 srv_use_native_aio = FALSE;
/* Read array: n_per_seg slots for each of the n_read_segs segments. */
3828 os_aio_read_array = os_aio_array_create(
3829 n_read_segs * n_per_seg, n_read_segs);
3831 if (os_aio_read_array == NULL) {
3836 ulint n_segs = n_read_segs + start;
/* Thread indexes start .. start + n_read_segs - 1 are read threads. */
3839 for (ulint i = start; i < n_segs; ++
i) {
3840 ut_a(i < SRV_MAX_N_IO_THREADS);
3841 srv_io_thread_function[
i] =
"read thread";
3844 ulint n_segments = n_read_segs;
/* Log array: a single segment; thread index 1 is labelled the log thread. */
3848 os_aio_log_array = os_aio_array_create(n_per_seg, 1);
3850 if (os_aio_log_array == NULL) {
3856 srv_io_thread_function[1] =
"log thread";
/* Insert buffer array: a single segment; thread index 0 is labelled the insert
buffer thread. */
3858 os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
3860 if (os_aio_ibuf_array == NULL) {
3866 srv_io_thread_function[0] =
"insert buffer thread";
/* Write array: n_per_seg slots for each of the n_write_segs segments. */
3868 os_aio_write_array = os_aio_array_create(
3869 n_write_segs * n_per_seg, n_write_segs);
3871 if (os_aio_write_array == NULL) {
3875 n_segments += n_write_segs;
/* The write threads follow the read threads in the thread index space. */
3877 for (ulint i = start + n_read_segs; i < n_segments; ++
i) {
3878 ut_a(i < SRV_MAX_N_IO_THREADS);
3879 srv_io_thread_function[
i] =
"write thread";
/* Two debug checks on the segment count; the conditions that select each one
are not visible in this fragment. */
3882 ut_ad(n_segments >= 4);
3884 ut_ad(n_segments > 0);
/* Sync array: a single segment with n_slots_sync slots. */
3887 os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
3889 if (os_aio_sync_array == NULL) {
/* Publish the segment count and allocate one wait event pointer per segment. */
3893 os_aio_n_segments = n_segments;
3897 os_aio_segment_wait_events =
static_cast<os_event_t*
>(
3898 ut_malloc(n_segments *
sizeof *os_aio_segment_wait_events));
3900 for (ulint i = 0; i < n_segments; ++
i) {
/* Fragment of the AIO subsystem teardown. The insert buffer, log, write and
sync arrays are freed only when they are non-null; the read array is freed
without a visible null check. */
3917 if (os_aio_ibuf_array != 0) {
3918 os_aio_array_free(os_aio_ibuf_array);
3921 if (os_aio_log_array != 0) {
3922 os_aio_array_free(os_aio_log_array);
3925 if (os_aio_write_array != 0) {
3926 os_aio_array_free(os_aio_write_array);
3929 if (os_aio_sync_array != 0) {
3930 os_aio_array_free(os_aio_sync_array);
3933 os_aio_array_free(os_aio_read_array);
/* Walk the per-segment wait events (the loop body is not visible here), then
release the event table and reset the global segment bookkeeping. */
3935 for (ulint i = 0; i < os_aio_n_segments; i++) {
3939 ut_free(os_aio_segment_wait_events);
3940 os_aio_segment_wait_events = 0;
3941 os_aio_n_segments = 0;
3950 os_aio_array_wake_win_aio_at_shutdown(
3956 for (i = 0; i < array->
n_slots; i++) {
3958 SetEvent((array->
slots + i)->handle);
3973 os_aio_array_wake_win_aio_at_shutdown(os_aio_read_array);
3974 if (os_aio_write_array != 0) {
3975 os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
3978 if (os_aio_ibuf_array != 0) {
3979 os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
3982 if (os_aio_log_array != 0) {
3983 os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
3986 #elif defined(LINUX_NATIVE_AIO)
3993 if (srv_use_native_aio) {
4003 for (ulint i = 0; i < os_aio_n_segments; i++) {
4018 os_event_wait(os_aio_write_array->
is_empty);
/* Fragment of the routine that derives a segment number from an (array, slot)
pair. The insert buffer and log arrays map to the fixed IO_IBUF_SEGMENT and
IO_LOG_SEGMENT constants. For the read and write arrays a segment length is
computed from n_slots and combined with slot->pos / seg_len; the divisor of
seg_len and the base added to the quotient are not visible in this fragment. */
4027 os_aio_get_segment_no_from_slot(
4035 if (array == os_aio_ibuf_array) {
4038 segment = IO_IBUF_SEGMENT;
4040 }
else if (array == os_aio_log_array) {
4043 segment = IO_LOG_SEGMENT;
4045 }
else if (array == os_aio_read_array) {
4046 seg_len = os_aio_read_array->
n_slots
/* Any array that is not ibuf, log or read must be the write array. */
4052 ut_a(array == os_aio_write_array);
4054 seg_len = os_aio_write_array->
n_slots
/* Slot position divided by the segment length gives the slot's segment index
inside its own array. */
4058 + slot->
pos / seg_len;
/* Fragment of the routine that maps a global segment number to the array that
owns it (returned through `array`) and to a segment number local to that
array. */
4069 os_aio_get_array_and_local_segment(
4072 ulint global_segment)
/* The global segment must be below the configured segment count. */
4076 ut_a(global_segment < os_aio_n_segments);
/* First case (its condition is not visible here): the read array is selected
and the global number is returned unchanged. */
4079 *array = os_aio_read_array;
4081 return(global_segment);
4082 }
else if (global_segment == IO_IBUF_SEGMENT) {
4083 *array = os_aio_ibuf_array;
4086 }
else if (global_segment == IO_LOG_SEGMENT) {
4087 *array = os_aio_log_array;
4090 }
/* Read segments follow the two fixed segments, so the local number is the
global number minus 2. */
else if (global_segment < os_aio_read_array->n_segments + 2) {
4091 *array = os_aio_read_array;
4093 segment = global_segment - 2;
/* Everything above the read segments belongs to the write array. */
4095 *array = os_aio_write_array;
4097 segment = global_segment - (os_aio_read_array->
n_segments + 2);
4109 os_aio_array_reserve_slot(
4127 OVERLAPPED* control;
4129 #elif defined(LINUX_NATIVE_AIO)
4137 ulint slots_per_seg;
4141 ut_a((len & 0xFFFFFFFFUL) == len);
4150 local_seg = (offset >> (UNIV_PAGE_SIZE_SHIFT + 6))
4159 if (!srv_use_native_aio) {
4174 for (i = local_seg * slots_per_seg, counter = 0;
4180 slot = os_aio_array_get_nth_slot(array, i);
4210 slot->
buf =
static_cast<byte*
>(
buf);
4215 control = &slot->control;
4216 control->Offset = (DWORD) offset & 0xFFFFFFFF;
4217 control->OffsetHigh = (DWORD) (offset >> 32);
4218 ResetEvent(slot->handle);
4220 #elif defined(LINUX_NATIVE_AIO)
4223 if (!srv_use_native_aio) {
4224 goto skip_native_aio;
4229 aio_offset = (off_t) offset;
4231 ut_a(
sizeof(aio_offset) >=
sizeof(offset)
4234 iocb = &slot->control;
4237 io_prep_pread(iocb, file, buf, len, aio_offset);
4239 ut_a(type == OS_FILE_WRITE);
4240 io_prep_pwrite(iocb, file, buf, len, aio_offset);
4243 iocb->data = (
void*) slot;
4258 os_aio_array_free_slot(
4281 ResetEvent(slot->handle);
4283 #elif defined(LINUX_NATIVE_AIO)
4285 if (srv_use_native_aio) {
4286 memset(&slot->control, 0x0,
sizeof(slot->control));
4293 ut_ad(slot->n_bytes == 0);
4294 ut_ad(slot->ret == 0);
4305 os_aio_simulated_wake_handler_thread(
4307 ulint global_segment)
4313 ut_ad(!srv_use_native_aio);
4315 segment = os_aio_get_array_and_local_segment(&array, global_segment);
4325 for (ulint i = 0; i <
n; ++
i) {
4328 slot = os_aio_array_get_nth_slot(array, segment + i);
4338 event = os_aio_segment_wait_events[global_segment];
4356 if (srv_use_native_aio) {
4362 os_aio_recommend_sleep_for_read_threads = FALSE;
4364 for (ulint i = 0; i < os_aio_n_segments; i++) {
4365 os_aio_simulated_wake_handler_thread(i);
4387 if (srv_use_native_aio) {
4393 os_aio_recommend_sleep_for_read_threads = TRUE;
4395 for (ulint i = 0; i < os_aio_n_segments; i++) {
4396 os_aio_get_array_and_local_segment(&array, i);
4398 if (array == os_aio_read_array) {
4406 #if defined(LINUX_NATIVE_AIO)
4412 os_aio_linux_dispatch(
4421 ut_ad(slot != NULL);
4430 iocb = &slot->control;
4433 ret = io_submit(array->aio_ctx[io_ctx_index], 1, &iocb);
4435 #if defined(UNIV_AIO_DEBUG)
4437 "io_submit[%c] ret[%d]: slot[%p] ctx[%p] seg[%lu]\n",
4438 (slot->
type == OS_FILE_WRITE) ?
'w' :
'r', ret, slot,
4439 array->aio_ctx[io_ctx_index], (ulong) io_ctx_index);
4444 if (UNIV_UNLIKELY(ret != 1)) {
4497 DWORD len = (DWORD) n;
4509 ut_ad(os_aio_validate_skip());
4511 ut_ad((n & 0xFFFFFFFFUL) == n);
4515 mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
4519 && !srv_use_native_aio
4540 ut_a(type == OS_FILE_WRITE);
4549 array = os_aio_read_array;
4552 array = os_aio_write_array;
4563 array = os_aio_read_array;
4565 array = os_aio_ibuf_array;
4570 array = os_aio_read_array;
4572 array = os_aio_log_array;
4576 array = os_aio_sync_array;
4577 #if defined(LINUX_NATIVE_AIO)
4579 ut_a(!srv_use_native_aio);
4587 slot = os_aio_array_reserve_slot(type, array, message1, message2, file,
4588 name, buf, offset, n);
4590 if (srv_use_native_aio) {
4592 os_bytes_read_since_printout +=
n;
4594 ret = ReadFile(file, buf, (DWORD) n, &len,
4597 #elif defined(LINUX_NATIVE_AIO)
4598 if (!os_aio_linux_dispatch(array, slot)) {
4604 os_aio_simulated_wake_handler_thread(
4605 os_aio_get_segment_no_from_slot(
4609 }
else if (type == OS_FILE_WRITE) {
4611 if (srv_use_native_aio) {
4614 ret = WriteFile(file, buf, (DWORD) n, &len,
4617 #elif defined(LINUX_NATIVE_AIO)
4618 if (!os_aio_linux_dispatch(array, slot)) {
4624 os_aio_simulated_wake_handler_thread(
4625 os_aio_get_segment_no_from_slot(
4634 if (srv_use_native_aio) {
4635 if ((ret && len == n)
4636 || (!ret && GetLastError() == ERROR_IO_PENDING)) {
4645 retval = os_aio_windows_handle(
4646 ULINT_UNDEFINED, slot->
pos,
4647 &dummy_mess1, &dummy_mess2,
4662 #if defined LINUX_NATIVE_AIO || defined WIN_ASYNC_IO
4665 os_aio_array_free_slot(array, slot);
4667 if (os_file_handle_error(
4668 name,type ==
OS_FILE_READ ?
"aio read" :
"aio write")) {
/* Fragment of the Windows completion handler. The visible lines pick the array
and local segment, wait for a slot's event to be signalled, fetch the result of
the overlapped operation, optionally re-issue the read or write, and finally
free the slot. */
4687 os_aio_windows_handle(
/* Keep the caller's segment value; it is tested against ULINT_UNDEFINED
further down. */
4707 ulint orig_seg = segment;
/* ULINT_UNDEFINED selects the sync array; any other value is translated to an
array and a local segment. */
4717 if (segment == ULINT_UNDEFINED) {
4719 array = os_aio_sync_array;
4721 segment = os_aio_get_array_and_local_segment(&array, segment);
4727 ut_ad(os_aio_validate_skip());
4728 ut_ad(segment < array->n_segments);
/* Sync array: wait on the single slot at `pos`. Other arrays: wait on the n
handles of this segment. */
4732 if (array == os_aio_sync_array) {
4734 WaitForSingleObject(
4735 os_aio_array_get_nth_slot(array, pos)->
handle,
4741 if (orig_seg != ULINT_UNDEFINED) {
4745 i = WaitForMultipleObjects(
4746 (DWORD) n, array->handles + segment * n,
/* i indexes one of the n handles of this segment, so it must be strictly below
WAIT_OBJECT_0 + n. The previous `<=` also accepted i == n, which selects a slot
of the following segment in the lookup below. */
4760 ut_a(i >= WAIT_OBJECT_0 && i < WAIT_OBJECT_0 + n);
4762 slot = os_aio_array_get_nth_slot(array, i + segment * n);
4766 if (orig_seg != ULINT_UNDEFINED) {
4768 orig_seg,
"get windows aio return value");
/* Blocking query (bWait = TRUE) for the transferred byte count. */
4771 ret = GetOverlappedResult(slot->
file, &(slot->control), &len, TRUE);
/* The operation counts as successful only when the full requested length was
transferred. */
4778 if (ret && len == slot->
len) {
4781 }
else if (os_file_handle_error(slot->
name,
"Windows aio")) {
/* Re-issue path: the slot's read or write is submitted again under
performance-schema instrumentation. */
4799 struct PSI_file_locker* locker = NULL;
4800 register_pfs_file_io_begin(locker, slot->
file, slot->
len,
4801 (slot->
type == OS_FILE_WRITE)
4804 __FILE__, __LINE__);
/* The length must fit in 32 bits because it is cast to DWORD below. */
4807 ut_a((slot->
len & 0xFFFFFFFFUL) == slot->
len);
4809 switch (slot->
type) {
4811 ret = WriteFile(slot->
file, slot->
buf,
4812 (DWORD) slot->
len, &len,
4817 ret = ReadFile(slot->
file, slot->
buf,
4818 (DWORD) slot->
len, &len,
4827 register_pfs_file_io_end(locker, len);
/* A pending overlapped operation is resolved through GetOverlappedResult(). */
4830 if (!ret && GetLastError() == ERROR_IO_PENDING) {
4837 ret = GetOverlappedResult(slot->
file,
4842 ret_val = ret && len == slot->
len;
4845 os_aio_array_free_slot(array, slot);
4851 #if defined(LINUX_NATIVE_AIO)
/* Fragment of the Linux native AIO completion collector. For one segment of
`array` it waits on that segment's io_context for completed requests and copies
each event's result fields into the slot it belongs to. The lookup that turns a
completed iocb into `slot`, and the handling of non-positive return codes, are
not visible in this fragment. */
4865 os_aio_linux_collect(
4876 struct io_event* events;
4877 struct io_context* io_ctx;
4880 ut_ad(array != NULL);
4881 ut_ad(seg_size > 0);
4882 ut_ad(segment < array->n_segments);
/* Each segment owns seg_size consecutive entries of the shared event array and
its own io_context. */
4885 events = &array->aio_events[segment * seg_size];
4888 io_ctx = array->aio_ctx[segment];
/* Slot range [start_pos, end_pos) covered by this segment. */
4891 start_pos = segment * seg_size;
4894 end_pos = start_pos + seg_size;
/* Clear this segment's events before each wait. */
4900 memset(events, 0,
sizeof(*events) * seg_size);
/* Wait for at least 1 and at most seg_size events; the nanosecond part of the
timeout is OS_AIO_REAP_TIMEOUT. */
4902 timeout.tv_nsec = OS_AIO_REAP_TIMEOUT;
4904 ret = io_getevents(io_ctx, 1, seg_size, events, &timeout);
/* Process the `ret` events that were returned. */
4907 for (i = 0; i <
ret; i++) {
4909 struct iocb* control;
/* events[i].obj is treated as the iocb of the completed request. */
4911 control = (
struct iocb*) events[i].obj;
4912 ut_a(control != NULL);
4920 #if defined(UNIV_AIO_DEBUG)
4922 "io_getevents[%c]: slot[%p] ctx[%p]"
4924 (slot->
type == OS_FILE_WRITE) ?
'w' :
'r',
4925 slot, io_ctx, segment);
/* Record the event's res and res2 fields in the slot. */
4937 slot->n_bytes = events[
i].res;
4938 slot->ret = events[
i].res2;
4969 " InnoDB: unexpected ret_code[%d] from io_getevents()!\n",
4984 os_aio_linux_handle(
5007 ut_a(global_seg != ULINT_UNDEFINED);
5010 segment = os_aio_get_array_and_local_segment(&array, global_seg);
5015 ibool any_reserved = FALSE;
5017 for (i = 0; i <
n; ++
i) {
5018 slot = os_aio_array_get_nth_slot(
5019 array, i + segment * n);
5026 any_reserved = TRUE;
5047 "waiting for completed aio requests");
5048 os_aio_linux_collect(array, segment, n);
5057 "processing completed aio requests");
5062 ut_ad(slot != NULL);
5071 if (slot->ret == 0 && slot->n_bytes == (
long) slot->
len) {
5084 os_file_handle_error(slot->
name,
"Linux aio");
5091 os_aio_array_free_slot(array, slot);
5105 ulint global_segment,
5121 ulint n_consecutive;
5128 byte* combined_buf2;
5135 *consecutive_ios = NULL;
5137 segment = os_aio_get_array_and_local_segment(&array, global_segment);
5144 "looking for i/o requests (a)");
5145 ut_ad(os_aio_validate_skip());
5146 ut_ad(segment < array->n_segments);
5152 if (array == os_aio_read_array
5153 && os_aio_recommend_sleep_for_read_threads) {
5158 goto recommended_sleep;
5162 "looking for i/o requests (b)");
5166 any_reserved = FALSE;
5170 for (ulint i = 0; i <
n; i++) {
5173 slot = os_aio_array_get_nth_slot(array, i + segment * n);
5181 "InnoDB: i/o for slot %lu"
5182 " already done, returning\n",
5190 any_reserved = TRUE;
5211 lowest_offset = IB_UINT64_MAX;
5213 for (ulint i = 0; i <
n; i++) {
5216 slot = os_aio_array_get_nth_slot(array, i + segment * n);
5220 age = (ulint) difftime(
5223 if ((age >= 2 && age > biggest_age)
5224 || (age >= 2 && age == biggest_age
5225 && slot->
offset < lowest_offset)) {
5228 consecutive_ios[0] = slot;
5233 lowest_offset = slot->
offset;
5238 if (n_consecutive == 0) {
5243 lowest_offset = IB_UINT64_MAX;
5245 for (ulint i = 0; i <
n; i++) {
5248 slot = os_aio_array_get_nth_slot(
5249 array, i + segment * n);
5254 consecutive_ios[0] = slot;
5258 lowest_offset = slot->
offset;
5263 if (n_consecutive == 0) {
5272 ut_ad(n_consecutive != 0);
5273 ut_ad(consecutive_ios[0] != NULL);
5275 aio_slot = consecutive_ios[0];
5280 for (ulint i = 0; i <
n; i++) {
5283 slot = os_aio_array_get_nth_slot(array, i + segment * n);
5293 consecutive_ios[n_consecutive] = slot;
5298 if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
5300 goto consecutive_loop;
5314 aio_slot = consecutive_ios[0];
5316 for (ulint i = 0; i < n_consecutive; i++) {
5317 total_len += consecutive_ios[
i]->
len;
5320 if (n_consecutive == 1) {
5322 combined_buf = aio_slot->
buf;
5323 combined_buf2 = NULL;
5325 combined_buf2 =
static_cast<byte*
>(
5328 ut_a(combined_buf2);
5330 combined_buf =
static_cast<byte*
>(
5331 ut_align(combined_buf2, UNIV_PAGE_SIZE));
5340 if (aio_slot->
type == OS_FILE_WRITE && n_consecutive > 1) {
5344 for (ulint i = 0; i < n_consecutive; i++) {
5346 ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
5347 consecutive_ios[i]->len);
5349 offs += consecutive_ios[
i]->
len;
5356 if (aio_slot->
type == OS_FILE_WRITE) {
5358 ret = os_file_write(
5359 aio_slot->
name, aio_slot->
file, combined_buf,
5360 aio_slot->
offset, total_len);
5363 aio_slot->
file, combined_buf,
5364 aio_slot->
offset, total_len);
5374 for (ulint i = 0; i < n_consecutive; i++) {
5376 ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
5377 consecutive_ios[i]->len);
5378 offs += consecutive_ios[
i]->
len;
5382 if (combined_buf2) {
5390 for (ulint i = 0; i < n_consecutive; i++) {
5405 *type = aio_slot->
type;
5409 os_aio_array_free_slot(array, aio_slot);
5426 os_event_wait(os_aio_segment_wait_events[global_segment]);
5436 os_aio_array_validate(
5441 ulint n_reserved = 0;
5448 for (i = 0; i < array->
n_slots; i++) {
5451 slot = os_aio_array_get_nth_slot(array, i);
5474 os_aio_array_validate(os_aio_read_array);
5476 if (os_aio_write_array != 0) {
5477 os_aio_array_validate(os_aio_write_array);
5480 if (os_aio_ibuf_array != 0) {
5481 os_aio_array_validate(os_aio_ibuf_array);
5484 if (os_aio_log_array != 0) {
5485 os_aio_array_validate(os_aio_log_array);
5488 if (os_aio_sync_array != 0) {
5489 os_aio_array_validate(os_aio_sync_array);
5502 os_aio_print_segment_info(
5518 fprintf(file,
" [");
5521 fprintf(file,
", ");
5524 fprintf(file,
"%lu", n_seg[i]);
5526 fprintf(file,
"] ");
5538 ulint n_reserved = 0;
5539 ulint n_res_seg[SRV_MAX_N_IO_THREADS];
5546 memset(n_res_seg, 0x0,
sizeof(n_res_seg));
5548 for (ulint i = 0; i < array->
n_slots; ++
i) {
5552 slot = os_aio_array_get_nth_slot(array, i);
5558 ++n_res_seg[seg_no];
5566 fprintf(file,
" %lu", (ulong) n_reserved);
5568 os_aio_print_segment_info(file, n_res_seg, array);
5581 time_t current_time;
5582 double time_elapsed;
5583 double avg_bytes_read;
5585 for (ulint i = 0; i < srv_n_file_io_threads; ++
i) {
5586 fprintf(file,
"I/O thread %lu state: %s (%s)",
5588 srv_io_thread_op_info[i],
5589 srv_io_thread_function[i]);
5592 if (os_aio_segment_wait_events[i]->is_set) {
5593 fprintf(file,
" ev set");
5597 fprintf(file,
"\n");
5600 fputs(
"Pending normal aio reads:", file);
5602 os_aio_print_array(file, os_aio_read_array);
5604 if (os_aio_write_array != 0) {
5605 fputs(
", aio writes:", file);
5606 os_aio_print_array(file, os_aio_write_array);
5609 if (os_aio_ibuf_array != 0) {
5610 fputs(
",\n ibuf aio reads:", file);
5611 os_aio_print_array(file, os_aio_ibuf_array);
5614 if (os_aio_log_array != 0) {
5615 fputs(
", log i/o's:", file);
5616 os_aio_print_array(file, os_aio_log_array);
5619 if (os_aio_sync_array != 0) {
5620 fputs(
", sync i/o's:", file);
5621 os_aio_print_array(file, os_aio_sync_array);
5626 time_elapsed = 0.001 + difftime(current_time, os_last_printout);
5629 "Pending flushes (fsync) log: %lu; buffer pool: %lu\n"
5630 "%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
5633 (ulong) os_n_file_reads,
5634 (ulong) os_n_file_writes,
5635 (ulong) os_n_fsyncs);
5639 "%lu pending preads, %lu pending pwrites\n",
5644 if (os_n_file_reads == os_n_file_reads_old) {
5645 avg_bytes_read = 0.0;
5647 avg_bytes_read = (double) os_bytes_read_since_printout
5648 / (os_n_file_reads - os_n_file_reads_old);
5652 "%.2f reads/s, %lu avg bytes/read,"
5653 " %.2f writes/s, %.2f fsyncs/s\n",
5654 (os_n_file_reads - os_n_file_reads_old)
5656 (ulong) avg_bytes_read,
5657 (os_n_file_writes - os_n_file_writes_old)
5659 (os_n_fsyncs - os_n_fsyncs_old)
5662 os_n_file_reads_old = os_n_file_reads;
5663 os_n_file_writes_old = os_n_file_writes;
5664 os_n_fsyncs_old = os_n_fsyncs;
5665 os_bytes_read_since_printout = 0;
5667 os_last_printout = current_time;
/* Fragment that resets the interval statistics: the "old" counters are set to
the current totals, the bytes-read-since-printout counter is cleared, and the
last printout time is set to the current time. */
5677 os_n_file_reads_old = os_n_file_reads;
5678 os_n_file_writes_old = os_n_file_writes;
5679 os_n_fsyncs_old = os_n_fsyncs;
5680 os_bytes_read_since_printout = 0;
5682 os_last_printout = time(NULL);
5692 os_aio_all_slots_free(
void)
5698 array = os_aio_read_array;
5707 ut_a(os_aio_write_array == 0);
5709 array = os_aio_write_array;
5717 ut_a(os_aio_ibuf_array == 0);
5719 array = os_aio_ibuf_array;
5728 ut_a(os_aio_log_array == 0);
5730 array = os_aio_log_array;
5738 array = os_aio_sync_array;