4 * IO engine using Windows IO Completion Ports.
14 #include "../optgroup.h"
/* Function-pointer type for a WINAPI routine taking a file handle and an
 * OVERLAPPED pointer and returning BOOL. Presumably used to look up
 * CancelIoEx at run time - no use is visible in this view, TODO confirm. */
16 typedef BOOL (WINAPI *CANCELIOEX)(HANDLE hFile, LPOVERLAPPED lpOverlapped);
/* Declared here, defined elsewhere. Judging by the name it maps a Windows
 * error code to an errno value with deferrno as fallback - TODO confirm
 * against the definition. */
18 int geterrno_from_win_error (DWORD code, int deferrno);
/* Per-request wrapper around an OVERLAPPED. Members are not visible in this
 * view; other code in the file accesses a member named `o` (the OVERLAPPED)
 * and a BOOL-like `io_complete` flag. */
20 struct fio_overlapped {
/* Engine state, stored in td->io_ops_data by the init hook. */
26 struct windowsaio_data {
/* Array of completed io_u pointers handed back through the event hook;
 * sized to td->o.iodepth entries by the init hook. */
27 struct io_u **aio_events;
/* Auto-reset event signalled by the completion thread after it marks an
 * io_u complete. */
30 HANDLE iocomplete_event;
/* Set TRUE by init and FALSE by cleanup; the completion thread keeps
 * looping while it is set. */
31 BOOL iothread_running;
/* Back-pointer to the engine state. This member appears to belong to the
 * context structure passed to the completion thread (its header is not
 * visible here) - TODO confirm. */
36 struct windowsaio_data *wd;
/* Engine-specific options, reached through td->eo. */
39 struct windowsaio_options {
40 struct thread_data *td;
/* Non-zero selects the getevents path that dequeues completions directly
 * instead of relying on the completion thread. */
41 unsigned int no_completion_thread;
/* Option table for this engine. Only the "no_completion_thread" entry is
 * visible in this view; it is a set-flag option stored into
 * windowsaio_options.no_completion_thread. */
44 static struct fio_option options[] = {
46 .name = "no_completion_thread",
47 .lname = "No completion polling thread",
48 .type = FIO_OPT_STR_SET,
49 .off1 = offsetof(struct windowsaio_options, no_completion_thread),
50 .help = "Use to avoid separate completion polling thread",
51 .category = FIO_OPT_C_ENGINE,
52 .group = FIO_OPT_G_WINDOWSAIO,
/* Forward declaration: the completion thread entry point is defined near the
 * end of the file but is referenced by the init hook's CreateThread call. */
59 static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter);
/*
 * Engine init hook. From the visible lines it:
 *  - allocates the zeroed engine state and the aio_events array
 *    (td->o.iodepth pointers),
 *  - creates the auto-reset "io complete" event,
 *  - creates the I/O completion port,
 *  - unless the no_completion_thread option is set, allocates a thread
 *    context and starts IoCompletionRoutine, optionally pinning it to the
 *    job's cpumask.
 * Several lines (error-code bookkeeping, braces, context setup) are not
 * visible in this view, so the exact failure paths cannot be described here.
 */
61 static int fio_windowsaio_init(struct thread_data *td)
63 struct windowsaio_data *wd;
66 wd = calloc(1, sizeof(struct windowsaio_data));
68 log_err("windowsaio: failed to allocate memory for engine data\n");
/* One slot per queue-depth entry. */
73 wd->aio_events = malloc(td->o.iodepth * sizeof(struct io_u*));
74 if (wd->aio_events == NULL) {
75 log_err("windowsaio: failed to allocate memory for aio events list\n");
81 /* Create an auto-reset event */
82 wd->iocomplete_event = CreateEvent(NULL, FALSE, FALSE, NULL);
83 if (wd->iocomplete_event == NULL) {
84 log_err("windowsaio: failed to create io complete event handle\n");
91 if (wd->aio_events != NULL)
101 struct thread_ctx *ctx;
102 struct windowsaio_data *wd;
104 struct windowsaio_options *o = td->eo;
/* Passing INVALID_HANDLE_VALUE with no existing port creates a new, empty
 * completion port. */
106 hFile = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
/* NOTE(review): CreateIoCompletionPort is documented to return NULL on
 * failure, so this comparison against INVALID_HANDLE_VALUE looks unable to
 * detect a failed call (the call in the open_file hook compares against
 * NULL) - confirm and fix. */
107 if (hFile == INVALID_HANDLE_VALUE) {
108 log_err("windowsaio: failed to create io completion port\n");
112 wd = td->io_ops_data;
113 wd->iothread_running = TRUE;
/* Only start the completion thread when the option is left unset. */
116 if (o->no_completion_thread == 0) {
118 ctx = malloc(sizeof(struct thread_ctx));
120 if (!rc && ctx == NULL) {
121 log_err("windowsaio: failed to allocate memory for thread context structure\n");
/* ctx is handed to the thread as its parameter. */
131 wd->iothread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, &threadid);
133 log_err("windowsaio: failed to create io completion thread\n");
134 else if (fio_option_is_set(&td->o, cpumask))
135 fio_setaffinity(threadid, td->o.cpumask);
137 if (rc || wd->iothread == NULL)
/*
 * Engine cleanup hook: asks the completion thread to stop by clearing
 * iothread_running, waits for it to exit, closes the thread and event
 * handles, releases the aio_events array and clears td->io_ops_data.
 *
 * NOTE(review): the init hook only starts the thread when
 * no_completion_thread is 0, so wd->iothread may be NULL here in the
 * no-thread mode; the wait and CloseHandle on it would then fail - confirm.
 * NOTE(review): no CloseHandle for the completion port is visible in this
 * function; the thread routine closes it, which would not happen without a
 * thread - confirm.
 */
145 static void fio_windowsaio_cleanup(struct thread_data *td)
147 struct windowsaio_data *wd;
149 wd = td->io_ops_data;
/* The thread re-checks this flag after each dequeue attempt. */
152 wd->iothread_running = FALSE;
153 WaitForSingleObject(wd->iothread, INFINITE);
155 CloseHandle(wd->iothread);
156 CloseHandle(wd->iocomplete_event);
158 free(wd->aio_events);
161 td->io_ops_data = NULL;
/*
 * Best-effort cache invalidation for file f: opens the existing file with
 * FILE_FLAG_NO_BUFFERING and zero desired access, then closes it again.
 * Failures are only reported through log_info; a failed open with
 * ERROR_FILE_NOT_FOUND is not logged. The return value is set on lines not
 * visible in this view.
 *
 * NOTE(review): the debug message below is tagged "windowaio" while every
 * other message in the file uses "windowsaio" - likely a typo.
 */
165 static int windowsaio_invalidate_cache(struct fio_file *f)
168 DWORD isharemode = (FILE_SHARE_DELETE | FILE_SHARE_READ |
174 * Encourage Windows to drop cached parts of a file by temporarily
175 * opening it for non-buffered access. Note: this will only work when
176 * the following is the only thing with the file open on the whole
179 dprint(FD_IO, "windowaio: attempt invalidate cache for %s\n",
181 ihFile = CreateFile(f->file_name, 0, isharemode, NULL, OPEN_EXISTING,
182 FILE_FLAG_NO_BUFFERING, NULL);
184 if (ihFile != INVALID_HANDLE_VALUE) {
185 if (!CloseHandle(ihFile)) {
186 error = GetLastError();
187 log_info("windowsaio: invalidation fd close %s failed: error %lu\n",
188 f->file_name, error);
192 error = GetLastError();
/* A file that does not exist yet has nothing cached, so stay quiet. */
193 if (error != ERROR_FILE_NOT_FOUND) {
194 log_info("windowsaio: cache invalidation of %s failed: error %lu\n",
195 f->file_name, error);
/*
 * Engine open hook. Rejects pipes and the "-" (stdin/stdout) file name,
 * builds the CreateFile flags (overlapped I/O always; no-buffering,
 * write-through and a sequential/random cache hint depending on job options
 * whose tests are partly outside this view), chooses read-only or read/write
 * access and OPEN_ALWAYS vs OPEN_EXISTING, optionally invalidates the cache,
 * opens the file and, when engine data exists, associates the handle with
 * the engine's completion port.
 */
203 static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f)
206 DWORD flags = FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_OVERLAPPED;
207 DWORD sharemode = FILE_SHARE_READ | FILE_SHARE_WRITE;
/* Initial value only; reassigned below from td->o.create_on_open. */
208 DWORD openmode = OPEN_ALWAYS;
211 dprint(FD_FILE, "fd open %s\n", f->file_name);
213 if (f->filetype == FIO_TYPE_PIPE) {
214 log_err("windowsaio: pipes are not supported\n");
218 if (!strcmp(f->file_name, "-")) {
219 log_err("windowsaio: can't read/write to stdin/out\n");
224 flags |= FILE_FLAG_NO_BUFFERING;
226 flags |= FILE_FLAG_WRITE_THROUGH;
229 * Inform Windows whether we're going to be doing sequential or
230 * random IO so it can tune the Cache Manager
232 switch (td->o.fadvise_hint) {
235 flags |= FILE_FLAG_RANDOM_ACCESS;
237 flags |= FILE_FLAG_SEQUENTIAL_SCAN;
240 flags |= FILE_FLAG_RANDOM_ACCESS;
242 case F_ADV_SEQUENTIAL:
243 flags |= FILE_FLAG_SEQUENTIAL_SCAN;
248 log_err("fio: unknown fadvise type %d\n", td->o.fadvise_hint);
/* Read-only access when the job neither writes nor syncs, or when the
 * global read_only switch is set. */
251 if ((!td_write(td) && !(td->flags & TD_F_SYNCS)) || read_only)
252 access = GENERIC_READ;
254 access = (GENERIC_READ | GENERIC_WRITE);
256 if (td->o.create_on_open)
257 openmode = OPEN_ALWAYS;
259 openmode = OPEN_EXISTING;
261 /* If we're going to use direct I/O, Windows will try and invalidate
262 * its cache at that point so there's no need to do it here */
263 if (td->o.invalidate_cache && !td->o.odirect)
264 windowsaio_invalidate_cache(f);
266 f->hFile = CreateFile(f->file_name, access, sharemode,
267 NULL, openmode, flags, NULL);
269 if (f->hFile == INVALID_HANDLE_VALUE) {
270 log_err("windowsaio: failed to open file \"%s\"\n", f->file_name);
274 /* Only set up the completion port and thread if we're not just
275 * querying the device size */
276 if (!rc && td->io_ops_data != NULL) {
277 struct windowsaio_data *wd;
279 wd = td->io_ops_data;
/* Associates the file handle with the existing port; NULL means failure. */
281 if (CreateIoCompletionPort(f->hFile, wd->iocp, 0, 0) == NULL) {
282 log_err("windowsaio: failed to create io completion port\n");
/*
 * Engine close hook: closes f->hFile if it is a valid handle, logging (via
 * log_info) when CloseHandle fails, and then marks the handle invalid.
 * td is unused. The return value is set on lines not visible in this view.
 */
290 static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f)
294 dprint(FD_FILE, "fd close %s\n", f->file_name);
296 if (f->hFile != INVALID_HANDLE_VALUE) {
297 if (!CloseHandle(f->hFile)) {
298 log_info("windowsaio: failed to close file handle for \"%s\"\n", f->file_name);
/* Prevents a second close of the same handle. */
303 f->hFile = INVALID_HANDLE_VALUE;
307 static BOOL timeout_expired(DWORD start_count, DWORD end_count)
309 BOOL expired = FALSE;
312 current_time = GetTickCount();
314 if ((end_count > start_count) && current_time >= end_count)
316 else if (current_time < start_count && current_time > end_count)
322 static struct io_u* fio_windowsaio_event(struct thread_data *td, int event)
324 struct windowsaio_data *wd = td->io_ops_data;
325 return wd->aio_events[event];
328 /* dequeue completion entries directly (no separate completion thread) */
/*
 * Pulls completion packets straight from the completion port in batches of
 * up to 16, records residual/error on each io_u, stores the io_u in
 * wd->aio_events and stops once at least `min` were collected or the
 * optional timeout `t` has expired. Loop framing and the return statement
 * are not visible in this view.
 */
329 static int fio_windowsaio_getevents_nothread(struct thread_data *td, unsigned int min,
330 unsigned int max, const struct timespec *t)
332 struct windowsaio_data *wd = td->io_ops_data;
333 unsigned int dequeued = 0;
335 DWORD start_count = 0;
338 struct fio_overlapped *fov;
/* t is dereferenced here; presumably guarded by a `t != NULL` test on a
 * line outside this view (the loop exit below tests t for NULL) - confirm. */
341 mswait = (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
342 start_count = GetTickCount();
343 end_count = start_count + (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
/* NOTE(review): `entries` is initialised to min(16, max-dequeued), but the
 * call below passes the literal 16 as the array count and uses `entries`
 * only as the output count, so a single call can apparently return more
 * completions than `max` allows - confirm and pass the limit instead. */
350 ULONG entries = min(16, max-dequeued);
351 OVERLAPPED_ENTRY oe[16];
352 ret = GetQueuedCompletionStatusEx(wd->iocp, oe, 16, &entries, mswait, 0);
353 if (ret && entries) {
356 for (entry_num=0; entry_num<entries; entry_num++) {
357 ovl = oe[entry_num].lpOverlapped;
/* Recover the wrapper that embeds this OVERLAPPED as member `o`. */
358 fov = CONTAINING_RECORD(ovl, struct fio_overlapped, o);
/* Internal holds the request's completion status; InternalHigh the number
 * of bytes transferred. */
361 if (ovl->Internal == ERROR_SUCCESS) {
362 io_u->resid = io_u->xfer_buflen - ovl->InternalHigh;
365 io_u->resid = io_u->xfer_buflen;
/* NOTE(review): the dequeue call itself succeeded at this point, so
 * GetLastError() may not describe this particular request; the per-request
 * status is in ovl->Internal - confirm. */
366 io_u->error = win_to_posix_error(GetLastError());
369 fov->io_complete = FALSE;
370 wd->aio_events[dequeued] = io_u;
375 if (dequeued >= min ||
376 (t != NULL && timeout_expired(start_count, end_count)))
382 /* dequeue completion entries created by separate IoCompletionRoutine thread */
/*
 * Scans every io_u of the job, collects those that are in flight and whose
 * wrapper has io_complete set (clearing the flag and storing the io_u in
 * wd->aio_events), and, while fewer than `min` were found, waits on the
 * auto-reset completion event. Stops once `min` were collected or the
 * optional timeout `t` has expired. Loop framing and the return statement
 * are not visible in this view.
 */
383 static int fio_windowaio_getevents_thread(struct thread_data *td, unsigned int min,
384 unsigned int max, const struct timespec *t)
386 struct windowsaio_data *wd = td->io_ops_data;
387 unsigned int dequeued = 0;
390 struct fio_overlapped *fov;
391 DWORD start_count = 0;
/* t is dereferenced here; presumably guarded by a `t != NULL` test on a
 * line outside this view (the loop exit below tests t for NULL) - confirm. */
397 mswait = (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
398 start_count = GetTickCount();
399 end_count = start_count + (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
403 io_u_qiter(&td->io_u_all, io_u, i) {
/* Only requests that are currently in flight are considered. */
404 if (!(io_u->flags & IO_U_F_FLIGHT))
407 fov = (struct fio_overlapped*)io_u->engine_data;
409 if (fov->io_complete) {
410 fov->io_complete = FALSE;
411 wd->aio_events[dequeued] = io_u;
418 if (dequeued < min) {
419 status = WaitForSingleObject(wd->iocomplete_event, mswait);
/* NOTE(review): this test sits inside `if (dequeued < min)` and no visible
 * line changes `dequeued` in between, so `dequeued >= min` looks
 * unsatisfiable here - confirm the intended condition. */
420 if (status != WAIT_OBJECT_0 && dequeued >= min)
424 if (dequeued >= min ||
425 (t != NULL && timeout_expired(start_count, end_count)))
432 static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min,
433 unsigned int max, const struct timespec *t)
435 struct windowsaio_options *o = td->eo;
437 if (o->no_completion_thread)
438 return fio_windowsaio_getevents_nothread(td, min, max, t);
439 return fio_windowaio_getevents_thread(td, min, max, t);
/*
 * Engine queue hook: submits one io_u. Loads the file offset into the
 * request's OVERLAPPED and issues WriteFile/ReadFile for data transfers;
 * sync requests call FlushFileBuffers and complete immediately; trim is
 * reported as unsupported. A submission counts as accepted when the call
 * succeeded or GetLastError() is ERROR_IO_PENDING; otherwise the translated
 * error and a full residual are stored on the io_u. Case labels and the
 * final return are not visible in this view.
 */
442 static enum fio_q_status fio_windowsaio_queue(struct thread_data *td,
445 struct fio_overlapped *o = io_u->engine_data;
446 LPOVERLAPPED lpOvl = &o->o;
447 BOOL success = FALSE;
448 int rc = FIO_Q_COMPLETED;
450 fio_ro_check(td, io_u);
453 lpOvl->InternalHigh = 0;
/* The 64-bit offset is split into the two 32-bit OVERLAPPED fields. */
454 lpOvl->Offset = io_u->offset & 0xFFFFFFFF;
455 lpOvl->OffsetHigh = io_u->offset >> 32;
457 switch (io_u->ddir) {
/* The bytes-transferred pointer is NULL because the count is read from the
 * OVERLAPPED when the completion is dequeued. */
459 success = WriteFile(io_u->file->hFile, io_u->xfer_buf,
460 io_u->xfer_buflen, NULL, lpOvl);
463 success = ReadFile(io_u->file->hFile, io_u->xfer_buf,
464 io_u->xfer_buflen, NULL, lpOvl);
468 case DDIR_SYNC_FILE_RANGE:
/* Flushing is a blocking call, so the request completes right here. */
469 success = FlushFileBuffers(io_u->file->hFile);
471 log_err("windowsaio: failed to flush file buffers\n");
472 io_u->error = win_to_posix_error(GetLastError());
475 return FIO_Q_COMPLETED;
477 log_err("windowsaio: manual TRIM isn't supported on Windows\n");
479 io_u->resid = io_u->xfer_buflen;
480 return FIO_Q_COMPLETED;
/* ERROR_IO_PENDING means the overlapped request was accepted. */
486 if (success || GetLastError() == ERROR_IO_PENDING)
489 io_u->error = win_to_posix_error(GetLastError());
490 io_u->resid = io_u->xfer_buflen;
496 /* Runs as a thread and waits for queued IO to complete */
/*
 * Thread entry point. lpParameter is the thread context carrying the
 * completion port handle and a pointer to the engine state. Each iteration
 * dequeues one completion packet, maps the OVERLAPPED back to its wrapper,
 * records residual/error on the io_u, sets io_complete and signals the
 * completion event. The loop runs while the engine's iothread_running flag
 * is set, after which the port handle is closed. The loop header, the
 * remaining GetQueuedCompletionStatus arguments and the return are not
 * visible in this view.
 */
497 static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter)
500 struct fio_overlapped *fov;
502 struct windowsaio_data *wd;
503 struct thread_ctx *ctx;
507 ctx = (struct thread_ctx*)lpParameter;
513 ret = GetQueuedCompletionStatus(ctx->iocp, &bytes, &ulKey,
/* A failed call with no OVERLAPPED means no packet was dequeued
 * (for example a wait timeout), so there is nothing to complete. */
515 if (!ret && ovl == NULL)
518 fov = CONTAINING_RECORD(ovl, struct fio_overlapped, o);
521 if (ovl->Internal == ERROR_SUCCESS) {
522 io_u->resid = io_u->xfer_buflen - ovl->InternalHigh;
525 io_u->resid = io_u->xfer_buflen;
526 io_u->error = win_to_posix_error(GetLastError());
/* Publish the completion, then wake a waiter in the getevents hook. */
529 fov->io_complete = TRUE;
530 SetEvent(wd->iocomplete_event);
531 } while (ctx->wd->iothread_running);
533 CloseHandle(ctx->iocp);
/*
 * Engine io_u_free hook: detaches the per-request overlapped wrapper from
 * the io_u by clearing io_u->engine_data. The statement that releases the
 * wrapper itself is presumably on a line outside this view - confirm.
 */
538 static void fio_windowsaio_io_u_free(struct thread_data *td, struct io_u *io_u)
540 struct fio_overlapped *o = io_u->engine_data;
543 io_u->engine_data = NULL;
/*
 * Engine io_u_init hook: allocates the per-request overlapped wrapper,
 * marks it not complete and attaches it to the io_u as engine_data. Further
 * initialisation and the return statement are on lines not visible in this
 * view.
 *
 * NOTE(review): the malloc result is written through on the very next line
 * with no NULL check, so an allocation failure would dereference NULL -
 * add a check and return an error.
 */
548 static int fio_windowsaio_io_u_init(struct thread_data *td, struct io_u *io_u)
550 struct fio_overlapped *o;
552 o = malloc(sizeof(*o));
553 o->io_complete = FALSE;
556 io_u->engine_data = o;
/*
 * Operations table describing this engine: its name, the ops ABI version,
 * and the hooks defined in this file (get_file_size uses the generic
 * helper). option_struct_size tells the core how large the per-job
 * windowsaio_options block is. One initializer line falls outside this view.
 */
560 static struct ioengine_ops ioengine = {
561 .name = "windowsaio",
562 .version = FIO_IOOPS_VERSION,
563 .init = fio_windowsaio_init,
564 .queue = fio_windowsaio_queue,
565 .getevents = fio_windowsaio_getevents,
566 .event = fio_windowsaio_event,
567 .cleanup = fio_windowsaio_cleanup,
568 .open_file = fio_windowsaio_open_file,
569 .close_file = fio_windowsaio_close_file,
570 .get_file_size = generic_get_file_size,
571 .io_u_init = fio_windowsaio_io_u_init,
572 .io_u_free = fio_windowsaio_io_u_free,
574 .option_struct_size = sizeof(struct windowsaio_options),
/* Registers the windowsaio ops table with the engine registry. The fio_init
 * marker presumably makes this run automatically at program start - confirm
 * against its definition. */
577 static void fio_init fio_windowsaio_register(void)
579 register_ioengine(&ioengine);
/* Removes the windowsaio ops table from the engine registry. The fio_exit
 * marker presumably makes this run automatically at program exit - confirm
 * against its definition. */
582 static void fio_exit fio_windowsaio_unregister(void)
584 unregister_ioengine(&ioengine);