t/nvmept_trim: increase transfer size for some tests
[fio.git] / engines / windowsaio.c
... / ...
CommitLineData
1/*
2 * windowsaio engine
3 *
4 * IO engine using Windows IO Completion Ports.
5 */
6
7#include <stdio.h>
8#include <stdlib.h>
9#include <unistd.h>
10#include <signal.h>
11#include <errno.h>
12
13#include "../fio.h"
14#include "../optgroup.h"
15
16typedef BOOL (WINAPI *CANCELIOEX)(HANDLE hFile, LPOVERLAPPED lpOverlapped);
17
18int geterrno_from_win_error (DWORD code, int deferrno);
19
20struct fio_overlapped {
21 OVERLAPPED o;
22 struct io_u *io_u;
23 BOOL io_complete;
24};
25
26struct windowsaio_data {
27 struct io_u **aio_events;
28 HANDLE iocp;
29 HANDLE iothread;
30 HANDLE iocomplete_event;
31 BOOL iothread_running;
32};
33
34struct thread_ctx {
35 HANDLE iocp;
36 struct windowsaio_data *wd;
37};
38
/* Engine-specific option storage, reached through td->eo. */
struct windowsaio_options {
	struct thread_data *td;			/* not referenced by the code in this file */
	unsigned int no_completion_thread;	/* non-zero: reap completions inline, no helper thread */
};
43
44static struct fio_option options[] = {
45 {
46 .name = "no_completion_thread",
47 .lname = "No completion polling thread",
48 .type = FIO_OPT_STR_SET,
49 .off1 = offsetof(struct windowsaio_options, no_completion_thread),
50 .help = "Use to avoid separate completion polling thread",
51 .category = FIO_OPT_C_ENGINE,
52 .group = FIO_OPT_G_WINDOWSAIO,
53 },
54 {
55 .name = NULL,
56 },
57};
58
59static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter);
60
61static int fio_windowsaio_init(struct thread_data *td)
62{
63 struct windowsaio_data *wd;
64 int rc = 0;
65
66 wd = calloc(1, sizeof(struct windowsaio_data));
67 if (wd == NULL) {
68 log_err("windowsaio: failed to allocate memory for engine data\n");
69 rc = 1;
70 }
71
72 if (!rc) {
73 wd->aio_events = malloc(td->o.iodepth * sizeof(struct io_u*));
74 if (wd->aio_events == NULL) {
75 log_err("windowsaio: failed to allocate memory for aio events list\n");
76 rc = 1;
77 }
78 }
79
80 if (!rc) {
81 /* Create an auto-reset event */
82 wd->iocomplete_event = CreateEvent(NULL, FALSE, FALSE, NULL);
83 if (wd->iocomplete_event == NULL) {
84 log_err("windowsaio: failed to create io complete event handle\n");
85 rc = 1;
86 }
87 }
88
89 if (rc) {
90 if (wd != NULL) {
91 if (wd->aio_events != NULL)
92 free(wd->aio_events);
93
94 free(wd);
95 }
96 }
97
98 td->io_ops_data = wd;
99
100 if (!rc) {
101 struct thread_ctx *ctx;
102 struct windowsaio_data *wd;
103 HANDLE hFile;
104 struct windowsaio_options *o = td->eo;
105
106 hFile = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
107 if (hFile == INVALID_HANDLE_VALUE) {
108 log_err("windowsaio: failed to create io completion port\n");
109 rc = 1;
110 }
111
112 wd = td->io_ops_data;
113 wd->iothread_running = TRUE;
114 wd->iocp = hFile;
115
116 if (o->no_completion_thread == 0) {
117 if (!rc)
118 ctx = malloc(sizeof(struct thread_ctx));
119
120 if (!rc && ctx == NULL) {
121 log_err("windowsaio: failed to allocate memory for thread context structure\n");
122 CloseHandle(hFile);
123 rc = 1;
124 }
125
126 if (!rc) {
127 DWORD threadid;
128
129 ctx->iocp = hFile;
130 ctx->wd = wd;
131 wd->iothread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, &threadid);
132 if (!wd->iothread)
133 log_err("windowsaio: failed to create io completion thread\n");
134 else if (fio_option_is_set(&td->o, cpumask))
135 fio_setaffinity(threadid, td->o.cpumask);
136 }
137 if (rc || wd->iothread == NULL)
138 rc = 1;
139 }
140 }
141
142 return rc;
143}
144
145static void fio_windowsaio_cleanup(struct thread_data *td)
146{
147 struct windowsaio_data *wd;
148
149 wd = td->io_ops_data;
150
151 if (wd != NULL) {
152 wd->iothread_running = FALSE;
153 WaitForSingleObject(wd->iothread, INFINITE);
154
155 CloseHandle(wd->iothread);
156 CloseHandle(wd->iocomplete_event);
157
158 free(wd->aio_events);
159 free(wd);
160
161 td->io_ops_data = NULL;
162 }
163}
164
165static int windowsaio_invalidate_cache(struct fio_file *f)
166{
167 DWORD error;
168 DWORD isharemode = (FILE_SHARE_DELETE | FILE_SHARE_READ |
169 FILE_SHARE_WRITE);
170 HANDLE ihFile;
171 int rc = 0;
172
173 /*
174 * Encourage Windows to drop cached parts of a file by temporarily
175 * opening it for non-buffered access. Note: this will only work when
176 * the following is the only thing with the file open on the whole
177 * system.
178 */
179 dprint(FD_IO, "windowaio: attempt invalidate cache for %s\n",
180 f->file_name);
181 ihFile = CreateFile(f->file_name, 0, isharemode, NULL, OPEN_EXISTING,
182 FILE_FLAG_NO_BUFFERING, NULL);
183
184 if (ihFile != INVALID_HANDLE_VALUE) {
185 if (!CloseHandle(ihFile)) {
186 error = GetLastError();
187 log_info("windowsaio: invalidation fd close %s failed: error %lu\n",
188 f->file_name, error);
189 rc = 1;
190 }
191 } else {
192 error = GetLastError();
193 if (error != ERROR_FILE_NOT_FOUND) {
194 log_info("windowsaio: cache invalidation of %s failed: error %lu\n",
195 f->file_name, error);
196 rc = 1;
197 }
198 }
199
200 return rc;
201}
202
203static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f)
204{
205 int rc = 0;
206 DWORD flags = FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_OVERLAPPED;
207 DWORD sharemode = FILE_SHARE_READ | FILE_SHARE_WRITE;
208 DWORD openmode = OPEN_ALWAYS;
209 DWORD access;
210
211 dprint(FD_FILE, "fd open %s\n", f->file_name);
212
213 if (f->filetype == FIO_TYPE_PIPE) {
214 log_err("windowsaio: pipes are not supported\n");
215 return 1;
216 }
217
218 if (!strcmp(f->file_name, "-")) {
219 log_err("windowsaio: can't read/write to stdin/out\n");
220 return 1;
221 }
222
223 if (td->o.odirect)
224 flags |= FILE_FLAG_NO_BUFFERING;
225 if (td->o.sync_io)
226 flags |= FILE_FLAG_WRITE_THROUGH;
227
228 /*
229 * Inform Windows whether we're going to be doing sequential or
230 * random IO so it can tune the Cache Manager
231 */
232 switch (td->o.fadvise_hint) {
233 case F_ADV_TYPE:
234 if (td_random(td))
235 flags |= FILE_FLAG_RANDOM_ACCESS;
236 else
237 flags |= FILE_FLAG_SEQUENTIAL_SCAN;
238 break;
239 case F_ADV_RANDOM:
240 flags |= FILE_FLAG_RANDOM_ACCESS;
241 break;
242 case F_ADV_SEQUENTIAL:
243 flags |= FILE_FLAG_SEQUENTIAL_SCAN;
244 break;
245 case F_ADV_NONE:
246 break;
247 default:
248 log_err("fio: unknown fadvise type %d\n", td->o.fadvise_hint);
249 }
250
251 if ((!td_write(td) && !(td->flags & TD_F_SYNCS)) || read_only)
252 access = GENERIC_READ;
253 else
254 access = (GENERIC_READ | GENERIC_WRITE);
255
256 if (td->o.create_on_open)
257 openmode = OPEN_ALWAYS;
258 else
259 openmode = OPEN_EXISTING;
260
261 /* If we're going to use direct I/O, Windows will try and invalidate
262 * its cache at that point so there's no need to do it here */
263 if (td->o.invalidate_cache && !td->o.odirect)
264 windowsaio_invalidate_cache(f);
265
266 f->hFile = CreateFile(f->file_name, access, sharemode,
267 NULL, openmode, flags, NULL);
268
269 if (f->hFile == INVALID_HANDLE_VALUE) {
270 log_err("windowsaio: failed to open file \"%s\"\n", f->file_name);
271 rc = 1;
272 }
273
274 /* Only set up the completion port and thread if we're not just
275 * querying the device size */
276 if (!rc && td->io_ops_data != NULL) {
277 struct windowsaio_data *wd;
278
279 wd = td->io_ops_data;
280
281 if (CreateIoCompletionPort(f->hFile, wd->iocp, 0, 0) == NULL) {
282 log_err("windowsaio: failed to create io completion port\n");
283 rc = 1;
284 }
285 }
286
287 return rc;
288}
289
290static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f)
291{
292 int rc = 0;
293
294 dprint(FD_FILE, "fd close %s\n", f->file_name);
295
296 if (f->hFile != INVALID_HANDLE_VALUE) {
297 if (!CloseHandle(f->hFile)) {
298 log_info("windowsaio: failed to close file handle for \"%s\"\n", f->file_name);
299 rc = 1;
300 }
301 }
302
303 f->hFile = INVALID_HANDLE_VALUE;
304 return rc;
305}
306
307static BOOL timeout_expired(DWORD start_count, DWORD end_count)
308{
309 BOOL expired = FALSE;
310 DWORD current_time;
311
312 current_time = GetTickCount();
313
314 if ((end_count > start_count) && current_time >= end_count)
315 expired = TRUE;
316 else if (current_time < start_count && current_time > end_count)
317 expired = TRUE;
318
319 return expired;
320}
321
322static struct io_u* fio_windowsaio_event(struct thread_data *td, int event)
323{
324 struct windowsaio_data *wd = td->io_ops_data;
325 return wd->aio_events[event];
326}
327
328/* dequeue completion entrees directly (no separate completion thread) */
329static int fio_windowsaio_getevents_nothread(struct thread_data *td, unsigned int min,
330 unsigned int max, const struct timespec *t)
331{
332 struct windowsaio_data *wd = td->io_ops_data;
333 unsigned int dequeued = 0;
334 struct io_u *io_u;
335 DWORD start_count = 0;
336 DWORD end_count = 0;
337 DWORD mswait = 250;
338 struct fio_overlapped *fov;
339
340 if (t != NULL) {
341 mswait = (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
342 start_count = GetTickCount();
343 end_count = start_count + (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
344 }
345
346 do {
347 BOOL ret;
348 OVERLAPPED *ovl;
349
350 ULONG entries = min(16, max-dequeued);
351 OVERLAPPED_ENTRY oe[16];
352 ret = GetQueuedCompletionStatusEx(wd->iocp, oe, 16, &entries, mswait, 0);
353 if (ret && entries) {
354 int entry_num;
355
356 for (entry_num=0; entry_num<entries; entry_num++) {
357 ovl = oe[entry_num].lpOverlapped;
358 fov = CONTAINING_RECORD(ovl, struct fio_overlapped, o);
359 io_u = fov->io_u;
360
361 if (ovl->Internal == ERROR_SUCCESS) {
362 io_u->resid = io_u->xfer_buflen - ovl->InternalHigh;
363 io_u->error = 0;
364 } else {
365 io_u->resid = io_u->xfer_buflen;
366 io_u->error = win_to_posix_error(GetLastError());
367 }
368
369 fov->io_complete = FALSE;
370 wd->aio_events[dequeued] = io_u;
371 dequeued++;
372 }
373 }
374
375 if (dequeued >= min ||
376 (t != NULL && timeout_expired(start_count, end_count)))
377 break;
378 } while (1);
379 return dequeued;
380}
381
382/* dequeue completion entrees creates by separate IoCompletionRoutine thread */
383static int fio_windowaio_getevents_thread(struct thread_data *td, unsigned int min,
384 unsigned int max, const struct timespec *t)
385{
386 struct windowsaio_data *wd = td->io_ops_data;
387 unsigned int dequeued = 0;
388 struct io_u *io_u;
389 int i;
390 struct fio_overlapped *fov;
391 DWORD start_count = 0;
392 DWORD end_count = 0;
393 DWORD status;
394 DWORD mswait = 250;
395
396 if (t != NULL) {
397 mswait = (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
398 start_count = GetTickCount();
399 end_count = start_count + (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
400 }
401
402 do {
403 io_u_qiter(&td->io_u_all, io_u, i) {
404 if (!(io_u->flags & IO_U_F_FLIGHT))
405 continue;
406
407 fov = (struct fio_overlapped*)io_u->engine_data;
408
409 if (fov->io_complete) {
410 fov->io_complete = FALSE;
411 wd->aio_events[dequeued] = io_u;
412 dequeued++;
413 }
414 }
415 if (dequeued >= min)
416 break;
417
418 if (dequeued < min) {
419 status = WaitForSingleObject(wd->iocomplete_event, mswait);
420 if (status != WAIT_OBJECT_0 && dequeued >= min)
421 break;
422 }
423
424 if (dequeued >= min ||
425 (t != NULL && timeout_expired(start_count, end_count)))
426 break;
427 } while (1);
428
429 return dequeued;
430}
431
432static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min,
433 unsigned int max, const struct timespec *t)
434{
435 struct windowsaio_options *o = td->eo;
436
437 if (o->no_completion_thread)
438 return fio_windowsaio_getevents_nothread(td, min, max, t);
439 return fio_windowaio_getevents_thread(td, min, max, t);
440}
441
442static enum fio_q_status fio_windowsaio_queue(struct thread_data *td,
443 struct io_u *io_u)
444{
445 struct fio_overlapped *o = io_u->engine_data;
446 LPOVERLAPPED lpOvl = &o->o;
447 BOOL success = FALSE;
448 int rc = FIO_Q_COMPLETED;
449
450 fio_ro_check(td, io_u);
451
452 lpOvl->Internal = 0;
453 lpOvl->InternalHigh = 0;
454 lpOvl->Offset = io_u->offset & 0xFFFFFFFF;
455 lpOvl->OffsetHigh = io_u->offset >> 32;
456
457 switch (io_u->ddir) {
458 case DDIR_WRITE:
459 success = WriteFile(io_u->file->hFile, io_u->xfer_buf,
460 io_u->xfer_buflen, NULL, lpOvl);
461 break;
462 case DDIR_READ:
463 success = ReadFile(io_u->file->hFile, io_u->xfer_buf,
464 io_u->xfer_buflen, NULL, lpOvl);
465 break;
466 case DDIR_SYNC:
467 case DDIR_DATASYNC:
468 case DDIR_SYNC_FILE_RANGE:
469 success = FlushFileBuffers(io_u->file->hFile);
470 if (!success) {
471 log_err("windowsaio: failed to flush file buffers\n");
472 io_u->error = win_to_posix_error(GetLastError());
473 }
474
475 return FIO_Q_COMPLETED;
476 case DDIR_TRIM:
477 log_err("windowsaio: manual TRIM isn't supported on Windows\n");
478 io_u->error = 1;
479 io_u->resid = io_u->xfer_buflen;
480 return FIO_Q_COMPLETED;
481 default:
482 assert(0);
483 break;
484 }
485
486 if (success || GetLastError() == ERROR_IO_PENDING)
487 rc = FIO_Q_QUEUED;
488 else {
489 io_u->error = win_to_posix_error(GetLastError());
490 io_u->resid = io_u->xfer_buflen;
491 }
492
493 return rc;
494}
495
496/* Runs as a thread and waits for queued IO to complete */
497static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter)
498{
499 OVERLAPPED *ovl;
500 struct fio_overlapped *fov;
501 struct io_u *io_u;
502 struct windowsaio_data *wd;
503 struct thread_ctx *ctx;
504 ULONG_PTR ulKey = 0;
505 DWORD bytes;
506
507 ctx = (struct thread_ctx*)lpParameter;
508 wd = ctx->wd;
509
510 do {
511 BOOL ret;
512
513 ret = GetQueuedCompletionStatus(ctx->iocp, &bytes, &ulKey,
514 &ovl, 250);
515 if (!ret && ovl == NULL)
516 continue;
517
518 fov = CONTAINING_RECORD(ovl, struct fio_overlapped, o);
519 io_u = fov->io_u;
520
521 if (ovl->Internal == ERROR_SUCCESS) {
522 io_u->resid = io_u->xfer_buflen - ovl->InternalHigh;
523 io_u->error = 0;
524 } else {
525 io_u->resid = io_u->xfer_buflen;
526 io_u->error = win_to_posix_error(GetLastError());
527 }
528
529 fov->io_complete = TRUE;
530 SetEvent(wd->iocomplete_event);
531 } while (ctx->wd->iothread_running);
532
533 CloseHandle(ctx->iocp);
534 free(ctx);
535 return 0;
536}
537
538static void fio_windowsaio_io_u_free(struct thread_data *td, struct io_u *io_u)
539{
540 struct fio_overlapped *o = io_u->engine_data;
541
542 if (o) {
543 io_u->engine_data = NULL;
544 free(o);
545 }
546}
547
548static int fio_windowsaio_io_u_init(struct thread_data *td, struct io_u *io_u)
549{
550 struct fio_overlapped *o;
551
552 o = malloc(sizeof(*o));
553 o->io_complete = FALSE;
554 o->io_u = io_u;
555 o->o.hEvent = NULL;
556 io_u->engine_data = o;
557 return 0;
558}
559
560static struct ioengine_ops ioengine = {
561 .name = "windowsaio",
562 .version = FIO_IOOPS_VERSION,
563 .init = fio_windowsaio_init,
564 .queue = fio_windowsaio_queue,
565 .getevents = fio_windowsaio_getevents,
566 .event = fio_windowsaio_event,
567 .cleanup = fio_windowsaio_cleanup,
568 .open_file = fio_windowsaio_open_file,
569 .close_file = fio_windowsaio_close_file,
570 .get_file_size = generic_get_file_size,
571 .io_u_init = fio_windowsaio_io_u_init,
572 .io_u_free = fio_windowsaio_io_u_free,
573 .options = options,
574 .option_struct_size = sizeof(struct windowsaio_options),
575};
576
577static void fio_init fio_windowsaio_register(void)
578{
579 register_ioengine(&ioengine);
580}
581
582static void fio_exit fio_windowsaio_unregister(void)
583{
584 unregister_ioengine(&ioengine);
585}