t/io_uring: only calculate per-file depth if we have files
[fio.git] / engines / windowsaio.c
CommitLineData
/*
 * windowsaio engine
 *
 * IO engine using Windows IO Completion Ports.
 */
6
ecc314ba
BC
7#include <stdio.h>
8#include <stdlib.h>
9#include <unistd.h>
10#include <signal.h>
11#include <errno.h>
ecc314ba
BC
12
13#include "../fio.h"
1388e473 14#include "../optgroup.h"
ecc314ba 15
ea4500d8
BC
16typedef BOOL (WINAPI *CANCELIOEX)(HANDLE hFile, LPOVERLAPPED lpOverlapped);
17
66c098b8
BC
18int geterrno_from_win_error (DWORD code, int deferrno);
19
67897036
BC
20struct fio_overlapped {
21 OVERLAPPED o;
22 struct io_u *io_u;
23 BOOL io_complete;
67897036 24};
ecc314ba 25
9b836561 26struct windowsaio_data {
9b836561 27 struct io_u **aio_events;
5a90bb5f 28 HANDLE iocp;
67897036 29 HANDLE iothread;
9b836561 30 HANDLE iocomplete_event;
67897036 31 BOOL iothread_running;
9b836561
BC
32};
33
ecc314ba 34struct thread_ctx {
9b836561 35 HANDLE iocp;
ecc314ba
BC
36 struct windowsaio_data *wd;
37};
38
/* Engine-specific options; reached through td->eo */
struct windowsaio_options {
	/* not referenced in this file; presumably a layout slot fio expects - confirm */
	struct thread_data *td;
	/* non-zero: reap completions in the caller instead of a helper thread */
	unsigned int no_completion_thread;
};
43
44static struct fio_option options[] = {
45 {
46 .name = "no_completion_thread",
47 .lname = "No completion polling thread",
48 .type = FIO_OPT_STR_SET,
49 .off1 = offsetof(struct windowsaio_options, no_completion_thread),
50 .help = "Use to avoid separate completion polling thread",
51 .category = FIO_OPT_C_ENGINE,
52 .group = FIO_OPT_G_WINDOWSAIO,
53 },
54 {
55 .name = NULL,
56 },
57};
58
ecc314ba 59static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter);
ecc314ba 60
ecc314ba
BC
61static int fio_windowsaio_init(struct thread_data *td)
62{
ecc314ba 63 struct windowsaio_data *wd;
9b836561 64 int rc = 0;
ecc314ba 65
03244c19
BC
66 wd = calloc(1, sizeof(struct windowsaio_data));
67 if (wd == NULL) {
68 log_err("windowsaio: failed to allocate memory for engine data\n");
9b836561 69 rc = 1;
03244c19 70 }
ecc314ba 71
9b836561
BC
72 if (!rc) {
73 wd->aio_events = malloc(td->o.iodepth * sizeof(struct io_u*));
03244c19
BC
74 if (wd->aio_events == NULL) {
75 log_err("windowsaio: failed to allocate memory for aio events list\n");
9b836561 76 rc = 1;
03244c19 77 }
e4db9fec
BC
78 }
79
9b836561
BC
80 if (!rc) {
81 /* Create an auto-reset event */
82 wd->iocomplete_event = CreateEvent(NULL, FALSE, FALSE, NULL);
03244c19
BC
83 if (wd->iocomplete_event == NULL) {
84 log_err("windowsaio: failed to create io complete event handle\n");
9b836561 85 rc = 1;
03244c19 86 }
9b836561
BC
87 }
88
9b836561 89 if (rc) {
9b836561 90 if (wd != NULL) {
9b836561
BC
91 if (wd->aio_events != NULL)
92 free(wd->aio_events);
93
94 free(wd);
95 }
96 }
ecc314ba 97
565e784d 98 td->io_ops_data = wd;
93bcfd20 99
5a90bb5f
BC
100 if (!rc) {
101 struct thread_ctx *ctx;
102 struct windowsaio_data *wd;
103 HANDLE hFile;
1388e473 104 struct windowsaio_options *o = td->eo;
5a90bb5f
BC
105
106 hFile = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
03244c19
BC
107 if (hFile == INVALID_HANDLE_VALUE) {
108 log_err("windowsaio: failed to create io completion port\n");
5a90bb5f 109 rc = 1;
03244c19 110 }
5a90bb5f 111
565e784d 112 wd = td->io_ops_data;
5a90bb5f
BC
113 wd->iothread_running = TRUE;
114 wd->iocp = hFile;
115
1388e473 116 if (o->no_completion_thread == 0) {
117 if (!rc)
118 ctx = malloc(sizeof(struct thread_ctx));
5a90bb5f 119
1388e473 120 if (!rc && ctx == NULL) {
121 log_err("windowsaio: failed to allocate memory for thread context structure\n");
122 CloseHandle(hFile);
123 rc = 1;
124 }
5a90bb5f 125
1388e473 126 if (!rc) {
127 DWORD threadid;
438bb1cf 128
1388e473 129 ctx->iocp = hFile;
130 ctx->wd = wd;
131 wd->iothread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, &threadid);
132 if (!wd->iothread)
133 log_err("windowsaio: failed to create io completion thread\n");
134 else if (fio_option_is_set(&td->o, cpumask))
135 fio_setaffinity(threadid, td->o.cpumask);
136 }
137 if (rc || wd->iothread == NULL)
138 rc = 1;
5a90bb5f 139 }
5a90bb5f
BC
140 }
141
4e79098f 142 return rc;
ecc314ba
BC
143}
144
67897036
BC
145static void fio_windowsaio_cleanup(struct thread_data *td)
146{
67897036
BC
147 struct windowsaio_data *wd;
148
565e784d 149 wd = td->io_ops_data;
67897036
BC
150
151 if (wd != NULL) {
40c5db35
JA
152 wd->iothread_running = FALSE;
153 WaitForSingleObject(wd->iothread, INFINITE);
67897036
BC
154
155 CloseHandle(wd->iothread);
156 CloseHandle(wd->iocomplete_event);
157
67897036 158 free(wd->aio_events);
67897036
BC
159 free(wd);
160
565e784d 161 td->io_ops_data = NULL;
67897036
BC
162 }
163}
164
8300eba5
SW
165static int windowsaio_invalidate_cache(struct fio_file *f)
166{
167 DWORD error;
168 DWORD isharemode = (FILE_SHARE_DELETE | FILE_SHARE_READ |
1633aa61 169 FILE_SHARE_WRITE);
8300eba5
SW
170 HANDLE ihFile;
171 int rc = 0;
172
173 /*
174 * Encourage Windows to drop cached parts of a file by temporarily
175 * opening it for non-buffered access. Note: this will only work when
176 * the following is the only thing with the file open on the whole
177 * system.
178 */
179 dprint(FD_IO, "windowaio: attempt invalidate cache for %s\n",
180 f->file_name);
181 ihFile = CreateFile(f->file_name, 0, isharemode, NULL, OPEN_EXISTING,
182 FILE_FLAG_NO_BUFFERING, NULL);
183
184 if (ihFile != INVALID_HANDLE_VALUE) {
185 if (!CloseHandle(ihFile)) {
186 error = GetLastError();
4937100f
SW
187 log_info("windowsaio: invalidation fd close %s failed: error %lu\n",
188 f->file_name, error);
8300eba5
SW
189 rc = 1;
190 }
191 } else {
192 error = GetLastError();
193 if (error != ERROR_FILE_NOT_FOUND) {
4937100f
SW
194 log_info("windowsaio: cache invalidation of %s failed: error %lu\n",
195 f->file_name, error);
8300eba5
SW
196 rc = 1;
197 }
198 }
199
200 return rc;
201}
202
ecc314ba
BC
203static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f)
204{
205 int rc = 0;
4e79098f 206 DWORD flags = FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_OVERLAPPED;
ecc314ba
BC
207 DWORD sharemode = FILE_SHARE_READ | FILE_SHARE_WRITE;
208 DWORD openmode = OPEN_ALWAYS;
209 DWORD access;
210
211 dprint(FD_FILE, "fd open %s\n", f->file_name);
212
ecc314ba 213 if (f->filetype == FIO_TYPE_PIPE) {
03244c19 214 log_err("windowsaio: pipes are not supported\n");
ecc314ba
BC
215 return 1;
216 }
217
218 if (!strcmp(f->file_name, "-")) {
03244c19 219 log_err("windowsaio: can't read/write to stdin/out\n");
ecc314ba
BC
220 return 1;
221 }
222
223 if (td->o.odirect)
224 flags |= FILE_FLAG_NO_BUFFERING;
225 if (td->o.sync_io)
226 flags |= FILE_FLAG_WRITE_THROUGH;
227
93bcfd20
BC
228 /*
229 * Inform Windows whether we're going to be doing sequential or
f20a86a7 230 * random IO so it can tune the Cache Manager
93bcfd20 231 */
f20a86a7
SW
232 switch (td->o.fadvise_hint) {
233 case F_ADV_TYPE:
234 if (td_random(td))
235 flags |= FILE_FLAG_RANDOM_ACCESS;
236 else
237 flags |= FILE_FLAG_SEQUENTIAL_SCAN;
238 break;
239 case F_ADV_RANDOM:
ecc314ba 240 flags |= FILE_FLAG_RANDOM_ACCESS;
f20a86a7
SW
241 break;
242 case F_ADV_SEQUENTIAL:
243 flags |= FILE_FLAG_SEQUENTIAL_SCAN;
244 break;
245 case F_ADV_NONE:
246 break;
247 default:
248 log_err("fio: unknown fadvise type %d\n", td->o.fadvise_hint);
249 }
ecc314ba 250
c3773c17 251 if ((!td_write(td) && !(td->flags & TD_F_SYNCS)) || read_only)
ecc314ba
BC
252 access = GENERIC_READ;
253 else
254 access = (GENERIC_READ | GENERIC_WRITE);
255
93bcfd20 256 if (td->o.create_on_open)
ecc314ba
BC
257 openmode = OPEN_ALWAYS;
258 else
259 openmode = OPEN_EXISTING;
260
8300eba5
SW
261 /* If we're going to use direct I/O, Windows will try and invalidate
262 * its cache at that point so there's no need to do it here */
3e9ae304 263 if (td->o.invalidate_cache && !td->o.odirect)
8300eba5 264 windowsaio_invalidate_cache(f);
8300eba5 265
ecc314ba
BC
266 f->hFile = CreateFile(f->file_name, access, sharemode,
267 NULL, openmode, flags, NULL);
268
03244c19
BC
269 if (f->hFile == INVALID_HANDLE_VALUE) {
270 log_err("windowsaio: failed to open file \"%s\"\n", f->file_name);
ecc314ba 271 rc = 1;
03244c19 272 }
ecc314ba 273
93bcfd20 274 /* Only set up the completion port and thread if we're not just
ecc314ba 275 * querying the device size */
565e784d 276 if (!rc && td->io_ops_data != NULL) {
40c5db35 277 struct windowsaio_data *wd;
ecc314ba 278
565e784d 279 wd = td->io_ops_data;
ecc314ba 280
03244c19
BC
281 if (CreateIoCompletionPort(f->hFile, wd->iocp, 0, 0) == NULL) {
282 log_err("windowsaio: failed to create io completion port\n");
ecc314ba 283 rc = 1;
03244c19 284 }
ecc314ba
BC
285 }
286
ecc314ba
BC
287 return rc;
288}
289
290static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f)
291{
4e79098f 292 int rc = 0;
1e42cc3d 293
9b836561
BC
294 dprint(FD_FILE, "fd close %s\n", f->file_name);
295
ecc314ba 296 if (f->hFile != INVALID_HANDLE_VALUE) {
03244c19
BC
297 if (!CloseHandle(f->hFile)) {
298 log_info("windowsaio: failed to close file handle for \"%s\"\n", f->file_name);
4e79098f 299 rc = 1;
03244c19 300 }
ecc314ba
BC
301 }
302
303 f->hFile = INVALID_HANDLE_VALUE;
4e79098f 304 return rc;
ecc314ba
BC
305}
306
67897036
BC
307static BOOL timeout_expired(DWORD start_count, DWORD end_count)
308{
309 BOOL expired = FALSE;
310 DWORD current_time;
311
312 current_time = GetTickCount();
313
314 if ((end_count > start_count) && current_time >= end_count)
315 expired = TRUE;
316 else if (current_time < start_count && current_time > end_count)
317 expired = TRUE;
318
319 return expired;
320}
321
322static struct io_u* fio_windowsaio_event(struct thread_data *td, int event)
323{
565e784d 324 struct windowsaio_data *wd = td->io_ops_data;
67897036
BC
325 return wd->aio_events[event];
326}
327
1388e473 328/* dequeue completion entrees directly (no separate completion thread) */
329static int fio_windowsaio_getevents_nothread(struct thread_data *td, unsigned int min,
330 unsigned int max, const struct timespec *t)
331{
332 struct windowsaio_data *wd = td->io_ops_data;
333 unsigned int dequeued = 0;
334 struct io_u *io_u;
335 DWORD start_count = 0;
336 DWORD end_count = 0;
337 DWORD mswait = 250;
338 struct fio_overlapped *fov;
339
340 if (t != NULL) {
341 mswait = (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
342 start_count = GetTickCount();
343 end_count = start_count + (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
344 }
345
346 do {
347 BOOL ret;
348 OVERLAPPED *ovl;
349
350 ULONG entries = min(16, max-dequeued);
351 OVERLAPPED_ENTRY oe[16];
352 ret = GetQueuedCompletionStatusEx(wd->iocp, oe, 16, &entries, mswait, 0);
353 if (ret && entries) {
354 int entry_num;
355
356 for (entry_num=0; entry_num<entries; entry_num++) {
357 ovl = oe[entry_num].lpOverlapped;
358 fov = CONTAINING_RECORD(ovl, struct fio_overlapped, o);
359 io_u = fov->io_u;
360
361 if (ovl->Internal == ERROR_SUCCESS) {
362 io_u->resid = io_u->xfer_buflen - ovl->InternalHigh;
363 io_u->error = 0;
364 } else {
365 io_u->resid = io_u->xfer_buflen;
366 io_u->error = win_to_posix_error(GetLastError());
367 }
368
369 fov->io_complete = FALSE;
370 wd->aio_events[dequeued] = io_u;
371 dequeued++;
372 }
373 }
374
375 if (dequeued >= min ||
376 (t != NULL && timeout_expired(start_count, end_count)))
377 break;
378 } while (1);
379 return dequeued;
380}
381
382/* dequeue completion entrees creates by separate IoCompletionRoutine thread */
383static int fio_windowaio_getevents_thread(struct thread_data *td, unsigned int min,
384 unsigned int max, const struct timespec *t)
67897036 385{
565e784d 386 struct windowsaio_data *wd = td->io_ops_data;
67897036
BC
387 unsigned int dequeued = 0;
388 struct io_u *io_u;
7b2dfab1 389 int i;
67897036
BC
390 struct fio_overlapped *fov;
391 DWORD start_count = 0;
392 DWORD end_count = 0;
393 DWORD status;
394 DWORD mswait = 250;
395
396 if (t != NULL) {
397 mswait = (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
398 start_count = GetTickCount();
399 end_count = start_count + (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
400 }
401
402 do {
7b2dfab1
BC
403 io_u_qiter(&td->io_u_all, io_u, i) {
404 if (!(io_u->flags & IO_U_F_FLIGHT))
405 continue;
406
67897036
BC
407 fov = (struct fio_overlapped*)io_u->engine_data;
408
409 if (fov->io_complete) {
40c5db35 410 fov->io_complete = FALSE;
67897036
BC
411 wd->aio_events[dequeued] = io_u;
412 dequeued++;
413 }
67897036 414 }
77e7b330
JR
415 if (dequeued >= min)
416 break;
67897036 417
40c5db35 418 if (dequeued < min) {
67897036 419 status = WaitForSingleObject(wd->iocomplete_event, mswait);
f9a58c2a 420 if (status != WAIT_OBJECT_0 && dequeued >= min)
03244c19 421 break;
67897036
BC
422 }
423
1633aa61
JA
424 if (dequeued >= min ||
425 (t != NULL && timeout_expired(start_count, end_count)))
67897036
BC
426 break;
427 } while (1);
428
429 return dequeued;
430}
431
1388e473 432static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min,
433 unsigned int max, const struct timespec *t)
434{
435 struct windowsaio_options *o = td->eo;
436
437 if (o->no_completion_thread)
438 return fio_windowsaio_getevents_nothread(td, min, max, t);
439 return fio_windowaio_getevents_thread(td, min, max, t);
440}
441
2e4ef4fb
JA
442static enum fio_q_status fio_windowsaio_queue(struct thread_data *td,
443 struct io_u *io_u)
67897036 444{
c73ed246
JA
445 struct fio_overlapped *o = io_u->engine_data;
446 LPOVERLAPPED lpOvl = &o->o;
93bcfd20 447 BOOL success = FALSE;
67897036
BC
448 int rc = FIO_Q_COMPLETED;
449
450 fio_ro_check(td, io_u);
451
77e7b330 452 lpOvl->Internal = 0;
4e79098f
BC
453 lpOvl->InternalHigh = 0;
454 lpOvl->Offset = io_u->offset & 0xFFFFFFFF;
455 lpOvl->OffsetHigh = io_u->offset >> 32;
67897036
BC
456
457 switch (io_u->ddir) {
40c5db35 458 case DDIR_WRITE:
1633aa61
JA
459 success = WriteFile(io_u->file->hFile, io_u->xfer_buf,
460 io_u->xfer_buflen, NULL, lpOvl);
67897036
BC
461 break;
462 case DDIR_READ:
1633aa61
JA
463 success = ReadFile(io_u->file->hFile, io_u->xfer_buf,
464 io_u->xfer_buflen, NULL, lpOvl);
67897036
BC
465 break;
466 case DDIR_SYNC:
467 case DDIR_DATASYNC:
468 case DDIR_SYNC_FILE_RANGE:
469 success = FlushFileBuffers(io_u->file->hFile);
03244c19
BC
470 if (!success) {
471 log_err("windowsaio: failed to flush file buffers\n");
472 io_u->error = win_to_posix_error(GetLastError());
473 }
67897036
BC
474
475 return FIO_Q_COMPLETED;
67897036 476 case DDIR_TRIM:
03244c19 477 log_err("windowsaio: manual TRIM isn't supported on Windows\n");
67897036
BC
478 io_u->error = 1;
479 io_u->resid = io_u->xfer_buflen;
480 return FIO_Q_COMPLETED;
67897036
BC
481 default:
482 assert(0);
93bcfd20 483 break;
67897036
BC
484 }
485
40c5db35 486 if (success || GetLastError() == ERROR_IO_PENDING)
67897036 487 rc = FIO_Q_QUEUED;
40c5db35 488 else {
2277d5d5 489 io_u->error = win_to_posix_error(GetLastError());
67897036
BC
490 io_u->resid = io_u->xfer_buflen;
491 }
492
493 return rc;
494}
495
496/* Runs as a thread and waits for queued IO to complete */
497static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter)
498{
499 OVERLAPPED *ovl;
500 struct fio_overlapped *fov;
501 struct io_u *io_u;
502 struct windowsaio_data *wd;
503 struct thread_ctx *ctx;
504 ULONG_PTR ulKey = 0;
505 DWORD bytes;
506
507 ctx = (struct thread_ctx*)lpParameter;
508 wd = ctx->wd;
509
510 do {
1633aa61
JA
511 BOOL ret;
512
513 ret = GetQueuedCompletionStatus(ctx->iocp, &bytes, &ulKey,
514 &ovl, 250);
515 if (!ret && ovl == NULL)
67897036
BC
516 continue;
517
518 fov = CONTAINING_RECORD(ovl, struct fio_overlapped, o);
519 io_u = fov->io_u;
520
521 if (ovl->Internal == ERROR_SUCCESS) {
522 io_u->resid = io_u->xfer_buflen - ovl->InternalHigh;
523 io_u->error = 0;
524 } else {
525 io_u->resid = io_u->xfer_buflen;
2277d5d5 526 io_u->error = win_to_posix_error(GetLastError());
67897036
BC
527 }
528
40c5db35 529 fov->io_complete = TRUE;
67897036
BC
530 SetEvent(wd->iocomplete_event);
531 } while (ctx->wd->iothread_running);
532
533 CloseHandle(ctx->iocp);
534 free(ctx);
535 return 0;
536}
537
c73ed246
JA
538static void fio_windowsaio_io_u_free(struct thread_data *td, struct io_u *io_u)
539{
540 struct fio_overlapped *o = io_u->engine_data;
541
542 if (o) {
c73ed246
JA
543 io_u->engine_data = NULL;
544 free(o);
545 }
546}
547
548static int fio_windowsaio_io_u_init(struct thread_data *td, struct io_u *io_u)
549{
550 struct fio_overlapped *o;
551
552 o = malloc(sizeof(*o));
b0106419 553 o->io_complete = FALSE;
c73ed246 554 o->io_u = io_u;
77e7b330 555 o->o.hEvent = NULL;
c73ed246
JA
556 io_u->engine_data = o;
557 return 0;
558}
559
ecc314ba
BC
560static struct ioengine_ops ioengine = {
561 .name = "windowsaio",
562 .version = FIO_IOOPS_VERSION,
563 .init = fio_windowsaio_init,
564 .queue = fio_windowsaio_queue,
ecc314ba
BC
565 .getevents = fio_windowsaio_getevents,
566 .event = fio_windowsaio_event,
567 .cleanup = fio_windowsaio_cleanup,
568 .open_file = fio_windowsaio_open_file,
569 .close_file = fio_windowsaio_close_file,
c73ed246
JA
570 .get_file_size = generic_get_file_size,
571 .io_u_init = fio_windowsaio_io_u_init,
572 .io_u_free = fio_windowsaio_io_u_free,
1388e473 573 .options = options,
574 .option_struct_size = sizeof(struct windowsaio_options),
ecc314ba
BC
575};
576
c874d188 577static void fio_init fio_windowsaio_register(void)
ecc314ba
BC
578{
579 register_ioengine(&ioengine);
580}
581
c874d188 582static void fio_exit fio_windowsaio_unregister(void)
ecc314ba
BC
583{
584 unregister_ioengine(&ioengine);
585}