Fix integer overflow in calculating large IOPS on 32-bit platforms
[fio.git] / engines / windowsaio.c
... / ...
CommitLineData
1/*
2 * Native Windows async IO engine
3 * Copyright (C) 2011 Bruce Cran <bruce@cran.org.uk>
4 */
5
6#include <stdio.h>
7#include <stdlib.h>
8#include <unistd.h>
9#include <signal.h>
10#include <errno.h>
11#include <windows.h>
12
13#include "../fio.h"
14
15typedef BOOL (WINAPI *CANCELIOEX)(HANDLE hFile, LPOVERLAPPED lpOverlapped);
16
17struct fio_overlapped {
18 OVERLAPPED o;
19 struct io_u *io_u;
20 BOOL io_complete;
21 BOOL io_free;
22};
23
24struct windowsaio_data {
25 struct fio_overlapped *ovls;
26 struct io_u **aio_events;
27 HANDLE iothread;
28 HANDLE iocomplete_event;
29 CANCELIOEX pCancelIoEx;
30 BOOL iothread_running;
31 BOOL use_iocp;
32};
33
34struct thread_ctx {
35 HANDLE iocp;
36 struct windowsaio_data *wd;
37};
38
39static void PrintError(LPCSTR lpszFunction);
40static int fio_windowsaio_cancel(struct thread_data *td,
41 struct io_u *io_u);
42static BOOL timeout_expired(DWORD start_count, DWORD end_count);
43static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min,
44 unsigned int max, struct timespec *t);
45static struct io_u *fio_windowsaio_event(struct thread_data *td, int event);
46static int fio_windowsaio_queue(struct thread_data *td,
47 struct io_u *io_u);
48static void fio_windowsaio_cleanup(struct thread_data *td);
49static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter);
50static int fio_windowsaio_init(struct thread_data *td);
51static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f);
52static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f);
53
/*
 * Stub implementation of sync_file_range(): it performs no I/O, ignores
 * every argument and always fails.
 *
 * Returns -1 with errno set to ENOSYS.
 */
int sync_file_range(int fd, off64_t offset, off64_t nbytes,
		    unsigned int flags)
{
	(void)fd;
	(void)offset;
	(void)nbytes;
	(void)flags;

	errno = ENOSYS;
	return -1;
}
60
61static void PrintError(LPCSTR lpszFunction)
62{
63 // Retrieve the system error message for the last-error code
64
65 LPSTR lpMsgBuf;
66 DWORD dw = GetLastError();
67
68 FormatMessage(
69 FORMAT_MESSAGE_ALLOCATE_BUFFER |
70 FORMAT_MESSAGE_FROM_SYSTEM |
71 FORMAT_MESSAGE_IGNORE_INSERTS,
72 NULL,
73 dw,
74 MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
75 (LPTSTR)&lpMsgBuf,
76 0, NULL );
77
78 log_err("%s - %s", lpszFunction, lpMsgBuf);
79 LocalFree(lpMsgBuf);
80}
81
82static int fio_windowsaio_init(struct thread_data *td)
83{
84 struct windowsaio_data *wd;
85 HANDLE hKernel32Dll;
86 int rc = 0;
87 int i;
88
89 wd = malloc(sizeof(struct windowsaio_data));
90 if (wd != NULL)
91 ZeroMemory(wd, sizeof(struct windowsaio_data));
92 else
93 rc = 1;
94
95 if (!rc) {
96 wd->aio_events = malloc(td->o.iodepth * sizeof(struct io_u*));
97 if (wd->aio_events == NULL)
98 rc = 1;
99 }
100
101 if (!rc) {
102 wd->ovls = malloc(td->o.iodepth * sizeof(struct fio_overlapped));
103 if (wd->ovls == NULL)
104 rc = 1;
105 }
106
107 if (!rc) {
108 for (i = 0; i < td->o.iodepth; i++) {
109 wd->ovls[i].io_free = TRUE;
110 wd->ovls[i].io_complete = FALSE;
111
112 wd->ovls[i].o.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
113 if (wd->ovls[i].o.hEvent == NULL) {
114 rc = 1;
115 break;
116 }
117 }
118 }
119
120 if (!rc) {
121 /* Create an auto-reset event */
122 wd->iocomplete_event = CreateEvent(NULL, FALSE, FALSE, NULL);
123 if (wd->iocomplete_event == NULL)
124 rc = 1;
125 }
126
127 if (rc) {
128 PrintError(__func__);
129 if (wd != NULL) {
130 if (wd->ovls != NULL)
131 free(wd->ovls);
132 if (wd->aio_events != NULL)
133 free(wd->aio_events);
134
135 free(wd);
136 }
137 }
138
139 hKernel32Dll = GetModuleHandle("kernel32.dll");
140 wd->pCancelIoEx = GetProcAddress(hKernel32Dll, "CancelIoEx");
141
142 td->io_ops->data = wd;
143 return 0;
144}
145
146static void fio_windowsaio_cleanup(struct thread_data *td)
147{
148 int i;
149 struct windowsaio_data *wd;
150
151 wd = td->io_ops->data;
152
153 if (wd != NULL) {
154 wd->iothread_running = FALSE;
155 WaitForSingleObject(wd->iothread, INFINITE);
156
157 CloseHandle(wd->iothread);
158 CloseHandle(wd->iocomplete_event);
159
160 for (i = 0; i < td->o.iodepth; i++) {
161 CloseHandle(wd->ovls[i].o.hEvent);
162 }
163
164 free(wd->aio_events);
165 free(wd->ovls);
166 free(wd);
167
168 td->io_ops->data = NULL;
169 }
170}
171
172
173static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f)
174{
175 int rc = 0;
176 HANDLE hFile;
177 DWORD flags = FILE_FLAG_POSIX_SEMANTICS;
178 DWORD sharemode = FILE_SHARE_READ | FILE_SHARE_WRITE;
179 DWORD openmode = OPEN_ALWAYS;
180 DWORD access;
181
182 dprint(FD_FILE, "fd open %s\n", f->file_name);
183
184 if (f->filetype == FIO_TYPE_PIPE) {
185 log_err("fio: windowsaio doesn't support pipes\n");
186 return 1;
187 }
188
189 if (!strcmp(f->file_name, "-")) {
190 log_err("fio: can't read/write to stdin/out\n");
191 return 1;
192 }
193
194 if (!td->o.odirect && !td->o.sync_io && td->io_ops->data != NULL)
195 flags |= FILE_FLAG_OVERLAPPED;
196
197 if (td->o.odirect)
198 flags |= FILE_FLAG_NO_BUFFERING;
199 if (td->o.sync_io)
200 flags |= FILE_FLAG_WRITE_THROUGH;
201
202
203 if (td->o.td_ddir == TD_DDIR_READ ||
204 td->o.td_ddir == TD_DDIR_WRITE)
205 flags |= FILE_FLAG_SEQUENTIAL_SCAN;
206 else
207 flags |= FILE_FLAG_RANDOM_ACCESS;
208
209 if (!td_write(td) || read_only)
210 access = GENERIC_READ;
211 else
212 access = (GENERIC_READ | GENERIC_WRITE);
213
214 if (td->o.create_on_open > 0)
215 openmode = OPEN_ALWAYS;
216 else
217 openmode = OPEN_EXISTING;
218
219 f->hFile = CreateFile(f->file_name, access, sharemode,
220 NULL, openmode, flags, NULL);
221
222 if (f->hFile == INVALID_HANDLE_VALUE) {
223 PrintError(__func__);
224 rc = 1;
225 }
226
227 /* Only set up the competion port and thread if we're not just
228 * querying the device size */
229 if (!rc && td->io_ops->data != NULL && !td->o.odirect && !td->o.sync_io) {
230 struct thread_ctx *ctx;
231 struct windowsaio_data *wd;
232 hFile = CreateIoCompletionPort(f->hFile, NULL, 0, 0);
233
234 wd = td->io_ops->data;
235
236 if (!td->o.odirect && !td->o.sync_io)
237 wd->use_iocp = 1;
238 else
239 wd->use_iocp = 0;
240
241 wd->iothread_running = TRUE;
242
243 if (!rc) {
244 ctx = malloc(sizeof(struct thread_ctx));
245 ctx->iocp = hFile;
246 ctx->wd = wd;
247
248 wd->iothread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, NULL);
249 }
250
251 if (rc || wd->iothread == NULL) {
252 PrintError(__func__);
253 rc = 1;
254 }
255 }
256
257 return rc;
258}
259
260static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f)
261{
262 dprint(FD_FILE, "fd close %s\n", f->file_name);
263
264 if (f->hFile != INVALID_HANDLE_VALUE) {
265 if (!CloseHandle(f->hFile))
266 PrintError(__func__);
267 }
268
269 f->hFile = INVALID_HANDLE_VALUE;
270 return 0;
271}
272
273static BOOL timeout_expired(DWORD start_count, DWORD end_count)
274{
275 BOOL expired = FALSE;
276 DWORD current_time;
277
278 current_time = GetTickCount();
279
280 if ((end_count > start_count) && current_time >= end_count)
281 expired = TRUE;
282 else if (current_time < start_count && current_time > end_count)
283 expired = TRUE;
284
285 return expired;
286}
287
288static struct io_u* fio_windowsaio_event(struct thread_data *td, int event)
289{
290 struct windowsaio_data *wd = td->io_ops->data;
291 return wd->aio_events[event];
292}
293
294static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min,
295 unsigned int max, struct timespec *t)
296{
297 struct windowsaio_data *wd = td->io_ops->data;
298 struct flist_head *entry;
299 unsigned int dequeued = 0;
300 struct io_u *io_u;
301 struct fio_overlapped *fov;
302 DWORD start_count = 0;
303 DWORD end_count = 0;
304 DWORD status;
305 DWORD mswait = 250;
306
307 if (t != NULL) {
308 mswait = (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
309 start_count = GetTickCount();
310 end_count = start_count + (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
311 }
312
313 do {
314 flist_for_each(entry, &td->io_u_busylist) {
315 io_u = flist_entry(entry, struct io_u, list);
316 fov = (struct fio_overlapped*)io_u->engine_data;
317
318 if (fov->io_complete) {
319 fov->io_complete = FALSE;
320 fov->io_free = TRUE;
321 wd->aio_events[dequeued] = io_u;
322 dequeued++;
323 }
324
325 if (dequeued >= min)
326 break;
327 }
328
329 if (dequeued < min) {
330 status = WaitForSingleObject(wd->iocomplete_event, mswait);
331 if (status != WAIT_OBJECT_0 && dequeued > 0)
332 break;
333 }
334
335 if (dequeued >= min || (t != NULL && timeout_expired(start_count, end_count)))
336 break;
337 } while (1);
338
339 return dequeued;
340}
341
342static int fio_windowsaio_queue(struct thread_data *td,
343 struct io_u *io_u)
344{
345 LPOVERLAPPED lpOvl = NULL;
346 struct windowsaio_data *wd;
347 DWORD iobytes;
348 BOOL success;
349 int index;
350 int rc = FIO_Q_COMPLETED;
351
352 fio_ro_check(td, io_u);
353
354 wd = td->io_ops->data;
355
356 if (wd->use_iocp) {
357 for (index = 0; index < td->o.iodepth; index++) {
358 if (wd->ovls[index].io_free) {
359 wd->ovls[index].io_free = FALSE;
360 ResetEvent(wd->ovls[index].o.hEvent);
361 break;
362 }
363 }
364
365 assert(index < td->o.iodepth);
366
367 lpOvl = &wd->ovls[index].o;
368 wd->ovls[index].io_u = io_u;
369 lpOvl->Internal = STATUS_PENDING;
370 lpOvl->InternalHigh = 0;
371 lpOvl->Offset = io_u->offset & 0xFFFFFFFF;
372 lpOvl->OffsetHigh = io_u->offset >> 32;
373 lpOvl->Pointer = NULL;
374 io_u->engine_data = &wd->ovls[index];
375 }
376
377 switch (io_u->ddir) {
378 case DDIR_WRITE:
379 success = WriteFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, &iobytes, lpOvl);
380 break;
381 case DDIR_READ:
382 success = ReadFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, &iobytes, lpOvl);
383 break;
384 case DDIR_SYNC:
385 case DDIR_DATASYNC:
386 case DDIR_SYNC_FILE_RANGE:
387 success = FlushFileBuffers(io_u->file->hFile);
388 if (!success)
389 io_u->error = GetLastError();
390
391 return FIO_Q_COMPLETED;
392 break;
393 case DDIR_TRIM:
394 log_err("manual TRIM isn't supported on Windows");
395 io_u->error = 1;
396 io_u->resid = io_u->xfer_buflen;
397 return FIO_Q_COMPLETED;
398 break;
399 default:
400 assert(0);
401 }
402
403 if (wd->use_iocp && (success || GetLastError() == ERROR_IO_PENDING)) {
404 rc = FIO_Q_QUEUED;
405 } else if (success && !wd->use_iocp) {
406 io_u->resid = io_u->xfer_buflen - iobytes;
407 io_u->error = 0;
408 } else {
409 PrintError(__func__);
410 io_u->error = GetLastError();
411 io_u->resid = io_u->xfer_buflen;
412 }
413
414 return rc;
415}
416
417/* Runs as a thread and waits for queued IO to complete */
418static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter)
419{
420 OVERLAPPED *ovl;
421 struct fio_overlapped *fov;
422 struct io_u *io_u;
423 struct windowsaio_data *wd;
424 struct thread_ctx *ctx;
425 ULONG_PTR ulKey = 0;
426 DWORD bytes;
427
428 ctx = (struct thread_ctx*)lpParameter;
429 wd = ctx->wd;
430
431 do {
432 if (!GetQueuedCompletionStatus(ctx->iocp, &bytes, &ulKey, &ovl, 250))
433 continue;
434
435 fov = CONTAINING_RECORD(ovl, struct fio_overlapped, o);
436 io_u = fov->io_u;
437
438 if (ovl->Internal == ERROR_SUCCESS) {
439 io_u->resid = io_u->xfer_buflen - ovl->InternalHigh;
440 io_u->error = 0;
441 } else {
442 io_u->resid = io_u->xfer_buflen;
443 io_u->error = ovl->Internal;
444 }
445
446 fov->io_complete = TRUE;
447 SetEvent(wd->iocomplete_event);
448 } while (ctx->wd->iothread_running);
449
450 CloseHandle(ctx->iocp);
451 free(ctx);
452 return 0;
453}
454
455static int fio_windowsaio_cancel(struct thread_data *td,
456 struct io_u *io_u)
457{
458 int rc = 0;
459
460 struct windowsaio_data *wd = td->io_ops->data;
461
462 /* If we're running on Vista or newer, we can cancel individual IO requests */
463 if (wd->pCancelIoEx != NULL) {
464 struct fio_overlapped *ovl = io_u->engine_data;
465 if (!wd->pCancelIoEx(io_u->file->hFile, &ovl->o))
466 rc = 1;
467 } else
468 rc = 1;
469
470 return rc;
471}
472
473static struct ioengine_ops ioengine = {
474 .name = "windowsaio",
475 .version = FIO_IOOPS_VERSION,
476 .init = fio_windowsaio_init,
477 .queue = fio_windowsaio_queue,
478 .cancel = fio_windowsaio_cancel,
479 .getevents = fio_windowsaio_getevents,
480 .event = fio_windowsaio_event,
481 .cleanup = fio_windowsaio_cleanup,
482 .open_file = fio_windowsaio_open_file,
483 .close_file = fio_windowsaio_close_file,
484 .get_file_size = generic_get_file_size
485};
486
487static void fio_init fio_posixaio_register(void)
488{
489 register_ioengine(&ioengine);
490}
491
492static void fio_exit fio_posixaio_unregister(void)
493{
494 unregister_ioengine(&ioengine);
495}