2 * Native Windows async IO engine
3 * Copyright (C) 2011 Bruce Cran <bruce@cran.org.uk>
/*
 * Function-pointer type with the signature of CancelIoEx. The engine looks
 * the symbol up in kernel32.dll at run time (see the init hook) instead of
 * linking against it, so the pointer may be NULL when the export is absent.
 */
15 typedef BOOL (WINAPI *CANCELIOEX)(HANDLE hFile, LPOVERLAPPED lpOverlapped);
17 struct fio_overlapped {
/* Engine-private state, stored in td->io_ops->data by the init hook. */
24 struct windowsaio_data {
/* Array of td->o.iodepth overlapped slots, handed out by the queue hook. */
25 struct fio_overlapped *ovls;
/* Array of td->o.iodepth io_u pointers: written by getevents, read by event. */
26 struct io_u **aio_events;
/* Auto-reset event: set by the completion thread, waited on by getevents. */
28 HANDLE iocomplete_event;
/* CancelIoEx entry point resolved from kernel32.dll; checked for NULL before use. */
29 CANCELIOEX pCancelIoEx;
/* Loop condition of the completion thread; cleared by the cleanup hook. */
30 BOOL iothread_running;
36 struct windowsaio_data *wd;
39 static void PrintError(LPCSTR lpszFunction);
40 static int fio_windowsaio_cancel(struct thread_data *td,
42 static BOOL timeout_expired(DWORD start_count, DWORD end_count);
43 static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min,
44 unsigned int max, struct timespec *t);
45 static struct io_u *fio_windowsaio_event(struct thread_data *td, int event);
46 static int fio_windowsaio_queue(struct thread_data *td,
48 static void fio_windowsaio_cleanup(struct thread_data *td);
49 static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter);
50 static int fio_windowsaio_init(struct thread_data *td);
51 static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f);
52 static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f);
54 int sync_file_range(int fd, off64_t offset, off64_t nbytes,
/*
 * Log the caller-supplied name in lpszFunction together with the message
 * text obtained for the calling thread's last-error code.
 */
61 static void PrintError(LPCSTR lpszFunction)
63 // Retrieve the system error message for the last-error code
66 DWORD dw = GetLastError();
/*
 * ALLOCATE_BUFFER: the system allocates the message buffer.
 * FROM_SYSTEM: the code is looked up in the system message table.
 * IGNORE_INSERTS: insert sequences in the message are left unexpanded.
 * NOTE(review): a buffer obtained with ALLOCATE_BUFFER has to be released
 * with LocalFree - confirm that this function does so.
 */
69 FORMAT_MESSAGE_ALLOCATE_BUFFER |
70 FORMAT_MESSAGE_FROM_SYSTEM |
71 FORMAT_MESSAGE_IGNORE_INSERTS,
74 MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
/* Output has the form "<function> - <message>". */
78 log_err("%s - %s", lpszFunction, lpMsgBuf);
/*
 * Engine init hook: allocate the windowsaio_data for this thread, size its
 * io_u-pointer and overlapped arrays by td->o.iodepth, create the Win32
 * event objects, look up CancelIoEx and store the result in
 * td->io_ops->data.
 */
82 static int fio_windowsaio_init(struct thread_data *td)
84 struct windowsaio_data *wd;
89 wd = malloc(sizeof(struct windowsaio_data));
/* All-zero start state: the error path below tests the array pointers against NULL. */
91 ZeroMemory(wd, sizeof(struct windowsaio_data));
/* One io_u pointer per unit of queue depth. */
96 wd->aio_events = malloc(td->o.iodepth * sizeof(struct io_u*));
97 if (wd->aio_events == NULL)
/* One overlapped slot per unit of queue depth. */
102 wd->ovls = malloc(td->o.iodepth * sizeof(struct fio_overlapped));
103 if (wd->ovls == NULL)
/*
 * Mark every slot free and not complete, and give each its own
 * manual-reset event that starts out non-signalled.
 */
108 for (i = 0; i < td->o.iodepth; i++) {
109 wd->ovls[i].io_free = TRUE;
110 wd->ovls[i].io_complete = FALSE;
112 wd->ovls[i].o.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
113 if (wd->ovls[i].o.hEvent == NULL) {
121 /* Create an auto-reset event */
122 wd->iocomplete_event = CreateEvent(NULL, FALSE, FALSE, NULL);
123 if (wd->iocomplete_event == NULL)
/* Report the last error, then undo the allocations; each array pointer is NULL-checked first. */
128 PrintError(__func__);
130 if (wd->ovls != NULL)
132 if (wd->aio_events != NULL)
133 free(wd->aio_events);
/*
 * CancelIoEx is resolved dynamically; the cancel hook tests the pointer
 * for NULL before calling through it.
 */
139 hKernel32Dll = GetModuleHandle("kernel32.dll");
140 wd->pCancelIoEx = GetProcAddress(hKernel32Dll, "CancelIoEx");
/* Publish the engine state; the other hooks read it back from here. */
142 td->io_ops->data = wd;
/*
 * Engine cleanup hook: stop the completion thread, close the thread and
 * event handles, free the event array and clear td->io_ops->data.
 */
146 static void fio_windowsaio_cleanup(struct thread_data *td)
149 struct windowsaio_data *wd;
151 wd = td->io_ops->data;
/* The completion thread loops while this flag is set; clear it, then wait for the thread to exit. */
154 wd->iothread_running = FALSE;
155 WaitForSingleObject(wd->iothread, INFINITE);
157 CloseHandle(wd->iothread);
158 CloseHandle(wd->iocomplete_event);
/* Close the per-slot events created by the init hook. */
160 for (i = 0; i < td->o.iodepth; i++) {
161 CloseHandle(wd->ovls[i].o.hEvent);
164 free(wd->aio_events);
/* Clear the engine-private pointer. */
168 td->io_ops->data = NULL;
/*
 * Open f->file_name with CreateFile. When the engine has private data and
 * neither odirect nor sync_io is set, the handle is also associated with an
 * I/O completion port and the completion thread is started.
 */
173 static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f)
/* Starting values; further flags are OR-ed in below according to the job options. */
177 DWORD flags = FILE_FLAG_POSIX_SEMANTICS;
178 DWORD sharemode = FILE_SHARE_READ | FILE_SHARE_WRITE;
179 DWORD openmode = OPEN_ALWAYS;
182 dprint(FD_FILE, "fd open %s\n", f->file_name);
/* Pipes are rejected by this engine. */
184 if (f->filetype == FIO_TYPE_PIPE) {
185 log_err("fio: windowsaio doesn't support pipes\n");
/* The name "-" (stdin/stdout) is rejected as well. */
189 if (!strcmp(f->file_name, "-")) {
190 log_err("fio: can't read/write to stdin/out\n");
/* Overlapped handles are requested only without odirect/sync_io and when engine data exists. */
194 if (!td->o.odirect && !td->o.sync_io && td->io_ops->data != NULL)
195 flags |= FILE_FLAG_OVERLAPPED;
198 flags |= FILE_FLAG_NO_BUFFERING;
200 flags |= FILE_FLAG_WRITE_THROUGH;
/* A direction of exactly TD_DDIR_READ or TD_DDIR_WRITE gets the sequential-scan hint. */
203 if (td->o.td_ddir == TD_DDIR_READ ||
204 td->o.td_ddir == TD_DDIR_WRITE)
205 flags |= FILE_FLAG_SEQUENTIAL_SCAN;
207 flags |= FILE_FLAG_RANDOM_ACCESS;
/* Read-only access when the job does not write or read_only is set. */
209 if (!td_write(td) || read_only)
210 access = GENERIC_READ;
212 access = (GENERIC_READ | GENERIC_WRITE);
/* OPEN_ALWAYS (create the file if it is missing) is selected when create_on_open is set. */
214 if (td->o.create_on_open > 0)
215 openmode = OPEN_ALWAYS;
217 openmode = OPEN_EXISTING;
219 f->hFile = CreateFile(f->file_name, access, sharemode,
220 NULL, openmode, flags, NULL);
/* CreateFile reports failure with INVALID_HANDLE_VALUE, not NULL. */
222 if (f->hFile == INVALID_HANDLE_VALUE) {
223 PrintError(__func__);
227 /* Only set up the completion port and thread if we're not just
228 * querying the device size */
229 if (!rc && td->io_ops->data != NULL && !td->o.odirect && !td->o.sync_io) {
230 struct thread_ctx *ctx;
231 struct windowsaio_data *wd;
/* Passing NULL as the existing port creates a new port associated with this file handle. */
232 hFile = CreateIoCompletionPort(f->hFile, NULL, 0, 0);
234 wd = td->io_ops->data;
/* NOTE(review): this test repeats a condition already required by the enclosing if. */
236 if (!td->o.odirect && !td->o.sync_io)
/* Set before CreateThread; the thread's loop condition reads this flag. */
241 wd->iothread_running = TRUE;
/* Heap-allocated context passed to the thread as its parameter. */
244 ctx = malloc(sizeof(struct thread_ctx));
248 wd->iothread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, NULL);
251 if (rc || wd->iothread == NULL) {
252 PrintError(__func__);
/*
 * Close the file's Win32 handle when it is valid, log a CloseHandle
 * failure, and reset f->hFile to INVALID_HANDLE_VALUE. td is unused.
 */
260 static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f)
262 dprint(FD_FILE, "fd close %s\n", f->file_name);
264 if (f->hFile != INVALID_HANDLE_VALUE) {
265 if (!CloseHandle(f->hFile))
266 PrintError(__func__);
269 f->hFile = INVALID_HANDLE_VALUE;
/*
 * Decide whether the deadline end_count has passed, given that the wait
 * began at start_count. Both values, like the current time read here, are
 * GetTickCount() millisecond ticks held in a DWORD, so they can wrap.
 */
273 static BOOL timeout_expired(DWORD start_count, DWORD end_count)
275 BOOL expired = FALSE;
278 current_time = GetTickCount();
/* Deadline lies after the start (no wrap in the deadline): test whether the current tick has reached it. */
280 if ((end_count > start_count) && current_time >= end_count)
/* Current tick is below the start (the counter wrapped) and already beyond the deadline. */
282 else if (current_time < start_count && current_time > end_count)
/*
 * Event hook: return the io_u stored at index `event` of wd->aio_events,
 * the array that the getevents hook fills. No bounds check is done here.
 */
288 static struct io_u* fio_windowsaio_event(struct thread_data *td, int event)
290 struct windowsaio_data *wd = td->io_ops->data;
291 return wd->aio_events[event];
/*
 * Getevents hook: gather completed io_us from td->io_u_busylist into
 * wd->aio_events, waiting on the engine's completion event while fewer
 * than `min` have been collected. `t` supplies the timeout and is compared
 * against NULL in the loop's exit test.
 */
294 static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min,
295 unsigned int max, struct timespec *t)
297 struct windowsaio_data *wd = td->io_ops->data;
298 struct flist_head *entry;
299 unsigned int dequeued = 0;
301 struct fio_overlapped *fov;
302 DWORD start_count = 0;
/*
 * Convert the timespec to whole milliseconds for WaitForSingleObject and
 * derive the tick-count deadline from the same amount. The addition may
 * wrap the DWORD; timeout_expired() has a branch for that case.
 */
308 mswait = (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
309 start_count = GetTickCount();
310 end_count = start_count + (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
/* Walk the busy list looking for io_us whose overlapped slot has been flagged complete. */
314 flist_for_each(entry, &td->io_u_busylist) {
315 io_u = flist_entry(entry, struct io_u, list);
316 fov = (struct fio_overlapped*)io_u->engine_data;
/* Consume the completion flag and record the io_u for the event hook. */
318 if (fov->io_complete) {
319 fov->io_complete = FALSE;
321 wd->aio_events[dequeued] = io_u;
/* Too few completions so far: wait for the completion thread's signal, for at most mswait milliseconds. */
329 if (dequeued < min) {
330 status = WaitForSingleObject(wd->iocomplete_event, mswait);
331 if (status != WAIT_OBJECT_0 && dequeued > 0)
/* Exit test: enough events gathered, or a timeout was supplied and has run out. */
335 if (dequeued >= min || (t != NULL && timeout_expired(start_count, end_count)))
/*
 * Queue hook: claim a free overlapped slot, fill its OVERLAPPED structure
 * from the io_u, and issue the operation selected by io_u->ddir.
 */
342 static int fio_windowsaio_queue(struct thread_data *td,
345 LPOVERLAPPED lpOvl = NULL;
346 struct windowsaio_data *wd;
350 int rc = FIO_Q_COMPLETED;
352 fio_ro_check(td, io_u);
354 wd = td->io_ops->data;
/* Linear search for a free slot; the slot found is claimed and its event reset. */
357 for (index = 0; index < td->o.iodepth; index++) {
358 if (wd->ovls[index].io_free) {
359 wd->ovls[index].io_free = FALSE;
360 ResetEvent(wd->ovls[index].o.hEvent);
/* The search is expected to have found a slot. */
365 assert(index < td->o.iodepth);
/*
 * Link slot and io_u to each other and split the file offset into the
 * low and high 32-bit halves of the OVERLAPPED structure.
 */
367 lpOvl = &wd->ovls[index].o;
368 wd->ovls[index].io_u = io_u;
369 lpOvl->Internal = STATUS_PENDING;
370 lpOvl->InternalHigh = 0;
371 lpOvl->Offset = io_u->offset & 0xFFFFFFFF;
372 lpOvl->OffsetHigh = io_u->offset >> 32;
373 lpOvl->Pointer = NULL;
374 io_u->engine_data = &wd->ovls[index];
377 switch (io_u->ddir) {
379 success = WriteFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, &iobytes, lpOvl);
382 success = ReadFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, &iobytes, lpOvl);
/* This case is served by a direct FlushFileBuffers call and returns as completed. */
386 case DDIR_SYNC_FILE_RANGE:
387 success = FlushFileBuffers(io_u->file->hFile);
389 io_u->error = GetLastError();
391 return FIO_Q_COMPLETED;
/* NOTE(review): unlike the messages in the open hook, this one has no trailing newline. */
394 log_err("manual TRIM isn't supported on Windows");
396 io_u->resid = io_u->xfer_buflen;
397 return FIO_Q_COMPLETED;
/* With use_iocp set, an immediate success and ERROR_IO_PENDING both take this branch. */
403 if (wd->use_iocp && (success || GetLastError() == ERROR_IO_PENDING)) {
/* Without use_iocp, a successful call has its byte count in iobytes already. */
405 } else if (success && !wd->use_iocp) {
406 io_u->resid = io_u->xfer_buflen - iobytes;
/*
 * Record the error and count the whole buffer as not transferred.
 * NOTE(review): PrintError() runs before GetLastError() is read here;
 * confirm that it cannot change the thread's last-error value.
 */
409 PrintError(__func__);
410 io_u->error = GetLastError();
411 io_u->resid = io_u->xfer_buflen;
417 /* Runs as a thread and waits for queued IO to complete */
/*
 * lpParameter is the struct thread_ctx allocated by the open hook; it
 * supplies the completion port handle and the engine state.
 */
418 static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter)
421 struct fio_overlapped *fov;
423 struct windowsaio_data *wd;
424 struct thread_ctx *ctx;
428 ctx = (struct thread_ctx*)lpParameter;
/* Each wait on the port is bounded to 250 ms; the loop condition below tests iothread_running. */
432 if (!GetQueuedCompletionStatus(ctx->iocp, &bytes, &ulKey, &ovl, 250))
/* Recover the enclosing fio_overlapped from the OVERLAPPED pointer the port returned. */
435 fov = CONTAINING_RECORD(ovl, struct fio_overlapped, o);
/* Success: the residual is the buffer length minus the count held in InternalHigh. */
438 if (ovl->Internal == ERROR_SUCCESS) {
439 io_u->resid = io_u->xfer_buflen - ovl->InternalHigh;
/* Otherwise the whole buffer counts as untransferred and the status value becomes the error. */
442 io_u->resid = io_u->xfer_buflen;
443 io_u->error = ovl->Internal;
/* Flag the slot for the getevents hook and signal the event it waits on. */
446 fov->io_complete = TRUE;
447 SetEvent(wd->iocomplete_event);
448 } while (ctx->wd->iothread_running);
/* Loop left: close the completion port handle. */
450 CloseHandle(ctx->iocp);
/*
 * Cancel hook: when the CancelIoEx entry point was found by the init hook,
 * use it to cancel the request tied to io_u's overlapped slot.
 */
455 static int fio_windowsaio_cancel(struct thread_data *td,
460 struct windowsaio_data *wd = td->io_ops->data;
462 /* If we're running on Vista or newer, we can cancel individual IO requests */
463 if (wd->pCancelIoEx != NULL) {
/* engine_data was pointed at the slot by the queue hook. */
464 struct fio_overlapped *ovl = io_u->engine_data;
465 if (!wd->pCancelIoEx(io_u->file->hFile, &ovl->o))
/*
 * Operation table for the "windowsaio" engine. Every hook is defined in
 * this file except get_file_size, which uses generic_get_file_size.
 */
473 static struct ioengine_ops ioengine = {
474 .name = "windowsaio",
475 .version = FIO_IOOPS_VERSION,
476 .init = fio_windowsaio_init,
477 .queue = fio_windowsaio_queue,
478 .cancel = fio_windowsaio_cancel,
479 .getevents = fio_windowsaio_getevents,
480 .event = fio_windowsaio_event,
481 .cleanup = fio_windowsaio_cleanup,
482 .open_file = fio_windowsaio_open_file,
483 .close_file = fio_windowsaio_close_file,
484 .get_file_size = generic_get_file_size
/*
 * Register the windowsaio operation table with the engine registry.
 * NOTE(review): the function name says "posixaio" although the table it
 * registers is named "windowsaio" - looks like a copy/paste leftover.
 */
487 static void fio_init fio_posixaio_register(void)
489 register_ioengine(&ioengine);
/*
 * Remove the windowsaio operation table from the engine registry.
 * NOTE(review): the function name says "posixaio" although the table it
 * unregisters is named "windowsaio" - looks like a copy/paste leftover.
 */
492 static void fio_exit fio_posixaio_unregister(void)
494 unregister_ioengine(&ioengine);