2 * Native Windows async IO engine
3 * Copyright (C) 2010 Bruce Cran <bruce@cran.org.uk>
/* Presumably a debug toggle for this engine; initialised to FALSE, no reader is visible here. */
15 BOOL windowsaio_debug = FALSE;
/* Engine state that init stores in td->io_ops->data. */
17 struct windowsaio_data {
/* Completed io_u pointers: written by getevents, read back by event. */
18 struct io_u **aio_events;
/* Event handles of pending requests: appended by queue, waited on by getevents. */
19 HANDLE *busyIoHandles;
/* Incremented by the completion thread once per finished request. */
21 unsigned int ioFinished;
/* Pointer to the engine state; presumably a member of struct thread_ctx (used as ctx->wd) - confirm. */
34 struct windowsaio_data *wd;
/* Forward declarations for the file-local helpers and ioengine callbacks defined below. */
37 static void PrintError(LPCSTR lpszFunction);
38 static int fio_windowsaio_cancel(struct thread_data *td,
40 static BOOL TimedOut(DWORD startCount, DWORD endCount);
41 static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min,
42 unsigned int max, struct timespec *t);
43 static struct io_u *fio_windowsaio_event(struct thread_data *td, int event);
44 static int fio_windowsaio_queue(struct thread_data *td,
46 static void fio_windowsaio_cleanup(struct thread_data *td);
/* Thread entry point; passed to CreateThread in open_file. */
47 static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter);
48 static int fio_windowsaio_init(struct thread_data *td);
49 static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f);
50 static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f);
/*
 * Log the calling thread's last error through log_err, prefixed with the
 * caller-supplied lpszFunction string.
 */
52 static void PrintError(LPCSTR lpszFunction)
54 // Retrieve the system error message for the last-error code
/* Capture the error code first, before any later call can overwrite it. */
57 DWORD dw = GetLastError();
/*
 * Ask the system to allocate the message buffer and look the text up in
 * the system message table without expanding insert sequences.
 * NOTE(review): a system-allocated buffer normally needs LocalFree -
 * confirm that the release exists in the full function.
 */
60 FORMAT_MESSAGE_ALLOCATE_BUFFER |
61 FORMAT_MESSAGE_FROM_SYSTEM |
62 FORMAT_MESSAGE_IGNORE_INSERTS,
65 MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
/* Output has the form "<lpszFunction> - <message>". */
69 log_err("%s - %s", lpszFunction, lpMsgBuf);
/* ioengine cancel callback: requests cancellation on the io_u's file handle. */
73 static int fio_windowsaio_cancel(struct thread_data *td,
/*
 * NOTE(review): CancelIo acts on the whole handle (pending I/O issued by
 * the calling thread), not on this single io_u - confirm that is intended.
 */
79 bSuccess = CancelIo(io_u->file->hFile);
/*
 * Compare the current GetTickCount() value against a deadline window
 * described by startCount (window start) and endCount (deadline).
 */
87 static BOOL TimedOut(DWORD startCount, DWORD endCount)
92 currentTime = GetTickCount();
/* Ordinary case: the deadline is numerically after the start and has been reached. */
94 if ((endCount > startCount) && currentTime >= endCount)
/* Appears to cover tick-counter wrap-around: the clock is below the start yet past the deadline. */
96 else if (currentTime < startCount && currentTime > endCount)
/*
 * ioengine getevents callback: loops until at least `min` requests have been
 * dequeued or the optional timeout `t` has expired, recording finished io_us
 * in wd->aio_events.
 */
102 static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min,
103 unsigned int max, struct timespec *t)
105 struct windowsaio_data *wd = td->io_ops->data;
106 struct flist_head *entry;
107 unsigned int dequeued = 0;
109 DWORD startCount = 0, endCount = 0;
110 BOOL timedout = FALSE;
/* Per-wait timeout in milliseconds; 100 unless replaced from `t` below. */
112 unsigned int waitInMs = 100;
/* Convert the timespec to milliseconds and derive the tick-count deadline from it. */
115 waitInMs = (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
116 startCount = GetTickCount();
/* DWORD addition may wrap; TimedOut has a branch for that case. */
117 endCount = startCount + (t->tv_sec * 1000) + (t->tv_nsec / 1000000);
120 while (dequeued < min && !timedout) {
/*
 * Wait for any one pending request's event (bWaitAll is FALSE).
 * NOTE(review): the return value is discarded, and the API limits the
 * handle count to MAXIMUM_WAIT_OBJECTS - confirm busyIo stays below it.
 */
121 WaitForMultipleObjects(wd->busyIo, wd->busyIoHandles, FALSE, waitInMs);
/* Walk the thread's list of busy io_us. */
123 flist_for_each(entry, &td->io_u_busylist) {
124 io_u = flist_entry(entry, struct io_u, list);
/* Record the io_u so fio_windowsaio_event can hand it back by index. */
132 wd->aio_events[r] = io_u;
/* The deadline is only checked when the caller supplied a timeout. */
141 if (t != NULL && TimedOut(startCount, endCount))
/* ioengine event callback: return the io_u stored at index `event` in wd->aio_events. */
148 static struct io_u *fio_windowsaio_event(struct thread_data *td, int event)
150 struct windowsaio_data *wd = td->io_ops->data;
/* No range check on `event` is performed here. */
151 return wd->aio_events[event];
/*
 * ioengine queue callback: submit one io_u. Reads and writes go through
 * overlapped ReadFile/WriteFile calls; sync-type and TRIM requests return
 * FIO_Q_COMPLETED immediately.
 */
154 static int fio_windowsaio_queue(struct thread_data *td,
159 BOOL bSuccess = TRUE;
162 fio_ro_check(td, io_u);
/* NOTE(review): the allocation is zeroed on the very next line without a NULL check. */
164 fov = malloc(sizeof(FIO_OVERLAPPED));
165 ZeroMemory(fov, sizeof(FIO_OVERLAPPED));
167 struct windowsaio_data *wd = td->io_ops->data;
/* Split the file offset into the low and high 32-bit halves of the OVERLAPPED. */
171 fov->o.Offset = io_u->offset & 0xFFFFFFFF;
172 fov->o.OffsetHigh = io_u->offset >> 32;
/* Unnamed auto-reset event, initially non-signalled, used to wait for this request. */
173 fov->o.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
176 if (fov->o.hEvent == NULL) {
177 PrintError(__func__);
/* Dispatch on the data direction of the request. */
181 if (io_u->ddir == DDIR_WRITE)
182 bSuccess = WriteFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, &ioBytes, &fov->o);
183 else if (io_u->ddir == DDIR_READ)
184 bSuccess = ReadFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, &ioBytes, &fov->o);
185 else if (io_u->ddir == DDIR_SYNC ||
186 io_u->ddir == DDIR_DATASYNC ||
187 io_u->ddir == DDIR_SYNC_FILE_RANGE)
/*
 * NOTE(review): the FlushFileBuffers result is ignored, and this early
 * return (like the TRIM return below) happens without releasing fov or
 * the event handle created above.
 */
189 FlushFileBuffers(io_u->file->hFile);
190 return FIO_Q_COMPLETED;
191 } else if (io_u->ddir == DDIR_TRIM) {
192 log_info("explicit TRIM isn't supported on Windows");
193 return FIO_Q_COMPLETED;
/* Residual = requested length minus the byte count reported in InternalHigh. */
198 io_u->resid = io_u->xfer_buflen - fov->o.InternalHigh;
200 rc = FIO_Q_COMPLETED;
/* The request is in flight: remember its event so getevents can wait on it. */
201 } else if (!bSuccess && GetLastError() == ERROR_IO_PENDING) {
202 wd->busyIoHandles[wd->busyIo++] = fov->o.hEvent;
/*
 * Failure path: report the error and mark the whole transfer as residual.
 * NOTE(review): GetLastError is read after PrintError has run; calls made
 * inside PrintError may have changed the last-error value - confirm.
 */
205 PrintError(__func__);
206 io_u->error = GetLastError();
207 io_u->resid = io_u->xfer_buflen;
208 rc = FIO_Q_COMPLETED;
/*
 * ioengine cleanup callback: waits for the completion thread to report that
 * it has stopped, then releases the thread handle and the engine's arrays.
 */
214 static void fio_windowsaio_cleanup(struct thread_data *td)
216 struct windowsaio_data *wd;
218 wd = td->io_ops->data;
/* Loop until IoCompletionRoutine sets wd->stopped to TRUE on its way out. */
221 while (wd->stopped == FALSE)
225 CloseHandle(wd->hThread);
227 free(wd->aio_events);
228 free(wd->busyIoHandles);
/* Clear the pointer so the engine state is no longer reachable from td. */
231 td->io_ops->data = NULL;
/*
 * Completion thread body. lpParameter is a struct thread_ctx pointer; the
 * loop drains ctx->ioCP while ctx->wd->running is set, records the outcome of
 * each finished request on its io_u, and signals exit through wd->stopped.
 */
236 static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter)
241 struct windowsaio_data *wd;
243 struct thread_ctx *ctx;
249 ctx = (struct thread_ctx*)lpParameter;
253 while (ctx->wd->running) {
/* Wait up to 100 ms per iteration so the running flag is re-checked regularly. */
254 bSuccess = GetQueuedCompletionStatus(ctx->ioCP, &bytes, &ulKey, &ovl, 100);
/* A timeout is distinguished from other failures, which are reported via PrintError. */
257 if (GetLastError() == WAIT_TIMEOUT) {
260 PrintError(__func__);
/* Recover the enclosing FIO_OVERLAPPED from the OVERLAPPED pointer that was dequeued. */
265 fov = CONTAINING_RECORD(ovl, FIO_OVERLAPPED, o);
/* NOTE(review): plain, non-atomic increment performed on this thread - confirm how readers synchronise. */
271 ctx->wd->ioFinished++;
/* Success: residual is the requested length minus the transferred byte count. */
273 if (ovl->Internal == ERROR_SUCCESS) {
274 io_u->resid = io_u->xfer_buflen - ovl->InternalHigh;
/* Otherwise the whole buffer is counted as not transferred. */
277 io_u->resid = io_u->xfer_buflen;
/* Release the per-request event created in fio_windowsaio_queue. */
282 CloseHandle(ovl->hEvent);
/* Loop finished: close the completion port and report any failure. */
286 bSuccess = CloseHandle(ctx->ioCP);
288 PrintError(__func__);
/* Tell fio_windowsaio_cleanup that this thread is done. */
290 ctx->wd->stopped = TRUE;
/*
 * ioengine init callback: allocates the engine state plus two zeroed arrays
 * of (iodepth + 1) entries each and stores the state in td->io_ops->data.
 */
295 static int fio_windowsaio_init(struct thread_data *td)
297 struct windowsaio_data *wd;
299 wd = malloc(sizeof(struct windowsaio_data));
303 wd->aio_events = malloc((td->o.iodepth + 1) * sizeof(struct io_u *));
304 if (wd->aio_events == NULL) {
/*
 * NOTE(review): busyIoHandles holds HANDLE values but is sized (and zeroed
 * below) with sizeof(struct io_u *); sizeof(HANDLE) would state the intent.
 */
309 wd->busyIoHandles = malloc((td->o.iodepth + 1) * sizeof(struct io_u *));
310 if (wd->busyIoHandles == NULL) {
/* Undo the earlier allocation when the second one fails. */
311 free(wd->aio_events);
316 ZeroMemory(wd->aio_events, (td->o.iodepth + 1) * sizeof(struct io_u *));
317 ZeroMemory(wd->busyIoHandles, (td->o.iodepth + 1) * sizeof(struct io_u *));
/* Publish the state for the other callbacks. */
325 td->io_ops->data = wd;
/*
 * ioengine open_file callback: opens f->file_name with CreateFile for
 * overlapped I/O and, when engine state exists, creates the completion port
 * and starts the completion thread.
 */
329 static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f)
/* FILE_FLAG_OVERLAPPED is always set; further flags are OR-ed in below. */
333 DWORD flags = FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_OVERLAPPED;
334 DWORD sharemode = FILE_SHARE_READ | FILE_SHARE_WRITE;
/* NOTE(review): this initial value looks redundant; openmode is assigned again before CreateFile. */
335 DWORD openmode = OPEN_ALWAYS;
338 dprint(FD_FILE, "fd open %s\n", f->file_name);
/* Pipes and the "-" (stdin/stdout) file name are rejected with an error message. */
340 if (f->filetype == FIO_TYPE_PIPE) {
341 log_err("fio: windowsaio doesn't support pipes\n");
345 if (!strcmp(f->file_name, "-")) {
346 log_err("fio: can't read/write to stdin/out\n");
/* Optional caching flags; the conditions selecting them are not shown here. */
351 flags |= FILE_FLAG_NO_BUFFERING;
353 flags |= FILE_FLAG_WRITE_THROUGH;
/*
 * Access-pattern hint.
 * NOTE(review): TD_DDIR_RANDRW appears in the list that selects the
 * sequential-scan hint - confirm that this is intended.
 */
356 if (td->o.td_ddir == TD_DDIR_READ ||
357 td->o.td_ddir == TD_DDIR_WRITE ||
358 td->o.td_ddir == TD_DDIR_RANDRW)
360 flags |= FILE_FLAG_SEQUENTIAL_SCAN;
364 flags |= FILE_FLAG_RANDOM_ACCESS;
/* Read jobs and read-only mode open the file without write access. */
367 if (td_read(td) || read_only)
368 access = GENERIC_READ;
370 access = (GENERIC_READ | GENERIC_WRITE);
/* The file is only created on open when create_on_open is set. */
372 if (td->o.create_on_open > 0)
373 openmode = OPEN_ALWAYS;
375 openmode = OPEN_EXISTING;
377 f->hFile = CreateFile(f->file_name, access, sharemode,
378 NULL, openmode, flags, NULL);
380 if (f->hFile == INVALID_HANDLE_VALUE) {
381 log_err("Failed to open %s\n", f->file_name);
382 PrintError(__func__);
386 /* Only set up the completion port and thread if we're not just
387 * querying the device size */
388 if (!rc && td->io_ops->data != NULL) {
389 struct windowsaio_data *wd;
390 struct thread_ctx *ctx;
/* Create a new completion port associated with the freshly opened file handle. */
391 hFile = CreateIoCompletionPort(f->hFile, NULL, 0, 0);
393 wd = td->io_ops->data;
/* Context passed to the completion thread as its start parameter. */
397 ctx = malloc(sizeof(struct thread_ctx));
401 wd->hThread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, NULL);
403 if (wd->hThread == NULL) {
404 PrintError(__func__);
/* ioengine close_file callback: closes f->hFile if it is open and marks it invalid. */
412 static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f)
416 if (f->hFile != INVALID_HANDLE_VALUE) {
417 bSuccess = CloseHandle(f->hFile);
419 PrintError(__func__);
/* Reset the handle so a later close attempt is skipped by the check above. */
422 f->hFile = INVALID_HANDLE_VALUE;
/* Callback table that registers this engine under the name "windowsaio". */
426 static struct ioengine_ops ioengine = {
427 .name = "windowsaio",
428 .version = FIO_IOOPS_VERSION,
429 .init = fio_windowsaio_init,
430 .queue = fio_windowsaio_queue,
431 .cancel = fio_windowsaio_cancel,
432 .getevents = fio_windowsaio_getevents,
433 .event = fio_windowsaio_event,
434 .cleanup = fio_windowsaio_cleanup,
435 .open_file = fio_windowsaio_open_file,
436 .close_file = fio_windowsaio_close_file,
/* File size lookup uses the shared generic helper, not an engine-specific one. */
437 .get_file_size = generic_get_file_size
/*
 * Registers the windowsaio engine table; marked with fio_init.
 * NOTE(review): the "posixaio" in the name looks like a copy-paste leftover.
 */
440 static void fio_init fio_posixaio_register(void)
442 register_ioengine(&ioengine);
/*
 * Unregisters the windowsaio engine table; marked with fio_exit.
 * NOTE(review): the "posixaio" in the name looks like a copy-paste leftover.
 */
445 static void fio_exit fio_posixaio_unregister(void)
447 unregister_ioengine(&ioengine);