Commit | Line | Data |
---|---|---|
ecc314ba BC |
1 | /* |
2 | * Native Windows async IO engine | |
ea4500d8 | 3 | * Copyright (C) 2011 Bruce Cran <bruce@cran.org.uk> |
ecc314ba BC |
4 | */ |
5 | ||
ecc314ba BC |
6 | #include <stdio.h> |
7 | #include <stdlib.h> | |
8 | #include <unistd.h> | |
9 | #include <signal.h> | |
10 | #include <errno.h> | |
11 | #include <windows.h> | |
12 | ||
13 | #include "../fio.h" | |
14 | ||
ea4500d8 BC |
15 | typedef BOOL (WINAPI *CANCELIOEX)(HANDLE hFile, LPOVERLAPPED lpOverlapped); |
16 | ||
ecc314ba BC |
17 | typedef struct { |
18 | OVERLAPPED o; | |
19 | struct io_u *io_u; | |
20 | } FIO_OVERLAPPED; | |
21 | ||
9b836561 BC |
22 | struct windowsaio_data { |
23 | HANDLE *io_handles; | |
24 | unsigned int io_index; | |
25 | FIO_OVERLAPPED *ovls; | |
26 | ||
27 | HANDLE iothread; | |
28 | HANDLE iothread_stopped; | |
29 | BOOL iothread_running; | |
30 | ||
31 | struct io_u **aio_events; | |
32 | HANDLE iocomplete_event; | |
ea4500d8 BC |
33 | CANCELIOEX pCancelIoEx; |
34 | BOOL useIOCP; | |
9b836561 BC |
35 | }; |
36 | ||
ecc314ba | 37 | struct thread_ctx { |
9b836561 | 38 | HANDLE iocp; |
ecc314ba BC |
39 | struct windowsaio_data *wd; |
40 | }; | |
41 | ||
42 | static void PrintError(LPCSTR lpszFunction); | |
43 | static int fio_windowsaio_cancel(struct thread_data *td, | |
44 | struct io_u *io_u); | |
9b836561 | 45 | static BOOL TimedOut(DWORD start_count, DWORD end_count); |
ecc314ba BC |
46 | static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min, |
47 | unsigned int max, struct timespec *t); | |
48 | static struct io_u *fio_windowsaio_event(struct thread_data *td, int event); | |
49 | static int fio_windowsaio_queue(struct thread_data *td, | |
50 | struct io_u *io_u); | |
51 | static void fio_windowsaio_cleanup(struct thread_data *td); | |
52 | static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter); | |
53 | static int fio_windowsaio_init(struct thread_data *td); | |
54 | static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f); | |
55 | static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f); | |
56 | ||
9b836561 BC |
57 | int sync_file_range(int fd, off64_t offset, off64_t nbytes, |
58 | unsigned int flags) | |
59 | { | |
60 | errno = ENOSYS; | |
61 | return -1; | |
62 | } | |
63 | ||
ecc314ba BC |
64 | static void PrintError(LPCSTR lpszFunction) |
65 | { | |
66 | // Retrieve the system error message for the last-error code | |
67 | ||
68 | LPSTR lpMsgBuf; | |
69 | DWORD dw = GetLastError(); | |
70 | ||
71 | FormatMessage( | |
72 | FORMAT_MESSAGE_ALLOCATE_BUFFER | | |
73 | FORMAT_MESSAGE_FROM_SYSTEM | | |
74 | FORMAT_MESSAGE_IGNORE_INSERTS, | |
75 | NULL, | |
76 | dw, | |
77 | MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), | |
78 | (LPTSTR)&lpMsgBuf, | |
79 | 0, NULL ); | |
80 | ||
81 | log_err("%s - %s", lpszFunction, lpMsgBuf); | |
82 | LocalFree(lpMsgBuf); | |
83 | } | |
84 | ||
85 | static int fio_windowsaio_cancel(struct thread_data *td, | |
86 | struct io_u *io_u) | |
87 | { | |
ecc314ba BC |
88 | int rc = 0; |
89 | ||
9b836561 | 90 | struct windowsaio_data *wd = td->io_ops->data; |
ecc314ba | 91 | |
ea4500d8 BC |
92 | /* If we're running on Vista or newer, we can cancel individual IO requests */ |
93 | if (wd->pCancelIoEx != NULL) { | |
9b836561 | 94 | FIO_OVERLAPPED *ovl = io_u->engine_data; |
ea4500d8 | 95 | if (!wd->pCancelIoEx(io_u->file->hFile, &ovl->o)) |
9b836561 BC |
96 | rc = 1; |
97 | } else | |
ecc314ba BC |
98 | rc = 1; |
99 | ||
100 | return rc; | |
101 | } | |
102 | ||
9b836561 | 103 | static BOOL TimedOut(DWORD start_count, DWORD end_count) |
ecc314ba BC |
104 | { |
105 | BOOL expired = FALSE; | |
9b836561 | 106 | DWORD current_time; |
ecc314ba | 107 | |
9b836561 | 108 | current_time = GetTickCount(); |
ecc314ba | 109 | |
9b836561 | 110 | if ((end_count > start_count) && current_time >= end_count) |
ecc314ba | 111 | expired = TRUE; |
9b836561 | 112 | else if (current_time < start_count && current_time > end_count) |
ecc314ba BC |
113 | expired = TRUE; |
114 | ||
ecc314ba BC |
115 | return expired; |
116 | } | |
117 | ||
118 | static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min, | |
119 | unsigned int max, struct timespec *t) | |
120 | { | |
121 | struct windowsaio_data *wd = td->io_ops->data; | |
122 | struct flist_head *entry; | |
123 | unsigned int dequeued = 0; | |
124 | struct io_u *io_u; | |
9b836561 | 125 | DWORD start_count = 0, end_count = 0; |
ecc314ba | 126 | BOOL timedout = FALSE; |
9b836561 | 127 | unsigned int mswait = 100; |
ecc314ba BC |
128 | |
129 | if (t != NULL) { | |
9b836561 BC |
130 | mswait = (t->tv_sec * 1000) + (t->tv_nsec / 1000000); |
131 | start_count = GetTickCount(); | |
132 | end_count = start_count + (t->tv_sec * 1000) + (t->tv_nsec / 1000000); | |
ecc314ba BC |
133 | } |
134 | ||
135 | while (dequeued < min && !timedout) { | |
ecc314ba BC |
136 | flist_for_each(entry, &td->io_u_busylist) { |
137 | io_u = flist_entry(entry, struct io_u, list); | |
138 | ||
ea4500d8 BC |
139 | if (io_u->seen == 0) { |
140 | io_u->seen = 1; | |
9b836561 BC |
141 | wd->aio_events[dequeued] = io_u; |
142 | dequeued++; | |
143 | } | |
ecc314ba BC |
144 | |
145 | if (dequeued == max) | |
146 | break; | |
147 | } | |
148 | ||
ea4500d8 | 149 | if (dequeued < min) |
9b836561 | 150 | WaitForSingleObject(wd->iocomplete_event, mswait); |
9b836561 BC |
151 | |
152 | if (t != NULL && TimedOut(start_count, end_count)) | |
ecc314ba | 153 | timedout = TRUE; |
ecc314ba BC |
154 | } |
155 | ||
ecc314ba BC |
156 | return dequeued; |
157 | } | |
158 | ||
159 | static struct io_u *fio_windowsaio_event(struct thread_data *td, int event) | |
160 | { | |
161 | struct windowsaio_data *wd = td->io_ops->data; | |
162 | return wd->aio_events[event]; | |
163 | } | |
164 | ||
165 | static int fio_windowsaio_queue(struct thread_data *td, | |
166 | struct io_u *io_u) | |
167 | { | |
9b836561 | 168 | struct windowsaio_data *wd; |
ea4500d8 | 169 | LPOVERLAPPED lpOvl; |
9b836561 BC |
170 | DWORD iobytes; |
171 | BOOL success = TRUE; | |
172 | int ind; | |
ecc314ba BC |
173 | int rc; |
174 | ||
175 | fio_ro_check(td, io_u); | |
176 | ||
9b836561 BC |
177 | wd = td->io_ops->data; |
178 | ind = wd->io_index; | |
ecc314ba | 179 | |
9b836561 | 180 | ResetEvent(wd->io_handles[ind]); |
ea4500d8 BC |
181 | |
182 | if (wd->useIOCP) { | |
183 | lpOvl = &wd->ovls[ind].o; | |
184 | ||
185 | lpOvl->Internal = STATUS_PENDING; | |
186 | lpOvl->InternalHigh = 0; | |
187 | lpOvl->Offset = io_u->offset & 0xFFFFFFFF; | |
188 | lpOvl->OffsetHigh = io_u->offset >> 32; | |
189 | lpOvl->hEvent = wd->io_handles[ind]; | |
190 | lpOvl->Pointer = NULL; | |
191 | wd->ovls[ind].io_u = io_u; | |
192 | } else { | |
193 | lpOvl = NULL; | |
194 | } | |
e4db9fec | 195 | |
9b836561 | 196 | io_u->engine_data = &wd->ovls[ind]; |
ecc314ba BC |
197 | io_u->seen = 0; |
198 | ||
9b836561 | 199 | if (io_u->ddir == DDIR_WRITE) { |
ea4500d8 | 200 | success = WriteFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, &iobytes, lpOvl); |
9b836561 | 201 | } else if (io_u->ddir == DDIR_READ) { |
ea4500d8 | 202 | success = ReadFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, &iobytes, lpOvl); |
9b836561 | 203 | } else if (io_u->ddir == DDIR_SYNC || |
ea4500d8 BC |
204 | io_u->ddir == DDIR_DATASYNC || |
205 | io_u->ddir == DDIR_SYNC_FILE_RANGE) | |
ecc314ba BC |
206 | { |
207 | FlushFileBuffers(io_u->file->hFile); | |
208 | return FIO_Q_COMPLETED; | |
209 | } else if (io_u->ddir == DDIR_TRIM) { | |
ea4500d8 | 210 | log_info("Manual TRIM isn't supported on Windows"); |
ecc314ba | 211 | return FIO_Q_COMPLETED; |
9b836561 BC |
212 | } else |
213 | assert(0); | |
ecc314ba | 214 | |
ea4500d8 BC |
215 | if (wd->useIOCP && (success || GetLastError() == ERROR_IO_PENDING)) { |
216 | wd->io_index = (wd->io_index + 1) % td->o.iodepth; | |
217 | rc = FIO_Q_QUEUED; | |
218 | } else if (success && !wd->useIOCP) { | |
9b836561 | 219 | io_u->resid = io_u->xfer_buflen - iobytes; |
ecc314ba BC |
220 | io_u->error = 0; |
221 | rc = FIO_Q_COMPLETED; | |
ecc314ba BC |
222 | } else { |
223 | PrintError(__func__); | |
224 | io_u->error = GetLastError(); | |
225 | io_u->resid = io_u->xfer_buflen; | |
226 | rc = FIO_Q_COMPLETED; | |
227 | } | |
228 | ||
ecc314ba BC |
229 | return rc; |
230 | } | |
231 | ||
232 | static void fio_windowsaio_cleanup(struct thread_data *td) | |
233 | { | |
9b836561 | 234 | int i; |
ecc314ba BC |
235 | struct windowsaio_data *wd; |
236 | ||
ecc314ba | 237 | wd = td->io_ops->data; |
ecc314ba | 238 | |
9b836561 | 239 | WaitForSingleObject(wd->iothread_stopped, INFINITE); |
ecc314ba BC |
240 | |
241 | if (wd != NULL) { | |
9b836561 BC |
242 | CloseHandle(wd->iothread); |
243 | CloseHandle(wd->iothread_stopped); | |
244 | CloseHandle(wd->iocomplete_event); | |
245 | ||
ea4500d8 | 246 | for (i = 0; i < td->o.iodepth; i++) { |
9b836561 BC |
247 | CloseHandle(wd->io_handles[i]); |
248 | } | |
e4db9fec | 249 | |
ecc314ba | 250 | free(wd->aio_events); |
9b836561 BC |
251 | free(wd->io_handles); |
252 | free(wd->ovls); | |
ecc314ba | 253 | free(wd); |
e4db9fec | 254 | |
ecc314ba BC |
255 | td->io_ops->data = NULL; |
256 | } | |
ecc314ba BC |
257 | } |
258 | ||
9b836561 | 259 | /* Runs as a thread and waits for queued IO to complete */ |
ecc314ba BC |
260 | static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter) |
261 | { | |
262 | OVERLAPPED *ovl; | |
263 | FIO_OVERLAPPED *fov; | |
264 | struct io_u *io_u; | |
265 | struct windowsaio_data *wd; | |
ecc314ba BC |
266 | struct thread_ctx *ctx; |
267 | ULONG_PTR ulKey = 0; | |
ecc314ba BC |
268 | DWORD bytes; |
269 | ||
ecc314ba BC |
270 | ctx = (struct thread_ctx*)lpParameter; |
271 | wd = ctx->wd; | |
ecc314ba | 272 | |
ea4500d8 | 273 | do { |
9b836561 BC |
274 | if (!GetQueuedCompletionStatus(ctx->iocp, &bytes, &ulKey, &ovl, 250)) |
275 | continue; | |
ecc314ba BC |
276 | |
277 | fov = CONTAINING_RECORD(ovl, FIO_OVERLAPPED, o); | |
278 | io_u = fov->io_u; | |
279 | ||
ecc314ba BC |
280 | if (ovl->Internal == ERROR_SUCCESS) { |
281 | io_u->resid = io_u->xfer_buflen - ovl->InternalHigh; | |
282 | io_u->error = 0; | |
283 | } else { | |
284 | io_u->resid = io_u->xfer_buflen; | |
9b836561 | 285 | io_u->error = ovl->Internal; |
ecc314ba BC |
286 | } |
287 | ||
9b836561 | 288 | SetEvent(wd->iocomplete_event); |
ea4500d8 | 289 | } while (ctx->wd->iothread_running); |
ecc314ba | 290 | |
9b836561 BC |
291 | CloseHandle(ctx->iocp); |
292 | SetEvent(ctx->wd->iothread_stopped); | |
ecc314ba | 293 | free(ctx); |
9b836561 | 294 | |
ecc314ba BC |
295 | return 0; |
296 | } | |
297 | ||
298 | static int fio_windowsaio_init(struct thread_data *td) | |
299 | { | |
ecc314ba | 300 | struct windowsaio_data *wd; |
ea4500d8 | 301 | HANDLE hKernel32Dll; |
9b836561 | 302 | int rc = 0; |
ecc314ba | 303 | |
ecc314ba | 304 | wd = malloc(sizeof(struct windowsaio_data)); |
9b836561 BC |
305 | if (wd != NULL) |
306 | ZeroMemory(wd, sizeof(struct windowsaio_data)); | |
307 | else | |
308 | rc = 1; | |
ecc314ba | 309 | |
9b836561 BC |
310 | if (!rc) { |
311 | wd->aio_events = malloc(td->o.iodepth * sizeof(struct io_u*)); | |
312 | if (wd->aio_events == NULL) | |
313 | rc = 1; | |
e4db9fec BC |
314 | } |
315 | ||
9b836561 | 316 | if (!rc) { |
ea4500d8 | 317 | wd->io_handles = malloc(td->o.iodepth * sizeof(HANDLE)); |
9b836561 BC |
318 | if (wd->io_handles == NULL) |
319 | rc = 1; | |
e4db9fec BC |
320 | } |
321 | ||
9b836561 | 322 | if (!rc) { |
ea4500d8 | 323 | wd->ovls = malloc(td->o.iodepth * sizeof(FIO_OVERLAPPED)); |
9b836561 BC |
324 | if (wd->ovls == NULL) |
325 | rc = 1; | |
326 | } | |
e4db9fec | 327 | |
9b836561 BC |
328 | if (!rc) { |
329 | /* Create an auto-reset event */ | |
330 | wd->iocomplete_event = CreateEvent(NULL, FALSE, FALSE, NULL); | |
331 | if (wd->iocomplete_event == NULL) | |
332 | rc = 1; | |
333 | } | |
334 | ||
9b836561 BC |
335 | if (rc) { |
336 | PrintError(__func__); | |
337 | if (wd != NULL) { | |
338 | if (wd->ovls != NULL) | |
339 | free(wd->ovls); | |
340 | if (wd->io_handles != NULL) | |
341 | free(wd->io_handles); | |
342 | if (wd->aio_events != NULL) | |
343 | free(wd->aio_events); | |
344 | ||
345 | free(wd); | |
346 | } | |
347 | } | |
ecc314ba | 348 | |
ea4500d8 BC |
349 | hKernel32Dll = GetModuleHandle("kernel32.dll"); |
350 | wd->pCancelIoEx = GetProcAddress(hKernel32Dll, "CancelIoEx"); | |
351 | ||
ecc314ba | 352 | td->io_ops->data = wd; |
e4db9fec | 353 | return 0; |
ecc314ba BC |
354 | } |
355 | ||
356 | static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f) | |
357 | { | |
358 | int rc = 0; | |
359 | HANDLE hFile; | |
ea4500d8 | 360 | DWORD flags = FILE_FLAG_POSIX_SEMANTICS; |
ecc314ba BC |
361 | DWORD sharemode = FILE_SHARE_READ | FILE_SHARE_WRITE; |
362 | DWORD openmode = OPEN_ALWAYS; | |
363 | DWORD access; | |
9b836561 | 364 | int i; |
ecc314ba BC |
365 | |
366 | dprint(FD_FILE, "fd open %s\n", f->file_name); | |
367 | ||
ecc314ba BC |
368 | if (f->filetype == FIO_TYPE_PIPE) { |
369 | log_err("fio: windowsaio doesn't support pipes\n"); | |
370 | return 1; | |
371 | } | |
372 | ||
373 | if (!strcmp(f->file_name, "-")) { | |
374 | log_err("fio: can't read/write to stdin/out\n"); | |
375 | return 1; | |
376 | } | |
377 | ||
ea4500d8 BC |
378 | if (!td->o.odirect && !td->o.sync_io && td->io_ops->data != NULL) |
379 | flags |= FILE_FLAG_OVERLAPPED; | |
380 | ||
ecc314ba BC |
381 | if (td->o.odirect) |
382 | flags |= FILE_FLAG_NO_BUFFERING; | |
383 | if (td->o.sync_io) | |
384 | flags |= FILE_FLAG_WRITE_THROUGH; | |
385 | ||
386 | ||
387 | if (td->o.td_ddir == TD_DDIR_READ || | |
ea4500d8 | 388 | td->o.td_ddir == TD_DDIR_WRITE) |
ecc314ba | 389 | flags |= FILE_FLAG_SEQUENTIAL_SCAN; |
ecc314ba | 390 | else |
ecc314ba | 391 | flags |= FILE_FLAG_RANDOM_ACCESS; |
ecc314ba | 392 | |
ea4500d8 | 393 | if (!td_write(td) || read_only) |
ecc314ba BC |
394 | access = GENERIC_READ; |
395 | else | |
396 | access = (GENERIC_READ | GENERIC_WRITE); | |
397 | ||
398 | if (td->o.create_on_open > 0) | |
399 | openmode = OPEN_ALWAYS; | |
400 | else | |
401 | openmode = OPEN_EXISTING; | |
402 | ||
403 | f->hFile = CreateFile(f->file_name, access, sharemode, | |
404 | NULL, openmode, flags, NULL); | |
405 | ||
406 | if (f->hFile == INVALID_HANDLE_VALUE) { | |
ecc314ba BC |
407 | PrintError(__func__); |
408 | rc = 1; | |
409 | } | |
410 | ||
411 | /* Only set up the competion port and thread if we're not just | |
412 | * querying the device size */ | |
ea4500d8 | 413 | if (!rc && td->io_ops->data != NULL && !td->o.odirect && !td->o.sync_io) { |
ecc314ba | 414 | struct thread_ctx *ctx; |
ea4500d8 | 415 | struct windowsaio_data *wd; |
ecc314ba BC |
416 | hFile = CreateIoCompletionPort(f->hFile, NULL, 0, 0); |
417 | ||
ea4500d8 BC |
418 | |
419 | wd = td->io_ops->data; | |
420 | ||
421 | if (!td->o.odirect && !td->o.sync_io) | |
422 | wd->useIOCP = 1; | |
423 | else | |
424 | wd->useIOCP = 0; | |
ecc314ba | 425 | |
9b836561 BC |
426 | wd->io_index = 0; |
427 | wd->iothread_running = TRUE; | |
428 | /* Create a manual-reset event */ | |
429 | wd->iothread_stopped = CreateEvent(NULL, TRUE, FALSE, NULL); | |
430 | ||
431 | if (wd->iothread_stopped == NULL) | |
432 | rc = 1; | |
433 | ||
434 | if (!rc) { | |
ea4500d8 | 435 | for (i = 0; i < td->o.iodepth; i++) { |
9b836561 BC |
436 | /* Create a manual-reset event for putting in OVERLAPPED */ |
437 | wd->io_handles[i] = CreateEvent(NULL, TRUE, FALSE, NULL); | |
438 | if (wd->io_handles[i] == NULL) { | |
439 | PrintError(__func__); | |
440 | rc = 1; | |
441 | break; | |
442 | } | |
443 | } | |
444 | } | |
ecc314ba | 445 | |
9b836561 BC |
446 | if (!rc) { |
447 | ctx = malloc(sizeof(struct thread_ctx)); | |
448 | ctx->iocp = hFile; | |
449 | ctx->wd = wd; | |
450 | ||
451 | wd->iothread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, NULL); | |
452 | } | |
ecc314ba | 453 | |
9b836561 | 454 | if (rc || wd->iothread == NULL) { |
ecc314ba BC |
455 | PrintError(__func__); |
456 | rc = 1; | |
457 | } | |
458 | } | |
459 | ||
ecc314ba BC |
460 | return rc; |
461 | } | |
462 | ||
463 | static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f) | |
464 | { | |
9b836561 BC |
465 | struct windowsaio_data *wd; |
466 | ||
467 | dprint(FD_FILE, "fd close %s\n", f->file_name); | |
468 | ||
469 | if (td->io_ops->data != NULL) { | |
470 | wd = td->io_ops->data; | |
471 | wd->iothread_running = FALSE; | |
472 | WaitForSingleObject(wd->iothread_stopped, INFINITE); | |
473 | } | |
ecc314ba | 474 | |
ecc314ba | 475 | if (f->hFile != INVALID_HANDLE_VALUE) { |
9b836561 | 476 | if (!CloseHandle(f->hFile)) |
ecc314ba BC |
477 | PrintError(__func__); |
478 | } | |
479 | ||
480 | f->hFile = INVALID_HANDLE_VALUE; | |
481 | return 0; | |
482 | } | |
483 | ||
484 | static struct ioengine_ops ioengine = { | |
485 | .name = "windowsaio", | |
486 | .version = FIO_IOOPS_VERSION, | |
487 | .init = fio_windowsaio_init, | |
488 | .queue = fio_windowsaio_queue, | |
489 | .cancel = fio_windowsaio_cancel, | |
490 | .getevents = fio_windowsaio_getevents, | |
491 | .event = fio_windowsaio_event, | |
492 | .cleanup = fio_windowsaio_cleanup, | |
493 | .open_file = fio_windowsaio_open_file, | |
494 | .close_file = fio_windowsaio_close_file, | |
495 | .get_file_size = generic_get_file_size | |
496 | }; | |
497 | ||
498 | static void fio_init fio_posixaio_register(void) | |
499 | { | |
500 | register_ioengine(&ioengine); | |
501 | } | |
502 | ||
503 | static void fio_exit fio_posixaio_unregister(void) | |
504 | { | |
505 | unregister_ioengine(&ioengine); | |
506 | } |