Commit | Line | Data |
---|---|---|
ecc314ba BC |
1 | /* |
2 | * Native Windows async IO engine | |
3 | * Copyright (C) 2010 Bruce Cran <bruce@cran.org.uk> | |
4 | */ | |
5 | ||
ecc314ba BC |
6 | #include <stdio.h> |
7 | #include <stdlib.h> | |
8 | #include <unistd.h> | |
9 | #include <signal.h> | |
10 | #include <errno.h> | |
11 | #include <windows.h> | |
12 | ||
13 | #include "../fio.h" | |
14 | ||
ecc314ba BC |
15 | typedef struct { |
16 | OVERLAPPED o; | |
17 | struct io_u *io_u; | |
18 | } FIO_OVERLAPPED; | |
19 | ||
9b836561 BC |
20 | struct windowsaio_data { |
21 | HANDLE *io_handles; | |
22 | unsigned int io_index; | |
23 | FIO_OVERLAPPED *ovls; | |
24 | ||
25 | HANDLE iothread; | |
26 | HANDLE iothread_stopped; | |
27 | BOOL iothread_running; | |
28 | ||
29 | struct io_u **aio_events; | |
30 | HANDLE iocomplete_event; | |
31 | BOOL have_cancelioex; | |
32 | }; | |
33 | ||
ecc314ba | 34 | struct thread_ctx { |
9b836561 | 35 | HANDLE iocp; |
ecc314ba BC |
36 | struct windowsaio_data *wd; |
37 | }; | |
38 | ||
39 | static void PrintError(LPCSTR lpszFunction); | |
40 | static int fio_windowsaio_cancel(struct thread_data *td, | |
41 | struct io_u *io_u); | |
9b836561 | 42 | static BOOL TimedOut(DWORD start_count, DWORD end_count); |
ecc314ba BC |
43 | static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min, |
44 | unsigned int max, struct timespec *t); | |
45 | static struct io_u *fio_windowsaio_event(struct thread_data *td, int event); | |
46 | static int fio_windowsaio_queue(struct thread_data *td, | |
47 | struct io_u *io_u); | |
48 | static void fio_windowsaio_cleanup(struct thread_data *td); | |
49 | static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter); | |
50 | static int fio_windowsaio_init(struct thread_data *td); | |
51 | static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f); | |
52 | static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f); | |
53 | ||
9b836561 BC |
54 | /* CancelIoEx isn't in Cygwin's w32api */ |
55 | BOOL WINAPI CancelIoEx( | |
56 | HANDLE hFile, | |
57 | LPOVERLAPPED lpOverlapped | |
58 | ); | |
59 | ||
60 | ||
61 | ||
62 | int sync_file_range(int fd, off64_t offset, off64_t nbytes, | |
63 | unsigned int flags) | |
64 | { | |
65 | errno = ENOSYS; | |
66 | return -1; | |
67 | } | |
68 | ||
ecc314ba BC |
69 | static void PrintError(LPCSTR lpszFunction) |
70 | { | |
71 | // Retrieve the system error message for the last-error code | |
72 | ||
73 | LPSTR lpMsgBuf; | |
74 | DWORD dw = GetLastError(); | |
75 | ||
76 | FormatMessage( | |
77 | FORMAT_MESSAGE_ALLOCATE_BUFFER | | |
78 | FORMAT_MESSAGE_FROM_SYSTEM | | |
79 | FORMAT_MESSAGE_IGNORE_INSERTS, | |
80 | NULL, | |
81 | dw, | |
82 | MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), | |
83 | (LPTSTR)&lpMsgBuf, | |
84 | 0, NULL ); | |
85 | ||
86 | log_err("%s - %s", lpszFunction, lpMsgBuf); | |
87 | LocalFree(lpMsgBuf); | |
88 | } | |
89 | ||
90 | static int fio_windowsaio_cancel(struct thread_data *td, | |
91 | struct io_u *io_u) | |
92 | { | |
ecc314ba BC |
93 | int rc = 0; |
94 | ||
9b836561 | 95 | struct windowsaio_data *wd = td->io_ops->data; |
ecc314ba | 96 | |
9b836561 BC |
97 | /* If we're running on Vista, we can cancel individual IO requests */ |
98 | if (wd->have_cancelioex) { | |
99 | FIO_OVERLAPPED *ovl = io_u->engine_data; | |
100 | if (!CancelIoEx(io_u->file->hFile, &ovl->o)) | |
101 | rc = 1; | |
102 | } else | |
ecc314ba BC |
103 | rc = 1; |
104 | ||
105 | return rc; | |
106 | } | |
107 | ||
9b836561 | 108 | static BOOL TimedOut(DWORD start_count, DWORD end_count) |
ecc314ba BC |
109 | { |
110 | BOOL expired = FALSE; | |
9b836561 | 111 | DWORD current_time; |
ecc314ba | 112 | |
9b836561 | 113 | current_time = GetTickCount(); |
ecc314ba | 114 | |
9b836561 | 115 | if ((end_count > start_count) && current_time >= end_count) |
ecc314ba | 116 | expired = TRUE; |
9b836561 | 117 | else if (current_time < start_count && current_time > end_count) |
ecc314ba BC |
118 | expired = TRUE; |
119 | ||
ecc314ba BC |
120 | return expired; |
121 | } | |
122 | ||
123 | static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min, | |
124 | unsigned int max, struct timespec *t) | |
125 | { | |
126 | struct windowsaio_data *wd = td->io_ops->data; | |
127 | struct flist_head *entry; | |
128 | unsigned int dequeued = 0; | |
129 | struct io_u *io_u; | |
9b836561 | 130 | DWORD start_count = 0, end_count = 0; |
ecc314ba | 131 | BOOL timedout = FALSE; |
9b836561 | 132 | unsigned int mswait = 100; |
ecc314ba BC |
133 | |
134 | if (t != NULL) { | |
9b836561 BC |
135 | mswait = (t->tv_sec * 1000) + (t->tv_nsec / 1000000); |
136 | start_count = GetTickCount(); | |
137 | end_count = start_count + (t->tv_sec * 1000) + (t->tv_nsec / 1000000); | |
ecc314ba BC |
138 | } |
139 | ||
140 | while (dequeued < min && !timedout) { | |
ecc314ba BC |
141 | flist_for_each(entry, &td->io_u_busylist) { |
142 | io_u = flist_entry(entry, struct io_u, list); | |
143 | ||
9b836561 BC |
144 | if (io_u->seen == 1) { |
145 | io_u->seen = 2; | |
146 | wd->aio_events[dequeued] = io_u; | |
147 | dequeued++; | |
148 | } | |
ecc314ba BC |
149 | |
150 | if (dequeued == max) | |
151 | break; | |
152 | } | |
153 | ||
9b836561 BC |
154 | if (dequeued < min) { |
155 | WaitForSingleObject(wd->iocomplete_event, mswait); | |
156 | } | |
157 | ||
158 | if (t != NULL && TimedOut(start_count, end_count)) | |
ecc314ba | 159 | timedout = TRUE; |
ecc314ba BC |
160 | } |
161 | ||
ecc314ba BC |
162 | return dequeued; |
163 | } | |
164 | ||
165 | static struct io_u *fio_windowsaio_event(struct thread_data *td, int event) | |
166 | { | |
167 | struct windowsaio_data *wd = td->io_ops->data; | |
168 | return wd->aio_events[event]; | |
169 | } | |
170 | ||
171 | static int fio_windowsaio_queue(struct thread_data *td, | |
172 | struct io_u *io_u) | |
173 | { | |
9b836561 BC |
174 | struct windowsaio_data *wd; |
175 | DWORD iobytes; | |
176 | BOOL success = TRUE; | |
177 | int ind; | |
ecc314ba BC |
178 | int rc; |
179 | ||
180 | fio_ro_check(td, io_u); | |
181 | ||
9b836561 BC |
182 | wd = td->io_ops->data; |
183 | ind = wd->io_index; | |
ecc314ba | 184 | |
9b836561 BC |
185 | ResetEvent(wd->io_handles[ind]); |
186 | wd->ovls[ind].o.Internal = 0; | |
187 | wd->ovls[ind].o.InternalHigh = 0; | |
188 | wd->ovls[ind].o.Offset = io_u->offset & 0xFFFFFFFF; | |
189 | wd->ovls[ind].o.OffsetHigh = io_u->offset >> 32; | |
190 | wd->ovls[ind].o.hEvent = wd->io_handles[ind]; | |
191 | wd->ovls[ind].io_u = io_u; | |
e4db9fec | 192 | |
9b836561 | 193 | io_u->engine_data = &wd->ovls[ind]; |
ecc314ba BC |
194 | io_u->seen = 0; |
195 | ||
9b836561 BC |
196 | if (io_u->ddir == DDIR_WRITE) { |
197 | success = WriteFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, &iobytes, &wd->ovls[ind].o); | |
198 | } else if (io_u->ddir == DDIR_READ) { | |
199 | success = ReadFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, &iobytes, &wd->ovls[ind].o); | |
200 | } else if (io_u->ddir == DDIR_SYNC || | |
ecc314ba BC |
201 | io_u->ddir == DDIR_DATASYNC || |
202 | io_u->ddir == DDIR_SYNC_FILE_RANGE) | |
203 | { | |
204 | FlushFileBuffers(io_u->file->hFile); | |
205 | return FIO_Q_COMPLETED; | |
206 | } else if (io_u->ddir == DDIR_TRIM) { | |
207 | log_info("explicit TRIM isn't supported on Windows"); | |
208 | return FIO_Q_COMPLETED; | |
9b836561 BC |
209 | } else |
210 | assert(0); | |
ecc314ba | 211 | |
9b836561 | 212 | if (success) { |
ecc314ba | 213 | io_u->seen = 1; |
9b836561 | 214 | io_u->resid = io_u->xfer_buflen - iobytes; |
ecc314ba BC |
215 | io_u->error = 0; |
216 | rc = FIO_Q_COMPLETED; | |
9b836561 BC |
217 | } else if (!success && GetLastError() == ERROR_IO_PENDING) { |
218 | wd->io_index = (wd->io_index + 1) % (2 * td->o.iodepth); | |
ecc314ba BC |
219 | rc = FIO_Q_QUEUED; |
220 | } else { | |
221 | PrintError(__func__); | |
222 | io_u->error = GetLastError(); | |
223 | io_u->resid = io_u->xfer_buflen; | |
224 | rc = FIO_Q_COMPLETED; | |
225 | } | |
226 | ||
ecc314ba BC |
227 | return rc; |
228 | } | |
229 | ||
230 | static void fio_windowsaio_cleanup(struct thread_data *td) | |
231 | { | |
9b836561 | 232 | int i; |
ecc314ba BC |
233 | struct windowsaio_data *wd; |
234 | ||
ecc314ba | 235 | wd = td->io_ops->data; |
ecc314ba | 236 | |
9b836561 | 237 | WaitForSingleObject(wd->iothread_stopped, INFINITE); |
ecc314ba BC |
238 | |
239 | if (wd != NULL) { | |
9b836561 BC |
240 | CloseHandle(wd->iothread); |
241 | CloseHandle(wd->iothread_stopped); | |
242 | CloseHandle(wd->iocomplete_event); | |
243 | ||
244 | for (i = 0; i < 2 * td->o.iodepth; i++) { | |
245 | CloseHandle(wd->io_handles[i]); | |
246 | } | |
e4db9fec | 247 | |
ecc314ba | 248 | free(wd->aio_events); |
9b836561 BC |
249 | free(wd->io_handles); |
250 | free(wd->ovls); | |
ecc314ba | 251 | free(wd); |
e4db9fec | 252 | |
ecc314ba BC |
253 | td->io_ops->data = NULL; |
254 | } | |
ecc314ba BC |
255 | } |
256 | ||
9b836561 | 257 | /* Runs as a thread and waits for queued IO to complete */ |
ecc314ba BC |
258 | static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter) |
259 | { | |
260 | OVERLAPPED *ovl; | |
261 | FIO_OVERLAPPED *fov; | |
262 | struct io_u *io_u; | |
263 | struct windowsaio_data *wd; | |
ecc314ba BC |
264 | struct thread_ctx *ctx; |
265 | ULONG_PTR ulKey = 0; | |
ecc314ba BC |
266 | DWORD bytes; |
267 | ||
ecc314ba BC |
268 | ctx = (struct thread_ctx*)lpParameter; |
269 | wd = ctx->wd; | |
ecc314ba | 270 | |
9b836561 BC |
271 | while (ctx->wd->iothread_running) { |
272 | if (!GetQueuedCompletionStatus(ctx->iocp, &bytes, &ulKey, &ovl, 250)) | |
273 | continue; | |
ecc314ba BC |
274 | |
275 | fov = CONTAINING_RECORD(ovl, FIO_OVERLAPPED, o); | |
276 | io_u = fov->io_u; | |
277 | ||
9b836561 | 278 | if (io_u->seen != 0) |
ecc314ba BC |
279 | continue; |
280 | ||
ecc314ba BC |
281 | if (ovl->Internal == ERROR_SUCCESS) { |
282 | io_u->resid = io_u->xfer_buflen - ovl->InternalHigh; | |
283 | io_u->error = 0; | |
284 | } else { | |
285 | io_u->resid = io_u->xfer_buflen; | |
9b836561 | 286 | io_u->error = ovl->Internal; |
ecc314ba BC |
287 | } |
288 | ||
289 | io_u->seen = 1; | |
9b836561 | 290 | SetEvent(wd->iocomplete_event); |
ecc314ba BC |
291 | } |
292 | ||
9b836561 BC |
293 | CloseHandle(ctx->iocp); |
294 | SetEvent(ctx->wd->iothread_stopped); | |
ecc314ba | 295 | free(ctx); |
9b836561 | 296 | |
ecc314ba BC |
297 | return 0; |
298 | } | |
299 | ||
300 | static int fio_windowsaio_init(struct thread_data *td) | |
301 | { | |
ecc314ba | 302 | struct windowsaio_data *wd; |
9b836561 BC |
303 | OSVERSIONINFO osInfo; |
304 | int rc = 0; | |
ecc314ba | 305 | |
ecc314ba | 306 | wd = malloc(sizeof(struct windowsaio_data)); |
9b836561 BC |
307 | if (wd != NULL) |
308 | ZeroMemory(wd, sizeof(struct windowsaio_data)); | |
309 | else | |
310 | rc = 1; | |
ecc314ba | 311 | |
9b836561 BC |
312 | if (!rc) { |
313 | wd->aio_events = malloc(td->o.iodepth * sizeof(struct io_u*)); | |
314 | if (wd->aio_events == NULL) | |
315 | rc = 1; | |
e4db9fec BC |
316 | } |
317 | ||
9b836561 BC |
318 | if (!rc) { |
319 | wd->io_handles = malloc(2 * td->o.iodepth * sizeof(HANDLE)); | |
320 | if (wd->io_handles == NULL) | |
321 | rc = 1; | |
e4db9fec BC |
322 | } |
323 | ||
9b836561 BC |
324 | if (!rc) { |
325 | wd->ovls = malloc(2 * td->o.iodepth * sizeof(FIO_OVERLAPPED)); | |
326 | if (wd->ovls == NULL) | |
327 | rc = 1; | |
328 | } | |
e4db9fec | 329 | |
9b836561 BC |
330 | if (!rc) { |
331 | /* Create an auto-reset event */ | |
332 | wd->iocomplete_event = CreateEvent(NULL, FALSE, FALSE, NULL); | |
333 | if (wd->iocomplete_event == NULL) | |
334 | rc = 1; | |
335 | } | |
336 | ||
337 | if (!rc) { | |
338 | osInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); | |
339 | GetVersionEx(&osInfo); | |
340 | ||
341 | if (osInfo.dwMajorVersion >= 6) | |
342 | wd->have_cancelioex = TRUE; | |
343 | else | |
344 | wd->have_cancelioex = FALSE; | |
345 | } | |
346 | ||
347 | if (rc) { | |
348 | PrintError(__func__); | |
349 | if (wd != NULL) { | |
350 | if (wd->ovls != NULL) | |
351 | free(wd->ovls); | |
352 | if (wd->io_handles != NULL) | |
353 | free(wd->io_handles); | |
354 | if (wd->aio_events != NULL) | |
355 | free(wd->aio_events); | |
356 | ||
357 | free(wd); | |
358 | } | |
359 | } | |
ecc314ba BC |
360 | |
361 | td->io_ops->data = wd; | |
e4db9fec | 362 | return 0; |
ecc314ba BC |
363 | } |
364 | ||
365 | static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f) | |
366 | { | |
367 | int rc = 0; | |
368 | HANDLE hFile; | |
369 | DWORD flags = FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_OVERLAPPED; | |
370 | DWORD sharemode = FILE_SHARE_READ | FILE_SHARE_WRITE; | |
371 | DWORD openmode = OPEN_ALWAYS; | |
372 | DWORD access; | |
9b836561 | 373 | int i; |
ecc314ba BC |
374 | |
375 | dprint(FD_FILE, "fd open %s\n", f->file_name); | |
376 | ||
ecc314ba BC |
377 | if (f->filetype == FIO_TYPE_PIPE) { |
378 | log_err("fio: windowsaio doesn't support pipes\n"); | |
379 | return 1; | |
380 | } | |
381 | ||
382 | if (!strcmp(f->file_name, "-")) { | |
383 | log_err("fio: can't read/write to stdin/out\n"); | |
384 | return 1; | |
385 | } | |
386 | ||
387 | if (td->o.odirect) | |
388 | flags |= FILE_FLAG_NO_BUFFERING; | |
389 | if (td->o.sync_io) | |
390 | flags |= FILE_FLAG_WRITE_THROUGH; | |
391 | ||
392 | ||
393 | if (td->o.td_ddir == TD_DDIR_READ || | |
394 | td->o.td_ddir == TD_DDIR_WRITE || | |
395 | td->o.td_ddir == TD_DDIR_RANDRW) | |
396 | { | |
397 | flags |= FILE_FLAG_SEQUENTIAL_SCAN; | |
398 | } | |
399 | else | |
400 | { | |
401 | flags |= FILE_FLAG_RANDOM_ACCESS; | |
402 | } | |
403 | ||
404 | if (td_read(td) || read_only) | |
405 | access = GENERIC_READ; | |
406 | else | |
407 | access = (GENERIC_READ | GENERIC_WRITE); | |
408 | ||
409 | if (td->o.create_on_open > 0) | |
410 | openmode = OPEN_ALWAYS; | |
411 | else | |
412 | openmode = OPEN_EXISTING; | |
413 | ||
414 | f->hFile = CreateFile(f->file_name, access, sharemode, | |
415 | NULL, openmode, flags, NULL); | |
416 | ||
417 | if (f->hFile == INVALID_HANDLE_VALUE) { | |
ecc314ba BC |
418 | PrintError(__func__); |
419 | rc = 1; | |
420 | } | |
421 | ||
422 | /* Only set up the competion port and thread if we're not just | |
423 | * querying the device size */ | |
424 | if (!rc && td->io_ops->data != NULL) { | |
425 | struct windowsaio_data *wd; | |
426 | struct thread_ctx *ctx; | |
427 | hFile = CreateIoCompletionPort(f->hFile, NULL, 0, 0); | |
428 | ||
429 | wd = td->io_ops->data; | |
ecc314ba | 430 | |
9b836561 BC |
431 | wd->io_index = 0; |
432 | wd->iothread_running = TRUE; | |
433 | /* Create a manual-reset event */ | |
434 | wd->iothread_stopped = CreateEvent(NULL, TRUE, FALSE, NULL); | |
435 | ||
436 | if (wd->iothread_stopped == NULL) | |
437 | rc = 1; | |
438 | ||
439 | if (!rc) { | |
440 | for (i = 0; i < 2 * td->o.iodepth; i++) { | |
441 | /* Create a manual-reset event for putting in OVERLAPPED */ | |
442 | wd->io_handles[i] = CreateEvent(NULL, TRUE, FALSE, NULL); | |
443 | if (wd->io_handles[i] == NULL) { | |
444 | PrintError(__func__); | |
445 | rc = 1; | |
446 | break; | |
447 | } | |
448 | } | |
449 | } | |
ecc314ba | 450 | |
9b836561 BC |
451 | if (!rc) { |
452 | ctx = malloc(sizeof(struct thread_ctx)); | |
453 | ctx->iocp = hFile; | |
454 | ctx->wd = wd; | |
455 | ||
456 | wd->iothread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, NULL); | |
457 | } | |
ecc314ba | 458 | |
9b836561 | 459 | if (rc || wd->iothread == NULL) { |
ecc314ba BC |
460 | PrintError(__func__); |
461 | rc = 1; | |
462 | } | |
463 | } | |
464 | ||
ecc314ba BC |
465 | return rc; |
466 | } | |
467 | ||
468 | static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f) | |
469 | { | |
9b836561 BC |
470 | struct windowsaio_data *wd; |
471 | ||
472 | dprint(FD_FILE, "fd close %s\n", f->file_name); | |
473 | ||
474 | if (td->io_ops->data != NULL) { | |
475 | wd = td->io_ops->data; | |
476 | wd->iothread_running = FALSE; | |
477 | WaitForSingleObject(wd->iothread_stopped, INFINITE); | |
478 | } | |
ecc314ba | 479 | |
ecc314ba | 480 | if (f->hFile != INVALID_HANDLE_VALUE) { |
9b836561 | 481 | if (!CloseHandle(f->hFile)) |
ecc314ba BC |
482 | PrintError(__func__); |
483 | } | |
484 | ||
485 | f->hFile = INVALID_HANDLE_VALUE; | |
486 | return 0; | |
487 | } | |
488 | ||
489 | static struct ioengine_ops ioengine = { | |
490 | .name = "windowsaio", | |
491 | .version = FIO_IOOPS_VERSION, | |
492 | .init = fio_windowsaio_init, | |
493 | .queue = fio_windowsaio_queue, | |
494 | .cancel = fio_windowsaio_cancel, | |
495 | .getevents = fio_windowsaio_getevents, | |
496 | .event = fio_windowsaio_event, | |
497 | .cleanup = fio_windowsaio_cleanup, | |
498 | .open_file = fio_windowsaio_open_file, | |
499 | .close_file = fio_windowsaio_close_file, | |
500 | .get_file_size = generic_get_file_size | |
501 | }; | |
502 | ||
503 | static void fio_init fio_posixaio_register(void) | |
504 | { | |
505 | register_ioengine(&ioengine); | |
506 | } | |
507 | ||
508 | static void fio_exit fio_posixaio_unregister(void) | |
509 | { | |
510 | unregister_ioengine(&ioengine); | |
511 | } |