Commit | Line | Data |
---|---|---|
ecc314ba BC |
1 | /* |
2 | * Native Windows async IO engine | |
3 | * Copyright (C) 2010 Bruce Cran <bruce@cran.org.uk> | |
4 | */ | |
5 | ||
6 | ||
7 | #include <stdio.h> | |
8 | #include <stdlib.h> | |
9 | #include <unistd.h> | |
10 | #include <signal.h> | |
11 | #include <errno.h> | |
12 | #include <windows.h> | |
13 | ||
14 | #include "../fio.h" | |
15 | ||
16 | BOOL windowsaio_debug = FALSE; | |
17 | ||
18 | struct windowsaio_data { | |
19 | struct io_u **aio_events; | |
20 | unsigned int ioFinished; | |
21 | BOOL running; | |
22 | BOOL stopped; | |
23 | HANDLE hThread; | |
24 | }; | |
25 | ||
26 | typedef struct { | |
27 | OVERLAPPED o; | |
28 | struct io_u *io_u; | |
29 | } FIO_OVERLAPPED; | |
30 | ||
31 | struct thread_ctx { | |
32 | HANDLE ioCP; | |
33 | struct windowsaio_data *wd; | |
34 | }; | |
35 | ||
36 | static void PrintError(LPCSTR lpszFunction); | |
37 | static int fio_windowsaio_cancel(struct thread_data *td, | |
38 | struct io_u *io_u); | |
39 | static DWORD GetEndCount(DWORD startCount, struct timespec *t); | |
40 | static BOOL TimedOut(DWORD startCount, DWORD endCount); | |
41 | static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min, | |
42 | unsigned int max, struct timespec *t); | |
43 | static struct io_u *fio_windowsaio_event(struct thread_data *td, int event); | |
44 | static int fio_windowsaio_queue(struct thread_data *td, | |
45 | struct io_u *io_u); | |
46 | static void fio_windowsaio_cleanup(struct thread_data *td); | |
47 | static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter); | |
48 | static int fio_windowsaio_init(struct thread_data *td); | |
49 | static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f); | |
50 | static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f); | |
51 | ||
52 | static void PrintError(LPCSTR lpszFunction) | |
53 | { | |
54 | // Retrieve the system error message for the last-error code | |
55 | ||
56 | LPSTR lpMsgBuf; | |
57 | DWORD dw = GetLastError(); | |
58 | ||
59 | FormatMessage( | |
60 | FORMAT_MESSAGE_ALLOCATE_BUFFER | | |
61 | FORMAT_MESSAGE_FROM_SYSTEM | | |
62 | FORMAT_MESSAGE_IGNORE_INSERTS, | |
63 | NULL, | |
64 | dw, | |
65 | MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), | |
66 | (LPTSTR)&lpMsgBuf, | |
67 | 0, NULL ); | |
68 | ||
69 | log_err("%s - %s", lpszFunction, lpMsgBuf); | |
70 | LocalFree(lpMsgBuf); | |
71 | } | |
72 | ||
73 | static int fio_windowsaio_cancel(struct thread_data *td, | |
74 | struct io_u *io_u) | |
75 | { | |
76 | BOOL bSuccess; | |
77 | int rc = 0; | |
78 | ||
79 | bSuccess = CancelIo(io_u->file->hFile); | |
80 | ||
81 | if (!bSuccess) | |
82 | rc = 1; | |
83 | ||
84 | return rc; | |
85 | } | |
86 | ||
87 | static DWORD GetEndCount(DWORD startCount, struct timespec *t) | |
88 | { | |
89 | DWORD endCount = startCount; | |
90 | ||
91 | if (t == NULL) | |
92 | return 0; | |
93 | ||
94 | endCount += (t->tv_sec * 1000) + (t->tv_nsec / 1000000); | |
95 | return endCount; | |
96 | } | |
97 | ||
98 | static BOOL TimedOut(DWORD startCount, DWORD endCount) | |
99 | { | |
100 | BOOL expired = FALSE; | |
101 | DWORD currentTime; | |
102 | ||
103 | if (startCount == 0 || endCount == 0) | |
104 | return FALSE; | |
105 | ||
106 | currentTime = GetTickCount(); | |
107 | ||
108 | if ((endCount > startCount) && currentTime >= endCount) | |
109 | expired = TRUE; | |
110 | else if (currentTime < startCount && currentTime > endCount) | |
111 | expired = TRUE; | |
112 | ||
113 | if (windowsaio_debug) | |
114 | printf("windowsaio: timedout = %d\n", expired); | |
115 | ||
116 | return expired; | |
117 | } | |
118 | ||
119 | static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min, | |
120 | unsigned int max, struct timespec *t) | |
121 | { | |
122 | struct windowsaio_data *wd = td->io_ops->data; | |
123 | struct flist_head *entry; | |
124 | unsigned int dequeued = 0; | |
125 | struct io_u *io_u; | |
126 | DWORD startCount = 0, endCount = 0; | |
127 | BOOL timedout = FALSE; | |
128 | unsigned int r = 0; | |
129 | ||
130 | if (windowsaio_debug) | |
131 | printf("getevents (min %d, max %d)\n", min, max); | |
132 | ||
133 | if (t != NULL) { | |
134 | startCount = GetTickCount(); | |
135 | endCount = GetEndCount(startCount, t); | |
136 | } | |
137 | ||
138 | while (dequeued < min && !timedout) { | |
139 | ||
140 | flist_for_each(entry, &td->io_u_busylist) { | |
141 | io_u = flist_entry(entry, struct io_u, list); | |
142 | ||
143 | if (io_u->seen == 0) | |
144 | continue; | |
145 | ||
146 | dequeued++; | |
147 | ||
148 | wd->ioFinished--; | |
149 | wd->aio_events[r] = io_u; | |
150 | r++; | |
151 | ||
152 | if (windowsaio_debug) | |
153 | printf("dequeued %d\n", dequeued); | |
154 | ||
155 | if (dequeued == max) | |
156 | break; | |
157 | } | |
158 | ||
159 | if (TimedOut(startCount, endCount)) | |
160 | timedout = TRUE; | |
161 | ||
162 | if (dequeued < min && !timedout) | |
163 | Sleep(250); | |
164 | } | |
165 | ||
166 | if (windowsaio_debug) | |
167 | printf("leave getevents (%d)\n", dequeued); | |
168 | ||
169 | return dequeued; | |
170 | } | |
171 | ||
172 | static struct io_u *fio_windowsaio_event(struct thread_data *td, int event) | |
173 | { | |
174 | struct windowsaio_data *wd = td->io_ops->data; | |
175 | return wd->aio_events[event]; | |
176 | } | |
177 | ||
178 | static int fio_windowsaio_queue(struct thread_data *td, | |
179 | struct io_u *io_u) | |
180 | { | |
181 | FIO_OVERLAPPED *fov; | |
182 | DWORD ioBytes; | |
183 | BOOL bSuccess = TRUE; | |
184 | int rc; | |
185 | ||
186 | fio_ro_check(td, io_u); | |
187 | ||
188 | if (windowsaio_debug) | |
189 | printf("enqueue enter\n"); | |
190 | ||
191 | fov = malloc(sizeof(FIO_OVERLAPPED)); | |
192 | ZeroMemory(fov, sizeof(FIO_OVERLAPPED)); | |
193 | ||
194 | io_u->seen = 0; | |
195 | ||
196 | fov->o.Offset = io_u->offset & 0xFFFFFFFF; | |
197 | fov->o.OffsetHigh = io_u->offset >> 32; | |
198 | fov->o.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL); | |
199 | fov->io_u = io_u; | |
200 | ||
201 | if (fov->o.hEvent == NULL) { | |
202 | PrintError(__func__); | |
203 | return 1; | |
204 | } | |
205 | ||
206 | if (io_u->ddir == DDIR_WRITE) | |
207 | bSuccess = WriteFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, &ioBytes, &fov->o); | |
208 | else if (io_u->ddir == DDIR_READ) | |
209 | bSuccess = ReadFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, &ioBytes, &fov->o); | |
210 | else if (io_u->ddir == DDIR_SYNC || | |
211 | io_u->ddir == DDIR_DATASYNC || | |
212 | io_u->ddir == DDIR_SYNC_FILE_RANGE) | |
213 | { | |
214 | FlushFileBuffers(io_u->file->hFile); | |
215 | return FIO_Q_COMPLETED; | |
216 | } else if (io_u->ddir == DDIR_TRIM) { | |
217 | log_info("explicit TRIM isn't supported on Windows"); | |
218 | return FIO_Q_COMPLETED; | |
219 | } | |
220 | ||
221 | if (bSuccess) { | |
222 | io_u->seen = 1; | |
223 | io_u->resid = io_u->xfer_buflen - fov->o.InternalHigh; | |
224 | io_u->error = 0; | |
225 | rc = FIO_Q_COMPLETED; | |
226 | } else if (!bSuccess && GetLastError() == ERROR_IO_PENDING) { | |
227 | rc = FIO_Q_QUEUED; | |
228 | } else { | |
229 | PrintError(__func__); | |
230 | io_u->error = GetLastError(); | |
231 | io_u->resid = io_u->xfer_buflen; | |
232 | rc = FIO_Q_COMPLETED; | |
233 | } | |
234 | ||
235 | if (windowsaio_debug) | |
236 | printf("enqueue - leave (offset %llu)\n", io_u->offset); | |
237 | ||
238 | return rc; | |
239 | } | |
240 | ||
241 | static void fio_windowsaio_cleanup(struct thread_data *td) | |
242 | { | |
243 | struct windowsaio_data *wd; | |
244 | ||
245 | if (windowsaio_debug) | |
246 | printf("windowsaio: cleanup - enter\n"); | |
247 | ||
248 | wd = td->io_ops->data; | |
249 | wd->running = FALSE; | |
250 | ||
251 | while (wd->stopped == FALSE) | |
252 | Sleep(5); | |
253 | ||
254 | if (wd != NULL) { | |
255 | CloseHandle(wd->hThread); | |
256 | free(wd->aio_events); | |
257 | wd->aio_events = NULL; | |
258 | free(wd); | |
259 | td->io_ops->data = NULL; | |
260 | } | |
261 | ||
262 | if (windowsaio_debug) | |
263 | printf("windowsaio: cleanup - leave\n"); | |
264 | } | |
265 | ||
266 | static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter) | |
267 | { | |
268 | OVERLAPPED *ovl; | |
269 | FIO_OVERLAPPED *fov; | |
270 | struct io_u *io_u; | |
271 | struct windowsaio_data *wd; | |
272 | ||
273 | struct thread_ctx *ctx; | |
274 | ULONG_PTR ulKey = 0; | |
275 | BOOL bSuccess; | |
276 | DWORD bytes; | |
277 | ||
278 | ||
279 | ctx = (struct thread_ctx*)lpParameter; | |
280 | wd = ctx->wd; | |
281 | bSuccess = TRUE; | |
282 | ||
283 | if (windowsaio_debug) | |
284 | printf("windowsaio: IoCompletionRoutine - enter\n"); | |
285 | ||
286 | while (ctx->wd->running) { | |
287 | bSuccess = GetQueuedCompletionStatus(ctx->ioCP, &bytes, &ulKey, &ovl, 500); | |
288 | ||
289 | if (windowsaio_debug) | |
290 | printf("GetQueuedCompletionStatus returned %d\n", bSuccess); | |
291 | ||
292 | if (!bSuccess) { | |
293 | if (GetLastError() == WAIT_TIMEOUT) { | |
294 | continue; | |
295 | } else { | |
296 | PrintError(__func__); | |
297 | continue; | |
298 | } | |
299 | } | |
300 | ||
301 | fov = CONTAINING_RECORD(ovl, FIO_OVERLAPPED, o); | |
302 | io_u = fov->io_u; | |
303 | ||
304 | if (windowsaio_debug) { | |
305 | if (io_u->seen == 1) | |
306 | printf("IoCompletionRoutine - got already completed IO\n"); | |
307 | else | |
308 | printf("IoCompletionRoutine - completed %d IO\n", ctx->wd->ioFinished); | |
309 | } | |
310 | ||
311 | if (io_u->seen == 1) | |
312 | continue; | |
313 | ||
314 | ctx->wd->ioFinished++; | |
315 | ||
316 | if (ovl->Internal == ERROR_SUCCESS) { | |
317 | io_u->resid = io_u->xfer_buflen - ovl->InternalHigh; | |
318 | io_u->error = 0; | |
319 | } else { | |
320 | io_u->resid = io_u->xfer_buflen; | |
321 | io_u->error = 1; | |
322 | } | |
323 | ||
324 | io_u->seen = 1; | |
325 | CloseHandle(ovl->hEvent); | |
326 | free(ovl); | |
327 | } | |
328 | ||
329 | bSuccess = CloseHandle(ctx->ioCP); | |
330 | if (!bSuccess) | |
331 | PrintError(__func__); | |
332 | ||
333 | if (windowsaio_debug) | |
334 | printf("windowsaio: IoCompletionRoutine - leave\n"); | |
335 | ||
336 | ctx->wd->stopped = TRUE; | |
337 | free(ctx); | |
338 | return 0; | |
339 | } | |
340 | ||
341 | static int fio_windowsaio_init(struct thread_data *td) | |
342 | { | |
343 | int rc = 0; | |
344 | struct windowsaio_data *wd; | |
345 | ||
346 | if (windowsaio_debug) | |
347 | printf("windowsaio: init\n"); | |
348 | ||
349 | wd = malloc(sizeof(struct windowsaio_data)); | |
350 | ||
351 | ZeroMemory(wd, sizeof(*wd)); | |
352 | wd->aio_events = malloc((td->o.iodepth + 1) * sizeof(struct io_u *)); | |
353 | ZeroMemory(wd->aio_events, (td->o.iodepth + 1) * sizeof(struct io_u *)); | |
354 | ||
355 | td->io_ops->data = wd; | |
356 | return rc; | |
357 | } | |
358 | ||
359 | static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f) | |
360 | { | |
361 | int rc = 0; | |
362 | HANDLE hFile; | |
363 | DWORD flags = FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_OVERLAPPED; | |
364 | DWORD sharemode = FILE_SHARE_READ | FILE_SHARE_WRITE; | |
365 | DWORD openmode = OPEN_ALWAYS; | |
366 | DWORD access; | |
367 | ||
368 | dprint(FD_FILE, "fd open %s\n", f->file_name); | |
369 | ||
370 | if (windowsaio_debug) | |
371 | printf("windowsaio: open file %s - enter\n", f->file_name); | |
372 | ||
373 | if (f->filetype == FIO_TYPE_PIPE) { | |
374 | log_err("fio: windowsaio doesn't support pipes\n"); | |
375 | return 1; | |
376 | } | |
377 | ||
378 | if (!strcmp(f->file_name, "-")) { | |
379 | log_err("fio: can't read/write to stdin/out\n"); | |
380 | return 1; | |
381 | } | |
382 | ||
383 | if (td->o.odirect) | |
384 | flags |= FILE_FLAG_NO_BUFFERING; | |
385 | if (td->o.sync_io) | |
386 | flags |= FILE_FLAG_WRITE_THROUGH; | |
387 | ||
388 | ||
389 | if (td->o.td_ddir == TD_DDIR_READ || | |
390 | td->o.td_ddir == TD_DDIR_WRITE || | |
391 | td->o.td_ddir == TD_DDIR_RANDRW) | |
392 | { | |
393 | flags |= FILE_FLAG_SEQUENTIAL_SCAN; | |
394 | } | |
395 | else | |
396 | { | |
397 | flags |= FILE_FLAG_RANDOM_ACCESS; | |
398 | } | |
399 | ||
400 | if (td_read(td) || read_only) | |
401 | access = GENERIC_READ; | |
402 | else | |
403 | access = (GENERIC_READ | GENERIC_WRITE); | |
404 | ||
405 | if (td->o.create_on_open > 0) | |
406 | openmode = OPEN_ALWAYS; | |
407 | else | |
408 | openmode = OPEN_EXISTING; | |
409 | ||
410 | f->hFile = CreateFile(f->file_name, access, sharemode, | |
411 | NULL, openmode, flags, NULL); | |
412 | ||
413 | if (f->hFile == INVALID_HANDLE_VALUE) { | |
414 | log_err("Failed to open %s\n", f->file_name); | |
415 | PrintError(__func__); | |
416 | rc = 1; | |
417 | } | |
418 | ||
419 | /* Only set up the competion port and thread if we're not just | |
420 | * querying the device size */ | |
421 | if (!rc && td->io_ops->data != NULL) { | |
422 | struct windowsaio_data *wd; | |
423 | struct thread_ctx *ctx; | |
424 | hFile = CreateIoCompletionPort(f->hFile, NULL, 0, 0); | |
425 | ||
426 | wd = td->io_ops->data; | |
427 | wd->running = TRUE; | |
428 | wd->stopped = FALSE; | |
429 | ||
430 | ctx = malloc(sizeof(struct thread_ctx)); | |
431 | ctx->ioCP = hFile; | |
432 | ctx->wd = wd; | |
433 | ||
434 | wd->hThread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, NULL); | |
435 | ||
436 | if (wd->hThread == NULL) { | |
437 | PrintError(__func__); | |
438 | rc = 1; | |
439 | } | |
440 | } | |
441 | ||
442 | if (windowsaio_debug) | |
443 | printf("windowsaio: open file - leave (%d)\n", rc); | |
444 | ||
445 | return rc; | |
446 | } | |
447 | ||
448 | static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f) | |
449 | { | |
450 | BOOL bSuccess; | |
451 | ||
452 | if (windowsaio_debug) | |
453 | printf("windowsaio: close file\n"); | |
454 | ||
455 | if (f->hFile != INVALID_HANDLE_VALUE) { | |
456 | bSuccess = CloseHandle(f->hFile); | |
457 | if (!bSuccess) | |
458 | PrintError(__func__); | |
459 | } | |
460 | ||
461 | f->hFile = INVALID_HANDLE_VALUE; | |
462 | return 0; | |
463 | } | |
464 | ||
465 | static struct ioengine_ops ioengine = { | |
466 | .name = "windowsaio", | |
467 | .version = FIO_IOOPS_VERSION, | |
468 | .init = fio_windowsaio_init, | |
469 | .queue = fio_windowsaio_queue, | |
470 | .cancel = fio_windowsaio_cancel, | |
471 | .getevents = fio_windowsaio_getevents, | |
472 | .event = fio_windowsaio_event, | |
473 | .cleanup = fio_windowsaio_cleanup, | |
474 | .open_file = fio_windowsaio_open_file, | |
475 | .close_file = fio_windowsaio_close_file, | |
476 | .get_file_size = generic_get_file_size | |
477 | }; | |
478 | ||
479 | static void fio_init fio_posixaio_register(void) | |
480 | { | |
481 | register_ioengine(&ioengine); | |
482 | } | |
483 | ||
484 | static void fio_exit fio_posixaio_unregister(void) | |
485 | { | |
486 | unregister_ioengine(&ioengine); | |
487 | } |