Commit | Line | Data |
---|---|---|
ecc314ba BC |
1 | /* |
2 | * Native Windows async IO engine | |
3 | * Copyright (C) 2010 Bruce Cran <bruce@cran.org.uk> | |
4 | */ | |
5 | ||
ecc314ba BC |
6 | #include <stdio.h> |
7 | #include <stdlib.h> | |
8 | #include <unistd.h> | |
9 | #include <signal.h> | |
10 | #include <errno.h> | |
11 | #include <windows.h> | |
12 | ||
13 | #include "../fio.h" | |
14 | ||
15 | BOOL windowsaio_debug = FALSE; | |
16 | ||
17 | struct windowsaio_data { | |
18 | struct io_u **aio_events; | |
e4db9fec BC |
19 | HANDLE *busyIoHandles; |
20 | unsigned int busyIo; | |
ecc314ba BC |
21 | unsigned int ioFinished; |
22 | BOOL running; | |
23 | BOOL stopped; | |
24 | HANDLE hThread; | |
25 | }; | |
26 | ||
27 | typedef struct { | |
28 | OVERLAPPED o; | |
29 | struct io_u *io_u; | |
30 | } FIO_OVERLAPPED; | |
31 | ||
32 | struct thread_ctx { | |
33 | HANDLE ioCP; | |
34 | struct windowsaio_data *wd; | |
35 | }; | |
36 | ||
37 | static void PrintError(LPCSTR lpszFunction); | |
38 | static int fio_windowsaio_cancel(struct thread_data *td, | |
39 | struct io_u *io_u); | |
ecc314ba BC |
40 | static BOOL TimedOut(DWORD startCount, DWORD endCount); |
41 | static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min, | |
42 | unsigned int max, struct timespec *t); | |
43 | static struct io_u *fio_windowsaio_event(struct thread_data *td, int event); | |
44 | static int fio_windowsaio_queue(struct thread_data *td, | |
45 | struct io_u *io_u); | |
46 | static void fio_windowsaio_cleanup(struct thread_data *td); | |
47 | static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter); | |
48 | static int fio_windowsaio_init(struct thread_data *td); | |
49 | static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f); | |
50 | static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f); | |
51 | ||
52 | static void PrintError(LPCSTR lpszFunction) | |
53 | { | |
54 | // Retrieve the system error message for the last-error code | |
55 | ||
56 | LPSTR lpMsgBuf; | |
57 | DWORD dw = GetLastError(); | |
58 | ||
59 | FormatMessage( | |
60 | FORMAT_MESSAGE_ALLOCATE_BUFFER | | |
61 | FORMAT_MESSAGE_FROM_SYSTEM | | |
62 | FORMAT_MESSAGE_IGNORE_INSERTS, | |
63 | NULL, | |
64 | dw, | |
65 | MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), | |
66 | (LPTSTR)&lpMsgBuf, | |
67 | 0, NULL ); | |
68 | ||
69 | log_err("%s - %s", lpszFunction, lpMsgBuf); | |
70 | LocalFree(lpMsgBuf); | |
71 | } | |
72 | ||
73 | static int fio_windowsaio_cancel(struct thread_data *td, | |
74 | struct io_u *io_u) | |
75 | { | |
76 | BOOL bSuccess; | |
77 | int rc = 0; | |
78 | ||
79 | bSuccess = CancelIo(io_u->file->hFile); | |
80 | ||
81 | if (!bSuccess) | |
82 | rc = 1; | |
83 | ||
84 | return rc; | |
85 | } | |
86 | ||
ecc314ba BC |
87 | static BOOL TimedOut(DWORD startCount, DWORD endCount) |
88 | { | |
89 | BOOL expired = FALSE; | |
90 | DWORD currentTime; | |
91 | ||
ecc314ba BC |
92 | currentTime = GetTickCount(); |
93 | ||
94 | if ((endCount > startCount) && currentTime >= endCount) | |
95 | expired = TRUE; | |
96 | else if (currentTime < startCount && currentTime > endCount) | |
97 | expired = TRUE; | |
98 | ||
ecc314ba BC |
99 | return expired; |
100 | } | |
101 | ||
102 | static int fio_windowsaio_getevents(struct thread_data *td, unsigned int min, | |
103 | unsigned int max, struct timespec *t) | |
104 | { | |
105 | struct windowsaio_data *wd = td->io_ops->data; | |
106 | struct flist_head *entry; | |
107 | unsigned int dequeued = 0; | |
108 | struct io_u *io_u; | |
109 | DWORD startCount = 0, endCount = 0; | |
110 | BOOL timedout = FALSE; | |
111 | unsigned int r = 0; | |
e4db9fec | 112 | unsigned int waitInMs = 100; |
ecc314ba BC |
113 | |
114 | if (t != NULL) { | |
e4db9fec | 115 | waitInMs = (t->tv_sec * 1000) + (t->tv_nsec / 1000000); |
ecc314ba | 116 | startCount = GetTickCount(); |
e4db9fec | 117 | endCount = startCount + (t->tv_sec * 1000) + (t->tv_nsec / 1000000); |
ecc314ba BC |
118 | } |
119 | ||
120 | while (dequeued < min && !timedout) { | |
e4db9fec | 121 | WaitForMultipleObjects(wd->busyIo, wd->busyIoHandles, FALSE, waitInMs); |
ecc314ba BC |
122 | |
123 | flist_for_each(entry, &td->io_u_busylist) { | |
124 | io_u = flist_entry(entry, struct io_u, list); | |
125 | ||
126 | if (io_u->seen == 0) | |
127 | continue; | |
128 | ||
129 | dequeued++; | |
130 | ||
131 | wd->ioFinished--; | |
132 | wd->aio_events[r] = io_u; | |
133 | r++; | |
134 | ||
e4db9fec | 135 | wd->busyIo--; |
ecc314ba BC |
136 | |
137 | if (dequeued == max) | |
138 | break; | |
139 | } | |
140 | ||
e4db9fec | 141 | if (t != NULL && TimedOut(startCount, endCount)) |
ecc314ba | 142 | timedout = TRUE; |
ecc314ba BC |
143 | } |
144 | ||
ecc314ba BC |
145 | return dequeued; |
146 | } | |
147 | ||
148 | static struct io_u *fio_windowsaio_event(struct thread_data *td, int event) | |
149 | { | |
150 | struct windowsaio_data *wd = td->io_ops->data; | |
151 | return wd->aio_events[event]; | |
152 | } | |
153 | ||
154 | static int fio_windowsaio_queue(struct thread_data *td, | |
155 | struct io_u *io_u) | |
156 | { | |
157 | FIO_OVERLAPPED *fov; | |
158 | DWORD ioBytes; | |
159 | BOOL bSuccess = TRUE; | |
160 | int rc; | |
161 | ||
162 | fio_ro_check(td, io_u); | |
163 | ||
ecc314ba BC |
164 | fov = malloc(sizeof(FIO_OVERLAPPED)); |
165 | ZeroMemory(fov, sizeof(FIO_OVERLAPPED)); | |
166 | ||
e4db9fec BC |
167 | struct windowsaio_data *wd = td->io_ops->data; |
168 | ||
ecc314ba BC |
169 | io_u->seen = 0; |
170 | ||
171 | fov->o.Offset = io_u->offset & 0xFFFFFFFF; | |
172 | fov->o.OffsetHigh = io_u->offset >> 32; | |
173 | fov->o.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL); | |
174 | fov->io_u = io_u; | |
175 | ||
176 | if (fov->o.hEvent == NULL) { | |
177 | PrintError(__func__); | |
178 | return 1; | |
179 | } | |
180 | ||
181 | if (io_u->ddir == DDIR_WRITE) | |
182 | bSuccess = WriteFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, &ioBytes, &fov->o); | |
183 | else if (io_u->ddir == DDIR_READ) | |
184 | bSuccess = ReadFile(io_u->file->hFile, io_u->xfer_buf, io_u->xfer_buflen, &ioBytes, &fov->o); | |
185 | else if (io_u->ddir == DDIR_SYNC || | |
186 | io_u->ddir == DDIR_DATASYNC || | |
187 | io_u->ddir == DDIR_SYNC_FILE_RANGE) | |
188 | { | |
189 | FlushFileBuffers(io_u->file->hFile); | |
190 | return FIO_Q_COMPLETED; | |
191 | } else if (io_u->ddir == DDIR_TRIM) { | |
192 | log_info("explicit TRIM isn't supported on Windows"); | |
193 | return FIO_Q_COMPLETED; | |
194 | } | |
195 | ||
196 | if (bSuccess) { | |
197 | io_u->seen = 1; | |
198 | io_u->resid = io_u->xfer_buflen - fov->o.InternalHigh; | |
199 | io_u->error = 0; | |
200 | rc = FIO_Q_COMPLETED; | |
201 | } else if (!bSuccess && GetLastError() == ERROR_IO_PENDING) { | |
e4db9fec | 202 | wd->busyIoHandles[wd->busyIo++] = fov->o.hEvent; |
ecc314ba BC |
203 | rc = FIO_Q_QUEUED; |
204 | } else { | |
205 | PrintError(__func__); | |
206 | io_u->error = GetLastError(); | |
207 | io_u->resid = io_u->xfer_buflen; | |
208 | rc = FIO_Q_COMPLETED; | |
209 | } | |
210 | ||
ecc314ba BC |
211 | return rc; |
212 | } | |
213 | ||
214 | static void fio_windowsaio_cleanup(struct thread_data *td) | |
215 | { | |
216 | struct windowsaio_data *wd; | |
217 | ||
ecc314ba BC |
218 | wd = td->io_ops->data; |
219 | wd->running = FALSE; | |
220 | ||
221 | while (wd->stopped == FALSE) | |
e4db9fec | 222 | Sleep(20); |
ecc314ba BC |
223 | |
224 | if (wd != NULL) { | |
225 | CloseHandle(wd->hThread); | |
e4db9fec | 226 | |
ecc314ba | 227 | free(wd->aio_events); |
e4db9fec | 228 | free(wd->busyIoHandles); |
ecc314ba | 229 | free(wd); |
e4db9fec | 230 | |
ecc314ba BC |
231 | td->io_ops->data = NULL; |
232 | } | |
233 | ||
ecc314ba BC |
234 | } |
235 | ||
236 | static DWORD WINAPI IoCompletionRoutine(LPVOID lpParameter) | |
237 | { | |
238 | OVERLAPPED *ovl; | |
239 | FIO_OVERLAPPED *fov; | |
240 | struct io_u *io_u; | |
241 | struct windowsaio_data *wd; | |
242 | ||
243 | struct thread_ctx *ctx; | |
244 | ULONG_PTR ulKey = 0; | |
245 | BOOL bSuccess; | |
246 | DWORD bytes; | |
247 | ||
248 | ||
249 | ctx = (struct thread_ctx*)lpParameter; | |
250 | wd = ctx->wd; | |
251 | bSuccess = TRUE; | |
252 | ||
ecc314ba | 253 | while (ctx->wd->running) { |
e4db9fec | 254 | bSuccess = GetQueuedCompletionStatus(ctx->ioCP, &bytes, &ulKey, &ovl, 100); |
ecc314ba BC |
255 | |
256 | if (!bSuccess) { | |
257 | if (GetLastError() == WAIT_TIMEOUT) { | |
258 | continue; | |
259 | } else { | |
260 | PrintError(__func__); | |
261 | continue; | |
262 | } | |
263 | } | |
264 | ||
265 | fov = CONTAINING_RECORD(ovl, FIO_OVERLAPPED, o); | |
266 | io_u = fov->io_u; | |
267 | ||
ecc314ba BC |
268 | if (io_u->seen == 1) |
269 | continue; | |
270 | ||
271 | ctx->wd->ioFinished++; | |
272 | ||
273 | if (ovl->Internal == ERROR_SUCCESS) { | |
274 | io_u->resid = io_u->xfer_buflen - ovl->InternalHigh; | |
275 | io_u->error = 0; | |
276 | } else { | |
277 | io_u->resid = io_u->xfer_buflen; | |
278 | io_u->error = 1; | |
279 | } | |
280 | ||
281 | io_u->seen = 1; | |
282 | CloseHandle(ovl->hEvent); | |
283 | free(ovl); | |
284 | } | |
285 | ||
286 | bSuccess = CloseHandle(ctx->ioCP); | |
287 | if (!bSuccess) | |
288 | PrintError(__func__); | |
289 | ||
ecc314ba BC |
290 | ctx->wd->stopped = TRUE; |
291 | free(ctx); | |
292 | return 0; | |
293 | } | |
294 | ||
295 | static int fio_windowsaio_init(struct thread_data *td) | |
296 | { | |
ecc314ba BC |
297 | struct windowsaio_data *wd; |
298 | ||
ecc314ba | 299 | wd = malloc(sizeof(struct windowsaio_data)); |
e4db9fec BC |
300 | if (wd == NULL) |
301 | return 1; | |
ecc314ba | 302 | |
ecc314ba | 303 | wd->aio_events = malloc((td->o.iodepth + 1) * sizeof(struct io_u *)); |
e4db9fec BC |
304 | if (wd->aio_events == NULL) { |
305 | free(wd); | |
306 | return 1; | |
307 | } | |
308 | ||
309 | wd->busyIoHandles = malloc((td->o.iodepth + 1) * sizeof(struct io_u *)); | |
310 | if (wd->busyIoHandles == NULL) { | |
311 | free(wd->aio_events); | |
312 | free(wd); | |
313 | return 1; | |
314 | } | |
315 | ||
ecc314ba | 316 | ZeroMemory(wd->aio_events, (td->o.iodepth + 1) * sizeof(struct io_u *)); |
e4db9fec BC |
317 | ZeroMemory(wd->busyIoHandles, (td->o.iodepth + 1) * sizeof(struct io_u *)); |
318 | ||
319 | wd->busyIo = 0; | |
320 | wd->ioFinished = 0; | |
321 | wd->running = FALSE; | |
322 | wd->stopped = FALSE; | |
323 | wd->hThread = FALSE; | |
ecc314ba BC |
324 | |
325 | td->io_ops->data = wd; | |
e4db9fec | 326 | return 0; |
ecc314ba BC |
327 | } |
328 | ||
329 | static int fio_windowsaio_open_file(struct thread_data *td, struct fio_file *f) | |
330 | { | |
331 | int rc = 0; | |
332 | HANDLE hFile; | |
333 | DWORD flags = FILE_ATTRIBUTE_TEMPORARY | FILE_FLAG_POSIX_SEMANTICS | FILE_FLAG_OVERLAPPED; | |
334 | DWORD sharemode = FILE_SHARE_READ | FILE_SHARE_WRITE; | |
335 | DWORD openmode = OPEN_ALWAYS; | |
336 | DWORD access; | |
337 | ||
338 | dprint(FD_FILE, "fd open %s\n", f->file_name); | |
339 | ||
ecc314ba BC |
340 | if (f->filetype == FIO_TYPE_PIPE) { |
341 | log_err("fio: windowsaio doesn't support pipes\n"); | |
342 | return 1; | |
343 | } | |
344 | ||
345 | if (!strcmp(f->file_name, "-")) { | |
346 | log_err("fio: can't read/write to stdin/out\n"); | |
347 | return 1; | |
348 | } | |
349 | ||
350 | if (td->o.odirect) | |
351 | flags |= FILE_FLAG_NO_BUFFERING; | |
352 | if (td->o.sync_io) | |
353 | flags |= FILE_FLAG_WRITE_THROUGH; | |
354 | ||
355 | ||
356 | if (td->o.td_ddir == TD_DDIR_READ || | |
357 | td->o.td_ddir == TD_DDIR_WRITE || | |
358 | td->o.td_ddir == TD_DDIR_RANDRW) | |
359 | { | |
360 | flags |= FILE_FLAG_SEQUENTIAL_SCAN; | |
361 | } | |
362 | else | |
363 | { | |
364 | flags |= FILE_FLAG_RANDOM_ACCESS; | |
365 | } | |
366 | ||
367 | if (td_read(td) || read_only) | |
368 | access = GENERIC_READ; | |
369 | else | |
370 | access = (GENERIC_READ | GENERIC_WRITE); | |
371 | ||
372 | if (td->o.create_on_open > 0) | |
373 | openmode = OPEN_ALWAYS; | |
374 | else | |
375 | openmode = OPEN_EXISTING; | |
376 | ||
377 | f->hFile = CreateFile(f->file_name, access, sharemode, | |
378 | NULL, openmode, flags, NULL); | |
379 | ||
380 | if (f->hFile == INVALID_HANDLE_VALUE) { | |
381 | log_err("Failed to open %s\n", f->file_name); | |
382 | PrintError(__func__); | |
383 | rc = 1; | |
384 | } | |
385 | ||
386 | /* Only set up the competion port and thread if we're not just | |
387 | * querying the device size */ | |
388 | if (!rc && td->io_ops->data != NULL) { | |
389 | struct windowsaio_data *wd; | |
390 | struct thread_ctx *ctx; | |
391 | hFile = CreateIoCompletionPort(f->hFile, NULL, 0, 0); | |
392 | ||
393 | wd = td->io_ops->data; | |
394 | wd->running = TRUE; | |
395 | wd->stopped = FALSE; | |
396 | ||
397 | ctx = malloc(sizeof(struct thread_ctx)); | |
398 | ctx->ioCP = hFile; | |
399 | ctx->wd = wd; | |
400 | ||
401 | wd->hThread = CreateThread(NULL, 0, IoCompletionRoutine, ctx, 0, NULL); | |
402 | ||
403 | if (wd->hThread == NULL) { | |
404 | PrintError(__func__); | |
405 | rc = 1; | |
406 | } | |
407 | } | |
408 | ||
ecc314ba BC |
409 | return rc; |
410 | } | |
411 | ||
412 | static int fio_windowsaio_close_file(struct thread_data fio_unused *td, struct fio_file *f) | |
413 | { | |
414 | BOOL bSuccess; | |
415 | ||
ecc314ba BC |
416 | if (f->hFile != INVALID_HANDLE_VALUE) { |
417 | bSuccess = CloseHandle(f->hFile); | |
418 | if (!bSuccess) | |
419 | PrintError(__func__); | |
420 | } | |
421 | ||
422 | f->hFile = INVALID_HANDLE_VALUE; | |
423 | return 0; | |
424 | } | |
425 | ||
426 | static struct ioengine_ops ioengine = { | |
427 | .name = "windowsaio", | |
428 | .version = FIO_IOOPS_VERSION, | |
429 | .init = fio_windowsaio_init, | |
430 | .queue = fio_windowsaio_queue, | |
431 | .cancel = fio_windowsaio_cancel, | |
432 | .getevents = fio_windowsaio_getevents, | |
433 | .event = fio_windowsaio_event, | |
434 | .cleanup = fio_windowsaio_cleanup, | |
435 | .open_file = fio_windowsaio_open_file, | |
436 | .close_file = fio_windowsaio_close_file, | |
437 | .get_file_size = generic_get_file_size | |
438 | }; | |
439 | ||
440 | static void fio_init fio_posixaio_register(void) | |
441 | { | |
442 | register_ioengine(&ioengine); | |
443 | } | |
444 | ||
445 | static void fio_exit fio_posixaio_unregister(void) | |
446 | { | |
447 | unregister_ioengine(&ioengine); | |
448 | } |