engines/rbd: add support for "direct=1" option
[fio.git] / engines / posixaio.c
... / ...
CommitLineData
1/*
2 * posixaio engine
3 *
4 * IO engine that uses the posix defined aio interface.
5 *
6 */
7#include <stdio.h>
8#include <stdlib.h>
9#include <unistd.h>
10#include <errno.h>
11#include <fcntl.h>
12
13#include "../fio.h"
14
/*
 * Per-thread private state of the posixaio engine, stored in
 * td->io_ops_data by the init hook.
 */
struct posixaio_data {
	/* completed io_us collected by getevents, handed out by the event hook */
	struct io_u **aio_events;

	/* requests submitted through aio_*() and not yet reaped */
	unsigned int queued;
};
19
/*
 * Time elapsed from 'start' until now, as reported by utime_since()
 * (presumably in microseconds, judging by the helper's name and how the
 * caller compares the result -- confirm against its definition).
 *
 * Returns 0 when the current time cannot be obtained.
 */
static unsigned long long ts_utime_since_now(const struct timespec *start)
{
	struct timespec cur;
	int rc = fio_get_mono_time(&cur);

	if (rc >= 0)
		return utime_since(start, &cur);

	return 0;
}
29
30static int fio_posixaio_cancel(struct thread_data fio_unused *td,
31 struct io_u *io_u)
32{
33 struct fio_file *f = io_u->file;
34 int r = aio_cancel(f->fd, &io_u->aiocb);
35
36 if (r == AIO_ALLDONE || r == AIO_CANCELED)
37 return 0;
38
39 return 1;
40}
41
42static int fio_posixaio_prep(struct thread_data fio_unused *td,
43 struct io_u *io_u)
44{
45 os_aiocb_t *aiocb = &io_u->aiocb;
46 struct fio_file *f = io_u->file;
47
48 aiocb->aio_fildes = f->fd;
49 aiocb->aio_buf = io_u->xfer_buf;
50 aiocb->aio_nbytes = io_u->xfer_buflen;
51 aiocb->aio_offset = io_u->offset;
52 aiocb->aio_sigevent.sigev_notify = SIGEV_NONE;
53
54 io_u->seen = 0;
55 return 0;
56}
57
58#define SUSPEND_ENTRIES 8
59
60static int fio_posixaio_getevents(struct thread_data *td, unsigned int min,
61 unsigned int max, const struct timespec *t)
62{
63 struct posixaio_data *pd = td->io_ops_data;
64 os_aiocb_t *suspend_list[SUSPEND_ENTRIES];
65 struct timespec start;
66 int have_timeout = 0;
67 int suspend_entries;
68 struct io_u *io_u;
69 unsigned int r;
70 int i;
71
72 if (t && fio_get_mono_time(&start) == 0)
73 have_timeout = 1;
74 else
75 memset(&start, 0, sizeof(start));
76
77 r = 0;
78restart:
79 memset(suspend_list, 0, sizeof(suspend_list));
80 suspend_entries = 0;
81 io_u_qiter(&td->io_u_all, io_u, i) {
82 int err;
83
84 if (io_u->seen || !(io_u->flags & IO_U_F_FLIGHT))
85 continue;
86
87 err = aio_error(&io_u->aiocb);
88 if (err == EINPROGRESS) {
89 if (suspend_entries < SUSPEND_ENTRIES) {
90 suspend_list[suspend_entries] = &io_u->aiocb;
91 suspend_entries++;
92 }
93 continue;
94 }
95
96 io_u->seen = 1;
97 pd->queued--;
98 pd->aio_events[r++] = io_u;
99
100 if (err == ECANCELED)
101 io_u->resid = io_u->xfer_buflen;
102 else if (!err) {
103 ssize_t retval = aio_return(&io_u->aiocb);
104
105 io_u->resid = io_u->xfer_buflen - retval;
106 } else
107 io_u->error = err;
108 }
109
110 if (r >= min)
111 return r;
112
113 if (have_timeout) {
114 unsigned long long usec;
115
116 usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
117 if (ts_utime_since_now(&start) > usec)
118 return r;
119 }
120
121 /*
122 * must have some in-flight, wait for at least one
123 */
124 aio_suspend((const os_aiocb_t * const *)suspend_list,
125 suspend_entries, t);
126 goto restart;
127}
128
129static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
130{
131 struct posixaio_data *pd = td->io_ops_data;
132
133 return pd->aio_events[event];
134}
135
136static enum fio_q_status fio_posixaio_queue(struct thread_data *td,
137 struct io_u *io_u)
138{
139 struct posixaio_data *pd = td->io_ops_data;
140 os_aiocb_t *aiocb = &io_u->aiocb;
141 int ret;
142
143 fio_ro_check(td, io_u);
144
145 if (io_u->ddir == DDIR_READ)
146 ret = aio_read(aiocb);
147 else if (io_u->ddir == DDIR_WRITE)
148 ret = aio_write(aiocb);
149 else if (io_u->ddir == DDIR_TRIM) {
150 if (pd->queued)
151 return FIO_Q_BUSY;
152
153 do_io_u_trim(td, io_u);
154 return FIO_Q_COMPLETED;
155 } else {
156#ifdef CONFIG_POSIXAIO_FSYNC
157 ret = aio_fsync(O_SYNC, aiocb);
158#else
159 if (pd->queued)
160 return FIO_Q_BUSY;
161
162 do_io_u_sync(td, io_u);
163 return FIO_Q_COMPLETED;
164#endif
165 }
166
167 if (ret) {
168 int aio_err = errno;
169
170 /*
171 * At least OSX has a very low limit on the number of pending
172 * IOs, so if it returns EAGAIN, we are out of resources
173 * to queue more. Just return FIO_Q_BUSY to naturally
174 * drop off at this depth.
175 */
176 if (aio_err == EAGAIN)
177 return FIO_Q_BUSY;
178
179 io_u->error = aio_err;
180 td_verror(td, io_u->error, "xfer");
181 return FIO_Q_COMPLETED;
182 }
183
184 pd->queued++;
185 return FIO_Q_QUEUED;
186}
187
188static void fio_posixaio_cleanup(struct thread_data *td)
189{
190 struct posixaio_data *pd = td->io_ops_data;
191
192 if (pd) {
193 free(pd->aio_events);
194 free(pd);
195 }
196}
197
198static int fio_posixaio_init(struct thread_data *td)
199{
200 struct posixaio_data *pd = malloc(sizeof(*pd));
201
202 memset(pd, 0, sizeof(*pd));
203 pd->aio_events = malloc(td->o.iodepth * sizeof(struct io_u *));
204 memset(pd->aio_events, 0, td->o.iodepth * sizeof(struct io_u *));
205
206 td->io_ops_data = pd;
207 return 0;
208}
209
210static struct ioengine_ops ioengine = {
211 .name = "posixaio",
212 .version = FIO_IOOPS_VERSION,
213 .flags = FIO_ASYNCIO_SYNC_TRIM,
214 .init = fio_posixaio_init,
215 .prep = fio_posixaio_prep,
216 .queue = fio_posixaio_queue,
217 .cancel = fio_posixaio_cancel,
218 .getevents = fio_posixaio_getevents,
219 .event = fio_posixaio_event,
220 .cleanup = fio_posixaio_cleanup,
221 .open_file = generic_open_file,
222 .close_file = generic_close_file,
223 .get_file_size = generic_get_file_size,
224};
225
226static void fio_init fio_posixaio_register(void)
227{
228 register_ioengine(&ioengine);
229}
230
231static void fio_exit fio_posixaio_unregister(void)
232{
233 unregister_ioengine(&ioengine);
234}