engines/glusterfs_async: cleanups
[fio.git] / engines / rbd.c
CommitLineData
fc5c0345
DG
1/*
2 * rbd engine
3 *
4 * IO engine using Ceph's librbd to test RADOS Block Devices.
5 *
6 */
7
#include <errno.h>
#include <rbd/librbd.h>

#include "../fio.h"
11
12struct fio_rbd_iou {
13 struct io_u *io_u;
d8b64af2 14 rbd_completion_t completion;
d8b64af2 15 int io_seen;
fc5c0345
DG
16};
17
18struct rbd_data {
19 rados_t cluster;
20 rados_ioctx_t io_ctx;
21 rbd_image_t image;
22 struct io_u **aio_events;
23};
24
/*
 * Engine-specific job options; filled in through the options[] table
 * via offsetof() and reached through td->eo.
 */
struct rbd_options {
	/* "rbdname": image name passed to rbd_open() */
	char *rbd_name;
	/* "pool": pool name passed to rados_ioctx_create() */
	char *pool_name;
	/* "clientname": client id passed to rados_create() */
	char *client_name;
	/* "busy_poll": spin for completions instead of waiting */
	int busy_poll;
};
31
32static struct fio_option options[] = {
33 {
d8b64af2
JA
34 .name = "rbdname",
35 .lname = "rbd engine rbdname",
36 .type = FIO_OPT_STR_STORE,
37 .help = "RBD name for RBD engine",
38 .off1 = offsetof(struct rbd_options, rbd_name),
39 .category = FIO_OPT_C_ENGINE,
40 .group = FIO_OPT_G_RBD,
41 },
fc5c0345 42 {
d7d702c7
JA
43 .name = "pool",
44 .lname = "rbd engine pool",
45 .type = FIO_OPT_STR_STORE,
46 .help = "Name of the pool hosting the RBD for the RBD engine",
47 .off1 = offsetof(struct rbd_options, pool_name),
48 .category = FIO_OPT_C_ENGINE,
49 .group = FIO_OPT_G_RBD,
d8b64af2 50 },
fc5c0345 51 {
d7d702c7
JA
52 .name = "clientname",
53 .lname = "rbd engine clientname",
54 .type = FIO_OPT_STR_STORE,
55 .help = "Name of the ceph client to access the RBD for the RBD engine",
56 .off1 = offsetof(struct rbd_options, client_name),
57 .category = FIO_OPT_C_ENGINE,
58 .group = FIO_OPT_G_RBD,
59 },
60 {
61 .name = "busy_poll",
62 .lname = "Busy poll",
63 .type = FIO_OPT_BOOL,
64 .help = "Busy poll for completions instead of sleeping",
65 .off1 = offsetof(struct rbd_options, client_name),
66 .def = "0",
67 .category = FIO_OPT_C_ENGINE,
68 .group = FIO_OPT_G_RBD,
d8b64af2 69 },
fc5c0345 70 {
d8b64af2
JA
71 .name = NULL,
72 },
fc5c0345
DG
73};
74
75static int _fio_setup_rbd_data(struct thread_data *td,
76 struct rbd_data **rbd_data_ptr)
77{
78 struct rbd_data *rbd_data;
79
80 if (td->io_ops->data)
81 return 0;
82
83 rbd_data = malloc(sizeof(struct rbd_data));
84 if (!rbd_data)
85 goto failed;
86
87 memset(rbd_data, 0, sizeof(struct rbd_data));
88
89 rbd_data->aio_events = malloc(td->o.iodepth * sizeof(struct io_u *));
90 if (!rbd_data->aio_events)
91 goto failed;
92
93 memset(rbd_data->aio_events, 0, td->o.iodepth * sizeof(struct io_u *));
94
95 *rbd_data_ptr = rbd_data;
96
97 return 0;
98
99failed:
d9cb70a8
JA
100 if (rbd_data)
101 free(rbd_data);
fc5c0345
DG
102 return 1;
103
104}
105
106static int _fio_rbd_connect(struct thread_data *td)
107{
108 struct rbd_data *rbd_data = td->io_ops->data;
109 struct rbd_options *o = td->eo;
110 int r;
111
dbf388d2 112 r = rados_create(&rbd_data->cluster, o->client_name);
fc5c0345
DG
113 if (r < 0) {
114 log_err("rados_create failed.\n");
115 goto failed_early;
116 }
117
118 r = rados_conf_read_file(rbd_data->cluster, NULL);
119 if (r < 0) {
120 log_err("rados_conf_read_file failed.\n");
121 goto failed_early;
122 }
123
124 r = rados_connect(rbd_data->cluster);
125 if (r < 0) {
126 log_err("rados_connect failed.\n");
127 goto failed_shutdown;
128 }
129
130 r = rados_ioctx_create(rbd_data->cluster, o->pool_name,
dbf388d2 131 &rbd_data->io_ctx);
fc5c0345
DG
132 if (r < 0) {
133 log_err("rados_ioctx_create failed.\n");
134 goto failed_shutdown;
135 }
136
dbf388d2 137 r = rbd_open(rbd_data->io_ctx, o->rbd_name, &rbd_data->image,
fc5c0345
DG
138 NULL /*snap */ );
139 if (r < 0) {
140 log_err("rbd_open failed.\n");
141 goto failed_open;
142 }
143 return 0;
144
145failed_open:
146 rados_ioctx_destroy(rbd_data->io_ctx);
086f9935 147 rbd_data->io_ctx = NULL;
fc5c0345
DG
148failed_shutdown:
149 rados_shutdown(rbd_data->cluster);
086f9935 150 rbd_data->cluster = NULL;
fc5c0345
DG
151failed_early:
152 return 1;
153}
154
155static void _fio_rbd_disconnect(struct rbd_data *rbd_data)
156{
157 if (!rbd_data)
158 return;
159
160 /* shutdown everything */
161 if (rbd_data->image) {
162 rbd_close(rbd_data->image);
163 rbd_data->image = NULL;
164 }
165
166 if (rbd_data->io_ctx) {
167 rados_ioctx_destroy(rbd_data->io_ctx);
168 rbd_data->io_ctx = NULL;
169 }
170
171 if (rbd_data->cluster) {
172 rados_shutdown(rbd_data->cluster);
173 rbd_data->cluster = NULL;
174 }
175}
176
d8b64af2 177static void _fio_rbd_finish_aiocb(rbd_completion_t comp, void *data)
fc5c0345 178{
dbf388d2
JA
179 struct fio_rbd_iou *fri = data;
180 struct io_u *io_u = fri->io_u;
d8b64af2 181 ssize_t ret;
fc5c0345 182
d8b64af2
JA
183 /*
184 * Looks like return value is 0 for success, or < 0 for
185 * a specific error. So we have to assume that it can't do
186 * partial completions.
187 */
188 ret = rbd_aio_get_return_value(fri->completion);
189 if (ret < 0) {
190 io_u->error = ret;
191 io_u->resid = io_u->xfer_buflen;
192 } else
193 io_u->error = 0;
194}
fc5c0345 195
d8b64af2
JA
196static struct io_u *fio_rbd_event(struct thread_data *td, int event)
197{
198 struct rbd_data *rbd_data = td->io_ops->data;
fc5c0345 199
d8b64af2 200 return rbd_data->aio_events[event];
fc5c0345
DG
201}
202
d8b64af2
JA
203static inline int fri_check_complete(struct rbd_data *rbd_data,
204 struct io_u *io_u,
205 unsigned int *events)
fc5c0345 206{
d8b64af2 207 struct fio_rbd_iou *fri = io_u->engine_data;
fc5c0345 208
dbf388d2 209 if (rbd_aio_is_complete(fri->completion)) {
d8b64af2
JA
210 fri->io_seen = 1;
211 rbd_data->aio_events[*events] = io_u;
212 (*events)++;
fc5c0345 213
d8b64af2
JA
214 rbd_aio_release(fri->completion);
215 return 1;
216 }
fc5c0345 217
d8b64af2 218 return 0;
fc5c0345
DG
219}
220
d8b64af2
JA
221static int rbd_iter_events(struct thread_data *td, unsigned int *events,
222 unsigned int min_evts, int wait)
82340a9f 223{
d8b64af2
JA
224 struct rbd_data *rbd_data = td->io_ops->data;
225 unsigned int this_events = 0;
226 struct io_u *io_u;
227 int i;
82340a9f 228
d8b64af2
JA
229 io_u_qiter(&td->io_u_all, io_u, i) {
230 struct fio_rbd_iou *fri = io_u->engine_data;
82340a9f 231
d8b64af2
JA
232 if (!(io_u->flags & IO_U_F_FLIGHT))
233 continue;
234 if (fri->io_seen)
235 continue;
82340a9f 236
d8b64af2
JA
237 if (fri_check_complete(rbd_data, io_u, events))
238 this_events++;
239 else if (wait) {
240 rbd_aio_wait_for_complete(fri->completion);
82340a9f 241
d8b64af2
JA
242 if (fri_check_complete(rbd_data, io_u, events))
243 this_events++;
244 }
245 if (*events >= min_evts)
246 break;
247 }
fc5c0345 248
d8b64af2 249 return this_events;
fc5c0345
DG
250}
251
252static int fio_rbd_getevents(struct thread_data *td, unsigned int min,
1f440ece 253 unsigned int max, const struct timespec *t)
fc5c0345 254{
d8b64af2 255 unsigned int this_events, events = 0;
d7d702c7 256 struct rbd_options *o = td->eo;
d8b64af2 257 int wait = 0;
fc5c0345
DG
258
259 do {
d8b64af2 260 this_events = rbd_iter_events(td, &events, min, wait);
fc5c0345 261
d8b64af2 262 if (events >= min)
fc5c0345 263 break;
d8b64af2
JA
264 if (this_events)
265 continue;
fc5c0345 266
d7d702c7
JA
267 if (!o->busy_poll)
268 wait = 1;
269 else
270 nop;
fc5c0345
DG
271 } while (1);
272
273 return events;
274}
275
276static int fio_rbd_queue(struct thread_data *td, struct io_u *io_u)
277{
fc5c0345 278 struct rbd_data *rbd_data = td->io_ops->data;
d8b64af2
JA
279 struct fio_rbd_iou *fri = io_u->engine_data;
280 int r = -1;
fc5c0345
DG
281
282 fio_ro_check(td, io_u);
283
d8b64af2
JA
284 fri->io_seen = 0;
285
dbf388d2 286 r = rbd_aio_create_completion(fri, _fio_rbd_finish_aiocb,
d8b64af2 287 &fri->completion);
dbf388d2
JA
288 if (r < 0) {
289 log_err("rbd_aio_create_completion failed.\n");
290 goto failed;
291 }
fc5c0345 292
dbf388d2 293 if (io_u->ddir == DDIR_WRITE) {
fc5c0345 294 r = rbd_aio_write(rbd_data->image, io_u->offset,
d8b64af2
JA
295 io_u->xfer_buflen, io_u->xfer_buf,
296 fri->completion);
fc5c0345
DG
297 if (r < 0) {
298 log_err("rbd_aio_write failed.\n");
dbf388d2 299 goto failed_comp;
fc5c0345
DG
300 }
301
302 } else if (io_u->ddir == DDIR_READ) {
fc5c0345 303 r = rbd_aio_read(rbd_data->image, io_u->offset,
d8b64af2
JA
304 io_u->xfer_buflen, io_u->xfer_buf,
305 fri->completion);
fc5c0345
DG
306
307 if (r < 0) {
308 log_err("rbd_aio_read failed.\n");
dbf388d2 309 goto failed_comp;
fc5c0345 310 }
dbf388d2
JA
311 } else if (io_u->ddir == DDIR_TRIM) {
312 r = rbd_aio_discard(rbd_data->image, io_u->offset,
313 io_u->xfer_buflen, fri->completion);
82340a9f 314 if (r < 0) {
dbf388d2
JA
315 log_err("rbd_aio_discard failed.\n");
316 goto failed_comp;
82340a9f 317 }
dbf388d2 318 } else if (io_u->ddir == DDIR_SYNC) {
d8b64af2 319 r = rbd_aio_flush(rbd_data->image, fri->completion);
fc5c0345
DG
320 if (r < 0) {
321 log_err("rbd_flush failed.\n");
dbf388d2 322 goto failed_comp;
fc5c0345 323 }
fc5c0345
DG
324 } else {
325 dprint(FD_IO, "%s: Warning: unhandled ddir: %d\n", __func__,
326 io_u->ddir);
dbf388d2 327 goto failed_comp;
fc5c0345
DG
328 }
329
330 return FIO_Q_QUEUED;
dbf388d2
JA
331failed_comp:
332 rbd_aio_release(fri->completion);
fc5c0345
DG
333failed:
334 io_u->error = r;
335 td_verror(td, io_u->error, "xfer");
336 return FIO_Q_COMPLETED;
337}
338
/*
 * ->init() hook: connect to the cluster and open the image for this job.
 * Returns 0 on success, 1 if the connection could not be set up.
 */
static int fio_rbd_init(struct thread_data *td)
{
	int r = _fio_rbd_connect(td);

	if (!r)
		return 0;

	log_err("fio_rbd_connect failed, return code: %d .\n", r);
	return 1;
}
354
355static void fio_rbd_cleanup(struct thread_data *td)
356{
357 struct rbd_data *rbd_data = td->io_ops->data;
358
359 if (rbd_data) {
360 _fio_rbd_disconnect(rbd_data);
361 free(rbd_data->aio_events);
362 free(rbd_data);
363 }
364
365}
366
367static int fio_rbd_setup(struct thread_data *td)
368{
369 int r = 0;
370 rbd_image_info_t info;
371 struct fio_file *f;
372 struct rbd_data *rbd_data = NULL;
373 int major, minor, extra;
374
375 /* log version of librbd. No cluster connection required. */
376 rbd_version(&major, &minor, &extra);
377 log_info("rbd engine: RBD version: %d.%d.%d\n", major, minor, extra);
378
379 /* allocate engine specific structure to deal with librbd. */
380 r = _fio_setup_rbd_data(td, &rbd_data);
381 if (r) {
382 log_err("fio_setup_rbd_data failed.\n");
383 goto cleanup;
384 }
385 td->io_ops->data = rbd_data;
386
d8b64af2
JA
387 /* librbd does not allow us to run first in the main thread and later
388 * in a fork child. It needs to be the same process context all the
389 * time.
fc5c0345
DG
390 */
391 td->o.use_thread = 1;
392
393 /* connect in the main thread to determine to determine
394 * the size of the given RADOS block device. And disconnect
395 * later on.
396 */
397 r = _fio_rbd_connect(td);
398 if (r) {
399 log_err("fio_rbd_connect failed.\n");
400 goto cleanup;
401 }
402
403 /* get size of the RADOS block device */
404 r = rbd_stat(rbd_data->image, &info, sizeof(info));
405 if (r < 0) {
406 log_err("rbd_status failed.\n");
407 goto disconnect;
408 }
409 dprint(FD_IO, "rbd-engine: image size: %lu\n", info.size);
410
411 /* taken from "net" engine. Pretend we deal with files,
412 * even if we do not have any ideas about files.
413 * The size of the RBD is set instead of a artificial file.
414 */
415 if (!td->files_index) {
5903e7b7 416 add_file(td, td->o.filename ? : "rbd", 0, 0);
fc5c0345 417 td->o.nr_files = td->o.nr_files ? : 1;
b53f2c54 418 td->o.open_files++;
fc5c0345
DG
419 }
420 f = td->files[0];
421 f->real_file_size = info.size;
422
423 /* disconnect, then we were only connected to determine
424 * the size of the RBD.
425 */
426 _fio_rbd_disconnect(rbd_data);
427 return 0;
428
429disconnect:
430 _fio_rbd_disconnect(rbd_data);
431cleanup:
432 fio_rbd_cleanup(td);
433 return r;
434}
435
/*
 * ->open_file() hook. Nothing to do per file: the image is opened by
 * _fio_rbd_connect(). Always reports success.
 */
static int fio_rbd_open(struct thread_data *td, struct fio_file *f)
{
	return 0;
}
440
d9b100fc
JA
/*
 * ->invalidate() hook. When built with CONFIG_RBD_INVAL, this calls
 * rbd_invalidate_cache() on the open image and returns its result;
 * otherwise it is a no-op that returns 0.
 */
static int fio_rbd_invalidate(struct thread_data *td, struct fio_file *f)
{
#if defined(CONFIG_RBD_INVAL)
	struct rbd_data *rbd = td->io_ops->data;
	rbd_image_t image = rbd->image;

	return rbd_invalidate_cache(image);
#else
	return 0;
#endif
}
451
fc5c0345
DG
452static void fio_rbd_io_u_free(struct thread_data *td, struct io_u *io_u)
453{
d8b64af2 454 struct fio_rbd_iou *fri = io_u->engine_data;
fc5c0345 455
d8b64af2 456 if (fri) {
fc5c0345 457 io_u->engine_data = NULL;
d8b64af2 458 free(fri);
fc5c0345
DG
459 }
460}
461
462static int fio_rbd_io_u_init(struct thread_data *td, struct io_u *io_u)
463{
d8b64af2 464 struct fio_rbd_iou *fri;
fc5c0345 465
d8b64af2
JA
466 fri = calloc(1, sizeof(*fri));
467 fri->io_u = io_u;
468 io_u->engine_data = fri;
fc5c0345
DG
469 return 0;
470}
471
10aa136b 472static struct ioengine_ops ioengine = {
d9b100fc
JA
473 .name = "rbd",
474 .version = FIO_IOOPS_VERSION,
475 .setup = fio_rbd_setup,
476 .init = fio_rbd_init,
477 .queue = fio_rbd_queue,
478 .getevents = fio_rbd_getevents,
479 .event = fio_rbd_event,
480 .cleanup = fio_rbd_cleanup,
481 .open_file = fio_rbd_open,
482 .invalidate = fio_rbd_invalidate,
483 .options = options,
484 .io_u_init = fio_rbd_io_u_init,
485 .io_u_free = fio_rbd_io_u_free,
486 .option_struct_size = sizeof(struct rbd_options),
fc5c0345
DG
487};
488
489static void fio_init fio_rbd_register(void)
490{
491 register_ioengine(&ioengine);
492}
493
494static void fio_exit fio_rbd_unregister(void)
495{
496 unregister_ioengine(&ioengine);
497}