4 * IO engine using Ceph's RADOS interface to test low-level performance of
9 #include <rados/librados.h>
12 #include "../optgroup.h"
/*
 * Members of the per-thread engine state (used throughout this file as
 * struct rados_data). The struct header and some members (for example the
 * cluster handle, io_ctx and connected) are not visible in this excerpt.
 */
/* Array with td->o.iodepth slots; filled by fio_rados_getevents(), read by fio_rados_event(). */
17 struct io_u **aio_events;
/* Held while touching completed_operations and ops_completed. */
19 pthread_mutex_t completed_lock;
/* Signalled by complete_callback() each time a request finishes. */
20 pthread_cond_t completed_more_io;
/* List of finished fio_rados_iou entries waiting to be reaped. */
21 struct flist_head completed_operations;
/* Count of requests submitted by fio_rados_queue(). */
22 uint64_t ops_scheduled;
/* Count of requests for which complete_callback() has run. */
23 uint64_t ops_completed;
/*
 * Engine-private request state, attached to each io_u through
 * io_u->engine_data. Other code in this file also reads fri->io_u; that
 * member declaration is not visible in this excerpt.
 */
26 struct fio_rados_iou {
/* Link used to put this entry on rados_data.completed_operations. */
27 struct flist_head list;
/* Owning thread; complete_callback() follows it to reach the rados_data. */
28 struct thread_data *td;
/* librados completion created in fio_rados_queue() and released when the request is reaped. */
30 rados_completion_t completion;
/* Write operation created only on the DDIR_TRIM path of fio_rados_queue(). */
31 rados_write_op_t write_op;
34 /* fio configuration options read from the job file */
/*
 * The option table that follows refers to these members via offsetof():
 * cluster_name, pool_name, client_name, busy_poll and touch_objects.
 * Their declarations are not visible in this excerpt.
 */
35 struct rados_options {
/*
 * Job-file options understood by this engine. Each entry stores its value
 * at the given offset inside struct rados_options and is filed under the
 * engine category and the RBD option group. Several lines of the table
 * (some .name and .type members, entry delimiters) are not visible in this
 * excerpt.
 */
44 static struct fio_option options[] = {
/* clustername: string option stored in rados_options.cluster_name. */
46 .name = "clustername",
47 .lname = "ceph cluster name",
48 .type = FIO_OPT_STR_STORE,
49 .help = "Cluster name for ceph",
50 .off1 = offsetof(struct rados_options, cluster_name),
51 .category = FIO_OPT_C_ENGINE,
52 .group = FIO_OPT_G_RBD,
/* Pool option (its .name line is not visible): string stored in rados_options.pool_name. */
56 .lname = "pool name to use",
57 .type = FIO_OPT_STR_STORE,
58 .help = "Ceph pool name to benchmark against",
59 .off1 = offsetof(struct rados_options, pool_name),
60 .category = FIO_OPT_C_ENGINE,
61 .group = FIO_OPT_G_RBD,
/* Client name option (its .name line is not visible): string stored in rados_options.client_name. */
65 .lname = "rados engine clientname",
66 .type = FIO_OPT_STR_STORE,
67 .help = "Name of the ceph client to access RADOS engine",
68 .off1 = offsetof(struct rados_options, client_name),
69 .category = FIO_OPT_C_ENGINE,
70 .group = FIO_OPT_G_RBD,
/* Busy poll option (its .name and .type lines are not visible): stored in rados_options.busy_poll. */
74 .lname = "busy poll mode",
76 .help = "Busy poll for completions instead of sleeping",
77 .off1 = offsetof(struct rados_options, busy_poll),
79 .category = FIO_OPT_C_ENGINE,
80 .group = FIO_OPT_G_RBD,
/* touch_objects (its .type line is not visible): stored in rados_options.touch_objects and read by _fio_rados_connect(). */
83 .name = "touch_objects",
84 .lname = "touch objects on start",
86 .help = "Touch (create) objects on start",
87 .off1 = offsetof(struct rados_options, touch_objects),
89 .category = FIO_OPT_C_ENGINE,
90 .group = FIO_OPT_G_RBD,
/*
 * Allocate and initialise the per-thread engine state and hand it back
 * through rados_data_ptr. Parts of the function (braces, the branches taken
 * on allocation failure, return statements) are not visible in this excerpt.
 */
97 static int _fio_setup_rados_data(struct thread_data *td,
98 struct rados_data **rados_data_ptr)
100 struct rados_data *rados;
/* calloc() zero-fills the structure, so every member starts out as zero. */
105 rados = calloc(1, sizeof(struct rados_data));
109 rados->connected = false;
/* One slot per queue-depth entry; fio_rados_getevents() fills the array and fio_rados_event() reads it. */
111 rados->aio_events = calloc(td->o.iodepth, sizeof(struct io_u *));
112 if (!rados->aio_events)
/* Lock, condition variable and list shared between complete_callback() and the reaping and cleanup code. */
114 pthread_mutex_init(&rados->completed_lock, NULL);
115 pthread_cond_init(&rados->completed_more_io, NULL);
116 INIT_FLIST_HEAD(&rados->completed_operations);
117 rados->ops_scheduled = 0;
118 rados->ops_completed = 0;
/* Publish the initialised state to the caller. */
119 *rados_data_ptr = rados;
/* Presumably the failure path (its label is not visible here): release the event array. */
124 if (rados->aio_events)
125 free(rados->aio_events);
/*
 * Remove, from the pool behind rados->io_ctx, the object named after each
 * of the td->o.nr_files fio files. The result of rados_remove() is not
 * captured in the visible lines.
 */
131 static void _fio_rados_rm_objects(struct thread_data *td, struct rados_data *rados)
134 for (i = 0; i < td->o.nr_files; i++) {
135 struct fio_file *f = td->files[i];
/* The fio file name doubles as the RADOS object name. */
136 rados_remove(rados->io_ctx, f->file_name);
/*
 * Create the cluster handle, read the ceph configuration, connect, open an
 * I/O context on the configured pool and assign a size to every fio file,
 * optionally creating the backing object. Many lines of this function
 * (result checks, labels, return statements) are not visible in this
 * excerpt; the notes cover only what is shown.
 */
140 static int _fio_rados_connect(struct thread_data *td)
142 struct rados_data *rados = td->io_ops_data;
143 struct rados_options *o = td->eo;
/* Every file gets an equal share of the job size; the ternary avoids a division by zero when nr_files is 0. */
145 const uint64_t file_size =
146 td->o.size / (td->o.nr_files ? td->o.nr_files : 1u);
/* With a cluster name the handle is created through rados_create2(), which needs a full type.id client name. */
150 if (o->cluster_name) {
151 char *client_name = NULL;
154 * If we specify cluster name, the rados_create2
155 * will not assume 'client.'. name is considered
156 * as a full type.id namestr
158 if (o->client_name) {
/* No dot in the configured name: build a heap copy that carries the client. prefix. */
159 if (!index(o->client_name, '.')) {
/* NOTE(review): no NULL check of the calloc() result is visible before the strcat() calls. */
160 client_name = calloc(1, strlen("client.") +
161 strlen(o->client_name) + 1);
162 strcat(client_name, "client.");
163 strcat(client_name, o->client_name);
/* Name already has a dot: use the option string directly (nothing allocated on this branch). */
165 client_name = o->client_name;
169 r = rados_create2(&rados->cluster, o->cluster_name,
/* Same test as the allocating branch, so it is true only for the heap copy; the guarded statement is not visible here. */
172 if (client_name && !index(o->client_name, '.'))
/* Without a cluster name the client name is passed to rados_create() as given. */
175 r = rados_create(&rados->cluster, o->client_name);
/*
 * NOTE(review): the pool name is validated only after the handle creation
 * call; confirm that the exit taken here releases the cluster handle.
 */
177 if (o->pool_name == NULL) {
178 log_err("rados pool name must be provided.\n");
183 log_err("rados_create failed.\n");
/* NULL is passed as the configuration path. */
187 r = rados_conf_read_file(rados->cluster, NULL);
189 log_err("rados_conf_read_file failed.\n");
193 r = rados_connect(rados->cluster);
195 log_err("rados_connect failed.\n");
199 r = rados_ioctx_create(rados->cluster, o->pool_name, &rados->io_ctx);
201 log_err("rados_ioctx_create failed.\n");
202 goto failed_shutdown;
/* Give every file its share of the job size and, if requested, create its object. */
205 for (i = 0; i < td->o.nr_files; i++) {
207 f->real_file_size = file_size;
208 if (o->touch_objects) {
/* Zero-length write at offset 0 of the object named after the file. */
209 r = rados_write(rados->io_ctx, f->file_name, "", 0, 0);
211 goto failed_obj_create;
/* Error unwinding (labels not visible): remove objects, destroy the I/O context, shut the cluster handle down. */
218 _fio_rados_rm_objects(td, rados);
219 rados_ioctx_destroy(rados->io_ctx);
220 rados->io_ctx = NULL;
222 rados_shutdown(rados->cluster);
223 rados->cluster = NULL;
/*
 * Destroy the I/O context and shut down the cluster handle, clearing both
 * pointers in rados. Any guard around the I/O context teardown is not
 * visible in this excerpt.
 */
228 static void _fio_rados_disconnect(struct rados_data *rados)
234 rados_ioctx_destroy(rados->io_ctx);
235 rados->io_ctx = NULL;
/* Shut the cluster handle down only when one exists, then clear it. */
238 if (rados->cluster) {
239 rados_shutdown(rados->cluster);
240 rados->cluster = NULL;
/*
 * Engine cleanup hook, also called from fio_rados_setup(). Waits until
 * every submitted operation has completed, removes the object of every fio
 * file, disconnects and frees the event array. Lines between the visible
 * ones (for example any NULL check of rados) are missing from this excerpt.
 */
244 static void fio_rados_cleanup(struct thread_data *td)
246 struct rados_data *rados = td->io_ops_data;
/* Block until the completion callback has run once per submitted request. */
248 pthread_mutex_lock(&rados->completed_lock);
/* Re-check in a loop: the condition is signalled once per completion. */
249 while (rados->ops_scheduled != rados->ops_completed)
250 pthread_cond_wait(&rados->completed_more_io, &rados->completed_lock);
251 pthread_mutex_unlock(&rados->completed_lock);
/* Nothing is in flight any more: objects and handles can go away. */
252 _fio_rados_rm_objects(td, rados);
253 _fio_rados_disconnect(rados);
254 free(rados->aio_events);
/*
 * Completion callback registered by fio_rados_queue() through
 * rados_aio_create_completion(). arg is the fio_rados_iou of the finished
 * request. The entry is appended to the completed list, ops_completed is
 * incremented and the condition variable is signalled.
 * cb is not referenced in the visible lines.
 */
259 static void complete_callback(rados_completion_t cb, void *arg)
261 struct fio_rados_iou *fri = (struct fio_rados_iou *)arg;
262 struct rados_data *rados = fri->td->io_ops_data;
/* Sanity checks: the request must own a completion and that completion must report as complete. */
263 assert(fri->completion);
264 assert(rados_aio_is_complete(fri->completion));
/* List and counter are updated together under the lock. */
265 pthread_mutex_lock(&rados->completed_lock);
266 flist_add_tail(&fri->list, &rados->completed_operations);
267 rados->ops_completed++;
268 pthread_mutex_unlock(&rados->completed_lock);
/* Signalled after the lock is dropped; waiters are in fio_rados_getevents() and fio_rados_cleanup(). */
269 pthread_cond_signal(&rados->completed_more_io);
/*
 * Queue hook: submit one io_u asynchronously. Writes and reads use
 * rados_aio_write() and rados_aio_read(); trims are issued as a write
 * operation that zeroes a range. Every path first creates a completion
 * bound to complete_callback() with the fio_rados_iou as callback argument,
 * and counts the request in ops_scheduled once it is submitted. The rest of
 * the signature, the result checks, the success returns and the error
 * labels are not visible in this excerpt.
 */
272 static enum fio_q_status fio_rados_queue(struct thread_data *td,
275 struct rados_data *rados = td->io_ops_data;
276 struct fio_rados_iou *fri = io_u->engine_data;
/* The fio file name is used as the RADOS object name. */
277 char *object = io_u->file->file_name;
280 fio_ro_check(td, io_u);
/* Write path. */
282 if (io_u->ddir == DDIR_WRITE) {
283 r = rados_aio_create_completion(fri, complete_callback,
284 NULL, &fri->completion);
286 log_err("rados_aio_create_completion failed.\n");
290 r = rados_aio_write(rados->io_ctx, object, fri->completion,
291 io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
/* NOTE(review): the message names rados_write although the call above is rados_aio_write(). */
293 log_err("rados_write failed.\n");
296 rados->ops_scheduled++;
/* Read path. */
298 } else if (io_u->ddir == DDIR_READ) {
299 r = rados_aio_create_completion(fri, complete_callback,
300 NULL, &fri->completion);
302 log_err("rados_aio_create_completion failed.\n");
305 r = rados_aio_read(rados->io_ctx, object, fri->completion,
306 io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
308 log_err("rados_aio_read failed.\n");
311 rados->ops_scheduled++;
/* Trim path: build a write operation that zeroes a range starting at io_u->offset. */
313 } else if (io_u->ddir == DDIR_TRIM) {
314 r = rados_aio_create_completion(fri, complete_callback,
315 NULL , &fri->completion);
317 log_err("rados_aio_create_completion failed.\n");
320 fri->write_op = rados_create_write_op();
321 if (fri->write_op == NULL) {
322 log_err("rados_create_write_op failed.\n");
/* The length argument of this call is on a line that is not visible here. */
325 rados_write_op_zero(fri->write_op, io_u->offset,
327 r = rados_aio_write_op_operate(fri->write_op, rados->io_ctx,
328 fri->completion, object, NULL, 0);
330 log_err("rados_aio_write_op_operate failed.\n");
331 goto failed_write_op;
333 rados->ops_scheduled++;
/* NOTE(review): unlike the other messages in this function, this one has no trailing newline. */
337 log_err("WARNING: Only DDIR_READ, DDIR_WRITE and DDIR_TRIM are supported!");
/* Failure tail (labels not visible): release the write operation and the completion, then report the error. */
340 rados_release_write_op(fri->write_op);
342 rados_aio_release(fri->completion);
/* The assignment of io_u->error is not visible in this excerpt. */
345 td_verror(td, io_u->error, "xfer");
346 return FIO_Q_COMPLETED;
/*
 * Event hook: return the io_u stored in slot event of the aio_events array,
 * as filled in by fio_rados_getevents(). No range check of event is visible
 * in this excerpt.
 */
349 static struct io_u *fio_rados_event(struct thread_data *td, int event)
351 struct rados_data *rados = td->io_ops_data;
352 return rados->aio_events[event];
/*
 * Getevents hook: reap finished requests from the completed list into
 * aio_events, sleeping on the condition variable while the list is empty.
 * The loop runs while fewer than min events were collected and breaks once
 * max is reached. The increment of events and the return statement are not
 * visible in this excerpt.
 *
 * NOTE(review): declared without static, unlike the other hooks in this file.
 * NOTE(review): t is not referenced in the visible lines, so the wait looks
 * unbounded - confirm.
 * NOTE(review): with min == 0 the loop body never runs, so completions that
 * are already queued are not collected by that call.
 */
355 int fio_rados_getevents(struct thread_data *td, unsigned int min,
356 unsigned int max, const struct timespec *t)
358 struct rados_data *rados = td->io_ops_data;
359 unsigned int events = 0;
360 struct fio_rados_iou *fri;
/* The lock is held for the whole reaping loop; pthread_cond_wait() releases it while sleeping. */
362 pthread_mutex_lock(&rados->completed_lock);
363 while (events < min) {
/* Loop around the wait so that the list is re-checked after every wakeup. */
364 while (flist_empty(&rados->completed_operations)) {
365 pthread_cond_wait(&rados->completed_more_io, &rados->completed_lock);
367 assert(!flist_empty(&rados->completed_operations));
/* Take the oldest finished request; complete_callback() appends at the tail. */
369 fri = flist_first_entry(&rados->completed_operations, struct fio_rados_iou, list);
370 assert(fri->completion);
371 assert(rados_aio_is_complete(fri->completion));
/* A write operation exists only for requests that went through the trim path. */
372 if (fri->write_op != NULL) {
373 rados_release_write_op(fri->write_op);
374 fri->write_op = NULL;
/* Release the completion and clear the pointer. */
376 rados_aio_release(fri->completion);
377 fri->completion = NULL;
/* Record the io_u in the slot that fio_rados_event() reads later. */
379 rados->aio_events[events] = fri->io_u;
381 flist_del(&fri->list);
382 if (events >= max) break;
384 pthread_mutex_unlock(&rados->completed_lock);
/*
 * Engine setup hook: allocate the per-thread state, store it in
 * td->io_ops_data, force thread mode and connect to the cluster. Result
 * checks, labels and return statements are not visible in this excerpt.
 */
388 static int fio_rados_setup(struct thread_data *td)
390 struct rados_data *rados = NULL;
392 /* allocate engine specific structure to deal with librados. */
393 r = _fio_setup_rados_data(td, &rados);
395 log_err("fio_setup_rados_data failed.\n");
398 td->io_ops_data = rados;
400 /* Force single process mode.
// Jobs of this engine always run as threads.
402 td->o.use_thread = 1;
404 /* connect in the main thread to determine to determine
405 * the size of the given RADOS block device. And disconnect
// _fio_rados_connect() also assigns real_file_size for every fio file.
408 r = _fio_rados_connect(td);
410 log_err("fio_rados_connect failed.\n");
// Set after the connect call; the check of r in between is not visible here.
413 rados->connected = true;
// Presumably the failure path (its label is not visible): release everything through the cleanup hook.
417 fio_rados_cleanup(td);
// The open_file and invalidate hooks below are described by the existing
// comment as no-ops; their bodies are not visible in this excerpt.
421 /* open/invalidate are noops. we set the FIO_DISKLESSIO flag in ioengine_ops to
422 prevent fio from creating the files
// open_file hook.
424 static int fio_rados_open(struct thread_data *td, struct fio_file *f)
// invalidate hook.
428 static int fio_rados_invalidate(struct thread_data *td, struct fio_file *f)
// io_u_free hook: detach the engine-private state from the io_u and release
// the librados objects it still holds. The guards around the release calls
// and the release of fri itself are not visible in this excerpt.
433 static void fio_rados_io_u_free(struct thread_data *td, struct io_u *io_u)
435 struct fio_rados_iou *fri = io_u->engine_data;
// Clear the back reference so the io_u no longer points at this state.
438 io_u->engine_data = NULL;
// Drop a completion and a write operation that are still attached.
441 rados_aio_release(fri->completion);
443 rados_release_write_op(fri->write_op);
// io_u_init hook: allocate zeroed engine-private state for one io_u and
// attach it through io_u->engine_data. The allocation check, the remaining
// member assignments and the return statement are not visible in this excerpt.
448 static int fio_rados_io_u_init(struct thread_data *td, struct io_u *io_u)
450 struct fio_rados_iou *fri;
// calloc() leaves completion and write_op zeroed until fio_rados_queue() sets them.
451 fri = calloc(1, sizeof(*fri));
// Start with an empty list link; complete_callback() later queues the entry.
454 INIT_FLIST_HEAD(&fri->list);
455 io_u->engine_data = fri;
459 /* ioengine_ops for get_ioengine() */
/*
 * Hook table registered with fio by fio_rados_register(). The .name member
 * and possibly other members are not visible in this excerpt.
 */
460 FIO_STATIC struct ioengine_ops ioengine = {
462 .version = FIO_IOOPS_VERSION,
/* Per the comment above fio_rados_open(), this flag keeps fio from creating the files. */
463 .flags = FIO_DISKLESSIO,
464 .setup = fio_rados_setup,
465 .queue = fio_rados_queue,
466 .getevents = fio_rados_getevents,
467 .event = fio_rados_event,
468 .cleanup = fio_rados_cleanup,
469 .open_file = fio_rados_open,
470 .invalidate = fio_rados_invalidate,
472 .io_u_init = fio_rados_io_u_init,
473 .io_u_free = fio_rados_io_u_free,
/* Size of the per-job option storage that appears as td->eo in this file. */
474 .option_struct_size = sizeof(struct rados_options),
/*
 * Register the engine with fio. The fio_init marker presumably makes this
 * run at program start - confirm against its definition, which is not part
 * of this file.
 */
477 static void fio_init fio_rados_register(void)
479 register_ioengine(&ioengine);
/*
 * Unregister the engine from fio. The fio_exit marker presumably makes this
 * run at program exit - confirm against its definition, which is not part
 * of this file.
 */
482 static void fio_exit fio_rados_unregister(void)
484 unregister_ioengine(&ioengine);