Introduce enum fio_q_status
[fio.git] / engines / rados.c
CommitLineData
d5f9b0ea
IF
1/*
2 * Ceph Rados engine
3 *
4 * IO engine using Ceph's RADOS interface to test low-level performance of
5 * Ceph OSDs.
6 *
7 */
8
#include <errno.h>
#include <pthread.h>
#include <rados/librados.h>
#include "fio.h"
#include "../optgroup.h"
13
14struct fio_rados_iou {
15 struct thread_data *td;
16 struct io_u *io_u;
17 rados_completion_t completion;
18 rados_write_op_t write_op;
19};
20
21struct rados_data {
22 rados_t cluster;
23 rados_ioctx_t io_ctx;
24 char **objects;
25 size_t object_count;
26 struct io_u **aio_events;
27 bool connected;
28};
29
/*
 * Engine options parsed from the job file; the option table addresses the
 * members through offsetof().
 */
struct rados_options {
	/* not referenced by this engine; presumably reserved by fio - confirm */
	void *pad;
	/* "clustername" option */
	char *cluster_name;
	/* "pool" option */
	char *pool_name;
	/* "clientname" option */
	char *client_name;
	/* "busy_poll" option: non-zero means poll instead of sleeping */
	int busy_poll;
};
38
39static struct fio_option options[] = {
40 {
41 .name = "clustername",
42 .lname = "ceph cluster name",
43 .type = FIO_OPT_STR_STORE,
44 .help = "Cluster name for ceph",
45 .off1 = offsetof(struct rados_options, cluster_name),
46 .category = FIO_OPT_C_ENGINE,
47 .group = FIO_OPT_G_RBD,
48 },
49 {
50 .name = "pool",
51 .lname = "pool name to use",
52 .type = FIO_OPT_STR_STORE,
53 .help = "Ceph pool name to benchmark against",
54 .off1 = offsetof(struct rados_options, pool_name),
55 .category = FIO_OPT_C_ENGINE,
56 .group = FIO_OPT_G_RBD,
57 },
58 {
59 .name = "clientname",
60 .lname = "rados engine clientname",
61 .type = FIO_OPT_STR_STORE,
62 .help = "Name of the ceph client to access RADOS engine",
63 .off1 = offsetof(struct rados_options, client_name),
64 .category = FIO_OPT_C_ENGINE,
65 .group = FIO_OPT_G_RBD,
66 },
67 {
68 .name = "busy_poll",
69 .lname = "busy poll mode",
70 .type = FIO_OPT_BOOL,
71 .help = "Busy poll for completions instead of sleeping",
72 .off1 = offsetof(struct rados_options, busy_poll),
73 .def = "0",
74 .category = FIO_OPT_C_ENGINE,
75 .group = FIO_OPT_G_RBD,
76 },
77 {
78 .name = NULL,
79 },
80};
81
82static int _fio_setup_rados_data(struct thread_data *td,
83 struct rados_data **rados_data_ptr)
84{
85 struct rados_data *rados;
86
87 if (td->io_ops_data)
88 return 0;
89
90 rados = calloc(1, sizeof(struct rados_data));
91 if (!rados)
92 goto failed;
93
94 rados->connected = false;
95
96 rados->aio_events = calloc(td->o.iodepth, sizeof(struct io_u *));
97 if (!rados->aio_events)
98 goto failed;
99
100 rados->object_count = td->o.nr_files;
101 rados->objects = calloc(rados->object_count, sizeof(char*));
102 if (!rados->objects)
103 goto failed;
104
105 *rados_data_ptr = rados;
106 return 0;
107
108failed:
109 if (rados) {
110 rados->object_count = 0;
111 if (rados->aio_events)
112 free(rados->aio_events);
113 free(rados);
114 }
115 return 1;
116}
117
118static void _fio_rados_rm_objects(struct rados_data *rados)
119{
120 size_t i;
121 for (i = 0; i < rados->object_count; ++i) {
122 if (rados->objects[i]) {
123 rados_remove(rados->io_ctx, rados->objects[i]);
124 free(rados->objects[i]);
125 rados->objects[i] = NULL;
126 }
127 }
128}
129
130static int _fio_rados_connect(struct thread_data *td)
131{
132 struct rados_data *rados = td->io_ops_data;
133 struct rados_options *o = td->eo;
134 int r;
135 const uint64_t file_size =
136 td->o.size / (td->o.nr_files ? td->o.nr_files : 1u);
137 struct fio_file *f;
138 uint32_t i;
139 size_t oname_len = 0;
140
141 if (o->cluster_name) {
142 char *client_name = NULL;
143
144 /*
145 * If we specify cluser name, the rados_create2
146 * will not assume 'client.'. name is considered
147 * as a full type.id namestr
148 */
149 if (o->client_name) {
150 if (!index(o->client_name, '.')) {
151 client_name = calloc(1, strlen("client.") +
152 strlen(o->client_name) + 1);
153 strcat(client_name, "client.");
154 strcat(client_name, o->client_name);
155 } else {
156 client_name = o->client_name;
157 }
158 }
159
160 r = rados_create2(&rados->cluster, o->cluster_name,
161 client_name, 0);
162
163 if (client_name && !index(o->client_name, '.'))
164 free(client_name);
165 } else
166 r = rados_create(&rados->cluster, o->client_name);
167
168 if (r < 0) {
169 log_err("rados_create failed.\n");
170 goto failed_early;
171 }
172
173 r = rados_conf_read_file(rados->cluster, NULL);
174 if (r < 0) {
175 log_err("rados_conf_read_file failed.\n");
176 goto failed_early;
177 }
178
179 r = rados_connect(rados->cluster);
180 if (r < 0) {
181 log_err("rados_connect failed.\n");
182 goto failed_early;
183 }
184
185 r = rados_ioctx_create(rados->cluster, o->pool_name, &rados->io_ctx);
186 if (r < 0) {
187 log_err("rados_ioctx_create failed.\n");
188 goto failed_shutdown;
189 }
190
191 for (i = 0; i < rados->object_count; i++) {
192 f = td->files[i];
193 f->real_file_size = file_size;
194 f->engine_pos = i;
195
196 oname_len = strlen(f->file_name) + 32;
197 rados->objects[i] = malloc(oname_len);
198 /* vary objects for different jobs */
199 snprintf(rados->objects[i], oname_len - 1,
200 "fio_rados_bench.%s.%x",
201 f->file_name, td->thread_number);
202 r = rados_write(rados->io_ctx, rados->objects[i], "", 0, 0);
203 if (r < 0) {
204 free(rados->objects[i]);
205 rados->objects[i] = NULL;
206 log_err("error creating object.\n");
207 goto failed_obj_create;
208 }
209 }
210
211 return 0;
212
213failed_obj_create:
214 _fio_rados_rm_objects(rados);
215 rados_ioctx_destroy(rados->io_ctx);
216 rados->io_ctx = NULL;
217failed_shutdown:
218 rados_shutdown(rados->cluster);
219 rados->cluster = NULL;
220failed_early:
221 return 1;
222}
223
224static void _fio_rados_disconnect(struct rados_data *rados)
225{
226 if (!rados)
227 return;
228
229 _fio_rados_rm_objects(rados);
230
231 if (rados->io_ctx) {
232 rados_ioctx_destroy(rados->io_ctx);
233 rados->io_ctx = NULL;
234 }
235
236 if (rados->cluster) {
237 rados_shutdown(rados->cluster);
238 rados->cluster = NULL;
239 }
240}
241
242static void fio_rados_cleanup(struct thread_data *td)
243{
244 struct rados_data *rados = td->io_ops_data;
245
246 if (rados) {
247 _fio_rados_disconnect(rados);
248 free(rados->objects);
249 free(rados->aio_events);
250 free(rados);
251 }
252}
253
d3b07186
BVA
254static enum fio_q_status
255fio_rados_queue(struct thread_data *td, struct io_u *io_u)
d5f9b0ea
IF
256{
257 struct rados_data *rados = td->io_ops_data;
258 struct fio_rados_iou *fri = io_u->engine_data;
259 char *object = rados->objects[io_u->file->engine_pos];
260 int r = -1;
261
262 fio_ro_check(td, io_u);
263
264 if (io_u->ddir == DDIR_WRITE) {
265 r = rados_aio_create_completion(fri, NULL,
266 NULL, &fri->completion);
267 if (r < 0) {
268 log_err("rados_aio_create_completion failed.\n");
269 goto failed;
270 }
271
272 r = rados_aio_write(rados->io_ctx, object, fri->completion,
273 io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
274 if (r < 0) {
275 log_err("rados_write failed.\n");
276 goto failed_comp;
277 }
278 return FIO_Q_QUEUED;
279 } else if (io_u->ddir == DDIR_READ) {
280 r = rados_aio_create_completion(fri, NULL,
281 NULL, &fri->completion);
282 if (r < 0) {
283 log_err("rados_aio_create_completion failed.\n");
284 goto failed;
285 }
286 r = rados_aio_read(rados->io_ctx, object, fri->completion,
287 io_u->xfer_buf, io_u->xfer_buflen, io_u->offset);
288 if (r < 0) {
289 log_err("rados_aio_read failed.\n");
290 goto failed_comp;
291 }
292 return FIO_Q_QUEUED;
293 } else if (io_u->ddir == DDIR_TRIM) {
294 r = rados_aio_create_completion(fri, NULL,
295 NULL , &fri->completion);
296 if (r < 0) {
297 log_err("rados_aio_create_completion failed.\n");
298 goto failed;
299 }
300 fri->write_op = rados_create_write_op();
301 if (fri->write_op == NULL) {
302 log_err("rados_create_write_op failed.\n");
303 goto failed_comp;
304 }
305 rados_write_op_zero(fri->write_op, io_u->offset,
306 io_u->xfer_buflen);
307 r = rados_aio_write_op_operate(fri->write_op, rados->io_ctx,
308 fri->completion, object, NULL, 0);
309 if (r < 0) {
310 log_err("rados_aio_write_op_operate failed.\n");
311 goto failed_write_op;
312 }
313 return FIO_Q_QUEUED;
314 }
315
316 log_err("WARNING: Only DDIR_READ, DDIR_WRITE and DDIR_TRIM are supported!");
317
318failed_write_op:
319 rados_release_write_op(fri->write_op);
320failed_comp:
321 rados_aio_release(fri->completion);
322failed:
323 io_u->error = -r;
324 td_verror(td, io_u->error, "xfer");
325 return FIO_Q_COMPLETED;
326}
327
328static struct io_u *fio_rados_event(struct thread_data *td, int event)
329{
330 struct rados_data *rados = td->io_ops_data;
331 return rados->aio_events[event];
332}
333
334int fio_rados_getevents(struct thread_data *td, unsigned int min,
335 unsigned int max, const struct timespec *t)
336{
337 struct rados_data *rados = td->io_ops_data;
338 struct rados_options *o = td->eo;
339 int busy_poll = o->busy_poll;
340 unsigned int events = 0;
341 struct io_u *u;
342 struct fio_rados_iou *fri;
343 unsigned int i;
344 rados_completion_t first_unfinished;
345 int observed_new = 0;
346
347 /* loop through inflight ios until we find 'min' completions */
348 do {
349 first_unfinished = NULL;
350 io_u_qiter(&td->io_u_all, u, i) {
351 if (!(u->flags & IO_U_F_FLIGHT))
352 continue;
353
354 fri = u->engine_data;
355 if (fri->completion) {
356 if (rados_aio_is_complete(fri->completion)) {
357 if (fri->write_op != NULL) {
358 rados_release_write_op(fri->write_op);
359 fri->write_op = NULL;
360 }
361 rados_aio_release(fri->completion);
362 fri->completion = NULL;
363 rados->aio_events[events] = u;
364 events++;
365 observed_new = 1;
366 } else if (first_unfinished == NULL) {
367 first_unfinished = fri->completion;
368 }
369 }
370 if (events >= max)
371 break;
372 }
373 if (events >= min)
374 return events;
375 if (first_unfinished == NULL || busy_poll)
376 continue;
377
378 if (!observed_new)
379 rados_aio_wait_for_complete(first_unfinished);
380 } while (1);
381 return events;
382}
383
384static int fio_rados_setup(struct thread_data *td)
385{
386 struct rados_data *rados = NULL;
387 int r;
388 /* allocate engine specific structure to deal with librados. */
389 r = _fio_setup_rados_data(td, &rados);
390 if (r) {
391 log_err("fio_setup_rados_data failed.\n");
392 goto cleanup;
393 }
394 td->io_ops_data = rados;
395
396 /* Force single process mode.
397 */
398 td->o.use_thread = 1;
399
400 /* connect in the main thread to determine to determine
401 * the size of the given RADOS block device. And disconnect
402 * later on.
403 */
404 r = _fio_rados_connect(td);
405 if (r) {
406 log_err("fio_rados_connect failed.\n");
407 goto cleanup;
408 }
409 rados->connected = true;
410
411 return 0;
412cleanup:
413 fio_rados_cleanup(td);
414 return r;
415}
416
/*
 * open/invalidate are no-ops: the engine sets FIO_DISKLESSIO in its
 * ioengine_ops flags to prevent fio from creating the files, so there is
 * nothing to open here.
 */
static int fio_rados_open(struct thread_data *td, struct fio_file *f)
{
	(void) td;
	(void) f;

	return 0;
}
/* Cache invalidation hook; intentionally does nothing and reports success. */
static int fio_rados_invalidate(struct thread_data *td, struct fio_file *f)
{
	(void) td;
	(void) f;

	return 0;
}
428
429static void fio_rados_io_u_free(struct thread_data *td, struct io_u *io_u)
430{
431 struct fio_rados_iou *fri = io_u->engine_data;
432
433 if (fri) {
434 io_u->engine_data = NULL;
435 fri->td = NULL;
436 if (fri->completion)
437 rados_aio_release(fri->completion);
438 if (fri->write_op)
439 rados_release_write_op(fri->write_op);
440 free(fri);
441 }
442}
443
444static int fio_rados_io_u_init(struct thread_data *td, struct io_u *io_u)
445{
446 struct fio_rados_iou *fri;
447 fri = calloc(1, sizeof(*fri));
448 fri->io_u = io_u;
449 fri->td = td;
450 io_u->engine_data = fri;
451 return 0;
452}
453
454/* ioengine_ops for get_ioengine() */
455static struct ioengine_ops ioengine = {
456 .name = "rados",
457 .version = FIO_IOOPS_VERSION,
458 .flags = FIO_DISKLESSIO,
459 .setup = fio_rados_setup,
460 .queue = fio_rados_queue,
461 .getevents = fio_rados_getevents,
462 .event = fio_rados_event,
463 .cleanup = fio_rados_cleanup,
464 .open_file = fio_rados_open,
465 .invalidate = fio_rados_invalidate,
466 .options = options,
467 .io_u_init = fio_rados_io_u_init,
468 .io_u_free = fio_rados_io_u_free,
469 .option_struct_size = sizeof(struct rados_options),
470};
471
472static void fio_init fio_rados_register(void)
473{
474 register_ioengine(&ioengine);
475}
476
477static void fio_exit fio_rados_unregister(void)
478{
479 unregister_ioengine(&ioengine);
480}