4 * IO engine using libblkio to access various block I/O interfaces:
5 * https://gitlab.com/libblkio/libblkio
19 #include "../optgroup.h"
20 #include "../options.h"
/*
 * State shared by every libblkio subjob running in this process. Of the
 * members shown, `mutex` is the lock taken and released by
 * fio_blkio_proc_lock()/fio_blkio_proc_unlock(), and `initted_hipri_threads`
 * is read by fio_blkio_init() as the index of the next poll queue to hand
 * out. Other code in this file also uses `proc_state.initted_threads` and
 * `proc_state.b`.
 * NOTE(review): the initializer is positional, so its four values have to
 * stay in member declaration order.
 */
23 /* per-process state */
25 pthread_mutex_t mutex;
27 int initted_hipri_threads;
29 } proc_state = { PTHREAD_MUTEX_INITIALIZER, 0, 0, NULL };
/*
 * Acquires proc_state.mutex with pthread_mutex_lock() and keeps the call's
 * result in `ret`. Counterpart of fio_blkio_proc_unlock().
 */
31 static void fio_blkio_proc_lock(void) {
33 ret = pthread_mutex_lock(&proc_state.mutex);
/*
 * Releases proc_state.mutex with pthread_mutex_unlock() and keeps the call's
 * result in `ret`. Counterpart of fio_blkio_proc_lock().
 */
37 static void fio_blkio_proc_unlock(void) {
39 ret = pthread_mutex_unlock(&proc_state.mutex);
/*
 * Engine-private data of one subjob. fio_blkio_init() allocates it with
 * calloc() and publishes it through td->io_ops_data. `iovecs` and
 * `completions` are arrays with td->o.iodepth entries each. Other code in
 * this file also uses a `q` member, which holds the queue obtained from
 * blkio_get_queue()/blkio_get_poll_queue().
 */
43 /* per-thread state */
44 struct fio_blkio_data {
46 int completion_fd; /* may be -1 if not FIO_BLKIO_WAIT_MODE_EVENTFD */
48 bool has_mem_region; /* whether mem_region is valid */
49 struct blkio_mem_region mem_region; /* only if allocated by libblkio */
51 struct iovec *iovecs; /* for vectored requests */
52 struct blkio_completion *completions;
/*
 * Ways in which fio_blkio_getevents() waits for completions, selected with
 * the libblkio_wait_mode option:
 *   FIO_BLKIO_WAIT_MODE_BLOCK   - blocking blkioq_do_io()
 *   FIO_BLKIO_WAIT_MODE_EVENTFD - blocking read() on the completion eventfd
 *   FIO_BLKIO_WAIT_MODE_LOOP    - busy loop with non-blocking blkioq_do_io()
 */
55 enum fio_blkio_wait_mode {
56 FIO_BLKIO_WAIT_MODE_BLOCK,
57 FIO_BLKIO_WAIT_MODE_EVENTFD,
58 FIO_BLKIO_WAIT_MODE_LOOP,
/*
 * Values of the engine-specific options, reached through td->eo. Each member
 * is referenced by an entry of the options[] table via an offsetof()-based
 * .off1 field. Besides the members shown, options[] also refers to `driver`,
 * `path`, `num_entries`, `queue_size` and `hipri`.
 */
61 struct fio_blkio_options {
62 void *pad; /* option fields must not have offset 0 */
67 char *pre_connect_props;
71 char *pre_start_props;
74 unsigned int vectored;
75 unsigned int write_zeroes_on_trim;
76 enum fio_blkio_wait_mode wait_mode;
77 unsigned int force_enable_completion_eventfd;
/*
 * Option table of the libblkio engine. Every entry shown is in category
 * FIO_OPT_C_ENGINE and group FIO_OPT_G_LIBBLKIO, and its .off1 field names
 * the struct fio_blkio_options member the option corresponds to. The
 * libblkio_wait_mode entry additionally maps its choices onto the
 * FIO_BLKIO_WAIT_MODE_* values.
 */
80 static struct fio_option options[] = {
82 .name = "libblkio_driver",
83 .lname = "libblkio driver name",
84 .type = FIO_OPT_STR_STORE,
85 .off1 = offsetof(struct fio_blkio_options, driver),
86 .help = "Name of the driver to be used by libblkio",
87 .category = FIO_OPT_C_ENGINE,
88 .group = FIO_OPT_G_LIBBLKIO,
91 .name = "libblkio_path",
92 .lname = "libblkio \"path\" property",
93 .type = FIO_OPT_STR_STORE,
94 .off1 = offsetof(struct fio_blkio_options, path),
95 .help = "Value to set the \"path\" property to",
96 .category = FIO_OPT_C_ENGINE,
97 .group = FIO_OPT_G_LIBBLKIO,
100 .name = "libblkio_pre_connect_props",
101 .lname = "Additional properties to be set before blkio_connect()",
102 .type = FIO_OPT_STR_STORE,
103 .off1 = offsetof(struct fio_blkio_options, pre_connect_props),
105 .category = FIO_OPT_C_ENGINE,
106 .group = FIO_OPT_G_LIBBLKIO,
109 .name = "libblkio_num_entries",
110 .lname = "libblkio \"num-entries\" property",
112 .off1 = offsetof(struct fio_blkio_options, num_entries),
113 .help = "Value to set the \"num-entries\" property to",
116 .category = FIO_OPT_C_ENGINE,
117 .group = FIO_OPT_G_LIBBLKIO,
120 .name = "libblkio_queue_size",
121 .lname = "libblkio \"queue-size\" property",
123 .off1 = offsetof(struct fio_blkio_options, queue_size),
124 .help = "Value to set the \"queue-size\" property to",
127 .category = FIO_OPT_C_ENGINE,
128 .group = FIO_OPT_G_LIBBLKIO,
131 .name = "libblkio_pre_start_props",
132 .lname = "Additional properties to be set before blkio_start()",
133 .type = FIO_OPT_STR_STORE,
134 .off1 = offsetof(struct fio_blkio_options, pre_start_props),
136 .category = FIO_OPT_C_ENGINE,
137 .group = FIO_OPT_G_LIBBLKIO,
141 .lname = "Use poll queues",
142 .type = FIO_OPT_STR_SET,
143 .off1 = offsetof(struct fio_blkio_options, hipri),
144 .help = "Use poll queues",
145 .category = FIO_OPT_C_ENGINE,
146 .group = FIO_OPT_G_LIBBLKIO,
149 .name = "libblkio_vectored",
150 .lname = "Use blkioq_{readv,writev}()",
151 .type = FIO_OPT_STR_SET,
152 .off1 = offsetof(struct fio_blkio_options, vectored),
153 .help = "Use blkioq_{readv,writev}() instead of blkioq_{read,write}()",
154 .category = FIO_OPT_C_ENGINE,
155 .group = FIO_OPT_G_LIBBLKIO,
158 .name = "libblkio_write_zeroes_on_trim",
159 .lname = "Use blkioq_write_zeroes() for TRIM",
160 .type = FIO_OPT_STR_SET,
161 .off1 = offsetof(struct fio_blkio_options,
162 write_zeroes_on_trim),
163 .help = "Use blkioq_write_zeroes() for TRIM instead of blkioq_discard()",
164 .category = FIO_OPT_C_ENGINE,
165 .group = FIO_OPT_G_LIBBLKIO,
168 .name = "libblkio_wait_mode",
169 .lname = "How to wait for completions",
171 .off1 = offsetof(struct fio_blkio_options, wait_mode),
172 .help = "How to wait for completions",
176 .oval = FIO_BLKIO_WAIT_MODE_BLOCK,
177 .help = "Blocking blkioq_do_io()",
180 .oval = FIO_BLKIO_WAIT_MODE_EVENTFD,
181 .help = "Blocking read() on the completion eventfd",
184 .oval = FIO_BLKIO_WAIT_MODE_LOOP,
185 .help = "Busy loop with non-blocking blkioq_do_io()",
188 .category = FIO_OPT_C_ENGINE,
189 .group = FIO_OPT_G_LIBBLKIO,
192 .name = "libblkio_force_enable_completion_eventfd",
193 .lname = "Force enable the completion eventfd, even if unused",
194 .type = FIO_OPT_STR_SET,
195 .off1 = offsetof(struct fio_blkio_options,
196 force_enable_completion_eventfd),
197 .help = "This can impact performance",
198 .category = FIO_OPT_C_ENGINE,
199 .group = FIO_OPT_G_LIBBLKIO,
/*
 * fio_blkio_set_props_from_str() - apply a list of "name=value" properties to
 * the blkio instance `b`.
 *
 * The input string is duplicated with strdup() because iterating can mutate
 * it. Entries are taken one at a time with get_next_str(); in each entry the
 * first '=' separates the property name from its value, blanks are stripped
 * from both ends of the name and of the value, and the property is set with
 * blkio_set_str(). `opt_name` is the fio option name quoted in the error
 * messages. A failed strdup(), a missing '=', an empty property name and a
 * blkio_set_str() failure are each logged with log_err().
 */
206 static int fio_blkio_set_props_from_str(struct blkio *b, const char *opt_name,
209 char *new_str, *name, *value;
214 /* iteration can mutate string, so copy it */
215 new_str = strdup(str);
217 log_err("fio: strdup() failed\n");
221 /* iterate over property name-value pairs */
222 while ((name = get_next_str(&new_str))) {
223 /* split into property name and value */
224 value = strchr(name, '=');
226 log_err("fio: missing '=' in option %s\n", opt_name);
234 /* strip whitespace from property name */
235 strip_blank_front(&name);
236 strip_blank_end(name);
238 if (name[0] == '\0') {
239 log_err("fio: empty property name in option %s\n",
245 /* strip whitespace from property value */
246 strip_blank_front(&value);
247 strip_blank_end(value);
250 if (blkio_set_str(b, name, value) != 0) {
251 log_err("fio: error setting property '%s' to '%s': %s\n",
252 name, value, blkio_get_error_msg());
/*
 * fio_blkio_log_err(func) - logs "fio: <func>() failed: <message>", where
 * <func> is the stringified macro argument and <message> is the text returned
 * by blkio_get_error_msg(). Pass the bare name of the libblkio function that
 * just failed.
 */
263 * Log the failure of a libblkio function.
265 * `(void)func` is to ensure `func` exists and prevent typos
267 #define fio_blkio_log_err(func) \
270 log_err("fio: %s() failed: %s\n", #func, \
271 blkio_get_error_msg()); \
/*
 * Returns true when `a` and `b` are both NULL, or when both are non-NULL and
 * strcmp() reports them equal. Returns false when exactly one of them is NULL
 * or when the contents differ.
 */
274 static bool possibly_null_strs_equal(const char *a, const char *b)
276 return (!a && !b) || (a && b && strcmp(a, b) == 0);
/*
 * total_threaded_subjobs() - a subjob matches when its ioengine name is
 * exactly "libblkio", it has the 'thread' option set, and its hipri option,
 * converted to bool, equals the `hipri` argument. fio_blkio_init() uses the
 * result for hipri == false as "num-queues" and for hipri == true as
 * "num-poll-queues".
 */
280 * Returns the total number of subjobs using the 'libblkio' ioengine and setting
281 * the 'thread' option in the entire workload that have the given value for the
284 static int total_threaded_subjobs(bool hipri)
290 const struct fio_blkio_options *options = td->eo;
291 if (strcmp(td->o.ioengine, "libblkio") == 0 &&
292 td->o.use_thread && (bool)options->hipri == hipri)
/*
 * Snapshot of the first subjob that uses the 'thread' option.
 * fio_blkio_check_opt_compat() fills in `set_up`, `direct` and `opts` for
 * that subjob and compares every later threaded subjob against them. The
 * `{ 0 }` initializer zeroes the object, so `set_up` starts out false.
 */
302 struct fio_blkio_options opts;
303 } first_threaded_subjob = { 0 };
/*
 * Logs that jobs using engine libblkio and sharing a process must agree on
 * one particular option.
 * NOTE(review): `option_name` is presumably the value substituted for the %s
 * in the message - confirm against the full log_err() call.
 */
305 static void fio_blkio_log_opt_compat_err(const char *option_name)
307 log_err("fio: jobs using engine libblkio and sharing a process must agree on the %s option\n",
/*
 * fio_blkio_check_opt_compat() - check that a subjob using 'thread' agrees
 * with the first threaded subjob on every option that must be shared.
 *
 * Subjobs that do not use 'thread' return 0 immediately. The first threaded
 * subjob records td->o.odirect and a copy of its engine options in
 * first_threaded_subjob. Each later threaded subjob is compared with that
 * record on direct/buffered, libblkio_driver, libblkio_path,
 * libblkio_pre_connect_props, libblkio_num_entries, libblkio_queue_size and
 * libblkio_pre_start_props; a mismatch is reported through
 * fio_blkio_log_opt_compat_err().
 */
312 * If td represents a subjob with option 'thread', check if its options are
313 * compatible with those of other threaded subjobs that were already set up.
315 static int fio_blkio_check_opt_compat(struct thread_data *td)
317 const struct fio_blkio_options *options = td->eo, *prev_options;
319 if (!td->o.use_thread)
320 return 0; /* subjob doesn't use 'thread' */
322 if (!first_threaded_subjob.set_up) {
323 /* first subjob using 'thread', store options for later */
324 first_threaded_subjob.set_up = true;
325 first_threaded_subjob.direct = td->o.odirect;
326 first_threaded_subjob.opts = *options;
330 /* not first subjob using 'thread', check option compatibility */
331 prev_options = &first_threaded_subjob.opts;
333 if (td->o.odirect != first_threaded_subjob.direct) {
334 fio_blkio_log_opt_compat_err("direct/buffered");
/*
 * Either driver pointer may still be NULL here: fio_blkio_setup() runs this
 * check before fio_blkio_create_and_connect() rejects an unset
 * libblkio_driver, so compare NULL-safely instead of with bare strcmp().
 */
338 if (!possibly_null_strs_equal(options->driver, prev_options->driver)) {
339 fio_blkio_log_opt_compat_err("libblkio_driver");
343 if (!possibly_null_strs_equal(options->path, prev_options->path)) {
344 fio_blkio_log_opt_compat_err("libblkio_path");
348 if (!possibly_null_strs_equal(options->pre_connect_props,
349 prev_options->pre_connect_props)) {
350 fio_blkio_log_opt_compat_err("libblkio_pre_connect_props");
354 if (options->num_entries != prev_options->num_entries) {
355 fio_blkio_log_opt_compat_err("libblkio_num_entries");
359 if (options->queue_size != prev_options->queue_size) {
360 fio_blkio_log_opt_compat_err("libblkio_queue_size");
364 if (!possibly_null_strs_equal(options->pre_start_props,
365 prev_options->pre_start_props)) {
366 fio_blkio_log_opt_compat_err("libblkio_pre_start_props");
/*
 * fio_blkio_create_and_connect() - create a blkio instance for the configured
 * driver, apply the pre-connect settings, connect it, and apply the pre-start
 * settings.
 *
 * Steps visible here, in order:
 *   1. log an error if libblkio_driver is unset, otherwise blkio_create();
 *   2. set "direct" from td->o.odirect, tolerating -ENOENT so that drivers
 *      without that property still work;
 *   3. set "read-only" from `read_only` and "path" from the path option;
 *   4. apply libblkio_pre_connect_props, then blkio_connect();
 *   5. set "num-entries" and "queue-size", each only when its option is
 *      non-zero;
 *   6. apply libblkio_pre_start_props.
 * Every failure after blkio_create() jumps to err_blkio_destroy.
 * NOTE(review): `*out_blkio` presumably receives the new instance on success
 * - confirm against the end of the function.
 */
373 static int fio_blkio_create_and_connect(struct thread_data *td,
374 struct blkio **out_blkio)
376 const struct fio_blkio_options *options = td->eo;
380 if (!options->driver) {
381 log_err("fio: engine libblkio requires option libblkio_driver to be set\n");
385 if (blkio_create(options->driver, &b) != 0) {
386 fio_blkio_log_err(blkio_create);
390 /* don't fail if driver doesn't have a "direct" property */
391 ret = blkio_set_bool(b, "direct", td->o.odirect);
392 if (ret != 0 && ret != -ENOENT) {
393 fio_blkio_log_err(blkio_set_bool);
394 goto err_blkio_destroy;
397 if (blkio_set_bool(b, "read-only", read_only) != 0) {
398 fio_blkio_log_err(blkio_set_bool);
399 goto err_blkio_destroy;
403 if (blkio_set_str(b, "path", options->path) != 0) {
404 fio_blkio_log_err(blkio_set_str);
405 goto err_blkio_destroy;
409 if (fio_blkio_set_props_from_str(b, "libblkio_pre_connect_props",
410 options->pre_connect_props) != 0)
411 goto err_blkio_destroy;
413 if (blkio_connect(b) != 0) {
414 fio_blkio_log_err(blkio_connect);
415 goto err_blkio_destroy;
418 if (options->num_entries != 0) {
419 if (blkio_set_int(b, "num-entries",
420 options->num_entries) != 0) {
421 fio_blkio_log_err(blkio_set_int);
422 goto err_blkio_destroy;
426 if (options->queue_size != 0) {
427 if (blkio_set_int(b, "queue-size", options->queue_size) != 0) {
428 fio_blkio_log_err(blkio_set_int);
429 goto err_blkio_destroy;
433 if (fio_blkio_set_props_from_str(b, "libblkio_pre_start_props",
434 options->pre_start_props) != 0)
435 goto err_blkio_destroy;
/*
 * Set to true by fio_blkio_setup() when fio_blkio_check_opt_compat() reports
 * a mismatch; fio_blkio_init() consults it for subjobs that use 'thread'.
 */
445 static bool incompatible_threaded_subjob_options = false;
/*
 * fio_blkio_setup() - validate the options and discover the device capacity.
 *
 * Asserts td->files_index == 1. Flags an option incompatibility between
 * threaded subjobs in incompatible_threaded_subjob_options, and logs an error
 * when hipri is combined with libblkio_wait_mode=eventfd or with
 * libblkio_force_enable_completion_eventfd. It then creates and connects a
 * temporary blkio instance, reads its "capacity" property, stores that as
 * td->files[0]->real_file_size and marks the file size as known.
 */
448 * This callback determines the device/file size, so it creates and connects a
449 * blkio instance. But it is invoked from the main thread in the original fio
450 * process, not from the processes in which jobs will actually run. It thus
451 * subsequently destroys the blkio, which is recreated in the init() callback.
453 static int fio_blkio_setup(struct thread_data *td)
455 const struct fio_blkio_options *options = td->eo;
460 assert(td->files_index == 1);
462 if (fio_blkio_check_opt_compat(td) != 0) {
463 incompatible_threaded_subjob_options = true;
467 if (options->hipri &&
468 options->wait_mode == FIO_BLKIO_WAIT_MODE_EVENTFD) {
469 log_err("fio: option hipri is incompatible with option libblkio_wait_mode=eventfd\n");
473 if (options->hipri && options->force_enable_completion_eventfd) {
474 log_err("fio: option hipri is incompatible with option libblkio_force_enable_completion_eventfd\n");
478 if (fio_blkio_create_and_connect(td, &b) != 0)
481 if (blkio_get_uint64(b, "capacity", &capacity) != 0) {
482 fio_blkio_log_err(blkio_get_uint64);
484 goto out_blkio_destroy;
487 td->files[0]->real_file_size = capacity;
488 fio_file_set_size_known(td->files[0]);
/*
 * fio_blkio_init() - per-subjob initialization.
 *
 * Subjobs using 'thread' are failed up front when setup() found incompatible
 * options among threaded subjobs. Otherwise the function turns off submission
 * latency reporting, allocates the subjob's fio_blkio_data together with
 * iodepth-sized `iovecs` and `completions` arrays, and then, while holding
 * the process lock:
 *   - if no subjob in this process has been initialized yet
 *     (initted_threads == 0), creates and connects proc_state.b, sets
 *     "num-queues" and "num-poll-queues" and starts the instance. With
 *     'thread' the counts come from total_threaded_subjobs(); without it a
 *     single queue is requested, a poll queue when hipri is set;
 *   - picks this subjob's queue: poll queue number initted_hipri_threads for
 *     hipri, otherwise queue number
 *     (initted_threads - initted_hipri_threads);
 *   - when the wait mode is eventfd or the completion eventfd is
 *     force-enabled, enables the completion fd and clears O_NONBLOCK on it;
 *     completion_fd is -1 when the eventfd is not used;
 *   - increments proc_state.initted_threads (and initted_hipri_threads,
 *     presumably only for hipri subjobs) and stores the data in
 *     td->io_ops_data last, so that cleanup() does nothing if init() fails.
 * On the error path the shared blkio is destroyed only while
 * initted_threads is still 0, i.e. when this subjob was the one creating it.
 */
495 static int fio_blkio_init(struct thread_data *td)
497 const struct fio_blkio_options *options = td->eo;
498 struct fio_blkio_data *data;
501 if (td->o.use_thread && incompatible_threaded_subjob_options) {
503 * Different subjobs using option 'thread' specified
504 * incompatible options. We don't know which configuration
505 * should win, so we just fail all such subjobs.
511 * Request enqueueing is fast, and it's not possible to know exactly
512 * when a request is submitted, so never report submission latencies.
514 td->o.disable_slat = 1;
516 data = calloc(1, sizeof(*data));
518 log_err("fio: calloc() failed\n");
522 data->iovecs = calloc(td->o.iodepth, sizeof(data->iovecs[0]));
523 data->completions = calloc(td->o.iodepth, sizeof(data->completions[0]));
524 if (!data->iovecs || !data->completions) {
525 log_err("fio: calloc() failed\n");
529 fio_blkio_proc_lock();
531 if (proc_state.initted_threads == 0) {
532 /* initialize per-process blkio */
533 int num_queues, num_poll_queues;
535 if (td->o.use_thread) {
536 num_queues = total_threaded_subjobs(false);
537 num_poll_queues = total_threaded_subjobs(true);
539 num_queues = options->hipri ? 0 : 1;
540 num_poll_queues = options->hipri ? 1 : 0;
543 if (fio_blkio_create_and_connect(td, &proc_state.b) != 0)
546 if (blkio_set_int(proc_state.b, "num-queues",
548 fio_blkio_log_err(blkio_set_int);
549 goto err_blkio_destroy;
552 if (blkio_set_int(proc_state.b, "num-poll-queues",
553 num_poll_queues) != 0) {
554 fio_blkio_log_err(blkio_set_int);
555 goto err_blkio_destroy;
558 if (blkio_start(proc_state.b) != 0) {
559 fio_blkio_log_err(blkio_start);
560 goto err_blkio_destroy;
564 if (options->hipri) {
565 int i = proc_state.initted_hipri_threads;
566 data->q = blkio_get_poll_queue(proc_state.b, i);
568 int i = proc_state.initted_threads -
569 proc_state.initted_hipri_threads;
570 data->q = blkio_get_queue(proc_state.b, i);
573 if (options->wait_mode == FIO_BLKIO_WAIT_MODE_EVENTFD ||
574 options->force_enable_completion_eventfd) {
575 /* enable completion fd and make it blocking */
576 blkioq_set_completion_fd_enabled(data->q, true);
577 data->completion_fd = blkioq_get_completion_fd(data->q);
579 flags = fcntl(data->completion_fd, F_GETFL);
581 log_err("fio: fcntl(F_GETFL) failed: %s\n",
583 goto err_blkio_destroy;
586 if (fcntl(data->completion_fd, F_SETFL,
587 flags & ~O_NONBLOCK) != 0) {
588 log_err("fio: fcntl(F_SETFL) failed: %s\n",
590 goto err_blkio_destroy;
593 data->completion_fd = -1;
596 ++proc_state.initted_threads;
598 ++proc_state.initted_hipri_threads;
600 /* Set data last so cleanup() does nothing if init() fails. */
601 td->io_ops_data = data;
603 fio_blkio_proc_unlock();
608 if (proc_state.initted_threads == 0)
609 blkio_destroy(&proc_state.b);
611 if (proc_state.initted_threads == 0)
613 fio_blkio_proc_unlock();
615 free(data->completions);
/*
 * fio_blkio_post_init() - register fio-allocated I/O memory with libblkio.
 *
 * Only acts when the buffer did not come from fio_blkio_iomem_alloc(), i.e.
 * when data->has_mem_region is false. In that case a blkio_mem_region that
 * starts at td->orig_buffer is mapped into proc_state.b with
 * blkio_map_mem_region(). Its length is the largest of the read, write and
 * trim maximum block sizes multiplied by the I/O depth; the padding fio may
 * add to td->orig_buffer_size is left out on purpose so that the length is
 * not pushed out of "mem-region-alignment". A mapping failure is logged with
 * fio_blkio_log_err().
 */
621 static int fio_blkio_post_init(struct thread_data *td)
623 struct fio_blkio_data *data = td->io_ops_data;
625 if (!data->has_mem_region) {
627 * Memory was allocated by the fio core and not iomem_alloc(),
628 * so we need to register it as a memory region here.
630 * `td->orig_buffer_size` is computed like `len` below, but then
631 * fio can add some padding to it to make sure it is
632 * sufficiently aligned to the page size and the mem_align
633 * option. However, this can make it become unaligned to the
634 * "mem-region-alignment" property in ways that the user can't
635 * control, so we essentially recompute `td->orig_buffer_size`
636 * here but without adding that padding.
639 unsigned long long max_block_size;
640 struct blkio_mem_region region;
642 max_block_size = max(td->o.max_bs[DDIR_READ],
643 max(td->o.max_bs[DDIR_WRITE],
644 td->o.max_bs[DDIR_TRIM]));
646 region = (struct blkio_mem_region) {
647 .addr = td->orig_buffer,
648 .len = (size_t)max_block_size *
649 (size_t)td->o.iodepth,
653 if (blkio_map_mem_region(proc_state.b, &region) != 0) {
654 fio_blkio_log_err(blkio_map_mem_region);
/*
 * fio_blkio_cleanup() - release a subjob's engine data and drop its share of
 * the per-process blkio instance.
 *
 * Per-subjob allocations such as data->completions are freed. Then, under
 * the process lock, proc_state.initted_threads is decremented and
 * proc_state.b is destroyed only when the count reaches zero, so subjobs
 * that are still doing I/O in the same process keep a valid instance.
 */
662 static void fio_blkio_cleanup(struct thread_data *td)
664 struct fio_blkio_data *data = td->io_ops_data;
667 * Subjobs from different jobs can be terminated at different times, so
668 * this callback may be invoked for one subjob while another is still
669 * doing I/O. Those subjobs may share the process, so we must wait until
670 * the last subjob in the process wants to clean up to actually destroy
675 free(data->completions);
679 fio_blkio_proc_lock();
680 if (--proc_state.initted_threads == 0) {
681 blkio_destroy(&proc_state.b);
684 fio_blkio_proc_unlock();
/*
 * align_up(x, y) - for integer operands, rounds x up to the nearest multiple
 * of y. `y` is expanded three times, so pass an expression without side
 * effects, and it must be non-zero because it is used as a divisor.
 */
688 #define align_up(x, y) ((((x) + (y) - 1) / (y)) * (y))
/*
 * fio_blkio_iomem_alloc() - allocate the subjob's I/O buffer through libblkio.
 *
 * Reads the "mem-region-alignment" property of proc_state.b and rounds `size`
 * up to a multiple of it. Under the process lock it then allocates
 * data->mem_region with blkio_alloc_mem_region() and maps it with
 * blkio_map_mem_region(). On success td->orig_buffer is set to the region's
 * address and data->has_mem_region to true. libblkio failures are logged with
 * fio_blkio_log_err(); the blkio_free_mem_region() call near the end
 * presumably belongs to the path taken when mapping fails.
 */
690 static int fio_blkio_iomem_alloc(struct thread_data *td, size_t size)
692 struct fio_blkio_data *data = td->io_ops_data;
694 uint64_t mem_region_alignment;
696 if (blkio_get_uint64(proc_state.b, "mem-region-alignment",
697 &mem_region_alignment) != 0) {
698 fio_blkio_log_err(blkio_get_uint64);
702 /* round up size to satisfy mem-region-alignment */
703 size = align_up(size, (size_t)mem_region_alignment);
705 fio_blkio_proc_lock();
707 if (blkio_alloc_mem_region(proc_state.b, &data->mem_region,
709 fio_blkio_log_err(blkio_alloc_mem_region);
714 if (blkio_map_mem_region(proc_state.b, &data->mem_region) != 0) {
715 fio_blkio_log_err(blkio_map_mem_region);
720 td->orig_buffer = data->mem_region.addr;
721 data->has_mem_region = true;
727 blkio_free_mem_region(proc_state.b, &data->mem_region);
729 fio_blkio_proc_unlock();
/*
 * fio_blkio_iomem_free() - release a buffer obtained by
 * fio_blkio_iomem_alloc().
 *
 * Does nothing unless the subjob has engine data and data->has_mem_region is
 * set. Otherwise the region is unmapped and freed under the process lock and
 * has_mem_region is cleared.
 */
733 static void fio_blkio_iomem_free(struct thread_data *td)
735 struct fio_blkio_data *data = td->io_ops_data;
737 if (data && data->has_mem_region) {
738 fio_blkio_proc_lock();
739 blkio_unmap_mem_region(proc_state.b, &data->mem_region);
740 blkio_free_mem_region(proc_state.b, &data->mem_region);
741 fio_blkio_proc_unlock();
743 data->has_mem_region = false;
/*
 * fio_blkio_open_file() - the engine's open_file callback, installed in the
 * `ioengine` ops table.
 */
747 static int fio_blkio_open_file(struct thread_data *td, struct fio_file *f)
/*
 * fio_blkio_queue() - enqueue one io_u on the subjob's libblkio queue.
 *
 * After fio_ro_check(), the request is dispatched on io_u->ddir:
 *   - reads and writes use blkioq_readv()/blkioq_writev() with one iovec,
 *     taken from data->iovecs[io_u->index] and pointing at io_u->xfer_buf,
 *     when libblkio_vectored is set, and blkioq_read()/blkioq_write()
 *     otherwise;
 *   - with libblkio_write_zeroes_on_trim set the request goes to
 *     blkioq_write_zeroes(), otherwise to blkioq_discard();
 *   - blkioq_flush() is issued for a further case;
 *   - anything else is completed on the spot with io_u->error = ENOTSUP,
 *     logged through io_u_log_error(), returning FIO_Q_COMPLETED.
 * The io_u pointer is handed to libblkio with each request;
 * fio_blkio_event() reads it back from the completion's user_data.
 */
752 static enum fio_q_status fio_blkio_queue(struct thread_data *td,
755 const struct fio_blkio_options *options = td->eo;
756 struct fio_blkio_data *data = td->io_ops_data;
758 fio_ro_check(td, io_u);
760 switch (io_u->ddir) {
762 if (options->vectored) {
763 struct iovec *iov = &data->iovecs[io_u->index];
764 iov->iov_base = io_u->xfer_buf;
765 iov->iov_len = (size_t)io_u->xfer_buflen;
767 blkioq_readv(data->q, io_u->offset, iov, 1,
770 blkioq_read(data->q, io_u->offset,
772 (size_t)io_u->xfer_buflen, io_u, 0);
776 if (options->vectored) {
777 struct iovec *iov = &data->iovecs[io_u->index];
778 iov->iov_base = io_u->xfer_buf;
779 iov->iov_len = (size_t)io_u->xfer_buflen;
781 blkioq_writev(data->q, io_u->offset, iov, 1,
784 blkioq_write(data->q, io_u->offset,
786 (size_t)io_u->xfer_buflen, io_u,
791 if (options->write_zeroes_on_trim) {
792 blkioq_write_zeroes(data->q, io_u->offset,
793 io_u->xfer_buflen, io_u, 0);
795 blkioq_discard(data->q, io_u->offset,
796 io_u->xfer_buflen, io_u, 0);
801 blkioq_flush(data->q, io_u, 0);
804 io_u->error = ENOTSUP;
805 io_u_log_error(td, io_u);
806 return FIO_Q_COMPLETED;
/*
 * fio_blkio_getevents() - gather completions into data->completions using
 * the configured wait mode:
 *   BLOCK   - one blkioq_do_io() call that is given `min` and `max`;
 *   EVENTFD - a first blkioq_do_io() with a minimum of 0, then, as long as
 *             fewer than `min` completions were gathered, a read() on the
 *             completion fd followed by another blkioq_do_io() that appends
 *             at data->completions + n. A read() that does not return
 *             sizeof(event) bytes is logged;
 *   LOOP    - repeated blkioq_do_io() calls with a minimum of 0, appending
 *             at data->completions + n, until `min` completions were
 *             gathered.
 * blkioq_do_io() failures are logged with fio_blkio_log_err().
 */
812 static int fio_blkio_getevents(struct thread_data *td, unsigned int min,
813 unsigned int max, const struct timespec *t)
815 const struct fio_blkio_options *options = td->eo;
816 struct fio_blkio_data *data = td->io_ops_data;
820 switch (options->wait_mode) {
821 case FIO_BLKIO_WAIT_MODE_BLOCK:
822 n = blkioq_do_io(data->q, data->completions, (int)min, (int)max,
825 fio_blkio_log_err(blkioq_do_io);
829 case FIO_BLKIO_WAIT_MODE_EVENTFD:
830 n = blkioq_do_io(data->q, data->completions, 0, (int)max, NULL);
832 fio_blkio_log_err(blkioq_do_io);
835 while (n < (int)min) {
836 ret = read(data->completion_fd, &event, sizeof(event));
837 if (ret != sizeof(event)) {
838 log_err("fio: read() on the completion fd returned %d\n",
843 ret = blkioq_do_io(data->q, data->completions + n, 0,
846 fio_blkio_log_err(blkioq_do_io);
853 case FIO_BLKIO_WAIT_MODE_LOOP:
854 for (n = 0; n < (int)min; ) {
855 ret = blkioq_do_io(data->q, data->completions + n, 0,
858 fio_blkio_log_err(blkioq_do_io);
/*
 * fio_blkio_event() - look up entry number `event` in data->completions,
 * recover the io_u stored in its user_data, and record the negated
 * completion result in io_u->error.
 */
870 static struct io_u *fio_blkio_event(struct thread_data *td, int event)
872 struct fio_blkio_data *data = td->io_ops_data;
873 struct blkio_completion *completion = &data->completions[event];
874 struct io_u *io_u = completion->user_data;
876 io_u->error = -completion->ret;
/*
 * Engine descriptor passed to register_ioengine()/unregister_ioengine(). It
 * lists the engine's flags, wires the callbacks defined in this file into the
 * ops table, and declares sizeof(struct fio_blkio_options) as the size of the
 * option struct.
 */
881 FIO_STATIC struct ioengine_ops ioengine = {
883 .version = FIO_IOOPS_VERSION,
884 .flags = FIO_DISKLESSIO | FIO_NOEXTEND |
885 FIO_NO_OFFLOAD | FIO_SKIPPABLE_IOMEM_ALLOC,
887 .setup = fio_blkio_setup,
888 .init = fio_blkio_init,
889 .post_init = fio_blkio_post_init,
890 .cleanup = fio_blkio_cleanup,
892 .iomem_alloc = fio_blkio_iomem_alloc,
893 .iomem_free = fio_blkio_iomem_free,
895 .open_file = fio_blkio_open_file,
897 .queue = fio_blkio_queue,
898 .getevents = fio_blkio_getevents,
899 .event = fio_blkio_event,
902 .option_struct_size = sizeof(struct fio_blkio_options),
/*
 * Registers the libblkio engine descriptor with fio.
 * NOTE(review): `fio_init` presumably marks this function to run
 * automatically at start-up - confirm against its definition.
 */
905 static void fio_init fio_blkio_register(void)
907 register_ioengine(&ioengine);
/*
 * Removes the libblkio engine descriptor from fio again.
 * NOTE(review): `fio_exit` presumably marks this function to run
 * automatically at exit - confirm against its definition.
 */
910 static void fio_exit fio_blkio_unregister(void)
912 unregister_ioengine(&ioengine);