/*
 * IO engine using the xNVMe C API.
 *
 * See: http://xnvme.io/
 *
 * SPDX-License-Identifier: Apache-2.0
 */
14 #include "zbd_types.h"
/* Serializes device open/close and other non-I/O-path xNVMe calls across threads */
static pthread_mutex_t g_serialize = PTHREAD_MUTEX_INITIALIZER;
20 struct xnvme_fioe_fwrap {
21 /* fio file representation */
22 struct fio_file *fio_file;
24 /* xNVMe device handle */
25 struct xnvme_dev *dev;
26 /* xNVMe device geometry */
27 const struct xnvme_geo *geo;
29 struct xnvme_queue *queue;
36 XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_fwrap) == 64, "Incorrect size")
38 struct xnvme_fioe_data {
39 /* I/O completion queue */
42 /* # of iocq entries; incremented via getevents()/cb_pool() */
46 * # of errors; incremented when observed on completion via
47 * getevents()/cb_pool()
51 /* Controller which device/file to select */
55 /* Number of devices/files for which open() has been called */
57 /* Number of devices/files allocated in files[] */
64 struct xnvme_fioe_fwrap files[];
66 XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_data) == 64, "Incorrect size")
/* Engine options; layout must match the offsetof() uses in options[] below */
struct xnvme_fioe_options {
	void *padding;			/* fio requires the first member to be a pad */
	unsigned int hipri;		/* use polled I/O completions */
	unsigned int sqpoll_thread;	/* offload submission to a kernel thread */
	unsigned int xnvme_dev_nsid;	/* namespace-id for user-space NVMe driver */
	unsigned int xnvme_iovec;	/* submit vectored I/Os */
	char *xnvme_be;			/* xNVMe backend, e.g. spdk/linux/fbsd */
	char *xnvme_mem;		/* xNVMe memory backend */
	char *xnvme_async;		/* async command-interface */
	char *xnvme_sync;		/* sync command-interface */
	char *xnvme_admin;		/* admin command-interface */
	char *xnvme_dev_subnqn;		/* subsystem NQN for Fabrics */
};
82 static struct fio_option options[] = {
85 .lname = "High Priority",
86 .type = FIO_OPT_STR_SET,
87 .off1 = offsetof(struct xnvme_fioe_options, hipri),
88 .help = "Use polled IO completions",
89 .category = FIO_OPT_C_ENGINE,
90 .group = FIO_OPT_G_XNVME,
93 .name = "sqthread_poll",
94 .lname = "Kernel SQ thread polling",
95 .type = FIO_OPT_STR_SET,
96 .off1 = offsetof(struct xnvme_fioe_options, sqpoll_thread),
97 .help = "Offload submission/completion to kernel thread",
98 .category = FIO_OPT_C_ENGINE,
99 .group = FIO_OPT_G_XNVME,
103 .lname = "xNVMe Backend",
104 .type = FIO_OPT_STR_STORE,
105 .off1 = offsetof(struct xnvme_fioe_options, xnvme_be),
106 .help = "Select xNVMe backend [spdk,linux,fbsd]",
107 .category = FIO_OPT_C_ENGINE,
108 .group = FIO_OPT_G_XNVME,
112 .lname = "xNVMe Memory Backend",
113 .type = FIO_OPT_STR_STORE,
114 .off1 = offsetof(struct xnvme_fioe_options, xnvme_mem),
115 .help = "Select xNVMe memory backend",
116 .category = FIO_OPT_C_ENGINE,
117 .group = FIO_OPT_G_XNVME,
120 .name = "xnvme_async",
121 .lname = "xNVMe Asynchronous command-interface",
122 .type = FIO_OPT_STR_STORE,
123 .off1 = offsetof(struct xnvme_fioe_options, xnvme_async),
124 .help = "Select xNVMe async. interface: "
125 "[emu,thrpool,io_uring,io_uring_cmd,libaio,posix,vfio,nil]",
126 .category = FIO_OPT_C_ENGINE,
127 .group = FIO_OPT_G_XNVME,
130 .name = "xnvme_sync",
131 .lname = "xNVMe Synchronous. command-interface",
132 .type = FIO_OPT_STR_STORE,
133 .off1 = offsetof(struct xnvme_fioe_options, xnvme_sync),
134 .help = "Select xNVMe sync. interface: [nvme,psync,block]",
135 .category = FIO_OPT_C_ENGINE,
136 .group = FIO_OPT_G_XNVME,
139 .name = "xnvme_admin",
140 .lname = "xNVMe Admin command-interface",
141 .type = FIO_OPT_STR_STORE,
142 .off1 = offsetof(struct xnvme_fioe_options, xnvme_admin),
143 .help = "Select xNVMe admin. cmd-interface: [nvme,block]",
144 .category = FIO_OPT_C_ENGINE,
145 .group = FIO_OPT_G_XNVME,
148 .name = "xnvme_dev_nsid",
149 .lname = "xNVMe Namespace-Identifier, for user-space NVMe driver",
151 .off1 = offsetof(struct xnvme_fioe_options, xnvme_dev_nsid),
152 .help = "xNVMe Namespace-Identifier, for user-space NVMe driver",
153 .category = FIO_OPT_C_ENGINE,
154 .group = FIO_OPT_G_XNVME,
157 .name = "xnvme_dev_subnqn",
158 .lname = "Subsystem nqn for Fabrics",
159 .type = FIO_OPT_STR_STORE,
160 .off1 = offsetof(struct xnvme_fioe_options, xnvme_dev_subnqn),
161 .help = "Subsystem NQN for Fabrics",
162 .category = FIO_OPT_C_ENGINE,
163 .group = FIO_OPT_G_XNVME,
166 .name = "xnvme_iovec",
167 .lname = "Vectored IOs",
168 .type = FIO_OPT_STR_SET,
169 .off1 = offsetof(struct xnvme_fioe_options, xnvme_iovec),
170 .help = "Send vectored IOs",
171 .category = FIO_OPT_C_ENGINE,
172 .group = FIO_OPT_G_XNVME,
180 static void cb_pool(struct xnvme_cmd_ctx *ctx, void *cb_arg)
182 struct io_u *io_u = cb_arg;
183 struct xnvme_fioe_data *xd = io_u->mmap_data;
185 if (xnvme_cmd_ctx_cpl_status(ctx)) {
186 xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
191 xd->iocq[xd->completed++] = io_u;
192 xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
195 static struct xnvme_opts xnvme_opts_from_fioe(struct thread_data *td)
197 struct xnvme_fioe_options *o = td->eo;
198 struct xnvme_opts opts = xnvme_opts_default();
200 opts.nsid = o->xnvme_dev_nsid;
201 opts.subnqn = o->xnvme_dev_subnqn;
202 opts.be = o->xnvme_be;
203 opts.mem = o->xnvme_mem;
204 opts.async = o->xnvme_async;
205 opts.sync = o->xnvme_sync;
206 opts.admin = o->xnvme_admin;
208 opts.poll_io = o->hipri;
209 opts.poll_sq = o->sqpoll_thread;
211 opts.direct = td->o.odirect;
216 static void _dev_close(struct thread_data *td, struct xnvme_fioe_fwrap *fwrap)
219 xnvme_queue_term(fwrap->queue);
221 xnvme_dev_close(fwrap->dev);
223 memset(fwrap, 0, sizeof(*fwrap));
226 static void xnvme_fioe_cleanup(struct thread_data *td)
228 struct xnvme_fioe_data *xd = NULL;
231 if (!td->io_ops_data)
234 xd = td->io_ops_data;
236 err = pthread_mutex_lock(&g_serialize);
238 log_err("ioeng->cleanup(): pthread_mutex_lock(), err(%d)\n", err);
239 /* NOTE: not returning here */
241 for (uint64_t i = 0; i < xd->nallocated; ++i)
242 _dev_close(td, &xd->files[i]);
245 err = pthread_mutex_unlock(&g_serialize);
247 log_err("ioeng->cleanup(): pthread_mutex_unlock(), err(%d)\n", err);
253 td->io_ops_data = NULL;
257 * Helper function setting up device handles as addressed by the naming
258 * convention of the given `fio_file` filename.
260 * Checks thread-options for explicit control of asynchronous implementation via
261 * the ``--xnvme_async={thrpool,emu,posix,io_uring,libaio,nil}``.
263 static int _dev_open(struct thread_data *td, struct fio_file *f)
265 struct xnvme_opts opts = xnvme_opts_from_fioe(td);
266 struct xnvme_fioe_data *xd = td->io_ops_data;
267 struct xnvme_fioe_fwrap *fwrap;
271 if (f->fileno > (int)xd->nallocated) {
272 log_err("ioeng->_dev_open(%s): invalid assumption\n", f->file_name);
276 fwrap = &xd->files[f->fileno];
278 err = pthread_mutex_lock(&g_serialize);
280 log_err("ioeng->_dev_open(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
285 fwrap->dev = xnvme_dev_open(f->file_name, &opts);
287 log_err("ioeng->_dev_open(%s): xnvme_dev_open(), err(%d)\n", f->file_name, errno);
290 fwrap->geo = xnvme_dev_get_geo(fwrap->dev);
292 if (xnvme_queue_init(fwrap->dev, td->o.iodepth, flags, &(fwrap->queue))) {
293 log_err("ioeng->_dev_open(%s): xnvme_queue_init(), err(?)\n", f->file_name);
296 xnvme_queue_set_cb(fwrap->queue, cb_pool, NULL);
298 fwrap->ssw = xnvme_dev_get_ssw(fwrap->dev);
299 fwrap->lba_nbytes = fwrap->geo->lba_nbytes;
302 fwrap->fio_file->filetype = FIO_TYPE_BLOCK;
303 fwrap->fio_file->real_file_size = fwrap->geo->tbytes;
304 fio_file_set_size_known(fwrap->fio_file);
306 err = pthread_mutex_unlock(&g_serialize);
308 log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
314 xnvme_queue_term(fwrap->queue);
315 xnvme_dev_close(fwrap->dev);
317 err = pthread_mutex_unlock(&g_serialize);
319 log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
325 static int xnvme_fioe_init(struct thread_data *td)
327 struct xnvme_fioe_data *xd = NULL;
328 struct xnvme_fioe_options *o = td->eo;
332 if (!td->o.use_thread) {
333 log_err("ioeng->init(): --thread=1 is required\n");
337 /* Allocate xd and iocq */
338 xd = calloc(1, sizeof(*xd) + sizeof(*xd->files) * td->o.nr_files);
340 log_err("ioeng->init(): !calloc(), err(%d)\n", errno);
344 xd->iocq = calloc(td->o.iodepth, sizeof(struct io_u *));
347 log_err("ioeng->init(): !calloc(xd->iocq), err(%d)\n", errno);
351 if (o->xnvme_iovec) {
352 xd->iovec = calloc(td->o.iodepth, sizeof(*xd->iovec));
356 log_err("ioeng->init(): !calloc(xd->iovec), err(%d)\n", errno);
362 td->io_ops_data = xd;
364 for_each_file(td, f, i)
366 if (_dev_open(td, f)) {
368 * Note: We are not freeing xd, iocq and iovec. This
369 * will be done as part of cleanup routine.
371 log_err("ioeng->init(): failed; _dev_open(%s)\n", f->file_name);
378 if (xd->nallocated != td->o.nr_files) {
379 log_err("ioeng->init(): failed; nallocated != td->o.nr_files\n");
386 /* NOTE: using the first device for buffer-allocators) */
387 static int xnvme_fioe_iomem_alloc(struct thread_data *td, size_t total_mem)
389 struct xnvme_fioe_data *xd = td->io_ops_data;
390 struct xnvme_fioe_fwrap *fwrap = &xd->files[0];
393 log_err("ioeng->iomem_alloc(): failed; no dev-handle\n");
397 td->orig_buffer = xnvme_buf_alloc(fwrap->dev, total_mem);
399 return td->orig_buffer == NULL;
402 /* NOTE: using the first device for buffer-allocators) */
403 static void xnvme_fioe_iomem_free(struct thread_data *td)
405 struct xnvme_fioe_data *xd = NULL;
406 struct xnvme_fioe_fwrap *fwrap = NULL;
408 if (!td->io_ops_data)
411 xd = td->io_ops_data;
412 fwrap = &xd->files[0];
415 log_err("ioeng->iomem_free(): failed no dev-handle\n");
419 xnvme_buf_free(fwrap->dev, td->orig_buffer);
422 static int xnvme_fioe_io_u_init(struct thread_data *td, struct io_u *io_u)
424 io_u->mmap_data = td->io_ops_data;
429 static void xnvme_fioe_io_u_free(struct thread_data *td, struct io_u *io_u)
431 io_u->mmap_data = NULL;
434 static struct io_u *xnvme_fioe_event(struct thread_data *td, int event)
436 struct xnvme_fioe_data *xd = td->io_ops_data;
439 assert((unsigned)event < xd->completed);
441 return xd->iocq[event];
444 static int xnvme_fioe_getevents(struct thread_data *td, unsigned int min, unsigned int max,
445 const struct timespec *t)
447 struct xnvme_fioe_data *xd = td->io_ops_data;
448 struct xnvme_fioe_fwrap *fwrap = NULL;
449 int nfiles = xd->nallocated;
452 if (xd->prev != -1 && ++xd->prev < nfiles) {
453 fwrap = &xd->files[xd->prev];
459 if (fwrap == NULL || xd->cur == nfiles) {
460 fwrap = &xd->files[0];
464 while (fwrap != NULL && xd->cur < nfiles && err >= 0) {
465 err = xnvme_queue_poke(fwrap->queue, max - xd->completed);
474 log_err("ioeng->getevents(): unhandled IO error\n");
479 if (xd->completed >= min) {
481 return xd->completed;
484 fwrap = &xd->files[xd->cur];
499 return xd->completed;
502 static enum fio_q_status xnvme_fioe_queue(struct thread_data *td, struct io_u *io_u)
504 struct xnvme_fioe_data *xd = td->io_ops_data;
505 struct xnvme_fioe_fwrap *fwrap;
506 struct xnvme_cmd_ctx *ctx;
511 bool vectored_io = ((struct xnvme_fioe_options *)td->eo)->xnvme_iovec;
512 uint32_t dir = io_u->dtype;
514 fio_ro_check(td, io_u);
516 fwrap = &xd->files[io_u->file->fileno];
517 nsid = xnvme_dev_get_nsid(fwrap->dev);
519 slba = io_u->offset >> fwrap->ssw;
520 nlb = (io_u->xfer_buflen >> fwrap->ssw) - 1;
522 ctx = xnvme_queue_get_cmd_ctx(fwrap->queue);
523 ctx->async.cb_arg = io_u;
525 ctx->cmd.common.nsid = nsid;
526 ctx->cmd.nvm.slba = slba;
527 ctx->cmd.nvm.nlb = nlb;
529 ctx->cmd.nvm.dtype = io_u->dtype;
530 ctx->cmd.nvm.cdw13.dspec = io_u->dspec;
533 switch (io_u->ddir) {
535 ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
539 ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
543 log_err("ioeng->queue(): ENOSYS: %u\n", io_u->ddir);
544 xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
546 io_u->error = ENOSYS;
548 return FIO_Q_COMPLETED;
552 xd->iovec[io_u->index].iov_base = io_u->xfer_buf;
553 xd->iovec[io_u->index].iov_len = io_u->xfer_buflen;
555 err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen, NULL, 0,
558 err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen, NULL, 0);
566 xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
570 log_err("ioeng->queue(): err: '%d'\n", err);
572 xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
574 io_u->error = abs(err);
576 return FIO_Q_COMPLETED;
580 static int xnvme_fioe_close(struct thread_data *td, struct fio_file *f)
582 struct xnvme_fioe_data *xd = td->io_ops_data;
584 dprint(FD_FILE, "xnvme close %s -- nopen: %ld\n", f->file_name, xd->nopen);
591 static int xnvme_fioe_open(struct thread_data *td, struct fio_file *f)
593 struct xnvme_fioe_data *xd = td->io_ops_data;
595 dprint(FD_FILE, "xnvme open %s -- nopen: %ld\n", f->file_name, xd->nopen);
597 if (f->fileno > (int)xd->nallocated) {
598 log_err("ioeng->open(): f->fileno > xd->nallocated; invalid assumption\n");
601 if (xd->files[f->fileno].fio_file != f) {
602 log_err("ioeng->open(): fio_file != f; invalid assumption\n");
/* No-op cache invalidation */
static int xnvme_fioe_invalidate(struct thread_data *td, struct fio_file *f)
{
	/* Consider only doing this with be:spdk */
	return 0;
}
617 static int xnvme_fioe_get_max_open_zones(struct thread_data *td, struct fio_file *f,
618 unsigned int *max_open_zones)
620 struct xnvme_opts opts = xnvme_opts_from_fioe(td);
621 struct xnvme_dev *dev;
622 const struct xnvme_spec_znd_idfy_ns *zns;
623 int err = 0, err_lock;
625 if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
626 f->filetype != FIO_TYPE_CHAR) {
627 log_info("ioeng->get_max_open_zoned(): ignoring filetype: %d\n", f->filetype);
630 err_lock = pthread_mutex_lock(&g_serialize);
632 log_err("ioeng->get_max_open_zones(): pthread_mutex_lock(), err(%d)\n", err_lock);
636 dev = xnvme_dev_open(f->file_name, &opts);
638 log_err("ioeng->get_max_open_zones(): xnvme_dev_open(), err(%d)\n", err_lock);
642 if (xnvme_dev_get_geo(dev)->type != XNVME_GEO_ZONED) {
648 zns = (void *)xnvme_dev_get_ns_css(dev);
650 log_err("ioeng->get_max_open_zones(): xnvme_dev_get_ns_css(), err(%d)\n", errno);
656 * intentional overflow as the value is zero-based and NVMe
657 * defines 0xFFFFFFFF as unlimited thus overflowing to 0 which
658 * is how fio indicates unlimited and otherwise just converting
661 *max_open_zones = zns->mor + 1;
664 xnvme_dev_close(dev);
665 err_lock = pthread_mutex_unlock(&g_serialize);
667 log_err("ioeng->get_max_open_zones(): pthread_mutex_unlock(), err(%d)\n",
674 * Currently, this function is called before of I/O engine initialization, so,
675 * we cannot consult the file-wrapping done when 'fioe' initializes.
676 * Instead we just open based on the given filename.
678 * TODO: unify the different setup methods, consider keeping the handle around,
679 * and consider how to support the --be option in this usecase
681 static int xnvme_fioe_get_zoned_model(struct thread_data *td, struct fio_file *f,
682 enum zbd_zoned_model *model)
684 struct xnvme_opts opts = xnvme_opts_from_fioe(td);
685 struct xnvme_dev *dev;
686 int err = 0, err_lock;
688 if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
689 f->filetype != FIO_TYPE_CHAR) {
690 log_info("ioeng->get_zoned_model(): ignoring filetype: %d\n", f->filetype);
694 err = pthread_mutex_lock(&g_serialize);
696 log_err("ioeng->get_zoned_model(): pthread_mutex_lock(), err(%d)\n", err);
700 dev = xnvme_dev_open(f->file_name, &opts);
702 log_err("ioeng->get_zoned_model(): xnvme_dev_open(%s) failed, errno: %d\n",
703 f->file_name, errno);
708 switch (xnvme_dev_get_geo(dev)->type) {
709 case XNVME_GEO_UNKNOWN:
710 dprint(FD_ZBD, "%s: got 'unknown', assigning ZBD_NONE\n", f->file_name);
714 case XNVME_GEO_CONVENTIONAL:
715 dprint(FD_ZBD, "%s: got 'conventional', assigning ZBD_NONE\n", f->file_name);
719 case XNVME_GEO_ZONED:
720 dprint(FD_ZBD, "%s: got 'zoned', assigning ZBD_HOST_MANAGED\n", f->file_name);
721 *model = ZBD_HOST_MANAGED;
725 dprint(FD_ZBD, "%s: hit-default, assigning ZBD_NONE\n", f->file_name);
733 xnvme_dev_close(dev);
735 err_lock = pthread_mutex_unlock(&g_serialize);
737 log_err("ioeng->get_zoned_model(): pthread_mutex_unlock(), err(%d)\n", err_lock);
743 * Fills the given ``zbdz`` with at most ``nr_zones`` zone-descriptors.
745 * The implementation converts the NVMe Zoned Command Set log-pages for Zone
746 * descriptors into the Linux Kernel Zoned Block Report format.
748 * NOTE: This function is called before I/O engine initialization, that is,
749 * before ``_dev_open`` has been called and file-wrapping is setup. Thus is has
750 * to do the ``_dev_open`` itself, and shut it down again once it is done
751 * retrieving the log-pages and converting them to the report format.
753 * TODO: unify the different setup methods, consider keeping the handle around,
754 * and consider how to support the --async option in this usecase
756 static int xnvme_fioe_report_zones(struct thread_data *td, struct fio_file *f, uint64_t offset,
757 struct zbd_zone *zbdz, unsigned int nr_zones)
759 struct xnvme_opts opts = xnvme_opts_from_fioe(td);
760 const struct xnvme_spec_znd_idfy_lbafe *lbafe = NULL;
761 struct xnvme_dev *dev = NULL;
762 const struct xnvme_geo *geo = NULL;
763 struct xnvme_znd_report *rprt = NULL;
766 unsigned int limit = 0;
767 int err = 0, err_lock;
769 dprint(FD_ZBD, "%s: report_zones() offset: %zu, nr_zones: %u\n", f->file_name, offset,
772 err = pthread_mutex_lock(&g_serialize);
774 log_err("ioeng->report_zones(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
779 dev = xnvme_dev_open(f->file_name, &opts);
781 log_err("ioeng->report_zones(%s): xnvme_dev_open(), err(%d)\n", f->file_name,
786 geo = xnvme_dev_get_geo(dev);
787 ssw = xnvme_dev_get_ssw(dev);
788 lbafe = xnvme_znd_dev_get_lbafe(dev);
790 limit = nr_zones > geo->nzone ? geo->nzone : nr_zones;
792 dprint(FD_ZBD, "%s: limit: %u\n", f->file_name, limit);
794 slba = ((offset >> ssw) / geo->nsect) * geo->nsect;
796 rprt = xnvme_znd_report_from_dev(dev, slba, limit, 0);
798 log_err("ioeng->report_zones(%s): xnvme_znd_report_from_dev(), err(%d)\n",
799 f->file_name, errno);
803 if (rprt->nentries != limit) {
804 log_err("ioeng->report_zones(%s): nentries != nr_zones\n", f->file_name);
808 if (offset > geo->tbytes) {
809 log_err("ioeng->report_zones(%s): out-of-bounds\n", f->file_name);
813 /* Transform the zone-report */
814 for (uint32_t idx = 0; idx < rprt->nentries; ++idx) {
815 struct xnvme_spec_znd_descr *descr = XNVME_ZND_REPORT_DESCR(rprt, idx);
817 zbdz[idx].start = descr->zslba << ssw;
818 zbdz[idx].len = lbafe->zsze << ssw;
819 zbdz[idx].capacity = descr->zcap << ssw;
820 zbdz[idx].wp = descr->wp << ssw;
823 case XNVME_SPEC_ZND_TYPE_SEQWR:
824 zbdz[idx].type = ZBD_ZONE_TYPE_SWR;
828 log_err("ioeng->report_zones(%s): invalid type for zone at offset(%zu)\n",
829 f->file_name, zbdz[idx].start);
835 case XNVME_SPEC_ZND_STATE_EMPTY:
836 zbdz[idx].cond = ZBD_ZONE_COND_EMPTY;
838 case XNVME_SPEC_ZND_STATE_IOPEN:
839 zbdz[idx].cond = ZBD_ZONE_COND_IMP_OPEN;
841 case XNVME_SPEC_ZND_STATE_EOPEN:
842 zbdz[idx].cond = ZBD_ZONE_COND_EXP_OPEN;
844 case XNVME_SPEC_ZND_STATE_CLOSED:
845 zbdz[idx].cond = ZBD_ZONE_COND_CLOSED;
847 case XNVME_SPEC_ZND_STATE_FULL:
848 zbdz[idx].cond = ZBD_ZONE_COND_FULL;
851 case XNVME_SPEC_ZND_STATE_RONLY:
852 case XNVME_SPEC_ZND_STATE_OFFLINE:
854 zbdz[idx].cond = ZBD_ZONE_COND_OFFLINE;
860 xnvme_buf_virt_free(rprt);
862 xnvme_dev_close(dev);
864 err_lock = pthread_mutex_unlock(&g_serialize);
866 log_err("ioeng->report_zones(): pthread_mutex_unlock(), err: %d\n", err_lock);
868 dprint(FD_ZBD, "err: %d, nr_zones: %d\n", err, (int)nr_zones);
870 return err ? err : (int)limit;
874 * NOTE: This function may get called before I/O engine initialization, that is,
875 * before ``_dev_open`` has been called and file-wrapping is setup. In such
876 * case it has to do ``_dev_open`` itself, and shut it down again once it is
877 * done resetting write pointer of zones.
879 static int xnvme_fioe_reset_wp(struct thread_data *td, struct fio_file *f, uint64_t offset,
882 struct xnvme_opts opts = xnvme_opts_from_fioe(td);
883 struct xnvme_fioe_data *xd = NULL;
884 struct xnvme_fioe_fwrap *fwrap = NULL;
885 struct xnvme_dev *dev = NULL;
886 const struct xnvme_geo *geo = NULL;
887 uint64_t first, last;
890 int err = 0, err_lock;
892 if (td->io_ops_data) {
893 xd = td->io_ops_data;
894 fwrap = &xd->files[f->fileno];
903 err = pthread_mutex_lock(&g_serialize);
905 log_err("ioeng->reset_wp(): pthread_mutex_lock(), err(%d)\n", err);
909 dev = xnvme_dev_open(f->file_name, &opts);
911 log_err("ioeng->reset_wp(): xnvme_dev_open(%s) failed, errno(%d)\n",
912 f->file_name, errno);
915 geo = xnvme_dev_get_geo(dev);
916 ssw = xnvme_dev_get_ssw(dev);
919 nsid = xnvme_dev_get_nsid(dev);
921 first = ((offset >> ssw) / geo->nsect) * geo->nsect;
922 last = (((offset + length) >> ssw) / geo->nsect) * geo->nsect;
923 dprint(FD_ZBD, "first: 0x%lx, last: 0x%lx\n", first, last);
925 for (uint64_t zslba = first; zslba < last; zslba += geo->nsect) {
926 struct xnvme_cmd_ctx ctx = xnvme_cmd_ctx_from_dev(dev);
928 if (zslba >= (geo->nsect * geo->nzone)) {
929 log_err("ioeng->reset_wp(): out-of-bounds\n");
934 err = xnvme_znd_mgmt_send(&ctx, nsid, zslba, false,
935 XNVME_SPEC_ZND_CMD_MGMT_SEND_RESET, 0x0, NULL);
936 if (err || xnvme_cmd_ctx_cpl_status(&ctx)) {
937 err = err ? err : -EIO;
938 log_err("ioeng->reset_wp(): err(%d), sc(%d)", err, ctx.cpl.status.sc);
944 if (!td->io_ops_data) {
945 xnvme_dev_close(dev);
947 err_lock = pthread_mutex_unlock(&g_serialize);
949 log_err("ioeng->reset_wp(): pthread_mutex_unlock(), err(%d)\n", err_lock);
955 static int xnvme_fioe_fetch_ruhs(struct thread_data *td, struct fio_file *f,
956 struct fio_ruhs_info *fruhs_info)
958 struct xnvme_opts opts = xnvme_opts_from_fioe(td);
959 struct xnvme_dev *dev;
960 struct xnvme_spec_ruhs *ruhs;
961 struct xnvme_cmd_ctx ctx;
962 uint32_t ruhs_nbytes;
964 int err = 0, err_lock;
966 if (f->filetype != FIO_TYPE_CHAR && f->filetype != FIO_TYPE_FILE) {
967 log_err("ioeng->fdp_ruhs(): ignoring filetype: %d\n", f->filetype);
971 err = pthread_mutex_lock(&g_serialize);
973 log_err("ioeng->fdp_ruhs(): pthread_mutex_lock(), err(%d)\n", err);
977 dev = xnvme_dev_open(f->file_name, &opts);
979 log_err("ioeng->fdp_ruhs(): xnvme_dev_open(%s) failed, errno: %d\n",
980 f->file_name, errno);
985 ruhs_nbytes = sizeof(*ruhs) + (FDP_MAX_RUHS * sizeof(struct xnvme_spec_ruhs_desc));
986 ruhs = xnvme_buf_alloc(dev, ruhs_nbytes);
991 memset(ruhs, 0, ruhs_nbytes);
993 ctx = xnvme_cmd_ctx_from_dev(dev);
994 nsid = xnvme_dev_get_nsid(dev);
996 err = xnvme_nvm_mgmt_recv(&ctx, nsid, XNVME_SPEC_IO_MGMT_RECV_RUHS, 0, ruhs, ruhs_nbytes);
998 if (err || xnvme_cmd_ctx_cpl_status(&ctx)) {
999 err = err ? err : -EIO;
1000 log_err("ioeng->fdp_ruhs(): err(%d), sc(%d)", err, ctx.cpl.status.sc);
1004 fruhs_info->nr_ruhs = ruhs->nruhsd;
1005 for (uint32_t idx = 0; idx < fruhs_info->nr_ruhs; ++idx) {
1006 fruhs_info->plis[idx] = le16_to_cpu(ruhs->desc[idx].pi);
1010 xnvme_buf_free(dev, ruhs);
1012 xnvme_dev_close(dev);
1014 err_lock = pthread_mutex_unlock(&g_serialize);
1016 log_err("ioeng->fdp_ruhs(): pthread_mutex_unlock(), err(%d)\n", err_lock);
1021 static int xnvme_fioe_get_file_size(struct thread_data *td, struct fio_file *f)
1023 struct xnvme_opts opts = xnvme_opts_from_fioe(td);
1024 struct xnvme_dev *dev;
1027 if (fio_file_size_known(f))
1030 ret = pthread_mutex_lock(&g_serialize);
1032 log_err("ioeng->reset_wp(): pthread_mutex_lock(), err(%d)\n", ret);
1036 dev = xnvme_dev_open(f->file_name, &opts);
1038 log_err("%s: failed retrieving device handle, errno: %d\n", f->file_name, errno);
1043 f->real_file_size = xnvme_dev_get_geo(dev)->tbytes;
1044 fio_file_set_size_known(f);
1046 if (td->o.zone_mode == ZONE_MODE_ZBD)
1047 f->filetype = FIO_TYPE_BLOCK;
1050 xnvme_dev_close(dev);
1051 err = pthread_mutex_unlock(&g_serialize);
1053 log_err("ioeng->reset_wp(): pthread_mutex_unlock(), err(%d)\n", err);
1058 FIO_STATIC struct ioengine_ops ioengine = {
1060 .version = FIO_IOOPS_VERSION,
1062 .option_struct_size = sizeof(struct xnvme_fioe_options),
1063 .flags = FIO_DISKLESSIO | FIO_NODISKUTIL | FIO_NOEXTEND | FIO_MEMALIGN | FIO_RAWIO,
1065 .cleanup = xnvme_fioe_cleanup,
1066 .init = xnvme_fioe_init,
1068 .iomem_free = xnvme_fioe_iomem_free,
1069 .iomem_alloc = xnvme_fioe_iomem_alloc,
1071 .io_u_free = xnvme_fioe_io_u_free,
1072 .io_u_init = xnvme_fioe_io_u_init,
1074 .event = xnvme_fioe_event,
1075 .getevents = xnvme_fioe_getevents,
1076 .queue = xnvme_fioe_queue,
1078 .close_file = xnvme_fioe_close,
1079 .open_file = xnvme_fioe_open,
1080 .get_file_size = xnvme_fioe_get_file_size,
1082 .invalidate = xnvme_fioe_invalidate,
1083 .get_max_open_zones = xnvme_fioe_get_max_open_zones,
1084 .get_zoned_model = xnvme_fioe_get_zoned_model,
1085 .report_zones = xnvme_fioe_report_zones,
1086 .reset_wp = xnvme_fioe_reset_wp,
1088 .fdp_fetch_ruhs = xnvme_fioe_fetch_ruhs,
1091 static void fio_init fio_xnvme_register(void)
1093 register_ioengine(&ioengine);
1096 static void fio_exit fio_xnvme_unregister(void)
1098 unregister_ioengine(&ioengine);