/*
 * fio xNVMe IO Engine
 *
 * IO engine using the xNVMe C API.
 *
 * See: http://xnvme.io/
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include <stdlib.h>
#include <assert.h>
#include <libxnvme.h>
#include <libxnvme_libconf.h>
#include <libxnvme_nvm.h>
#include <libxnvme_znd.h>
#include <libxnvme_spec_fs.h>
#include "fio.h"
#include "zbd_types.h"
#include "optgroup.h"

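/*
 * Serializes device open/close, queue setup/teardown, and the helpers that
 * temporarily open a device (zone reporting, file sizing, etc.) across fio
 * threads; presumably because backend/device initialization is not
 * thread-safe.
 */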
static pthread_mutex_t g_serialize = PTHREAD_MUTEX_INITIALIZER;

struct xnvme_fioe_fwrap {
	/* fio file representation */
	struct fio_file *fio_file;

	/* xNVMe device handle */
	struct xnvme_dev *dev;
	/* xNVMe device geometry */
	const struct xnvme_geo *geo;

	struct xnvme_queue *queue;

	uint32_t ssw;
	uint32_t lba_nbytes;

	uint8_t _pad[24];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_fwrap) == 64, "Incorrect size")

struct xnvme_fioe_data {
	/* I/O completion queue */
	struct io_u **iocq;

	/* # of iocq entries; incremented via getevents()/cb_pool() */
	uint64_t completed;

	/*
	 * # of errors; incremented when observed on completion via
	 * getevents()/cb_pool()
	 */
	uint64_t ecount;

	/* Controls which device/file to select */
	int32_t prev;
	int32_t cur;

	/* Number of devices/files for which open() has been called */
	int64_t nopen;
	/* Number of devices/files allocated in files[] */
	uint64_t nallocated;

	struct iovec *iovec;

	uint8_t _pad[8];

	struct xnvme_fioe_fwrap files[];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_data) == 64, "Incorrect size")

struct xnvme_fioe_options {
	void *padding;
	unsigned int hipri;
	unsigned int sqpoll_thread;
	unsigned int xnvme_dev_nsid;
	unsigned int xnvme_iovec;
	char *xnvme_be;
	char *xnvme_async;
	char *xnvme_sync;
	char *xnvme_admin;
	char *xnvme_dev_subnqn;
};

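/*
 * Engine-specific options follow. A rough usage sketch (values are purely
 * illustrative, and the exact filename/URI syntax depends on the chosen
 * xNVMe backend):
 *
 *   fio --name=job0 --thread --ioengine=xnvme --xnvme_be=spdk \
 *       --xnvme_dev_nsid=1 \
 *       --xnvme_dev_subnqn=nqn.2014-08.org.example:subsys0 \
 *       --filename=... --rw=randread --bs=4k --iodepth=16
 */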
static struct fio_option options[] = {
	{
		.name = "hipri",
		.lname = "High Priority",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, hipri),
		.help = "Use polled IO completions",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "sqthread_poll",
		.lname = "Kernel SQ thread polling",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, sqpoll_thread),
		.help = "Offload submission/completion to kernel thread",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_be",
		.lname = "xNVMe Backend",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_be),
		.help = "Select xNVMe backend [spdk,linux,fbsd]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_async",
		.lname = "xNVMe Asynchronous command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_async),
		.help = "Select xNVMe async. interface: "
			"[emu,thrpool,io_uring,io_uring_cmd,libaio,posix,vfio,nil]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_sync",
		.lname = "xNVMe Synchronous command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_sync),
		.help = "Select xNVMe sync. interface: [nvme,psync,block]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_admin",
		.lname = "xNVMe Admin command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_admin),
		.help = "Select xNVMe admin. cmd-interface: [nvme,block]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_dev_nsid",
		.lname = "xNVMe Namespace-Identifier, for user-space NVMe driver",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_dev_nsid),
		.help = "xNVMe Namespace-Identifier, for user-space NVMe driver",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_dev_subnqn",
		.lname = "Subsystem NQN for Fabrics",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_dev_subnqn),
		.help = "Subsystem NQN for Fabrics",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_iovec",
		.lname = "Vectored IOs",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_iovec),
		.help = "Send vectored IOs",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},

	{
		.name = NULL,
	},
};

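/*
 * Completion callback, invoked from xnvme_queue_poke() in getevents(): on a
 * failed completion it records the error in the io_u and bumps ecount, then
 * it stashes the io_u in the completion queue and hands the command context
 * back to the queue's pool.
 */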
static void cb_pool(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct io_u *io_u = cb_arg;
	struct xnvme_fioe_data *xd = io_u->mmap_data;

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		xd->ecount += 1;
		io_u->error = EIO;
	}

	xd->iocq[xd->completed++] = io_u;
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}

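/*
 * Translates the fio thread- and engine-options into the xnvme_opts handed to
 * xnvme_dev_open(); notably the backend, async/sync/admin interfaces, nsid,
 * subnqn, the polling knobs and O_DIRECT.
 */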
static struct xnvme_opts xnvme_opts_from_fioe(struct thread_data *td)
{
	struct xnvme_fioe_options *o = td->eo;
	struct xnvme_opts opts = xnvme_opts_default();

	opts.nsid = o->xnvme_dev_nsid;
	opts.subnqn = o->xnvme_dev_subnqn;
	opts.be = o->xnvme_be;
	opts.async = o->xnvme_async;
	opts.sync = o->xnvme_sync;
	opts.admin = o->xnvme_admin;

	opts.poll_io = o->hipri;
	opts.poll_sq = o->sqpoll_thread;

	opts.direct = td->o.odirect;

	return opts;
}

static void _dev_close(struct thread_data *td, struct xnvme_fioe_fwrap *fwrap)
{
	if (fwrap->dev)
		xnvme_queue_term(fwrap->queue);

	xnvme_dev_close(fwrap->dev);

	memset(fwrap, 0, sizeof(*fwrap));
}

static void xnvme_fioe_cleanup(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	int err;

	if (!td->io_ops_data)
		return;

	xd = td->io_ops_data;

	err = pthread_mutex_lock(&g_serialize);
	if (err)
		log_err("ioeng->cleanup(): pthread_mutex_lock(), err(%d)\n", err);
		/* NOTE: not returning here */

	for (uint64_t i = 0; i < xd->nallocated; ++i)
		_dev_close(td, &xd->files[i]);

	if (!err) {
		err = pthread_mutex_unlock(&g_serialize);
		if (err)
			log_err("ioeng->cleanup(): pthread_mutex_unlock(), err(%d)\n", err);
	}

	free(xd->iocq);
	free(xd->iovec);
	free(xd);
	td->io_ops_data = NULL;
}

/**
 * Helper function setting up device handles as addressed by the naming
 * convention of the given `fio_file` filename.
 *
 * Checks thread-options for explicit control of asynchronous implementation via
 * the ``--xnvme_async={thrpool,emu,posix,io_uring,libaio,nil}``.
 */
static int _dev_open(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap;
	int flags = 0;
	int err;

	if (f->fileno > (int)xd->nallocated) {
		log_err("ioeng->_dev_open(%s): invalid assumption\n", f->file_name);
		return 1;
	}

	fwrap = &xd->files[f->fileno];

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->_dev_open(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
			err);
		return -err;
	}

	fwrap->dev = xnvme_dev_open(f->file_name, &opts);
	if (!fwrap->dev) {
		log_err("ioeng->_dev_open(%s): xnvme_dev_open(), err(%d)\n", f->file_name, errno);
		goto failure;
	}
	fwrap->geo = xnvme_dev_get_geo(fwrap->dev);

	if (xnvme_queue_init(fwrap->dev, td->o.iodepth, flags, &(fwrap->queue))) {
		log_err("ioeng->_dev_open(%s): xnvme_queue_init(), err(?)\n", f->file_name);
		goto failure;
	}
	xnvme_queue_set_cb(fwrap->queue, cb_pool, NULL);

	fwrap->ssw = xnvme_dev_get_ssw(fwrap->dev);
	fwrap->lba_nbytes = fwrap->geo->lba_nbytes;

	fwrap->fio_file = f;
	fwrap->fio_file->filetype = FIO_TYPE_BLOCK;
	fwrap->fio_file->real_file_size = fwrap->geo->tbytes;
	fio_file_set_size_known(fwrap->fio_file);

	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
			err);

	return 0;

failure:
	xnvme_queue_term(fwrap->queue);
	xnvme_dev_close(fwrap->dev);

	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
			err);

	return 1;
}

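/*
 * init() opens every fio file up front via _dev_open() and allocates the
 * completion-queue and iovec arrays sized to the configured iodepth. The
 * engine requires --thread; presumably because the xNVMe device handles and
 * the global serialization mutex are only valid within a single process.
 */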
static int xnvme_fioe_init(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	struct fio_file *f;
	unsigned int i;

	if (!td->o.use_thread) {
		log_err("ioeng->init(): --thread=1 is required\n");
		return 1;
	}

	/* Allocate xd and iocq */
	xd = calloc(1, sizeof(*xd) + sizeof(*xd->files) * td->o.nr_files);
	if (!xd) {
		log_err("ioeng->init(): !calloc(), err(%d)\n", errno);
		return 1;
	}

	xd->iocq = calloc(td->o.iodepth, sizeof(struct io_u *));
	if (!xd->iocq) {
		free(xd);
		log_err("ioeng->init(): !calloc(xd->iocq), err(%d)\n", errno);
		return 1;
	}

	xd->iovec = calloc(td->o.iodepth, sizeof(*xd->iovec));
	if (!xd->iovec) {
		free(xd->iocq);
		free(xd);
		log_err("ioeng->init(): !calloc(xd->iovec), err(%d)\n", errno);
		return 1;
	}

	xd->prev = -1;
	td->io_ops_data = xd;

	for_each_file(td, f, i)
	{
		if (_dev_open(td, f)) {
			/*
			 * Note: We are not freeing xd, iocq and iovec. This
			 * will be done as part of cleanup routine.
			 */
			log_err("ioeng->init(): failed; _dev_open(%s)\n", f->file_name);
			return 1;
		}

		++(xd->nallocated);
	}

	if (xd->nallocated != td->o.nr_files) {
		log_err("ioeng->init(): failed; nallocated != td->o.nr_files\n");
		return 1;
	}

	return 0;
}

/* NOTE: using the first device for buffer-allocators */
static int xnvme_fioe_iomem_alloc(struct thread_data *td, size_t total_mem)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap = &xd->files[0];

	if (!fwrap->dev) {
		log_err("ioeng->iomem_alloc(): failed; no dev-handle\n");
		return 1;
	}

	td->orig_buffer = xnvme_buf_alloc(fwrap->dev, total_mem);

	return td->orig_buffer == NULL;
}

/* NOTE: using the first device for buffer-allocators */
static void xnvme_fioe_iomem_free(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	struct xnvme_fioe_fwrap *fwrap = NULL;

	if (!td->io_ops_data)
		return;

	xd = td->io_ops_data;
	fwrap = &xd->files[0];

	if (!fwrap->dev) {
		log_err("ioeng->iomem_free(): failed; no dev-handle\n");
		return;
	}

	xnvme_buf_free(fwrap->dev, td->orig_buffer);
}

static int xnvme_fioe_io_u_init(struct thread_data *td, struct io_u *io_u)
{
	io_u->mmap_data = td->io_ops_data;

	return 0;
}

static void xnvme_fioe_io_u_free(struct thread_data *td, struct io_u *io_u)
{
	io_u->mmap_data = NULL;
}

static struct io_u *xnvme_fioe_event(struct thread_data *td, int event)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	assert(event >= 0);
	assert((unsigned)event < xd->completed);

	return xd->iocq[event];
}

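/*
 * Reaps completions by poking the queues of all open devices in round-robin
 * fashion, resuming at the device after the one that satisfied the previous
 * call, and keeps iterating until at least 'min' completions have been
 * gathered into the iocq array.
 */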
static int xnvme_fioe_getevents(struct thread_data *td, unsigned int min, unsigned int max,
				const struct timespec *t)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap = NULL;
	int nfiles = xd->nallocated;
	int err = 0;

	if (xd->prev != -1 && ++xd->prev < nfiles) {
		fwrap = &xd->files[xd->prev];
		xd->cur = xd->prev;
	}

	xd->completed = 0;
	for (;;) {
		if (fwrap == NULL || xd->cur == nfiles) {
			fwrap = &xd->files[0];
			xd->cur = 0;
		}

		while (fwrap != NULL && xd->cur < nfiles && err >= 0) {
			err = xnvme_queue_poke(fwrap->queue, max - xd->completed);
			if (err < 0) {
				switch (err) {
				case -EBUSY:
				case -EAGAIN:
					usleep(1);
					break;

				default:
					log_err("ioeng->getevents(): unhandled IO error\n");
					assert(false);
					return 0;
				}
			}
			if (xd->completed >= min) {
				xd->prev = xd->cur;
				return xd->completed;
			}
			xd->cur++;
			fwrap = &xd->files[xd->cur];

			if (err < 0) {
				switch (err) {
				case -EBUSY:
				case -EAGAIN:
					usleep(1);
					break;
				}
			}
		}
	}

	xd->cur = 0;

	return xd->completed;
}

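/*
 * Submission path: the byte offset and transfer length are converted to an
 * LBA range by shifting with the device's sector-shift-width (ssw); nlb is
 * zero-based, as per the NVMe specification. The command is then submitted
 * either as a single buffer via xnvme_cmd_pass() or, with --xnvme_iovec, as
 * a one-entry iovec via xnvme_cmd_passv().
 */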
static enum fio_q_status xnvme_fioe_queue(struct thread_data *td, struct io_u *io_u)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap;
	struct xnvme_cmd_ctx *ctx;
	uint32_t nsid;
	uint64_t slba;
	uint16_t nlb;
	int err;
	bool vectored_io = ((struct xnvme_fioe_options *)td->eo)->xnvme_iovec;

	fio_ro_check(td, io_u);

	fwrap = &xd->files[io_u->file->fileno];
	nsid = xnvme_dev_get_nsid(fwrap->dev);

	slba = io_u->offset >> fwrap->ssw;
	nlb = (io_u->xfer_buflen >> fwrap->ssw) - 1;

	ctx = xnvme_queue_get_cmd_ctx(fwrap->queue);
	ctx->async.cb_arg = io_u;

	ctx->cmd.common.nsid = nsid;
	ctx->cmd.nvm.slba = slba;
	ctx->cmd.nvm.nlb = nlb;

	switch (io_u->ddir) {
	case DDIR_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		break;

	case DDIR_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		break;

	default:
		log_err("ioeng->queue(): ENOSYS: %u\n", io_u->ddir);
		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);

		io_u->error = ENOSYS;
		assert(false);
		return FIO_Q_COMPLETED;
	}

	if (vectored_io) {
		xd->iovec[io_u->index].iov_base = io_u->xfer_buf;
		xd->iovec[io_u->index].iov_len = io_u->xfer_buflen;

		err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen, NULL, 0,
				      0);
	} else {
		err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen, NULL, 0);
	}
	switch (err) {
	case 0:
		return FIO_Q_QUEUED;

	case -EBUSY:
	case -EAGAIN:
		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
		return FIO_Q_BUSY;

	default:
		log_err("ioeng->queue(): err: '%d'\n", err);

		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);

		io_u->error = abs(err);
		assert(false);
		return FIO_Q_COMPLETED;
	}
}

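/*
 * Devices are opened once during init() via _dev_open(); open_file() merely
 * sanity-checks the file/wrapper pairing, and both hooks only keep the nopen
 * counter in sync.
 */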
static int xnvme_fioe_close(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	dprint(FD_FILE, "xnvme close %s -- nopen: %ld\n", f->file_name, xd->nopen);

	--(xd->nopen);

	return 0;
}

static int xnvme_fioe_open(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	dprint(FD_FILE, "xnvme open %s -- nopen: %ld\n", f->file_name, xd->nopen);

	if (f->fileno > (int)xd->nallocated) {
		log_err("ioeng->open(): f->fileno > xd->nallocated; invalid assumption\n");
		return 1;
	}
	if (xd->files[f->fileno].fio_file != f) {
		log_err("ioeng->open(): fio_file != f; invalid assumption\n");
		return 1;
	}

	++(xd->nopen);

	return 0;
}

static int xnvme_fioe_invalidate(struct thread_data *td, struct fio_file *f)
{
	/* Consider only doing this with be:spdk */
	return 0;
}

static int xnvme_fioe_get_max_open_zones(struct thread_data *td, struct fio_file *f,
					  unsigned int *max_open_zones)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	const struct xnvme_spec_znd_idfy_ns *zns;
	int err = 0, err_lock;

	if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
	    f->filetype != FIO_TYPE_CHAR) {
		log_info("ioeng->get_max_open_zones(): ignoring filetype: %d\n", f->filetype);
		return 0;
	}
	err_lock = pthread_mutex_lock(&g_serialize);
	if (err_lock) {
		log_err("ioeng->get_max_open_zones(): pthread_mutex_lock(), err(%d)\n", err_lock);
		return -err_lock;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->get_max_open_zones(): xnvme_dev_open(), err(%d)\n", errno);
		err = -errno;
		goto exit;
	}
	if (xnvme_dev_get_geo(dev)->type != XNVME_GEO_ZONED) {
		errno = EINVAL;
		err = -errno;
		goto exit;
	}

	zns = (void *)xnvme_dev_get_ns_css(dev);
	if (!zns) {
		log_err("ioeng->get_max_open_zones(): xnvme_dev_get_ns_css(), err(%d)\n", errno);
		err = -errno;
		goto exit;
	}

	/*
	 * intentional overflow as the value is zero-based and NVMe
	 * defines 0xFFFFFFFF as unlimited thus overflowing to 0 which
	 * is how fio indicates unlimited and otherwise just converting
	 * to one-based.
	 */
	*max_open_zones = zns->mor + 1;

exit:
	xnvme_dev_close(dev);
	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->get_max_open_zones(): pthread_mutex_unlock(), err(%d)\n",
			err_lock);

	return err;
}

/**
 * Currently, this function is called before I/O engine initialization, so
 * we cannot consult the file-wrapping done when 'fioe' initializes.
 * Instead we just open based on the given filename.
 *
 * TODO: unify the different setup methods, consider keeping the handle around,
 * and consider how to support the --be option in this usecase
 */
static int xnvme_fioe_get_zoned_model(struct thread_data *td, struct fio_file *f,
				       enum zbd_zoned_model *model)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	int err = 0, err_lock;

	if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
	    f->filetype != FIO_TYPE_CHAR) {
		log_info("ioeng->get_zoned_model(): ignoring filetype: %d\n", f->filetype);
		return -EINVAL;
	}

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->get_zoned_model(): pthread_mutex_lock(), err(%d)\n", err);
		return -err;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->get_zoned_model(): xnvme_dev_open(%s) failed, errno: %d\n",
			f->file_name, errno);
		err = -errno;
		goto exit;
	}

	switch (xnvme_dev_get_geo(dev)->type) {
	case XNVME_GEO_UNKNOWN:
		dprint(FD_ZBD, "%s: got 'unknown', assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		break;

	case XNVME_GEO_CONVENTIONAL:
		dprint(FD_ZBD, "%s: got 'conventional', assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		break;

	case XNVME_GEO_ZONED:
		dprint(FD_ZBD, "%s: got 'zoned', assigning ZBD_HOST_MANAGED\n", f->file_name);
		*model = ZBD_HOST_MANAGED;
		break;

	default:
		dprint(FD_ZBD, "%s: hit-default, assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		errno = EINVAL;
		err = -errno;
		break;
	}

exit:
	xnvme_dev_close(dev);

	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->get_zoned_model(): pthread_mutex_unlock(), err(%d)\n", err_lock);

	return err;
}

/**
 * Fills the given ``zbdz`` with at most ``nr_zones`` zone-descriptors.
 *
 * The implementation converts the NVMe Zoned Command Set log-pages for Zone
 * descriptors into the Linux Kernel Zoned Block Report format.
 *
 * NOTE: This function is called before I/O engine initialization, that is,
 * before ``_dev_open`` has been called and file-wrapping is set up. Thus it
 * has to do the ``_dev_open`` itself, and shut it down again once it is done
 * retrieving the log-pages and converting them to the report format.
 *
 * TODO: unify the different setup methods, consider keeping the handle around,
 * and consider how to support the --async option in this usecase
 */
static int xnvme_fioe_report_zones(struct thread_data *td, struct fio_file *f, uint64_t offset,
				   struct zbd_zone *zbdz, unsigned int nr_zones)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	const struct xnvme_spec_znd_idfy_lbafe *lbafe = NULL;
	struct xnvme_dev *dev = NULL;
	const struct xnvme_geo *geo = NULL;
	struct xnvme_znd_report *rprt = NULL;
	uint32_t ssw;
	uint64_t slba;
	unsigned int limit = 0;
	int err = 0, err_lock;

	dprint(FD_ZBD, "%s: report_zones() offset: %zu, nr_zones: %u\n", f->file_name, offset,
	       nr_zones);

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->report_zones(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
			err);
		return -err;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->report_zones(%s): xnvme_dev_open(), err(%d)\n", f->file_name,
			errno);
		goto exit;
	}

	geo = xnvme_dev_get_geo(dev);
	ssw = xnvme_dev_get_ssw(dev);
	lbafe = xnvme_znd_dev_get_lbafe(dev);

	limit = nr_zones > geo->nzone ? geo->nzone : nr_zones;

	dprint(FD_ZBD, "%s: limit: %u\n", f->file_name, limit);

	slba = ((offset >> ssw) / geo->nsect) * geo->nsect;

	rprt = xnvme_znd_report_from_dev(dev, slba, limit, 0);
	if (!rprt) {
		log_err("ioeng->report_zones(%s): xnvme_znd_report_from_dev(), err(%d)\n",
			f->file_name, errno);
		err = -errno;
		goto exit;
	}
	if (rprt->nentries != limit) {
		log_err("ioeng->report_zones(%s): nentries != nr_zones\n", f->file_name);
		err = 1;
		goto exit;
	}
	if (offset > geo->tbytes) {
		log_err("ioeng->report_zones(%s): out-of-bounds\n", f->file_name);
		goto exit;
	}

	/* Transform the zone-report */
	for (uint32_t idx = 0; idx < rprt->nentries; ++idx) {
		struct xnvme_spec_znd_descr *descr = XNVME_ZND_REPORT_DESCR(rprt, idx);

		zbdz[idx].start = descr->zslba << ssw;
		zbdz[idx].len = lbafe->zsze << ssw;
		zbdz[idx].capacity = descr->zcap << ssw;
		zbdz[idx].wp = descr->wp << ssw;

		switch (descr->zt) {
		case XNVME_SPEC_ZND_TYPE_SEQWR:
			zbdz[idx].type = ZBD_ZONE_TYPE_SWR;
			break;

		default:
			log_err("ioeng->report_zones(%s): invalid type for zone at offset(%zu)\n",
				f->file_name, zbdz[idx].start);
			err = -EIO;
			goto exit;
		}

		switch (descr->zs) {
		case XNVME_SPEC_ZND_STATE_EMPTY:
			zbdz[idx].cond = ZBD_ZONE_COND_EMPTY;
			break;
		case XNVME_SPEC_ZND_STATE_IOPEN:
			zbdz[idx].cond = ZBD_ZONE_COND_IMP_OPEN;
			break;
		case XNVME_SPEC_ZND_STATE_EOPEN:
			zbdz[idx].cond = ZBD_ZONE_COND_EXP_OPEN;
			break;
		case XNVME_SPEC_ZND_STATE_CLOSED:
			zbdz[idx].cond = ZBD_ZONE_COND_CLOSED;
			break;
		case XNVME_SPEC_ZND_STATE_FULL:
			zbdz[idx].cond = ZBD_ZONE_COND_FULL;
			break;

		case XNVME_SPEC_ZND_STATE_RONLY:
		case XNVME_SPEC_ZND_STATE_OFFLINE:
		default:
			zbdz[idx].cond = ZBD_ZONE_COND_OFFLINE;
			break;
		}
	}

exit:
	xnvme_buf_virt_free(rprt);

	xnvme_dev_close(dev);

	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->report_zones(): pthread_mutex_unlock(), err: %d\n", err_lock);

	dprint(FD_ZBD, "err: %d, nr_zones: %d\n", err, (int)nr_zones);

	return err ? err : (int)limit;
}

/**
 * NOTE: This function may get called before I/O engine initialization, that is,
 * before ``_dev_open`` has been called and file-wrapping is set up. In that
 * case it has to do ``_dev_open`` itself, and shut it down again once it is
 * done resetting the write pointer of the zones.
 */
static int xnvme_fioe_reset_wp(struct thread_data *td, struct fio_file *f, uint64_t offset,
			       uint64_t length)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_fioe_data *xd = NULL;
	struct xnvme_fioe_fwrap *fwrap = NULL;
	struct xnvme_dev *dev = NULL;
	const struct xnvme_geo *geo = NULL;
	uint64_t first, last;
	uint32_t ssw;
	uint32_t nsid;
	int err = 0, err_lock;

	if (td->io_ops_data) {
		xd = td->io_ops_data;
		fwrap = &xd->files[f->fileno];

		assert(fwrap->dev);
		assert(fwrap->geo);

		dev = fwrap->dev;
		geo = fwrap->geo;
		ssw = fwrap->ssw;
	} else {
		err = pthread_mutex_lock(&g_serialize);
		if (err) {
			log_err("ioeng->reset_wp(): pthread_mutex_lock(), err(%d)\n", err);
			return -err;
		}

		dev = xnvme_dev_open(f->file_name, &opts);
		if (!dev) {
			log_err("ioeng->reset_wp(): xnvme_dev_open(%s) failed, errno(%d)\n",
				f->file_name, errno);
			goto exit;
		}
		geo = xnvme_dev_get_geo(dev);
		ssw = xnvme_dev_get_ssw(dev);
	}

	nsid = xnvme_dev_get_nsid(dev);

	first = ((offset >> ssw) / geo->nsect) * geo->nsect;
	last = (((offset + length) >> ssw) / geo->nsect) * geo->nsect;
	dprint(FD_ZBD, "first: 0x%lx, last: 0x%lx\n", first, last);

	for (uint64_t zslba = first; zslba < last; zslba += geo->nsect) {
		struct xnvme_cmd_ctx ctx = xnvme_cmd_ctx_from_dev(dev);

		if (zslba >= (geo->nsect * geo->nzone)) {
			log_err("ioeng->reset_wp(): out-of-bounds\n");
			err = 0;
			break;
		}

		err = xnvme_znd_mgmt_send(&ctx, nsid, zslba, false,
					  XNVME_SPEC_ZND_CMD_MGMT_SEND_RESET, 0x0, NULL);
		if (err || xnvme_cmd_ctx_cpl_status(&ctx)) {
			err = err ? err : -EIO;
			log_err("ioeng->reset_wp(): err(%d), sc(%d)", err, ctx.cpl.status.sc);
			goto exit;
		}
	}

exit:
	if (!td->io_ops_data) {
		xnvme_dev_close(dev);

		err_lock = pthread_mutex_unlock(&g_serialize);
		if (err_lock)
			log_err("ioeng->reset_wp(): pthread_mutex_unlock(), err(%d)\n", err_lock);
	}

	return err;
}

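/*
 * Determines the "file" size by temporarily opening the device and reading
 * the total capacity (tbytes) from its geometry; the handle is closed again
 * before returning.
 */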
static int xnvme_fioe_get_file_size(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	int ret = 0, err;

	if (fio_file_size_known(f))
		return 0;

	ret = pthread_mutex_lock(&g_serialize);
	if (ret) {
		log_err("ioeng->get_file_size(): pthread_mutex_lock(), err(%d)\n", ret);
		return -ret;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("%s: failed retrieving device handle, errno: %d\n", f->file_name, errno);
		ret = -errno;
		goto exit;
	}

	f->real_file_size = xnvme_dev_get_geo(dev)->tbytes;
	fio_file_set_size_known(f);
	f->filetype = FIO_TYPE_BLOCK;

exit:
	xnvme_dev_close(dev);
	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->get_file_size(): pthread_mutex_unlock(), err(%d)\n", err);

	return ret;
}

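/*
 * Engine registration. The flags tell fio that this engine bypasses the
 * regular file/disk path and manages devices and IO buffers itself
 * (FIO_DISKLESSIO, FIO_NODISKUTIL, FIO_RAWIO), that files must not be
 * extended (FIO_NOEXTEND), and that IO memory should be aligned
 * (FIO_MEMALIGN); see fio's ioengines.h for the authoritative meaning of
 * each flag.
 */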
FIO_STATIC struct ioengine_ops ioengine = {
	.name = "xnvme",
	.version = FIO_IOOPS_VERSION,
	.options = options,
	.option_struct_size = sizeof(struct xnvme_fioe_options),
	.flags = FIO_DISKLESSIO | FIO_NODISKUTIL | FIO_NOEXTEND | FIO_MEMALIGN | FIO_RAWIO,

	.cleanup = xnvme_fioe_cleanup,
	.init = xnvme_fioe_init,

	.iomem_free = xnvme_fioe_iomem_free,
	.iomem_alloc = xnvme_fioe_iomem_alloc,

	.io_u_free = xnvme_fioe_io_u_free,
	.io_u_init = xnvme_fioe_io_u_init,

	.event = xnvme_fioe_event,
	.getevents = xnvme_fioe_getevents,
	.queue = xnvme_fioe_queue,

	.close_file = xnvme_fioe_close,
	.open_file = xnvme_fioe_open,
	.get_file_size = xnvme_fioe_get_file_size,

	.invalidate = xnvme_fioe_invalidate,
	.get_max_open_zones = xnvme_fioe_get_max_open_zones,
	.get_zoned_model = xnvme_fioe_get_zoned_model,
	.report_zones = xnvme_fioe_report_zones,
	.reset_wp = xnvme_fioe_reset_wp,
};

static void fio_init fio_xnvme_register(void)
{
	register_ioengine(&ioengine);
}

static void fio_exit fio_xnvme_unregister(void)
{
	unregister_ioengine(&ioengine);
}