engines/xnvme: fix fdp support for userspace drivers
[fio.git] / engines / xnvme.c
/*
 * fio xNVMe IO Engine
 *
 * IO engine using the xNVMe C API.
 *
 * See: http://xnvme.io/
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include <stdlib.h>
#include <assert.h>
#include <libxnvme.h>
#include <libxnvme_libconf.h>
#include <libxnvme_nvm.h>
#include <libxnvme_znd.h>
#include <libxnvme_spec_fs.h>
#include "fio.h"
#include "zbd_types.h"
#include "fdp.h"
#include "optgroup.h"

static pthread_mutex_t g_serialize = PTHREAD_MUTEX_INITIALIZER;

struct xnvme_fioe_fwrap {
	/* fio file representation */
	struct fio_file *fio_file;

	/* xNVMe device handle */
	struct xnvme_dev *dev;
	/* xNVMe device geometry */
	const struct xnvme_geo *geo;

	struct xnvme_queue *queue;

	uint32_t ssw;
	uint32_t lba_nbytes;

	uint8_t _pad[24];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_fwrap) == 64, "Incorrect size")

struct xnvme_fioe_data {
	/* I/O completion queue */
	struct io_u **iocq;

	/* # of iocq entries; incremented via getevents()/cb_pool() */
	uint64_t completed;

	/*
	 * # of errors; incremented when observed on completion via
	 * getevents()/cb_pool()
	 */
	uint64_t ecount;

	/* Controls which device/file to select */
	int32_t prev;
	int32_t cur;

	/* Number of devices/files for which open() has been called */
	int64_t nopen;
	/* Number of devices/files allocated in files[] */
	uint64_t nallocated;

	struct iovec *iovec;

	uint8_t _pad[8];

	struct xnvme_fioe_fwrap files[];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_data) == 64, "Incorrect size")

struct xnvme_fioe_options {
	void *padding;
	unsigned int hipri;
	unsigned int sqpoll_thread;
	unsigned int xnvme_dev_nsid;
	unsigned int xnvme_iovec;
	char *xnvme_be;
	char *xnvme_mem;
	char *xnvme_async;
	char *xnvme_sync;
	char *xnvme_admin;
	char *xnvme_dev_subnqn;
};

static struct fio_option options[] = {
	{
		.name = "hipri",
		.lname = "High Priority",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, hipri),
		.help = "Use polled IO completions",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "sqthread_poll",
		.lname = "Kernel SQ thread polling",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, sqpoll_thread),
		.help = "Offload submission/completion to kernel thread",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_be",
		.lname = "xNVMe Backend",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_be),
		.help = "Select xNVMe backend [spdk,linux,fbsd]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_mem",
		.lname = "xNVMe Memory Backend",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_mem),
		.help = "Select xNVMe memory backend",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_async",
		.lname = "xNVMe Asynchronous command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_async),
		.help = "Select xNVMe async. interface: "
			"[emu,thrpool,io_uring,io_uring_cmd,libaio,posix,vfio,nil]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_sync",
		.lname = "xNVMe Synchronous command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_sync),
		.help = "Select xNVMe sync. interface: [nvme,psync,block]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_admin",
		.lname = "xNVMe Admin command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_admin),
		.help = "Select xNVMe admin. cmd-interface: [nvme,block]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_dev_nsid",
		.lname = "xNVMe Namespace-Identifier, for user-space NVMe driver",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_dev_nsid),
		.help = "xNVMe Namespace-Identifier, for user-space NVMe driver",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_dev_subnqn",
		.lname = "Subsystem NQN for Fabrics",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_dev_subnqn),
		.help = "Subsystem NQN for Fabrics",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_iovec",
		.lname = "Vectored IOs",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_iovec),
		.help = "Send vectored IOs",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},

	{
		.name = NULL,
	},
};

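/*
 * Completion callback: on error, print the command context, bump the error
 * count and mark the io_u; then record the completed io_u in the completion
 * queue and hand the command context back to the xNVMe queue.
 */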
static void cb_pool(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct io_u *io_u = cb_arg;
	struct xnvme_fioe_data *xd = io_u->mmap_data;

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		xd->ecount += 1;
		io_u->error = EIO;
	}

	xd->iocq[xd->completed++] = io_u;
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}

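/* Map the fio thread/engine options onto xNVMe device-open options. */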
static struct xnvme_opts xnvme_opts_from_fioe(struct thread_data *td)
{
	struct xnvme_fioe_options *o = td->eo;
	struct xnvme_opts opts = xnvme_opts_default();

	opts.nsid = o->xnvme_dev_nsid;
	opts.subnqn = o->xnvme_dev_subnqn;
	opts.be = o->xnvme_be;
	opts.mem = o->xnvme_mem;
	opts.async = o->xnvme_async;
	opts.sync = o->xnvme_sync;
	opts.admin = o->xnvme_admin;

	opts.poll_io = o->hipri;
	opts.poll_sq = o->sqpoll_thread;

	opts.direct = td->o.odirect;

	return opts;
}

static void _dev_close(struct thread_data *td, struct xnvme_fioe_fwrap *fwrap)
{
	if (fwrap->dev)
		xnvme_queue_term(fwrap->queue);

	xnvme_dev_close(fwrap->dev);

	memset(fwrap, 0, sizeof(*fwrap));
}

static void xnvme_fioe_cleanup(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	int err;

	if (!td->io_ops_data)
		return;

	xd = td->io_ops_data;

	err = pthread_mutex_lock(&g_serialize);
	if (err)
		log_err("ioeng->cleanup(): pthread_mutex_lock(), err(%d)\n", err);
	/* NOTE: not returning here */

	for (uint64_t i = 0; i < xd->nallocated; ++i)
		_dev_close(td, &xd->files[i]);

	if (!err) {
		err = pthread_mutex_unlock(&g_serialize);
		if (err)
			log_err("ioeng->cleanup(): pthread_mutex_unlock(), err(%d)\n", err);
	}

	free(xd->iocq);
	free(xd->iovec);
	free(xd);
	td->io_ops_data = NULL;
}

/**
 * Helper function setting up device handles as addressed by the naming
 * convention of the given `fio_file` filename.
 *
 * Checks thread-options for explicit control of asynchronous implementation via
 * the ``--xnvme_async={thrpool,emu,posix,io_uring,libaio,nil}``.
 */
static int _dev_open(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap;
	int flags = 0;
	int err;

	if (f->fileno > (int)xd->nallocated) {
		log_err("ioeng->_dev_open(%s): invalid assumption\n", f->file_name);
		return 1;
	}

	fwrap = &xd->files[f->fileno];

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->_dev_open(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
			err);
		return -err;
	}

	fwrap->dev = xnvme_dev_open(f->file_name, &opts);
	if (!fwrap->dev) {
		log_err("ioeng->_dev_open(%s): xnvme_dev_open(), err(%d)\n", f->file_name, errno);
		goto failure;
	}
	fwrap->geo = xnvme_dev_get_geo(fwrap->dev);

	if (xnvme_queue_init(fwrap->dev, td->o.iodepth, flags, &(fwrap->queue))) {
		log_err("ioeng->_dev_open(%s): xnvme_queue_init(), err(?)\n", f->file_name);
		goto failure;
	}
	xnvme_queue_set_cb(fwrap->queue, cb_pool, NULL);

	fwrap->ssw = xnvme_dev_get_ssw(fwrap->dev);
	fwrap->lba_nbytes = fwrap->geo->lba_nbytes;

	fwrap->fio_file = f;
	fwrap->fio_file->filetype = FIO_TYPE_BLOCK;
	fwrap->fio_file->real_file_size = fwrap->geo->tbytes;
	fio_file_set_size_known(fwrap->fio_file);

	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
			err);

	return 0;

failure:
	xnvme_queue_term(fwrap->queue);
	xnvme_dev_close(fwrap->dev);

	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
			err);

	return 1;
}

static int xnvme_fioe_init(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	struct fio_file *f;
	unsigned int i;

	if (!td->o.use_thread) {
		log_err("ioeng->init(): --thread=1 is required\n");
		return 1;
	}

	/* Allocate xd and iocq */
	xd = calloc(1, sizeof(*xd) + sizeof(*xd->files) * td->o.nr_files);
	if (!xd) {
		log_err("ioeng->init(): !calloc(), err(%d)\n", errno);
		return 1;
	}

	xd->iocq = calloc(td->o.iodepth, sizeof(struct io_u *));
	if (!xd->iocq) {
		free(xd);
		log_err("ioeng->init(): !calloc(xd->iocq), err(%d)\n", errno);
		return 1;
	}

	xd->iovec = calloc(td->o.iodepth, sizeof(*xd->iovec));
	if (!xd->iovec) {
		free(xd->iocq);
		free(xd);
		log_err("ioeng->init(): !calloc(xd->iovec), err(%d)\n", errno);
		return 1;
	}

	xd->prev = -1;
	td->io_ops_data = xd;

	for_each_file(td, f, i)
	{
		if (_dev_open(td, f)) {
			/*
			 * Note: We are not freeing xd, iocq and iovec. This
			 * will be done as part of cleanup routine.
			 */
			log_err("ioeng->init(): failed; _dev_open(%s)\n", f->file_name);
			return 1;
		}

		++(xd->nallocated);
	}

	if (xd->nallocated != td->o.nr_files) {
		log_err("ioeng->init(): failed; nallocated != td->o.nr_files\n");
		return 1;
	}

	return 0;
}

/* NOTE: using the first device for buffer-allocators */
static int xnvme_fioe_iomem_alloc(struct thread_data *td, size_t total_mem)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap = &xd->files[0];

	if (!fwrap->dev) {
		log_err("ioeng->iomem_alloc(): failed; no dev-handle\n");
		return 1;
	}

	td->orig_buffer = xnvme_buf_alloc(fwrap->dev, total_mem);

	return td->orig_buffer == NULL;
}

/* NOTE: using the first device for buffer-allocators */
static void xnvme_fioe_iomem_free(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	struct xnvme_fioe_fwrap *fwrap = NULL;

	if (!td->io_ops_data)
		return;

	xd = td->io_ops_data;
	fwrap = &xd->files[0];

	if (!fwrap->dev) {
		log_err("ioeng->iomem_free(): failed; no dev-handle\n");
		return;
	}

	xnvme_buf_free(fwrap->dev, td->orig_buffer);
}

static int xnvme_fioe_io_u_init(struct thread_data *td, struct io_u *io_u)
{
	io_u->mmap_data = td->io_ops_data;

	return 0;
}

static void xnvme_fioe_io_u_free(struct thread_data *td, struct io_u *io_u)
{
	io_u->mmap_data = NULL;
}

static struct io_u *xnvme_fioe_event(struct thread_data *td, int event)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	assert(event >= 0);
	assert((unsigned)event < xd->completed);

	return xd->iocq[event];
}

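/*
 * Reap completions by poking the per-file queues, round-robin across all
 * open devices/files, starting from where the previous call left off, until
 * at least 'min' completions have been gathered.
 */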
static int xnvme_fioe_getevents(struct thread_data *td, unsigned int min, unsigned int max,
				const struct timespec *t)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap = NULL;
	int nfiles = xd->nallocated;
	int err = 0;

	if (xd->prev != -1 && ++xd->prev < nfiles) {
		fwrap = &xd->files[xd->prev];
		xd->cur = xd->prev;
	}

	xd->completed = 0;
	for (;;) {
		if (fwrap == NULL || xd->cur == nfiles) {
			fwrap = &xd->files[0];
			xd->cur = 0;
		}

		while (fwrap != NULL && xd->cur < nfiles && err >= 0) {
			err = xnvme_queue_poke(fwrap->queue, max - xd->completed);
			if (err < 0) {
				switch (err) {
				case -EBUSY:
				case -EAGAIN:
					usleep(1);
					break;

				default:
					log_err("ioeng->getevents(): unhandled IO error\n");
					assert(false);
					return 0;
				}
			}
			if (xd->completed >= min) {
				xd->prev = xd->cur;
				return xd->completed;
			}
			xd->cur++;
			fwrap = &xd->files[xd->cur];

			if (err < 0) {
				switch (err) {
				case -EBUSY:
				case -EAGAIN:
					usleep(1);
					break;
				}
			}
		}
	}

	xd->cur = 0;

	return xd->completed;
}

static enum fio_q_status xnvme_fioe_queue(struct thread_data *td, struct io_u *io_u)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap;
	struct xnvme_cmd_ctx *ctx;
	uint32_t nsid;
	uint64_t slba;
	uint16_t nlb;
	int err;
	bool vectored_io = ((struct xnvme_fioe_options *)td->eo)->xnvme_iovec;
	uint32_t dir = io_u->dtype;

	fio_ro_check(td, io_u);

	fwrap = &xd->files[io_u->file->fileno];
	nsid = xnvme_dev_get_nsid(fwrap->dev);

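	/*
	 * Convert the byte offset and transfer length into LBA units using the
	 * device's sector-shift-width (ssw); nlb is zero-based per the NVMe spec.
	 */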
	slba = io_u->offset >> fwrap->ssw;
	nlb = (io_u->xfer_buflen >> fwrap->ssw) - 1;

	ctx = xnvme_queue_get_cmd_ctx(fwrap->queue);
	ctx->async.cb_arg = io_u;

	ctx->cmd.common.nsid = nsid;
	ctx->cmd.nvm.slba = slba;
	ctx->cmd.nvm.nlb = nlb;
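	/*
	 * If fio has assigned an FDP directive to this io_u, propagate the
	 * directive type (dtype) and the placement identifier (dspec, CDW13)
	 * into the NVMe command.
	 */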
	if (dir) {
		ctx->cmd.nvm.dtype = io_u->dtype;
		ctx->cmd.nvm.cdw13.dspec = io_u->dspec;
	}

	switch (io_u->ddir) {
	case DDIR_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		break;

	case DDIR_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		break;

	default:
		log_err("ioeng->queue(): ENOSYS: %u\n", io_u->ddir);
		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);

		io_u->error = ENOSYS;
		assert(false);
		return FIO_Q_COMPLETED;
	}

	if (vectored_io) {
		xd->iovec[io_u->index].iov_base = io_u->xfer_buf;
		xd->iovec[io_u->index].iov_len = io_u->xfer_buflen;

		err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen, NULL, 0,
				      0);
	} else {
		err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen, NULL, 0);
	}
	switch (err) {
	case 0:
		return FIO_Q_QUEUED;

	case -EBUSY:
	case -EAGAIN:
		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
		return FIO_Q_BUSY;

	default:
		log_err("ioeng->queue(): err: '%d'\n", err);

		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);

		io_u->error = abs(err);
		assert(false);
		return FIO_Q_COMPLETED;
	}
}

static int xnvme_fioe_close(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	dprint(FD_FILE, "xnvme close %s -- nopen: %ld\n", f->file_name, xd->nopen);

	--(xd->nopen);

	return 0;
}

static int xnvme_fioe_open(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	dprint(FD_FILE, "xnvme open %s -- nopen: %ld\n", f->file_name, xd->nopen);

	if (f->fileno > (int)xd->nallocated) {
		log_err("ioeng->open(): f->fileno > xd->nallocated; invalid assumption\n");
		return 1;
	}
	if (xd->files[f->fileno].fio_file != f) {
		log_err("ioeng->open(): fio_file != f; invalid assumption\n");
		return 1;
	}

	++(xd->nopen);

	return 0;
}

static int xnvme_fioe_invalidate(struct thread_data *td, struct fio_file *f)
{
	/* Consider only doing this with be:spdk */
	return 0;
}

static int xnvme_fioe_get_max_open_zones(struct thread_data *td, struct fio_file *f,
					 unsigned int *max_open_zones)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	const struct xnvme_spec_znd_idfy_ns *zns;
	int err = 0, err_lock;

	if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
	    f->filetype != FIO_TYPE_CHAR) {
		log_info("ioeng->get_max_open_zones(): ignoring filetype: %d\n", f->filetype);
		return 0;
	}
	err_lock = pthread_mutex_lock(&g_serialize);
	if (err_lock) {
		log_err("ioeng->get_max_open_zones(): pthread_mutex_lock(), err(%d)\n", err_lock);
		return -err_lock;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->get_max_open_zones(): xnvme_dev_open(), err(%d)\n", errno);
		err = -errno;
		goto exit;
	}
	if (xnvme_dev_get_geo(dev)->type != XNVME_GEO_ZONED) {
		errno = EINVAL;
		err = -errno;
		goto exit;
	}

	zns = (void *)xnvme_dev_get_ns_css(dev);
	if (!zns) {
		log_err("ioeng->get_max_open_zones(): xnvme_dev_get_ns_css(), err(%d)\n", errno);
		err = -errno;
		goto exit;
	}

	/*
	 * intentional overflow as the value is zero-based and NVMe
	 * defines 0xFFFFFFFF as unlimited thus overflowing to 0 which
	 * is how fio indicates unlimited and otherwise just converting
	 * to one-based.
	 */
	*max_open_zones = zns->mor + 1;

exit:
	xnvme_dev_close(dev);
	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->get_max_open_zones(): pthread_mutex_unlock(), err(%d)\n",
			err_lock);

	return err;
}

/**
 * Currently, this function is called before I/O engine initialization, so we
 * cannot consult the file-wrapping done when 'fioe' initializes.
 * Instead we just open based on the given filename.
 *
 * TODO: unify the different setup methods, consider keeping the handle around,
 * and consider how to support the --be option in this usecase
 */
static int xnvme_fioe_get_zoned_model(struct thread_data *td, struct fio_file *f,
				      enum zbd_zoned_model *model)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	int err = 0, err_lock;

	if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
	    f->filetype != FIO_TYPE_CHAR) {
		log_info("ioeng->get_zoned_model(): ignoring filetype: %d\n", f->filetype);
		return -EINVAL;
	}

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->get_zoned_model(): pthread_mutex_lock(), err(%d)\n", err);
		return -err;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->get_zoned_model(): xnvme_dev_open(%s) failed, errno: %d\n",
			f->file_name, errno);
		err = -errno;
		goto exit;
	}

	switch (xnvme_dev_get_geo(dev)->type) {
	case XNVME_GEO_UNKNOWN:
		dprint(FD_ZBD, "%s: got 'unknown', assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		break;

	case XNVME_GEO_CONVENTIONAL:
		dprint(FD_ZBD, "%s: got 'conventional', assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		break;

	case XNVME_GEO_ZONED:
		dprint(FD_ZBD, "%s: got 'zoned', assigning ZBD_HOST_MANAGED\n", f->file_name);
		*model = ZBD_HOST_MANAGED;
		break;

	default:
		dprint(FD_ZBD, "%s: hit-default, assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		errno = EINVAL;
		err = -errno;
		break;
	}

exit:
	xnvme_dev_close(dev);

	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->get_zoned_model(): pthread_mutex_unlock(), err(%d)\n", err_lock);

	return err;
}

/**
 * Fills the given ``zbdz`` with at most ``nr_zones`` zone-descriptors.
 *
 * The implementation converts the NVMe Zoned Command Set log-pages for Zone
 * descriptors into the Linux Kernel Zoned Block Report format.
 *
 * NOTE: This function is called before I/O engine initialization, that is,
 * before ``_dev_open`` has been called and file-wrapping is set up. Thus it has
 * to do the ``_dev_open`` itself, and shut it down again once it is done
 * retrieving the log-pages and converting them to the report format.
 *
 * TODO: unify the different setup methods, consider keeping the handle around,
 * and consider how to support the --async option in this usecase
 */
static int xnvme_fioe_report_zones(struct thread_data *td, struct fio_file *f, uint64_t offset,
				   struct zbd_zone *zbdz, unsigned int nr_zones)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	const struct xnvme_spec_znd_idfy_lbafe *lbafe = NULL;
	struct xnvme_dev *dev = NULL;
	const struct xnvme_geo *geo = NULL;
	struct xnvme_znd_report *rprt = NULL;
	uint32_t ssw;
	uint64_t slba;
	unsigned int limit = 0;
	int err = 0, err_lock;

	dprint(FD_ZBD, "%s: report_zones() offset: %zu, nr_zones: %u\n", f->file_name, offset,
	       nr_zones);

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->report_zones(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
			err);
		return -err;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->report_zones(%s): xnvme_dev_open(), err(%d)\n", f->file_name,
			errno);
		goto exit;
	}

	geo = xnvme_dev_get_geo(dev);
	ssw = xnvme_dev_get_ssw(dev);
	lbafe = xnvme_znd_dev_get_lbafe(dev);

	limit = nr_zones > geo->nzone ? geo->nzone : nr_zones;

	dprint(FD_ZBD, "%s: limit: %u\n", f->file_name, limit);

	slba = ((offset >> ssw) / geo->nsect) * geo->nsect;

	rprt = xnvme_znd_report_from_dev(dev, slba, limit, 0);
	if (!rprt) {
		log_err("ioeng->report_zones(%s): xnvme_znd_report_from_dev(), err(%d)\n",
			f->file_name, errno);
		err = -errno;
		goto exit;
	}
	if (rprt->nentries != limit) {
		log_err("ioeng->report_zones(%s): nentries != nr_zones\n", f->file_name);
		err = 1;
		goto exit;
	}
	if (offset > geo->tbytes) {
		log_err("ioeng->report_zones(%s): out-of-bounds\n", f->file_name);
		goto exit;
	}

	/* Transform the zone-report */
	for (uint32_t idx = 0; idx < rprt->nentries; ++idx) {
		struct xnvme_spec_znd_descr *descr = XNVME_ZND_REPORT_DESCR(rprt, idx);

		zbdz[idx].start = descr->zslba << ssw;
		zbdz[idx].len = lbafe->zsze << ssw;
		zbdz[idx].capacity = descr->zcap << ssw;
		zbdz[idx].wp = descr->wp << ssw;

		switch (descr->zt) {
		case XNVME_SPEC_ZND_TYPE_SEQWR:
			zbdz[idx].type = ZBD_ZONE_TYPE_SWR;
			break;

		default:
			log_err("ioeng->report_zones(%s): invalid type for zone at offset(%zu)\n",
				f->file_name, zbdz[idx].start);
			err = -EIO;
			goto exit;
		}

		switch (descr->zs) {
		case XNVME_SPEC_ZND_STATE_EMPTY:
			zbdz[idx].cond = ZBD_ZONE_COND_EMPTY;
			break;
		case XNVME_SPEC_ZND_STATE_IOPEN:
			zbdz[idx].cond = ZBD_ZONE_COND_IMP_OPEN;
			break;
		case XNVME_SPEC_ZND_STATE_EOPEN:
			zbdz[idx].cond = ZBD_ZONE_COND_EXP_OPEN;
			break;
		case XNVME_SPEC_ZND_STATE_CLOSED:
			zbdz[idx].cond = ZBD_ZONE_COND_CLOSED;
			break;
		case XNVME_SPEC_ZND_STATE_FULL:
			zbdz[idx].cond = ZBD_ZONE_COND_FULL;
			break;

		case XNVME_SPEC_ZND_STATE_RONLY:
		case XNVME_SPEC_ZND_STATE_OFFLINE:
		default:
			zbdz[idx].cond = ZBD_ZONE_COND_OFFLINE;
			break;
		}
	}

exit:
	xnvme_buf_virt_free(rprt);

	xnvme_dev_close(dev);

	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->report_zones(): pthread_mutex_unlock(), err: %d\n", err_lock);

	dprint(FD_ZBD, "err: %d, nr_zones: %d\n", err, (int)nr_zones);

	return err ? err : (int)limit;
}

/**
 * NOTE: This function may get called before I/O engine initialization, that is,
 * before ``_dev_open`` has been called and file-wrapping is set up. In such a
 * case it has to do ``_dev_open`` itself, and shut it down again once it is
 * done resetting the write pointer of zones.
 */
static int xnvme_fioe_reset_wp(struct thread_data *td, struct fio_file *f, uint64_t offset,
			       uint64_t length)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_fioe_data *xd = NULL;
	struct xnvme_fioe_fwrap *fwrap = NULL;
	struct xnvme_dev *dev = NULL;
	const struct xnvme_geo *geo = NULL;
	uint64_t first, last;
	uint32_t ssw;
	uint32_t nsid;
	int err = 0, err_lock;

	if (td->io_ops_data) {
		xd = td->io_ops_data;
		fwrap = &xd->files[f->fileno];

		assert(fwrap->dev);
		assert(fwrap->geo);

		dev = fwrap->dev;
		geo = fwrap->geo;
		ssw = fwrap->ssw;
	} else {
		err = pthread_mutex_lock(&g_serialize);
		if (err) {
			log_err("ioeng->reset_wp(): pthread_mutex_lock(), err(%d)\n", err);
			return -err;
		}

		dev = xnvme_dev_open(f->file_name, &opts);
		if (!dev) {
			log_err("ioeng->reset_wp(): xnvme_dev_open(%s) failed, errno(%d)\n",
				f->file_name, errno);
			goto exit;
		}
		geo = xnvme_dev_get_geo(dev);
		ssw = xnvme_dev_get_ssw(dev);
	}

	nsid = xnvme_dev_get_nsid(dev);

	first = ((offset >> ssw) / geo->nsect) * geo->nsect;
	last = (((offset + length) >> ssw) / geo->nsect) * geo->nsect;
	dprint(FD_ZBD, "first: 0x%lx, last: 0x%lx\n", first, last);

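	/*
	 * Issue a Zone Management Send (Reset) for each zone start-LBA in the
	 * [first, last) range, stepping one zone (geo->nsect LBAs) at a time.
	 */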
	for (uint64_t zslba = first; zslba < last; zslba += geo->nsect) {
		struct xnvme_cmd_ctx ctx = xnvme_cmd_ctx_from_dev(dev);

		if (zslba >= (geo->nsect * geo->nzone)) {
			log_err("ioeng->reset_wp(): out-of-bounds\n");
			err = 0;
			break;
		}

		err = xnvme_znd_mgmt_send(&ctx, nsid, zslba, false,
					  XNVME_SPEC_ZND_CMD_MGMT_SEND_RESET, 0x0, NULL);
		if (err || xnvme_cmd_ctx_cpl_status(&ctx)) {
			err = err ? err : -EIO;
			log_err("ioeng->reset_wp(): err(%d), sc(%d)\n", err, ctx.cpl.status.sc);
			goto exit;
		}
	}

exit:
	if (!td->io_ops_data) {
		xnvme_dev_close(dev);

		err_lock = pthread_mutex_unlock(&g_serialize);
		if (err_lock)
			log_err("ioeng->reset_wp(): pthread_mutex_unlock(), err(%d)\n", err_lock);
	}

	return err;
}

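/*
 * Fetch the FDP Reclaim Unit Handle Status (RUHS) for the namespace via the
 * NVMe I/O Management Receive command, and copy the placement identifiers
 * into fio's ruhs-info structure.
 */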
static int xnvme_fioe_fetch_ruhs(struct thread_data *td, struct fio_file *f,
				 struct fio_ruhs_info *fruhs_info)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	struct xnvme_spec_ruhs *ruhs;
	struct xnvme_cmd_ctx ctx;
	uint32_t ruhs_nbytes;
	uint32_t nsid;
	int err = 0, err_lock;

	if (f->filetype != FIO_TYPE_CHAR && f->filetype != FIO_TYPE_FILE) {
		log_err("ioeng->fdp_ruhs(): ignoring filetype: %d\n", f->filetype);
		return -EINVAL;
	}

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->fdp_ruhs(): pthread_mutex_lock(), err(%d)\n", err);
		return -err;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->fdp_ruhs(): xnvme_dev_open(%s) failed, errno: %d\n",
			f->file_name, errno);
		err = -errno;
		goto exit;
	}

	ruhs_nbytes = sizeof(*ruhs) + (FDP_MAX_RUHS * sizeof(struct xnvme_spec_ruhs_desc));
	ruhs = xnvme_buf_alloc(dev, ruhs_nbytes);
	if (!ruhs) {
		err = -errno;
		goto exit;
	}
	memset(ruhs, 0, ruhs_nbytes);

	ctx = xnvme_cmd_ctx_from_dev(dev);
	nsid = xnvme_dev_get_nsid(dev);

	err = xnvme_nvm_mgmt_recv(&ctx, nsid, XNVME_SPEC_IO_MGMT_RECV_RUHS, 0, ruhs, ruhs_nbytes);

	if (err || xnvme_cmd_ctx_cpl_status(&ctx)) {
		err = err ? err : -EIO;
		log_err("ioeng->fdp_ruhs(): err(%d), sc(%d)\n", err, ctx.cpl.status.sc);
		goto free_buffer;
	}

	fruhs_info->nr_ruhs = ruhs->nruhsd;
	for (uint32_t idx = 0; idx < fruhs_info->nr_ruhs; ++idx) {
		fruhs_info->plis[idx] = le16_to_cpu(ruhs->desc[idx].pi);
	}

free_buffer:
	xnvme_buf_free(dev, ruhs);
exit:
	xnvme_dev_close(dev);

	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->fdp_ruhs(): pthread_mutex_unlock(), err(%d)\n", err_lock);

	return err;
}

static int xnvme_fioe_get_file_size(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	int ret = 0, err;

	if (fio_file_size_known(f))
		return 0;

	ret = pthread_mutex_lock(&g_serialize);
	if (ret) {
		log_err("ioeng->get_file_size(): pthread_mutex_lock(), err(%d)\n", ret);
		return -ret;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("%s: failed retrieving device handle, errno: %d\n", f->file_name, errno);
		ret = -errno;
		goto exit;
	}

	f->real_file_size = xnvme_dev_get_geo(dev)->tbytes;
	fio_file_set_size_known(f);

	if (td->o.zone_mode == ZONE_MODE_ZBD)
		f->filetype = FIO_TYPE_BLOCK;

exit:
	xnvme_dev_close(dev);
	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->get_file_size(): pthread_mutex_unlock(), err(%d)\n", err);

	return ret;
}

FIO_STATIC struct ioengine_ops ioengine = {
	.name = "xnvme",
	.version = FIO_IOOPS_VERSION,
	.options = options,
	.option_struct_size = sizeof(struct xnvme_fioe_options),
	.flags = FIO_DISKLESSIO | FIO_NODISKUTIL | FIO_NOEXTEND | FIO_MEMALIGN | FIO_RAWIO,

	.cleanup = xnvme_fioe_cleanup,
	.init = xnvme_fioe_init,

	.iomem_free = xnvme_fioe_iomem_free,
	.iomem_alloc = xnvme_fioe_iomem_alloc,

	.io_u_free = xnvme_fioe_io_u_free,
	.io_u_init = xnvme_fioe_io_u_init,

	.event = xnvme_fioe_event,
	.getevents = xnvme_fioe_getevents,
	.queue = xnvme_fioe_queue,

	.close_file = xnvme_fioe_close,
	.open_file = xnvme_fioe_open,
	.get_file_size = xnvme_fioe_get_file_size,

	.invalidate = xnvme_fioe_invalidate,
	.get_max_open_zones = xnvme_fioe_get_max_open_zones,
	.get_zoned_model = xnvme_fioe_get_zoned_model,
	.report_zones = xnvme_fioe_report_zones,
	.reset_wp = xnvme_fioe_reset_wp,

	.fdp_fetch_ruhs = xnvme_fioe_fetch_ruhs,
};

static void fio_init fio_xnvme_register(void)
{
	register_ioengine(&ioengine);
}

static void fio_exit fio_xnvme_unregister(void)
{
	unregister_ioengine(&ioengine);
}