engines/xnvme.c

/*
 * fio xNVMe IO Engine
 *
 * IO engine using the xNVMe C API.
 *
 * See: http://xnvme.io/
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include <stdlib.h>
#include <assert.h>
#include <libxnvme.h>
#include "fio.h"
#include "zbd_types.h"
#include "fdp.h"
#include "optgroup.h"

static pthread_mutex_t g_serialize = PTHREAD_MUTEX_INITIALIZER;

struct xnvme_fioe_fwrap {
	/* fio file representation */
	struct fio_file *fio_file;

	/* xNVMe device handle */
	struct xnvme_dev *dev;
	/* xNVMe device geometry */
	const struct xnvme_geo *geo;

	struct xnvme_queue *queue;

	uint32_t ssw;
	uint32_t lba_nbytes;

	uint8_t _pad[24];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_fwrap) == 64, "Incorrect size")
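
/*
 * The explicit padding rounds the wrapper up to 64 bytes, presumably so that
 * entries of the files[] array below stay cache-line sized; the static assert
 * above enforces the size. struct xnvme_fioe_data is padded the same way.
 */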

struct xnvme_fioe_data {
	/* I/O completion queue */
	struct io_u **iocq;

	/* # of iocq entries; incremented via getevents()/cb_pool() */
	uint64_t completed;

	/*
	 * # of errors; incremented when observed on completion via
	 * getevents()/cb_pool()
	 */
	uint64_t ecount;

	/* Controls which device/file to select */
	int32_t prev;
	int32_t cur;

	/* Number of devices/files for which open() has been called */
	int64_t nopen;
	/* Number of devices/files allocated in files[] */
	uint64_t nallocated;

	struct iovec *iovec;

	uint8_t _pad[8];

	struct xnvme_fioe_fwrap files[];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_data) == 64, "Incorrect size")

struct xnvme_fioe_options {
	void *padding;
	unsigned int hipri;
	unsigned int sqpoll_thread;
	unsigned int xnvme_dev_nsid;
	unsigned int xnvme_iovec;
	char *xnvme_be;
	char *xnvme_mem;
	char *xnvme_async;
	char *xnvme_sync;
	char *xnvme_admin;
	char *xnvme_dev_subnqn;
};

static struct fio_option options[] = {
	{
		.name = "hipri",
		.lname = "High Priority",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, hipri),
		.help = "Use polled IO completions",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "sqthread_poll",
		.lname = "Kernel SQ thread polling",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, sqpoll_thread),
		.help = "Offload submission/completion to kernel thread",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_be",
		.lname = "xNVMe Backend",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_be),
		.help = "Select xNVMe backend [spdk,linux,fbsd]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_mem",
		.lname = "xNVMe Memory Backend",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_mem),
		.help = "Select xNVMe memory backend",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_async",
		.lname = "xNVMe Asynchronous command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_async),
		.help = "Select xNVMe async. interface: "
			"[emu,thrpool,io_uring,io_uring_cmd,libaio,posix,vfio,nil]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_sync",
		.lname = "xNVMe Synchronous command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_sync),
		.help = "Select xNVMe sync. interface: [nvme,psync,block]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_admin",
		.lname = "xNVMe Admin command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_admin),
		.help = "Select xNVMe admin. cmd-interface: [nvme,block]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_dev_nsid",
		.lname = "xNVMe Namespace-Identifier, for user-space NVMe driver",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_dev_nsid),
		.help = "xNVMe Namespace-Identifier, for user-space NVMe driver",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_dev_subnqn",
		.lname = "Subsystem NQN for Fabrics",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_dev_subnqn),
		.help = "Subsystem NQN for Fabrics",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_iovec",
		.lname = "Vectored IOs",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_iovec),
		.help = "Send vectored IOs",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},

	{
		.name = NULL,
	},
};
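
/*
 * Example usage; a sketch only, since device paths, backends, and the
 * available async interfaces depend on the system and on how xNVMe was built:
 *
 *   fio --name=xnvme-randread --ioengine=xnvme --thread=1 \
 *       --filename=/dev/nvme0n1 --direct=1 --rw=randread --bs=4k \
 *       --iodepth=16 --xnvme_async=io_uring
 *
 * For the user-space NVMe driver, select it with --xnvme_be=spdk and pass the
 * namespace via --xnvme_dev_nsid.
 */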
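
/*
 * Per-command completion callback; registered on each queue in _dev_open()
 * and invoked when completions are reaped via xnvme_queue_poke() from
 * xnvme_fioe_getevents(). Failed completions are printed, counted in
 * xd->ecount and surfaced to fio as EIO; the io_u is appended to the
 * completion queue (iocq) either way.
 */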
static void cb_pool(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct io_u *io_u = cb_arg;
	struct xnvme_fioe_data *xd = io_u->mmap_data;

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		xd->ecount += 1;
		io_u->error = EIO;
	}

	xd->iocq[xd->completed++] = io_u;
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}

static struct xnvme_opts xnvme_opts_from_fioe(struct thread_data *td)
{
	struct xnvme_fioe_options *o = td->eo;
	struct xnvme_opts opts = xnvme_opts_default();

	opts.nsid = o->xnvme_dev_nsid;
	opts.subnqn = o->xnvme_dev_subnqn;
	opts.be = o->xnvme_be;
	opts.mem = o->xnvme_mem;
	opts.async = o->xnvme_async;
	opts.sync = o->xnvme_sync;
	opts.admin = o->xnvme_admin;

	opts.poll_io = o->hipri;
	opts.poll_sq = o->sqpoll_thread;

	opts.direct = td->o.odirect;

	return opts;
}

static void _dev_close(struct thread_data *td, struct xnvme_fioe_fwrap *fwrap)
{
	if (fwrap->dev)
		xnvme_queue_term(fwrap->queue);

	xnvme_dev_close(fwrap->dev);

	memset(fwrap, 0, sizeof(*fwrap));
}

static void xnvme_fioe_cleanup(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	int err;

	if (!td->io_ops_data)
		return;

	xd = td->io_ops_data;

	err = pthread_mutex_lock(&g_serialize);
	if (err)
		log_err("ioeng->cleanup(): pthread_mutex_lock(), err(%d)\n", err);
		/* NOTE: not returning here */

	for (uint64_t i = 0; i < xd->nallocated; ++i)
		_dev_close(td, &xd->files[i]);

	if (!err) {
		err = pthread_mutex_unlock(&g_serialize);
		if (err)
			log_err("ioeng->cleanup(): pthread_mutex_unlock(), err(%d)\n", err);
	}

	free(xd->iocq);
	free(xd->iovec);
	free(xd);
	td->io_ops_data = NULL;
}

/**
 * Helper function setting up device handles as addressed by the naming
 * convention of the given `fio_file` filename.
 *
 * Checks thread-options for explicit control of the asynchronous
 * implementation via the
 * ``--xnvme_async={thrpool,emu,posix,io_uring,libaio,nil}`` option.
 */
static int _dev_open(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap;
	int flags = 0;
	int err;

	if (f->fileno > (int)xd->nallocated) {
		log_err("ioeng->_dev_open(%s): invalid assumption\n", f->file_name);
		return 1;
	}

	fwrap = &xd->files[f->fileno];

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->_dev_open(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
			err);
		return -err;
	}

	fwrap->dev = xnvme_dev_open(f->file_name, &opts);
	if (!fwrap->dev) {
		log_err("ioeng->_dev_open(%s): xnvme_dev_open(), err(%d)\n", f->file_name, errno);
		goto failure;
	}
	fwrap->geo = xnvme_dev_get_geo(fwrap->dev);

	if (xnvme_queue_init(fwrap->dev, td->o.iodepth, flags, &(fwrap->queue))) {
		log_err("ioeng->_dev_open(%s): xnvme_queue_init(), err(?)\n", f->file_name);
		goto failure;
	}
	xnvme_queue_set_cb(fwrap->queue, cb_pool, NULL);

	fwrap->ssw = xnvme_dev_get_ssw(fwrap->dev);
	fwrap->lba_nbytes = fwrap->geo->lba_nbytes;

	fwrap->fio_file = f;
	fwrap->fio_file->filetype = FIO_TYPE_BLOCK;
	fwrap->fio_file->real_file_size = fwrap->geo->tbytes;
	fio_file_set_size_known(fwrap->fio_file);

	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
			err);

	return 0;

failure:
	xnvme_queue_term(fwrap->queue);
	xnvme_dev_close(fwrap->dev);

	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
			err);

	return 1;
}

static int xnvme_fioe_init(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	struct fio_file *f;
	unsigned int i;

	if (!td->o.use_thread) {
		log_err("ioeng->init(): --thread=1 is required\n");
		return 1;
	}

	/* Allocate xd and iocq */
	xd = calloc(1, sizeof(*xd) + sizeof(*xd->files) * td->o.nr_files);
	if (!xd) {
		log_err("ioeng->init(): !calloc(), err(%d)\n", errno);
		return 1;
	}

	xd->iocq = calloc(td->o.iodepth, sizeof(struct io_u *));
	if (!xd->iocq) {
		free(xd);
		log_err("ioeng->init(): !calloc(xd->iocq), err(%d)\n", errno);
		return 1;
	}

	xd->iovec = calloc(td->o.iodepth, sizeof(*xd->iovec));
	if (!xd->iovec) {
		free(xd->iocq);
		free(xd);
		log_err("ioeng->init(): !calloc(xd->iovec), err(%d)\n", errno);
		return 1;
	}

	xd->prev = -1;
	td->io_ops_data = xd;

	for_each_file(td, f, i)
	{
		if (_dev_open(td, f)) {
			/*
			 * Note: We are not freeing xd, iocq and iovec. This
			 * will be done as part of the cleanup routine.
			 */
			log_err("ioeng->init(): failed; _dev_open(%s)\n", f->file_name);
			return 1;
		}

		++(xd->nallocated);
	}

	if (xd->nallocated != td->o.nr_files) {
		log_err("ioeng->init(): failed; nallocated != td->o.nr_files\n");
		return 1;
	}

	return 0;
}

/* NOTE: using the first device for buffer-allocators */
static int xnvme_fioe_iomem_alloc(struct thread_data *td, size_t total_mem)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap = &xd->files[0];

	if (!fwrap->dev) {
		log_err("ioeng->iomem_alloc(): failed; no dev-handle\n");
		return 1;
	}

	td->orig_buffer = xnvme_buf_alloc(fwrap->dev, total_mem);

	return td->orig_buffer == NULL;
}

/* NOTE: using the first device for buffer-allocators */
static void xnvme_fioe_iomem_free(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	struct xnvme_fioe_fwrap *fwrap = NULL;

	if (!td->io_ops_data)
		return;

	xd = td->io_ops_data;
	fwrap = &xd->files[0];

	if (!fwrap->dev) {
		log_err("ioeng->iomem_free(): failed; no dev-handle\n");
		return;
	}

	xnvme_buf_free(fwrap->dev, td->orig_buffer);
}

static int xnvme_fioe_io_u_init(struct thread_data *td, struct io_u *io_u)
{
	io_u->mmap_data = td->io_ops_data;

	return 0;
}

static void xnvme_fioe_io_u_free(struct thread_data *td, struct io_u *io_u)
{
	io_u->mmap_data = NULL;
}

static struct io_u *xnvme_fioe_event(struct thread_data *td, int event)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	assert(event >= 0);
	assert((unsigned)event < xd->completed);

	return xd->iocq[event];
}

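/*
 * Reaps completions by polling the per-file queues round-robin, resuming from
 * the file that satisfied the previous call (xd->prev). -EBUSY and -EAGAIN
 * from xnvme_queue_poke() are treated as transient and retried after a short
 * sleep; the scan continues until at least 'min' events have been gathered.
 */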
static int xnvme_fioe_getevents(struct thread_data *td, unsigned int min, unsigned int max,
				const struct timespec *t)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap = NULL;
	int nfiles = xd->nallocated;
	int err = 0;

	if (xd->prev != -1 && ++xd->prev < nfiles) {
		fwrap = &xd->files[xd->prev];
		xd->cur = xd->prev;
	}

	xd->completed = 0;
	for (;;) {
		if (fwrap == NULL || xd->cur == nfiles) {
			fwrap = &xd->files[0];
			xd->cur = 0;
		}

		while (fwrap != NULL && xd->cur < nfiles && err >= 0) {
			err = xnvme_queue_poke(fwrap->queue, max - xd->completed);
			if (err < 0) {
				switch (err) {
				case -EBUSY:
				case -EAGAIN:
					usleep(1);
					break;

				default:
					log_err("ioeng->getevents(): unhandled IO error\n");
					assert(false);
					return 0;
				}
			}
			if (xd->completed >= min) {
				xd->prev = xd->cur;
				return xd->completed;
			}
			xd->cur++;
			fwrap = &xd->files[xd->cur];

			if (err < 0) {
				switch (err) {
				case -EBUSY:
				case -EAGAIN:
					usleep(1);
					break;
				}
			}
		}
	}

	xd->cur = 0;

	return xd->completed;
}

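/*
 * Translates the io_u into an NVMe read/write command and submits it on the
 * queue belonging to the io_u's file; submission goes through
 * xnvme_cmd_passv() when vectored IO is requested and xnvme_cmd_pass()
 * otherwise.
 */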
static enum fio_q_status xnvme_fioe_queue(struct thread_data *td, struct io_u *io_u)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap;
	struct xnvme_cmd_ctx *ctx;
	uint32_t nsid;
	uint64_t slba;
	uint16_t nlb;
	int err;
	bool vectored_io = ((struct xnvme_fioe_options *)td->eo)->xnvme_iovec;
	uint32_t dir = io_u->dtype;

	fio_ro_check(td, io_u);

	fwrap = &xd->files[io_u->file->fileno];
	nsid = xnvme_dev_get_nsid(fwrap->dev);

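	/*
	 * 'ssw' is the device's sector shift width (log2 of the LBA size), so
	 * byte offsets and lengths convert to LBAs by shifting. For example,
	 * with 512-byte LBAs (ssw = 9), offset 4096 and xfer_buflen 4096 give
	 * slba = 8 and nlb = 7 (nlb is zero-based in NVMe).
	 */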
	slba = io_u->offset >> fwrap->ssw;
	nlb = (io_u->xfer_buflen >> fwrap->ssw) - 1;

	ctx = xnvme_queue_get_cmd_ctx(fwrap->queue);
	ctx->async.cb_arg = io_u;

	ctx->cmd.common.nsid = nsid;
	ctx->cmd.nvm.slba = slba;
	ctx->cmd.nvm.nlb = nlb;
	if (dir) {
		ctx->cmd.nvm.dtype = io_u->dtype;
		ctx->cmd.nvm.cdw13.dspec = io_u->dspec;
	}

	switch (io_u->ddir) {
	case DDIR_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		break;

	case DDIR_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		break;

	default:
		log_err("ioeng->queue(): ENOSYS: %u\n", io_u->ddir);
		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);

		io_u->error = ENOSYS;
		assert(false);
		return FIO_Q_COMPLETED;
	}

	if (vectored_io) {
		xd->iovec[io_u->index].iov_base = io_u->xfer_buf;
		xd->iovec[io_u->index].iov_len = io_u->xfer_buflen;

		err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen, NULL, 0,
				      0);
	} else {
		err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen, NULL, 0);
	}
	switch (err) {
	case 0:
		return FIO_Q_QUEUED;

	case -EBUSY:
	case -EAGAIN:
		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
		return FIO_Q_BUSY;

	default:
		log_err("ioeng->queue(): err: '%d'\n", err);

		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);

		io_u->error = abs(err);
		assert(false);
		return FIO_Q_COMPLETED;
	}
}

static int xnvme_fioe_close(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	dprint(FD_FILE, "xnvme close %s -- nopen: %ld\n", f->file_name, xd->nopen);

	--(xd->nopen);

	return 0;
}

static int xnvme_fioe_open(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	dprint(FD_FILE, "xnvme open %s -- nopen: %ld\n", f->file_name, xd->nopen);

	if (f->fileno > (int)xd->nallocated) {
		log_err("ioeng->open(): f->fileno > xd->nallocated; invalid assumption\n");
		return 1;
	}
	if (xd->files[f->fileno].fio_file != f) {
		log_err("ioeng->open(): fio_file != f; invalid assumption\n");
		return 1;
	}

	++(xd->nopen);

	return 0;
}

static int xnvme_fioe_invalidate(struct thread_data *td, struct fio_file *f)
{
	/* Consider only doing this with be:spdk */
	return 0;
}

static int xnvme_fioe_get_max_open_zones(struct thread_data *td, struct fio_file *f,
					 unsigned int *max_open_zones)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	const struct xnvme_spec_znd_idfy_ns *zns;
	int err = 0, err_lock;

	if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
	    f->filetype != FIO_TYPE_CHAR) {
		log_info("ioeng->get_max_open_zones(): ignoring filetype: %d\n", f->filetype);
		return 0;
	}
	err_lock = pthread_mutex_lock(&g_serialize);
	if (err_lock) {
		log_err("ioeng->get_max_open_zones(): pthread_mutex_lock(), err(%d)\n", err_lock);
		return -err_lock;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->get_max_open_zones(): xnvme_dev_open(), err(%d)\n", errno);
		err = -errno;
		goto exit;
	}
	if (xnvme_dev_get_geo(dev)->type != XNVME_GEO_ZONED) {
		errno = EINVAL;
		err = -errno;
		goto exit;
	}

	zns = (void *)xnvme_dev_get_ns_css(dev);
	if (!zns) {
		log_err("ioeng->get_max_open_zones(): xnvme_dev_get_ns_css(), err(%d)\n", errno);
		err = -errno;
		goto exit;
	}

	/*
	 * Intentional overflow: the NVMe MOR field is zero-based and defines
	 * 0xFFFFFFFF as unlimited, so adding one either converts it to the
	 * one-based count fio expects or wraps it to 0, which is how fio
	 * denotes unlimited. E.g. mor = 3 reports 4 open zones.
	 */
	*max_open_zones = zns->mor + 1;

exit:
	xnvme_dev_close(dev);
	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->get_max_open_zones(): pthread_mutex_unlock(), err(%d)\n",
			err_lock);

	return err;
}

/**
 * Currently, this function is called before I/O engine initialization, so
 * we cannot consult the file-wrapping done when 'fioe' initializes.
 * Instead we just open based on the given filename.
 *
 * TODO: unify the different setup methods, consider keeping the handle around,
 * and consider how to support the --be option in this use case
 */
static int xnvme_fioe_get_zoned_model(struct thread_data *td, struct fio_file *f,
				      enum zbd_zoned_model *model)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	int err = 0, err_lock;

	if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
	    f->filetype != FIO_TYPE_CHAR) {
		log_info("ioeng->get_zoned_model(): ignoring filetype: %d\n", f->filetype);
		return -EINVAL;
	}

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->get_zoned_model(): pthread_mutex_lock(), err(%d)\n", err);
		return -err;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->get_zoned_model(): xnvme_dev_open(%s) failed, errno: %d\n",
			f->file_name, errno);
		err = -errno;
		goto exit;
	}

	switch (xnvme_dev_get_geo(dev)->type) {
	case XNVME_GEO_UNKNOWN:
		dprint(FD_ZBD, "%s: got 'unknown', assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		break;

	case XNVME_GEO_CONVENTIONAL:
		dprint(FD_ZBD, "%s: got 'conventional', assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		break;

	case XNVME_GEO_ZONED:
		dprint(FD_ZBD, "%s: got 'zoned', assigning ZBD_HOST_MANAGED\n", f->file_name);
		*model = ZBD_HOST_MANAGED;
		break;

	default:
		dprint(FD_ZBD, "%s: hit-default, assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		errno = EINVAL;
		err = -errno;
		break;
	}

exit:
	xnvme_dev_close(dev);

	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->get_zoned_model(): pthread_mutex_unlock(), err(%d)\n", err_lock);

	return err;
}

/**
 * Fills the given ``zbdz`` with at most ``nr_zones`` zone-descriptors.
 *
 * The implementation converts the NVMe Zoned Command Set log-pages for Zone
 * descriptors into the Linux Kernel Zoned Block Report format.
 *
 * NOTE: This function is called before I/O engine initialization, that is,
 * before ``_dev_open`` has been called and file-wrapping is set up. Thus it
 * has to do the ``_dev_open`` itself, and shut it down again once it is done
 * retrieving the log-pages and converting them to the report format.
 *
 * TODO: unify the different setup methods, consider keeping the handle around,
 * and consider how to support the --async option in this use case
 */
static int xnvme_fioe_report_zones(struct thread_data *td, struct fio_file *f, uint64_t offset,
				   struct zbd_zone *zbdz, unsigned int nr_zones)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	const struct xnvme_spec_znd_idfy_lbafe *lbafe = NULL;
	struct xnvme_dev *dev = NULL;
	const struct xnvme_geo *geo = NULL;
	struct xnvme_znd_report *rprt = NULL;
	uint32_t ssw;
	uint64_t slba;
	unsigned int limit = 0;
	int err = 0, err_lock;

	dprint(FD_ZBD, "%s: report_zones() offset: %zu, nr_zones: %u\n", f->file_name, offset,
	       nr_zones);

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->report_zones(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
			err);
		return -err;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->report_zones(%s): xnvme_dev_open(), err(%d)\n", f->file_name,
			errno);
		err = -errno;
		goto exit;
	}

	geo = xnvme_dev_get_geo(dev);
	ssw = xnvme_dev_get_ssw(dev);
	lbafe = xnvme_znd_dev_get_lbafe(dev);

	limit = nr_zones > geo->nzone ? geo->nzone : nr_zones;

	dprint(FD_ZBD, "%s: limit: %u\n", f->file_name, limit);

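	/*
	 * Round the byte offset down to the first LBA of the zone that
	 * contains it; every zone spans geo->nsect LBAs.
	 */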
	slba = ((offset >> ssw) / geo->nsect) * geo->nsect;

	rprt = xnvme_znd_report_from_dev(dev, slba, limit, 0);
	if (!rprt) {
		log_err("ioeng->report_zones(%s): xnvme_znd_report_from_dev(), err(%d)\n",
			f->file_name, errno);
		err = -errno;
		goto exit;
	}
	if (rprt->nentries != limit) {
		log_err("ioeng->report_zones(%s): nentries != nr_zones\n", f->file_name);
		err = 1;
		goto exit;
	}
	if (offset > geo->tbytes) {
		log_err("ioeng->report_zones(%s): out-of-bounds\n", f->file_name);
		goto exit;
	}

	/* Transform the zone-report */
	for (uint32_t idx = 0; idx < rprt->nentries; ++idx) {
		struct xnvme_spec_znd_descr *descr = XNVME_ZND_REPORT_DESCR(rprt, idx);

		zbdz[idx].start = descr->zslba << ssw;
		zbdz[idx].len = lbafe->zsze << ssw;
		zbdz[idx].capacity = descr->zcap << ssw;
		zbdz[idx].wp = descr->wp << ssw;

		switch (descr->zt) {
		case XNVME_SPEC_ZND_TYPE_SEQWR:
			zbdz[idx].type = ZBD_ZONE_TYPE_SWR;
			break;

		default:
			log_err("ioeng->report_zones(%s): invalid type for zone at offset(%zu)\n",
				f->file_name, zbdz[idx].start);
			err = -EIO;
			goto exit;
		}

		switch (descr->zs) {
		case XNVME_SPEC_ZND_STATE_EMPTY:
			zbdz[idx].cond = ZBD_ZONE_COND_EMPTY;
			break;
		case XNVME_SPEC_ZND_STATE_IOPEN:
			zbdz[idx].cond = ZBD_ZONE_COND_IMP_OPEN;
			break;
		case XNVME_SPEC_ZND_STATE_EOPEN:
			zbdz[idx].cond = ZBD_ZONE_COND_EXP_OPEN;
			break;
		case XNVME_SPEC_ZND_STATE_CLOSED:
			zbdz[idx].cond = ZBD_ZONE_COND_CLOSED;
			break;
		case XNVME_SPEC_ZND_STATE_FULL:
			zbdz[idx].cond = ZBD_ZONE_COND_FULL;
			break;

		case XNVME_SPEC_ZND_STATE_RONLY:
		case XNVME_SPEC_ZND_STATE_OFFLINE:
		default:
			zbdz[idx].cond = ZBD_ZONE_COND_OFFLINE;
			break;
		}
	}

exit:
	xnvme_buf_virt_free(rprt);

	xnvme_dev_close(dev);

	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->report_zones(): pthread_mutex_unlock(), err: %d\n", err_lock);

	dprint(FD_ZBD, "err: %d, nr_zones: %d\n", err, (int)nr_zones);

	return err ? err : (int)limit;
}

/**
 * NOTE: This function may get called before I/O engine initialization, that
 * is, before ``_dev_open`` has been called and file-wrapping is set up. In
 * such a case it has to do ``_dev_open`` itself, and shut it down again once
 * it is done resetting the write pointers of the zones.
 */
static int xnvme_fioe_reset_wp(struct thread_data *td, struct fio_file *f, uint64_t offset,
			       uint64_t length)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_fioe_data *xd = NULL;
	struct xnvme_fioe_fwrap *fwrap = NULL;
	struct xnvme_dev *dev = NULL;
	const struct xnvme_geo *geo = NULL;
	uint64_t first, last;
	uint32_t ssw;
	uint32_t nsid;
	int err = 0, err_lock;

	if (td->io_ops_data) {
		xd = td->io_ops_data;
		fwrap = &xd->files[f->fileno];

		assert(fwrap->dev);
		assert(fwrap->geo);

		dev = fwrap->dev;
		geo = fwrap->geo;
		ssw = fwrap->ssw;
	} else {
		err = pthread_mutex_lock(&g_serialize);
		if (err) {
			log_err("ioeng->reset_wp(): pthread_mutex_lock(), err(%d)\n", err);
			return -err;
		}

		dev = xnvme_dev_open(f->file_name, &opts);
		if (!dev) {
			log_err("ioeng->reset_wp(): xnvme_dev_open(%s) failed, errno(%d)\n",
				f->file_name, errno);
			err = -errno;
			goto exit;
		}
		geo = xnvme_dev_get_geo(dev);
		ssw = xnvme_dev_get_ssw(dev);
	}

	nsid = xnvme_dev_get_nsid(dev);

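	/*
	 * 'first' and 'last' are the zone-start LBAs of the zones containing
	 * offset and offset + length; each zone spans geo->nsect LBAs, so
	 * e.g. with 512-byte LBAs and nsect 0x10000 one zone covers 32 MiB of
	 * the byte range.
	 */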
	first = ((offset >> ssw) / geo->nsect) * geo->nsect;
	last = (((offset + length) >> ssw) / geo->nsect) * geo->nsect;
	dprint(FD_ZBD, "first: 0x%lx, last: 0x%lx\n", first, last);

	for (uint64_t zslba = first; zslba < last; zslba += geo->nsect) {
		struct xnvme_cmd_ctx ctx = xnvme_cmd_ctx_from_dev(dev);

		if (zslba >= (geo->nsect * geo->nzone)) {
			log_err("ioeng->reset_wp(): out-of-bounds\n");
			err = 0;
			break;
		}

		err = xnvme_znd_mgmt_send(&ctx, nsid, zslba, false,
					  XNVME_SPEC_ZND_CMD_MGMT_SEND_RESET, 0x0, NULL);
		if (err || xnvme_cmd_ctx_cpl_status(&ctx)) {
			err = err ? err : -EIO;
			log_err("ioeng->reset_wp(): err(%d), sc(%d)\n", err, ctx.cpl.status.sc);
			goto exit;
		}
	}

exit:
	if (!td->io_ops_data) {
		xnvme_dev_close(dev);

		err_lock = pthread_mutex_unlock(&g_serialize);
		if (err_lock)
			log_err("ioeng->reset_wp(): pthread_mutex_unlock(), err(%d)\n", err_lock);
	}

	return err;
}

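/*
 * Fetches the FDP reclaim unit handle status (RUHS) for the namespace via an
 * NVMe I/O Management Receive command and copies the placement identifiers
 * into fio's fruhs_info. Like the zone helpers above, it opens a private
 * device handle rather than reusing the engine's file wrapping.
 */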
static int xnvme_fioe_fetch_ruhs(struct thread_data *td, struct fio_file *f,
				 struct fio_ruhs_info *fruhs_info)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	struct xnvme_spec_ruhs *ruhs;
	struct xnvme_cmd_ctx ctx;
	uint32_t ruhs_nbytes;
	uint32_t nsid;
	int err = 0, err_lock;

	if (f->filetype != FIO_TYPE_CHAR && f->filetype != FIO_TYPE_FILE) {
		log_err("ioeng->fdp_ruhs(): ignoring filetype: %d\n", f->filetype);
		return -EINVAL;
	}

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->fdp_ruhs(): pthread_mutex_lock(), err(%d)\n", err);
		return -err;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->fdp_ruhs(): xnvme_dev_open(%s) failed, errno: %d\n",
			f->file_name, errno);
		err = -errno;
		goto exit;
	}

	ruhs_nbytes = sizeof(*ruhs) + (FDP_MAX_RUHS * sizeof(struct xnvme_spec_ruhs_desc));
	ruhs = xnvme_buf_alloc(dev, ruhs_nbytes);
	if (!ruhs) {
		err = -errno;
		goto exit;
	}
	memset(ruhs, 0, ruhs_nbytes);

	ctx = xnvme_cmd_ctx_from_dev(dev);
	nsid = xnvme_dev_get_nsid(dev);

	err = xnvme_nvm_mgmt_recv(&ctx, nsid, XNVME_SPEC_IO_MGMT_RECV_RUHS, 0, ruhs, ruhs_nbytes);

	if (err || xnvme_cmd_ctx_cpl_status(&ctx)) {
		err = err ? err : -EIO;
		log_err("ioeng->fdp_ruhs(): err(%d), sc(%d)\n", err, ctx.cpl.status.sc);
		goto free_buffer;
	}

	fruhs_info->nr_ruhs = ruhs->nruhsd;
	for (uint32_t idx = 0; idx < fruhs_info->nr_ruhs; ++idx) {
		fruhs_info->plis[idx] = le16_to_cpu(ruhs->desc[idx].pi);
	}

free_buffer:
	xnvme_buf_free(dev, ruhs);
exit:
	xnvme_dev_close(dev);

	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->fdp_ruhs(): pthread_mutex_unlock(), err(%d)\n", err_lock);

	return err;
}

static int xnvme_fioe_get_file_size(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	int ret = 0, err;

	if (fio_file_size_known(f))
		return 0;

	ret = pthread_mutex_lock(&g_serialize);
	if (ret) {
		log_err("ioeng->get_file_size(): pthread_mutex_lock(), err(%d)\n", ret);
		return -ret;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("%s: failed retrieving device handle, errno: %d\n", f->file_name, errno);
		ret = -errno;
		goto exit;
	}

	f->real_file_size = xnvme_dev_get_geo(dev)->tbytes;
	fio_file_set_size_known(f);

	if (td->o.zone_mode == ZONE_MODE_ZBD)
		f->filetype = FIO_TYPE_BLOCK;

exit:
	xnvme_dev_close(dev);
	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->get_file_size(): pthread_mutex_unlock(), err(%d)\n", err);

	return ret;
}

FIO_STATIC struct ioengine_ops ioengine = {
	.name = "xnvme",
	.version = FIO_IOOPS_VERSION,
	.options = options,
	.option_struct_size = sizeof(struct xnvme_fioe_options),
	.flags = FIO_DISKLESSIO | FIO_NODISKUTIL | FIO_NOEXTEND | FIO_MEMALIGN | FIO_RAWIO,

	.cleanup = xnvme_fioe_cleanup,
	.init = xnvme_fioe_init,

	.iomem_free = xnvme_fioe_iomem_free,
	.iomem_alloc = xnvme_fioe_iomem_alloc,

	.io_u_free = xnvme_fioe_io_u_free,
	.io_u_init = xnvme_fioe_io_u_init,

	.event = xnvme_fioe_event,
	.getevents = xnvme_fioe_getevents,
	.queue = xnvme_fioe_queue,

	.close_file = xnvme_fioe_close,
	.open_file = xnvme_fioe_open,
	.get_file_size = xnvme_fioe_get_file_size,

	.invalidate = xnvme_fioe_invalidate,
	.get_max_open_zones = xnvme_fioe_get_max_open_zones,
	.get_zoned_model = xnvme_fioe_get_zoned_model,
	.report_zones = xnvme_fioe_report_zones,
	.reset_wp = xnvme_fioe_reset_wp,

	.fdp_fetch_ruhs = xnvme_fioe_fetch_ruhs,
};

static void fio_init fio_xnvme_register(void)
{
	register_ioengine(&ioengine);
}

static void fio_exit fio_xnvme_unregister(void)
{
	unregister_ioengine(&ioengine);
}