engines/xnvme.c
/*
 * fio xNVMe IO Engine
 *
 * IO engine using the xNVMe C API.
 *
 * See: http://xnvme.io/
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include <stdlib.h>
#include <assert.h>
#include <libxnvme.h>
#include <libxnvme_libconf.h>
#include <libxnvme_nvm.h>
#include <libxnvme_znd.h>
#include <libxnvme_spec_fs.h>
#include "fio.h"
#include "zbd_types.h"
#include "optgroup.h"

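/*
 * Serializes xNVMe device open/close and the ad hoc device handling done by
 * the zoned/size helpers below, some of which may run before the engine has
 * been initialized.
 */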
static pthread_mutex_t g_serialize = PTHREAD_MUTEX_INITIALIZER;

struct xnvme_fioe_fwrap {
	/* fio file representation */
	struct fio_file *fio_file;

	/* xNVMe device handle */
	struct xnvme_dev *dev;
	/* xNVMe device geometry */
	const struct xnvme_geo *geo;

	struct xnvme_queue *queue;

	uint32_t ssw;
	uint32_t lba_nbytes;

	uint8_t _pad[24];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_fwrap) == 64, "Incorrect size")

struct xnvme_fioe_data {
	/* I/O completion queue */
	struct io_u **iocq;

	/* # of iocq entries; incremented via getevents()/cb_pool() */
	uint64_t completed;

	/*
	 * # of errors; incremented when observed on completion via
	 * getevents()/cb_pool()
	 */
	uint64_t ecount;

	/* Controls which device/file to select */
	int32_t prev;
	int32_t cur;

	/* Number of devices/files for which open() has been called */
	int64_t nopen;
	/* Number of devices/files allocated in files[] */
	uint64_t nallocated;

	struct iovec *iovec;

	uint8_t _pad[8];

	struct xnvme_fioe_fwrap files[];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_data) == 64, "Incorrect size")

struct xnvme_fioe_options {
	void *padding;
	unsigned int hipri;
	unsigned int sqpoll_thread;
	unsigned int xnvme_dev_nsid;
	unsigned int xnvme_iovec;
	char *xnvme_be;
	char *xnvme_async;
	char *xnvme_sync;
	char *xnvme_admin;
};

static struct fio_option options[] = {
	{
		.name = "hipri",
		.lname = "High Priority",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, hipri),
		.help = "Use polled IO completions",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "sqthread_poll",
		.lname = "Kernel SQ thread polling",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, sqpoll_thread),
		.help = "Offload submission/completion to kernel thread",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_be",
		.lname = "xNVMe Backend",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_be),
		.help = "Select xNVMe backend [spdk,linux,fbsd]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_async",
		.lname = "xNVMe Asynchronous command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_async),
		.help = "Select xNVMe async. interface: [emu,thrpool,io_uring,libaio,posix,nil]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_sync",
		.lname = "xNVMe Synchronous command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_sync),
		.help = "Select xNVMe sync. interface: [nvme,psync]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_admin",
		.lname = "xNVMe Admin command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_admin),
		.help = "Select xNVMe admin. cmd-interface: [nvme,block,file_as_ns]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_dev_nsid",
		.lname = "xNVMe Namespace-Identifier, for user-space NVMe driver",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_dev_nsid),
		.help = "xNVMe Namespace-Identifier, for user-space NVMe driver",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_iovec",
		.lname = "Vectored IOs",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_iovec),
		.help = "Send vectored IOs",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},

	{
		.name = NULL,
	},
};

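/*
 * Example usage (a sketch, not part of the engine): the engine options above
 * combine with fio's regular options, e.g. something along the lines of
 *
 *   fio --name=job --ioengine=xnvme --thread=1 --filename=/dev/nvme0n1 \
 *       --xnvme_async=io_uring --direct=1 --rw=randread --bs=4k --iodepth=16
 *
 * The device path and values are placeholders; --thread=1 is required by
 * xnvme_fioe_init() below.
 */

/*
 * Completion callback registered on each xnvme_queue: records errors from the
 * completion status, stages the io_u for ->event(), and returns the command
 * context to the queue.
 */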
static void cb_pool(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct io_u *io_u = cb_arg;
	struct xnvme_fioe_data *xd = io_u->mmap_data;

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		xd->ecount += 1;
		io_u->error = EIO;
	}

	xd->iocq[xd->completed++] = io_u;
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}

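/* Map the fio thread- and engine-options onto xnvme_opts for xnvme_dev_open() */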
static struct xnvme_opts xnvme_opts_from_fioe(struct thread_data *td)
{
	struct xnvme_fioe_options *o = td->eo;
	struct xnvme_opts opts = xnvme_opts_default();

	opts.nsid = o->xnvme_dev_nsid;
	opts.be = o->xnvme_be;
	opts.async = o->xnvme_async;
	opts.sync = o->xnvme_sync;
	opts.admin = o->xnvme_admin;

	opts.poll_io = o->hipri;
	opts.poll_sq = o->sqpoll_thread;

	opts.direct = td->o.odirect;

	return opts;
}

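/* Tear down the queue and close the device handle of a single file wrapper */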
static void _dev_close(struct thread_data *td, struct xnvme_fioe_fwrap *fwrap)
{
	if (fwrap->dev)
		xnvme_queue_term(fwrap->queue);

	xnvme_dev_close(fwrap->dev);

	memset(fwrap, 0, sizeof(*fwrap));
}

static void xnvme_fioe_cleanup(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	int err;

	if (!td->io_ops_data)
		return;

	xd = td->io_ops_data;

	err = pthread_mutex_lock(&g_serialize);
	if (err)
		log_err("ioeng->cleanup(): pthread_mutex_lock(), err(%d)\n", err);
		/* NOTE: not returning here */

	for (uint64_t i = 0; i < xd->nallocated; ++i)
		_dev_close(td, &xd->files[i]);

	if (!err) {
		err = pthread_mutex_unlock(&g_serialize);
		if (err)
			log_err("ioeng->cleanup(): pthread_mutex_unlock(), err(%d)\n", err);
	}

	free(xd->iocq);
	free(xd->iovec);
	free(xd);
	td->io_ops_data = NULL;
}

/**
 * Helper function that sets up a device handle for the given `fio_file`,
 * addressed by its filename.
 *
 * Checks the thread-options for explicit control of the asynchronous
 * implementation via ``--xnvme_async={thrpool,emu,posix,io_uring,libaio,nil}``.
 */
static int _dev_open(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap;
	int flags = 0;
	int err;

	if (f->fileno > (int)xd->nallocated) {
		log_err("ioeng->_dev_open(%s): invalid assumption\n", f->file_name);
		return 1;
	}

	fwrap = &xd->files[f->fileno];

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->_dev_open(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
			err);
		return -err;
	}

	fwrap->dev = xnvme_dev_open(f->file_name, &opts);
	if (!fwrap->dev) {
		log_err("ioeng->_dev_open(%s): xnvme_dev_open(), err(%d)\n", f->file_name, errno);
		goto failure;
	}
	fwrap->geo = xnvme_dev_get_geo(fwrap->dev);

	if (xnvme_queue_init(fwrap->dev, td->o.iodepth, flags, &(fwrap->queue))) {
		log_err("ioeng->_dev_open(%s): xnvme_queue_init(), err(?)\n", f->file_name);
		goto failure;
	}
	xnvme_queue_set_cb(fwrap->queue, cb_pool, NULL);

	fwrap->ssw = xnvme_dev_get_ssw(fwrap->dev);
	fwrap->lba_nbytes = fwrap->geo->lba_nbytes;

	fwrap->fio_file = f;
	fwrap->fio_file->filetype = FIO_TYPE_BLOCK;
	fwrap->fio_file->real_file_size = fwrap->geo->tbytes;
	fio_file_set_size_known(fwrap->fio_file);

	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
			err);

	return 0;

failure:
	xnvme_queue_term(fwrap->queue);
	xnvme_dev_close(fwrap->dev);

	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
			err);

	return 1;
}

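/*
 * Allocates the per-thread engine data (xd, iocq, iovec) and opens an xNVMe
 * device handle plus a queue for each file; requires --thread=1.
 */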
static int xnvme_fioe_init(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	struct fio_file *f;
	unsigned int i;

	if (!td->o.use_thread) {
		log_err("ioeng->init(): --thread=1 is required\n");
		return 1;
	}

	/* Allocate xd and iocq */
	xd = calloc(1, sizeof(*xd) + sizeof(*xd->files) * td->o.nr_files);
	if (!xd) {
		log_err("ioeng->init(): !calloc(), err(%d)\n", errno);
		return 1;
	}

	xd->iocq = calloc(td->o.iodepth, sizeof(struct io_u *));
	if (!xd->iocq) {
		log_err("ioeng->init(): !calloc(), err(%d)\n", errno);
		return 1;
	}

	xd->iovec = calloc(td->o.iodepth, sizeof(*xd->iovec));
	if (!xd->iovec) {
		log_err("ioeng->init(): !calloc(xd->iovec), err(%d)\n", errno);
		return 1;
	}

	xd->prev = -1;
	td->io_ops_data = xd;

	for_each_file(td, f, i)
	{
		if (_dev_open(td, f)) {
			log_err("ioeng->init(): failed; _dev_open(%s)\n", f->file_name);
			return 1;
		}

		++(xd->nallocated);
	}

	if (xd->nallocated != td->o.nr_files) {
		log_err("ioeng->init(): failed; nallocated != td->o.nr_files\n");
		return 1;
	}

	return 0;
}

/* NOTE: using the first device for buffer-allocators */
static int xnvme_fioe_iomem_alloc(struct thread_data *td, size_t total_mem)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap = &xd->files[0];

	if (!fwrap->dev) {
		log_err("ioeng->iomem_alloc(): failed; no dev-handle\n");
		return 1;
	}

	td->orig_buffer = xnvme_buf_alloc(fwrap->dev, total_mem);

	return td->orig_buffer == NULL;
}

/* NOTE: using the first device for buffer-allocators */
static void xnvme_fioe_iomem_free(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	struct xnvme_fioe_fwrap *fwrap = NULL;

	if (!td->io_ops_data)
		return;

	xd = td->io_ops_data;
	fwrap = &xd->files[0];

	if (!fwrap->dev) {
		log_err("ioeng->iomem_free(): failed; no dev-handle\n");
		return;
	}

	xnvme_buf_free(fwrap->dev, td->orig_buffer);
}

static int xnvme_fioe_io_u_init(struct thread_data *td, struct io_u *io_u)
{
	io_u->mmap_data = td->io_ops_data;

	return 0;
}

static void xnvme_fioe_io_u_free(struct thread_data *td, struct io_u *io_u)
{
	io_u->mmap_data = NULL;
}

static struct io_u *xnvme_fioe_event(struct thread_data *td, int event)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	assert(event >= 0);
	assert((unsigned)event < xd->completed);

	return xd->iocq[event];
}

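/*
 * Reap completions by poking the xnvme_queue of each open file in a
 * round-robin fashion, resuming from the file that satisfied the previous
 * call, until at least `min` events have been gathered in xd->iocq.
 */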
static int xnvme_fioe_getevents(struct thread_data *td, unsigned int min, unsigned int max,
				const struct timespec *t)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap = NULL;
	int nfiles = xd->nallocated;
	int err = 0;

	if (xd->prev != -1 && ++xd->prev < nfiles) {
		fwrap = &xd->files[xd->prev];
		xd->cur = xd->prev;
	}

	xd->completed = 0;
	for (;;) {
		if (fwrap == NULL || xd->cur == nfiles) {
			fwrap = &xd->files[0];
			xd->cur = 0;
		}

		while (fwrap != NULL && xd->cur < nfiles && err >= 0) {
			err = xnvme_queue_poke(fwrap->queue, max - xd->completed);
			if (err < 0) {
				switch (err) {
				case -EBUSY:
				case -EAGAIN:
					usleep(1);
					break;

				default:
					log_err("ioeng->getevents(): unhandled IO error\n");
					assert(false);
					return 0;
				}
			}
			if (xd->completed >= min) {
				xd->prev = xd->cur;
				return xd->completed;
			}
			xd->cur++;
			fwrap = &xd->files[xd->cur];

			if (err < 0) {
				switch (err) {
				case -EBUSY:
				case -EAGAIN:
					usleep(1);
					break;
				}
			}
		}
	}

	xd->cur = 0;

	return xd->completed;
}

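/*
 * Convert an io_u into an NVMe read/write command: offset and length are
 * shifted by the device's sector-shift-width (ssw) to form SLBA/NLB, and the
 * command is submitted on the file's queue, optionally as a vectored I/O.
 */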
static enum fio_q_status xnvme_fioe_queue(struct thread_data *td, struct io_u *io_u)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap;
	struct xnvme_cmd_ctx *ctx;
	uint32_t nsid;
	uint64_t slba;
	uint16_t nlb;
	int err;
	bool vectored_io = ((struct xnvme_fioe_options *)td->eo)->xnvme_iovec;

	fio_ro_check(td, io_u);

	fwrap = &xd->files[io_u->file->fileno];
	nsid = xnvme_dev_get_nsid(fwrap->dev);

	slba = io_u->offset >> fwrap->ssw;
	nlb = (io_u->xfer_buflen >> fwrap->ssw) - 1;

	ctx = xnvme_queue_get_cmd_ctx(fwrap->queue);
	ctx->async.cb_arg = io_u;

	ctx->cmd.common.nsid = nsid;
	ctx->cmd.nvm.slba = slba;
	ctx->cmd.nvm.nlb = nlb;

	switch (io_u->ddir) {
	case DDIR_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		break;

	case DDIR_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		break;

	default:
		log_err("ioeng->queue(): ENOSYS: %u\n", io_u->ddir);
		err = -1;
		assert(false);
		break;
	}

	if (vectored_io) {
		xd->iovec[io_u->index].iov_base = io_u->xfer_buf;
		xd->iovec[io_u->index].iov_len = io_u->xfer_buflen;

		err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen, NULL, 0,
				      0);
	} else {
		err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen, NULL, 0);
	}
	switch (err) {
	case 0:
		return FIO_Q_QUEUED;

	case -EBUSY:
	case -EAGAIN:
		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
		return FIO_Q_BUSY;

	default:
		log_err("ioeng->queue(): err: '%d'\n", err);

		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);

		io_u->error = abs(err);
		assert(false);
		return FIO_Q_COMPLETED;
	}
}

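/*
 * Devices are opened up-front in xnvme_fioe_init()/_dev_open(); open_file()
 * below only sanity-checks the fio_file, and both hooks just maintain the
 * nopen counter.
 */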
static int xnvme_fioe_close(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	dprint(FD_FILE, "xnvme close %s -- nopen: %ld\n", f->file_name, xd->nopen);

	--(xd->nopen);

	return 0;
}

static int xnvme_fioe_open(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	dprint(FD_FILE, "xnvme open %s -- nopen: %ld\n", f->file_name, xd->nopen);

	if (f->fileno > (int)xd->nallocated) {
		log_err("ioeng->open(): f->fileno > xd->nallocated; invalid assumption\n");
		return 1;
	}
	if (xd->files[f->fileno].fio_file != f) {
		log_err("ioeng->open(): fio_file != f; invalid assumption\n");
		return 1;
	}

	++(xd->nopen);

	return 0;
}

static int xnvme_fioe_invalidate(struct thread_data *td, struct fio_file *f)
{
	/* Consider only doing this with be:spdk */
	return 0;
}

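/*
 * Reports the namespace's Maximum Open Resources (MOR) as fio's
 * max_open_zones; opens the device ad hoc since this may run before the
 * engine has been initialized.
 */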
static int xnvme_fioe_get_max_open_zones(struct thread_data *td, struct fio_file *f,
					 unsigned int *max_open_zones)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	const struct xnvme_spec_znd_idfy_ns *zns;
	int err = 0, err_lock;

	if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
	    f->filetype != FIO_TYPE_CHAR) {
		log_info("ioeng->get_max_open_zones(): ignoring filetype: %d\n", f->filetype);
		return 0;
	}
	err_lock = pthread_mutex_lock(&g_serialize);
	if (err_lock) {
		log_err("ioeng->get_max_open_zones(): pthread_mutex_lock(), err(%d)\n", err_lock);
		return -err_lock;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->get_max_open_zones(): xnvme_dev_open(), err(%d)\n", errno);
		err = -errno;
		goto exit;
	}
	if (xnvme_dev_get_geo(dev)->type != XNVME_GEO_ZONED) {
		errno = EINVAL;
		err = -errno;
		goto exit;
	}

	zns = (void *)xnvme_dev_get_ns_css(dev);
	if (!zns) {
		log_err("ioeng->get_max_open_zones(): xnvme_dev_get_ns_css(), err(%d)\n", errno);
		err = -errno;
		goto exit;
	}

	/*
	 * Intentional overflow: the reported value is zero-based and NVMe
	 * defines 0xFFFFFFFF as unlimited, so adding one wraps to 0, which is
	 * how fio indicates unlimited; otherwise this simply converts the
	 * value to a one-based count.
	 */
	*max_open_zones = zns->mor + 1;

exit:
	xnvme_dev_close(dev);
	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->get_max_open_zones(): pthread_mutex_unlock(), err(%d)\n",
			err_lock);

	return err;
}

/**
 * Currently, this function is called before I/O-engine initialization, so we
 * cannot consult the file-wrapping done when 'fioe' initializes.
 * Instead we just open based on the given filename.
 *
 * TODO: unify the different setup methods, consider keeping the handle around,
 * and consider how to support the --be option in this use case.
 */
static int xnvme_fioe_get_zoned_model(struct thread_data *td, struct fio_file *f,
				      enum zbd_zoned_model *model)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	int err = 0, err_lock;

	if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
	    f->filetype != FIO_TYPE_CHAR) {
		log_info("ioeng->get_zoned_model(): ignoring filetype: %d\n", f->filetype);
		return -EINVAL;
	}

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->get_zoned_model(): pthread_mutex_lock(), err(%d)\n", err);
		return -err;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->get_zoned_model(): xnvme_dev_open(%s) failed, errno: %d\n",
			f->file_name, errno);
		err = -errno;
		goto exit;
	}

	switch (xnvme_dev_get_geo(dev)->type) {
	case XNVME_GEO_UNKNOWN:
		dprint(FD_ZBD, "%s: got 'unknown', assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		break;

	case XNVME_GEO_CONVENTIONAL:
		dprint(FD_ZBD, "%s: got 'conventional', assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		break;

	case XNVME_GEO_ZONED:
		dprint(FD_ZBD, "%s: got 'zoned', assigning ZBD_HOST_MANAGED\n", f->file_name);
		*model = ZBD_HOST_MANAGED;
		break;

	default:
		dprint(FD_ZBD, "%s: hit-default, assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		errno = EINVAL;
		err = -errno;
		break;
	}

exit:
	xnvme_dev_close(dev);

	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->get_zoned_model(): pthread_mutex_unlock(), err(%d)\n", err_lock);

	return err;
}

/**
 * Fills the given ``zbdz`` with at most ``nr_zones`` zone-descriptors.
 *
 * The implementation converts the NVMe Zoned Command Set log-pages for Zone
 * descriptors into the Linux Kernel Zoned Block Report format.
 *
 * NOTE: This function is called before I/O-engine initialization, that is,
 * before ``_dev_open`` has been called and file-wrapping is set up. Thus it
 * has to do the ``_dev_open`` itself, and shut it down again once it is done
 * retrieving the log-pages and converting them to the report format.
 *
 * TODO: unify the different setup methods, consider keeping the handle around,
 * and consider how to support the --async option in this use case.
 */
static int xnvme_fioe_report_zones(struct thread_data *td, struct fio_file *f, uint64_t offset,
				   struct zbd_zone *zbdz, unsigned int nr_zones)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	const struct xnvme_spec_znd_idfy_lbafe *lbafe = NULL;
	struct xnvme_dev *dev = NULL;
	const struct xnvme_geo *geo = NULL;
	struct xnvme_znd_report *rprt = NULL;
	uint32_t ssw;
	uint64_t slba;
	unsigned int limit = 0;
	int err = 0, err_lock;

	dprint(FD_ZBD, "%s: report_zones() offset: %zu, nr_zones: %u\n", f->file_name, offset,
	       nr_zones);

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->report_zones(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
			err);
		return -err;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->report_zones(%s): xnvme_dev_open(), err(%d)\n", f->file_name,
			errno);
		err = -errno;
		goto exit;
	}

	geo = xnvme_dev_get_geo(dev);
	ssw = xnvme_dev_get_ssw(dev);
	lbafe = xnvme_znd_dev_get_lbafe(dev);

	limit = nr_zones > geo->nzone ? geo->nzone : nr_zones;

	dprint(FD_ZBD, "%s: limit: %u\n", f->file_name, limit);

	slba = ((offset >> ssw) / geo->nsect) * geo->nsect;

	rprt = xnvme_znd_report_from_dev(dev, slba, limit, 0);
	if (!rprt) {
		log_err("ioeng->report_zones(%s): xnvme_znd_report_from_dev(), err(%d)\n",
			f->file_name, errno);
		err = -errno;
		goto exit;
	}
	if (rprt->nentries != limit) {
		log_err("ioeng->report_zones(%s): nentries != nr_zones\n", f->file_name);
		err = 1;
		goto exit;
	}
	if (offset > geo->tbytes) {
		log_err("ioeng->report_zones(%s): out-of-bounds\n", f->file_name);
		goto exit;
	}

	/* Transform the zone-report */
	for (uint32_t idx = 0; idx < rprt->nentries; ++idx) {
		struct xnvme_spec_znd_descr *descr = XNVME_ZND_REPORT_DESCR(rprt, idx);

		zbdz[idx].start = descr->zslba << ssw;
		zbdz[idx].len = lbafe->zsze << ssw;
		zbdz[idx].capacity = descr->zcap << ssw;
		zbdz[idx].wp = descr->wp << ssw;

		switch (descr->zt) {
		case XNVME_SPEC_ZND_TYPE_SEQWR:
			zbdz[idx].type = ZBD_ZONE_TYPE_SWR;
			break;

		default:
			log_err("ioeng->report_zones(%s): invalid type for zone at offset(%zu)\n",
				f->file_name, zbdz[idx].start);
			err = -EIO;
			goto exit;
		}

		switch (descr->zs) {
		case XNVME_SPEC_ZND_STATE_EMPTY:
			zbdz[idx].cond = ZBD_ZONE_COND_EMPTY;
			break;
		case XNVME_SPEC_ZND_STATE_IOPEN:
			zbdz[idx].cond = ZBD_ZONE_COND_IMP_OPEN;
			break;
		case XNVME_SPEC_ZND_STATE_EOPEN:
			zbdz[idx].cond = ZBD_ZONE_COND_EXP_OPEN;
			break;
		case XNVME_SPEC_ZND_STATE_CLOSED:
			zbdz[idx].cond = ZBD_ZONE_COND_CLOSED;
			break;
		case XNVME_SPEC_ZND_STATE_FULL:
			zbdz[idx].cond = ZBD_ZONE_COND_FULL;
			break;

		case XNVME_SPEC_ZND_STATE_RONLY:
		case XNVME_SPEC_ZND_STATE_OFFLINE:
		default:
			zbdz[idx].cond = ZBD_ZONE_COND_OFFLINE;
			break;
		}
	}

exit:
	xnvme_buf_virt_free(rprt);

	xnvme_dev_close(dev);

	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->report_zones(): pthread_mutex_unlock(), err: %d\n", err_lock);

	dprint(FD_ZBD, "err: %d, nr_zones: %d\n", err, (int)nr_zones);

	return err ? err : (int)limit;
}

/**
 * NOTE: This function may get called before I/O-engine initialization, that
 * is, before ``_dev_open`` has been called and file-wrapping is set up. In
 * that case it has to do ``_dev_open`` itself, and shut it down again once it
 * is done resetting the write pointer of the zones.
 */
static int xnvme_fioe_reset_wp(struct thread_data *td, struct fio_file *f, uint64_t offset,
			       uint64_t length)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_fioe_data *xd = NULL;
	struct xnvme_fioe_fwrap *fwrap = NULL;
	struct xnvme_dev *dev = NULL;
	const struct xnvme_geo *geo = NULL;
	uint64_t first, last;
	uint32_t ssw;
	uint32_t nsid;
	int err = 0, err_lock;

	if (td->io_ops_data) {
		xd = td->io_ops_data;
		fwrap = &xd->files[f->fileno];

		assert(fwrap->dev);
		assert(fwrap->geo);

		dev = fwrap->dev;
		geo = fwrap->geo;
		ssw = fwrap->ssw;
	} else {
		err = pthread_mutex_lock(&g_serialize);
		if (err) {
			log_err("ioeng->reset_wp(): pthread_mutex_lock(), err(%d)\n", err);
			return -err;
		}

		dev = xnvme_dev_open(f->file_name, &opts);
		if (!dev) {
			log_err("ioeng->reset_wp(): xnvme_dev_open(%s) failed, errno(%d)\n",
				f->file_name, errno);
			goto exit;
		}
		geo = xnvme_dev_get_geo(dev);
		ssw = xnvme_dev_get_ssw(dev);
	}

	nsid = xnvme_dev_get_nsid(dev);

	first = ((offset >> ssw) / geo->nsect) * geo->nsect;
	last = (((offset + length) >> ssw) / geo->nsect) * geo->nsect;
	dprint(FD_ZBD, "first: 0x%lx, last: 0x%lx\n", first, last);

	for (uint64_t zslba = first; zslba < last; zslba += geo->nsect) {
		struct xnvme_cmd_ctx ctx = xnvme_cmd_ctx_from_dev(dev);

		if (zslba >= (geo->nsect * geo->nzone)) {
			log_err("ioeng->reset_wp(): out-of-bounds\n");
			err = 0;
			break;
		}

		err = xnvme_znd_mgmt_send(&ctx, nsid, zslba, false,
					  XNVME_SPEC_ZND_CMD_MGMT_SEND_RESET, 0x0, NULL);
		if (err || xnvme_cmd_ctx_cpl_status(&ctx)) {
			err = err ? err : -EIO;
			log_err("ioeng->reset_wp(): err(%d), sc(%d)\n", err, ctx.cpl.status.sc);
			goto exit;
		}
	}

exit:
	if (!td->io_ops_data) {
		xnvme_dev_close(dev);

		err_lock = pthread_mutex_unlock(&g_serialize);
		if (err_lock)
			log_err("ioeng->reset_wp(): pthread_mutex_unlock(), err(%d)\n", err_lock);
	}

	return err;
}

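/*
 * Determine the device size; may run before init, so the device is opened ad
 * hoc here and closed again once the geometry has been read.
 */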
static int xnvme_fioe_get_file_size(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	int ret = 0, err;

	if (fio_file_size_known(f))
		return 0;

	ret = pthread_mutex_lock(&g_serialize);
	if (ret) {
		log_err("ioeng->get_file_size(): pthread_mutex_lock(), err(%d)\n", ret);
		return -ret;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("%s: failed retrieving device handle, errno: %d\n", f->file_name, errno);
		ret = -errno;
		goto exit;
	}

	f->real_file_size = xnvme_dev_get_geo(dev)->tbytes;
	fio_file_set_size_known(f);
	f->filetype = FIO_TYPE_BLOCK;

exit:
	xnvme_dev_close(dev);
	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->get_file_size(): pthread_mutex_unlock(), err(%d)\n", err);

	return ret;
}

FIO_STATIC struct ioengine_ops ioengine = {
	.name = "xnvme",
	.version = FIO_IOOPS_VERSION,
	.options = options,
	.option_struct_size = sizeof(struct xnvme_fioe_options),
	.flags = FIO_DISKLESSIO | FIO_NODISKUTIL | FIO_NOEXTEND | FIO_MEMALIGN | FIO_RAWIO,

	.cleanup = xnvme_fioe_cleanup,
	.init = xnvme_fioe_init,

	.iomem_free = xnvme_fioe_iomem_free,
	.iomem_alloc = xnvme_fioe_iomem_alloc,

	.io_u_free = xnvme_fioe_io_u_free,
	.io_u_init = xnvme_fioe_io_u_init,

	.event = xnvme_fioe_event,
	.getevents = xnvme_fioe_getevents,
	.queue = xnvme_fioe_queue,

	.close_file = xnvme_fioe_close,
	.open_file = xnvme_fioe_open,
	.get_file_size = xnvme_fioe_get_file_size,

	.invalidate = xnvme_fioe_invalidate,
	.get_max_open_zones = xnvme_fioe_get_max_open_zones,
	.get_zoned_model = xnvme_fioe_get_zoned_model,
	.report_zones = xnvme_fioe_report_zones,
	.reset_wp = xnvme_fioe_reset_wp,
};

static void fio_init fio_xnvme_register(void)
{
	register_ioengine(&ioengine);
}

static void fio_exit fio_xnvme_unregister(void)
{
	unregister_ioengine(&ioengine);
}