engines/xnvme: add xnvme engine
/*
 * fio xNVMe IO Engine
 *
 * IO engine using the xNVMe C API.
 *
 * See: http://xnvme.io/
 *
 * SPDX-License-Identifier: Apache-2.0
 */
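/*
 * Example job (illustrative only; the device path and async backend are
 * assumptions, adjust them to your setup):
 *
 *   fio --name=xnvme-test --ioengine=xnvme --thread \
 *       --filename=/dev/nvme0n1 --xnvme_async=io_uring \
 *       --rw=randread --bs=4k --iodepth=16
 */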
#include <stdlib.h>
#include <assert.h>
#include <libxnvme.h>
#include <libxnvme_libconf.h>
#include <libxnvme_nvm.h>
#include <libxnvme_znd.h>
#include <libxnvme_spec_fs.h>
#include "fio.h"
#include "zbd_types.h"
#include "optgroup.h"

static pthread_mutex_t g_serialize = PTHREAD_MUTEX_INITIALIZER;

struct xnvme_fioe_fwrap {
	/* fio file representation */
	struct fio_file *fio_file;

	/* xNVMe device handle */
	struct xnvme_dev *dev;
	/* xNVMe device geometry */
	const struct xnvme_geo *geo;

	struct xnvme_queue *queue;

	uint32_t ssw;
	uint32_t lba_nbytes;

	uint8_t _pad[24];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_fwrap) == 64, "Incorrect size")

struct xnvme_fioe_data {
	/* I/O completion queue */
	struct io_u **iocq;

	/* # of iocq entries; incremented via getevents()/cb_pool() */
	uint64_t completed;

	/*
	 * # of errors; incremented when observed on completion via
	 * getevents()/cb_pool()
	 */
	uint64_t ecount;

	/* Controls which device/file to select */
	int32_t prev;
	int32_t cur;

	/* Number of devices/files for which open() has been called */
	int64_t nopen;
	/* Number of devices/files allocated in files[] */
	uint64_t nallocated;

	struct iovec *iovec;

	uint8_t _pad[8];

	struct xnvme_fioe_fwrap files[];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_data) == 64, "Incorrect size")

struct xnvme_fioe_options {
	void *padding;
	unsigned int hipri;
	unsigned int sqpoll_thread;
	unsigned int xnvme_dev_nsid;
	unsigned int xnvme_iovec;
	char *xnvme_be;
	char *xnvme_async;
	char *xnvme_sync;
	char *xnvme_admin;
};

static struct fio_option options[] = {
	{
		.name = "hipri",
		.lname = "High Priority",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, hipri),
		.help = "Use polled IO completions",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "sqthread_poll",
		.lname = "Kernel SQ thread polling",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, sqpoll_thread),
		.help = "Offload submission/completion to kernel thread",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_be",
		.lname = "xNVMe Backend",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_be),
		.help = "Select xNVMe backend [spdk,linux,fbsd]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_async",
		.lname = "xNVMe Asynchronous command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_async),
		.help = "Select xNVMe async. interface: [emu,thrpool,io_uring,libaio,posix,nil]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_sync",
		.lname = "xNVMe Synchronous command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_sync),
		.help = "Select xNVMe sync. interface: [nvme,psync]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_admin",
		.lname = "xNVMe Admin command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_admin),
		.help = "Select xNVMe admin. cmd-interface: [nvme,block,file_as_ns]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_dev_nsid",
		.lname = "xNVMe Namespace-Identifier, for user-space NVMe driver",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_dev_nsid),
		.help = "xNVMe Namespace-Identifier, for user-space NVMe driver",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_iovec",
		.lname = "Vectored IOs",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_iovec),
		.help = "Send vectored IOs",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},

	{
		.name = NULL,
	},
};

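/*
 * Completion callback, invoked by xnvme_queue_poke() once per completed
 * command: it records any command error on the io_u, stashes the io_u in the
 * completion queue consumed by ->event(), and recycles the command-context.
 */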
static void cb_pool(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct io_u *io_u = cb_arg;
	struct xnvme_fioe_data *xd = io_u->mmap_data;

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		xd->ecount += 1;
		io_u->error = EIO;
	}

	xd->iocq[xd->completed++] = io_u;
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}

static struct xnvme_opts xnvme_opts_from_fioe(struct thread_data *td)
{
	struct xnvme_fioe_options *o = td->eo;
	struct xnvme_opts opts = xnvme_opts_default();

	opts.nsid = o->xnvme_dev_nsid;
	opts.be = o->xnvme_be;
	opts.async = o->xnvme_async;
	opts.sync = o->xnvme_sync;
	opts.admin = o->xnvme_admin;

	opts.poll_io = o->hipri;
	opts.poll_sq = o->sqpoll_thread;

	opts.direct = td->o.odirect;

	return opts;
}

static void _dev_close(struct thread_data *td, struct xnvme_fioe_fwrap *fwrap)
{
	if (fwrap->dev)
		xnvme_queue_term(fwrap->queue);

	xnvme_dev_close(fwrap->dev);

	memset(fwrap, 0, sizeof(*fwrap));
}

static void xnvme_fioe_cleanup(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	int err;

	err = pthread_mutex_lock(&g_serialize);
	if (err)
		log_err("ioeng->cleanup(): pthread_mutex_lock(), err(%d)\n", err);
		/* NOTE: not returning here */

	for (uint64_t i = 0; i < xd->nallocated; ++i)
		_dev_close(td, &xd->files[i]);

	if (!err) {
		err = pthread_mutex_unlock(&g_serialize);
		if (err)
			log_err("ioeng->cleanup(): pthread_mutex_unlock(), err(%d)\n", err);
	}

	free(xd->iocq);
	free(xd->iovec);
	free(xd);
	td->io_ops_data = NULL;
}

/**
 * Helper function setting up device handles as addressed by the naming
 * convention of the given `fio_file` filename.
 *
 * Checks thread-options for explicit control of the asynchronous implementation
 * via the ``--xnvme_async={thrpool,emu,posix,io_uring,libaio,nil}`` option.
 */
static int _dev_open(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap;
	int flags = 0;
	int err;

	if (f->fileno > (int)xd->nallocated) {
		log_err("ioeng->_dev_open(%s): invalid assumption\n", f->file_name);
		return 1;
	}

	fwrap = &xd->files[f->fileno];

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->_dev_open(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
			err);
		return -err;
	}

	fwrap->dev = xnvme_dev_open(f->file_name, &opts);
	if (!fwrap->dev) {
		log_err("ioeng->_dev_open(%s): xnvme_dev_open(), err(%d)\n", f->file_name, errno);
		goto failure;
	}
	fwrap->geo = xnvme_dev_get_geo(fwrap->dev);

	if (xnvme_queue_init(fwrap->dev, td->o.iodepth, flags, &(fwrap->queue))) {
		log_err("ioeng->_dev_open(%s): xnvme_queue_init(), err(?)\n", f->file_name);
		goto failure;
	}
	xnvme_queue_set_cb(fwrap->queue, cb_pool, NULL);

	fwrap->ssw = xnvme_dev_get_ssw(fwrap->dev);
	fwrap->lba_nbytes = fwrap->geo->lba_nbytes;

	fwrap->fio_file = f;
	fwrap->fio_file->filetype = FIO_TYPE_BLOCK;
	fwrap->fio_file->real_file_size = fwrap->geo->tbytes;
	fio_file_set_size_known(fwrap->fio_file);

	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
			err);

	return 0;

failure:
	xnvme_queue_term(fwrap->queue);
	xnvme_dev_close(fwrap->dev);

	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
			err);

	return 1;
}

static int xnvme_fioe_init(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	struct fio_file *f;
	unsigned int i;

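	/*
	 * The engine requires thread-based workers (--thread); forked worker
	 * processes would, for instance, not share the g_serialize mutex used
	 * to serialize device open/close.
	 */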
	if (!td->o.use_thread) {
		log_err("ioeng->init(): --thread=1 is required\n");
		return 1;
	}

	/* Allocate xd and iocq */
	xd = calloc(1, sizeof(*xd) + sizeof(*xd->files) * td->o.nr_files);
	if (!xd) {
		log_err("ioeng->init(): !calloc(), err(%d)\n", errno);
		return 1;
	}

	xd->iocq = calloc(td->o.iodepth, sizeof(struct io_u *));
	if (!xd->iocq) {
		log_err("ioeng->init(): !calloc(), err(%d)\n", errno);
		return 1;
	}

	xd->iovec = calloc(td->o.iodepth, sizeof(*xd->iovec));
	if (!xd->iovec) {
		log_err("ioeng->init(): !calloc(xd->iovec), err(%d)\n", errno);
		return 1;
	}

	xd->prev = -1;
	td->io_ops_data = xd;

	for_each_file(td, f, i)
	{
		if (_dev_open(td, f)) {
			log_err("ioeng->init(): failed; _dev_open(%s)\n", f->file_name);
			return 1;
		}

		++(xd->nallocated);
	}

	if (xd->nallocated != td->o.nr_files) {
		log_err("ioeng->init(): failed; nallocated != td->o.nr_files\n");
		return 1;
	}

	return 0;
}

/* NOTE: using the first device for buffer-allocators */
static int xnvme_fioe_iomem_alloc(struct thread_data *td, size_t total_mem)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap = &xd->files[0];

	if (!fwrap->dev) {
		log_err("ioeng->iomem_alloc(): failed; no dev-handle\n");
		return 1;
	}

	td->orig_buffer = xnvme_buf_alloc(fwrap->dev, total_mem);

	return td->orig_buffer == NULL;
}

/* NOTE: using the first device for buffer-allocators */
static void xnvme_fioe_iomem_free(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap = &xd->files[0];

	if (!fwrap->dev) {
		log_err("ioeng->iomem_free(): failed; no dev-handle\n");
		return;
	}

	xnvme_buf_free(fwrap->dev, td->orig_buffer);
}

static int xnvme_fioe_io_u_init(struct thread_data *td, struct io_u *io_u)
{
	io_u->mmap_data = td->io_ops_data;

	return 0;
}

static void xnvme_fioe_io_u_free(struct thread_data *td, struct io_u *io_u)
{
	io_u->mmap_data = NULL;
}

static struct io_u *xnvme_fioe_event(struct thread_data *td, int event)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	assert(event >= 0);
	assert((unsigned)event < xd->completed);

	return xd->iocq[event];
}

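/*
 * Reap completions by polling the queues of the open devices round-robin:
 * resume from the device after the one that satisfied the previous call
 * (tracked in 'prev'), and keep going until at least 'min' completions have
 * been gathered.
 */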
static int xnvme_fioe_getevents(struct thread_data *td, unsigned int min, unsigned int max,
				const struct timespec *t)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap = NULL;
	int nfiles = xd->nallocated;
	int err = 0;

	if (xd->prev != -1 && ++xd->prev < nfiles) {
		fwrap = &xd->files[xd->prev];
		xd->cur = xd->prev;
	}

	xd->completed = 0;
	for (;;) {
		if (fwrap == NULL || xd->cur == nfiles) {
			fwrap = &xd->files[0];
			xd->cur = 0;
		}

		while (fwrap != NULL && xd->cur < nfiles && err >= 0) {
			err = xnvme_queue_poke(fwrap->queue, max - xd->completed);
			if (err < 0) {
				switch (err) {
				case -EBUSY:
				case -EAGAIN:
					usleep(1);
					break;

				default:
					log_err("ioeng->getevents(): unhandled IO error\n");
					assert(false);
					return 0;
				}
			}
			if (xd->completed >= min) {
				xd->prev = xd->cur;
				return xd->completed;
			}
			xd->cur++;
			fwrap = &xd->files[xd->cur];

			if (err < 0) {
				switch (err) {
				case -EBUSY:
				case -EAGAIN:
					usleep(1);
					break;
				}
			}
		}
	}

	xd->cur = 0;

	return xd->completed;
}

static enum fio_q_status xnvme_fioe_queue(struct thread_data *td, struct io_u *io_u)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap;
	struct xnvme_cmd_ctx *ctx;
	uint32_t nsid;
	uint64_t slba;
	uint16_t nlb;
	int err;
	bool vectored_io = ((struct xnvme_fioe_options *)td->eo)->xnvme_iovec;

	fio_ro_check(td, io_u);

	fwrap = &xd->files[io_u->file->fileno];
	nsid = xnvme_dev_get_nsid(fwrap->dev);

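	/*
	 * Translate the byte-addressed fio request into LBAs; 'ssw' is the
	 * device's sector shift-width. E.g. with 4096-byte LBAs (ssw = 12), an
	 * 8 KiB transfer at offset 16384 becomes slba = 4, nlb = 1 (zero-based).
	 */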
	slba = io_u->offset >> fwrap->ssw;
	nlb = (io_u->xfer_buflen >> fwrap->ssw) - 1;

	ctx = xnvme_queue_get_cmd_ctx(fwrap->queue);
	ctx->async.cb_arg = io_u;

	ctx->cmd.common.nsid = nsid;
	ctx->cmd.nvm.slba = slba;
	ctx->cmd.nvm.nlb = nlb;

	switch (io_u->ddir) {
	case DDIR_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		break;

	case DDIR_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		break;

	default:
		log_err("ioeng->queue(): ENOSYS: %u\n", io_u->ddir);
		err = -1;
		assert(false);
		break;
	}

	if (vectored_io) {
		xd->iovec[io_u->index].iov_base = io_u->xfer_buf;
		xd->iovec[io_u->index].iov_len = io_u->xfer_buflen;

		err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen, NULL, 0,
				      0);
	} else {
		err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen, NULL, 0);
	}
	switch (err) {
	case 0:
		return FIO_Q_QUEUED;

	case -EBUSY:
	case -EAGAIN:
		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
		return FIO_Q_BUSY;

	default:
		log_err("ioeng->queue(): err: '%d'\n", err);

		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);

		io_u->error = abs(err);
		assert(false);
		return FIO_Q_COMPLETED;
	}
}

static int xnvme_fioe_close(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	dprint(FD_FILE, "xnvme close %s -- nopen: %ld\n", f->file_name, xd->nopen);

	--(xd->nopen);

	return 0;
}

static int xnvme_fioe_open(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	dprint(FD_FILE, "xnvme open %s -- nopen: %ld\n", f->file_name, xd->nopen);

	if (f->fileno > (int)xd->nallocated) {
		log_err("ioeng->open(): f->fileno > xd->nallocated; invalid assumption\n");
		return 1;
	}
	if (xd->files[f->fileno].fio_file != f) {
		log_err("ioeng->open(): fio_file != f; invalid assumption\n");
		return 1;
	}

	++(xd->nopen);

	return 0;
}

static int xnvme_fioe_invalidate(struct thread_data *td, struct fio_file *f)
{
	/* Consider only doing this with be:spdk */
	return 0;
}

static int xnvme_fioe_get_max_open_zones(struct thread_data *td, struct fio_file *f,
					 unsigned int *max_open_zones)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	const struct xnvme_spec_znd_idfy_ns *zns;
	int err = 0, err_lock;

	if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
	    f->filetype != FIO_TYPE_CHAR) {
		log_info("ioeng->get_max_open_zones(): ignoring filetype: %d\n", f->filetype);
		return 0;
	}
	err_lock = pthread_mutex_lock(&g_serialize);
	if (err_lock) {
		log_err("ioeng->get_max_open_zones(): pthread_mutex_lock(), err(%d)\n", err_lock);
		return -err_lock;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->get_max_open_zones(): xnvme_dev_open(), err(%d)\n", errno);
		err = -errno;
		goto exit;
	}
	if (xnvme_dev_get_geo(dev)->type != XNVME_GEO_ZONED) {
		errno = EINVAL;
		err = -errno;
		goto exit;
	}

	zns = (void *)xnvme_dev_get_ns_css(dev);
	if (!zns) {
		log_err("ioeng->get_max_open_zones(): xnvme_dev_get_ns_css(), err(%d)\n", errno);
		err = -errno;
		goto exit;
	}

	/*
	 * intentional overflow as the value is zero-based and NVMe
	 * defines 0xFFFFFFFF as unlimited thus overflowing to 0 which
	 * is how fio indicates unlimited and otherwise just converting
	 * to one-based.
	 */
	*max_open_zones = zns->mor + 1;

exit:
	xnvme_dev_close(dev);
	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->get_max_open_zones(): pthread_mutex_unlock(), err(%d)\n",
			err_lock);

	return err;
}

/**
 * Currently, this function is called before I/O engine initialization, so
 * we cannot consult the file-wrapping done when 'fioe' initializes.
 * Instead we just open based on the given filename.
 *
 * TODO: unify the different setup methods, consider keeping the handle around,
 * and consider how to support the --be option in this usecase
 */
static int xnvme_fioe_get_zoned_model(struct thread_data *td, struct fio_file *f,
				      enum zbd_zoned_model *model)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	int err = 0, err_lock;

	if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
	    f->filetype != FIO_TYPE_CHAR) {
		log_info("ioeng->get_zoned_model(): ignoring filetype: %d\n", f->filetype);
		return -EINVAL;
	}

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->get_zoned_model(): pthread_mutex_lock(), err(%d)\n", err);
		return -err;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->get_zoned_model(): xnvme_dev_open(%s) failed, errno: %d\n",
			f->file_name, errno);
		err = -errno;
		goto exit;
	}

	switch (xnvme_dev_get_geo(dev)->type) {
	case XNVME_GEO_UNKNOWN:
		dprint(FD_ZBD, "%s: got 'unknown', assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		break;

	case XNVME_GEO_CONVENTIONAL:
		dprint(FD_ZBD, "%s: got 'conventional', assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		break;

	case XNVME_GEO_ZONED:
		dprint(FD_ZBD, "%s: got 'zoned', assigning ZBD_HOST_MANAGED\n", f->file_name);
		*model = ZBD_HOST_MANAGED;
		break;

	default:
		dprint(FD_ZBD, "%s: hit-default, assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		errno = EINVAL;
		err = -errno;
		break;
	}

exit:
	xnvme_dev_close(dev);

	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->get_zoned_model(): pthread_mutex_unlock(), err(%d)\n", err_lock);

	return err;
}

/**
 * Fills the given ``zbdz`` with at most ``nr_zones`` zone-descriptors.
 *
 * The implementation converts the NVMe Zoned Command Set log-pages for Zone
 * descriptors into the Linux Kernel Zoned Block Report format.
 *
 * NOTE: This function is called before I/O engine initialization, that is,
 * before ``_dev_open`` has been called and file-wrapping is set up. Thus it
 * has to do the ``_dev_open`` itself, and shut it down again once it is done
 * retrieving the log-pages and converting them to the report format.
 *
 * TODO: unify the different setup methods, consider keeping the handle around,
 * and consider how to support the --async option in this usecase
 */
static int xnvme_fioe_report_zones(struct thread_data *td, struct fio_file *f, uint64_t offset,
				   struct zbd_zone *zbdz, unsigned int nr_zones)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	const struct xnvme_spec_znd_idfy_lbafe *lbafe = NULL;
	struct xnvme_dev *dev = NULL;
	const struct xnvme_geo *geo = NULL;
	struct xnvme_znd_report *rprt = NULL;
	uint32_t ssw;
	uint64_t slba;
	unsigned int limit = 0;
	int err = 0, err_lock;

	dprint(FD_ZBD, "%s: report_zones() offset: %zu, nr_zones: %u\n", f->file_name, offset,
	       nr_zones);

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->report_zones(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
			err);
		return -err;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->report_zones(%s): xnvme_dev_open(), err(%d)\n", f->file_name,
			errno);
		goto exit;
	}

	geo = xnvme_dev_get_geo(dev);
	ssw = xnvme_dev_get_ssw(dev);
	lbafe = xnvme_znd_dev_get_lbafe(dev);

	limit = nr_zones > geo->nzone ? geo->nzone : nr_zones;

	dprint(FD_ZBD, "%s: limit: %u\n", f->file_name, limit);

	slba = ((offset >> ssw) / geo->nsect) * geo->nsect;

	rprt = xnvme_znd_report_from_dev(dev, slba, limit, 0);
	if (!rprt) {
		log_err("ioeng->report_zones(%s): xnvme_znd_report_from_dev(), err(%d)\n",
			f->file_name, errno);
		err = -errno;
		goto exit;
	}
	if (rprt->nentries != limit) {
		log_err("ioeng->report_zones(%s): nentries != nr_zones\n", f->file_name);
		err = 1;
		goto exit;
	}
	if (offset > geo->tbytes) {
		log_err("ioeng->report_zones(%s): out-of-bounds\n", f->file_name);
		goto exit;
	}

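	/*
	 * Zone descriptors report addresses and sizes in LBAs; fio's zbd_zone
	 * fields are in bytes, hence the left-shifts by 'ssw' below.
	 */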
	/* Transform the zone-report */
	for (uint32_t idx = 0; idx < rprt->nentries; ++idx) {
		struct xnvme_spec_znd_descr *descr = XNVME_ZND_REPORT_DESCR(rprt, idx);

		zbdz[idx].start = descr->zslba << ssw;
		zbdz[idx].len = lbafe->zsze << ssw;
		zbdz[idx].capacity = descr->zcap << ssw;
		zbdz[idx].wp = descr->wp << ssw;

		switch (descr->zt) {
		case XNVME_SPEC_ZND_TYPE_SEQWR:
			zbdz[idx].type = ZBD_ZONE_TYPE_SWR;
			break;

		default:
			log_err("ioeng->report_zones(%s): invalid type for zone at offset(%zu)\n",
				f->file_name, zbdz[idx].start);
			err = -EIO;
			goto exit;
		}

		switch (descr->zs) {
		case XNVME_SPEC_ZND_STATE_EMPTY:
			zbdz[idx].cond = ZBD_ZONE_COND_EMPTY;
			break;
		case XNVME_SPEC_ZND_STATE_IOPEN:
			zbdz[idx].cond = ZBD_ZONE_COND_IMP_OPEN;
			break;
		case XNVME_SPEC_ZND_STATE_EOPEN:
			zbdz[idx].cond = ZBD_ZONE_COND_EXP_OPEN;
			break;
		case XNVME_SPEC_ZND_STATE_CLOSED:
			zbdz[idx].cond = ZBD_ZONE_COND_CLOSED;
			break;
		case XNVME_SPEC_ZND_STATE_FULL:
			zbdz[idx].cond = ZBD_ZONE_COND_FULL;
			break;

		case XNVME_SPEC_ZND_STATE_RONLY:
		case XNVME_SPEC_ZND_STATE_OFFLINE:
		default:
			zbdz[idx].cond = ZBD_ZONE_COND_OFFLINE;
			break;
		}
	}

exit:
	xnvme_buf_virt_free(rprt);

	xnvme_dev_close(dev);

	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->report_zones(): pthread_mutex_unlock(), err: %d\n", err_lock);

	dprint(FD_ZBD, "err: %d, nr_zones: %d\n", err, (int)nr_zones);

	return err ? err : (int)limit;
}

/**
 * NOTE: This function may get called before I/O engine initialization, that
 * is, before ``_dev_open`` has been called and file-wrapping is set up. In
 * such a case it has to do ``_dev_open`` itself, and shut it down again once
 * it is done resetting the write pointer of zones.
 */
static int xnvme_fioe_reset_wp(struct thread_data *td, struct fio_file *f, uint64_t offset,
			       uint64_t length)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_fioe_data *xd = NULL;
	struct xnvme_fioe_fwrap *fwrap = NULL;
	struct xnvme_dev *dev = NULL;
	const struct xnvme_geo *geo = NULL;
	uint64_t first, last;
	uint32_t ssw;
	uint32_t nsid;
	int err = 0, err_lock;

	if (td->io_ops_data) {
		xd = td->io_ops_data;
		fwrap = &xd->files[f->fileno];

		assert(fwrap->dev);
		assert(fwrap->geo);

		dev = fwrap->dev;
		geo = fwrap->geo;
		ssw = fwrap->ssw;
	} else {
		err = pthread_mutex_lock(&g_serialize);
		if (err) {
			log_err("ioeng->reset_wp(): pthread_mutex_lock(), err(%d)\n", err);
			return -err;
		}

		dev = xnvme_dev_open(f->file_name, &opts);
		if (!dev) {
			log_err("ioeng->reset_wp(): xnvme_dev_open(%s) failed, errno(%d)\n",
				f->file_name, errno);
			goto exit;
		}
		geo = xnvme_dev_get_geo(dev);
		ssw = xnvme_dev_get_ssw(dev);
	}

	nsid = xnvme_dev_get_nsid(dev);

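	/*
	 * Convert the byte range to LBAs and align it down to zone boundaries;
	 * each zone spans geo->nsect sectors.
	 */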
	first = ((offset >> ssw) / geo->nsect) * geo->nsect;
	last = (((offset + length) >> ssw) / geo->nsect) * geo->nsect;
	dprint(FD_ZBD, "first: 0x%lx, last: 0x%lx\n", first, last);

	for (uint64_t zslba = first; zslba < last; zslba += geo->nsect) {
		struct xnvme_cmd_ctx ctx = xnvme_cmd_ctx_from_dev(dev);

		if (zslba >= (geo->nsect * geo->nzone)) {
			log_err("ioeng->reset_wp(): out-of-bounds\n");
			err = 0;
			break;
		}

		err = xnvme_znd_mgmt_send(&ctx, nsid, zslba, false,
					  XNVME_SPEC_ZND_CMD_MGMT_SEND_RESET, 0x0, NULL);
		if (err || xnvme_cmd_ctx_cpl_status(&ctx)) {
			err = err ? err : -EIO;
			log_err("ioeng->reset_wp(): err(%d), sc(%d)", err, ctx.cpl.status.sc);
			goto exit;
		}
	}

exit:
	if (!td->io_ops_data) {
		xnvme_dev_close(dev);

		err_lock = pthread_mutex_unlock(&g_serialize);
		if (err_lock)
			log_err("ioeng->reset_wp(): pthread_mutex_unlock(), err(%d)\n", err_lock);
	}

	return err;
}

static int xnvme_fioe_get_file_size(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	int ret = 0, err;

	if (fio_file_size_known(f))
		return 0;

	ret = pthread_mutex_lock(&g_serialize);
	if (ret) {
		log_err("ioeng->get_file_size(): pthread_mutex_lock(), err(%d)\n", ret);
		return -ret;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("%s: failed retrieving device handle, errno: %d\n", f->file_name, errno);
		ret = -errno;
		goto exit;
	}

	f->real_file_size = xnvme_dev_get_geo(dev)->tbytes;
	fio_file_set_size_known(f);
	f->filetype = FIO_TYPE_BLOCK;

exit:
	xnvme_dev_close(dev);
	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->get_file_size(): pthread_mutex_unlock(), err(%d)\n", err);

	return ret;
}

FIO_STATIC struct ioengine_ops ioengine = {
	.name = "xnvme",
	.version = FIO_IOOPS_VERSION,
	.options = options,
	.option_struct_size = sizeof(struct xnvme_fioe_options),
	.flags = FIO_DISKLESSIO | FIO_NODISKUTIL | FIO_NOEXTEND | FIO_MEMALIGN | FIO_RAWIO,

	.cleanup = xnvme_fioe_cleanup,
	.init = xnvme_fioe_init,

	.iomem_free = xnvme_fioe_iomem_free,
	.iomem_alloc = xnvme_fioe_iomem_alloc,

	.io_u_free = xnvme_fioe_io_u_free,
	.io_u_init = xnvme_fioe_io_u_init,

	.event = xnvme_fioe_event,
	.getevents = xnvme_fioe_getevents,
	.queue = xnvme_fioe_queue,

	.close_file = xnvme_fioe_close,
	.open_file = xnvme_fioe_open,
	.get_file_size = xnvme_fioe_get_file_size,

	.invalidate = xnvme_fioe_invalidate,
	.get_max_open_zones = xnvme_fioe_get_max_open_zones,
	.get_zoned_model = xnvme_fioe_get_zoned_model,
	.report_zones = xnvme_fioe_report_zones,
	.reset_wp = xnvme_fioe_reset_wp,
};

static void fio_init fio_xnvme_register(void)
{
	register_ioengine(&ioengine);
}

static void fio_exit fio_xnvme_unregister(void)
{
	unregister_ioengine(&ioengine);
}