/*
 * fio xNVMe IO Engine
 *
 * IO engine using the xNVMe C API.
 *
 * See: http://xnvme.io/
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include <stdlib.h>
#include <assert.h>
#include <libxnvme.h>
#include <libxnvme_libconf.h>
#include <libxnvme_nvm.h>
#include <libxnvme_znd.h>
#include <libxnvme_spec_fs.h>
#include "fio.h"
#include "zbd_types.h"
#include "optgroup.h"

static pthread_mutex_t g_serialize = PTHREAD_MUTEX_INITIALIZER;

struct xnvme_fioe_fwrap {
	/* fio file representation */
	struct fio_file *fio_file;

	/* xNVMe device handle */
	struct xnvme_dev *dev;
	/* xNVMe device geometry */
	const struct xnvme_geo *geo;

	struct xnvme_queue *queue;

	uint32_t ssw;
	uint32_t lba_nbytes;

	uint8_t _pad[24];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_fwrap) == 64, "Incorrect size")

struct xnvme_fioe_data {
	/* I/O completion queue */
	struct io_u **iocq;

	/* # of iocq entries; incremented via getevents()/cb_pool() */
	uint64_t completed;

	/*
	 * # of errors; incremented when observed on completion via
	 * getevents()/cb_pool()
	 */
	uint64_t ecount;

	/* Controls which device/file to select */
	int32_t prev;
	int32_t cur;

	/* Number of devices/files for which open() has been called */
	int64_t nopen;
	/* Number of devices/files allocated in files[] */
	uint64_t nallocated;

	struct iovec *iovec;

	uint8_t _pad[8];

	struct xnvme_fioe_fwrap files[];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_data) == 64, "Incorrect size")

struct xnvme_fioe_options {
	void *padding;
	unsigned int hipri;
	unsigned int sqpoll_thread;
	unsigned int xnvme_dev_nsid;
	unsigned int xnvme_iovec;
	char *xnvme_be;
	char *xnvme_mem;
	char *xnvme_async;
	char *xnvme_sync;
	char *xnvme_admin;
	char *xnvme_dev_subnqn;
};

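/*
 * Illustrative invocation only; the device path and numeric values below are
 * placeholders, and any of the backends/interfaces listed in the option help
 * strings can be substituted:
 *
 *   fio --name=xnvme-example --ioengine=xnvme --thread=1 --direct=1 \
 *       --filename=/dev/nvme0n1 --xnvme_async=io_uring \
 *       --rw=randread --bs=4k --iodepth=16 --size=1G
 */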
static struct fio_option options[] = {
	{
		.name = "hipri",
		.lname = "High Priority",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, hipri),
		.help = "Use polled IO completions",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "sqthread_poll",
		.lname = "Kernel SQ thread polling",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, sqpoll_thread),
		.help = "Offload submission/completion to kernel thread",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_be",
		.lname = "xNVMe Backend",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_be),
		.help = "Select xNVMe backend [spdk,linux,fbsd]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_mem",
		.lname = "xNVMe Memory Backend",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_mem),
		.help = "Select xNVMe memory backend",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_async",
		.lname = "xNVMe Asynchronous command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_async),
		.help = "Select xNVMe async. interface: "
			"[emu,thrpool,io_uring,io_uring_cmd,libaio,posix,vfio,nil]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_sync",
		.lname = "xNVMe Synchronous command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_sync),
		.help = "Select xNVMe sync. interface: [nvme,psync,block]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_admin",
		.lname = "xNVMe Admin command-interface",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_admin),
		.help = "Select xNVMe admin. cmd-interface: [nvme,block]",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_dev_nsid",
		.lname = "xNVMe Namespace-Identifier, for user-space NVMe driver",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_dev_nsid),
		.help = "xNVMe Namespace-Identifier, for user-space NVMe driver",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_dev_subnqn",
		.lname = "Subsystem NQN for Fabrics",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_dev_subnqn),
		.help = "Subsystem NQN for Fabrics",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},
	{
		.name = "xnvme_iovec",
		.lname = "Vectored IOs",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct xnvme_fioe_options, xnvme_iovec),
		.help = "Send vectored IOs",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_XNVME,
	},

	{
		.name = NULL,
	},
};

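/*
 * Completion callback: on command-completion the status is checked; errors are
 * printed, counted in ecount, and reflected in the io_u. The io_u is then
 * stashed in the completion queue (iocq) for retrieval via ->event(), and the
 * command-context is recycled back to its queue.
 */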
static void cb_pool(struct xnvme_cmd_ctx *ctx, void *cb_arg)
{
	struct io_u *io_u = cb_arg;
	struct xnvme_fioe_data *xd = io_u->mmap_data;

	if (xnvme_cmd_ctx_cpl_status(ctx)) {
		xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
		xd->ecount += 1;
		io_u->error = EIO;
	}

	xd->iocq[xd->completed++] = io_u;
	xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
}

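/*
 * Construct the xNVMe open-options from the fio thread-options; this maps the
 * engine options (backend, memory backend, async/sync/admin interfaces, nsid,
 * subnqn, polling) and fio's --direct onto a struct xnvme_opts.
 */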
static struct xnvme_opts xnvme_opts_from_fioe(struct thread_data *td)
{
	struct xnvme_fioe_options *o = td->eo;
	struct xnvme_opts opts = xnvme_opts_default();

	opts.nsid = o->xnvme_dev_nsid;
	opts.subnqn = o->xnvme_dev_subnqn;
	opts.be = o->xnvme_be;
	opts.mem = o->xnvme_mem;
	opts.async = o->xnvme_async;
	opts.sync = o->xnvme_sync;
	opts.admin = o->xnvme_admin;

	opts.poll_io = o->hipri;
	opts.poll_sq = o->sqpoll_thread;

	opts.direct = td->o.odirect;

	return opts;
}

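/* Tear down the queue, close the xNVMe device handle, and clear the wrapper */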
static void _dev_close(struct thread_data *td, struct xnvme_fioe_fwrap *fwrap)
{
	if (fwrap->dev)
		xnvme_queue_term(fwrap->queue);

	xnvme_dev_close(fwrap->dev);

	memset(fwrap, 0, sizeof(*fwrap));
}

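/*
 * Engine cleanup: close every opened device while holding the global
 * serialization mutex, then release the engine-private data (iocq, iovec, xd).
 */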
static void xnvme_fioe_cleanup(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	int err;

	if (!td->io_ops_data)
		return;

	xd = td->io_ops_data;

	err = pthread_mutex_lock(&g_serialize);
	if (err)
		log_err("ioeng->cleanup(): pthread_mutex_lock(), err(%d)\n", err);
		/* NOTE: not returning here */

	for (uint64_t i = 0; i < xd->nallocated; ++i)
		_dev_close(td, &xd->files[i]);

	if (!err) {
		err = pthread_mutex_unlock(&g_serialize);
		if (err)
			log_err("ioeng->cleanup(): pthread_mutex_unlock(), err(%d)\n", err);
	}

	free(xd->iocq);
	free(xd->iovec);
	free(xd);
	td->io_ops_data = NULL;
}

/**
 * Helper function setting up device handles as addressed by the naming
 * convention of the given `fio_file` filename.
 *
 * Checks thread-options for explicit control of asynchronous implementation via
 * the ``--xnvme_async={thrpool,emu,posix,io_uring,libaio,nil}``.
 */
static int _dev_open(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap;
	int flags = 0;
	int err;

	if (f->fileno > (int)xd->nallocated) {
		log_err("ioeng->_dev_open(%s): invalid assumption\n", f->file_name);
		return 1;
	}

	fwrap = &xd->files[f->fileno];

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->_dev_open(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
			err);
		return -err;
	}

	fwrap->dev = xnvme_dev_open(f->file_name, &opts);
	if (!fwrap->dev) {
		log_err("ioeng->_dev_open(%s): xnvme_dev_open(), err(%d)\n", f->file_name, errno);
		goto failure;
	}
	fwrap->geo = xnvme_dev_get_geo(fwrap->dev);

	if (xnvme_queue_init(fwrap->dev, td->o.iodepth, flags, &(fwrap->queue))) {
		log_err("ioeng->_dev_open(%s): xnvme_queue_init(), err(?)\n", f->file_name);
		goto failure;
	}
	xnvme_queue_set_cb(fwrap->queue, cb_pool, NULL);

	fwrap->ssw = xnvme_dev_get_ssw(fwrap->dev);
	fwrap->lba_nbytes = fwrap->geo->lba_nbytes;

	fwrap->fio_file = f;
	fwrap->fio_file->filetype = FIO_TYPE_BLOCK;
	fwrap->fio_file->real_file_size = fwrap->geo->tbytes;
	fio_file_set_size_known(fwrap->fio_file);

	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
			err);

	return 0;

failure:
	xnvme_queue_term(fwrap->queue);
	xnvme_dev_close(fwrap->dev);

	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
			err);

	return 1;
}

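/*
 * Engine initialization: requires --thread=1, allocates the engine-private
 * data along with the completion-queue (iocq) and iovec arrays sized by
 * iodepth, and opens a device handle for each fio file via _dev_open().
 */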
static int xnvme_fioe_init(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	struct fio_file *f;
	unsigned int i;

	if (!td->o.use_thread) {
		log_err("ioeng->init(): --thread=1 is required\n");
		return 1;
	}

	/* Allocate xd and iocq */
	xd = calloc(1, sizeof(*xd) + sizeof(*xd->files) * td->o.nr_files);
	if (!xd) {
		log_err("ioeng->init(): !calloc(), err(%d)\n", errno);
		return 1;
	}

	xd->iocq = calloc(td->o.iodepth, sizeof(struct io_u *));
	if (!xd->iocq) {
		free(xd);
		log_err("ioeng->init(): !calloc(xd->iocq), err(%d)\n", errno);
		return 1;
	}

	xd->iovec = calloc(td->o.iodepth, sizeof(*xd->iovec));
	if (!xd->iovec) {
		free(xd->iocq);
		free(xd);
		log_err("ioeng->init(): !calloc(xd->iovec), err(%d)\n", errno);
		return 1;
	}

	xd->prev = -1;
	td->io_ops_data = xd;

	for_each_file(td, f, i)
	{
		if (_dev_open(td, f)) {
			/*
			 * Note: We are not freeing xd, iocq and iovec. This
			 * will be done as part of the cleanup routine.
			 */
			log_err("ioeng->init(): failed; _dev_open(%s)\n", f->file_name);
			return 1;
		}

		++(xd->nallocated);
	}

	if (xd->nallocated != td->o.nr_files) {
		log_err("ioeng->init(): failed; nallocated != td->o.nr_files\n");
		return 1;
	}

	return 0;
}

/* NOTE: using the first device for buffer-allocators */
static int xnvme_fioe_iomem_alloc(struct thread_data *td, size_t total_mem)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap = &xd->files[0];

	if (!fwrap->dev) {
		log_err("ioeng->iomem_alloc(): failed; no dev-handle\n");
		return 1;
	}

	td->orig_buffer = xnvme_buf_alloc(fwrap->dev, total_mem);

	return td->orig_buffer == NULL;
}

/* NOTE: using the first device for buffer-allocators */
static void xnvme_fioe_iomem_free(struct thread_data *td)
{
	struct xnvme_fioe_data *xd = NULL;
	struct xnvme_fioe_fwrap *fwrap = NULL;

	if (!td->io_ops_data)
		return;

	xd = td->io_ops_data;
	fwrap = &xd->files[0];

	if (!fwrap->dev) {
		log_err("ioeng->iomem_free(): failed no dev-handle\n");
		return;
	}

	xnvme_buf_free(fwrap->dev, td->orig_buffer);
}

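/* Attach/detach the engine-private data to each io_u via mmap_data */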
static int xnvme_fioe_io_u_init(struct thread_data *td, struct io_u *io_u)
{
	io_u->mmap_data = td->io_ops_data;

	return 0;
}

static void xnvme_fioe_io_u_free(struct thread_data *td, struct io_u *io_u)
{
	io_u->mmap_data = NULL;
}

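/* Return the io_u at the given index of the completions gathered by getevents() */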
static struct io_u *xnvme_fioe_event(struct thread_data *td, int event)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	assert(event >= 0);
	assert((unsigned)event < xd->completed);

	return xd->iocq[event];
}

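/*
 * Reap completions by poking the queues of the opened devices round-robin,
 * starting from the device after the one that last satisfied a reap, until at
 * least 'min' completions have been gathered; -EBUSY/-EAGAIN from
 * xnvme_queue_poke() are retried after a short sleep.
 */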
static int xnvme_fioe_getevents(struct thread_data *td, unsigned int min, unsigned int max,
				const struct timespec *t)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap = NULL;
	int nfiles = xd->nallocated;
	int err = 0;

	if (xd->prev != -1 && ++xd->prev < nfiles) {
		fwrap = &xd->files[xd->prev];
		xd->cur = xd->prev;
	}

	xd->completed = 0;
	for (;;) {
		if (fwrap == NULL || xd->cur == nfiles) {
			fwrap = &xd->files[0];
			xd->cur = 0;
		}

		while (fwrap != NULL && xd->cur < nfiles && err >= 0) {
			err = xnvme_queue_poke(fwrap->queue, max - xd->completed);
			if (err < 0) {
				switch (err) {
				case -EBUSY:
				case -EAGAIN:
					usleep(1);
					break;

				default:
					log_err("ioeng->getevents(): unhandled IO error\n");
					assert(false);
					return 0;
				}
			}
			if (xd->completed >= min) {
				xd->prev = xd->cur;
				return xd->completed;
			}
			xd->cur++;
			fwrap = &xd->files[xd->cur];

			if (err < 0) {
				switch (err) {
				case -EBUSY:
				case -EAGAIN:
					usleep(1);
					break;
				}
			}
		}
	}

	xd->cur = 0;

	return xd->completed;
}

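/*
 * Submission path: translate the io_u offset/length into slba/nlb using the
 * device's sector-shift-width (ssw), grab a command-context from the queue and
 * submit an NVMe read/write via xnvme_cmd_pass() or, with --xnvme_iovec, via
 * xnvme_cmd_passv(); -EBUSY/-EAGAIN map to FIO_Q_BUSY.
 */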
static enum fio_q_status xnvme_fioe_queue(struct thread_data *td, struct io_u *io_u)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;
	struct xnvme_fioe_fwrap *fwrap;
	struct xnvme_cmd_ctx *ctx;
	uint32_t nsid;
	uint64_t slba;
	uint16_t nlb;
	int err;
	bool vectored_io = ((struct xnvme_fioe_options *)td->eo)->xnvme_iovec;

	fio_ro_check(td, io_u);

	fwrap = &xd->files[io_u->file->fileno];
	nsid = xnvme_dev_get_nsid(fwrap->dev);

	slba = io_u->offset >> fwrap->ssw;
	nlb = (io_u->xfer_buflen >> fwrap->ssw) - 1;

	ctx = xnvme_queue_get_cmd_ctx(fwrap->queue);
	ctx->async.cb_arg = io_u;

	ctx->cmd.common.nsid = nsid;
	ctx->cmd.nvm.slba = slba;
	ctx->cmd.nvm.nlb = nlb;

	switch (io_u->ddir) {
	case DDIR_READ:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
		break;

	case DDIR_WRITE:
		ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
		break;

	default:
		log_err("ioeng->queue(): ENOSYS: %u\n", io_u->ddir);
		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);

		io_u->error = ENOSYS;
		assert(false);
		return FIO_Q_COMPLETED;
	}

	if (vectored_io) {
		xd->iovec[io_u->index].iov_base = io_u->xfer_buf;
		xd->iovec[io_u->index].iov_len = io_u->xfer_buflen;

		err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen, NULL, 0,
				      0);
	} else {
		err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen, NULL, 0);
	}
	switch (err) {
	case 0:
		return FIO_Q_QUEUED;

	case -EBUSY:
	case -EAGAIN:
		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
		return FIO_Q_BUSY;

	default:
		log_err("ioeng->queue(): err: '%d'\n", err);

		xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);

		io_u->error = abs(err);
		assert(false);
		return FIO_Q_COMPLETED;
	}
}

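/*
 * The devices are opened up-front by _dev_open() during init; open/close here
 * only validate assumptions and maintain the nopen counter.
 */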
static int xnvme_fioe_close(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	dprint(FD_FILE, "xnvme close %s -- nopen: %ld\n", f->file_name, xd->nopen);

	--(xd->nopen);

	return 0;
}

static int xnvme_fioe_open(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_fioe_data *xd = td->io_ops_data;

	dprint(FD_FILE, "xnvme open %s -- nopen: %ld\n", f->file_name, xd->nopen);

	if (f->fileno > (int)xd->nallocated) {
		log_err("ioeng->open(): f->fileno > xd->nallocated; invalid assumption\n");
		return 1;
	}
	if (xd->files[f->fileno].fio_file != f) {
		log_err("ioeng->open(): fio_file != f; invalid assumption\n");
		return 1;
	}

	++(xd->nopen);

	return 0;
}

static int xnvme_fioe_invalidate(struct thread_data *td, struct fio_file *f)
{
	/* Consider only doing this with be:spdk */
	return 0;
}

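/*
 * Retrieve the maximum number of open zones by opening the device directly and
 * reading the MOR field from the Zoned Namespace identify structure; only
 * meaningful for zoned devices.
 */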
static int xnvme_fioe_get_max_open_zones(struct thread_data *td, struct fio_file *f,
					 unsigned int *max_open_zones)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	const struct xnvme_spec_znd_idfy_ns *zns;
	int err = 0, err_lock;

	if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
	    f->filetype != FIO_TYPE_CHAR) {
		log_info("ioeng->get_max_open_zones(): ignoring filetype: %d\n", f->filetype);
		return 0;
	}
	err_lock = pthread_mutex_lock(&g_serialize);
	if (err_lock) {
		log_err("ioeng->get_max_open_zones(): pthread_mutex_lock(), err(%d)\n", err_lock);
		return -err_lock;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->get_max_open_zones(): xnvme_dev_open(), err(%d)\n", errno);
		err = -errno;
		goto exit;
	}
	if (xnvme_dev_get_geo(dev)->type != XNVME_GEO_ZONED) {
		errno = EINVAL;
		err = -errno;
		goto exit;
	}

	zns = (void *)xnvme_dev_get_ns_css(dev);
	if (!zns) {
		log_err("ioeng->get_max_open_zones(): xnvme_dev_get_ns_css(), err(%d)\n", errno);
		err = -errno;
		goto exit;
	}

	/*
	 * Intentional overflow: the value is zero-based and NVMe defines
	 * 0xFFFFFFFF as unlimited, so adding one wraps to 0, which is how fio
	 * indicates unlimited; otherwise this simply converts to one-based.
	 */
	*max_open_zones = zns->mor + 1;

exit:
	xnvme_dev_close(dev);
	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->get_max_open_zones(): pthread_mutex_unlock(), err(%d)\n",
			err_lock);

	return err;
}

/**
 * Currently, this function is called before I/O engine initialization, so we
 * cannot consult the file-wrapping done when 'fioe' initializes.
 * Instead we just open based on the given filename.
 *
 * TODO: unify the different setup methods, consider keeping the handle around,
 * and consider how to support the --be option in this usecase
 */
static int xnvme_fioe_get_zoned_model(struct thread_data *td, struct fio_file *f,
				      enum zbd_zoned_model *model)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	int err = 0, err_lock;

	if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
	    f->filetype != FIO_TYPE_CHAR) {
		log_info("ioeng->get_zoned_model(): ignoring filetype: %d\n", f->filetype);
		return -EINVAL;
	}

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->get_zoned_model(): pthread_mutex_lock(), err(%d)\n", err);
		return -err;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->get_zoned_model(): xnvme_dev_open(%s) failed, errno: %d\n",
			f->file_name, errno);
		err = -errno;
		goto exit;
	}

	switch (xnvme_dev_get_geo(dev)->type) {
	case XNVME_GEO_UNKNOWN:
		dprint(FD_ZBD, "%s: got 'unknown', assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		break;

	case XNVME_GEO_CONVENTIONAL:
		dprint(FD_ZBD, "%s: got 'conventional', assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		break;

	case XNVME_GEO_ZONED:
		dprint(FD_ZBD, "%s: got 'zoned', assigning ZBD_HOST_MANAGED\n", f->file_name);
		*model = ZBD_HOST_MANAGED;
		break;

	default:
		dprint(FD_ZBD, "%s: hit-default, assigning ZBD_NONE\n", f->file_name);
		*model = ZBD_NONE;
		errno = EINVAL;
		err = -errno;
		break;
	}

exit:
	xnvme_dev_close(dev);

	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->get_zoned_model(): pthread_mutex_unlock(), err(%d)\n", err_lock);

	return err;
}

/**
 * Fills the given ``zbdz`` with at most ``nr_zones`` zone-descriptors.
 *
 * The implementation converts the NVMe Zoned Command Set log-pages for Zone
 * descriptors into the Linux Kernel Zoned Block Report format.
 *
 * NOTE: This function is called before I/O engine initialization, that is,
 * before ``_dev_open`` has been called and file-wrapping is set up. Thus it has
 * to do the ``_dev_open`` itself, and shut it down again once it is done
 * retrieving the log-pages and converting them to the report format.
 *
 * TODO: unify the different setup methods, consider keeping the handle around,
 * and consider how to support the --async option in this usecase
 */
static int xnvme_fioe_report_zones(struct thread_data *td, struct fio_file *f, uint64_t offset,
				   struct zbd_zone *zbdz, unsigned int nr_zones)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	const struct xnvme_spec_znd_idfy_lbafe *lbafe = NULL;
	struct xnvme_dev *dev = NULL;
	const struct xnvme_geo *geo = NULL;
	struct xnvme_znd_report *rprt = NULL;
	uint32_t ssw;
	uint64_t slba;
	unsigned int limit = 0;
	int err = 0, err_lock;

	dprint(FD_ZBD, "%s: report_zones() offset: %zu, nr_zones: %u\n", f->file_name, offset,
	       nr_zones);

	err = pthread_mutex_lock(&g_serialize);
	if (err) {
		log_err("ioeng->report_zones(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
			err);
		return -err;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("ioeng->report_zones(%s): xnvme_dev_open(), err(%d)\n", f->file_name,
			errno);
		goto exit;
	}

	geo = xnvme_dev_get_geo(dev);
	ssw = xnvme_dev_get_ssw(dev);
	lbafe = xnvme_znd_dev_get_lbafe(dev);

	limit = nr_zones > geo->nzone ? geo->nzone : nr_zones;

	dprint(FD_ZBD, "%s: limit: %u\n", f->file_name, limit);

	slba = ((offset >> ssw) / geo->nsect) * geo->nsect;

	rprt = xnvme_znd_report_from_dev(dev, slba, limit, 0);
	if (!rprt) {
		log_err("ioeng->report_zones(%s): xnvme_znd_report_from_dev(), err(%d)\n",
			f->file_name, errno);
		err = -errno;
		goto exit;
	}
	if (rprt->nentries != limit) {
		log_err("ioeng->report_zones(%s): nentries != nr_zones\n", f->file_name);
		err = 1;
		goto exit;
	}
	if (offset > geo->tbytes) {
		log_err("ioeng->report_zones(%s): out-of-bounds\n", f->file_name);
		goto exit;
	}

	/* Transform the zone-report */
	for (uint32_t idx = 0; idx < rprt->nentries; ++idx) {
		struct xnvme_spec_znd_descr *descr = XNVME_ZND_REPORT_DESCR(rprt, idx);

		zbdz[idx].start = descr->zslba << ssw;
		zbdz[idx].len = lbafe->zsze << ssw;
		zbdz[idx].capacity = descr->zcap << ssw;
		zbdz[idx].wp = descr->wp << ssw;

		switch (descr->zt) {
		case XNVME_SPEC_ZND_TYPE_SEQWR:
			zbdz[idx].type = ZBD_ZONE_TYPE_SWR;
			break;

		default:
			log_err("ioeng->report_zones(%s): invalid type for zone at offset(%zu)\n",
				f->file_name, zbdz[idx].start);
			err = -EIO;
			goto exit;
		}

		switch (descr->zs) {
		case XNVME_SPEC_ZND_STATE_EMPTY:
			zbdz[idx].cond = ZBD_ZONE_COND_EMPTY;
			break;
		case XNVME_SPEC_ZND_STATE_IOPEN:
			zbdz[idx].cond = ZBD_ZONE_COND_IMP_OPEN;
			break;
		case XNVME_SPEC_ZND_STATE_EOPEN:
			zbdz[idx].cond = ZBD_ZONE_COND_EXP_OPEN;
			break;
		case XNVME_SPEC_ZND_STATE_CLOSED:
			zbdz[idx].cond = ZBD_ZONE_COND_CLOSED;
			break;
		case XNVME_SPEC_ZND_STATE_FULL:
			zbdz[idx].cond = ZBD_ZONE_COND_FULL;
			break;

		case XNVME_SPEC_ZND_STATE_RONLY:
		case XNVME_SPEC_ZND_STATE_OFFLINE:
		default:
			zbdz[idx].cond = ZBD_ZONE_COND_OFFLINE;
			break;
		}
	}

exit:
	xnvme_buf_virt_free(rprt);

	xnvme_dev_close(dev);

	err_lock = pthread_mutex_unlock(&g_serialize);
	if (err_lock)
		log_err("ioeng->report_zones(): pthread_mutex_unlock(), err: %d\n", err_lock);

	dprint(FD_ZBD, "err: %d, nr_zones: %d\n", err, (int)nr_zones);

	return err ? err : (int)limit;
}

/**
 * NOTE: This function may get called before I/O engine initialization, that is,
 * before ``_dev_open`` has been called and file-wrapping is set up. In such a
 * case it has to do ``_dev_open`` itself, and shut it down again once it is
 * done resetting the write pointer of the zones.
 */
static int xnvme_fioe_reset_wp(struct thread_data *td, struct fio_file *f, uint64_t offset,
			       uint64_t length)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_fioe_data *xd = NULL;
	struct xnvme_fioe_fwrap *fwrap = NULL;
	struct xnvme_dev *dev = NULL;
	const struct xnvme_geo *geo = NULL;
	uint64_t first, last;
	uint32_t ssw;
	uint32_t nsid;
	int err = 0, err_lock;

	if (td->io_ops_data) {
		xd = td->io_ops_data;
		fwrap = &xd->files[f->fileno];

		assert(fwrap->dev);
		assert(fwrap->geo);

		dev = fwrap->dev;
		geo = fwrap->geo;
		ssw = fwrap->ssw;
	} else {
		err = pthread_mutex_lock(&g_serialize);
		if (err) {
			log_err("ioeng->reset_wp(): pthread_mutex_lock(), err(%d)\n", err);
			return -err;
		}

		dev = xnvme_dev_open(f->file_name, &opts);
		if (!dev) {
			log_err("ioeng->reset_wp(): xnvme_dev_open(%s) failed, errno(%d)\n",
				f->file_name, errno);
			goto exit;
		}
		geo = xnvme_dev_get_geo(dev);
		ssw = xnvme_dev_get_ssw(dev);
	}

	nsid = xnvme_dev_get_nsid(dev);

	first = ((offset >> ssw) / geo->nsect) * geo->nsect;
	last = (((offset + length) >> ssw) / geo->nsect) * geo->nsect;
	dprint(FD_ZBD, "first: 0x%lx, last: 0x%lx\n", first, last);

	for (uint64_t zslba = first; zslba < last; zslba += geo->nsect) {
		struct xnvme_cmd_ctx ctx = xnvme_cmd_ctx_from_dev(dev);

		if (zslba >= (geo->nsect * geo->nzone)) {
			log_err("ioeng->reset_wp(): out-of-bounds\n");
			err = 0;
			break;
		}

		err = xnvme_znd_mgmt_send(&ctx, nsid, zslba, false,
					  XNVME_SPEC_ZND_CMD_MGMT_SEND_RESET, 0x0, NULL);
		if (err || xnvme_cmd_ctx_cpl_status(&ctx)) {
			err = err ? err : -EIO;
			log_err("ioeng->reset_wp(): err(%d), sc(%d)", err, ctx.cpl.status.sc);
			goto exit;
		}
	}

exit:
	if (!td->io_ops_data) {
		xnvme_dev_close(dev);

		err_lock = pthread_mutex_unlock(&g_serialize);
		if (err_lock)
			log_err("ioeng->reset_wp(): pthread_mutex_unlock(), err(%d)\n", err_lock);
	}

	return err;
}

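/*
 * Determine the file size by opening the device directly and reading the total
 * number of bytes (tbytes) from the device geometry.
 */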
static int xnvme_fioe_get_file_size(struct thread_data *td, struct fio_file *f)
{
	struct xnvme_opts opts = xnvme_opts_from_fioe(td);
	struct xnvme_dev *dev;
	int ret = 0, err;

	if (fio_file_size_known(f))
		return 0;

	ret = pthread_mutex_lock(&g_serialize);
	if (ret) {
		log_err("ioeng->get_file_size(): pthread_mutex_lock(), err(%d)\n", ret);
		return -ret;
	}

	dev = xnvme_dev_open(f->file_name, &opts);
	if (!dev) {
		log_err("%s: failed retrieving device handle, errno: %d\n", f->file_name, errno);
		ret = -errno;
		goto exit;
	}

	f->real_file_size = xnvme_dev_get_geo(dev)->tbytes;
	fio_file_set_size_known(f);
	f->filetype = FIO_TYPE_BLOCK;

exit:
	xnvme_dev_close(dev);
	err = pthread_mutex_unlock(&g_serialize);
	if (err)
		log_err("ioeng->get_file_size(): pthread_mutex_unlock(), err(%d)\n", err);

	return ret;
}

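/* The io-engine entry points registered with fio */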
FIO_STATIC struct ioengine_ops ioengine = {
	.name = "xnvme",
	.version = FIO_IOOPS_VERSION,
	.options = options,
	.option_struct_size = sizeof(struct xnvme_fioe_options),
	.flags = FIO_DISKLESSIO | FIO_NODISKUTIL | FIO_NOEXTEND | FIO_MEMALIGN | FIO_RAWIO,

	.cleanup = xnvme_fioe_cleanup,
	.init = xnvme_fioe_init,

	.iomem_free = xnvme_fioe_iomem_free,
	.iomem_alloc = xnvme_fioe_iomem_alloc,

	.io_u_free = xnvme_fioe_io_u_free,
	.io_u_init = xnvme_fioe_io_u_init,

	.event = xnvme_fioe_event,
	.getevents = xnvme_fioe_getevents,
	.queue = xnvme_fioe_queue,

	.close_file = xnvme_fioe_close,
	.open_file = xnvme_fioe_open,
	.get_file_size = xnvme_fioe_get_file_size,

	.invalidate = xnvme_fioe_invalidate,
	.get_max_open_zones = xnvme_fioe_get_max_open_zones,
	.get_zoned_model = xnvme_fioe_get_zoned_model,
	.report_zones = xnvme_fioe_report_zones,
	.reset_wp = xnvme_fioe_reset_wp,
};

static void fio_init fio_xnvme_register(void)
{
	register_ioengine(&ioengine);
}

static void fio_exit fio_xnvme_unregister(void)
{
	unregister_ioengine(&ioengine);
}