[fio.git] / engines / xnvme.c
1/*
2 * fio xNVMe IO Engine
3 *
4 * IO engine using the xNVMe C API.
5 *
6 * See: http://xnvme.io/
7 *
8 * SPDX-License-Identifier: Apache-2.0
9 */
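/*
 * Illustrative usage (added note, not part of the original source; the device
 * path and values are assumptions): a minimal fio job using this engine.
 * Note that the engine requires thread=1 (see xnvme_fioe_init()).
 *
 *   [global]
 *   ioengine=xnvme
 *   thread=1
 *   filename=/dev/nvme0n1
 *   xnvme_async=io_uring
 *
 *   [randread]
 *   rw=randread
 *   bs=4k
 *   iodepth=16
 */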
10#include <stdlib.h>
11#include <assert.h>
12#include <libxnvme.h>
13#include "fio.h"
14#include "verify.h"
15#include "zbd_types.h"
16#include "dataplacement.h"
17#include "optgroup.h"
18
19static pthread_mutex_t g_serialize = PTHREAD_MUTEX_INITIALIZER;
20
21struct xnvme_fioe_fwrap {
22 /* fio file representation */
23 struct fio_file *fio_file;
24
25 /* xNVMe device handle */
26 struct xnvme_dev *dev;
27 /* xNVMe device geometry */
28 const struct xnvme_geo *geo;
29
30 struct xnvme_queue *queue;
31
32 uint32_t ssw;
33 uint32_t lba_nbytes;
34 uint32_t md_nbytes;
35 uint32_t lba_pow2;
36
37	uint8_t _pad[16];
38};
39XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_fwrap) == 64, "Incorrect size")
40
41struct xnvme_fioe_data {
42 /* I/O completion queue */
43 struct io_u **iocq;
44
45 /* # of iocq entries; incremented via getevents()/cb_pool() */
46 uint64_t completed;
47
48 /*
49 * # of errors; incremented when observed on completion via
50 * getevents()/cb_pool()
51 */
52 uint64_t ecount;
53
54	/* Controls which device/file to select */
55 int32_t prev;
56 int32_t cur;
57
58 /* Number of devices/files for which open() has been called */
59 int64_t nopen;
60 /* Number of devices/files allocated in files[] */
61 uint64_t nallocated;
62
63 struct iovec *iovec;
64	struct iovec *md_iovec;
65
66 struct xnvme_fioe_fwrap files[];
67};
68XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_data) == 64, "Incorrect size")
69
70struct xnvme_fioe_request {
71 /* Context for NVMe PI */
72 struct xnvme_pi_ctx pi_ctx;
73
74 /* Separate metadata buffer pointer */
75 void *md_buf;
76};
77
78struct xnvme_fioe_options {
79 void *padding;
80 unsigned int hipri;
81 unsigned int sqpoll_thread;
82 unsigned int xnvme_dev_nsid;
83 unsigned int xnvme_iovec;
84	unsigned int md_per_io_size;
85 unsigned int pi_act;
86 unsigned int apptag;
87 unsigned int apptag_mask;
88 unsigned int prchk;
89	char *xnvme_be;
90	char *xnvme_mem;
91 char *xnvme_async;
92 char *xnvme_sync;
93 char *xnvme_admin;
94	char *xnvme_dev_subnqn;
95};
96
97static int str_pi_chk_cb(void *data, const char *str)
98{
99 struct xnvme_fioe_options *o = data;
100
101 if (strstr(str, "GUARD") != NULL)
102 o->prchk = XNVME_PI_FLAGS_GUARD_CHECK;
103 if (strstr(str, "REFTAG") != NULL)
104 o->prchk |= XNVME_PI_FLAGS_REFTAG_CHECK;
105 if (strstr(str, "APPTAG") != NULL)
106 o->prchk |= XNVME_PI_FLAGS_APPTAG_CHECK;
107
108 return 0;
109}
110
111static struct fio_option options[] = {
112 {
113 .name = "hipri",
114 .lname = "High Priority",
115 .type = FIO_OPT_STR_SET,
116 .off1 = offsetof(struct xnvme_fioe_options, hipri),
117 .help = "Use polled IO completions",
118 .category = FIO_OPT_C_ENGINE,
119 .group = FIO_OPT_G_XNVME,
120 },
121 {
122 .name = "sqthread_poll",
123 .lname = "Kernel SQ thread polling",
124 .type = FIO_OPT_STR_SET,
125 .off1 = offsetof(struct xnvme_fioe_options, sqpoll_thread),
126 .help = "Offload submission/completion to kernel thread",
127 .category = FIO_OPT_C_ENGINE,
128 .group = FIO_OPT_G_XNVME,
129 },
130 {
131 .name = "xnvme_be",
132 .lname = "xNVMe Backend",
133 .type = FIO_OPT_STR_STORE,
134 .off1 = offsetof(struct xnvme_fioe_options, xnvme_be),
135 .help = "Select xNVMe backend [spdk,linux,fbsd]",
136 .category = FIO_OPT_C_ENGINE,
137 .group = FIO_OPT_G_XNVME,
138 },
139 {
140 .name = "xnvme_mem",
141 .lname = "xNVMe Memory Backend",
142 .type = FIO_OPT_STR_STORE,
143 .off1 = offsetof(struct xnvme_fioe_options, xnvme_mem),
144 .help = "Select xNVMe memory backend",
145 .category = FIO_OPT_C_ENGINE,
146 .group = FIO_OPT_G_XNVME,
147 },
148 {
149 .name = "xnvme_async",
150 .lname = "xNVMe Asynchronous command-interface",
151 .type = FIO_OPT_STR_STORE,
152 .off1 = offsetof(struct xnvme_fioe_options, xnvme_async),
153 .help = "Select xNVMe async. interface: "
154 "[emu,thrpool,io_uring,io_uring_cmd,libaio,posix,vfio,nil]",
155 .category = FIO_OPT_C_ENGINE,
156 .group = FIO_OPT_G_XNVME,
157 },
158 {
159 .name = "xnvme_sync",
160		.lname	= "xNVMe Synchronous command-interface",
161 .type = FIO_OPT_STR_STORE,
162 .off1 = offsetof(struct xnvme_fioe_options, xnvme_sync),
163		.help	= "Select xNVMe sync. interface: [nvme,psync,block]",
164 .category = FIO_OPT_C_ENGINE,
165 .group = FIO_OPT_G_XNVME,
166 },
167 {
168 .name = "xnvme_admin",
169 .lname = "xNVMe Admin command-interface",
170 .type = FIO_OPT_STR_STORE,
171 .off1 = offsetof(struct xnvme_fioe_options, xnvme_admin),
172		.help	= "Select xNVMe admin. cmd-interface: [nvme,block]",
173 .category = FIO_OPT_C_ENGINE,
174 .group = FIO_OPT_G_XNVME,
175 },
176 {
177 .name = "xnvme_dev_nsid",
178 .lname = "xNVMe Namespace-Identifier, for user-space NVMe driver",
179 .type = FIO_OPT_INT,
180 .off1 = offsetof(struct xnvme_fioe_options, xnvme_dev_nsid),
181 .help = "xNVMe Namespace-Identifier, for user-space NVMe driver",
182 .category = FIO_OPT_C_ENGINE,
183 .group = FIO_OPT_G_XNVME,
184 },
185 {
186 .name = "xnvme_dev_subnqn",
187 .lname = "Subsystem nqn for Fabrics",
188 .type = FIO_OPT_STR_STORE,
189 .off1 = offsetof(struct xnvme_fioe_options, xnvme_dev_subnqn),
190 .help = "Subsystem NQN for Fabrics",
191 .category = FIO_OPT_C_ENGINE,
192 .group = FIO_OPT_G_XNVME,
193 },
194 {
195 .name = "xnvme_iovec",
196 .lname = "Vectored IOs",
197 .type = FIO_OPT_STR_SET,
198 .off1 = offsetof(struct xnvme_fioe_options, xnvme_iovec),
199 .help = "Send vectored IOs",
200 .category = FIO_OPT_C_ENGINE,
201 .group = FIO_OPT_G_XNVME,
202 },
203 {
204 .name = "md_per_io_size",
205 .lname = "Separate Metadata Buffer Size per I/O",
206 .type = FIO_OPT_INT,
207 .off1 = offsetof(struct xnvme_fioe_options, md_per_io_size),
208 .def = "0",
209 .help = "Size of separate metadata buffer per I/O (Default: 0)",
210 .category = FIO_OPT_C_ENGINE,
211 .group = FIO_OPT_G_XNVME,
212 },
213 {
214 .name = "pi_act",
215 .lname = "Protection Information Action",
216 .type = FIO_OPT_BOOL,
217 .off1 = offsetof(struct xnvme_fioe_options, pi_act),
218 .def = "1",
219 .help = "Protection Information Action bit (pi_act=1 or pi_act=0)",
220 .category = FIO_OPT_C_ENGINE,
221 .group = FIO_OPT_G_XNVME,
222 },
223 {
224 .name = "pi_chk",
225 .lname = "Protection Information Check",
226 .type = FIO_OPT_STR_STORE,
227 .def = NULL,
228 .help = "Control of Protection Information Checking (pi_chk=GUARD,REFTAG,APPTAG)",
229 .cb = str_pi_chk_cb,
230 .category = FIO_OPT_C_ENGINE,
231 .group = FIO_OPT_G_XNVME,
232 },
233 {
234 .name = "apptag",
235 .lname = "Application Tag used in Protection Information",
236 .type = FIO_OPT_INT,
237 .off1 = offsetof(struct xnvme_fioe_options, apptag),
238 .def = "0x1234",
239 .help = "Application Tag used in Protection Information field (Default: 0x1234)",
240 .category = FIO_OPT_C_ENGINE,
241 .group = FIO_OPT_G_XNVME,
242 },
243 {
244 .name = "apptag_mask",
245 .lname = "Application Tag Mask",
246 .type = FIO_OPT_INT,
247 .off1 = offsetof(struct xnvme_fioe_options, apptag_mask),
248 .def = "0xffff",
249 .help = "Application Tag Mask used with Application Tag (Default: 0xffff)",
250 .category = FIO_OPT_C_ENGINE,
251 .group = FIO_OPT_G_XNVME,
252 },
253
254 {
255 .name = NULL,
256 },
257};
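/*
 * Illustrative example (added note, not part of the original source; device
 * path and sizes are assumptions): on a namespace formatted with separate
 * metadata and protection information, the options above could be combined
 * roughly as:
 *
 *   fio --ioengine=xnvme --thread=1 --filename=/dev/nvme0n1 \
 *       --md_per_io_size=4096 --pi_act=0 --pi_chk=GUARD,REFTAG,APPTAG \
 *       --apptag=0x1234 --apptag_mask=0xffff ...
 *
 * md_per_io_size must cover (max_bs / LBA data size) * metadata size, as
 * checked in _verify_options().
 */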
258
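/*
 * Completion callback, invoked for each command context reaped by
 * xnvme_queue_poke(): it records command failures, verifies the protection
 * information of completed reads when the controller did not (pi_act == 0),
 * and appends the io_u to the completion queue consumed by getevents()/event().
 */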
259static void cb_pool(struct xnvme_cmd_ctx *ctx, void *cb_arg)
260{
261 struct io_u *io_u = cb_arg;
262 struct xnvme_fioe_data *xd = io_u->mmap_data;
263 struct xnvme_fioe_request *fio_req = io_u->engine_data;
264 struct xnvme_fioe_fwrap *fwrap = &xd->files[io_u->file->fileno];
265 bool pi_act = (fio_req->pi_ctx.pi_flags >> 3);
266 int err;
267
268 if (xnvme_cmd_ctx_cpl_status(ctx)) {
269 xnvme_cmd_ctx_pr(ctx, XNVME_PR_DEF);
270 xd->ecount += 1;
271 io_u->error = EIO;
272 }
273
274 if (!io_u->error && fwrap->geo->pi_type && (io_u->ddir == DDIR_READ) && !pi_act) {
275 err = xnvme_pi_verify(&fio_req->pi_ctx, io_u->xfer_buf,
276 fio_req->md_buf, io_u->xfer_buflen / fwrap->lba_nbytes);
277 if (err) {
278 xd->ecount += 1;
279 io_u->error = EIO;
280 }
281 }
282
283 xd->iocq[xd->completed++] = io_u;
284 xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
285}
286
287static struct xnvme_opts xnvme_opts_from_fioe(struct thread_data *td)
288{
289 struct xnvme_fioe_options *o = td->eo;
290 struct xnvme_opts opts = xnvme_opts_default();
291
292 opts.nsid = o->xnvme_dev_nsid;
293	opts.subnqn = o->xnvme_dev_subnqn;
294	opts.be = o->xnvme_be;
295	opts.mem = o->xnvme_mem;
296 opts.async = o->xnvme_async;
297 opts.sync = o->xnvme_sync;
298 opts.admin = o->xnvme_admin;
299
300 opts.poll_io = o->hipri;
301 opts.poll_sq = o->sqpoll_thread;
302
303 opts.direct = td->o.odirect;
304
305 return opts;
306}
307
308static void _dev_close(struct thread_data *td, struct xnvme_fioe_fwrap *fwrap)
309{
310 if (fwrap->dev)
311 xnvme_queue_term(fwrap->queue);
312
313 xnvme_dev_close(fwrap->dev);
314
315 memset(fwrap, 0, sizeof(*fwrap));
316}
317
318static void xnvme_fioe_cleanup(struct thread_data *td)
319{
320	struct xnvme_fioe_data *xd = NULL;
321 int err;
322
323 if (!td->io_ops_data)
324 return;
325
326 xd = td->io_ops_data;
327
328 err = pthread_mutex_lock(&g_serialize);
329 if (err)
330 log_err("ioeng->cleanup(): pthread_mutex_lock(), err(%d)\n", err);
331 /* NOTE: not returning here */
332
333 for (uint64_t i = 0; i < xd->nallocated; ++i)
334 _dev_close(td, &xd->files[i]);
335
336 if (!err) {
337 err = pthread_mutex_unlock(&g_serialize);
338 if (err)
339 log_err("ioeng->cleanup(): pthread_mutex_unlock(), err(%d)\n", err);
340 }
341
342 free(xd->iocq);
343 free(xd->iovec);
344	free(xd->md_iovec);
345 free(xd);
346 td->io_ops_data = NULL;
347}
348
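/*
 * Sanity-checks the job options against the device geometry: block sizes must
 * be a multiple of the LBA (or extended LBA) size, md_per_io_size must be
 * large enough to hold the metadata of the largest I/O, and verify cannot be
 * combined with end-to-end data protection on extended-LBA formats.
 */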
349static int _verify_options(struct thread_data *td, struct fio_file *f,
350 struct xnvme_fioe_fwrap *fwrap)
351{
352 struct xnvme_fioe_options *o = td->eo;
353 unsigned int correct_md_size;
354
355 for_each_rw_ddir(ddir) {
356 if (td->o.min_bs[ddir] % fwrap->lba_nbytes || td->o.max_bs[ddir] % fwrap->lba_nbytes) {
357 if (!fwrap->lba_pow2) {
358 log_err("ioeng->_verify_options(%s): block size must be a multiple of %u "
359 "(LBA data size + Metadata size)\n", f->file_name, fwrap->lba_nbytes);
360 } else {
361 log_err("ioeng->_verify_options(%s): block size must be a multiple of LBA data size\n",
362 f->file_name);
363 }
364 return 1;
365 }
366 if (ddir == DDIR_TRIM)
367 continue;
368
369 correct_md_size = (td->o.max_bs[ddir] / fwrap->lba_nbytes) * fwrap->md_nbytes;
370 if (fwrap->md_nbytes && fwrap->lba_pow2 && (o->md_per_io_size < correct_md_size)) {
371 log_err("ioeng->_verify_options(%s): md_per_io_size should be at least %u bytes\n",
372 f->file_name, correct_md_size);
373 return 1;
374 }
375 }
376
377 /*
378 * For extended logical block sizes we cannot use verify when
379 * end to end data protection checks are enabled, as the PI
380 * section of data buffer conflicts with verify.
381 */
382 if (fwrap->md_nbytes && fwrap->geo->pi_type && !fwrap->lba_pow2 &&
383 td->o.verify != VERIFY_NONE) {
384 log_err("ioeng->_verify_options(%s): for extended LBA, verify cannot be used when E2E data protection is enabled\n",
385 f->file_name);
386 return 1;
387 }
388
389 return 0;
390}
391
392/**
393 * Helper function that sets up the device handle addressed by the naming
394 * convention of the given `fio_file` filename.
395 *
396 * Checks thread-options for explicit control of the asynchronous implementation via
397 * the ``--xnvme_async={thrpool,emu,posix,io_uring,libaio,nil}`` option.
398 */
399static int _dev_open(struct thread_data *td, struct fio_file *f)
400{
401 struct xnvme_opts opts = xnvme_opts_from_fioe(td);
402	struct xnvme_fioe_options *o = td->eo;
403 struct xnvme_fioe_data *xd = td->io_ops_data;
404 struct xnvme_fioe_fwrap *fwrap;
405 int flags = 0;
406 int err;
407
408 if (f->fileno > (int)xd->nallocated) {
409 log_err("ioeng->_dev_open(%s): invalid assumption\n", f->file_name);
410 return 1;
411 }
412
413 fwrap = &xd->files[f->fileno];
414
415 err = pthread_mutex_lock(&g_serialize);
416 if (err) {
417 log_err("ioeng->_dev_open(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
418 err);
419 return -err;
420 }
421
422 fwrap->dev = xnvme_dev_open(f->file_name, &opts);
423 if (!fwrap->dev) {
424 log_err("ioeng->_dev_open(%s): xnvme_dev_open(), err(%d)\n", f->file_name, errno);
425 goto failure;
426 }
427 fwrap->geo = xnvme_dev_get_geo(fwrap->dev);
428
429 if (xnvme_queue_init(fwrap->dev, td->o.iodepth, flags, &(fwrap->queue))) {
430 log_err("ioeng->_dev_open(%s): xnvme_queue_init(), err(?)\n", f->file_name);
431 goto failure;
432 }
433 xnvme_queue_set_cb(fwrap->queue, cb_pool, NULL);
434
435 fwrap->ssw = xnvme_dev_get_ssw(fwrap->dev);
436 fwrap->lba_nbytes = fwrap->geo->lba_nbytes;
437 fwrap->md_nbytes = fwrap->geo->nbytes_oob;
438
439 if (fwrap->geo->lba_extended)
440 fwrap->lba_pow2 = 0;
441 else
442 fwrap->lba_pow2 = 1;
443
444 /*
445 * When PI action is set and PI size is equal to metadata size, the
446 * controller inserts/removes PI. So update the LBA data and metadata
447 * sizes accordingly.
448 */
449 if (o->pi_act && fwrap->geo->pi_type &&
450 fwrap->geo->nbytes_oob == xnvme_pi_size(fwrap->geo->pi_format)) {
451 if (fwrap->geo->lba_extended) {
452 fwrap->lba_nbytes -= fwrap->geo->nbytes_oob;
453 fwrap->lba_pow2 = 1;
454 }
455 fwrap->md_nbytes = 0;
456 }
457
458 if (_verify_options(td, f, fwrap)) {
459 td_verror(td, EINVAL, "_dev_open");
460 goto failure;
461 }
462
463 fwrap->fio_file = f;
464 fwrap->fio_file->filetype = FIO_TYPE_BLOCK;
465 fwrap->fio_file->real_file_size = fwrap->geo->tbytes;
466 fio_file_set_size_known(fwrap->fio_file);
467
468 err = pthread_mutex_unlock(&g_serialize);
469 if (err)
470 log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
471 err);
472
473 return 0;
474
475failure:
476 xnvme_queue_term(fwrap->queue);
477 xnvme_dev_close(fwrap->dev);
478
479 err = pthread_mutex_unlock(&g_serialize);
480 if (err)
481 log_err("ioeng->_dev_open(%s): pthread_mutex_unlock(), err(%d)\n", f->file_name,
482 err);
483
484 return 1;
485}
486
487static int xnvme_fioe_init(struct thread_data *td)
488{
489 struct xnvme_fioe_data *xd = NULL;
490	struct xnvme_fioe_options *o = td->eo;
491 struct fio_file *f;
492 unsigned int i;
493
494 if (!td->o.use_thread) {
495 log_err("ioeng->init(): --thread=1 is required\n");
496 return 1;
497 }
498
499 /* Allocate xd and iocq */
500 xd = calloc(1, sizeof(*xd) + sizeof(*xd->files) * td->o.nr_files);
501 if (!xd) {
502 log_err("ioeng->init(): !calloc(), err(%d)\n", errno);
503 return 1;
504 }
505
506 xd->iocq = calloc(td->o.iodepth, sizeof(struct io_u *));
507 if (!xd->iocq) {
508 free(xd);
509 log_err("ioeng->init(): !calloc(xd->iocq), err(%d)\n", errno);
510 return 1;
511 }
512
513 if (o->xnvme_iovec) {
514 xd->iovec = calloc(td->o.iodepth, sizeof(*xd->iovec));
515 if (!xd->iovec) {
516 free(xd->iocq);
517 free(xd);
518 log_err("ioeng->init(): !calloc(xd->iovec), err(%d)\n", errno);
519 return 1;
520 }
521 }
522
523 if (o->xnvme_iovec && o->md_per_io_size) {
524 xd->md_iovec = calloc(td->o.iodepth, sizeof(*xd->md_iovec));
525 if (!xd->md_iovec) {
526 free(xd->iocq);
527 free(xd->iovec);
528 free(xd);
529 log_err("ioeng->init(): !calloc(xd->md_iovec), err(%d)\n", errno);
530 return 1;
531 }
532 }
533
534 xd->prev = -1;
535 td->io_ops_data = xd;
536
537 for_each_file(td, f, i)
538 {
539 if (_dev_open(td, f)) {
540			/*
541			 * Note: We are not freeing xd, iocq, iovec and md_iovec here;
542			 * this is done as part of the cleanup routine.
543			 */
544 log_err("ioeng->init(): failed; _dev_open(%s)\n", f->file_name);
545 return 1;
546 }
547
548 ++(xd->nallocated);
549 }
550
551 if (xd->nallocated != td->o.nr_files) {
552 log_err("ioeng->init(): failed; nallocated != td->o.nr_files\n");
553 return 1;
554 }
555
556 return 0;
557}
558
559/* NOTE: using the first device for buffer-allocators */
560static int xnvme_fioe_iomem_alloc(struct thread_data *td, size_t total_mem)
561{
562 struct xnvme_fioe_data *xd = td->io_ops_data;
563 struct xnvme_fioe_fwrap *fwrap = &xd->files[0];
564
565 if (!fwrap->dev) {
566 log_err("ioeng->iomem_alloc(): failed; no dev-handle\n");
567 return 1;
568 }
569
570 td->orig_buffer = xnvme_buf_alloc(fwrap->dev, total_mem);
571
572 return td->orig_buffer == NULL;
573}
574
575/* NOTE: using the first device for buffer-allocators */
576static void xnvme_fioe_iomem_free(struct thread_data *td)
577{
578 struct xnvme_fioe_data *xd = NULL;
579 struct xnvme_fioe_fwrap *fwrap = NULL;
580
581 if (!td->io_ops_data)
582 return;
583
584 xd = td->io_ops_data;
585 fwrap = &xd->files[0];
586
587 if (!fwrap->dev) {
588		log_err("ioeng->iomem_free(): failed; no dev-handle\n");
589 return;
590 }
591
592 xnvme_buf_free(fwrap->dev, td->orig_buffer);
593}
594
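/*
 * Per-io_u setup: allocates the engine-private request context and, when
 * md_per_io_size is set, a separate metadata buffer via xnvme_buf_alloc().
 */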
595static int xnvme_fioe_io_u_init(struct thread_data *td, struct io_u *io_u)
596{
597 struct xnvme_fioe_request *fio_req;
598 struct xnvme_fioe_options *o = td->eo;
599 struct xnvme_fioe_data *xd = td->io_ops_data;
600 struct xnvme_fioe_fwrap *fwrap = &xd->files[0];
601
602 if (!fwrap->dev) {
603 log_err("ioeng->io_u_init(): failed; no dev-handle\n");
604 return 1;
605 }
606
607	io_u->mmap_data = td->io_ops_data;
608 io_u->engine_data = NULL;
609
610 fio_req = calloc(1, sizeof(*fio_req));
611 if (!fio_req) {
612 log_err("ioeng->io_u_init(): !calloc(fio_req), err(%d)\n", errno);
613 return 1;
614 }
615
616 if (o->md_per_io_size) {
617 fio_req->md_buf = xnvme_buf_alloc(fwrap->dev, o->md_per_io_size);
618 if (!fio_req->md_buf) {
619 free(fio_req);
620 return 1;
621 }
622 }
623
624 io_u->engine_data = fio_req;
625
626 return 0;
627}
628
629static void xnvme_fioe_io_u_free(struct thread_data *td, struct io_u *io_u)
630{
631 struct xnvme_fioe_data *xd = NULL;
632 struct xnvme_fioe_fwrap *fwrap = NULL;
633 struct xnvme_fioe_request *fio_req = NULL;
634
635 if (!td->io_ops_data)
636 return;
637
638 xd = td->io_ops_data;
639 fwrap = &xd->files[0];
640
641 if (!fwrap->dev) {
642		log_err("ioeng->io_u_free(): failed; no dev-handle\n");
643 return;
644 }
645
646 fio_req = io_u->engine_data;
647 if (fio_req->md_buf)
648 xnvme_buf_free(fwrap->dev, fio_req->md_buf);
649
650 free(fio_req);
651
652 io_u->mmap_data = NULL;
653}
654
655static struct io_u *xnvme_fioe_event(struct thread_data *td, int event)
656{
657 struct xnvme_fioe_data *xd = td->io_ops_data;
658
659 assert(event >= 0);
660 assert((unsigned)event < xd->completed);
661
662 return xd->iocq[event];
663}
664
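/*
 * Reaps completions by polling the queues of the open devices round-robin,
 * starting from the device after the one that satisfied the previous call,
 * and keeps polling until at least 'min' completions have been gathered.
 */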
665static int xnvme_fioe_getevents(struct thread_data *td, unsigned int min, unsigned int max,
666 const struct timespec *t)
667{
668 struct xnvme_fioe_data *xd = td->io_ops_data;
669 struct xnvme_fioe_fwrap *fwrap = NULL;
670 int nfiles = xd->nallocated;
671 int err = 0;
672
673 if (xd->prev != -1 && ++xd->prev < nfiles) {
674 fwrap = &xd->files[xd->prev];
675 xd->cur = xd->prev;
676 }
677
678 xd->completed = 0;
679 for (;;) {
680 if (fwrap == NULL || xd->cur == nfiles) {
681 fwrap = &xd->files[0];
682 xd->cur = 0;
683 }
684
685 while (fwrap != NULL && xd->cur < nfiles && err >= 0) {
686 err = xnvme_queue_poke(fwrap->queue, max - xd->completed);
687 if (err < 0) {
688 switch (err) {
689 case -EBUSY:
690 case -EAGAIN:
691 usleep(1);
692 break;
693
694 default:
695 log_err("ioeng->getevents(): unhandled IO error\n");
696 assert(false);
697 return 0;
698 }
699 }
700 if (xd->completed >= min) {
701 xd->prev = xd->cur;
702 return xd->completed;
703 }
704 xd->cur++;
705 fwrap = &xd->files[xd->cur];
706
707 if (err < 0) {
708 switch (err) {
709 case -EBUSY:
710 case -EAGAIN:
711 usleep(1);
712 break;
713 }
714 }
715 }
716 }
717
718 xd->cur = 0;
719
720 return xd->completed;
721}
722
723static enum fio_q_status xnvme_fioe_queue(struct thread_data *td, struct io_u *io_u)
724{
725 struct xnvme_fioe_data *xd = td->io_ops_data;
726	struct xnvme_fioe_options *o = td->eo;
727 struct xnvme_fioe_fwrap *fwrap;
728 struct xnvme_cmd_ctx *ctx;
729	struct xnvme_fioe_request *fio_req = io_u->engine_data;
730 uint32_t nsid;
731 uint64_t slba;
732 uint16_t nlb;
733 int err;
734 bool vectored_io = ((struct xnvme_fioe_options *)td->eo)->xnvme_iovec;
735	uint32_t dir = io_u->dtype;
736
737 fio_ro_check(td, io_u);
738
739 fwrap = &xd->files[io_u->file->fileno];
740 nsid = xnvme_dev_get_nsid(fwrap->dev);
741
742 if (fwrap->lba_pow2) {
743 slba = io_u->offset >> fwrap->ssw;
744 nlb = (io_u->xfer_buflen >> fwrap->ssw) - 1;
745 } else {
746 slba = io_u->offset / fwrap->lba_nbytes;
747 nlb = (io_u->xfer_buflen / fwrap->lba_nbytes) - 1;
748 }
749
750 ctx = xnvme_queue_get_cmd_ctx(fwrap->queue);
751 ctx->async.cb_arg = io_u;
752
753 ctx->cmd.common.nsid = nsid;
754 ctx->cmd.nvm.slba = slba;
755 ctx->cmd.nvm.nlb = nlb;
756 if (dir) {
757 ctx->cmd.nvm.dtype = io_u->dtype;
758 ctx->cmd.nvm.cdw13.dspec = io_u->dspec;
759 }
760
761 switch (io_u->ddir) {
762 case DDIR_READ:
763 ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_READ;
764 break;
765
766 case DDIR_WRITE:
767 ctx->cmd.common.opcode = XNVME_SPEC_NVM_OPC_WRITE;
768 break;
769
770 default:
771 log_err("ioeng->queue(): ENOSYS: %u\n", io_u->ddir);
772 xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
773
774 io_u->error = ENOSYS;
775		assert(false);
776		return FIO_Q_COMPLETED;
777 }
778
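	/*
	 * Protection Information setup. In the NVMe PRINFO field, PRACT is
	 * bit 3 and the PRCHK flags occupy bits 0-2, hence the
	 * (pi_act << 3 | prchk) encoding below. When pi_act is cleared, the
	 * host generates/verifies PI itself via the xnvme_pi_* helpers.
	 */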
779 if (fwrap->geo->pi_type && !o->pi_act) {
780 err = xnvme_pi_ctx_init(&fio_req->pi_ctx, fwrap->lba_nbytes,
781 fwrap->geo->nbytes_oob, fwrap->geo->lba_extended,
782 fwrap->geo->pi_loc, fwrap->geo->pi_type,
783 (o->pi_act << 3 | o->prchk), slba, o->apptag_mask,
784 o->apptag, fwrap->geo->pi_format);
785 if (err) {
786 log_err("ioeng->queue(): err: '%d'\n", err);
787
788 xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
789
790 io_u->error = abs(err);
791 return FIO_Q_COMPLETED;
792 }
793
794 if (io_u->ddir == DDIR_WRITE)
795 xnvme_pi_generate(&fio_req->pi_ctx, io_u->xfer_buf, fio_req->md_buf,
796 nlb + 1);
797 }
798
799 if (fwrap->geo->pi_type)
800 ctx->cmd.nvm.prinfo = (o->pi_act << 3 | o->prchk);
801
802 switch (fwrap->geo->pi_type) {
803 case XNVME_PI_TYPE1:
804 case XNVME_PI_TYPE2:
805 switch (fwrap->geo->pi_format) {
806 case XNVME_SPEC_NVM_NS_16B_GUARD:
807 if (o->prchk & XNVME_PI_FLAGS_REFTAG_CHECK)
808 ctx->cmd.nvm.ilbrt = (uint32_t)slba;
809 break;
810 case XNVME_SPEC_NVM_NS_64B_GUARD:
811 if (o->prchk & XNVME_PI_FLAGS_REFTAG_CHECK) {
812 ctx->cmd.nvm.ilbrt = (uint32_t)slba;
813 ctx->cmd.common.cdw03 = ((slba >> 32) & 0xffff);
814 }
815 break;
816 default:
817 break;
818 }
819 if (o->prchk & XNVME_PI_FLAGS_APPTAG_CHECK) {
820 ctx->cmd.nvm.lbat = o->apptag;
821 ctx->cmd.nvm.lbatm = o->apptag_mask;
822 }
823 break;
824 case XNVME_PI_TYPE3:
825 if (o->prchk & XNVME_PI_FLAGS_APPTAG_CHECK) {
826 ctx->cmd.nvm.lbat = o->apptag;
827 ctx->cmd.nvm.lbatm = o->apptag_mask;
828 }
829 break;
830 case XNVME_PI_DISABLE:
831 break;
832 }
833
834 if (vectored_io) {
835 xd->iovec[io_u->index].iov_base = io_u->xfer_buf;
836 xd->iovec[io_u->index].iov_len = io_u->xfer_buflen;
837 if (fwrap->md_nbytes && fwrap->lba_pow2) {
838 xd->md_iovec[io_u->index].iov_base = fio_req->md_buf;
839 xd->md_iovec[io_u->index].iov_len = fwrap->md_nbytes * (nlb + 1);
840 err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen,
841 &xd->md_iovec[io_u->index], 1,
842 fwrap->md_nbytes * (nlb + 1));
843 } else {
844 err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen,
845 NULL, 0, 0);
846 }
847	} else {
848 if (fwrap->md_nbytes && fwrap->lba_pow2)
849 err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen,
850 fio_req->md_buf, fwrap->md_nbytes * (nlb + 1));
851 else
852 err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen, NULL, 0);
853 }
854 switch (err) {
855 case 0:
856 return FIO_Q_QUEUED;
857
858 case -EBUSY:
859 case -EAGAIN:
860 xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
861 return FIO_Q_BUSY;
862
863 default:
864 log_err("ioeng->queue(): err: '%d'\n", err);
865
866 xnvme_queue_put_cmd_ctx(ctx->async.queue, ctx);
867
868 io_u->error = abs(err);
869 assert(false);
870 return FIO_Q_COMPLETED;
871 }
872}
873
874static int xnvme_fioe_close(struct thread_data *td, struct fio_file *f)
875{
876 struct xnvme_fioe_data *xd = td->io_ops_data;
877
878 dprint(FD_FILE, "xnvme close %s -- nopen: %ld\n", f->file_name, xd->nopen);
879
880 --(xd->nopen);
881
882 return 0;
883}
884
885static int xnvme_fioe_open(struct thread_data *td, struct fio_file *f)
886{
887 struct xnvme_fioe_data *xd = td->io_ops_data;
888
889 dprint(FD_FILE, "xnvme open %s -- nopen: %ld\n", f->file_name, xd->nopen);
890
891 if (f->fileno > (int)xd->nallocated) {
892 log_err("ioeng->open(): f->fileno > xd->nallocated; invalid assumption\n");
893 return 1;
894 }
895 if (xd->files[f->fileno].fio_file != f) {
896 log_err("ioeng->open(): fio_file != f; invalid assumption\n");
897 return 1;
898 }
899
900 ++(xd->nopen);
901
902 return 0;
903}
904
905static int xnvme_fioe_invalidate(struct thread_data *td, struct fio_file *f)
906{
907 /* Consider only doing this with be:spdk */
908 return 0;
909}
910
911static int xnvme_fioe_get_max_open_zones(struct thread_data *td, struct fio_file *f,
912 unsigned int *max_open_zones)
913{
914 struct xnvme_opts opts = xnvme_opts_from_fioe(td);
915 struct xnvme_dev *dev;
916 const struct xnvme_spec_znd_idfy_ns *zns;
917 int err = 0, err_lock;
918
919 if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
920 f->filetype != FIO_TYPE_CHAR) {
921		log_info("ioeng->get_max_open_zones(): ignoring filetype: %d\n", f->filetype);
922 return 0;
923 }
924 err_lock = pthread_mutex_lock(&g_serialize);
925 if (err_lock) {
926 log_err("ioeng->get_max_open_zones(): pthread_mutex_lock(), err(%d)\n", err_lock);
927 return -err_lock;
928 }
929
930 dev = xnvme_dev_open(f->file_name, &opts);
931 if (!dev) {
932		log_err("ioeng->get_max_open_zones(): xnvme_dev_open(), err(%d)\n", errno);
933 err = -errno;
934 goto exit;
935 }
936 if (xnvme_dev_get_geo(dev)->type != XNVME_GEO_ZONED) {
937 errno = EINVAL;
938 err = -errno;
939 goto exit;
940 }
941
942 zns = (void *)xnvme_dev_get_ns_css(dev);
943 if (!zns) {
944 log_err("ioeng->get_max_open_zones(): xnvme_dev_get_ns_css(), err(%d)\n", errno);
945 err = -errno;
946 goto exit;
947 }
948
949 /*
950	 * Intentional overflow: the reported value is zero-based, and NVMe
951	 * defines 0xFFFFFFFF as unlimited. Adding one then overflows to 0,
952	 * which is how fio indicates unlimited; otherwise the value is simply
953	 * converted to one-based.
954 */
955 *max_open_zones = zns->mor + 1;
956
957exit:
958 xnvme_dev_close(dev);
959 err_lock = pthread_mutex_unlock(&g_serialize);
960 if (err_lock)
961 log_err("ioeng->get_max_open_zones(): pthread_mutex_unlock(), err(%d)\n",
962 err_lock);
963
964 return err;
965}
966
967/**
968 * Currently, this function is called before I/O engine initialization, so
969 * we cannot consult the file-wrapping done when 'fioe' initializes.
970 * Instead we just open based on the given filename.
971 *
972 * TODO: unify the different setup methods, consider keeping the handle around,
973 * and consider how to support the --be option in this usecase
974 */
975static int xnvme_fioe_get_zoned_model(struct thread_data *td, struct fio_file *f,
976 enum zbd_zoned_model *model)
977{
978 struct xnvme_opts opts = xnvme_opts_from_fioe(td);
979 struct xnvme_dev *dev;
980 int err = 0, err_lock;
981
982 if (f->filetype != FIO_TYPE_FILE && f->filetype != FIO_TYPE_BLOCK &&
983 f->filetype != FIO_TYPE_CHAR) {
984 log_info("ioeng->get_zoned_model(): ignoring filetype: %d\n", f->filetype);
985 return -EINVAL;
986 }
987
988 err = pthread_mutex_lock(&g_serialize);
989 if (err) {
990 log_err("ioeng->get_zoned_model(): pthread_mutex_lock(), err(%d)\n", err);
991 return -err;
992 }
993
994 dev = xnvme_dev_open(f->file_name, &opts);
995 if (!dev) {
996 log_err("ioeng->get_zoned_model(): xnvme_dev_open(%s) failed, errno: %d\n",
997 f->file_name, errno);
998 err = -errno;
999 goto exit;
1000 }
1001
1002 switch (xnvme_dev_get_geo(dev)->type) {
1003 case XNVME_GEO_UNKNOWN:
1004 dprint(FD_ZBD, "%s: got 'unknown', assigning ZBD_NONE\n", f->file_name);
1005 *model = ZBD_NONE;
1006 break;
1007
1008 case XNVME_GEO_CONVENTIONAL:
1009 dprint(FD_ZBD, "%s: got 'conventional', assigning ZBD_NONE\n", f->file_name);
1010 *model = ZBD_NONE;
1011 break;
1012
1013 case XNVME_GEO_ZONED:
1014 dprint(FD_ZBD, "%s: got 'zoned', assigning ZBD_HOST_MANAGED\n", f->file_name);
1015 *model = ZBD_HOST_MANAGED;
1016 break;
1017
1018 default:
1019 dprint(FD_ZBD, "%s: hit-default, assigning ZBD_NONE\n", f->file_name);
1020 *model = ZBD_NONE;
1021 errno = EINVAL;
1022 err = -errno;
1023 break;
1024 }
1025
1026exit:
1027 xnvme_dev_close(dev);
1028
1029 err_lock = pthread_mutex_unlock(&g_serialize);
1030 if (err_lock)
1031 log_err("ioeng->get_zoned_model(): pthread_mutex_unlock(), err(%d)\n", err_lock);
1032
1033 return err;
1034}
1035
1036/**
1037 * Fills the given ``zbdz`` with at most ``nr_zones`` zone-descriptors.
1038 *
1039 * The implementation converts the NVMe Zoned Command Set log-pages for Zone
1040 * descriptors into the Linux Kernel Zoned Block Report format.
1041 *
1042 * NOTE: This function is called before I/O engine initialization, that is,
1043 * before ``_dev_open`` has been called and file-wrapping is set up. Thus it has
1044 * to do the ``_dev_open`` itself, and shut it down again once it is done
1045 * retrieving the log-pages and converting them to the report format.
1046 *
1047 * TODO: unify the different setup methods, consider keeping the handle around,
1048 * and consider how to support the --async option in this usecase
1049 */
1050static int xnvme_fioe_report_zones(struct thread_data *td, struct fio_file *f, uint64_t offset,
1051 struct zbd_zone *zbdz, unsigned int nr_zones)
1052{
1053 struct xnvme_opts opts = xnvme_opts_from_fioe(td);
1054 const struct xnvme_spec_znd_idfy_lbafe *lbafe = NULL;
1055 struct xnvme_dev *dev = NULL;
1056 const struct xnvme_geo *geo = NULL;
1057 struct xnvme_znd_report *rprt = NULL;
1058 uint32_t ssw;
1059 uint64_t slba;
1060 unsigned int limit = 0;
1061 int err = 0, err_lock;
1062
1063 dprint(FD_ZBD, "%s: report_zones() offset: %zu, nr_zones: %u\n", f->file_name, offset,
1064 nr_zones);
1065
1066 err = pthread_mutex_lock(&g_serialize);
1067 if (err) {
1068 log_err("ioeng->report_zones(%s): pthread_mutex_lock(), err(%d)\n", f->file_name,
1069 err);
1070 return -err;
1071 }
1072
1073 dev = xnvme_dev_open(f->file_name, &opts);
1074 if (!dev) {
1075 log_err("ioeng->report_zones(%s): xnvme_dev_open(), err(%d)\n", f->file_name,
1076 errno);
1077 goto exit;
1078 }
1079
1080 geo = xnvme_dev_get_geo(dev);
1081 ssw = xnvme_dev_get_ssw(dev);
1082 lbafe = xnvme_znd_dev_get_lbafe(dev);
1083
1084 limit = nr_zones > geo->nzone ? geo->nzone : nr_zones;
1085
1086 dprint(FD_ZBD, "%s: limit: %u\n", f->file_name, limit);
1087
1088 slba = ((offset >> ssw) / geo->nsect) * geo->nsect;
1089
1090 rprt = xnvme_znd_report_from_dev(dev, slba, limit, 0);
1091 if (!rprt) {
1092 log_err("ioeng->report_zones(%s): xnvme_znd_report_from_dev(), err(%d)\n",
1093 f->file_name, errno);
1094 err = -errno;
1095 goto exit;
1096 }
1097 if (rprt->nentries != limit) {
1098 log_err("ioeng->report_zones(%s): nentries != nr_zones\n", f->file_name);
1099 err = 1;
1100 goto exit;
1101 }
1102 if (offset > geo->tbytes) {
1103 log_err("ioeng->report_zones(%s): out-of-bounds\n", f->file_name);
1104 goto exit;
1105 }
1106
1107 /* Transform the zone-report */
1108 for (uint32_t idx = 0; idx < rprt->nentries; ++idx) {
1109 struct xnvme_spec_znd_descr *descr = XNVME_ZND_REPORT_DESCR(rprt, idx);
1110
1111 zbdz[idx].start = descr->zslba << ssw;
1112 zbdz[idx].len = lbafe->zsze << ssw;
1113 zbdz[idx].capacity = descr->zcap << ssw;
1114 zbdz[idx].wp = descr->wp << ssw;
1115
1116 switch (descr->zt) {
1117 case XNVME_SPEC_ZND_TYPE_SEQWR:
1118 zbdz[idx].type = ZBD_ZONE_TYPE_SWR;
1119 break;
1120
1121 default:
1122 log_err("ioeng->report_zones(%s): invalid type for zone at offset(%zu)\n",
1123 f->file_name, zbdz[idx].start);
1124 err = -EIO;
1125 goto exit;
1126 }
1127
1128 switch (descr->zs) {
1129 case XNVME_SPEC_ZND_STATE_EMPTY:
1130 zbdz[idx].cond = ZBD_ZONE_COND_EMPTY;
1131 break;
1132 case XNVME_SPEC_ZND_STATE_IOPEN:
1133 zbdz[idx].cond = ZBD_ZONE_COND_IMP_OPEN;
1134 break;
1135 case XNVME_SPEC_ZND_STATE_EOPEN:
1136 zbdz[idx].cond = ZBD_ZONE_COND_EXP_OPEN;
1137 break;
1138 case XNVME_SPEC_ZND_STATE_CLOSED:
1139 zbdz[idx].cond = ZBD_ZONE_COND_CLOSED;
1140 break;
1141 case XNVME_SPEC_ZND_STATE_FULL:
1142 zbdz[idx].cond = ZBD_ZONE_COND_FULL;
1143 break;
1144
1145 case XNVME_SPEC_ZND_STATE_RONLY:
1146 case XNVME_SPEC_ZND_STATE_OFFLINE:
1147 default:
1148 zbdz[idx].cond = ZBD_ZONE_COND_OFFLINE;
1149 break;
1150 }
1151 }
1152
1153exit:
1154 xnvme_buf_virt_free(rprt);
1155
1156 xnvme_dev_close(dev);
1157
1158 err_lock = pthread_mutex_unlock(&g_serialize);
1159 if (err_lock)
1160 log_err("ioeng->report_zones(): pthread_mutex_unlock(), err: %d\n", err_lock);
1161
1162 dprint(FD_ZBD, "err: %d, nr_zones: %d\n", err, (int)nr_zones);
1163
1164 return err ? err : (int)limit;
1165}
1166
1167/**
1168 * NOTE: This function may get called before I/O engine initialization, that is,
1169 * before ``_dev_open`` has been called and file-wrapping is setup. In such
1170 * case it has to do ``_dev_open`` itself, and shut it down again once it is
1171 * done resetting write pointer of zones.
1172 */
1173static int xnvme_fioe_reset_wp(struct thread_data *td, struct fio_file *f, uint64_t offset,
1174 uint64_t length)
1175{
1176 struct xnvme_opts opts = xnvme_opts_from_fioe(td);
1177 struct xnvme_fioe_data *xd = NULL;
1178 struct xnvme_fioe_fwrap *fwrap = NULL;
1179 struct xnvme_dev *dev = NULL;
1180 const struct xnvme_geo *geo = NULL;
1181 uint64_t first, last;
1182 uint32_t ssw;
1183 uint32_t nsid;
1184 int err = 0, err_lock;
1185
1186 if (td->io_ops_data) {
1187 xd = td->io_ops_data;
1188 fwrap = &xd->files[f->fileno];
1189
1190 assert(fwrap->dev);
1191 assert(fwrap->geo);
1192
1193 dev = fwrap->dev;
1194 geo = fwrap->geo;
1195 ssw = fwrap->ssw;
1196 } else {
1197 err = pthread_mutex_lock(&g_serialize);
1198 if (err) {
1199 log_err("ioeng->reset_wp(): pthread_mutex_lock(), err(%d)\n", err);
1200 return -err;
1201 }
1202
1203 dev = xnvme_dev_open(f->file_name, &opts);
1204 if (!dev) {
1205 log_err("ioeng->reset_wp(): xnvme_dev_open(%s) failed, errno(%d)\n",
1206 f->file_name, errno);
1207 goto exit;
1208 }
1209 geo = xnvme_dev_get_geo(dev);
1210 ssw = xnvme_dev_get_ssw(dev);
1211 }
1212
1213 nsid = xnvme_dev_get_nsid(dev);
1214
1215 first = ((offset >> ssw) / geo->nsect) * geo->nsect;
1216 last = (((offset + length) >> ssw) / geo->nsect) * geo->nsect;
1217 dprint(FD_ZBD, "first: 0x%lx, last: 0x%lx\n", first, last);
1218
1219 for (uint64_t zslba = first; zslba < last; zslba += geo->nsect) {
1220 struct xnvme_cmd_ctx ctx = xnvme_cmd_ctx_from_dev(dev);
1221
1222 if (zslba >= (geo->nsect * geo->nzone)) {
1223 log_err("ioeng->reset_wp(): out-of-bounds\n");
1224 err = 0;
1225 break;
1226 }
1227
1228 err = xnvme_znd_mgmt_send(&ctx, nsid, zslba, false,
1229 XNVME_SPEC_ZND_CMD_MGMT_SEND_RESET, 0x0, NULL);
1230 if (err || xnvme_cmd_ctx_cpl_status(&ctx)) {
1231 err = err ? err : -EIO;
1232 log_err("ioeng->reset_wp(): err(%d), sc(%d)", err, ctx.cpl.status.sc);
1233 goto exit;
1234 }
1235 }
1236
1237exit:
1238 if (!td->io_ops_data) {
1239 xnvme_dev_close(dev);
1240
1241 err_lock = pthread_mutex_unlock(&g_serialize);
1242 if (err_lock)
1243 log_err("ioeng->reset_wp(): pthread_mutex_unlock(), err(%d)\n", err_lock);
1244 }
1245
1246 return err;
1247}
1248
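/*
 * FDP support: fetches the Reclaim Unit Handle Status via an I/O Management
 * Receive command and copies the placement identifiers into fio's ruhs info,
 * so fio can direct writes with dtype/dspec (see xnvme_fioe_queue()).
 */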
1249static int xnvme_fioe_fetch_ruhs(struct thread_data *td, struct fio_file *f,
1250 struct fio_ruhs_info *fruhs_info)
1251{
1252 struct xnvme_opts opts = xnvme_opts_from_fioe(td);
1253 struct xnvme_dev *dev;
1254 struct xnvme_spec_ruhs *ruhs;
1255 struct xnvme_cmd_ctx ctx;
1256 uint32_t ruhs_nbytes;
1257 uint32_t nsid;
1258 int err = 0, err_lock;
1259
1260	if (f->filetype != FIO_TYPE_CHAR && f->filetype != FIO_TYPE_FILE) {
1261 log_err("ioeng->fdp_ruhs(): ignoring filetype: %d\n", f->filetype);
1262 return -EINVAL;
1263 }
1264
1265 err = pthread_mutex_lock(&g_serialize);
1266 if (err) {
1267 log_err("ioeng->fdp_ruhs(): pthread_mutex_lock(), err(%d)\n", err);
1268 return -err;
1269 }
1270
1271 dev = xnvme_dev_open(f->file_name, &opts);
1272 if (!dev) {
1273 log_err("ioeng->fdp_ruhs(): xnvme_dev_open(%s) failed, errno: %d\n",
1274 f->file_name, errno);
1275 err = -errno;
1276 goto exit;
1277 }
1278
1279 ruhs_nbytes = sizeof(*ruhs) + (FDP_MAX_RUHS * sizeof(struct xnvme_spec_ruhs_desc));
1280 ruhs = xnvme_buf_alloc(dev, ruhs_nbytes);
1281 if (!ruhs) {
1282 err = -errno;
1283 goto exit;
1284 }
1285 memset(ruhs, 0, ruhs_nbytes);
1286
1287 ctx = xnvme_cmd_ctx_from_dev(dev);
1288 nsid = xnvme_dev_get_nsid(dev);
1289
1290 err = xnvme_nvm_mgmt_recv(&ctx, nsid, XNVME_SPEC_IO_MGMT_RECV_RUHS, 0, ruhs, ruhs_nbytes);
1291
1292 if (err || xnvme_cmd_ctx_cpl_status(&ctx)) {
1293 err = err ? err : -EIO;
1294 log_err("ioeng->fdp_ruhs(): err(%d), sc(%d)", err, ctx.cpl.status.sc);
1295 goto free_buffer;
1296 }
1297
1298 fruhs_info->nr_ruhs = ruhs->nruhsd;
1299 for (uint32_t idx = 0; idx < fruhs_info->nr_ruhs; ++idx) {
1300 fruhs_info->plis[idx] = le16_to_cpu(ruhs->desc[idx].pi);
1301 }
1302
1303free_buffer:
1304 xnvme_buf_free(dev, ruhs);
1305exit:
1306 xnvme_dev_close(dev);
1307
1308 err_lock = pthread_mutex_unlock(&g_serialize);
1309 if (err_lock)
1310 log_err("ioeng->fdp_ruhs(): pthread_mutex_unlock(), err(%d)\n", err_lock);
1311
1312 return err;
1313}
1314
1315static int xnvme_fioe_get_file_size(struct thread_data *td, struct fio_file *f)
1316{
1317 struct xnvme_opts opts = xnvme_opts_from_fioe(td);
1318 struct xnvme_dev *dev;
1319 int ret = 0, err;
1320
1321 if (fio_file_size_known(f))
1322 return 0;
1323
1324 ret = pthread_mutex_lock(&g_serialize);
1325 if (ret) {
1326		log_err("ioeng->get_file_size(): pthread_mutex_lock(), err(%d)\n", ret);
1327 return -ret;
1328 }
1329
1330 dev = xnvme_dev_open(f->file_name, &opts);
1331 if (!dev) {
1332 log_err("%s: failed retrieving device handle, errno: %d\n", f->file_name, errno);
1333 ret = -errno;
1334 goto exit;
1335 }
1336
1337 f->real_file_size = xnvme_dev_get_geo(dev)->tbytes;
1338 fio_file_set_size_known(f);
1339
1340 if (td->o.zone_mode == ZONE_MODE_ZBD)
1341 f->filetype = FIO_TYPE_BLOCK;
1342
1343exit:
1344 xnvme_dev_close(dev);
1345 err = pthread_mutex_unlock(&g_serialize);
1346 if (err)
1347		log_err("ioeng->get_file_size(): pthread_mutex_unlock(), err(%d)\n", err);
1348
1349 return ret;
1350}
1351
1352FIO_STATIC struct ioengine_ops ioengine = {
1353 .name = "xnvme",
1354 .version = FIO_IOOPS_VERSION,
1355 .options = options,
1356 .option_struct_size = sizeof(struct xnvme_fioe_options),
1357 .flags = FIO_DISKLESSIO | FIO_NODISKUTIL | FIO_NOEXTEND | FIO_MEMALIGN | FIO_RAWIO,
1358
1359 .cleanup = xnvme_fioe_cleanup,
1360 .init = xnvme_fioe_init,
1361
1362 .iomem_free = xnvme_fioe_iomem_free,
1363 .iomem_alloc = xnvme_fioe_iomem_alloc,
1364
1365 .io_u_free = xnvme_fioe_io_u_free,
1366 .io_u_init = xnvme_fioe_io_u_init,
1367
1368 .event = xnvme_fioe_event,
1369 .getevents = xnvme_fioe_getevents,
1370 .queue = xnvme_fioe_queue,
1371
1372 .close_file = xnvme_fioe_close,
1373 .open_file = xnvme_fioe_open,
1374 .get_file_size = xnvme_fioe_get_file_size,
1375
1376 .invalidate = xnvme_fioe_invalidate,
1377 .get_max_open_zones = xnvme_fioe_get_max_open_zones,
1378 .get_zoned_model = xnvme_fioe_get_zoned_model,
1379 .report_zones = xnvme_fioe_report_zones,
1380 .reset_wp = xnvme_fioe_reset_wp,
1381
1382 .fdp_fetch_ruhs = xnvme_fioe_fetch_ruhs,
1383};
1384
1385static void fio_init fio_xnvme_register(void)
1386{
1387 register_ioengine(&ioengine);
1388}
1389
1390static void fio_exit fio_xnvme_unregister(void)
1391{
1392 unregister_ioengine(&ioengine);
1393}