This enables support for separate metadata buffers with the xnvme ioengine.
This is done by providing the xnvme-specific option md_per_io_size, which
for the sake of consistency is the same option used by the io_uring_cmd
engine and SPDK's external ioengine.
Bump the required xnvme version up to v0.7.4.
Signed-off-by: Ankit Kumar <ankit.kumar@samsung.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/20240213153315.134202-3-ankit.kumar@samsung.com
Signed-off-by: Vincent Fu <vincent.fu@samsung.com>
want fio to use placement identifiers only at indices 0, 2 and 5, specify
``fdp_pli=0,2,5``.
want fio to use placement identifiers only at indices 0, 2 and 5, specify
``fdp_pli=0,2,5``.
-.. option:: md_per_io_size=int : [io_uring_cmd]
+.. option:: md_per_io_size=int : [io_uring_cmd] [xnvme]
Size in bytes for separate metadata buffer per IO. Default: 0.
Size in bytes for separate metadata buffer per IO. Default: 0.
##########################################
# Check if we have xnvme
if test "$xnvme" != "no" ; then
##########################################
# Check if we have xnvme
if test "$xnvme" != "no" ; then
- if check_min_lib_version xnvme 0.7.0; then
+ if check_min_lib_version xnvme 0.7.4; then
xnvme="yes"
xnvme_cflags=$(pkg-config --cflags xnvme)
xnvme_libs=$(pkg-config --libs xnvme)
xnvme="yes"
xnvme_cflags=$(pkg-config --cflags xnvme)
xnvme_libs=$(pkg-config --libs xnvme)
uint32_t ssw;
uint32_t lba_nbytes;
uint32_t ssw;
uint32_t lba_nbytes;
+ uint32_t md_nbytes;
+ uint32_t lba_pow2;
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_fwrap) == 64, "Incorrect size")
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_fwrap) == 64, "Incorrect size")
uint64_t nallocated;
struct iovec *iovec;
uint64_t nallocated;
struct iovec *iovec;
+ struct iovec *md_iovec;
struct xnvme_fioe_fwrap files[];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_data) == 64, "Incorrect size")
struct xnvme_fioe_fwrap files[];
};
XNVME_STATIC_ASSERT(sizeof(struct xnvme_fioe_data) == 64, "Incorrect size")
+struct xnvme_fioe_request {
+ /* Separate metadata buffer pointer */
+ void *md_buf;
+};
+
struct xnvme_fioe_options {
void *padding;
unsigned int hipri;
unsigned int sqpoll_thread;
unsigned int xnvme_dev_nsid;
unsigned int xnvme_iovec;
struct xnvme_fioe_options {
void *padding;
unsigned int hipri;
unsigned int sqpoll_thread;
unsigned int xnvme_dev_nsid;
unsigned int xnvme_iovec;
+ unsigned int md_per_io_size;
char *xnvme_be;
char *xnvme_mem;
char *xnvme_async;
char *xnvme_be;
char *xnvme_mem;
char *xnvme_async;
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_XNVME,
},
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_XNVME,
},
+ {
+ .name = "md_per_io_size",
+ .lname = "Separate Metadata Buffer Size per I/O",
+ .type = FIO_OPT_INT,
+ .off1 = offsetof(struct xnvme_fioe_options, md_per_io_size),
+ .def = "0",
+ .help = "Size of separate metadata buffer per I/O (Default: 0)",
+ .category = FIO_OPT_C_ENGINE,
+ .group = FIO_OPT_G_XNVME,
+ },
free(xd->iocq);
free(xd->iovec);
free(xd->iocq);
free(xd->iovec);
free(xd);
td->io_ops_data = NULL;
}
free(xd);
td->io_ops_data = NULL;
}
fwrap->ssw = xnvme_dev_get_ssw(fwrap->dev);
fwrap->lba_nbytes = fwrap->geo->lba_nbytes;
fwrap->ssw = xnvme_dev_get_ssw(fwrap->dev);
fwrap->lba_nbytes = fwrap->geo->lba_nbytes;
+ fwrap->md_nbytes = fwrap->geo->nbytes_oob;
+
+ if (fwrap->geo->lba_extended)
+ fwrap->lba_pow2 = 0;
+ else
+ fwrap->lba_pow2 = 1;
fwrap->fio_file = f;
fwrap->fio_file->filetype = FIO_TYPE_BLOCK;
fwrap->fio_file = f;
fwrap->fio_file->filetype = FIO_TYPE_BLOCK;
+ if (o->xnvme_iovec && o->md_per_io_size) {
+ xd->md_iovec = calloc(td->o.iodepth, sizeof(*xd->md_iovec));
+ if (!xd->md_iovec) {
+ free(xd->iocq);
+ free(xd->iovec);
+ free(xd);
+ log_err("ioeng->init(): !calloc(xd->md_iovec), err(%d)\n", errno);
+ return 1;
+ }
+ }
+
xd->prev = -1;
td->io_ops_data = xd;
xd->prev = -1;
td->io_ops_data = xd;
{
if (_dev_open(td, f)) {
/*
{
if (_dev_open(td, f)) {
/*
- * Note: We are not freeing xd, iocq and iovec. This
- * will be done as part of cleanup routine.
+ * Note: We are not freeing xd, iocq, iovec and md_iovec.
+ * This will be done as part of cleanup routine.
*/
log_err("ioeng->init(): failed; _dev_open(%s)\n", f->file_name);
return 1;
*/
log_err("ioeng->init(): failed; _dev_open(%s)\n", f->file_name);
return 1;
static int xnvme_fioe_io_u_init(struct thread_data *td, struct io_u *io_u)
{
static int xnvme_fioe_io_u_init(struct thread_data *td, struct io_u *io_u)
{
+ struct xnvme_fioe_request *fio_req;
+ struct xnvme_fioe_options *o = td->eo;
+ struct xnvme_fioe_data *xd = td->io_ops_data;
+ struct xnvme_fioe_fwrap *fwrap = &xd->files[0];
+
+ if (!fwrap->dev) {
+ log_err("ioeng->io_u_init(): failed; no dev-handle\n");
+ return 1;
+ }
+
io_u->mmap_data = td->io_ops_data;
io_u->mmap_data = td->io_ops_data;
+ io_u->engine_data = NULL;
+
+ fio_req = calloc(1, sizeof(*fio_req));
+ if (!fio_req) {
+ log_err("ioeng->io_u_init(): !calloc(fio_req), err(%d)\n", errno);
+ return 1;
+ }
+
+ if (o->md_per_io_size) {
+ fio_req->md_buf = xnvme_buf_alloc(fwrap->dev, o->md_per_io_size);
+ if (!fio_req->md_buf) {
+ free(fio_req);
+ return 1;
+ }
+ }
+
+ io_u->engine_data = fio_req;
return 0;
}
static void xnvme_fioe_io_u_free(struct thread_data *td, struct io_u *io_u)
{
return 0;
}
static void xnvme_fioe_io_u_free(struct thread_data *td, struct io_u *io_u)
{
+ struct xnvme_fioe_data *xd = NULL;
+ struct xnvme_fioe_fwrap *fwrap = NULL;
+ struct xnvme_fioe_request *fio_req = NULL;
+
+ if (!td->io_ops_data)
+ return;
+
+ xd = td->io_ops_data;
+ fwrap = &xd->files[0];
+
+ if (!fwrap->dev) {
+ log_err("ioeng->io_u_free(): failed no dev-handle\n");
+ return;
+ }
+
+ fio_req = io_u->engine_data;
+ if (fio_req->md_buf)
+ xnvme_buf_free(fwrap->dev, fio_req->md_buf);
+
+ free(fio_req);
+
io_u->mmap_data = NULL;
}
io_u->mmap_data = NULL;
}
struct xnvme_fioe_data *xd = td->io_ops_data;
struct xnvme_fioe_fwrap *fwrap;
struct xnvme_cmd_ctx *ctx;
struct xnvme_fioe_data *xd = td->io_ops_data;
struct xnvme_fioe_fwrap *fwrap;
struct xnvme_cmd_ctx *ctx;
+ struct xnvme_fioe_request *fio_req = io_u->engine_data;
uint32_t nsid;
uint64_t slba;
uint16_t nlb;
uint32_t nsid;
uint64_t slba;
uint16_t nlb;
fwrap = &xd->files[io_u->file->fileno];
nsid = xnvme_dev_get_nsid(fwrap->dev);
fwrap = &xd->files[io_u->file->fileno];
nsid = xnvme_dev_get_nsid(fwrap->dev);
- slba = io_u->offset >> fwrap->ssw;
- nlb = (io_u->xfer_buflen >> fwrap->ssw) - 1;
+ if (fwrap->lba_pow2) {
+ slba = io_u->offset >> fwrap->ssw;
+ nlb = (io_u->xfer_buflen >> fwrap->ssw) - 1;
+ } else {
+ slba = io_u->offset / fwrap->lba_nbytes;
+ nlb = (io_u->xfer_buflen / fwrap->lba_nbytes) - 1;
+ }
ctx = xnvme_queue_get_cmd_ctx(fwrap->queue);
ctx->async.cb_arg = io_u;
ctx = xnvme_queue_get_cmd_ctx(fwrap->queue);
ctx->async.cb_arg = io_u;
if (vectored_io) {
xd->iovec[io_u->index].iov_base = io_u->xfer_buf;
xd->iovec[io_u->index].iov_len = io_u->xfer_buflen;
if (vectored_io) {
xd->iovec[io_u->index].iov_base = io_u->xfer_buf;
xd->iovec[io_u->index].iov_len = io_u->xfer_buflen;
-
- err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen, NULL, 0,
- 0);
+ if (fwrap->md_nbytes && fwrap->lba_pow2) {
+ xd->md_iovec[io_u->index].iov_base = fio_req->md_buf;
+ xd->md_iovec[io_u->index].iov_len = fwrap->md_nbytes * (nlb + 1);
+ err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen,
+ &xd->md_iovec[io_u->index], 1,
+ fwrap->md_nbytes * (nlb + 1));
+ } else {
+ err = xnvme_cmd_passv(ctx, &xd->iovec[io_u->index], 1, io_u->xfer_buflen,
+ NULL, 0, 0);
+ }
- err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen, NULL, 0);
+ if (fwrap->md_nbytes && fwrap->lba_pow2)
+ err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen,
+ fio_req->md_buf, fwrap->md_nbytes * (nlb + 1));
+ else
+ err = xnvme_cmd_pass(ctx, io_u->xfer_buf, io_u->xfer_buflen, NULL, 0);
to isolate these identifiers to specific jobs. If you want fio to use placement
identifiers only at indices 0, 2 and 5, you would set `fdp_pli=0,2,5`.
.TP
to isolate these identifiers to specific jobs. If you want fio to use placement
identifiers only at indices 0, 2 and 5, you would set `fdp_pli=0,2,5`.
.TP
-.BI (io_uring_cmd)md_per_io_size \fR=\fPint
+.BI (io_uring_cmd,xnvme)md_per_io_size \fR=\fPint
Size in bytes for separate metadata buffer per IO. Default: 0.
.TP
.BI (io_uring_cmd)pi_act \fR=\fPint
Size in bytes for separate metadata buffer per IO. Default: 0.
.TP
.BI (io_uring_cmd)pi_act \fR=\fPint