From 4885a6eba420ce216e4102df3e42229e167d1b7b Mon Sep 17 00:00:00 2001
From: Vincent Fu
Date: Tue, 27 Jun 2023 18:48:53 +0000
Subject: [PATCH 1/1] engines/io_uring_cmd: make trims async

Instead of using a synchronous ioctl to send a trim/deallocate request,
use the io_uring pass-through interface to submit a dataset management
command with the deallocate attribute set, just as we already do for
read and write commands.

Signed-off-by: Vincent Fu
---
 engines/io_uring.c | 23 +++++++++++++++----
 engines/nvme.c     | 57 +++++++++++++++++++++++++++++++++++++---------
 engines/nvme.h     |  2 +-
 3 files changed, 65 insertions(+), 17 deletions(-)

diff --git a/engines/io_uring.c b/engines/io_uring.c
index 73e4a27a..7cdbdafa 100644
--- a/engines/io_uring.c
+++ b/engines/io_uring.c
@@ -78,6 +78,8 @@ struct ioring_data {
 	struct ioring_mmap mmap[3];
 
 	struct cmdprio cmdprio;
+
+	struct nvme_dsm_range *dsm;
 };
 
 struct ioring_options {
@@ -410,7 +412,7 @@ static int fio_ioring_cmd_prep(struct thread_data *td, struct io_u *io_u)
 	if (o->cmd_type != FIO_URING_CMD_NVME)
 		return -EINVAL;
 
-	if (io_u->ddir == DDIR_TRIM)
+	if (io_u->ddir == DDIR_TRIM && td->io_ops->flags & FIO_ASYNCIO_SYNC_TRIM)
 		return 0;
 
 	sqe = &ld->sqes[(io_u->index) << 1];
@@ -444,7 +446,8 @@ static int fio_ioring_cmd_prep(struct thread_data *td, struct io_u *io_u)
 
 	cmd = (struct nvme_uring_cmd *)sqe->cmd;
 	return fio_nvme_uring_cmd_prep(cmd, io_u,
-			o->nonvectored ? NULL : &ld->iovecs[io_u->index]);
+			o->nonvectored ? NULL : &ld->iovecs[io_u->index],
+			&ld->dsm[io_u->index]);
 }
 
 static struct io_u *fio_ioring_event(struct thread_data *td, int event)
@@ -594,7 +597,7 @@ static enum fio_q_status fio_ioring_queue(struct thread_data *td,
 	if (ld->queued == ld->iodepth)
 		return FIO_Q_BUSY;
 
-	if (io_u->ddir == DDIR_TRIM) {
+	if (io_u->ddir == DDIR_TRIM && td->io_ops->flags & FIO_ASYNCIO_SYNC_TRIM) {
 		if (ld->queued)
 			return FIO_Q_BUSY;
 
@@ -734,6 +737,7 @@ static void fio_ioring_cleanup(struct thread_data *td)
 		free(ld->io_u_index);
 		free(ld->iovecs);
 		free(ld->fds);
+		free(ld->dsm);
 		free(ld);
 	}
 }
@@ -1146,6 +1150,16 @@ static int fio_ioring_init(struct thread_data *td)
 		return 1;
 	}
 
+	/*
+	 * For io_uring_cmd, trims are async operations unless we are operating
+	 * in zbd mode where trim means zone reset.
+	 */
+	if (!strcmp(td->io_ops->name, "io_uring_cmd") && td_trim(td) &&
+	    td->o.zone_mode == ZONE_MODE_ZBD)
+		td->io_ops->flags |= FIO_ASYNCIO_SYNC_TRIM;
+	else
+		ld->dsm = calloc(ld->iodepth, sizeof(*ld->dsm));
+
 	return 0;
 }
 
@@ -1361,8 +1375,7 @@ static struct ioengine_ops ioengine_uring = {
 static struct ioengine_ops ioengine_uring_cmd = {
 	.name			= "io_uring_cmd",
 	.version		= FIO_IOOPS_VERSION,
-	.flags			= FIO_ASYNCIO_SYNC_TRIM | FIO_NO_OFFLOAD |
-					FIO_MEMALIGN | FIO_RAWIO |
+	.flags			= FIO_NO_OFFLOAD | FIO_MEMALIGN | FIO_RAWIO |
 					FIO_ASYNCIO_SETS_ISSUE_TIME,
 	.init			= fio_ioring_init,
 	.post_init		= fio_ioring_cmd_post_init,
diff --git a/engines/nvme.c b/engines/nvme.c
index 1047ade2..2901803a 100644
--- a/engines/nvme.c
+++ b/engines/nvme.c
@@ -5,8 +5,41 @@
 
 #include "nvme.h"
 
+static inline __u64 get_slba(struct nvme_data *data, struct io_u *io_u)
+{
+	if (data->lba_ext)
+		return io_u->offset / data->lba_ext;
+	else
+		return io_u->offset >> data->lba_shift;
+}
+
+static inline __u32 get_nlb(struct nvme_data *data, struct io_u *io_u)
+{
+	if (data->lba_ext)
+		return io_u->xfer_buflen / data->lba_ext - 1;
+	else
+		return (io_u->xfer_buflen >> data->lba_shift) - 1;
+}
+
+void fio_nvme_uring_cmd_trim_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
+				  struct nvme_dsm_range *dsm)
+{
+	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
+
+	cmd->opcode = nvme_cmd_dsm;
+	cmd->nsid = data->nsid;
+	cmd->cdw10 = 0;
+	cmd->cdw11 = NVME_ATTRIBUTE_DEALLOCATE;
+	cmd->addr = (__u64) (uintptr_t) dsm;
+	cmd->data_len = sizeof(*dsm);
+
+	dsm->slba = get_slba(data, io_u);
+	/* nlb is a 1-based value for deallocate */
+	dsm->nlb = get_nlb(data, io_u) + 1;
+}
+
 int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
-			    struct iovec *iov)
+			    struct iovec *iov, struct nvme_dsm_range *dsm)
 {
 	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
 	__u64 slba;
@@ -14,21 +47,23 @@ int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
 
 	memset(cmd, 0, sizeof(struct nvme_uring_cmd));
 
-	if (io_u->ddir == DDIR_READ)
+	switch (io_u->ddir) {
+	case DDIR_READ:
 		cmd->opcode = nvme_cmd_read;
-	else if (io_u->ddir == DDIR_WRITE)
+		break;
+	case DDIR_WRITE:
 		cmd->opcode = nvme_cmd_write;
-	else
+		break;
+	case DDIR_TRIM:
+		fio_nvme_uring_cmd_trim_prep(cmd, io_u, dsm);
+		return 0;
+	default:
 		return -ENOTSUP;
-
-	if (data->lba_ext) {
-		slba = io_u->offset / data->lba_ext;
-		nlb = (io_u->xfer_buflen / data->lba_ext) - 1;
-	} else {
-		slba = io_u->offset >> data->lba_shift;
-		nlb = (io_u->xfer_buflen >> data->lba_shift) - 1;
 	}
 
+	slba = get_slba(data, io_u);
+	nlb = get_nlb(data, io_u);
+
 	/* cdw10 and cdw11 represent starting lba */
 	cmd->cdw10 = slba & 0xffffffff;
 	cmd->cdw11 = slba >> 32;
diff --git a/engines/nvme.h b/engines/nvme.h
index f7cb820d..34be2de1 100644
--- a/engines/nvme.h
+++ b/engines/nvme.h
@@ -226,7 +226,7 @@ int fio_nvme_get_info(struct fio_file *f, __u32 *nsid, __u32 *lba_sz,
 		      __u32 *ms, __u64 *nlba);
 
 int fio_nvme_uring_cmd_prep(struct nvme_uring_cmd *cmd, struct io_u *io_u,
-			    struct iovec *iov);
+			    struct iovec *iov, struct nvme_dsm_range *dsm);
 
 int fio_nvme_get_zoned_model(struct thread_data *td, struct fio_file *f,
 			     enum zbd_zoned_model *model);
-- 
2.25.1
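
For readers who want to see the pass-through mechanism in isolation, below is a minimal standalone sketch (not part of the patch) that issues a single dataset management (deallocate) command through io_uring, the same path the patch takes for DDIR_TRIM. It assumes a 5.19+ kernel, liburing 2.2 or newer, and an NVMe character device; the device path /dev/ng0n1, the LBA range, and the locally defined dsm_range struct and NVME_CMD_DSM/NVME_ATTR_DEALLOCATE constants are illustrative stand-ins for the definitions fio keeps in engines/nvme.h. Note that the command genuinely deallocates LBAs 0-255 on the target namespace.

/*
 * Sketch: one NVMe DSM (deallocate) command via io_uring passthrough.
 * Big SQEs/CQEs are required for IORING_OP_URING_CMD.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <liburing.h>
#include <linux/nvme_ioctl.h>

/* mirrors the DSM range layout fio defines in engines/nvme.h */
struct dsm_range {
	__u32 cattr;
	__u32 nlb;	/* number of LBAs, 1-based */
	__u64 slba;	/* starting LBA */
};

#define NVME_CMD_DSM		0x09
#define NVME_ATTR_DEALLOCATE	(1 << 2)

int main(void)
{
	struct io_uring_params p = { .flags = IORING_SETUP_SQE128 |
					      IORING_SETUP_CQE32 };
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct nvme_uring_cmd *cmd;
	/* example range: deallocate 256 LBAs starting at LBA 0 */
	static struct dsm_range range = { .nlb = 256, .slba = 0 };
	int fd, nsid;

	fd = open("/dev/ng0n1", O_RDWR);	/* example device path */
	if (fd < 0 || io_uring_queue_init_params(8, &ring, &p) < 0)
		return 1;
	nsid = ioctl(fd, NVME_IOCTL_ID);

	sqe = io_uring_get_sqe(&ring);
	if (!sqe)
		return 1;
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->fd = fd;
	sqe->cmd_op = NVME_URING_CMD_IO;

	/* the NVMe command is carried in the big-SQE command area */
	cmd = (struct nvme_uring_cmd *)sqe->cmd;
	memset(cmd, 0, sizeof(*cmd));
	cmd->opcode = NVME_CMD_DSM;
	cmd->nsid = nsid;
	cmd->cdw10 = 0;				/* number of ranges - 1 */
	cmd->cdw11 = NVME_ATTR_DEALLOCATE;	/* AD: deallocate */
	cmd->addr = (__u64)(uintptr_t)&range;
	cmd->data_len = sizeof(range);

	if (io_uring_submit(&ring) != 1 || io_uring_wait_cqe(&ring, &cqe))
		return 1;
	printf("deallocate completed, cqe->res = %d\n", cqe->res);
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	close(fd);
	return 0;
}

Because the submission is asynchronous, the DSM range buffer must stay valid until the completion arrives; that is why the patch allocates one struct nvme_dsm_range per io_u (ld->dsm) rather than building the range on the stack.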