{
.name = "sqthread_poll",
.lname = "Kernel SQ thread polling",
- .type = FIO_OPT_INT,
+ .type = FIO_OPT_STR_SET,
.off1 = offsetof(struct ioring_options, sqpoll_thread),
.help = "Offload submission/completion to kernel thread",
.category = FIO_OPT_C_ENGINE,
ld->prepped = 0;
sqe->flags |= IOSQE_ASYNC;
}
+ if (o->fixedbufs) {
+ sqe->uring_cmd_flags = IORING_URING_CMD_FIXED;
+ sqe->buf_index = io_u->index;
+ }
cmd = (struct nvme_uring_cmd *)sqe->cmd;
return fio_nvme_uring_cmd_prep(cmd, io_u,
if (r < 0) {
if (errno == EAGAIN || errno == EINTR)
continue;
+ r = -errno;
td_verror(td, errno, "io_uring_enter");
break;
}
start++;
}
+
+ /*
+ * Only needed when replaying an iolog
+ */
+ if (td->o.read_iolog_file)
+ memcpy(&td->last_issue, &now, sizeof(now));
}
static int fio_ioring_commit(struct thread_data *td)
*/
if (o->sqpoll_thread) {
struct io_sq_ring *ring = &ld->sq_ring;
+ unsigned start = *ld->sq_ring.head;
unsigned flags;
flags = atomic_load_acquire(ring->flags);
if (flags & IORING_SQ_NEED_WAKEUP)
io_uring_enter(ld, ld->queued, 0,
IORING_ENTER_SQ_WAKEUP);
+ fio_ioring_queued(td, start, ld->queued);
+ io_u_mark_submit(td, ld->queued);
+
ld->queued = 0;
return 0;
}
usleep(1);
continue;
}
+ ret = -errno;
td_verror(td, errno, "io_uring_enter submit");
break;
}
p.flags |= IORING_SETUP_SQ_AFF;
p.sq_thread_cpu = o->sqpoll_cpu;
}
+
+ /*
+ * Submission latency for sqpoll_thread is just the time it
+ * takes to fill in the SQ ring entries, plus any syscall made
+ * when IORING_SQ_NEED_WAKEUP is set; we don't need to log that
+ * time separately.
+ */
+ td->o.disable_slat = 1;
}
/*
p.flags |= IORING_SETUP_CQSIZE;
p.cq_entries = depth;
+ /*
+ * Set up COOP_TASKRUN, as we don't need to be interrupted by an
+ * IPI to complete IO operations.
+ */
+ p.flags |= IORING_SETUP_COOP_TASKRUN;
+
+ /*
+ * io_uring is always a single issuer, and we can defer task_work
+ * runs until we reap events.
+ */
+ p.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;
+
retry:
ret = syscall(__NR_io_uring_setup, depth, &p);
if (ret < 0) {
+ if (errno == EINVAL && p.flags & IORING_SETUP_DEFER_TASKRUN) {
+ p.flags &= ~IORING_SETUP_DEFER_TASKRUN;
+ p.flags &= ~IORING_SETUP_SINGLE_ISSUER;
+ goto retry;
+ }
+ if (errno == EINVAL && p.flags & IORING_SETUP_COOP_TASKRUN) {
+ p.flags &= ~IORING_SETUP_COOP_TASKRUN;
+ goto retry;
+ }
if (errno == EINVAL && p.flags & IORING_SETUP_CQSIZE) {
p.flags &= ~IORING_SETUP_CQSIZE;
goto retry;
p.flags |= IORING_SETUP_SQ_AFF;
p.sq_thread_cpu = o->sqpoll_cpu;
}
+
+ /*
+ * Submission latency for sqpoll_thread is just the time it
+ * takes to fill in the SQ ring entries, plus any syscall made
+ * when IORING_SQ_NEED_WAKEUP is set; we don't need to log that
+ * time separately.
+ */
+ td->o.disable_slat = 1;
}
if (o->cmd_type == FIO_URING_CMD_NVME) {
p.flags |= IORING_SETUP_SQE128;
p.flags |= IORING_SETUP_CQSIZE;
p.cq_entries = depth;
+ /*
+ * Set up COOP_TASKRUN, as we don't need to be interrupted by an
+ * IPI to complete IO operations.
+ */
+ p.flags |= IORING_SETUP_COOP_TASKRUN;
+
+ /*
+ * io_uring is always a single issuer, and we can defer task_work
+ * runs until we reap events.
+ */
+ p.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;
+
retry:
ret = syscall(__NR_io_uring_setup, depth, &p);
if (ret < 0) {
+ if (errno == EINVAL && p.flags & IORING_SETUP_DEFER_TASKRUN) {
+ p.flags &= ~IORING_SETUP_DEFER_TASKRUN;
+ p.flags &= ~IORING_SETUP_SINGLE_ISSUER;
+ goto retry;
+ }
+ if (errno == EINVAL && p.flags & IORING_SETUP_COOP_TASKRUN) {
+ p.flags &= ~IORING_SETUP_COOP_TASKRUN;
+ goto retry;
+ }
if (errno == EINVAL && p.flags & IORING_SETUP_CQSIZE) {
p.flags &= ~IORING_SETUP_CQSIZE;
goto retry;
static struct ioengine_ops ioengine_uring = {
.name = "io_uring",
.version = FIO_IOOPS_VERSION,
- .flags = FIO_ASYNCIO_SYNC_TRIM | FIO_NO_OFFLOAD,
+ .flags = FIO_ASYNCIO_SYNC_TRIM | FIO_NO_OFFLOAD |
+ FIO_ASYNCIO_SETS_ISSUE_TIME,
.init = fio_ioring_init,
.post_init = fio_ioring_post_init,
.io_u_init = fio_ioring_io_u_init,
static struct ioengine_ops ioengine_uring_cmd = {
.name = "io_uring_cmd",
.version = FIO_IOOPS_VERSION,
- .flags = FIO_ASYNCIO_SYNC_TRIM | FIO_NO_OFFLOAD | FIO_MEMALIGN | FIO_RAWIO,
+ .flags = FIO_ASYNCIO_SYNC_TRIM | FIO_NO_OFFLOAD |
+ FIO_MEMALIGN | FIO_RAWIO |
+ FIO_ASYNCIO_SETS_ISSUE_TIME,
.init = fio_ioring_init,
.post_init = fio_ioring_cmd_post_init,
.io_u_init = fio_ioring_io_u_init,