 * IO engine using the new native Linux aio io_uring interface. See:
 * http://git.kernel.dk/cgit/linux-block/log/?h=io_uring
#include <sys/resource.h>
#include "../lib/pow2.h"
#include "../optgroup.h"
#include "../lib/memalign.h"
#include "../lib/fls.h"
#include "../lib/roundup.h"
#include "../verify.h"
#ifdef ARCH_HAVE_IOURING
#include "../lib/types.h"
#include "../os/linux/io_uring.h"
	FIO_URING_CMD_NVME = 1,
enum uring_cmd_write_mode {
	FIO_URING_CMD_WMODE_WRITE = 1,
	FIO_URING_CMD_WMODE_UNCOR,
	FIO_URING_CMD_WMODE_ZEROES,
	FIO_URING_CMD_WMODE_VERIFY,
enum uring_cmd_verify_mode {
	FIO_URING_CMD_VMODE_READ = 1,
	FIO_URING_CMD_VMODE_COMPARE,
	unsigned *ring_entries;
	unsigned *ring_entries;
	struct io_uring_cqe *cqes;
	struct io_u **io_u_index;
	struct io_sq_ring sq_ring;
	struct io_uring_sqe *sqes;
	unsigned sq_ring_mask;
	struct io_cq_ring cq_ring;
	unsigned cq_ring_mask;
	struct ioring_mmap mmap[3];
	struct cmdprio cmdprio;
	uint32_t cdw12_flags[DDIR_RWDIR_CNT];
struct ioring_options {
	struct thread_data *td;
	unsigned int readfua;
	unsigned int writefua;
	unsigned int write_mode;
	unsigned int verify_mode;
	struct cmdprio_options cmdprio_options;
	unsigned int fixedbufs;
	unsigned int registerfiles;
	unsigned int sqpoll_thread;
	unsigned int sqpoll_set;
	unsigned int sqpoll_cpu;
	unsigned int nonvectored;
	unsigned int uncached;
	unsigned int force_async;
	unsigned int md_per_io_size;
	unsigned int apptag_mask;
	enum uring_cmd_type cmd_type;
static const int ddir_to_op[2][2] = {
	{ IORING_OP_READV, IORING_OP_READ },
	{ IORING_OP_WRITEV, IORING_OP_WRITE }
static const int fixed_ddir_to_op[2] = {
	IORING_OP_READ_FIXED,
	IORING_OP_WRITE_FIXED
static int fio_ioring_sqpoll_cb(void *data, unsigned long long *val)
	struct ioring_options *o = data;
	o->sqpoll_cpu = *val;
static struct fio_option options[] = {
		.lname = "High Priority",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct ioring_options, hipri),
		.help = "Use polled IO completions",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.lname = "Read fua flag support",
		.type = FIO_OPT_BOOL,
		.off1 = offsetof(struct ioring_options, readfua),
		.help = "Set FUA flag (force unit access) for all Read operations",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.lname = "Write fua flag support",
		.type = FIO_OPT_BOOL,
		.off1 = offsetof(struct ioring_options, writefua),
		.help = "Set FUA flag (force unit access) for all Write operations",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.name = "write_mode",
		.lname = "Additional Write commands support (Write Uncorrectable, Write Zeroes)",
		.off1 = offsetof(struct ioring_options, write_mode),
		.help = "Issue Write Uncorrectable or Zeroes command instead of Write command",
			.oval = FIO_URING_CMD_WMODE_WRITE,
			.help = "Issue Write commands for write operations"
			.oval = FIO_URING_CMD_WMODE_UNCOR,
			.help = "Issue Write Uncorrectable commands for write operations"
			.oval = FIO_URING_CMD_WMODE_ZEROES,
			.help = "Issue Write Zeroes commands for write operations"
			.oval = FIO_URING_CMD_WMODE_VERIFY,
			.help = "Issue Verify commands for write operations"
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.name = "verify_mode",
		.lname = "Do verify based on the configured command (e.g., Read or Compare command)",
		.off1 = offsetof(struct ioring_options, verify_mode),
		.help = "Issue Read or Compare command in the verification phase",
			.oval = FIO_URING_CMD_VMODE_READ,
			.help = "Issue Read commands in the verification phase"
			.oval = FIO_URING_CMD_VMODE_COMPARE,
			.help = "Issue Compare commands in the verification phase"
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.lname = "Fixed (pre-mapped) IO buffers",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct ioring_options, fixedbufs),
		.help = "Pre-map IO buffers",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.name = "registerfiles",
		.lname = "Register file set",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct ioring_options, registerfiles),
		.help = "Pre-open/register files",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.name = "sqthread_poll",
		.lname = "Kernel SQ thread polling",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct ioring_options, sqpoll_thread),
		.help = "Offload submission/completion to kernel thread",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.name = "sqthread_poll_cpu",
		.lname = "SQ Thread Poll CPU",
		.cb = fio_ioring_sqpoll_cb,
		.help = "What CPU to run SQ thread polling on",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.name = "nonvectored",
		.lname = "Non-vectored",
		.off1 = offsetof(struct ioring_options, nonvectored),
		.help = "Use non-vectored read/write commands",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.off1 = offsetof(struct ioring_options, uncached),
		.help = "Use RWF_DONTCACHE for buffered read/writes",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.lname = "RWF_NOWAIT",
		.type = FIO_OPT_BOOL,
		.off1 = offsetof(struct ioring_options, nowait),
		.help = "Use RWF_NOWAIT for reads/writes",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.name = "force_async",
		.lname = "Force async",
		.off1 = offsetof(struct ioring_options, force_async),
		.help = "Set IOSQE_ASYNC every N requests",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.lname = "Uring cmd type",
		.off1 = offsetof(struct ioring_options, cmd_type),
		.help = "Specify uring-cmd type",
			.oval = FIO_URING_CMD_NVME,
			.help = "Issue nvme-uring-cmd",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	CMDPRIO_OPTIONS(struct ioring_options, FIO_OPT_G_IOURING),
		.name = "md_per_io_size",
		.lname = "Separate Metadata Buffer Size per I/O",
		.off1 = offsetof(struct ioring_options, md_per_io_size),
		.help = "Size of separate metadata buffer per I/O (Default: 0)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.lname = "Protection Information Action",
		.type = FIO_OPT_BOOL,
		.off1 = offsetof(struct ioring_options, pi_act),
		.help = "Protection Information Action bit (pi_act=1 or pi_act=0)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.lname = "Protection Information Check",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct ioring_options, pi_chk),
		.help = "Control of Protection Information Checking (pi_chk=GUARD,REFTAG,APPTAG)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.lname = "Application Tag used in Protection Information",
		.off1 = offsetof(struct ioring_options, apptag),
		.help = "Application Tag used in Protection Information field (Default: 0x1234)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.name = "apptag_mask",
		.lname = "Application Tag Mask",
		.off1 = offsetof(struct ioring_options, apptag_mask),
		.help = "Application Tag Mask used with Application Tag (Default: 0xffff)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
		.lname = "Deallocate bit for write zeroes command",
		.type = FIO_OPT_BOOL,
		.off1 = offsetof(struct ioring_options, deac),
		.help = "Set DEAC (deallocate) flag for write zeroes command",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
static int io_uring_enter(struct ioring_data *ld, unsigned int to_submit,
			  unsigned int min_complete, unsigned int flags)
#ifdef FIO_ARCH_HAS_SYSCALL
	return __do_syscall6(__NR_io_uring_enter, ld->ring_fd, to_submit,
			     min_complete, flags, NULL, 0);
	return syscall(__NR_io_uring_enter, ld->ring_fd, to_submit,
			min_complete, flags, NULL, 0);
#ifndef BLOCK_URING_CMD_DISCARD
#define BLOCK_URING_CMD_DISCARD _IO(0x12, 0)
static int fio_ioring_prep(struct thread_data *td, struct io_u *io_u)
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct fio_file *f = io_u->file;
	struct io_uring_sqe *sqe;
	sqe = &ld->sqes[io_u->index];
	if (o->registerfiles) {
		sqe->fd = f->engine_pos;
		sqe->flags = IOSQE_FIXED_FILE;
	if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) {
			sqe->opcode = fixed_ddir_to_op[io_u->ddir];
			sqe->addr = (unsigned long) io_u->xfer_buf;
			sqe->len = io_u->xfer_buflen;
			sqe->buf_index = io_u->index;
			struct iovec *iov = &ld->iovecs[io_u->index];
			 * Update based on actual io_u, requeue could have
			iov->iov_base = io_u->xfer_buf;
			iov->iov_len = io_u->xfer_buflen;
			sqe->opcode = ddir_to_op[io_u->ddir][!!o->nonvectored];
			if (o->nonvectored) {
				sqe->addr = (unsigned long) iov->iov_base;
				sqe->len = iov->iov_len;
				sqe->addr = (unsigned long) iov;
		if (!td->o.odirect && o->uncached)
			sqe->rw_flags |= RWF_DONTCACHE;
			sqe->rw_flags |= RWF_NOWAIT;
		if (td->o.oatomic && io_u->ddir == DDIR_WRITE)
			sqe->rw_flags |= RWF_ATOMIC;
		 * Since io_uring can have a submission context (sqthread_poll)
		 * that is different from the process context, we cannot rely on
		 * the IO priority set by ioprio_set() (options prio, prioclass,
		 * and priohint) to be inherited.
		 * td->ioprio will have the value of the "default prio", so set
		 * this unconditionally. This value might get overridden by
		 * fio_ioring_cmdprio_prep() if the option cmdprio_percentage or
		 * cmdprio_bssplit is used.
		sqe->ioprio = td->ioprio;
		sqe->off = io_u->offset;
	} else if (ddir_sync(io_u->ddir)) {
		if (io_u->ddir == DDIR_SYNC_FILE_RANGE) {
			sqe->off = f->first_write;
			sqe->len = f->last_write - f->first_write;
			sqe->sync_range_flags = td->o.sync_file_range;
			sqe->opcode = IORING_OP_SYNC_FILE_RANGE;
			if (io_u->ddir == DDIR_DATASYNC)
				sqe->fsync_flags |= IORING_FSYNC_DATASYNC;
			sqe->opcode = IORING_OP_FSYNC;
	} else if (io_u->ddir == DDIR_TRIM) {
		sqe->opcode = IORING_OP_URING_CMD;
		sqe->addr = io_u->offset;
		sqe->addr3 = io_u->xfer_buflen;
		sqe->len = sqe->off = 0;
		sqe->cmd_op = BLOCK_URING_CMD_DISCARD;
	if (o->force_async && ++ld->prepped == o->force_async) {
		sqe->flags |= IOSQE_ASYNC;
	sqe->user_data = (unsigned long) io_u;
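		/*
		 * Illustrative example (values assumed, not taken from this
		 * file): with prioclass=2 (best-effort) and prio=3, the encoded
		 * priority is (2 << IOPRIO_CLASS_SHIFT) | 3 == 0x4003, where
		 * IOPRIO_CLASS_SHIFT is 13; that 16-bit value is what ends up
		 * in sqe->ioprio above.
		 */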
static int fio_ioring_cmd_prep(struct thread_data *td, struct io_u *io_u)
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct fio_file *f = io_u->file;
	struct nvme_uring_cmd *cmd;
	struct io_uring_sqe *sqe;
	struct nvme_dsm *dsm;
	unsigned int dsm_size;
	uint8_t read_opcode = nvme_cmd_read;
	/* only supports nvme_uring_cmd */
	if (o->cmd_type != FIO_URING_CMD_NVME)
	if (io_u->ddir == DDIR_TRIM && td->io_ops->flags & FIO_ASYNCIO_SYNC_TRIM)
	sqe = &ld->sqes[(io_u->index) << 1];
	if (o->registerfiles) {
		sqe->fd = f->engine_pos;
		sqe->flags = IOSQE_FIXED_FILE;
	if (!td->o.odirect && o->uncached)
		sqe->rw_flags |= RWF_DONTCACHE;
		sqe->rw_flags |= RWF_NOWAIT;
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->user_data = (unsigned long) io_u;
		sqe->cmd_op = NVME_URING_CMD_IO;
		sqe->cmd_op = NVME_URING_CMD_IO_VEC;
	if (o->force_async && ++ld->prepped == o->force_async) {
		sqe->flags |= IOSQE_ASYNC;
		sqe->uring_cmd_flags = IORING_URING_CMD_FIXED;
		sqe->buf_index = io_u->index;
	cmd = (struct nvme_uring_cmd *)sqe->cmd;
	dsm_size = sizeof(*ld->dsm) + td->o.num_range * sizeof(struct nvme_dsm_range);
	ptr += io_u->index * dsm_size;
	dsm = (struct nvme_dsm *)ptr;
	 * If the READ command belongs to the verification phase and
	 * verify_mode=compare, convert the READ into a COMPARE command.
	if (io_u->flags & IO_U_F_VER_LIST && io_u->ddir == DDIR_READ &&
	    o->verify_mode == FIO_URING_CMD_VMODE_COMPARE) {
		populate_verify_io_u(td, io_u);
		read_opcode = nvme_cmd_compare;
		io_u_set(td, io_u, IO_U_F_VER_IN_DEV);
	return fio_nvme_uring_cmd_prep(cmd, io_u,
			o->nonvectored ? NULL : &ld->iovecs[io_u->index],
			dsm, read_opcode, ld->write_opcode,
			ld->cdw12_flags[io_u->ddir]);
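/*
 * Layout note (illustrative, based on the io_uring uapi): with
 * IORING_SETUP_SQE128 each submission slot is 128 bytes, i.e. two
 * struct io_uring_sqe back to back, with 80 bytes of command data
 * available at sqe->cmd for the NVMe passthrough command. That is why
 * fio_ioring_cmd_prep() indexes its slot as &ld->sqes[(io_u->index) << 1]
 * while the regular engine uses &ld->sqes[io_u->index].
 */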
static struct io_u *fio_ioring_event(struct thread_data *td, int event)
	struct ioring_data *ld = td->io_ops_data;
	struct io_uring_cqe *cqe;
	index = (event + ld->cq_ring_off) & ld->cq_ring_mask;
	cqe = &ld->cq_ring.cqes[index];
	io_u = (struct io_u *) (uintptr_t) cqe->user_data;
	/* trim returns 0 on success */
	if (cqe->res == io_u->xfer_buflen ||
	    (io_u->ddir == DDIR_TRIM && !cqe->res)) {
	if (cqe->res != io_u->xfer_buflen) {
		if (io_u->ddir == DDIR_TRIM) {
			ld->async_trim_fail = 1;
		if (cqe->res > io_u->xfer_buflen)
			io_u->error = -cqe->res;
			io_u->resid = io_u->xfer_buflen - cqe->res;
static struct io_u *fio_ioring_cmd_event(struct thread_data *td, int event)
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct io_uring_cqe *cqe;
	struct nvme_data *data;
	index = (event + ld->cq_ring_off) & ld->cq_ring_mask;
	if (o->cmd_type == FIO_URING_CMD_NVME)
	cqe = &ld->cq_ring.cqes[index];
	io_u = (struct io_u *) (uintptr_t) cqe->user_data;
	io_u->error = cqe->res;
	if (io_u->error != 0)
	if (o->cmd_type == FIO_URING_CMD_NVME) {
		data = FILE_ENG_DATA(io_u->file);
		if (data->pi_type && (io_u->ddir == DDIR_READ) && !o->pi_act) {
			ret = fio_nvme_pi_verify(data, io_u);
	 * If IO_U_F_DEVICE_ERROR is not set, io_u->error will be parsed as an
	 * errno; otherwise it holds a device-specific error value (the status value in the CQE).
	if ((int)io_u->error > 0)
		io_u_set(td, io_u, IO_U_F_DEVICE_ERROR);
		io_u_clear(td, io_u, IO_U_F_DEVICE_ERROR);
	io_u->error = abs((int)io_u->error);
static char *fio_ioring_cmd_errdetails(struct thread_data *td,
	struct ioring_options *o = td->eo;
	unsigned int sct = (io_u->error >> 8) & 0x7;
	unsigned int sc = io_u->error & 0xff;
#define MAXERRDETAIL 1024
#define MAXMSGCHUNK 128
	char *msg, msgchunk[MAXMSGCHUNK];
	if (!(io_u->flags & IO_U_F_DEVICE_ERROR))
	msg = calloc(1, MAXERRDETAIL);
	strcpy(msg, "io_uring_cmd: ");
	snprintf(msgchunk, MAXMSGCHUNK, "%s: ", io_u->file->file_name);
	strlcat(msg, msgchunk, MAXERRDETAIL);
	if (o->cmd_type == FIO_URING_CMD_NVME) {
		strlcat(msg, "cq entry status (", MAXERRDETAIL);
		snprintf(msgchunk, MAXMSGCHUNK, "sct=0x%02x; ", sct);
		strlcat(msg, msgchunk, MAXERRDETAIL);
		snprintf(msgchunk, MAXMSGCHUNK, "sc=0x%02x)", sc);
		strlcat(msg, msgchunk, MAXERRDETAIL);
		/* Print the status code in generic form */
		snprintf(msgchunk, MAXMSGCHUNK, "status=0x%x", io_u->error);
		strlcat(msg, msgchunk, MAXERRDETAIL);
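		/*
		 * Example decode (assumed value, for illustration): io_u->error
		 * of 0x0285 would print sct=0x02 (media and data integrity
		 * errors) and sc=0x85, which the NVMe spec lists as Compare
		 * Failure.
		 */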
static int fio_ioring_cqring_reap(struct thread_data *td, unsigned int events,
	struct ioring_data *ld = td->io_ops_data;
	struct io_cq_ring *ring = &ld->cq_ring;
	unsigned head, reaped = 0;
		if (head == atomic_load_acquire(ring->tail))
	} while (reaped + events < max);
	atomic_store_release(ring->head, head);
static int fio_ioring_getevents(struct thread_data *td, unsigned int min,
				unsigned int max, const struct timespec *t)
	struct ioring_data *ld = td->io_ops_data;
	unsigned actual_min = td->o.iodepth_batch_complete_min == 0 ? 0 : min;
	struct ioring_options *o = td->eo;
	struct io_cq_ring *ring = &ld->cq_ring;
	ld->cq_ring_off = *ring->head;
		r = fio_ioring_cqring_reap(td, events, max);
		if (!o->sqpoll_thread) {
			r = io_uring_enter(ld, 0, actual_min,
					   IORING_ENTER_GETEVENTS);
				if (errno == EAGAIN || errno == EINTR)
				td_verror(td, errno, "io_uring_enter");
	} while (events < min);
	return r < 0 ? r : events;
static inline void fio_ioring_cmd_nvme_pi(struct thread_data *td,
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct nvme_uring_cmd *cmd;
	struct io_uring_sqe *sqe;
	struct nvme_cmd_ext_io_opts ext_opts = {0};
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
	if (io_u->ddir == DDIR_TRIM)
	sqe = &ld->sqes[(io_u->index) << 1];
	cmd = (struct nvme_uring_cmd *)sqe->cmd;
		ext_opts.io_flags |= NVME_IO_PRINFO_PRACT;
	ext_opts.io_flags |= o->prchk;
	ext_opts.apptag = o->apptag;
	ext_opts.apptag_mask = o->apptag_mask;
	fio_nvme_pi_fill(cmd, io_u, &ext_opts);
static inline void fio_ioring_cmdprio_prep(struct thread_data *td,
	struct ioring_data *ld = td->io_ops_data;
	struct cmdprio *cmdprio = &ld->cmdprio;
	if (fio_cmdprio_set_ioprio(td, cmdprio, io_u))
		ld->sqes[io_u->index].ioprio = io_u->ioprio;
static enum fio_q_status fio_ioring_queue(struct thread_data *td,
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct io_sq_ring *ring = &ld->sq_ring;
	fio_ro_check(td, io_u);
	/* should not hit... */
	if (ld->queued == td->o.iodepth)
	/* if async trim has been tried and failed, punt to sync */
	if (io_u->ddir == DDIR_TRIM && ld->async_trim_fail) {
		do_io_u_trim(td, io_u);
		io_u_mark_submit(td, 1);
		io_u_mark_complete(td, 1);
		return FIO_Q_COMPLETED;
	if (ld->cmdprio.mode != CMDPRIO_MODE_NONE)
		fio_ioring_cmdprio_prep(td, io_u);
	if (!strcmp(td->io_ops->name, "io_uring_cmd") &&
	    o->cmd_type == FIO_URING_CMD_NVME)
		fio_ioring_cmd_nvme_pi(td, io_u);
	ring->array[tail & ld->sq_ring_mask] = io_u->index;
	atomic_store_release(ring->tail, tail + 1);
static void fio_ioring_queued(struct thread_data *td, int start, int nr)
	struct ioring_data *ld = td->io_ops_data;
	if (!fio_fill_issue_time(td))
	fio_gettime(&now, NULL);
		struct io_sq_ring *ring = &ld->sq_ring;
		int index = ring->array[start & ld->sq_ring_mask];
		struct io_u *io_u = ld->io_u_index[index];
		memcpy(&io_u->issue_time, &now, sizeof(now));
		io_u_queued(td, io_u);
	 * only used for iolog
	if (td->o.read_iolog_file)
		memcpy(&td->last_issue, &now, sizeof(now));
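	/*
	 * Ordering note (descriptive only, no behavior change): the plain
	 * store into ring->array[] above has to become visible before the new
	 * tail value, which is what the release store provides; the consumer
	 * side (the kernel for the SQ ring, fio_ioring_cqring_reap() for the
	 * CQ ring) pairs it with an acquire load of the index it polls.
	 */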
static int fio_ioring_commit(struct thread_data *td)
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	 * Kernel side does submission. Just need to check if the ring is
	 * flagged as needing a kick; if so, call io_uring_enter(). This
	 * only happens if we've been idle too long.
	if (o->sqpoll_thread) {
		struct io_sq_ring *ring = &ld->sq_ring;
		unsigned start = *ld->sq_ring.tail - ld->queued;
		flags = atomic_load_relaxed(ring->flags);
		if (flags & IORING_SQ_NEED_WAKEUP)
			io_uring_enter(ld, ld->queued, 0,
					IORING_ENTER_SQ_WAKEUP);
		fio_ioring_queued(td, start, ld->queued);
		io_u_mark_submit(td, ld->queued);
		unsigned start = *ld->sq_ring.head;
		long nr = ld->queued;
		ret = io_uring_enter(ld, nr, 0, IORING_ENTER_GETEVENTS);
			fio_ioring_queued(td, start, ret);
			io_u_mark_submit(td, ret);
			io_u_mark_submit(td, ret);
			if (errno == EAGAIN || errno == EINTR) {
				ret = fio_ioring_cqring_reap(td, 0, ld->queued);
				/* Shouldn't happen */
			td_verror(td, errno, "io_uring_enter submit");
	} while (ld->queued);
static void fio_ioring_unmap(struct ioring_data *ld)
	for (i = 0; i < FIO_ARRAY_SIZE(ld->mmap); i++)
		munmap(ld->mmap[i].ptr, ld->mmap[i].len);
static void fio_ioring_cleanup(struct thread_data *td)
	struct ioring_data *ld = td->io_ops_data;
		if (!(td->flags & TD_F_CHILD))
			fio_ioring_unmap(ld);
		fio_cmdprio_cleanup(&ld->cmdprio);
		free(ld->io_u_index);
static int fio_ioring_mmap(struct ioring_data *ld, struct io_uring_params *p)
	struct io_sq_ring *sring = &ld->sq_ring;
	struct io_cq_ring *cring = &ld->cq_ring;
	ld->mmap[0].len = p->sq_off.array + p->sq_entries * sizeof(__u32);
	ptr = mmap(0, ld->mmap[0].len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, ld->ring_fd,
	ld->mmap[0].ptr = ptr;
	sring->head = ptr + p->sq_off.head;
	sring->tail = ptr + p->sq_off.tail;
	sring->ring_mask = ptr + p->sq_off.ring_mask;
	sring->ring_entries = ptr + p->sq_off.ring_entries;
	sring->flags = ptr + p->sq_off.flags;
	sring->array = ptr + p->sq_off.array;
	ld->sq_ring_mask = *sring->ring_mask;
	if (p->flags & IORING_SETUP_SQE128)
		ld->mmap[1].len = 2 * p->sq_entries * sizeof(struct io_uring_sqe);
		ld->mmap[1].len = p->sq_entries * sizeof(struct io_uring_sqe);
	ld->sqes = mmap(0, ld->mmap[1].len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, ld->ring_fd,
	ld->mmap[1].ptr = ld->sqes;
	if (p->flags & IORING_SETUP_CQE32) {
		ld->mmap[2].len = p->cq_off.cqes +
			2 * p->cq_entries * sizeof(struct io_uring_cqe);
		ld->mmap[2].len = p->cq_off.cqes +
			p->cq_entries * sizeof(struct io_uring_cqe);
	ptr = mmap(0, ld->mmap[2].len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, ld->ring_fd,
	ld->mmap[2].ptr = ptr;
	cring->head = ptr + p->cq_off.head;
	cring->tail = ptr + p->cq_off.tail;
	cring->ring_mask = ptr + p->cq_off.ring_mask;
	cring->ring_entries = ptr + p->cq_off.ring_entries;
	cring->cqes = ptr + p->cq_off.cqes;
	ld->cq_ring_mask = *cring->ring_mask;
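	/*
	 * For reference (from the io_uring uapi): the offset arguments to the
	 * three mmap() calls above are IORING_OFF_SQ_RING (0), IORING_OFF_SQES
	 * (0x10000000ULL) and IORING_OFF_CQ_RING (0x8000000ULL), which is why
	 * ld->mmap[] has exactly three entries.
	 */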
static void fio_ioring_probe(struct thread_data *td)
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct io_uring_probe *p;
	/* already set by user, don't touch */
	if (o->nonvectored != -1)
	/* default to off, as that's always safe */
	p = calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op));
	ret = syscall(__NR_io_uring_register, ld->ring_fd,
			IORING_REGISTER_PROBE, p, 256);
	if (IORING_OP_WRITE > p->ops_len)
	if ((p->ops[IORING_OP_READ].flags & IO_URING_OP_SUPPORTED) &&
	    (p->ops[IORING_OP_WRITE].flags & IO_URING_OP_SUPPORTED))
static int fio_ioring_queue_init(struct thread_data *td)
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	int depth = ld->iodepth;
	struct io_uring_params p;
	memset(&p, 0, sizeof(p));
		p.flags |= IORING_SETUP_IOPOLL;
	if (o->sqpoll_thread) {
		p.flags |= IORING_SETUP_SQPOLL;
		if (o->sqpoll_set) {
			p.flags |= IORING_SETUP_SQ_AFF;
			p.sq_thread_cpu = o->sqpoll_cpu;
		 * Submission latency for sqpoll_thread is just the time it
		 * takes to fill in the SQ ring entries, and any syscall if
		 * IORING_SQ_NEED_WAKEUP is set, we don't need to log that time
		td->o.disable_slat = 1;
	 * Clamp CQ ring size at our SQ ring size, we don't need more entries
	p.flags |= IORING_SETUP_CQSIZE;
	p.cq_entries = depth;
	 * Setup COOP_TASKRUN as we don't need to get IPI interrupted for
	 * completing IO operations.
	p.flags |= IORING_SETUP_COOP_TASKRUN;
	 * io_uring is always a single issuer, and we can defer task_work
	 * runs until we reap events.
	p.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;
	ret = syscall(__NR_io_uring_setup, depth, &p);
		if (errno == EINVAL && p.flags & IORING_SETUP_DEFER_TASKRUN) {
			p.flags &= ~IORING_SETUP_DEFER_TASKRUN;
			p.flags &= ~IORING_SETUP_SINGLE_ISSUER;
		if (errno == EINVAL && p.flags & IORING_SETUP_COOP_TASKRUN) {
			p.flags &= ~IORING_SETUP_COOP_TASKRUN;
		if (errno == EINVAL && p.flags & IORING_SETUP_CQSIZE) {
			p.flags &= ~IORING_SETUP_CQSIZE;
	fio_ioring_probe(td);
		ret = syscall(__NR_io_uring_register, ld->ring_fd,
				IORING_REGISTER_BUFFERS, ld->iovecs, depth);
	return fio_ioring_mmap(ld, &p);
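	/*
	 * Rough guide (kernel versions are approximate, for illustration
	 * only): IORING_SETUP_CQSIZE appeared around 5.5, COOP_TASKRUN around
	 * 5.19, SINGLE_ISSUER around 6.0 and DEFER_TASKRUN around 6.1. Older
	 * kernels reject unknown flags with EINVAL, which is what the retry
	 * ladder below strips back one flag at a time.
	 */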
static int fio_ioring_cmd_queue_init(struct thread_data *td)
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	int depth = ld->iodepth;
	struct io_uring_params p;
	memset(&p, 0, sizeof(p));
		p.flags |= IORING_SETUP_IOPOLL;
	if (o->sqpoll_thread) {
		p.flags |= IORING_SETUP_SQPOLL;
		if (o->sqpoll_set) {
			p.flags |= IORING_SETUP_SQ_AFF;
			p.sq_thread_cpu = o->sqpoll_cpu;
		 * Submission latency for sqpoll_thread is just the time it
		 * takes to fill in the SQ ring entries, and any syscall if
		 * IORING_SQ_NEED_WAKEUP is set, we don't need to log that time
		td->o.disable_slat = 1;
	if (o->cmd_type == FIO_URING_CMD_NVME) {
		p.flags |= IORING_SETUP_SQE128;
		p.flags |= IORING_SETUP_CQE32;
	 * Clamp CQ ring size at our SQ ring size, we don't need more entries
	p.flags |= IORING_SETUP_CQSIZE;
	p.cq_entries = depth;
	 * Setup COOP_TASKRUN as we don't need to get IPI interrupted for
	 * completing IO operations.
	p.flags |= IORING_SETUP_COOP_TASKRUN;
	 * io_uring is always a single issuer, and we can defer task_work
	 * runs until we reap events.
	p.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;
	ret = syscall(__NR_io_uring_setup, depth, &p);
		if (errno == EINVAL && p.flags & IORING_SETUP_DEFER_TASKRUN) {
			p.flags &= ~IORING_SETUP_DEFER_TASKRUN;
			p.flags &= ~IORING_SETUP_SINGLE_ISSUER;
		if (errno == EINVAL && p.flags & IORING_SETUP_COOP_TASKRUN) {
			p.flags &= ~IORING_SETUP_COOP_TASKRUN;
		if (errno == EINVAL && p.flags & IORING_SETUP_CQSIZE) {
			p.flags &= ~IORING_SETUP_CQSIZE;
	fio_ioring_probe(td);
		ret = syscall(__NR_io_uring_register, ld->ring_fd,
				IORING_REGISTER_BUFFERS, ld->iovecs, depth);
	return fio_ioring_mmap(ld, &p);
static int fio_ioring_register_files(struct thread_data *td)
	struct ioring_data *ld = td->io_ops_data;
	ld->fds = calloc(td->o.nr_files, sizeof(int));
	for_each_file(td, f, i) {
		ret = generic_open_file(td, f);
	ret = syscall(__NR_io_uring_register, ld->ring_fd,
			IORING_REGISTER_FILES, ld->fds, td->o.nr_files);
	 * Pretend the file is closed again, and really close it if we hit
	for_each_file(td, f, i) {
			int fio_unused ret2;
			ret2 = generic_close_file(td, f);
static int fio_ioring_post_init(struct thread_data *td)
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	for (i = 0; i < td->o.iodepth; i++) {
		struct iovec *iov = &ld->iovecs[i];
		io_u = ld->io_u_index[i];
		iov->iov_base = io_u->buf;
		iov->iov_len = td_max_bs(td);
	err = fio_ioring_queue_init(td);
		int init_err = errno;
		if (init_err == ENOSYS)
			log_err("fio: your kernel doesn't support io_uring\n");
		td_verror(td, init_err, "io_queue_init");
	for (i = 0; i < ld->iodepth; i++) {
		struct io_uring_sqe *sqe;
		memset(sqe, 0, sizeof(*sqe));
	if (o->registerfiles) {
		err = fio_ioring_register_files(td);
			td_verror(td, errno, "ioring_register_files");
static int fio_ioring_cmd_post_init(struct thread_data *td)
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	for (i = 0; i < td->o.iodepth; i++) {
		struct iovec *iov = &ld->iovecs[i];
		io_u = ld->io_u_index[i];
		iov->iov_base = io_u->buf;
		iov->iov_len = td_max_bs(td);
	err = fio_ioring_cmd_queue_init(td);
		int init_err = errno;
		td_verror(td, init_err, "io_queue_init");
	for (i = 0; i < ld->iodepth; i++) {
		struct io_uring_sqe *sqe;
		if (o->cmd_type == FIO_URING_CMD_NVME) {
			sqe = &ld->sqes[i << 1];
			memset(sqe, 0, 2 * sizeof(*sqe));
			memset(sqe, 0, sizeof(*sqe));
	if (o->registerfiles) {
		err = fio_ioring_register_files(td);
			td_verror(td, errno, "ioring_register_files");
static void parse_prchk_flags(struct ioring_options *o)
	if (strstr(o->pi_chk, "GUARD") != NULL)
		o->prchk = NVME_IO_PRINFO_PRCHK_GUARD;
	if (strstr(o->pi_chk, "REFTAG") != NULL)
		o->prchk |= NVME_IO_PRINFO_PRCHK_REF;
	if (strstr(o->pi_chk, "APPTAG") != NULL)
		o->prchk |= NVME_IO_PRINFO_PRCHK_APP;
static int fio_ioring_init(struct thread_data *td)
	struct ioring_options *o = td->eo;
	struct ioring_data *ld;
	struct nvme_dsm *dsm;
	unsigned int dsm_size;
	unsigned long long md_size;
	/* sqthread submission requires registered files */
	if (o->sqpoll_thread)
		o->registerfiles = 1;
	if (o->registerfiles && td->o.nr_files != td->o.open_files) {
		log_err("fio: io_uring registered files require nr_files to "
			"be identical to open_files\n");
	ld = calloc(1, sizeof(*ld));
	 * The internal io_uring queue depth must be a power-of-2, as that's
	 * how the ring interface works. So round that up, in case the user
	 * set iodepth isn't a power-of-2. Leave the fio depth the same, so as
	 * not to drive a deeper iodepth than requested if we did round up.
	ld->iodepth = roundup_pow2(td->o.iodepth);
	ld->io_u_index = calloc(td->o.iodepth, sizeof(struct io_u *));
	 * Metadata buffer for nvme commands.
	 * We are only supporting iomem=malloc / mem=malloc as of now.
	if (!strcmp(td->io_ops->name, "io_uring_cmd") &&
	    (o->cmd_type == FIO_URING_CMD_NVME) && o->md_per_io_size) {
		md_size = (unsigned long long) o->md_per_io_size
			* (unsigned long long) td->o.iodepth;
		md_size += page_mask + td->o.mem_align;
		if (td->o.mem_align && td->o.mem_align > page_size)
			md_size += td->o.mem_align - page_size;
		ld->md_buf = malloc(md_size);
	parse_prchk_flags(o);
	ld->iovecs = calloc(ld->iodepth, sizeof(struct iovec));
	td->io_ops_data = ld;
	ret = fio_cmdprio_init(td, &ld->cmdprio, &o->cmdprio_options);
		td_verror(td, EINVAL, "fio_ioring_init");
	 * For io_uring_cmd, trims are async operations unless we are operating
	 * in zbd mode where trim means zone reset.
	if (!strcmp(td->io_ops->name, "io_uring_cmd") && td_trim(td) &&
	    td->o.zone_mode == ZONE_MODE_ZBD) {
		td->io_ops->flags |= FIO_ASYNCIO_SYNC_TRIM;
		dsm_size = sizeof(*ld->dsm) +
			td->o.num_range * sizeof(struct nvme_dsm_range);
		ld->dsm = calloc(td->o.iodepth, dsm_size);
		for (i = 0; i < td->o.iodepth; i++) {
			dsm = (struct nvme_dsm *)ptr;
			dsm->nr_ranges = td->o.num_range;
	if (!strcmp(td->io_ops->name, "io_uring_cmd")) {
		switch (o->write_mode) {
		case FIO_URING_CMD_WMODE_UNCOR:
			ld->write_opcode = nvme_cmd_write_uncor;
		case FIO_URING_CMD_WMODE_ZEROES:
			ld->write_opcode = nvme_cmd_write_zeroes;
				ld->cdw12_flags[DDIR_WRITE] = 1 << 25;
		case FIO_URING_CMD_WMODE_VERIFY:
			ld->write_opcode = nvme_cmd_verify;
			ld->write_opcode = nvme_cmd_write;
			ld->cdw12_flags[DDIR_READ] = 1 << 30;
			ld->cdw12_flags[DDIR_WRITE] = 1 << 30;
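	/*
	 * Example: iodepth=48 gives ld->iodepth == 64 (the ring size), while
	 * fio itself still queues at most the 48 the user asked for.
	 */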
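			/*
			 * Sizing example (assuming the 16-byte NVMe DSM range
			 * layout: 4-byte context attributes, 4-byte LBA count,
			 * 8-byte starting LBA): with num_range=8 each per-io_u
			 * slot is sizeof(*ld->dsm) + 128 bytes, and the prep
			 * path advances by io_u->index * dsm_size to find it.
			 */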
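		/*
		 * CDW12 bit reference (per the NVMe spec): bit 30 set here is
		 * FUA for reads/writes, and the 1 << 25 used for Write Zeroes
		 * above is the DEAC (deallocate) bit.
		 */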
static int fio_ioring_io_u_init(struct thread_data *td, struct io_u *io_u)
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct nvme_pi_data *pi_data;
	ld->io_u_index[io_u->index] = io_u;
	if (!strcmp(td->io_ops->name, "io_uring_cmd")) {
		p = PTR_ALIGN(ld->md_buf, page_mask) + td->o.mem_align;
		p += o->md_per_io_size * io_u->index;
		io_u->mmap_data = p;
			pi_data = calloc(1, sizeof(*pi_data));
			pi_data->io_flags |= o->prchk;
			pi_data->apptag_mask = o->apptag_mask;
			pi_data->apptag = o->apptag;
			io_u->engine_data = pi_data;
static void fio_ioring_io_u_free(struct thread_data *td, struct io_u *io_u)
	struct ioring_options *o = td->eo;
	if (!strcmp(td->io_ops->name, "io_uring_cmd") &&
	    (o->cmd_type == FIO_URING_CMD_NVME)) {
		pi = io_u->engine_data;
		io_u->engine_data = NULL;
static int fio_ioring_open_file(struct thread_data *td, struct fio_file *f)
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	if (!ld || !o->registerfiles)
		return generic_open_file(td, f);
	f->fd = ld->fds[f->engine_pos];
static int fio_ioring_cmd_open_file(struct thread_data *td, struct fio_file *f)
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	if (o->cmd_type == FIO_URING_CMD_NVME) {
		struct nvme_data *data = NULL;
		unsigned int lba_size = 0;
		/* Store the namespace-id and lba size. */
		data = FILE_ENG_DATA(f);
			data = calloc(1, sizeof(struct nvme_data));
			ret = fio_nvme_get_info(f, &nlba, o->pi_act, data);
			FILE_SET_ENG_DATA(f, data);
		lba_size = data->lba_ext ? data->lba_ext : data->lba_size;
		for_each_rw_ddir(ddir) {
			if (td->o.min_bs[ddir] % lba_size || td->o.max_bs[ddir] % lba_size) {
				if (data->lba_ext) {
					log_err("%s: block size must be a multiple of %u "
						"(LBA data size + Metadata size)\n", f->file_name, lba_size);
					if (td->o.min_bs[ddir] == td->o.max_bs[ddir] &&
					    !(td->o.min_bs[ddir] % data->lba_size)) {
						/* fixed block size is actually a multiple of LBA data size */
						unsigned long long suggestion = lba_size *
							(td->o.min_bs[ddir] / data->lba_size);
						log_err("Did you mean to use a block size of %llu?\n", suggestion);
					log_err("%s: block size must be a multiple of LBA data size\n",
				td_verror(td, EINVAL, "fio_ioring_cmd_open_file");
			if (data->ms && !data->lba_ext && ddir != DDIR_TRIM &&
			    (o->md_per_io_size < ((td->o.max_bs[ddir] / data->lba_size) *
				log_err("%s: md_per_io_size should be at least %llu bytes\n",
					((td->o.max_bs[ddir] / data->lba_size) * data->ms));
				td_verror(td, EINVAL, "fio_ioring_cmd_open_file");
		 * For extended logical block sizes we cannot use verify when
		 * end to end data protection checks are enabled, as the PI
		 * section of data buffer conflicts with verify.
		if (data->ms && data->pi_type && data->lba_ext &&
		    td->o.verify != VERIFY_NONE) {
			log_err("%s: for extended LBA, verify cannot be used when E2E data protection is enabled\n",
			td_verror(td, EINVAL, "fio_ioring_cmd_open_file");
		if (o->write_mode != FIO_URING_CMD_WMODE_WRITE &&
			log_err("%s: 'readwrite=|rw=' has no write\n",
			td_verror(td, EINVAL, "fio_ioring_cmd_open_file");
	if (!ld || !o->registerfiles)
		return generic_open_file(td, f);
	f->fd = ld->fds[f->engine_pos];
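			/*
			 * Worked example (assumed geometry): with lba_size=4096
			 * and 64 bytes of separate metadata (ms=64, lba_ext=0),
			 * bs=128k needs md_per_io_size >= (131072 / 4096) * 64
			 * = 2048 bytes; with extended LBAs the block size itself
			 * must instead be a multiple of 4096 + 64 = 4160.
			 */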
static int fio_ioring_close_file(struct thread_data *td, struct fio_file *f)
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	if (!ld || !o->registerfiles)
		return generic_close_file(td, f);
static int fio_ioring_cmd_close_file(struct thread_data *td,
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	if (o->cmd_type == FIO_URING_CMD_NVME) {
		struct nvme_data *data = FILE_ENG_DATA(f);
		FILE_SET_ENG_DATA(f, NULL);
	if (!ld || !o->registerfiles)
		return generic_close_file(td, f);
static int fio_ioring_cmd_get_file_size(struct thread_data *td,
	struct ioring_options *o = td->eo;
	if (fio_file_size_known(f))
	if (o->cmd_type == FIO_URING_CMD_NVME) {
		struct nvme_data *data = NULL;
		data = calloc(1, sizeof(struct nvme_data));
		ret = fio_nvme_get_info(f, &nlba, o->pi_act, data);
			f->real_file_size = data->lba_ext * nlba;
			f->real_file_size = data->lba_size * nlba;
		fio_file_set_size_known(f);
		FILE_SET_ENG_DATA(f, data);
	return generic_get_file_size(td, f);
static int fio_ioring_cmd_get_zoned_model(struct thread_data *td,
					   enum zbd_zoned_model *model)
	return fio_nvme_get_zoned_model(td, f, model);
static int fio_ioring_cmd_report_zones(struct thread_data *td,
					struct fio_file *f, uint64_t offset,
					struct zbd_zone *zbdz,
					unsigned int nr_zones)
	return fio_nvme_report_zones(td, f, offset, zbdz, nr_zones);
static int fio_ioring_cmd_reset_wp(struct thread_data *td, struct fio_file *f,
				   uint64_t offset, uint64_t length)
	return fio_nvme_reset_wp(td, f, offset, length);
static int fio_ioring_cmd_get_max_open_zones(struct thread_data *td,
					      unsigned int *max_open_zones)
	return fio_nvme_get_max_open_zones(td, f, max_open_zones);
static int fio_ioring_cmd_fetch_ruhs(struct thread_data *td, struct fio_file *f,
				     struct fio_ruhs_info *fruhs_info)
	struct nvme_fdp_ruh_status *ruhs;
	int bytes, nr_ruhs, ret, i;
	nr_ruhs = fruhs_info->nr_ruhs;
	bytes = sizeof(*ruhs) + fruhs_info->nr_ruhs * sizeof(struct nvme_fdp_ruh_status_desc);
	ruhs = calloc(1, bytes);
	ret = fio_nvme_iomgmt_ruhs(td, f, ruhs, bytes);
	fruhs_info->nr_ruhs = le16_to_cpu(ruhs->nruhsd);
	for (i = 0; i < nr_ruhs; i++)
		fruhs_info->plis[i] = le16_to_cpu(ruhs->ruhss[i].pid);
static struct ioengine_ops ioengine_uring = {
	.version = FIO_IOOPS_VERSION,
	.flags = FIO_NO_OFFLOAD | FIO_ASYNCIO_SETS_ISSUE_TIME |
	.init = fio_ioring_init,
	.post_init = fio_ioring_post_init,
	.io_u_init = fio_ioring_io_u_init,
	.prep = fio_ioring_prep,
	.queue = fio_ioring_queue,
	.commit = fio_ioring_commit,
	.getevents = fio_ioring_getevents,
	.event = fio_ioring_event,
	.cleanup = fio_ioring_cleanup,
	.open_file = fio_ioring_open_file,
	.close_file = fio_ioring_close_file,
	.get_file_size = generic_get_file_size,
	.option_struct_size = sizeof(struct ioring_options),
static struct ioengine_ops ioengine_uring_cmd = {
	.name = "io_uring_cmd",
	.version = FIO_IOOPS_VERSION,
	.flags = FIO_NO_OFFLOAD | FIO_MEMALIGN | FIO_RAWIO |
		FIO_ASYNCIO_SETS_ISSUE_TIME |
		FIO_MULTI_RANGE_TRIM,
	.init = fio_ioring_init,
	.post_init = fio_ioring_cmd_post_init,
	.io_u_init = fio_ioring_io_u_init,
	.io_u_free = fio_ioring_io_u_free,
	.prep = fio_ioring_cmd_prep,
	.queue = fio_ioring_queue,
	.commit = fio_ioring_commit,
	.getevents = fio_ioring_getevents,
	.event = fio_ioring_cmd_event,
	.errdetails = fio_ioring_cmd_errdetails,
	.cleanup = fio_ioring_cleanup,
	.open_file = fio_ioring_cmd_open_file,
	.close_file = fio_ioring_cmd_close_file,
	.get_file_size = fio_ioring_cmd_get_file_size,
	.get_zoned_model = fio_ioring_cmd_get_zoned_model,
	.report_zones = fio_ioring_cmd_report_zones,
	.reset_wp = fio_ioring_cmd_reset_wp,
	.get_max_open_zones = fio_ioring_cmd_get_max_open_zones,
	.option_struct_size = sizeof(struct ioring_options),
	.fdp_fetch_ruhs = fio_ioring_cmd_fetch_ruhs,
static void fio_init fio_ioring_register(void)
	register_ioengine(&ioengine_uring);
	register_ioengine(&ioengine_uring_cmd);
static void fio_exit fio_ioring_unregister(void)
	unregister_ioengine(&ioengine_uring);
	unregister_ioengine(&ioengine_uring_cmd);