/*
 * io_uring engine
 *
 * IO engine using the new native Linux aio io_uring interface. See:
 *
 * http://git.kernel.dk/cgit/linux-block/log/?h=io_uring
 *
 */
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/time.h>
#include <sys/resource.h>

#include "../fio.h"
#include "../lib/pow2.h"
#include "../optgroup.h"
#include "../lib/memalign.h"
#include "../lib/fls.h"
#include "../lib/roundup.h"
#include "../verify.h"

#ifdef ARCH_HAVE_IOURING

#include "../lib/types.h"
#include "../os/linux/io_uring.h"
#include "cmdprio.h"
#include "zbd.h"
#include "nvme.h"

#include <sys/stat.h>

enum uring_cmd_type {
	FIO_URING_CMD_NVME = 1,
};

enum uring_cmd_write_mode {
	FIO_URING_CMD_WMODE_WRITE = 1,
	FIO_URING_CMD_WMODE_UNCOR,
	FIO_URING_CMD_WMODE_ZEROES,
	FIO_URING_CMD_WMODE_VERIFY,
};

enum uring_cmd_verify_mode {
	FIO_URING_CMD_VMODE_READ = 1,
	FIO_URING_CMD_VMODE_COMPARE,
};

struct io_sq_ring {
	unsigned *head;
	unsigned *tail;
	unsigned *ring_mask;
	unsigned *ring_entries;
	unsigned *flags;
	unsigned *array;
};

struct io_cq_ring {
	unsigned *head;
	unsigned *tail;
	unsigned *ring_mask;
	unsigned *ring_entries;
	struct io_uring_cqe *cqes;
};

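/*
 * Note: io_sq_ring and io_cq_ring are thin userspace views of the shared
 * submission and completion rings; every pointer above is wired up in
 * fio_ioring_mmap() to point into the mmap()ed ring regions.
 */
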
struct ioring_mmap {
	void *ptr;
	size_t len;
};

struct ioring_data {
	int ring_fd;

	struct io_u **io_u_index;
	char *md_buf;

	int *fds;

	struct io_sq_ring sq_ring;
	struct io_uring_sqe *sqes;
	struct iovec *iovecs;
	unsigned sq_ring_mask;

	struct io_cq_ring cq_ring;
	unsigned cq_ring_mask;

	int async_trim_fail;
	int queued;
	int cq_ring_off;
	unsigned iodepth;
	int prepped;

	struct ioring_mmap mmap[3];

	struct cmdprio cmdprio;

	struct nvme_dsm *dsm;
	uint32_t cdw12_flags[DDIR_RWDIR_CNT];
	uint8_t write_opcode;
};

struct ioring_options {
	struct thread_data *td;
	unsigned int hipri;
	unsigned int readfua;
	unsigned int writefua;
	unsigned int deac;
	unsigned int write_mode;
	unsigned int verify_mode;
	struct cmdprio_options cmdprio_options;
	unsigned int fixedbufs;
	unsigned int registerfiles;
	unsigned int sqpoll_thread;
	unsigned int sqpoll_set;
	unsigned int sqpoll_cpu;
	unsigned int nonvectored;
	unsigned int uncached;
	unsigned int nowait;
	unsigned int force_async;
	unsigned int md_per_io_size;
	unsigned int pi_act;
	unsigned int apptag;
	unsigned int apptag_mask;
	unsigned int prchk;
	char *pi_chk;
	enum uring_cmd_type cmd_type;
};

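/*
 * ddir_to_op is indexed by [data direction][!!nonvectored]: row 0 maps
 * reads, row 1 maps writes, and column 1 selects the non-vectored opcode.
 * fixed_ddir_to_op holds the *_FIXED opcodes used with registered
 * (fixedbufs) buffers.
 */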
static const int ddir_to_op[2][2] = {
	{ IORING_OP_READV, IORING_OP_READ },
	{ IORING_OP_WRITEV, IORING_OP_WRITE }
};

static const int fixed_ddir_to_op[2] = {
	IORING_OP_READ_FIXED,
	IORING_OP_WRITE_FIXED
};

static int fio_ioring_sqpoll_cb(void *data, unsigned long long *val)
{
	struct ioring_options *o = data;

	o->sqpoll_cpu = *val;
	o->sqpoll_set = 1;
	return 0;
}

static struct fio_option options[] = {
	{
		.name = "hipri",
		.lname = "High Priority",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct ioring_options, hipri),
		.help = "Use polled IO completions",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "readfua",
		.lname = "Read fua flag support",
		.type = FIO_OPT_BOOL,
		.off1 = offsetof(struct ioring_options, readfua),
		.help = "Set FUA flag (force unit access) for all Read operations",
		.def = "0",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "writefua",
		.lname = "Write fua flag support",
		.type = FIO_OPT_BOOL,
		.off1 = offsetof(struct ioring_options, writefua),
		.help = "Set FUA flag (force unit access) for all Write operations",
		.def = "0",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "write_mode",
		.lname = "Additional Write commands support (Write Uncorrectable, Write Zeroes)",
		.type = FIO_OPT_STR,
		.off1 = offsetof(struct ioring_options, write_mode),
		.help = "Issue Write Uncorrectable or Zeroes command instead of Write command",
		.def = "write",
		.posval = {
			{ .ival = "write",
			  .oval = FIO_URING_CMD_WMODE_WRITE,
			  .help = "Issue Write commands for write operations"
			},
			{ .ival = "uncor",
			  .oval = FIO_URING_CMD_WMODE_UNCOR,
			  .help = "Issue Write Uncorrectable commands for write operations"
			},
			{ .ival = "zeroes",
			  .oval = FIO_URING_CMD_WMODE_ZEROES,
			  .help = "Issue Write Zeroes commands for write operations"
			},
			{ .ival = "verify",
			  .oval = FIO_URING_CMD_WMODE_VERIFY,
			  .help = "Issue Verify commands for write operations"
			},
		},
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "verify_mode",
		.lname = "Do verify based on the configured command (e.g., Read or Compare command)",
		.type = FIO_OPT_STR,
		.off1 = offsetof(struct ioring_options, verify_mode),
		.help = "Issue Read or Compare command in the verification phase",
		.def = "read",
		.posval = {
			{ .ival = "read",
			  .oval = FIO_URING_CMD_VMODE_READ,
			  .help = "Issue Read commands in the verification phase"
			},
			{ .ival = "compare",
			  .oval = FIO_URING_CMD_VMODE_COMPARE,
			  .help = "Issue Compare commands in the verification phase"
			},
		},
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "fixedbufs",
		.lname = "Fixed (pre-mapped) IO buffers",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct ioring_options, fixedbufs),
		.help = "Pre map IO buffers",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "registerfiles",
		.lname = "Register file set",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct ioring_options, registerfiles),
		.help = "Pre-open/register files",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "sqthread_poll",
		.lname = "Kernel SQ thread polling",
		.type = FIO_OPT_STR_SET,
		.off1 = offsetof(struct ioring_options, sqpoll_thread),
		.help = "Offload submission/completion to kernel thread",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "sqthread_poll_cpu",
		.lname = "SQ Thread Poll CPU",
		.type = FIO_OPT_INT,
		.cb = fio_ioring_sqpoll_cb,
		.help = "What CPU to run SQ thread polling on",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "nonvectored",
		.lname = "Non-vectored",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct ioring_options, nonvectored),
		.def = "-1",
		.help = "Use non-vectored read/write commands",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "uncached",
		.lname = "Uncached",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct ioring_options, uncached),
		.help = "Use RWF_DONTCACHE for buffered read/writes",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "nowait",
		.lname = "RWF_NOWAIT",
		.type = FIO_OPT_BOOL,
		.off1 = offsetof(struct ioring_options, nowait),
		.help = "Use RWF_NOWAIT for reads/writes",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "force_async",
		.lname = "Force async",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct ioring_options, force_async),
		.help = "Set IOSQE_ASYNC every N requests",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "cmd_type",
		.lname = "Uring cmd type",
		.type = FIO_OPT_STR,
		.off1 = offsetof(struct ioring_options, cmd_type),
		.help = "Specify uring-cmd type",
		.def = "nvme",
		.posval = {
			{ .ival = "nvme",
			  .oval = FIO_URING_CMD_NVME,
			  .help = "Issue nvme-uring-cmd",
			},
		},
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	CMDPRIO_OPTIONS(struct ioring_options, FIO_OPT_G_IOURING),
	{
		.name = "md_per_io_size",
		.lname = "Separate Metadata Buffer Size per I/O",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct ioring_options, md_per_io_size),
		.def = "0",
		.help = "Size of separate metadata buffer per I/O (Default: 0)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "pi_act",
		.lname = "Protection Information Action",
		.type = FIO_OPT_BOOL,
		.off1 = offsetof(struct ioring_options, pi_act),
		.def = "1",
		.help = "Protection Information Action bit (pi_act=1 or pi_act=0)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "pi_chk",
		.lname = "Protection Information Check",
		.type = FIO_OPT_STR_STORE,
		.off1 = offsetof(struct ioring_options, pi_chk),
		.def = NULL,
		.help = "Control of Protection Information Checking (pi_chk=GUARD,REFTAG,APPTAG)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "apptag",
		.lname = "Application Tag used in Protection Information",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct ioring_options, apptag),
		.def = "0x1234",
		.help = "Application Tag used in Protection Information field (Default: 0x1234)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "apptag_mask",
		.lname = "Application Tag Mask",
		.type = FIO_OPT_INT,
		.off1 = offsetof(struct ioring_options, apptag_mask),
		.def = "0xffff",
		.help = "Application Tag Mask used with Application Tag (Default: 0xffff)",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = "deac",
		.lname = "Deallocate bit for write zeroes command",
		.type = FIO_OPT_BOOL,
		.off1 = offsetof(struct ioring_options, deac),
		.help = "Set DEAC (deallocate) flag for write zeroes command",
		.def = "0",
		.category = FIO_OPT_C_ENGINE,
		.group = FIO_OPT_G_IOURING,
	},
	{
		.name = NULL,
	},
};

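/*
 * Raw io_uring_enter(2) wrapper: fio drives the rings directly rather
 * than going through liburing, so submission and completion syscalls use
 * this helper (taking the arch-optimized syscall path when
 * FIO_ARCH_HAS_SYSCALL is available).
 */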
static int io_uring_enter(struct ioring_data *ld, unsigned int to_submit,
			  unsigned int min_complete, unsigned int flags)
{
#ifdef FIO_ARCH_HAS_SYSCALL
	return __do_syscall6(__NR_io_uring_enter, ld->ring_fd, to_submit,
				min_complete, flags, NULL, 0);
#else
	return syscall(__NR_io_uring_enter, ld->ring_fd, to_submit,
			min_complete, flags, NULL, 0);
#endif
}

#ifndef BLOCK_URING_CMD_DISCARD
#define BLOCK_URING_CMD_DISCARD	_IO(0x12, 0)
#endif

static int fio_ioring_prep(struct thread_data *td, struct io_u *io_u)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct fio_file *f = io_u->file;
	struct io_uring_sqe *sqe;

	sqe = &ld->sqes[io_u->index];

	if (o->registerfiles) {
		sqe->fd = f->engine_pos;
		sqe->flags = IOSQE_FIXED_FILE;
	} else {
		sqe->fd = f->fd;
		sqe->flags = 0;
	}

	if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) {
		if (o->fixedbufs) {
			sqe->opcode = fixed_ddir_to_op[io_u->ddir];
			sqe->addr = (unsigned long) io_u->xfer_buf;
			sqe->len = io_u->xfer_buflen;
			sqe->buf_index = io_u->index;
		} else {
			struct iovec *iov = &ld->iovecs[io_u->index];

			/*
			 * Update based on actual io_u, requeue could have
			 * adjusted these
			 */
			iov->iov_base = io_u->xfer_buf;
			iov->iov_len = io_u->xfer_buflen;

			sqe->opcode = ddir_to_op[io_u->ddir][!!o->nonvectored];
			if (o->nonvectored) {
				sqe->addr = (unsigned long) iov->iov_base;
				sqe->len = iov->iov_len;
			} else {
				sqe->addr = (unsigned long) iov;
				sqe->len = 1;
			}
		}
		sqe->rw_flags = 0;
		if (!td->o.odirect && o->uncached)
			sqe->rw_flags |= RWF_DONTCACHE;
		if (o->nowait)
			sqe->rw_flags |= RWF_NOWAIT;
		if (td->o.oatomic && io_u->ddir == DDIR_WRITE)
			sqe->rw_flags |= RWF_ATOMIC;

		/*
		 * Since io_uring can have a submission context (sqthread_poll)
		 * that is different from the process context, we cannot rely on
		 * the IO priority set by ioprio_set() (options prio, prioclass,
		 * and priohint) to be inherited.
		 * td->ioprio will have the value of the "default prio", so set
		 * this unconditionally. This value might get overridden by
		 * fio_ioring_cmdprio_prep() if the option cmdprio_percentage or
		 * cmdprio_bssplit is used.
		 */
		sqe->ioprio = td->ioprio;
		sqe->off = io_u->offset;
	} else if (ddir_sync(io_u->ddir)) {
		sqe->ioprio = 0;
		if (io_u->ddir == DDIR_SYNC_FILE_RANGE) {
			sqe->off = f->first_write;
			sqe->len = f->last_write - f->first_write;
			sqe->sync_range_flags = td->o.sync_file_range;
			sqe->opcode = IORING_OP_SYNC_FILE_RANGE;
		} else {
			sqe->off = 0;
			sqe->addr = 0;
			sqe->len = 0;
			if (io_u->ddir == DDIR_DATASYNC)
				sqe->fsync_flags |= IORING_FSYNC_DATASYNC;
			sqe->opcode = IORING_OP_FSYNC;
		}
	} else if (io_u->ddir == DDIR_TRIM) {
		sqe->opcode = IORING_OP_URING_CMD;
		sqe->addr = io_u->offset;
		sqe->addr3 = io_u->xfer_buflen;
		sqe->rw_flags = 0;
		sqe->len = sqe->off = 0;
		sqe->ioprio = 0;
		sqe->cmd_op = BLOCK_URING_CMD_DISCARD;
		sqe->__pad1 = 0;
		sqe->file_index = 0;
	}

	if (o->force_async && ++ld->prepped == o->force_async) {
		ld->prepped = 0;
		sqe->flags |= IOSQE_ASYNC;
	}

	sqe->user_data = (unsigned long) io_u;
	return 0;
}

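/*
 * NVMe passthrough prep: the ring is created with IORING_SETUP_SQE128,
 * so each command occupies two regular SQE slots, hence the
 * (io_u->index) << 1 indexing below. The actual NVMe command (including
 * any DSM ranges for trims) is built by fio_nvme_uring_cmd_prep().
 */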
static int fio_ioring_cmd_prep(struct thread_data *td, struct io_u *io_u)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct fio_file *f = io_u->file;
	struct nvme_uring_cmd *cmd;
	struct io_uring_sqe *sqe;
	struct nvme_dsm *dsm;
	void *ptr = ld->dsm;
	unsigned int dsm_size;
	uint8_t read_opcode = nvme_cmd_read;

	/* only supports nvme_uring_cmd */
	if (o->cmd_type != FIO_URING_CMD_NVME)
		return -EINVAL;

	if (io_u->ddir == DDIR_TRIM && td->io_ops->flags & FIO_ASYNCIO_SYNC_TRIM)
		return 0;

	sqe = &ld->sqes[(io_u->index) << 1];

	if (o->registerfiles) {
		sqe->fd = f->engine_pos;
		sqe->flags = IOSQE_FIXED_FILE;
	} else {
		sqe->fd = f->fd;
	}
	sqe->rw_flags = 0;
	if (!td->o.odirect && o->uncached)
		sqe->rw_flags |= RWF_DONTCACHE;
	if (o->nowait)
		sqe->rw_flags |= RWF_NOWAIT;

	sqe->opcode = IORING_OP_URING_CMD;
	sqe->user_data = (unsigned long) io_u;
	if (o->nonvectored)
		sqe->cmd_op = NVME_URING_CMD_IO;
	else
		sqe->cmd_op = NVME_URING_CMD_IO_VEC;
	if (o->force_async && ++ld->prepped == o->force_async) {
		ld->prepped = 0;
		sqe->flags |= IOSQE_ASYNC;
	}
	if (o->fixedbufs) {
		sqe->uring_cmd_flags = IORING_URING_CMD_FIXED;
		sqe->buf_index = io_u->index;
	}

	cmd = (struct nvme_uring_cmd *)sqe->cmd;
	dsm_size = sizeof(*ld->dsm) + td->o.num_range * sizeof(struct nvme_dsm_range);
	ptr += io_u->index * dsm_size;
	dsm = (struct nvme_dsm *)ptr;

	/*
	 * If the READ command belongs to the verification phase and
	 * verify_mode=compare, convert the READ into a COMPARE command.
	 */
	if (io_u->flags & IO_U_F_VER_LIST && io_u->ddir == DDIR_READ &&
			o->verify_mode == FIO_URING_CMD_VMODE_COMPARE) {
		populate_verify_io_u(td, io_u);
		read_opcode = nvme_cmd_compare;
		io_u_set(td, io_u, IO_U_F_VER_IN_DEV);
	}

	return fio_nvme_uring_cmd_prep(cmd, io_u,
			o->nonvectored ? NULL : &ld->iovecs[io_u->index],
			dsm, read_opcode, ld->write_opcode,
			ld->cdw12_flags[io_u->ddir]);
}

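/*
 * Completion lookup: 'event' is an offset from the CQ head snapshot taken
 * in fio_ioring_getevents() (ld->cq_ring_off), masked into the CQ ring.
 */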
static struct io_u *fio_ioring_event(struct thread_data *td, int event)
{
	struct ioring_data *ld = td->io_ops_data;
	struct io_uring_cqe *cqe;
	struct io_u *io_u;
	unsigned index;

	index = (event + ld->cq_ring_off) & ld->cq_ring_mask;

	cqe = &ld->cq_ring.cqes[index];
	io_u = (struct io_u *) (uintptr_t) cqe->user_data;

	/* trim returns 0 on success */
	if (cqe->res == io_u->xfer_buflen ||
	    (io_u->ddir == DDIR_TRIM && !cqe->res)) {
		io_u->error = 0;
		return io_u;
	}

	if (cqe->res != io_u->xfer_buflen) {
		if (io_u->ddir == DDIR_TRIM) {
			ld->async_trim_fail = 1;
			cqe->res = 0;
		}
		if (cqe->res > io_u->xfer_buflen)
			io_u->error = -cqe->res;
		else
			io_u->resid = io_u->xfer_buflen - cqe->res;
	}

	return io_u;
}

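/*
 * With IORING_SETUP_CQE32 each completion is a 32-byte CQE, i.e. two
 * regular CQE slots, so the ring index is doubled for NVMe passthrough.
 */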
static struct io_u *fio_ioring_cmd_event(struct thread_data *td, int event)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct io_uring_cqe *cqe;
	struct io_u *io_u;
	struct nvme_data *data;
	unsigned index;
	int ret;

	index = (event + ld->cq_ring_off) & ld->cq_ring_mask;
	if (o->cmd_type == FIO_URING_CMD_NVME)
		index <<= 1;

	cqe = &ld->cq_ring.cqes[index];
	io_u = (struct io_u *) (uintptr_t) cqe->user_data;

	io_u->error = cqe->res;
	if (io_u->error != 0)
		goto ret;

	if (o->cmd_type == FIO_URING_CMD_NVME) {
		data = FILE_ENG_DATA(io_u->file);
		if (data->pi_type && (io_u->ddir == DDIR_READ) && !o->pi_act) {
			ret = fio_nvme_pi_verify(data, io_u);
			if (ret)
				io_u->error = ret;
		}
	}

ret:
	/*
	 * If IO_U_F_DEVICE_ERROR is not set, io_u->error will be parsed as an
	 * errno; otherwise it holds a device-specific error value (the status
	 * value from the CQE).
	 */
	if ((int)io_u->error > 0)
		io_u_set(td, io_u, IO_U_F_DEVICE_ERROR);
	else
		io_u_clear(td, io_u, IO_U_F_DEVICE_ERROR);
	io_u->error = abs((int)io_u->error);
	return io_u;
}

static char *fio_ioring_cmd_errdetails(struct thread_data *td,
				       struct io_u *io_u)
{
	struct ioring_options *o = td->eo;
	unsigned int sct = (io_u->error >> 8) & 0x7;
	unsigned int sc = io_u->error & 0xff;
#define MAXERRDETAIL 1024
#define MAXMSGCHUNK 128
	char *msg, msgchunk[MAXMSGCHUNK];

	if (!(io_u->flags & IO_U_F_DEVICE_ERROR))
		return NULL;

	msg = calloc(1, MAXERRDETAIL);
	strcpy(msg, "io_uring_cmd: ");

	snprintf(msgchunk, MAXMSGCHUNK, "%s: ", io_u->file->file_name);
	strlcat(msg, msgchunk, MAXERRDETAIL);

	if (o->cmd_type == FIO_URING_CMD_NVME) {
		strlcat(msg, "cq entry status (", MAXERRDETAIL);

		snprintf(msgchunk, MAXMSGCHUNK, "sct=0x%02x; ", sct);
		strlcat(msg, msgchunk, MAXERRDETAIL);

		snprintf(msgchunk, MAXMSGCHUNK, "sc=0x%02x)", sc);
		strlcat(msg, msgchunk, MAXERRDETAIL);
	} else {
		/* Print the status code in generic form */
		snprintf(msgchunk, MAXMSGCHUNK, "status=0x%x", io_u->error);
		strlcat(msg, msgchunk, MAXERRDETAIL);
	}

	return msg;
}

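/*
 * Reap completions by walking from our cached head to the kernel-published
 * tail: the acquire load on the tail makes the CQE contents visible before
 * we read them, and the release store on the head hands the entries back
 * to the kernel for reuse.
 */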
static int fio_ioring_cqring_reap(struct thread_data *td, unsigned int events,
				  unsigned int max)
{
	struct ioring_data *ld = td->io_ops_data;
	struct io_cq_ring *ring = &ld->cq_ring;
	unsigned head, reaped = 0;

	head = *ring->head;
	do {
		if (head == atomic_load_acquire(ring->tail))
			break;
		reaped++;
		head++;
	} while (reaped + events < max);

	if (reaped)
		atomic_store_release(ring->head, head);

	return reaped;
}

static int fio_ioring_getevents(struct thread_data *td, unsigned int min,
				unsigned int max, const struct timespec *t)
{
	struct ioring_data *ld = td->io_ops_data;
	unsigned actual_min = td->o.iodepth_batch_complete_min == 0 ? 0 : min;
	struct ioring_options *o = td->eo;
	struct io_cq_ring *ring = &ld->cq_ring;
	unsigned events = 0;
	int r;

	ld->cq_ring_off = *ring->head;
	do {
		r = fio_ioring_cqring_reap(td, events, max);
		if (r) {
			events += r;
			max -= r;
			if (actual_min != 0)
				actual_min -= r;
			continue;
		}

		if (!o->sqpoll_thread) {
			r = io_uring_enter(ld, 0, actual_min,
						IORING_ENTER_GETEVENTS);
			if (r < 0) {
				if (errno == EAGAIN || errno == EINTR)
					continue;
				r = -errno;
				td_verror(td, errno, "io_uring_enter");
				break;
			}
		}
	} while (events < min);

	return r < 0 ? r : events;
}

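/*
 * NVMe passthrough only: just before an io_u is queued, fill in the
 * protection information fields of the prepared command. The PRACT/PRCHK
 * flags and application tag values come from the pi_act, pi_chk, apptag
 * and apptag_mask options when the namespace is formatted with PI.
 */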
static inline void fio_ioring_cmd_nvme_pi(struct thread_data *td,
					  struct io_u *io_u)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct nvme_uring_cmd *cmd;
	struct io_uring_sqe *sqe;
	struct nvme_cmd_ext_io_opts ext_opts = {0};
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);

	if (io_u->ddir == DDIR_TRIM)
		return;

	sqe = &ld->sqes[(io_u->index) << 1];
	cmd = (struct nvme_uring_cmd *)sqe->cmd;

	if (data->pi_type) {
		if (o->pi_act)
			ext_opts.io_flags |= NVME_IO_PRINFO_PRACT;
		ext_opts.io_flags |= o->prchk;
		ext_opts.apptag = o->apptag;
		ext_opts.apptag_mask = o->apptag_mask;
	}

	fio_nvme_pi_fill(cmd, io_u, &ext_opts);
}

static inline void fio_ioring_cmdprio_prep(struct thread_data *td,
					   struct io_u *io_u)
{
	struct ioring_data *ld = td->io_ops_data;
	struct cmdprio *cmdprio = &ld->cmdprio;

	if (fio_cmdprio_set_ioprio(td, cmdprio, io_u))
		ld->sqes[io_u->index].ioprio = io_u->ioprio;
}

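/*
 * Queueing normally just publishes the io_u to the SQ ring: its index is
 * written into the array slot at (tail & mask) and the new tail is stored
 * with release semantics so it becomes visible to the kernel (or the
 * SQPOLL thread). The io_uring_enter() syscall happens later, in commit
 * or getevents. Trims fall back to synchronous do_io_u_trim() once an
 * async discard attempt has failed.
 */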
static enum fio_q_status fio_ioring_queue(struct thread_data *td,
					  struct io_u *io_u)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct io_sq_ring *ring = &ld->sq_ring;
	unsigned tail;

	fio_ro_check(td, io_u);

	/* should not hit... */
	if (ld->queued == td->o.iodepth)
		return FIO_Q_BUSY;

	/* if async trim has been tried and failed, punt to sync */
	if (io_u->ddir == DDIR_TRIM && ld->async_trim_fail) {
		if (ld->queued)
			return FIO_Q_BUSY;

		do_io_u_trim(td, io_u);

		io_u_mark_submit(td, 1);
		io_u_mark_complete(td, 1);
		return FIO_Q_COMPLETED;
	}

	if (ld->cmdprio.mode != CMDPRIO_MODE_NONE)
		fio_ioring_cmdprio_prep(td, io_u);

	if (!strcmp(td->io_ops->name, "io_uring_cmd") &&
	    o->cmd_type == FIO_URING_CMD_NVME)
		fio_ioring_cmd_nvme_pi(td, io_u);

	tail = *ring->tail;
	ring->array[tail & ld->sq_ring_mask] = io_u->index;
	atomic_store_release(ring->tail, tail + 1);

	ld->queued++;
	return FIO_Q_QUEUED;
}

static void fio_ioring_queued(struct thread_data *td, int start, int nr)
{
	struct ioring_data *ld = td->io_ops_data;
	struct timespec now;

	if (!fio_fill_issue_time(td))
		return;

	fio_gettime(&now, NULL);

	while (nr--) {
		struct io_sq_ring *ring = &ld->sq_ring;
		int index = ring->array[start & ld->sq_ring_mask];
		struct io_u *io_u = ld->io_u_index[index];

		memcpy(&io_u->issue_time, &now, sizeof(now));
		io_u_queued(td, io_u);

		start++;
	}

	/*
	 * only used for iolog
	 */
	if (td->o.read_iolog_file)
		memcpy(&td->last_issue, &now, sizeof(now));
}

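/*
 * Commit: with sqpoll the kernel thread consumes the SQ ring on its own
 * and io_uring_enter() is only needed to wake it up after it has gone
 * idle; otherwise everything queued is submitted here, retrying after
 * reaping completions if the kernel returns EAGAIN/EINTR.
 */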
static int fio_ioring_commit(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	int ret;

	if (!ld->queued)
		return 0;

	/*
	 * Kernel side does submission; we just need to check if the ring is
	 * flagged as needing a kick, and if so, call io_uring_enter(). This
	 * only happens if we've been idle too long.
	 */
	if (o->sqpoll_thread) {
		struct io_sq_ring *ring = &ld->sq_ring;
		unsigned start = *ld->sq_ring.tail - ld->queued;
		unsigned flags;

		flags = atomic_load_relaxed(ring->flags);
		if (flags & IORING_SQ_NEED_WAKEUP)
			io_uring_enter(ld, ld->queued, 0,
					IORING_ENTER_SQ_WAKEUP);
		fio_ioring_queued(td, start, ld->queued);
		io_u_mark_submit(td, ld->queued);

		ld->queued = 0;
		return 0;
	}

	do {
		unsigned start = *ld->sq_ring.head;
		long nr = ld->queued;

		ret = io_uring_enter(ld, nr, 0, IORING_ENTER_GETEVENTS);
		if (ret > 0) {
			fio_ioring_queued(td, start, ret);
			io_u_mark_submit(td, ret);

			ld->queued -= ret;
			ret = 0;
		} else if (!ret) {
			io_u_mark_submit(td, ret);
			continue;
		} else {
			if (errno == EAGAIN || errno == EINTR) {
				ret = fio_ioring_cqring_reap(td, 0, ld->queued);
				if (ret)
					continue;
				/* Shouldn't happen */
				usleep(1);
				continue;
			}
			ret = -errno;
			td_verror(td, errno, "io_uring_enter submit");
			break;
		}
	} while (ld->queued);

	return ret;
}

static void fio_ioring_unmap(struct ioring_data *ld)
{
	int i;

	for (i = 0; i < FIO_ARRAY_SIZE(ld->mmap); i++)
		munmap(ld->mmap[i].ptr, ld->mmap[i].len);
	close(ld->ring_fd);
}

static void fio_ioring_cleanup(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;

	if (ld) {
		if (!(td->flags & TD_F_CHILD))
			fio_ioring_unmap(ld);

		fio_cmdprio_cleanup(&ld->cmdprio);
		free(ld->io_u_index);
		free(ld->md_buf);
		free(ld->iovecs);
		free(ld->fds);
		free(ld->dsm);
		free(ld);
	}
}

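/*
 * Map the three shared regions of the ring: the SQ ring metadata plus
 * index array, the SQE array, and the CQ ring with its CQEs. SQE128 and
 * CQE32 setups double the corresponding entry sizes for NVMe passthrough.
 */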
static int fio_ioring_mmap(struct ioring_data *ld, struct io_uring_params *p)
{
	struct io_sq_ring *sring = &ld->sq_ring;
	struct io_cq_ring *cring = &ld->cq_ring;
	void *ptr;

	ld->mmap[0].len = p->sq_off.array + p->sq_entries * sizeof(__u32);
	ptr = mmap(0, ld->mmap[0].len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, ld->ring_fd,
			IORING_OFF_SQ_RING);
	ld->mmap[0].ptr = ptr;
	sring->head = ptr + p->sq_off.head;
	sring->tail = ptr + p->sq_off.tail;
	sring->ring_mask = ptr + p->sq_off.ring_mask;
	sring->ring_entries = ptr + p->sq_off.ring_entries;
	sring->flags = ptr + p->sq_off.flags;
	sring->array = ptr + p->sq_off.array;
	ld->sq_ring_mask = *sring->ring_mask;

	if (p->flags & IORING_SETUP_SQE128)
		ld->mmap[1].len = 2 * p->sq_entries * sizeof(struct io_uring_sqe);
	else
		ld->mmap[1].len = p->sq_entries * sizeof(struct io_uring_sqe);
	ld->sqes = mmap(0, ld->mmap[1].len, PROT_READ | PROT_WRITE,
				MAP_SHARED | MAP_POPULATE, ld->ring_fd,
				IORING_OFF_SQES);
	ld->mmap[1].ptr = ld->sqes;

	if (p->flags & IORING_SETUP_CQE32) {
		ld->mmap[2].len = p->cq_off.cqes +
					2 * p->cq_entries * sizeof(struct io_uring_cqe);
	} else {
		ld->mmap[2].len = p->cq_off.cqes +
					p->cq_entries * sizeof(struct io_uring_cqe);
	}
	ptr = mmap(0, ld->mmap[2].len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, ld->ring_fd,
			IORING_OFF_CQ_RING);
	ld->mmap[2].ptr = ptr;
	cring->head = ptr + p->cq_off.head;
	cring->tail = ptr + p->cq_off.tail;
	cring->ring_mask = ptr + p->cq_off.ring_mask;
	cring->ring_entries = ptr + p->cq_off.ring_entries;
	cring->cqes = ptr + p->cq_off.cqes;
	ld->cq_ring_mask = *cring->ring_mask;
	return 0;
}

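/*
 * Probe the kernel's supported io_uring opcodes: when the user has not
 * set the nonvectored option explicitly, enable it only if both the
 * non-vectored IORING_OP_READ and IORING_OP_WRITE are supported.
 */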
static void fio_ioring_probe(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct io_uring_probe *p;
	int ret;

	/* already set by user, don't touch */
	if (o->nonvectored != -1)
		return;

	/* default to off, as that's always safe */
	o->nonvectored = 0;

	p = calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op));
	if (!p)
		return;

	ret = syscall(__NR_io_uring_register, ld->ring_fd,
			IORING_REGISTER_PROBE, p, 256);
	if (ret < 0)
		goto out;

	if (IORING_OP_WRITE > p->ops_len)
		goto out;

	if ((p->ops[IORING_OP_READ].flags & IO_URING_OP_SUPPORTED) &&
	    (p->ops[IORING_OP_WRITE].flags & IO_URING_OP_SUPPORTED))
		o->nonvectored = 1;
out:
	free(p);
}

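/*
 * Ring setup uses a best-effort cascade: optional flags (DEFER_TASKRUN /
 * SINGLE_ISSUER, COOP_TASKRUN, CQSIZE) are dropped one by one on EINVAL
 * so the engine still works on older kernels that don't know them.
 */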
static int fio_ioring_queue_init(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	int depth = ld->iodepth;
	struct io_uring_params p;
	int ret;

	memset(&p, 0, sizeof(p));

	if (o->hipri)
		p.flags |= IORING_SETUP_IOPOLL;
	if (o->sqpoll_thread) {
		p.flags |= IORING_SETUP_SQPOLL;
		if (o->sqpoll_set) {
			p.flags |= IORING_SETUP_SQ_AFF;
			p.sq_thread_cpu = o->sqpoll_cpu;
		}

		/*
		 * Submission latency for sqpoll_thread is just the time it
		 * takes to fill in the SQ ring entries, plus any syscall if
		 * IORING_SQ_NEED_WAKEUP is set, so we don't need to log that
		 * time separately.
		 */
		td->o.disable_slat = 1;
	}

	/*
	 * Clamp CQ ring size at our SQ ring size, we don't need more entries
	 * than that.
	 */
	p.flags |= IORING_SETUP_CQSIZE;
	p.cq_entries = depth;

	/*
	 * Setup COOP_TASKRUN as we don't need to get IPI interrupted for
	 * completing IO operations.
	 */
	p.flags |= IORING_SETUP_COOP_TASKRUN;

	/*
	 * io_uring is always a single issuer, and we can defer task_work
	 * runs until we reap events.
	 */
	p.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;

retry:
	ret = syscall(__NR_io_uring_setup, depth, &p);
	if (ret < 0) {
		if (errno == EINVAL && p.flags & IORING_SETUP_DEFER_TASKRUN) {
			p.flags &= ~IORING_SETUP_DEFER_TASKRUN;
			p.flags &= ~IORING_SETUP_SINGLE_ISSUER;
			goto retry;
		}
		if (errno == EINVAL && p.flags & IORING_SETUP_COOP_TASKRUN) {
			p.flags &= ~IORING_SETUP_COOP_TASKRUN;
			goto retry;
		}
		if (errno == EINVAL && p.flags & IORING_SETUP_CQSIZE) {
			p.flags &= ~IORING_SETUP_CQSIZE;
			goto retry;
		}
		return ret;
	}

	ld->ring_fd = ret;

	fio_ioring_probe(td);

	if (o->fixedbufs) {
		ret = syscall(__NR_io_uring_register, ld->ring_fd,
				IORING_REGISTER_BUFFERS, ld->iovecs, depth);
		if (ret < 0)
			return ret;
	}

	return fio_ioring_mmap(ld, &p);
}

static int fio_ioring_cmd_queue_init(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	int depth = ld->iodepth;
	struct io_uring_params p;
	int ret;

	memset(&p, 0, sizeof(p));

	if (o->hipri)
		p.flags |= IORING_SETUP_IOPOLL;
	if (o->sqpoll_thread) {
		p.flags |= IORING_SETUP_SQPOLL;
		if (o->sqpoll_set) {
			p.flags |= IORING_SETUP_SQ_AFF;
			p.sq_thread_cpu = o->sqpoll_cpu;
		}

		/*
		 * Submission latency for sqpoll_thread is just the time it
		 * takes to fill in the SQ ring entries, plus any syscall if
		 * IORING_SQ_NEED_WAKEUP is set, so we don't need to log that
		 * time separately.
		 */
		td->o.disable_slat = 1;
	}
	if (o->cmd_type == FIO_URING_CMD_NVME) {
		p.flags |= IORING_SETUP_SQE128;
		p.flags |= IORING_SETUP_CQE32;
	}

	/*
	 * Clamp CQ ring size at our SQ ring size, we don't need more entries
	 * than that.
	 */
	p.flags |= IORING_SETUP_CQSIZE;
	p.cq_entries = depth;

	/*
	 * Setup COOP_TASKRUN as we don't need to get IPI interrupted for
	 * completing IO operations.
	 */
	p.flags |= IORING_SETUP_COOP_TASKRUN;

	/*
	 * io_uring is always a single issuer, and we can defer task_work
	 * runs until we reap events.
	 */
	p.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;

retry:
	ret = syscall(__NR_io_uring_setup, depth, &p);
	if (ret < 0) {
		if (errno == EINVAL && p.flags & IORING_SETUP_DEFER_TASKRUN) {
			p.flags &= ~IORING_SETUP_DEFER_TASKRUN;
			p.flags &= ~IORING_SETUP_SINGLE_ISSUER;
			goto retry;
		}
		if (errno == EINVAL && p.flags & IORING_SETUP_COOP_TASKRUN) {
			p.flags &= ~IORING_SETUP_COOP_TASKRUN;
			goto retry;
		}
		if (errno == EINVAL && p.flags & IORING_SETUP_CQSIZE) {
			p.flags &= ~IORING_SETUP_CQSIZE;
			goto retry;
		}
		return ret;
	}

	ld->ring_fd = ret;

	fio_ioring_probe(td);

	if (o->fixedbufs) {
		ret = syscall(__NR_io_uring_register, ld->ring_fd,
				IORING_REGISTER_BUFFERS, ld->iovecs, depth);
		if (ret < 0)
			return ret;
	}

	return fio_ioring_mmap(ld, &p);
}

static int fio_ioring_register_files(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;
	struct fio_file *f;
	unsigned int i;
	int ret;

	ld->fds = calloc(td->o.nr_files, sizeof(int));

	for_each_file(td, f, i) {
		ret = generic_open_file(td, f);
		if (ret)
			goto err;
		ld->fds[i] = f->fd;
		f->engine_pos = i;
	}

	ret = syscall(__NR_io_uring_register, ld->ring_fd,
			IORING_REGISTER_FILES, ld->fds, td->o.nr_files);
	if (ret) {
err:
		free(ld->fds);
		ld->fds = NULL;
	}

	/*
	 * Pretend the file is closed again, and really close it if we hit
	 * an error.
	 */
	for_each_file(td, f, i) {
		if (ret) {
			int fio_unused ret2;
			ret2 = generic_close_file(td, f);
		} else
			f->fd = -1;
	}

	return ret;
}

static int fio_ioring_post_init(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct io_u *io_u;
	int err, i;

	for (i = 0; i < td->o.iodepth; i++) {
		struct iovec *iov = &ld->iovecs[i];

		io_u = ld->io_u_index[i];
		iov->iov_base = io_u->buf;
		iov->iov_len = td_max_bs(td);
	}

	err = fio_ioring_queue_init(td);
	if (err) {
		int init_err = errno;

		if (init_err == ENOSYS)
			log_err("fio: your kernel doesn't support io_uring\n");
		td_verror(td, init_err, "io_queue_init");
		return 1;
	}

	for (i = 0; i < ld->iodepth; i++) {
		struct io_uring_sqe *sqe;

		sqe = &ld->sqes[i];
		memset(sqe, 0, sizeof(*sqe));
	}

	if (o->registerfiles) {
		err = fio_ioring_register_files(td);
		if (err) {
			td_verror(td, errno, "ioring_register_files");
			return 1;
		}
	}

	return 0;
}

static int fio_ioring_cmd_post_init(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct io_u *io_u;
	int err, i;

	for (i = 0; i < td->o.iodepth; i++) {
		struct iovec *iov = &ld->iovecs[i];

		io_u = ld->io_u_index[i];
		iov->iov_base = io_u->buf;
		iov->iov_len = td_max_bs(td);
	}

	err = fio_ioring_cmd_queue_init(td);
	if (err) {
		int init_err = errno;

		td_verror(td, init_err, "io_queue_init");
		return 1;
	}

	for (i = 0; i < ld->iodepth; i++) {
		struct io_uring_sqe *sqe;

		if (o->cmd_type == FIO_URING_CMD_NVME) {
			sqe = &ld->sqes[i << 1];
			memset(sqe, 0, 2 * sizeof(*sqe));
		} else {
			sqe = &ld->sqes[i];
			memset(sqe, 0, sizeof(*sqe));
		}
	}

	if (o->registerfiles) {
		err = fio_ioring_register_files(td);
		if (err) {
			td_verror(td, errno, "ioring_register_files");
			return 1;
		}
	}

	return 0;
}

static void parse_prchk_flags(struct ioring_options *o)
{
	if (!o->pi_chk)
		return;

	if (strstr(o->pi_chk, "GUARD") != NULL)
		o->prchk = NVME_IO_PRINFO_PRCHK_GUARD;
	if (strstr(o->pi_chk, "REFTAG") != NULL)
		o->prchk |= NVME_IO_PRINFO_PRCHK_REF;
	if (strstr(o->pi_chk, "APPTAG") != NULL)
		o->prchk |= NVME_IO_PRINFO_PRCHK_APP;
}

static int fio_ioring_init(struct thread_data *td)
{
	struct ioring_options *o = td->eo;
	struct ioring_data *ld;
	struct nvme_dsm *dsm;
	void *ptr;
	unsigned int dsm_size;
	unsigned long long md_size;
	int ret, i;

	/* sqthread submission requires registered files */
	if (o->sqpoll_thread)
		o->registerfiles = 1;

	if (o->registerfiles && td->o.nr_files != td->o.open_files) {
		log_err("fio: io_uring registered files require nr_files to "
			"be identical to open_files\n");
		return 1;
	}

	ld = calloc(1, sizeof(*ld));

	/*
	 * The internal io_uring queue depth must be a power-of-2, as that's
	 * how the ring interface works. So round that up, in case the user
	 * set iodepth isn't a power-of-2. Leave the fio depth the same, so
	 * as not to drive a deeper iodepth than requested if we did round up.
	 */
	ld->iodepth = roundup_pow2(td->o.iodepth);

	/* io_u index */
	ld->io_u_index = calloc(td->o.iodepth, sizeof(struct io_u *));

	/*
	 * metadata buffer for nvme command.
	 * We are only supporting iomem=malloc / mem=malloc as of now.
	 */
	if (!strcmp(td->io_ops->name, "io_uring_cmd") &&
	    (o->cmd_type == FIO_URING_CMD_NVME) && o->md_per_io_size) {
		md_size = (unsigned long long) o->md_per_io_size
				* (unsigned long long) td->o.iodepth;
		md_size += page_mask + td->o.mem_align;
		if (td->o.mem_align && td->o.mem_align > page_size)
			md_size += td->o.mem_align - page_size;
		ld->md_buf = malloc(md_size);
		if (!ld->md_buf) {
			free(ld);
			return 1;
		}
	}
	parse_prchk_flags(o);

	ld->iovecs = calloc(ld->iodepth, sizeof(struct iovec));

	td->io_ops_data = ld;

	ret = fio_cmdprio_init(td, &ld->cmdprio, &o->cmdprio_options);
	if (ret) {
		td_verror(td, EINVAL, "fio_ioring_init");
		return 1;
	}

	/*
	 * For io_uring_cmd, trims are async operations unless we are operating
	 * in zbd mode where trim means zone reset.
	 */
	if (!strcmp(td->io_ops->name, "io_uring_cmd") && td_trim(td) &&
	    td->o.zone_mode == ZONE_MODE_ZBD) {
		td->io_ops->flags |= FIO_ASYNCIO_SYNC_TRIM;
	} else {
		dsm_size = sizeof(*ld->dsm) +
			td->o.num_range * sizeof(struct nvme_dsm_range);
		ld->dsm = calloc(td->o.iodepth, dsm_size);
		ptr = ld->dsm;
		for (i = 0; i < td->o.iodepth; i++) {
			dsm = (struct nvme_dsm *)ptr;
			dsm->nr_ranges = td->o.num_range;
			ptr += dsm_size;
		}
	}

	if (!strcmp(td->io_ops->name, "io_uring_cmd")) {
		if (td_write(td)) {
			switch (o->write_mode) {
			case FIO_URING_CMD_WMODE_UNCOR:
				ld->write_opcode = nvme_cmd_write_uncor;
				break;
			case FIO_URING_CMD_WMODE_ZEROES:
				ld->write_opcode = nvme_cmd_write_zeroes;
				if (o->deac)
					ld->cdw12_flags[DDIR_WRITE] = 1 << 25;
				break;
			case FIO_URING_CMD_WMODE_VERIFY:
				ld->write_opcode = nvme_cmd_verify;
				break;
			default:
				ld->write_opcode = nvme_cmd_write;
				break;
			}
		}

		if (o->readfua)
			ld->cdw12_flags[DDIR_READ] = 1 << 30;
		if (o->writefua)
			ld->cdw12_flags[DDIR_WRITE] = 1 << 30;
	}

	return 0;
}

static int fio_ioring_io_u_init(struct thread_data *td, struct io_u *io_u)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct nvme_pi_data *pi_data;
	char *p;

	ld->io_u_index[io_u->index] = io_u;

	if (!strcmp(td->io_ops->name, "io_uring_cmd")) {
		p = PTR_ALIGN(ld->md_buf, page_mask) + td->o.mem_align;
		p += o->md_per_io_size * io_u->index;
		io_u->mmap_data = p;

		if (!o->pi_act) {
			pi_data = calloc(1, sizeof(*pi_data));
			pi_data->io_flags |= o->prchk;
			pi_data->apptag_mask = o->apptag_mask;
			pi_data->apptag = o->apptag;
			io_u->engine_data = pi_data;
		}
	}

	return 0;
}

static void fio_ioring_io_u_free(struct thread_data *td, struct io_u *io_u)
{
	struct ioring_options *o = td->eo;
	struct nvme_pi *pi;

	if (!strcmp(td->io_ops->name, "io_uring_cmd") &&
	    (o->cmd_type == FIO_URING_CMD_NVME)) {
		pi = io_u->engine_data;
		free(pi);
		io_u->engine_data = NULL;
	}
}

static int fio_ioring_open_file(struct thread_data *td, struct fio_file *f)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;

	if (!ld || !o->registerfiles)
		return generic_open_file(td, f);

	f->fd = ld->fds[f->engine_pos];
	return 0;
}

static int fio_ioring_cmd_open_file(struct thread_data *td, struct fio_file *f)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;

	if (o->cmd_type == FIO_URING_CMD_NVME) {
		struct nvme_data *data = NULL;
		unsigned int lba_size = 0;
		__u64 nlba = 0;
		int ret;

		/* Store the namespace-id and lba size. */
		data = FILE_ENG_DATA(f);
		if (data == NULL) {
			data = calloc(1, sizeof(struct nvme_data));
			ret = fio_nvme_get_info(f, &nlba, o->pi_act, data);
			if (ret) {
				free(data);
				return ret;
			}

			FILE_SET_ENG_DATA(f, data);
		}

		lba_size = data->lba_ext ? data->lba_ext : data->lba_size;

		for_each_rw_ddir(ddir) {
			if (td->o.min_bs[ddir] % lba_size || td->o.max_bs[ddir] % lba_size) {
				if (data->lba_ext) {
					log_err("%s: block size must be a multiple of %u "
						"(LBA data size + Metadata size)\n", f->file_name, lba_size);
					if (td->o.min_bs[ddir] == td->o.max_bs[ddir] &&
					    !(td->o.min_bs[ddir] % data->lba_size)) {
						/* fixed block size is actually a multiple of LBA data size */
						unsigned long long suggestion = lba_size *
							(td->o.min_bs[ddir] / data->lba_size);
						log_err("Did you mean to use a block size of %llu?\n", suggestion);
					}
				} else {
					log_err("%s: block size must be a multiple of LBA data size\n",
						f->file_name);
				}
				td_verror(td, EINVAL, "fio_ioring_cmd_open_file");
				return 1;
			}
			if (data->ms && !data->lba_ext && ddir != DDIR_TRIM &&
			    (o->md_per_io_size < ((td->o.max_bs[ddir] / data->lba_size) *
						  data->ms))) {
				log_err("%s: md_per_io_size should be at least %llu bytes\n",
					f->file_name,
					((td->o.max_bs[ddir] / data->lba_size) * data->ms));
				td_verror(td, EINVAL, "fio_ioring_cmd_open_file");
				return 1;
			}
		}

		/*
		 * For extended logical block sizes we cannot use verify when
		 * end to end data protection checks are enabled, as the PI
		 * section of data buffer conflicts with verify.
		 */
		if (data->ms && data->pi_type && data->lba_ext &&
		    td->o.verify != VERIFY_NONE) {
			log_err("%s: for extended LBA, verify cannot be used when E2E data protection is enabled\n",
				f->file_name);
			td_verror(td, EINVAL, "fio_ioring_cmd_open_file");
			return 1;
		}

		if (o->write_mode != FIO_URING_CMD_WMODE_WRITE &&
		    !td_write(td)) {
			log_err("%s: 'readwrite=|rw=' has no write\n",
				f->file_name);
			td_verror(td, EINVAL, "fio_ioring_cmd_open_file");
			return 1;
		}
	}
	if (!ld || !o->registerfiles)
		return generic_open_file(td, f);

	f->fd = ld->fds[f->engine_pos];
	return 0;
}

static int fio_ioring_close_file(struct thread_data *td, struct fio_file *f)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;

	if (!ld || !o->registerfiles)
		return generic_close_file(td, f);

	f->fd = -1;
	return 0;
}

static int fio_ioring_cmd_close_file(struct thread_data *td,
				     struct fio_file *f)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;

	if (o->cmd_type == FIO_URING_CMD_NVME) {
		struct nvme_data *data = FILE_ENG_DATA(f);

		FILE_SET_ENG_DATA(f, NULL);
		free(data);
	}
	if (!ld || !o->registerfiles)
		return generic_close_file(td, f);

	f->fd = -1;
	return 0;
}

static int fio_ioring_cmd_get_file_size(struct thread_data *td,
					struct fio_file *f)
{
	struct ioring_options *o = td->eo;

	if (fio_file_size_known(f))
		return 0;

	if (o->cmd_type == FIO_URING_CMD_NVME) {
		struct nvme_data *data = NULL;
		__u64 nlba = 0;
		int ret;

		data = calloc(1, sizeof(struct nvme_data));
		ret = fio_nvme_get_info(f, &nlba, o->pi_act, data);
		if (ret) {
			free(data);
			return ret;
		}

		if (data->lba_ext)
			f->real_file_size = data->lba_ext * nlba;
		else
			f->real_file_size = data->lba_size * nlba;
		fio_file_set_size_known(f);

		FILE_SET_ENG_DATA(f, data);
		return 0;
	}
	return generic_get_file_size(td, f);
}

static int fio_ioring_cmd_get_zoned_model(struct thread_data *td,
					  struct fio_file *f,
					  enum zbd_zoned_model *model)
{
	return fio_nvme_get_zoned_model(td, f, model);
}

static int fio_ioring_cmd_report_zones(struct thread_data *td,
				       struct fio_file *f, uint64_t offset,
				       struct zbd_zone *zbdz,
				       unsigned int nr_zones)
{
	return fio_nvme_report_zones(td, f, offset, zbdz, nr_zones);
}

static int fio_ioring_cmd_reset_wp(struct thread_data *td, struct fio_file *f,
				   uint64_t offset, uint64_t length)
{
	return fio_nvme_reset_wp(td, f, offset, length);
}

static int fio_ioring_cmd_get_max_open_zones(struct thread_data *td,
					     struct fio_file *f,
					     unsigned int *max_open_zones)
{
	return fio_nvme_get_max_open_zones(td, f, max_open_zones);
}

static int fio_ioring_cmd_fetch_ruhs(struct thread_data *td, struct fio_file *f,
				     struct fio_ruhs_info *fruhs_info)
{
	struct nvme_fdp_ruh_status *ruhs;
	int bytes, nr_ruhs, ret, i;

	nr_ruhs = fruhs_info->nr_ruhs;
	bytes = sizeof(*ruhs) + fruhs_info->nr_ruhs * sizeof(struct nvme_fdp_ruh_status_desc);

	ruhs = calloc(1, bytes);
	if (!ruhs)
		return -ENOMEM;

	ret = fio_nvme_iomgmt_ruhs(td, f, ruhs, bytes);
	if (ret)
		goto free;

	fruhs_info->nr_ruhs = le16_to_cpu(ruhs->nruhsd);
	for (i = 0; i < nr_ruhs; i++)
		fruhs_info->plis[i] = le16_to_cpu(ruhs->ruhss[i].pid);
free:
	free(ruhs);
	return ret;
}

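/*
 * Two engines are registered from this file: "io_uring" for regular
 * file/block device IO through the native ring opcodes, and
 * "io_uring_cmd" for NVMe passthrough (plus zoned and FDP helpers) via
 * IORING_OP_URING_CMD.
 */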
static struct ioengine_ops ioengine_uring = {
	.name = "io_uring",
	.version = FIO_IOOPS_VERSION,
	.flags = FIO_NO_OFFLOAD | FIO_ASYNCIO_SETS_ISSUE_TIME |
			FIO_ATOMICWRITES,
	.init = fio_ioring_init,
	.post_init = fio_ioring_post_init,
	.io_u_init = fio_ioring_io_u_init,
	.prep = fio_ioring_prep,
	.queue = fio_ioring_queue,
	.commit = fio_ioring_commit,
	.getevents = fio_ioring_getevents,
	.event = fio_ioring_event,
	.cleanup = fio_ioring_cleanup,
	.open_file = fio_ioring_open_file,
	.close_file = fio_ioring_close_file,
	.get_file_size = generic_get_file_size,
	.options = options,
	.option_struct_size = sizeof(struct ioring_options),
};

static struct ioengine_ops ioengine_uring_cmd = {
	.name = "io_uring_cmd",
	.version = FIO_IOOPS_VERSION,
	.flags = FIO_NO_OFFLOAD | FIO_MEMALIGN | FIO_RAWIO |
			FIO_ASYNCIO_SETS_ISSUE_TIME |
			FIO_MULTI_RANGE_TRIM,
	.init = fio_ioring_init,
	.post_init = fio_ioring_cmd_post_init,
	.io_u_init = fio_ioring_io_u_init,
	.io_u_free = fio_ioring_io_u_free,
	.prep = fio_ioring_cmd_prep,
	.queue = fio_ioring_queue,
	.commit = fio_ioring_commit,
	.getevents = fio_ioring_getevents,
	.event = fio_ioring_cmd_event,
	.errdetails = fio_ioring_cmd_errdetails,
	.cleanup = fio_ioring_cleanup,
	.open_file = fio_ioring_cmd_open_file,
	.close_file = fio_ioring_cmd_close_file,
	.get_file_size = fio_ioring_cmd_get_file_size,
	.get_zoned_model = fio_ioring_cmd_get_zoned_model,
	.report_zones = fio_ioring_cmd_report_zones,
	.reset_wp = fio_ioring_cmd_reset_wp,
	.get_max_open_zones = fio_ioring_cmd_get_max_open_zones,
	.options = options,
	.option_struct_size = sizeof(struct ioring_options),
	.fdp_fetch_ruhs = fio_ioring_cmd_fetch_ruhs,
};

static void fio_init fio_ioring_register(void)
{
	register_ioengine(&ioengine_uring);
	register_ioengine(&ioengine_uring_cmd);
}

static void fio_exit fio_ioring_unregister(void)
{
	unregister_ioengine(&ioengine_uring);
	unregister_ioengine(&ioengine_uring_cmd);
}
#endif