/*
 * io_uring engine
 *
 * IO engine using the new native Linux aio io_uring interface. See:
 *
 * http://git.kernel.dk/cgit/linux-block/log/?h=io_uring
 *
 */
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/time.h>
#include <sys/resource.h>

#include "../fio.h"
#include "../lib/pow2.h"
#include "../optgroup.h"
#include "../lib/memalign.h"
#include "../lib/fls.h"
#include "../lib/roundup.h"
#include "../verify.h"

#ifdef ARCH_HAVE_IOURING

#include "../lib/types.h"
#include "../os/linux/io_uring.h"
#include "cmdprio.h"
#include "zbd.h"
#include "nvme.h"

#include <sys/stat.h>

#ifndef IO_INTEGRITY_CHK_GUARD
/* flags for integrity meta */
#define IO_INTEGRITY_CHK_GUARD		(1U << 0) /* enforce guard check */
#define IO_INTEGRITY_CHK_REFTAG		(1U << 1) /* enforce ref check */
#define IO_INTEGRITY_CHK_APPTAG		(1U << 2) /* enforce app check */
#endif /* IO_INTEGRITY_CHK_GUARD */

#ifndef FS_IOC_GETLBMD_CAP
/* Protection info capability flags */
#define LBMD_PI_CAP_INTEGRITY		(1 << 0)
#define LBMD_PI_CAP_REFTAG		(1 << 1)

/* Checksum types for Protection Information */
#define LBMD_PI_CSUM_NONE		0
#define LBMD_PI_CSUM_IP			1
#define LBMD_PI_CSUM_CRC16_T10DIF	2
#define LBMD_PI_CSUM_CRC64_NVME		4

/*
 * Logical block metadata capability descriptor
 * If the device does not support metadata, all the fields will be zero.
 * Applications must check lbmd_flags to determine whether metadata is
 * supported or not.
 */
struct logical_block_metadata_cap {
	/* Bitmask of logical block metadata capability flags */
	__u32	lbmd_flags;
	/*
	 * The amount of data described by each unit of logical block
	 * metadata
	 */
	__u16	lbmd_interval;
	/*
	 * Size in bytes of the logical block metadata associated with each
	 * interval
	 */
	__u8	lbmd_size;
	/*
	 * Size in bytes of the opaque block tag associated with each
	 * interval
	 */
	__u8	lbmd_opaque_size;
	/*
	 * Offset in bytes of the opaque block tag within the logical block
	 * metadata
	 */
	__u8	lbmd_opaque_offset;
	/* Size in bytes of the T10 PI tuple associated with each interval */
	__u8	lbmd_pi_size;
	/* Offset in bytes of T10 PI tuple within the logical block metadata */
	__u8	lbmd_pi_offset;
	/* T10 PI guard tag type */
	__u8	lbmd_guard_tag_type;
	/* Size in bytes of the T10 PI application tag */
	__u8	lbmd_app_tag_size;
	/* Size in bytes of the T10 PI reference tag */
	__u8	lbmd_ref_tag_size;
	/* Size in bytes of the T10 PI storage tag */
	__u8	lbmd_storage_tag_size;
	__u8	pad;
};

#define FS_IOC_GETLBMD_CAP	_IOWR(0x15, 2, struct logical_block_metadata_cap)
#endif /* FS_IOC_GETLBMD_CAP */

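/*
 * Note: the IO_INTEGRITY_CHK_* and FS_IOC_GETLBMD_CAP blocks above are
 * local fallback copies of kernel uapi definitions, wrapped in #ifndef
 * guards so the engine still compiles against older kernel headers that
 * do not provide them yet.
 */
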
enum uring_cmd_type {
	FIO_URING_CMD_NVME = 1,
};

enum uring_cmd_write_mode {
	FIO_URING_CMD_WMODE_WRITE = 1,
	FIO_URING_CMD_WMODE_UNCOR,
	FIO_URING_CMD_WMODE_ZEROES,
	FIO_URING_CMD_WMODE_VERIFY,
};

enum uring_cmd_verify_mode {
	FIO_URING_CMD_VMODE_READ = 1,
	FIO_URING_CMD_VMODE_COMPARE,
};

struct io_sq_ring {
	unsigned *head;
	unsigned *tail;
	unsigned *ring_mask;
	unsigned *ring_entries;
	unsigned *flags;
	unsigned *array;
};

struct io_cq_ring {
	unsigned *head;
	unsigned *tail;
	unsigned *ring_mask;
	unsigned *ring_entries;
	struct io_uring_cqe *cqes;
};

struct ioring_mmap {
	void *ptr;
	size_t len;
};

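/*
 * io_sq_ring/io_cq_ring hold pointers into the rings mmap'ed from the
 * kernel in fio_ioring_mmap(). Ring sizes are a power of two, so the
 * entry i slots past the current head lives at, illustratively:
 *
 *	cqe = &ld->cq_ring.cqes[(head + i) & ld->cq_ring_mask];
 *
 * which is exactly how fio_ioring_event() and fio_ioring_cqring_reap()
 * index completions below.
 */
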
struct ioring_data {
	int ring_fd;

	struct io_u **io_u_index;
	char *md_buf;
	char *pi_attr;

	int *fds;

	struct io_sq_ring sq_ring;
	struct io_uring_sqe *sqes;
	struct iovec *iovecs;
	unsigned sq_ring_mask;

	struct io_cq_ring cq_ring;
	unsigned cq_ring_mask;

	int async_trim_fail;
	int queued;
	int cq_ring_off;
	unsigned iodepth;
	int prepped;

	struct ioring_mmap mmap[3];

	struct cmdprio cmdprio;

	struct nvme_dsm *dsm;
	uint32_t cdw12_flags[DDIR_RWDIR_CNT];
	uint8_t write_opcode;

	bool is_uring_cmd_eng;

	struct nvme_cmd_ext_io_opts ext_opts;
};

struct ioring_options {
	struct thread_data *td;
	unsigned int hipri;
	unsigned int readfua;
	unsigned int writefua;
	unsigned int deac;
	unsigned int write_mode;
	unsigned int verify_mode;
	struct cmdprio_options cmdprio_options;
	unsigned int fixedbufs;
	unsigned int registerfiles;
	unsigned int sqpoll_thread;
	unsigned int sqpoll_set;
	unsigned int sqpoll_cpu;
	unsigned int nonvectored;
	unsigned int uncached;
	unsigned int nowait;
	unsigned int force_async;
	unsigned int md_per_io_size;
	unsigned int pi_act;
	unsigned int apptag;
	unsigned int apptag_mask;
	unsigned int prchk;
	char *pi_chk;
	enum uring_cmd_type cmd_type;
};

static const int ddir_to_op[2][2] = {
	{ IORING_OP_READV, IORING_OP_READ },
	{ IORING_OP_WRITEV, IORING_OP_WRITE }
};

static const int fixed_ddir_to_op[2] = {
	IORING_OP_READ_FIXED,
	IORING_OP_WRITE_FIXED
};

static int fio_ioring_sqpoll_cb(void *data, unsigned long long *val)
{
	struct ioring_options *o = data;

	o->sqpoll_cpu = *val;
	o->sqpoll_set = 1;
	return 0;
}

static struct fio_option options[] = {
	{
		.name	= "hipri",
		.lname	= "High Priority",
		.type	= FIO_OPT_STR_SET,
		.off1	= offsetof(struct ioring_options, hipri),
		.help	= "Use polled IO completions",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "readfua",
		.lname	= "Read fua flag support",
		.type	= FIO_OPT_BOOL,
		.off1	= offsetof(struct ioring_options, readfua),
		.help	= "Set FUA flag (force unit access) for all Read operations",
		.def	= "0",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "writefua",
		.lname	= "Write fua flag support",
		.type	= FIO_OPT_BOOL,
		.off1	= offsetof(struct ioring_options, writefua),
		.help	= "Set FUA flag (force unit access) for all Write operations",
		.def	= "0",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "write_mode",
		.lname	= "Additional write command support (Write Uncorrectable, Write Zeroes)",
		.type	= FIO_OPT_STR,
		.off1	= offsetof(struct ioring_options, write_mode),
		.help	= "Issue Write Uncorrectable or Zeroes command instead of Write command",
		.def	= "write",
		.posval = {
			  { .ival = "write",
			    .oval = FIO_URING_CMD_WMODE_WRITE,
			    .help = "Issue Write commands for write operations"
			  },
			  { .ival = "uncor",
			    .oval = FIO_URING_CMD_WMODE_UNCOR,
			    .help = "Issue Write Uncorrectable commands for write operations"
			  },
			  { .ival = "zeroes",
			    .oval = FIO_URING_CMD_WMODE_ZEROES,
			    .help = "Issue Write Zeroes commands for write operations"
			  },
			  { .ival = "verify",
			    .oval = FIO_URING_CMD_WMODE_VERIFY,
			    .help = "Issue Verify commands for write operations"
			  },
		},
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "verify_mode",
		.lname	= "Do verify based on the configured command (e.g., Read or Compare command)",
		.type	= FIO_OPT_STR,
		.off1	= offsetof(struct ioring_options, verify_mode),
		.help	= "Issue Read or Compare command in the verification phase",
		.def	= "read",
		.posval = {
			  { .ival = "read",
			    .oval = FIO_URING_CMD_VMODE_READ,
			    .help = "Issue Read commands in the verification phase"
			  },
			  { .ival = "compare",
			    .oval = FIO_URING_CMD_VMODE_COMPARE,
			    .help = "Issue Compare commands in the verification phase"
			  },
		},
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "fixedbufs",
		.lname	= "Fixed (pre-mapped) IO buffers",
		.type	= FIO_OPT_STR_SET,
		.off1	= offsetof(struct ioring_options, fixedbufs),
		.help	= "Pre map IO buffers",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "registerfiles",
		.lname	= "Register file set",
		.type	= FIO_OPT_STR_SET,
		.off1	= offsetof(struct ioring_options, registerfiles),
		.help	= "Pre-open/register files",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "sqthread_poll",
		.lname	= "Kernel SQ thread polling",
		.type	= FIO_OPT_STR_SET,
		.off1	= offsetof(struct ioring_options, sqpoll_thread),
		.help	= "Offload submission/completion to kernel thread",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "sqthread_poll_cpu",
		.lname	= "SQ Thread Poll CPU",
		.type	= FIO_OPT_INT,
		.cb	= fio_ioring_sqpoll_cb,
		.help	= "What CPU to run SQ thread polling on",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "nonvectored",
		.lname	= "Non-vectored",
		.type	= FIO_OPT_INT,
		.off1	= offsetof(struct ioring_options, nonvectored),
		.def	= "-1",
		.help	= "Use non-vectored read/write commands",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "uncached",
		.lname	= "Uncached",
		.type	= FIO_OPT_INT,
		.off1	= offsetof(struct ioring_options, uncached),
		.help	= "Use RWF_DONTCACHE for buffered read/writes",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "nowait",
		.lname	= "RWF_NOWAIT",
		.type	= FIO_OPT_BOOL,
		.off1	= offsetof(struct ioring_options, nowait),
		.help	= "Use RWF_NOWAIT for reads/writes",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "force_async",
		.lname	= "Force async",
		.type	= FIO_OPT_INT,
		.off1	= offsetof(struct ioring_options, force_async),
		.help	= "Set IOSQE_ASYNC every N requests",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "cmd_type",
		.lname	= "Uring cmd type",
		.type	= FIO_OPT_STR,
		.off1	= offsetof(struct ioring_options, cmd_type),
		.help	= "Specify uring-cmd type",
		.def	= "nvme",
		.posval = {
			  { .ival = "nvme",
			    .oval = FIO_URING_CMD_NVME,
			    .help = "Issue nvme-uring-cmd",
			  },
		},
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	CMDPRIO_OPTIONS(struct ioring_options, FIO_OPT_G_IOURING),
	{
		.name	= "md_per_io_size",
		.lname	= "Separate Metadata Buffer Size per I/O",
		.type	= FIO_OPT_INT,
		.off1	= offsetof(struct ioring_options, md_per_io_size),
		.def	= "0",
		.help	= "Size of separate metadata buffer per I/O (Default: 0)",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "pi_act",
		.lname	= "Protection Information Action",
		.type	= FIO_OPT_BOOL,
		.off1	= offsetof(struct ioring_options, pi_act),
		.def	= "1",
		.help	= "Protection Information Action bit (pi_act=1 or pi_act=0)",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "pi_chk",
		.lname	= "Protection Information Check",
		.type	= FIO_OPT_STR_STORE,
		.off1	= offsetof(struct ioring_options, pi_chk),
		.def	= NULL,
		.help	= "Control of Protection Information Checking (pi_chk=GUARD,REFTAG,APPTAG)",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "apptag",
		.lname	= "Application Tag used in Protection Information",
		.type	= FIO_OPT_INT,
		.off1	= offsetof(struct ioring_options, apptag),
		.def	= "0x1234",
		.help	= "Application Tag used in Protection Information field (Default: 0x1234)",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "apptag_mask",
		.lname	= "Application Tag Mask",
		.type	= FIO_OPT_INT,
		.off1	= offsetof(struct ioring_options, apptag_mask),
		.def	= "0xffff",
		.help	= "Application Tag Mask used with Application Tag (Default: 0xffff)",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= "deac",
		.lname	= "Deallocate bit for write zeroes command",
		.type	= FIO_OPT_BOOL,
		.off1	= offsetof(struct ioring_options, deac),
		.help	= "Set DEAC (deallocate) flag for write zeroes command",
		.def	= "0",
		.category = FIO_OPT_C_ENGINE,
		.group	= FIO_OPT_G_IOURING,
	},
	{
		.name	= NULL,
	},
};

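/*
 * For reference, a minimal job file exercising some of the options above
 * could look like the following (the device path and values here are
 * illustrative only, not taken from this source):
 *
 *	[uring-test]
 *	ioengine=io_uring
 *	filename=/dev/nvme0n1
 *	rw=randread
 *	iodepth=32
 *	fixedbufs=1
 *	registerfiles=1
 *	sqthread_poll=1
 */
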
static int io_uring_enter(struct ioring_data *ld, unsigned int to_submit,
			  unsigned int min_complete, unsigned int flags)
{
#ifdef FIO_ARCH_HAS_SYSCALL
	return __do_syscall6(__NR_io_uring_enter, ld->ring_fd, to_submit,
				min_complete, flags, NULL, 0);
#else
	return syscall(__NR_io_uring_enter, ld->ring_fd, to_submit,
			min_complete, flags, NULL, 0);
#endif
}

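/*
 * The engine invokes io_uring_enter(2) directly rather than linking
 * against liburing; when FIO_ARCH_HAS_SYSCALL is defined, fio's
 * __do_syscall6() helper issues the syscall inline and skips the libc
 * syscall() wrapper.
 */
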
#ifndef BLOCK_URING_CMD_DISCARD
#define BLOCK_URING_CMD_DISCARD	_IO(0x12, 0)
#endif

static void fio_ioring_prep_md(struct thread_data *td, struct io_u *io_u)
{
	struct ioring_data *ld = td->io_ops_data;
	struct io_uring_attr_pi *pi_attr = io_u->pi_attr;
	struct nvme_data *data = FILE_ENG_DATA(io_u->file);
	struct io_uring_sqe *sqe;

	sqe = &ld->sqes[io_u->index];

	sqe->attr_type_mask = IORING_RW_ATTR_FLAG_PI;
	sqe->attr_ptr = (__u64)(uintptr_t)pi_attr;
	pi_attr->addr = (__u64)(uintptr_t)io_u->mmap_data;

	if (pi_attr->flags & IO_INTEGRITY_CHK_REFTAG) {
		__u64 slba = get_slba(data, io_u->offset);
		pi_attr->seed = (__u32)slba;
	}
}

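/*
 * fio_ioring_prep_md() attaches the per-io_u metadata buffer to the SQE
 * as a protection-information attribute (IORING_RW_ATTR_FLAG_PI). When
 * reftag checking is enabled, the seed is primed with the low 32 bits of
 * the starting LBA of the I/O.
 */
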
static int fio_ioring_prep(struct thread_data *td, struct io_u *io_u)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct fio_file *f = io_u->file;
	struct io_uring_sqe *sqe;

	sqe = &ld->sqes[io_u->index];

	if (o->registerfiles) {
		sqe->fd = f->engine_pos;
		sqe->flags = IOSQE_FIXED_FILE;
	} else {
		sqe->fd = f->fd;
		sqe->flags = 0;
	}

	if (io_u->ddir == DDIR_READ || io_u->ddir == DDIR_WRITE) {
		if (o->fixedbufs) {
			sqe->opcode = fixed_ddir_to_op[io_u->ddir];
			sqe->addr = (unsigned long) io_u->xfer_buf;
			sqe->len = io_u->xfer_buflen;
			sqe->buf_index = io_u->index;
		} else {
			struct iovec *iov = &ld->iovecs[io_u->index];

			/*
			 * Update based on actual io_u, requeue could have
			 * adjusted these
			 */
			iov->iov_base = io_u->xfer_buf;
			iov->iov_len = io_u->xfer_buflen;

			sqe->opcode = ddir_to_op[io_u->ddir][!!o->nonvectored];
			if (o->nonvectored) {
				sqe->addr = (unsigned long) iov->iov_base;
				sqe->len = iov->iov_len;
			} else {
				sqe->addr = (unsigned long) iov;
				sqe->len = 1;
			}
		}
		if (o->md_per_io_size)
			fio_ioring_prep_md(td, io_u);
		sqe->rw_flags = 0;
		if (!td->o.odirect && o->uncached)
			sqe->rw_flags |= RWF_DONTCACHE;
		if (o->nowait)
			sqe->rw_flags |= RWF_NOWAIT;
		if (td->o.oatomic && io_u->ddir == DDIR_WRITE)
			sqe->rw_flags |= RWF_ATOMIC;

		/*
		 * Since io_uring can have a submission context (sqthread_poll)
		 * that is different from the process context, we cannot rely on
		 * the IO priority set by ioprio_set() (options prio, prioclass,
		 * and priohint) to be inherited.
		 * td->ioprio will have the value of the "default prio", so set
		 * this unconditionally. This value might get overridden by
		 * fio_ioring_cmdprio_prep() if the option cmdprio_percentage or
		 * cmdprio_bssplit is used.
		 */
		sqe->ioprio = td->ioprio;
		sqe->off = io_u->offset;
	} else if (ddir_sync(io_u->ddir)) {
		sqe->ioprio = 0;
		if (io_u->ddir == DDIR_SYNC_FILE_RANGE) {
			sqe->off = f->first_write;
			sqe->len = f->last_write - f->first_write;
			sqe->sync_range_flags = td->o.sync_file_range;
			sqe->opcode = IORING_OP_SYNC_FILE_RANGE;
		} else {
			sqe->off = 0;
			sqe->addr = 0;
			sqe->len = 0;
			if (io_u->ddir == DDIR_DATASYNC)
				sqe->fsync_flags |= IORING_FSYNC_DATASYNC;
			sqe->opcode = IORING_OP_FSYNC;
		}
	} else if (io_u->ddir == DDIR_TRIM) {
		sqe->opcode = IORING_OP_URING_CMD;
		sqe->addr = io_u->offset;
		sqe->addr3 = io_u->xfer_buflen;
		sqe->rw_flags = 0;
		sqe->len = sqe->off = 0;
		sqe->ioprio = 0;
		sqe->cmd_op = BLOCK_URING_CMD_DISCARD;
		sqe->__pad1 = 0;
		sqe->file_index = 0;
	}

	if (o->force_async && ++ld->prepped == o->force_async) {
		ld->prepped = 0;
		sqe->flags |= IOSQE_ASYNC;
	}

	sqe->user_data = (unsigned long) io_u;
	return 0;
}

static int fio_ioring_cmd_prep(struct thread_data *td, struct io_u *io_u)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct fio_file *f = io_u->file;
	struct nvme_uring_cmd *cmd;
	struct io_uring_sqe *sqe;
	struct nvme_dsm *dsm;
	void *ptr = ld->dsm;
	unsigned int dsm_size;
	uint8_t read_opcode = nvme_cmd_read;

	/* only supports nvme_uring_cmd */
	if (o->cmd_type != FIO_URING_CMD_NVME)
		return -EINVAL;

	if (io_u->ddir == DDIR_TRIM && td->io_ops->flags & FIO_ASYNCIO_SYNC_TRIM)
		return 0;

	sqe = &ld->sqes[(io_u->index) << 1];

	if (o->registerfiles) {
		sqe->fd = f->engine_pos;
		sqe->flags = IOSQE_FIXED_FILE;
	} else {
		sqe->fd = f->fd;
	}
	sqe->rw_flags = 0;
	if (!td->o.odirect && o->uncached)
		sqe->rw_flags |= RWF_DONTCACHE;
	if (o->nowait)
		sqe->rw_flags |= RWF_NOWAIT;

	sqe->opcode = IORING_OP_URING_CMD;
	sqe->user_data = (unsigned long) io_u;
	if (o->nonvectored)
		sqe->cmd_op = NVME_URING_CMD_IO;
	else
		sqe->cmd_op = NVME_URING_CMD_IO_VEC;
	if (o->force_async && ++ld->prepped == o->force_async) {
		ld->prepped = 0;
		sqe->flags |= IOSQE_ASYNC;
	}
	if (o->fixedbufs) {
		sqe->uring_cmd_flags = IORING_URING_CMD_FIXED;
		sqe->buf_index = io_u->index;
	}

	cmd = (struct nvme_uring_cmd *)sqe->cmd;
	dsm_size = sizeof(*ld->dsm) + td->o.num_range * sizeof(struct nvme_dsm_range);
	ptr += io_u->index * dsm_size;
	dsm = (struct nvme_dsm *)ptr;

	/*
	 * If READ command belongs to the verification phase and the
	 * verify_mode=compare, convert READ to COMPARE command.
	 */
	if (io_u->flags & IO_U_F_VER_LIST && io_u->ddir == DDIR_READ &&
			o->verify_mode == FIO_URING_CMD_VMODE_COMPARE) {
		populate_verify_io_u(td, io_u);
		read_opcode = nvme_cmd_compare;
		io_u_set(td, io_u, IO_U_F_VER_IN_DEV);
	}

	return fio_nvme_uring_cmd_prep(cmd, io_u,
			o->nonvectored ? NULL : &ld->iovecs[io_u->index],
			dsm, read_opcode, ld->write_opcode,
			ld->cdw12_flags[io_u->ddir]);
}

static void fio_ioring_validate_md(struct thread_data *td, struct io_u *io_u)
{
	struct nvme_data *data;
	struct ioring_options *o = td->eo;
	int ret;

	data = FILE_ENG_DATA(io_u->file);
	if (data->pi_type && (io_u->ddir == DDIR_READ) && !o->pi_act) {
		ret = fio_nvme_pi_verify(data, io_u);
		if (ret)
			io_u->error = -ret;
	}

	return;
}

static struct io_u *fio_ioring_event(struct thread_data *td, int event)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct io_uring_cqe *cqe;
	struct io_u *io_u;
	unsigned index;

	index = (event + ld->cq_ring_off) & ld->cq_ring_mask;

	cqe = &ld->cq_ring.cqes[index];
	io_u = (struct io_u *) (uintptr_t) cqe->user_data;

	/* trim returns 0 on success */
	if (cqe->res == io_u->xfer_buflen ||
	    (io_u->ddir == DDIR_TRIM && !cqe->res)) {
		io_u->error = 0;
		return io_u;
	}

	if (cqe->res != io_u->xfer_buflen) {
		if (io_u->ddir == DDIR_TRIM) {
			ld->async_trim_fail = 1;
			cqe->res = 0;
		}
		if (cqe->res > io_u->xfer_buflen)
			io_u->error = -cqe->res;
		else
			io_u->resid = io_u->xfer_buflen - cqe->res;

		return io_u;
	}

	if (o->md_per_io_size)
		fio_ioring_validate_md(td, io_u);

	return io_u;
}

static struct io_u *fio_ioring_cmd_event(struct thread_data *td, int event)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct io_uring_cqe *cqe;
	struct io_u *io_u;
	struct nvme_data *data;
	unsigned index;
	int ret;

	index = (event + ld->cq_ring_off) & ld->cq_ring_mask;
	if (o->cmd_type == FIO_URING_CMD_NVME)
		index <<= 1;

	cqe = &ld->cq_ring.cqes[index];
	io_u = (struct io_u *) (uintptr_t) cqe->user_data;

	io_u->error = cqe->res;
	if (io_u->error != 0)
		goto ret;

	if (o->cmd_type == FIO_URING_CMD_NVME) {
		data = FILE_ENG_DATA(io_u->file);
		if (data->pi_type && (io_u->ddir == DDIR_READ) && !o->pi_act) {
			ret = fio_nvme_pi_verify(data, io_u);
			if (ret)
				io_u->error = ret;
		}
	}

ret:
	/*
	 * If IO_U_F_DEVICE_ERROR is not set, io_u->error will be parsed as an
	 * errno, otherwise device-specific error value (status value in CQE).
	 */
	if ((int)io_u->error > 0)
		io_u_set(td, io_u, IO_U_F_DEVICE_ERROR);
	else
		io_u_clear(td, io_u, IO_U_F_DEVICE_ERROR);
	io_u->error = abs((int)io_u->error);
	return io_u;
}

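/*
 * The index doubling above (index <<= 1) mirrors the SQE128/CQE32 setup
 * in fio_ioring_cmd_queue_init(): NVMe passthrough commands use 128-byte
 * SQEs and 32-byte CQEs, so each logical slot occupies two entries in
 * the normally-sized ring arrays.
 */
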
static char *fio_ioring_cmd_errdetails(struct thread_data *td,
				       struct io_u *io_u)
{
	struct ioring_options *o = td->eo;
	unsigned int sct = (io_u->error >> 8) & 0x7;
	unsigned int sc = io_u->error & 0xff;
#define MAXERRDETAIL 1024
#define MAXMSGCHUNK 128
	char *msg, msgchunk[MAXMSGCHUNK];

	if (!(io_u->flags & IO_U_F_DEVICE_ERROR))
		return NULL;

	msg = calloc(1, MAXERRDETAIL);
	strcpy(msg, "io_uring_cmd: ");

	snprintf(msgchunk, MAXMSGCHUNK, "%s: ", io_u->file->file_name);
	strlcat(msg, msgchunk, MAXERRDETAIL);

	if (o->cmd_type == FIO_URING_CMD_NVME) {
		strlcat(msg, "cq entry status (", MAXERRDETAIL);

		snprintf(msgchunk, MAXMSGCHUNK, "sct=0x%02x; ", sct);
		strlcat(msg, msgchunk, MAXERRDETAIL);

		snprintf(msgchunk, MAXMSGCHUNK, "sc=0x%02x)", sc);
		strlcat(msg, msgchunk, MAXERRDETAIL);
	} else {
		/* Print status code in generic form */
		snprintf(msgchunk, MAXMSGCHUNK, "status=0x%x", io_u->error);
		strlcat(msg, msgchunk, MAXERRDETAIL);
	}

	return msg;
}

static unsigned fio_ioring_cqring_reap(struct thread_data *td, unsigned int max)
{
	struct ioring_data *ld = td->io_ops_data;
	struct io_cq_ring *ring = &ld->cq_ring;
	unsigned head = *ring->head;
	unsigned available = atomic_load_acquire(ring->tail) - head;

	if (!available)
		return 0;

	available = min(available, max);
	/*
	 * The CQ consumer index is advanced before the CQEs are actually read.
	 * This is generally unsafe, as it lets the kernel reuse the CQE slots.
	 * However, the CQ is sized large enough for the maximum iodepth and a
	 * new SQE won't be submitted until the CQE is processed, so the CQE
	 * slot won't actually be reused until it has been processed.
	 */
	atomic_store_relaxed(ring->head, head + available);
	return available;
}

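/*
 * The acquire load of the CQ tail above pairs with the kernel's release
 * store when it posts completions, so the CQE contents are guaranteed to
 * be visible before they are consumed; the head update only needs a
 * relaxed store, for the reasons given in the comment inside the
 * function.
 */
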
static int fio_ioring_getevents(struct thread_data *td, unsigned int min,
				unsigned int max, const struct timespec *t)
{
	struct ioring_data *ld = td->io_ops_data;
	unsigned actual_min = td->o.iodepth_batch_complete_min == 0 ? 0 : min;
	struct ioring_options *o = td->eo;
	struct io_cq_ring *ring = &ld->cq_ring;
	unsigned events = 0;
	int r;

	ld->cq_ring_off = *ring->head;
	for (;;) {
		r = fio_ioring_cqring_reap(td, max - events);
		if (r) {
			events += r;
			if (events >= min)
				return events;

			if (actual_min != 0)
				actual_min -= r;
		}

		if (!o->sqpoll_thread) {
			r = io_uring_enter(ld, 0, actual_min,
						IORING_ENTER_GETEVENTS);
			if (r < 0) {
				if (errno == EAGAIN || errno == EINTR)
					continue;
				r = -errno;
				td_verror(td, errno, "io_uring_enter");
				return r;
			}
		}
	}
}

static inline void fio_ioring_cmd_nvme_pi(struct thread_data *td,
					  struct io_u *io_u)
{
	struct ioring_data *ld = td->io_ops_data;
	struct nvme_uring_cmd *cmd;
	struct io_uring_sqe *sqe;

	if (io_u->ddir == DDIR_TRIM)
		return;

	sqe = &ld->sqes[(io_u->index) << 1];
	cmd = (struct nvme_uring_cmd *)sqe->cmd;

	fio_nvme_pi_fill(cmd, io_u, &ld->ext_opts);
}

static inline void fio_ioring_setup_pi(struct thread_data *td,
				       struct io_u *io_u)
{
	struct ioring_data *ld = td->io_ops_data;

	if (io_u->ddir == DDIR_TRIM)
		return;

	fio_nvme_generate_guard(io_u, &ld->ext_opts);
}

static inline void fio_ioring_cmdprio_prep(struct thread_data *td,
					   struct io_u *io_u)
{
	struct ioring_data *ld = td->io_ops_data;
	struct cmdprio *cmdprio = &ld->cmdprio;

	if (fio_cmdprio_set_ioprio(td, cmdprio, io_u))
		ld->sqes[io_u->index].ioprio = io_u->ioprio;
}

static enum fio_q_status fio_ioring_queue(struct thread_data *td,
					  struct io_u *io_u)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct io_sq_ring *ring = &ld->sq_ring;
	unsigned tail;

	fio_ro_check(td, io_u);

	/* should not hit... */
	if (ld->queued == td->o.iodepth)
		return FIO_Q_BUSY;

	/* if async trim has been tried and failed, punt to sync */
	if (io_u->ddir == DDIR_TRIM && ld->async_trim_fail) {
		if (ld->queued)
			return FIO_Q_BUSY;

		do_io_u_trim(td, io_u);

		io_u_mark_submit(td, 1);
		io_u_mark_complete(td, 1);
		return FIO_Q_COMPLETED;
	}

	if (ld->cmdprio.mode != CMDPRIO_MODE_NONE)
		fio_ioring_cmdprio_prep(td, io_u);

	if (o->cmd_type == FIO_URING_CMD_NVME && ld->is_uring_cmd_eng)
		fio_ioring_cmd_nvme_pi(td, io_u);
	else if (o->md_per_io_size)
		fio_ioring_setup_pi(td, io_u);

	tail = *ring->tail;
	ring->array[tail & ld->sq_ring_mask] = io_u->index;
	atomic_store_release(ring->tail, tail + 1);

	ld->queued++;
	return FIO_Q_QUEUED;
}

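/*
 * Queueing follows the standard SQ publish protocol: the io_u index is
 * written into the SQ array slot first, then the tail is advanced with a
 * release store so the kernel (or the SQPOLL thread) observes a fully
 * initialized entry. The actual io_uring_enter() submission happens
 * later, in fio_ioring_commit().
 */
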
static void fio_ioring_queued(struct thread_data *td, int start, int nr)
{
	struct ioring_data *ld = td->io_ops_data;
	struct timespec now;

	if (!fio_fill_issue_time(td))
		return;

	fio_gettime(&now, NULL);

	while (nr--) {
		struct io_sq_ring *ring = &ld->sq_ring;
		int index = ring->array[start & ld->sq_ring_mask];
		struct io_u *io_u = ld->io_u_index[index];

		memcpy(&io_u->issue_time, &now, sizeof(now));
		io_u_queued(td, io_u);

		start++;
	}

	/*
	 * only used for iolog
	 */
	if (td->o.read_iolog_file)
		memcpy(&td->last_issue, &now, sizeof(now));
}

static int fio_ioring_commit(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	int ret;

	if (!ld->queued)
		return 0;

	/*
	 * Kernel side does submission. Just need to check if the ring is
	 * flagged as needing a kick; if so, call io_uring_enter(). This
	 * only happens if we've been idle too long.
	 */
	if (o->sqpoll_thread) {
		struct io_sq_ring *ring = &ld->sq_ring;
		unsigned start = *ld->sq_ring.tail - ld->queued;
		unsigned flags;

		flags = atomic_load_relaxed(ring->flags);
		if (flags & IORING_SQ_NEED_WAKEUP)
			io_uring_enter(ld, ld->queued, 0,
					IORING_ENTER_SQ_WAKEUP);
		fio_ioring_queued(td, start, ld->queued);
		io_u_mark_submit(td, ld->queued);

		ld->queued = 0;
		return 0;
	}

	do {
		unsigned start = *ld->sq_ring.head;
		long nr = ld->queued;

		ret = io_uring_enter(ld, nr, 0, IORING_ENTER_GETEVENTS);
		if (ret > 0) {
			fio_ioring_queued(td, start, ret);
			io_u_mark_submit(td, ret);

			ld->queued -= ret;
			ret = 0;
		} else if (!ret) {
			io_u_mark_submit(td, ret);
			continue;
		} else {
			if (errno == EAGAIN || errno == EINTR) {
				ret = fio_ioring_cqring_reap(td, ld->queued);
				if (ret)
					continue;
				/* Shouldn't happen */
				usleep(1);
				continue;
			}
			ret = -errno;
			td_verror(td, errno, "io_uring_enter submit");
			break;
		}
	} while (ld->queued);

	return ret;
}

static void fio_ioring_unmap(struct ioring_data *ld)
{
	int i;

	for (i = 0; i < FIO_ARRAY_SIZE(ld->mmap); i++)
		munmap(ld->mmap[i].ptr, ld->mmap[i].len);
	close(ld->ring_fd);
}

static void fio_ioring_cleanup(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;

	if (ld) {
		if (!(td->flags & TD_F_CHILD))
			fio_ioring_unmap(ld);

		fio_cmdprio_cleanup(&ld->cmdprio);
		free(ld->io_u_index);
		free(ld->md_buf);
		free(ld->pi_attr);
		free(ld->iovecs);
		free(ld->fds);
		free(ld->dsm);
		free(ld);
	}
}

static int fio_ioring_mmap(struct ioring_data *ld, struct io_uring_params *p)
{
	struct io_sq_ring *sring = &ld->sq_ring;
	struct io_cq_ring *cring = &ld->cq_ring;
	void *ptr;

	ld->mmap[0].len = p->sq_off.array + p->sq_entries * sizeof(__u32);
	ptr = mmap(0, ld->mmap[0].len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, ld->ring_fd,
			IORING_OFF_SQ_RING);
	ld->mmap[0].ptr = ptr;
	sring->head = ptr + p->sq_off.head;
	sring->tail = ptr + p->sq_off.tail;
	sring->ring_mask = ptr + p->sq_off.ring_mask;
	sring->ring_entries = ptr + p->sq_off.ring_entries;
	sring->flags = ptr + p->sq_off.flags;
	sring->array = ptr + p->sq_off.array;
	ld->sq_ring_mask = *sring->ring_mask;

	if (p->flags & IORING_SETUP_SQE128)
		ld->mmap[1].len = 2 * p->sq_entries * sizeof(struct io_uring_sqe);
	else
		ld->mmap[1].len = p->sq_entries * sizeof(struct io_uring_sqe);
	ld->sqes = mmap(0, ld->mmap[1].len, PROT_READ | PROT_WRITE,
				MAP_SHARED | MAP_POPULATE, ld->ring_fd,
				IORING_OFF_SQES);
	ld->mmap[1].ptr = ld->sqes;

	if (p->flags & IORING_SETUP_CQE32) {
		ld->mmap[2].len = p->cq_off.cqes +
					2 * p->cq_entries * sizeof(struct io_uring_cqe);
	} else {
		ld->mmap[2].len = p->cq_off.cqes +
					p->cq_entries * sizeof(struct io_uring_cqe);
	}
	ptr = mmap(0, ld->mmap[2].len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_POPULATE, ld->ring_fd,
			IORING_OFF_CQ_RING);
	ld->mmap[2].ptr = ptr;
	cring->head = ptr + p->cq_off.head;
	cring->tail = ptr + p->cq_off.tail;
	cring->ring_mask = ptr + p->cq_off.ring_mask;
	cring->ring_entries = ptr + p->cq_off.ring_entries;
	cring->cqes = ptr + p->cq_off.cqes;
	ld->cq_ring_mask = *cring->ring_mask;
	return 0;
}

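/*
 * Three separate mmap regions make up the ring: the SQ ring metadata and
 * index array (IORING_OFF_SQ_RING), the SQE array itself
 * (IORING_OFF_SQES), and the CQ ring together with its CQEs
 * (IORING_OFF_CQ_RING). The SQE/CQE array lengths are doubled when
 * SQE128/CQE32 are set, as required for NVMe passthrough.
 */
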
static void fio_ioring_probe(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct io_uring_probe *p;
	int ret;

	/* already set by user, don't touch */
	if (o->nonvectored != -1)
		return;

	/* default to off, as that's always safe */
	o->nonvectored = 0;

	p = calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op));
	if (!p)
		return;

	ret = syscall(__NR_io_uring_register, ld->ring_fd,
			IORING_REGISTER_PROBE, p, 256);
	if (ret < 0)
		goto out;

	if (IORING_OP_WRITE > p->ops_len)
		goto out;

	if ((p->ops[IORING_OP_READ].flags & IO_URING_OP_SUPPORTED) &&
	    (p->ops[IORING_OP_WRITE].flags & IO_URING_OP_SUPPORTED))
		o->nonvectored = 1;
out:
	free(p);
}

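/*
 * IORING_REGISTER_PROBE reports per-opcode support, so nonvectored only
 * defaults to on when the kernel confirms both IORING_OP_READ and
 * IORING_OP_WRITE; the non-vectored variants were added to io_uring
 * later than READV/WRITEV, hence the conservative default of off.
 */
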
static int fio_ioring_queue_init(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	int depth = ld->iodepth;
	struct io_uring_params p;
	int ret;

	memset(&p, 0, sizeof(p));

	if (o->hipri)
		p.flags |= IORING_SETUP_IOPOLL;
	if (o->sqpoll_thread) {
		p.flags |= IORING_SETUP_SQPOLL;
		if (o->sqpoll_set) {
			p.flags |= IORING_SETUP_SQ_AFF;
			p.sq_thread_cpu = o->sqpoll_cpu;
		}

		/*
		 * Submission latency for sqpoll_thread is just the time it
		 * takes to fill in the SQ ring entries, and any syscall if
		 * IORING_SQ_NEED_WAKEUP is set, we don't need to log that time
		 * separately.
		 */
		td->o.disable_slat = 1;
	}

	/*
	 * Clamp CQ ring size at our SQ ring size, we don't need more entries
	 * than that.
	 */
	p.flags |= IORING_SETUP_CQSIZE;
	p.cq_entries = depth;

	/*
	 * Setup COOP_TASKRUN as we don't need to get IPI interrupted for
	 * completing IO operations.
	 */
	p.flags |= IORING_SETUP_COOP_TASKRUN;

	/*
	 * io_uring is always a single issuer, and we can defer task_work
	 * runs until we reap events.
	 */
	p.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;

retry:
	ret = syscall(__NR_io_uring_setup, depth, &p);
	if (ret < 0) {
		if (errno == EINVAL && p.flags & IORING_SETUP_DEFER_TASKRUN) {
			p.flags &= ~IORING_SETUP_DEFER_TASKRUN;
			p.flags &= ~IORING_SETUP_SINGLE_ISSUER;
			goto retry;
		}
		if (errno == EINVAL && p.flags & IORING_SETUP_COOP_TASKRUN) {
			p.flags &= ~IORING_SETUP_COOP_TASKRUN;
			goto retry;
		}
		if (errno == EINVAL && p.flags & IORING_SETUP_CQSIZE) {
			p.flags &= ~IORING_SETUP_CQSIZE;
			goto retry;
		}
		return ret;
	}

	ld->ring_fd = ret;

	fio_ioring_probe(td);

	if (o->fixedbufs) {
		ret = syscall(__NR_io_uring_register, ld->ring_fd,
				IORING_REGISTER_BUFFERS, ld->iovecs, depth);
		if (ret < 0)
			return ret;
	}

	return fio_ioring_mmap(ld, &p);
}

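/*
 * The retry ladder above degrades gracefully on older kernels: each
 * optional setup flag (DEFER_TASKRUN + SINGLE_ISSUER, then COOP_TASKRUN,
 * then CQSIZE) is stripped in turn when io_uring_setup() rejects it with
 * EINVAL, so the ring is still created with whatever the kernel supports.
 */
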
static int fio_ioring_cmd_queue_init(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	int depth = ld->iodepth;
	struct io_uring_params p;
	int ret;

	memset(&p, 0, sizeof(p));

	if (o->hipri)
		p.flags |= IORING_SETUP_IOPOLL;
	if (o->sqpoll_thread) {
		p.flags |= IORING_SETUP_SQPOLL;
		if (o->sqpoll_set) {
			p.flags |= IORING_SETUP_SQ_AFF;
			p.sq_thread_cpu = o->sqpoll_cpu;
		}

		/*
		 * Submission latency for sqpoll_thread is just the time it
		 * takes to fill in the SQ ring entries, and any syscall if
		 * IORING_SQ_NEED_WAKEUP is set, we don't need to log that time
		 * separately.
		 */
		td->o.disable_slat = 1;
	}
	if (o->cmd_type == FIO_URING_CMD_NVME) {
		p.flags |= IORING_SETUP_SQE128;
		p.flags |= IORING_SETUP_CQE32;
	}

	/*
	 * Clamp CQ ring size at our SQ ring size, we don't need more entries
	 * than that.
	 */
	p.flags |= IORING_SETUP_CQSIZE;
	p.cq_entries = depth;

	/*
	 * Setup COOP_TASKRUN as we don't need to get IPI interrupted for
	 * completing IO operations.
	 */
	p.flags |= IORING_SETUP_COOP_TASKRUN;

	/*
	 * io_uring is always a single issuer, and we can defer task_work
	 * runs until we reap events.
	 */
	p.flags |= IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;

retry:
	ret = syscall(__NR_io_uring_setup, depth, &p);
	if (ret < 0) {
		if (errno == EINVAL && p.flags & IORING_SETUP_DEFER_TASKRUN) {
			p.flags &= ~IORING_SETUP_DEFER_TASKRUN;
			p.flags &= ~IORING_SETUP_SINGLE_ISSUER;
			goto retry;
		}
		if (errno == EINVAL && p.flags & IORING_SETUP_COOP_TASKRUN) {
			p.flags &= ~IORING_SETUP_COOP_TASKRUN;
			goto retry;
		}
		if (errno == EINVAL && p.flags & IORING_SETUP_CQSIZE) {
			p.flags &= ~IORING_SETUP_CQSIZE;
			goto retry;
		}
		return ret;
	}

	ld->ring_fd = ret;

	fio_ioring_probe(td);

	if (o->fixedbufs) {
		ret = syscall(__NR_io_uring_register, ld->ring_fd,
				IORING_REGISTER_BUFFERS, ld->iovecs, depth);
		if (ret < 0)
			return ret;
	}

	return fio_ioring_mmap(ld, &p);
}

static int fio_ioring_register_files(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;
	struct fio_file *f;
	unsigned int i;
	int ret;

	ld->fds = calloc(td->o.nr_files, sizeof(int));

	for_each_file(td, f, i) {
		ret = generic_open_file(td, f);
		if (ret)
			goto err;
		ld->fds[i] = f->fd;
		f->engine_pos = i;
	}

	ret = syscall(__NR_io_uring_register, ld->ring_fd,
			IORING_REGISTER_FILES, ld->fds, td->o.nr_files);
	if (ret) {
err:
		free(ld->fds);
		ld->fds = NULL;
	}

	/*
	 * Pretend the file is closed again, and really close it if we hit
	 * an error.
	 */
	for_each_file(td, f, i) {
		if (ret) {
			int fio_unused ret2;
			ret2 = generic_close_file(td, f);
		} else
			f->fd = -1;
	}

	return ret;
}

static int fio_ioring_post_init(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct io_u *io_u;
	int err, i;

	for (i = 0; i < td->o.iodepth; i++) {
		struct iovec *iov = &ld->iovecs[i];

		io_u = ld->io_u_index[i];
		iov->iov_base = io_u->buf;
		iov->iov_len = td_max_bs(td);
	}

	err = fio_ioring_queue_init(td);
	if (err) {
		int init_err = errno;

		if (init_err == ENOSYS)
			log_err("fio: your kernel doesn't support io_uring\n");
		td_verror(td, init_err, "io_queue_init");
		return 1;
	}

	for (i = 0; i < ld->iodepth; i++) {
		struct io_uring_sqe *sqe;

		sqe = &ld->sqes[i];
		memset(sqe, 0, sizeof(*sqe));
	}

	if (o->registerfiles) {
		err = fio_ioring_register_files(td);
		if (err) {
			td_verror(td, errno, "ioring_register_files");
			return 1;
		}
	}

	return 0;
}

static int fio_ioring_cmd_post_init(struct thread_data *td)
{
	struct ioring_data *ld = td->io_ops_data;
	struct ioring_options *o = td->eo;
	struct io_u *io_u;
	int err, i;

	for (i = 0; i < td->o.iodepth; i++) {
		struct iovec *iov = &ld->iovecs[i];

		io_u = ld->io_u_index[i];
		iov->iov_base = io_u->buf;
		iov->iov_len = td_max_bs(td);
	}

	err = fio_ioring_cmd_queue_init(td);
	if (err) {
		int init_err = errno;

		td_verror(td, init_err, "io_queue_init");
		return 1;
	}

	for (i = 0; i < ld->iodepth; i++) {
		struct io_uring_sqe *sqe;

		if (o->cmd_type == FIO_URING_CMD_NVME) {
			sqe = &ld->sqes[i << 1];
			memset(sqe, 0, 2 * sizeof(*sqe));
		} else {
			sqe = &ld->sqes[i];
			memset(sqe, 0, sizeof(*sqe));
		}
	}

	if (o->registerfiles) {
		err = fio_ioring_register_files(td);
		if (err) {
			td_verror(td, errno, "ioring_register_files");
			return 1;
		}
	}

	return 0;
}

static void parse_prchk_flags(struct ioring_options *o)
{
	if (!o->pi_chk)
		return;

	if (strstr(o->pi_chk, "GUARD") != NULL)
		o->prchk = NVME_IO_PRINFO_PRCHK_GUARD;
	if (strstr(o->pi_chk, "REFTAG") != NULL)
		o->prchk |= NVME_IO_PRINFO_PRCHK_REF;
	if (strstr(o->pi_chk, "APPTAG") != NULL)
		o->prchk |= NVME_IO_PRINFO_PRCHK_APP;
}

static int fio_ioring_cmd_init(struct thread_data *td, struct ioring_data *ld)
{
	struct ioring_options *o = td->eo;

	if (td_write(td)) {
		switch (o->write_mode) {
		case FIO_URING_CMD_WMODE_UNCOR:
			ld->write_opcode = nvme_cmd_write_uncor;
			break;
		case FIO_URING_CMD_WMODE_ZEROES:
			ld->write_opcode = nvme_cmd_write_zeroes;
			if (o->deac)
				ld->cdw12_flags[DDIR_WRITE] = 1 << 25;
			break;
		case FIO_URING_CMD_WMODE_VERIFY:
			ld->write_opcode = nvme_cmd_verify;
			break;
		default:
			ld->write_opcode = nvme_cmd_write;
			break;
		}
	}

	if (o->readfua)
		ld->cdw12_flags[DDIR_READ] = 1 << 30;
	if (o->writefua)
		ld->cdw12_flags[DDIR_WRITE] = 1 << 30;

	return 0;
}

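/*
 * The cdw12_flags values map to NVMe command dword 12 bits: bit 30 is
 * the FUA (force unit access) bit for Read/Write commands, and bit 25 is
 * the DEAC (deallocate) bit for the Write Zeroes command.
 */
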
bffad86f | 1463 | static int fio_ioring_init(struct thread_data *td) |
52885fa2 | 1464 | { |
5ffd5626 | 1465 | struct ioring_options *o = td->eo; |
bffad86f | 1466 | struct ioring_data *ld; |
5d4ee0de AK |
1467 | struct nvme_dsm *dsm; |
1468 | void *ptr; | |
1469 | unsigned int dsm_size; | |
2d6451c9 | 1470 | unsigned long long md_size; |
5d4ee0de | 1471 | int ret, i; |
be06e55f | 1472 | struct nvme_cmd_ext_io_opts *ext_opts; |
52885fa2 | 1473 | |
5ffd5626 JA |
1474 | /* sqthread submission requires registered files */ |
1475 | if (o->sqpoll_thread) | |
1476 | o->registerfiles = 1; | |
1477 | ||
1478 | if (o->registerfiles && td->o.nr_files != td->o.open_files) { | |
1479 | log_err("fio: io_uring registered files require nr_files to " | |
1480 | "be identical to open_files\n"); | |
1481 | return 1; | |
1482 | } | |
1483 | ||
52885fa2 JA |
1484 | ld = calloc(1, sizeof(*ld)); |
1485 | ||
9897c064 VF |
1486 | ld->is_uring_cmd_eng = (td->io_ops->prep == fio_ioring_cmd_prep); |
1487 | ||
af0ad0fa JA |
1488 | /* |
1489 | * The internal io_uring queue depth must be a power-of-2, as that's | |
1490 | * how the ring interface works. So round that up, in case the user | |
1491 | * set iodepth isn't a power-of-2. Leave the fio depth the same, as | |
1492 | * not to be driving too much of an iodepth, if we did round up. | |
1493 | */ | |
1494 | ld->iodepth = roundup_pow2(td->o.iodepth); | |
b87aa01a | 1495 | |
52885fa2 JA |
1496 | /* io_u index */ |
1497 | ld->io_u_index = calloc(td->o.iodepth, sizeof(struct io_u *)); | |
2d6451c9 | 1498 | |
f97d9f38 VF |
1499 | if (!ld->is_uring_cmd_eng && o->md_per_io_size) { |
1500 | if (o->apptag_mask != 0xffff) { | |
1501 | log_err("fio: io_uring with metadata requires an apptag_mask of 0xffff\n"); | |
bdcb359c | 1502 | free(ld->io_u_index); |
f97d9f38 VF |
1503 | free(ld); |
1504 | return 1; | |
1505 | } | |
1506 | } | |
1507 | ||
2d6451c9 | 1508 | /* |
f97d9f38 | 1509 | * metadata buffer |
2d6451c9 AK |
1510 | * We are only supporting iomem=malloc / mem=malloc as of now. |
1511 | */ | |
f97d9f38 VF |
1512 | if (o->md_per_io_size && (!ld->is_uring_cmd_eng || |
1513 | (ld->is_uring_cmd_eng && o->cmd_type == FIO_URING_CMD_NVME))) { | |
2d6451c9 AK |
1514 | md_size = (unsigned long long) o->md_per_io_size |
1515 | * (unsigned long long) td->o.iodepth; | |
1516 | md_size += page_mask + td->o.mem_align; | |
1517 | if (td->o.mem_align && td->o.mem_align > page_size) | |
1518 | md_size += td->o.mem_align - page_size; | |
d78f2f3f VF |
1519 | ld->md_buf = malloc(md_size); |
1520 | if (!ld->md_buf) { | |
bdcb359c | 1521 | free(ld->io_u_index); |
6795954b | 1522 | free(ld); |
2d6451c9 AK |
1523 | return 1; |
1524 | } | |
f97d9f38 VF |
1525 | |
1526 | if (!ld->is_uring_cmd_eng) { | |
1527 | ld->pi_attr = calloc(ld->iodepth, sizeof(struct io_uring_attr_pi)); | |
1528 | if (!ld->pi_attr) { | |
bdcb359c | 1529 | free(ld->io_u_index); |
f97d9f38 VF |
1530 | free(ld->md_buf); |
1531 | free(ld); | |
1532 | return 1; | |
1533 | } | |
1534 | } | |
1535 | ||
2d6451c9 | 1536 | } |
3ee8311a | 1537 | parse_prchk_flags(o); |
be06e55f VF |
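| /*
|  * With pi_act set, the PRACT bit offloads PI generation/checking to the
|  * controller; otherwise the prchk bits select which of the guard, app
|  * and ref tags the device verifies against the host-supplied PI.
|  */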
1538 | ext_opts = &ld->ext_opts; |
1539 | if (o->pi_act) | |
1540 | ext_opts->io_flags |= NVME_IO_PRINFO_PRACT; | |
1541 | ext_opts->io_flags |= o->prchk; | |
1542 | ext_opts->apptag = o->apptag; | |
1543 | ext_opts->apptag_mask = o->apptag_mask; | |
2d6451c9 | 1544 | |
af0ad0fa | 1545 | ld->iovecs = calloc(ld->iodepth, sizeof(struct iovec)); |
52885fa2 JA |
1546 | |
1547 | td->io_ops_data = ld; | |
b2a432bf | 1548 | |
d6cbeab4 | 1549 | ret = fio_cmdprio_init(td, &ld->cmdprio, &o->cmdprio_options); |
e9f6567a DLM |
1550 | if (ret) { |
1551 | td_verror(td, EINVAL, "fio_ioring_init"); | |
b2a432bf PC |
1552 | return 1; |
1553 | } | |
1af44196 | 1554 | |
4885a6eb VF |
1555 | /* |
1556 | * For io_uring_cmd, trims are async operations unless we are operating | |
1557 | * in zbd mode, where trim means zone reset.
1558 | */ | |
bc428fd8 | 1559 | if (td_trim(td) && td->o.zone_mode == ZONE_MODE_ZBD && |
9897c064 | 1560 | ld->is_uring_cmd_eng) { |
4885a6eb | 1561 | td->io_ops->flags |= FIO_ASYNCIO_SYNC_TRIM; |
5d4ee0de | 1562 | } else { |
0e987a24 JA |
1563 | dsm_size = sizeof(*ld->dsm); |
1564 | dsm_size += td->o.num_range * sizeof(struct nvme_dsm_range); | |
5d4ee0de AK |
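| /*
|  * ld->dsm is one flat allocation holding iodepth variable-size slots,
|  * each a struct nvme_dsm header followed by num_range
|  * struct nvme_dsm_range entries, hence the dsm_size stride below.
|  */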
1565 | ld->dsm = calloc(td->o.iodepth, dsm_size); |
1566 | ptr = ld->dsm; | |
1567 | for (i = 0; i < td->o.iodepth; i++) { | |
1568 | dsm = (struct nvme_dsm *)ptr; | |
1569 | dsm->nr_ranges = td->o.num_range; | |
1570 | ptr += dsm_size; | |
1571 | } | |
1572 | } | |
4885a6eb | 1573 | |
9897c064 | 1574 | if (ld->is_uring_cmd_eng) |
bc428fd8 JA |
1575 | return fio_ioring_cmd_init(td, ld); |
1576 | return 0; | |
1577 | } | |
87a4903f | 1578 | |
bc428fd8 | 1579 | static int fio_ioring_io_u_init(struct thread_data *td, struct io_u *io_u) |
52885fa2 | 1580 | { |
bffad86f | 1581 | struct ioring_data *ld = td->io_ops_data; |
2d6451c9 | 1582 | struct ioring_options *o = td->eo; |
5163f35e | 1583 | struct nvme_pi_data *pi_data; |
f97d9f38 | 1584 | char *p, *q; |
52885fa2 | 1585 | |
f97d9f38 | 1586 | ld->io_u_index[io_u->index] = io_u; |
2d6451c9 | 1587 | |
bc428fd8 JA |
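| /* carve this io_u's slot out of the shared metadata buffer: align the base, then index by md_per_io_size */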
1588 | p = PTR_ALIGN(ld->md_buf, page_mask) + td->o.mem_align; |
1589 | p += o->md_per_io_size * io_u->index; | |
1590 | io_u->mmap_data = p; | |
1591 | ||
f97d9f38 VF |
1592 | if (ld->pi_attr) { |
1593 | struct io_uring_attr_pi *pi_attr; | |
1594 | ||
1595 | q = ld->pi_attr; | |
1596 | q += (sizeof(struct io_uring_attr_pi) * io_u->index); | |
1597 | io_u->pi_attr = q; | |
1598 | ||
1599 | pi_attr = io_u->pi_attr; | |
1600 | pi_attr->len = o->md_per_io_size; | |
1601 | pi_attr->app_tag = o->apptag; | |
1602 | pi_attr->flags = 0; | |
1603 | if (strstr(o->pi_chk, "GUARD") != NULL) | |
1604 | pi_attr->flags |= IO_INTEGRITY_CHK_GUARD; | |
1605 | if (strstr(o->pi_chk, "REFTAG") != NULL) | |
1606 | pi_attr->flags |= IO_INTEGRITY_CHK_REFTAG; | |
1607 | if (strstr(o->pi_chk, "APPTAG") != NULL) | |
1608 | pi_attr->flags |= IO_INTEGRITY_CHK_APPTAG; | |
1609 | } | |
1610 | ||
bc428fd8 JA |
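| /*
|  * With pi_act off, fio generates and verifies the PI tuples itself,
|  * so each io_u carries its own tag state in engine_data.
|  */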
1611 | if (!o->pi_act) { |
1612 | pi_data = calloc(1, sizeof(*pi_data)); | |
1613 | pi_data->io_flags |= o->prchk; | |
1614 | pi_data->apptag_mask = o->apptag_mask; | |
1615 | pi_data->apptag = o->apptag; | |
1616 | io_u->engine_data = pi_data; | |
2d6451c9 AK |
1617 | } |
1618 | ||
52885fa2 JA |
1619 | return 0; |
1620 | } | |
1621 | ||
5163f35e AK |
1622 | static void fio_ioring_io_u_free(struct thread_data *td, struct io_u *io_u) |
1623 | { | |
f928d3ea | 1624 | struct nvme_pi_data *pi_data = io_u->engine_data;
bc428fd8 | 1625 |
f928d3ea VF |
1626 | free(pi_data);
1627 | io_u->engine_data = NULL;
5163f35e AK |
1628 | } |
1629 | ||
f97d9f38 VF |
1630 | static int fio_get_pi_info(struct fio_file *f, struct nvme_data *data) |
1631 | { | |
1632 | struct logical_block_metadata_cap md_cap; | |
1633 | int ret; | |
1634 | int fd, err = 0; | |
1635 | ||
1636 | fd = open(f->file_name, O_RDONLY); | |
1637 | if (fd < 0) | |
1638 | return -errno; | |
1639 | ||
1640 | ret = ioctl(fd, FS_IOC_GETLBMD_CAP, &md_cap); | |
1641 | if (ret < 0) { | |
1642 | err = -errno; | |
1643 | log_err("%s: failed to query protection information capabilities; error %d\n", f->file_name, errno); | |
1644 | goto out; | |
1645 | } | |
1646 | ||
1647 | if (!(md_cap.lbmd_flags & LBMD_PI_CAP_INTEGRITY)) { | |
1648 | log_err("%s: Protection information not supported\n", f->file_name); | |
1649 | err = -ENOTSUP; | |
1650 | goto out; | |
1651 | } | |
1652 | ||
1653 | /* Currently we don't support storage tags */ | |
1654 | if (md_cap.lbmd_storage_tag_size) { | |
1655 | log_err("%s: Storage tags not supported\n", f->file_name);
1656 | err = -ENOTSUP; | |
1657 | goto out; | |
1658 | } | |
1659 | ||
1660 | data->lba_size = md_cap.lbmd_interval; | |
1661 | data->lba_shift = ilog2(data->lba_size); | |
1662 | data->ms = md_cap.lbmd_size; | |
1663 | data->pi_size = md_cap.lbmd_pi_size; | |
1664 | data->pi_loc = !(md_cap.lbmd_pi_offset); | |
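| /* a pi_offset of zero means the PI tuple leads each metadata interval, which is what pi_loc records */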
1665 | ||
1666 | /* Assume Type 1 PI if reference tags are supported */
1667 | if (md_cap.lbmd_flags & LBMD_PI_CAP_REFTAG) | |
1668 | data->pi_type = NVME_NS_DPS_PI_TYPE1; | |
1669 | else | |
1670 | data->pi_type = NVME_NS_DPS_PI_TYPE3; | |
1671 | ||
1672 | switch (md_cap.lbmd_guard_tag_type) { | |
1673 | case LBMD_PI_CSUM_CRC16_T10DIF: | |
1674 | data->guard_type = NVME_NVM_NS_16B_GUARD; | |
1675 | break; | |
1676 | case LBMD_PI_CSUM_CRC64_NVME: | |
1677 | data->guard_type = NVME_NVM_NS_64B_GUARD; | |
1678 | break; | |
1679 | default: | |
1680 | log_err("%s: unsupported checksum type %d\n", f->file_name, | |
1681 | md_cap.lbmd_guard_tag_type); | |
1682 | err = -ENOTSUP; | |
1683 | goto out; | |
1684 | } | |
1685 | ||
1686 | out: | |
1687 | close(fd); | |
1688 | return err; | |
1689 | } | |
1690 | ||
1691 | static inline int fio_ioring_open_file_md(struct thread_data *td, struct fio_file *f) | |
1692 | { | |
1693 | int ret = 0; | |
1694 | struct nvme_data *data = NULL; | |
1695 | ||
1696 | data = FILE_ENG_DATA(f); | |
1697 | if (data == NULL) { | |
1698 | data = calloc(1, sizeof(struct nvme_data)); | |
1699 | ret = fio_get_pi_info(f, data); | |
1700 | if (ret) { | |
1701 | free(data); | |
1702 | return ret; | |
1703 | } | |
1704 | ||
1705 | FILE_SET_ENG_DATA(f, data); | |
1706 | } | |
1707 | ||
1708 | return ret; | |
1709 | } | |
1710 | ||
5ffd5626 JA |
1711 | static int fio_ioring_open_file(struct thread_data *td, struct fio_file *f) |
1712 | { | |
1713 | struct ioring_data *ld = td->io_ops_data; | |
1714 | struct ioring_options *o = td->eo; | |
1715 | ||
f97d9f38 VF |
1716 | if (o->md_per_io_size) { |
1717 | /* | |
1718 | * This will be a no-op when called by the io_uring_cmd | |
1719 | * ioengine because engine data has already been collected by | |
1720 | * the time this call is made.
1721 | */ | |
1722 | int ret = fio_ioring_open_file_md(td, f); | |
1723 | if (ret) | |
1724 | return ret; | |
1725 | } | |
1726 | ||
17318cf6 | 1727 | if (!ld || !o->registerfiles) |
5ffd5626 JA |
1728 | return generic_open_file(td, f); |
1729 | ||
1730 | f->fd = ld->fds[f->engine_pos]; | |
1731 | return 0; | |
1732 | } | |
1733 | ||
0d9ed42a JA |
1734 | static int verify_params(struct thread_data *td, struct nvme_data *data, |
1735 | struct fio_file *f, enum fio_ddir ddir) | |
855dc4d4 | 1736 | { |
855dc4d4 | 1737 | struct ioring_options *o = td->eo; |
0d9ed42a JA |
1738 | unsigned int lba_size; |
1739 | ||
1740 | lba_size = data->lba_ext ? data->lba_ext : data->lba_size; | |
1741 | if (td->o.min_bs[ddir] % lba_size || td->o.max_bs[ddir] % lba_size) { | |
1742 | if (data->lba_ext) { | |
1743 | log_err("%s: block size must be a multiple of %u " | |
1744 | "(LBA data size + Metadata size)\n", f->file_name, lba_size); | |
1745 | if (td->o.min_bs[ddir] == td->o.max_bs[ddir] && | |
1746 | !(td->o.min_bs[ddir] % data->lba_size)) { | |
1747 | /* fixed block size is actually a multiple of LBA data size */ | |
1748 | unsigned long long suggestion = lba_size * | |
1749 | (td->o.min_bs[ddir] / data->lba_size); | |
1750 | log_err("Did you mean to use a block size of %llu?\n", suggestion); | |
1751 | } | |
1752 | } else { | |
1753 | log_err("%s: block size must be a multiple of LBA data size\n", | |
1754 | f->file_name); | |
1755 | } | |
1756 | td_verror(td, EINVAL, "fio_ioring_cmd_open_file"); | |
1757 | return 1; | |
1758 | } | |
1759 | if (data->ms && !data->lba_ext && ddir != DDIR_TRIM && | |
1760 | (o->md_per_io_size < ((td->o.max_bs[ddir] / data->lba_size) * data->ms))) { | |
1761 | log_err("%s: md_per_io_size should be at least %llu bytes\n", | |
1762 | f->file_name, | |
1763 | ((td->o.max_bs[ddir] / data->lba_size) * data->ms)); | |
1764 | td_verror(td, EINVAL, "fio_ioring_cmd_open_file"); | |
1765 | return 1; | |
1766 | } | |
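| /* e.g. max_bs=128KiB with 4096-byte LBAs and ms=8 is 32 blocks per I/O, needing md_per_io_size >= 256 */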
855dc4d4 | 1767 | |
0d9ed42a JA |
1768 | return 0; |
1769 | } | |
855dc4d4 | 1770 | |
0d9ed42a JA |
1771 | static int fio_ioring_open_nvme(struct thread_data *td, struct fio_file *f) |
1772 | { | |
1773 | struct ioring_options *o = td->eo; | |
1774 | struct nvme_data *data = NULL; | |
1775 | __u64 nlba = 0; | |
1776 | int ret; | |
855dc4d4 | 1777 | |
0d9ed42a JA |
1778 | /* Store the namespace-id and lba size. */ |
1779 | data = FILE_ENG_DATA(f); | |
1780 | if (data == NULL) { | |
1781 | data = calloc(1, sizeof(struct nvme_data)); | |
1782 | ret = fio_nvme_get_info(f, &nlba, o->pi_act, data); | |
1783 | if (ret) { | |
1784 | free(data); | |
1785 | return ret; | |
855dc4d4 | 1786 | } |
345fa8fd | 1787 | |
0d9ed42a JA |
1788 | FILE_SET_ENG_DATA(f, data); |
1789 | } | |
ba342e58 | 1790 | |
0d9ed42a JA |
1791 | for_each_rw_ddir(ddir) { |
1792 | ret = verify_params(td, data, f, ddir); | |
1793 | if (ret) | |
1794 | return ret; | |
1795 | } | |
87a4903f | 1796 | |
0d9ed42a JA |
1797 | /* |
1798 | * For extended logical block sizes we cannot use verify when | |
1799 | * end to end data protection checks are enabled, as the PI | |
1800 | * section of the data buffer conflicts with verify.
1801 | */ | |
1802 | if (data->ms && data->pi_type && data->lba_ext && | |
1803 | td->o.verify != VERIFY_NONE) { | |
1804 | log_err("%s: for extended LBA, verify cannot be used when E2E " | |
1805 | "data protection is enabled\n", f->file_name); | |
1806 | td_verror(td, EINVAL, "fio_ioring_cmd_open_file"); | |
1807 | return 1; | |
1808 | } | |
1809 | ||
1810 | if (o->write_mode != FIO_URING_CMD_WMODE_WRITE && !td_write(td)) { | |
1811 | log_err("%s: write_mode requires a workload with writes ('readwrite='/'rw=')\n", f->file_name);
1812 | td_verror(td, EINVAL, "fio_ioring_cmd_open_file"); | |
1813 | return 1; | |
1814 | } | |
1815 | ||
1816 | return 0; | |
1817 | } | |
1818 | ||
1819 | static int fio_ioring_cmd_open_file(struct thread_data *td, struct fio_file *f) | |
1820 | { | |
0d9ed42a JA |
1821 | struct ioring_options *o = td->eo; |
1822 | ||
1823 | if (o->cmd_type == FIO_URING_CMD_NVME) { | |
1824 | int ret; | |
1825 | ||
1826 | ret = fio_ioring_open_nvme(td, f); | |
1827 | if (ret) | |
1828 | return ret; | |
855dc4d4 | 1829 | } |
855dc4d4 | 1830 | |
306d8986 | 1831 | return fio_ioring_open_file(td, f); |
855dc4d4 AG |
1832 | } |
1833 | ||
5ffd5626 JA |
1834 | static int fio_ioring_close_file(struct thread_data *td, struct fio_file *f) |
1835 | { | |
17318cf6 | 1836 | struct ioring_data *ld = td->io_ops_data; |
5ffd5626 JA |
1837 | struct ioring_options *o = td->eo; |
1838 | ||
17318cf6 | 1839 | if (!ld || !o->registerfiles) |
5ffd5626 JA |
1840 | return generic_close_file(td, f); |
1841 | ||
1842 | f->fd = -1; | |
1843 | return 0; | |
1844 | } | |
1845 | ||
855dc4d4 AG |
1846 | static int fio_ioring_cmd_close_file(struct thread_data *td, |
1847 | struct fio_file *f) | |
1848 | { | |
855dc4d4 AG |
1849 | struct ioring_options *o = td->eo; |
1850 | ||
1851 | if (o->cmd_type == FIO_URING_CMD_NVME) { | |
1852 | struct nvme_data *data = FILE_ENG_DATA(f); | |
1853 | ||
1854 | FILE_SET_ENG_DATA(f, NULL); | |
1855 | free(data); | |
1856 | } | |
855dc4d4 | 1857 | |
306d8986 | 1858 | return fio_ioring_close_file(td, f); |
855dc4d4 AG |
1859 | } |
1860 | ||
1861 | static int fio_ioring_cmd_get_file_size(struct thread_data *td, | |
1862 | struct fio_file *f) | |
1863 | { | |
1864 | struct ioring_options *o = td->eo; | |
1865 | ||
1866 | if (fio_file_size_known(f)) | |
1867 | return 0; | |
1868 | ||
1869 | if (o->cmd_type == FIO_URING_CMD_NVME) { | |
1870 | struct nvme_data *data = NULL; | |
671aa9f5 | 1871 | __u64 nlba = 0; |
855dc4d4 AG |
1872 | int ret; |
1873 | ||
855dc4d4 | 1874 | data = calloc(1, sizeof(struct nvme_data)); |
3ee8311a | 1875 | ret = fio_nvme_get_info(f, &nlba, o->pi_act, data); |
e7e5023b AK |
1876 | if (ret) { |
1877 | free(data); | |
1878 | return ret; | |
1879 | } | |
855dc4d4 | 1880 | |
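| /*
|  * With extended LBAs the metadata is interleaved with the data, so
|  * each block contributes lba_ext (data + metadata) bytes of capacity;
|  * with separate metadata only the data portion counts.
|  */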
acd2dd42 | 1881 | if (data->lba_ext) |
1882 | f->real_file_size = data->lba_ext * nlba; | |
1883 | else | |
1884 | f->real_file_size = data->lba_size * nlba; | |
855dc4d4 AG |
1885 | fio_file_set_size_known(f); |
1886 | ||
1887 | FILE_SET_ENG_DATA(f, data); | |
1888 | return 0; | |
1889 | } | |
1890 | return generic_get_file_size(td, f); | |
1891 | } | |
1892 | ||
3d05e0ff AK |
1893 | static int fio_ioring_cmd_get_zoned_model(struct thread_data *td, |
1894 | struct fio_file *f, | |
1895 | enum zbd_zoned_model *model) | |
1896 | { | |
1897 | return fio_nvme_get_zoned_model(td, f, model); | |
1898 | } | |
1899 | ||
1900 | static int fio_ioring_cmd_report_zones(struct thread_data *td, | |
1901 | struct fio_file *f, uint64_t offset, | |
1902 | struct zbd_zone *zbdz, | |
1903 | unsigned int nr_zones) | |
1904 | { | |
1905 | return fio_nvme_report_zones(td, f, offset, zbdz, nr_zones); | |
1906 | } | |
1907 | ||
1908 | static int fio_ioring_cmd_reset_wp(struct thread_data *td, struct fio_file *f, | |
1909 | uint64_t offset, uint64_t length) | |
1910 | { | |
1911 | return fio_nvme_reset_wp(td, f, offset, length); | |
1912 | } | |
1913 | ||
1914 | static int fio_ioring_cmd_get_max_open_zones(struct thread_data *td, | |
1915 | struct fio_file *f, | |
1916 | unsigned int *max_open_zones) | |
1917 | { | |
1918 | return fio_nvme_get_max_open_zones(td, f, max_open_zones); | |
1919 | } | |
1920 | ||
a7e8aae0 KB |
1921 | static int fio_ioring_cmd_fetch_ruhs(struct thread_data *td, struct fio_file *f, |
1922 | struct fio_ruhs_info *fruhs_info) | |
1923 | { | |
1924 | struct nvme_fdp_ruh_status *ruhs; | |
70ae781d | 1925 | int bytes, nr_ruhs, ret, i; |
a7e8aae0 | 1926 | |
70ae781d AK |
1927 | nr_ruhs = fruhs_info->nr_ruhs; |
1928 | bytes = sizeof(*ruhs) + fruhs_info->nr_ruhs * sizeof(struct nvme_fdp_ruh_status_desc); | |
1929 | ||
1930 | ruhs = calloc(1, bytes); | |
a7e8aae0 KB |
1931 | if (!ruhs) |
1932 | return -ENOMEM; | |
1933 | ||
1934 | ret = fio_nvme_iomgmt_ruhs(td, f, ruhs, bytes); | |
1935 | if (ret) | |
1936 | goto free; | |
1937 | ||
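| /* report the device's total RUH count, but only copy back as many placement IDs as the caller sized plis[] for */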
1938 | fruhs_info->nr_ruhs = le16_to_cpu(ruhs->nruhsd); | |
70ae781d | 1939 | for (i = 0; i < nr_ruhs; i++) |
a7e8aae0 KB |
1940 | fruhs_info->plis[i] = le16_to_cpu(ruhs->ruhss[i].pid); |
1941 | free: | |
70ae781d | 1942 | free(ruhs); |
a7e8aae0 KB |
1943 | return ret; |
1944 | } | |
1945 | ||
855dc4d4 | 1946 | static struct ioengine_ops ioengine_uring = { |
bffad86f | 1947 | .name = "io_uring", |
52885fa2 | 1948 | .version = FIO_IOOPS_VERSION, |
980fb7f2 JA |
1949 | .flags = FIO_NO_OFFLOAD | FIO_ASYNCIO_SETS_ISSUE_TIME | |
1950 | FIO_ATOMICWRITES, | |
bffad86f JA |
1951 | .init = fio_ioring_init, |
1952 | .post_init = fio_ioring_post_init, | |
1953 | .io_u_init = fio_ioring_io_u_init, | |
f97d9f38 | 1954 | .io_u_free = fio_ioring_io_u_free, |
bffad86f JA |
1955 | .prep = fio_ioring_prep, |
1956 | .queue = fio_ioring_queue, | |
1957 | .commit = fio_ioring_commit, | |
1958 | .getevents = fio_ioring_getevents, | |
1959 | .event = fio_ioring_event, | |
1960 | .cleanup = fio_ioring_cleanup, | |
5ffd5626 JA |
1961 | .open_file = fio_ioring_open_file, |
1962 | .close_file = fio_ioring_close_file, | |
52885fa2 JA |
1963 | .get_file_size = generic_get_file_size, |
1964 | .options = options, | |
bffad86f | 1965 | .option_struct_size = sizeof(struct ioring_options), |
52885fa2 JA |
1966 | }; |
1967 | ||
855dc4d4 AG |
1968 | static struct ioengine_ops ioengine_uring_cmd = { |
1969 | .name = "io_uring_cmd", | |
1970 | .version = FIO_IOOPS_VERSION, | |
4885a6eb | 1971 | .flags = FIO_NO_OFFLOAD | FIO_MEMALIGN | FIO_RAWIO | |
5d4ee0de AK |
1972 | FIO_ASYNCIO_SETS_ISSUE_TIME | |
1973 | FIO_MULTI_RANGE_TRIM, | |
855dc4d4 AG |
1974 | .init = fio_ioring_init, |
1975 | .post_init = fio_ioring_cmd_post_init, | |
f97d9f38 | 1976 | .io_u_init = fio_ioring_io_u_init, |
5163f35e | 1977 | .io_u_free = fio_ioring_io_u_free, |
855dc4d4 AG |
1978 | .prep = fio_ioring_cmd_prep, |
1979 | .queue = fio_ioring_queue, | |
1980 | .commit = fio_ioring_commit, | |
1981 | .getevents = fio_ioring_getevents, | |
1982 | .event = fio_ioring_cmd_event, | |
2a13699a | 1983 | .errdetails = fio_ioring_cmd_errdetails, |
855dc4d4 AG |
1984 | .cleanup = fio_ioring_cleanup, |
1985 | .open_file = fio_ioring_cmd_open_file, | |
1986 | .close_file = fio_ioring_cmd_close_file, | |
1987 | .get_file_size = fio_ioring_cmd_get_file_size, | |
3d05e0ff AK |
1988 | .get_zoned_model = fio_ioring_cmd_get_zoned_model, |
1989 | .report_zones = fio_ioring_cmd_report_zones, | |
1990 | .reset_wp = fio_ioring_cmd_reset_wp, | |
1991 | .get_max_open_zones = fio_ioring_cmd_get_max_open_zones, | |
855dc4d4 AG |
1992 | .options = options, |
1993 | .option_struct_size = sizeof(struct ioring_options), | |
a7e8aae0 | 1994 | .fdp_fetch_ruhs = fio_ioring_cmd_fetch_ruhs, |
855dc4d4 AG |
1995 | }; |
1996 | ||
bffad86f | 1997 | static void fio_init fio_ioring_register(void) |
52885fa2 | 1998 | { |
855dc4d4 AG |
1999 | register_ioengine(&ioengine_uring); |
2000 | register_ioengine(&ioengine_uring_cmd); | |
52885fa2 JA |
2001 | } |
2002 | ||
bffad86f | 2003 | static void fio_exit fio_ioring_unregister(void) |
52885fa2 | 2004 | { |
855dc4d4 AG |
2005 | unregister_ioengine(&ioengine_uring); |
2006 | unregister_ioengine(&ioengine_uring_cmd); | |
52885fa2 | 2007 | } |
1f90e9bb | 2008 | #endif |