/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
/*
 * Header file for the io_uring interface.
 *
 * Copyright (C) 2019 Jens Axboe
 * Copyright (C) 2019 Christoph Hellwig
 */
#ifndef LINUX_IO_URING_H
#define LINUX_IO_URING_H

#include <linux/fs.h>
#include <linux/types.h>

/*
 * IO submission data structure (Submission Queue Entry)
 */
struct io_uring_sqe {
	__u8	opcode;		/* type of operation for this sqe */
	__u8	flags;		/* IOSQE_ flags */
	__u16	ioprio;		/* ioprio for the request */
	__s32	fd;		/* file descriptor to do IO on */
	union {
		__u64	off;	/* offset into file */
		__u64	addr2;
		__u32	cmd_op;
	};
	union {
		__u64	addr;	/* pointer to buffer or iovecs */
		__u64	splice_off_in;
	};
	__u32	len;		/* buffer size or number of iovecs */
	union {
		__kernel_rwf_t	rw_flags;
		__u32		fsync_flags;
		__u16		poll_events;	/* compatibility */
		__u32		poll32_events;	/* word-reversed for BE */
		__u32		sync_range_flags;
		__u32		msg_flags;
		__u32		timeout_flags;
		__u32		accept_flags;
		__u32		cancel_flags;
		__u32		open_flags;
		__u32		statx_flags;
		__u32		fadvise_advice;
		__u32		splice_flags;
		__u32		rename_flags;
		__u32		unlink_flags;
		__u32		hardlink_flags;
		__u32		uring_cmd_flags;
	};
	__u64	user_data;	/* data to be passed back at completion time */
	/* pack this to avoid bogus arm OABI complaints */
	union {
		/* index into fixed buffers, if used */
		__u16	buf_index;
		/* for grouped buffer selection */
		__u16	buf_group;
	} __attribute__((packed));
	/* personality to use, if used */
	__u16	personality;
	union {
		__s32	splice_fd_in;
		__u32	file_index;
	};
	union {
		struct {
			__u64	addr3;
			__u64	__pad2[1];
		};
		/*
		 * If the ring is initialized with IORING_SETUP_SQE128, then
		 * this field is used for 80 bytes of arbitrary command data
		 */
		__u8	cmd[0];
	};
};

enum {
	IOSQE_FIXED_FILE_BIT,
	IOSQE_IO_DRAIN_BIT,
	IOSQE_IO_LINK_BIT,
	IOSQE_IO_HARDLINK_BIT,
	IOSQE_ASYNC_BIT,
	IOSQE_BUFFER_SELECT_BIT,
	IOSQE_CQE_SKIP_SUCCESS_BIT,
};

/*
 * sqe->flags
 */
/* use fixed fileset */
#define IOSQE_FIXED_FILE	(1U << IOSQE_FIXED_FILE_BIT)
/* issue after inflight IO */
#define IOSQE_IO_DRAIN		(1U << IOSQE_IO_DRAIN_BIT)
/* links next sqe */
#define IOSQE_IO_LINK		(1U << IOSQE_IO_LINK_BIT)
/* like LINK, but stronger */
#define IOSQE_IO_HARDLINK	(1U << IOSQE_IO_HARDLINK_BIT)
/* always go async */
#define IOSQE_ASYNC		(1U << IOSQE_ASYNC_BIT)
/* select buffer from sqe->buf_group */
#define IOSQE_BUFFER_SELECT	(1U << IOSQE_BUFFER_SELECT_BIT)
/* don't post CQE if request succeeded */
#define IOSQE_CQE_SKIP_SUCCESS	(1U << IOSQE_CQE_SKIP_SUCCESS_BIT)
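
/*
 * Illustrative sketch (not part of this header's ABI): chaining two
 * consecutively submitted SQEs so the second is started only after the
 * first completes successfully. Only IOSQE_IO_LINK itself comes from the
 * definitions above; the helper name is hypothetical.
 */
static inline void example_link_pair(struct io_uring_sqe *sqes)
{
	/* sqes[1] is not issued until sqes[0] has completed */
	sqes[0].flags |= IOSQE_IO_LINK;
	/* the chain ends at the first SQE that does not set IOSQE_IO_LINK */
}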

/*
 * io_uring_setup() flags
 */
#define IORING_SETUP_IOPOLL	(1U << 0)	/* io_context is polled */
#define IORING_SETUP_SQPOLL	(1U << 1)	/* SQ poll thread */
#define IORING_SETUP_SQ_AFF	(1U << 2)	/* sq_thread_cpu is valid */
#define IORING_SETUP_CQSIZE	(1U << 3)	/* app defines CQ size */
#define IORING_SETUP_CLAMP	(1U << 4)	/* clamp SQ/CQ ring sizes */
#define IORING_SETUP_ATTACH_WQ	(1U << 5)	/* attach to existing wq */
#define IORING_SETUP_R_DISABLED	(1U << 6)	/* start with ring disabled */
#define IORING_SETUP_SUBMIT_ALL	(1U << 7)	/* continue submit on error */
/*
 * Cooperative task running. When requests complete, they often require
 * forcing the submitter to transition to the kernel to complete. If this
 * flag is set, work will be done when the task transitions anyway, rather
 * than forcing an inter-processor interrupt reschedule. This avoids
 * interrupting a task running in userspace, and saves an IPI.
 */
#define IORING_SETUP_COOP_TASKRUN	(1U << 8)
/*
 * If COOP_TASKRUN is set, get notified if task work is available for
 * running and a kernel transition would be needed to run it. This sets
 * IORING_SQ_TASKRUN in the sq ring flags. Not valid without COOP_TASKRUN.
 */
#define IORING_SETUP_TASKRUN_FLAG	(1U << 9)

#define IORING_SETUP_SQE128		(1U << 10) /* SQEs are 128 byte */
#define IORING_SETUP_CQE32		(1U << 11) /* CQEs are 32 byte */

/*
 * Only one task is allowed to submit requests
 */
#define IORING_SETUP_SINGLE_ISSUER	(1U << 12)

/*
 * Defer running task work to get events.
 * Rather than running bits of task work whenever the task transitions,
 * try to do it just before it is needed.
 */
#define IORING_SETUP_DEFER_TASKRUN	(1U << 13)

enum {
	IORING_OP_NOP,
	IORING_OP_READV,
	IORING_OP_WRITEV,
	IORING_OP_FSYNC,
	IORING_OP_READ_FIXED,
	IORING_OP_WRITE_FIXED,
	IORING_OP_POLL_ADD,
	IORING_OP_POLL_REMOVE,
	IORING_OP_SYNC_FILE_RANGE,
	IORING_OP_SENDMSG,
	IORING_OP_RECVMSG,
	IORING_OP_TIMEOUT,
	IORING_OP_TIMEOUT_REMOVE,
	IORING_OP_ACCEPT,
	IORING_OP_ASYNC_CANCEL,
	IORING_OP_LINK_TIMEOUT,
	IORING_OP_CONNECT,
	IORING_OP_FALLOCATE,
	IORING_OP_OPENAT,
	IORING_OP_CLOSE,
	IORING_OP_FILES_UPDATE,
	IORING_OP_STATX,
	IORING_OP_READ,
	IORING_OP_WRITE,
	IORING_OP_FADVISE,
	IORING_OP_MADVISE,
	IORING_OP_SEND,
	IORING_OP_RECV,
	IORING_OP_OPENAT2,
	IORING_OP_EPOLL_CTL,
	IORING_OP_SPLICE,
	IORING_OP_PROVIDE_BUFFERS,
	IORING_OP_REMOVE_BUFFERS,
	IORING_OP_TEE,
	IORING_OP_SHUTDOWN,
	IORING_OP_RENAMEAT,
	IORING_OP_UNLINKAT,
	IORING_OP_MKDIRAT,
	IORING_OP_SYMLINKAT,
	IORING_OP_LINKAT,
	IORING_OP_MSG_RING,
	IORING_OP_FSETXATTR,
	IORING_OP_SETXATTR,
	IORING_OP_FGETXATTR,
	IORING_OP_GETXATTR,
	IORING_OP_SOCKET,
	IORING_OP_URING_CMD,

	/* this goes last, obviously */
	IORING_OP_LAST,
};
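
/*
 * Illustrative sketch (not part of this header's ABI): filling in an SQE
 * for a pread(2)-style read by hand. Field meanings come from the struct
 * definition above; the helper name is hypothetical and the caller is
 * assumed to have zeroed the SQE beforehand.
 */
static inline void example_prep_read(struct io_uring_sqe *sqe, __s32 fd,
				     __u64 buf, __u32 len, __u64 offset)
{
	sqe->opcode = IORING_OP_READ;	/* read into a plain buffer */
	sqe->fd = fd;			/* file descriptor to read from */
	sqe->addr = buf;		/* destination buffer, cast to u64 */
	sqe->len = len;			/* number of bytes to read */
	sqe->off = offset;		/* absolute file offset */
	sqe->user_data = buf;		/* echoed back in cqe->user_data */
}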

/*
 * sqe->uring_cmd_flags
 * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
 *				along with setting sqe->buf_index.
 */
#define IORING_URING_CMD_FIXED	(1U << 0)

/*
 * sqe->fsync_flags
 */
#define IORING_FSYNC_DATASYNC	(1U << 0)

/*
 * sqe->timeout_flags
 */
#define IORING_TIMEOUT_ABS		(1U << 0)
#define IORING_TIMEOUT_UPDATE		(1U << 1)
#define IORING_TIMEOUT_BOOTTIME		(1U << 2)
#define IORING_TIMEOUT_REALTIME		(1U << 3)
#define IORING_LINK_TIMEOUT_UPDATE	(1U << 4)
#define IORING_TIMEOUT_ETIME_SUCCESS	(1U << 5)
#define IORING_TIMEOUT_CLOCK_MASK	(IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
#define IORING_TIMEOUT_UPDATE_MASK	(IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
/*
 * sqe->splice_flags
 * extends splice(2) flags
 */
#define SPLICE_F_FD_IN_FIXED	(1U << 31) /* the last bit of __u32 */

/*
 * POLL_ADD flags. Note that since sqe->poll_events is the flag space, the
 * command flags for POLL_ADD are stored in sqe->len.
 *
 * IORING_POLL_ADD_MULTI	Multishot poll. Sets IORING_CQE_F_MORE if
 *				the poll handler will continue to report
 *				CQEs on behalf of the same SQE.
 *
 * IORING_POLL_UPDATE		Update existing poll request, matching
 *				sqe->addr as the old user_data field.
 */
#define IORING_POLL_ADD_MULTI		(1U << 0)
#define IORING_POLL_UPDATE_EVENTS	(1U << 1)
#define IORING_POLL_UPDATE_USER_DATA	(1U << 2)

/*
 * IO completion data structure (Completion Queue Entry)
 */
struct io_uring_cqe {
	__u64	user_data;	/* sqe->data submission passed back */
	__s32	res;		/* result code for this event */
	__u32	flags;

	/*
	 * If the ring is initialized with IORING_SETUP_CQE32, then this field
	 * contains 16 bytes of padding, doubling the size of the CQE.
	 */
	__u64	big_cqe[];
};

/*
 * cqe->flags
 *
 * IORING_CQE_F_BUFFER	If set, the upper 16 bits are the buffer ID
 * IORING_CQE_F_MORE	If set, parent SQE will generate more CQE entries
 */
#define IORING_CQE_F_BUFFER	(1U << 0)
#define IORING_CQE_F_MORE	(1U << 1)

enum {
	IORING_CQE_BUFFER_SHIFT	= 16,
};
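
/*
 * Illustrative sketch: recovering the provided-buffer ID from a completion
 * when IORING_CQE_F_BUFFER is set in cqe->flags. The helper name is
 * hypothetical.
 */
static inline __u16 example_cqe_buffer_id(const struct io_uring_cqe *cqe)
{
	/* the buffer ID lives in the upper 16 bits of cqe->flags */
	return cqe->flags >> IORING_CQE_BUFFER_SHIFT;
}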

/*
 * Magic offsets for the application to mmap the data it needs
 */
#define IORING_OFF_SQ_RING	0ULL
#define IORING_OFF_CQ_RING	0x8000000ULL
#define IORING_OFF_SQES		0x10000000ULL

/*
 * Filled with the offset for mmap(2)
 */
struct io_sqring_offsets {
	__u32	head;
	__u32	tail;
	__u32	ring_mask;
	__u32	ring_entries;
	__u32	flags;
	__u32	dropped;
	__u32	array;
	__u32	resv1;
	__u64	resv2;
};

/*
 * sq_ring->flags
 */
#define IORING_SQ_NEED_WAKEUP	(1U << 0) /* needs io_uring_enter wakeup */
#define IORING_SQ_CQ_OVERFLOW	(1U << 1) /* CQ ring is overflown */

struct io_cqring_offsets {
	__u32	head;
	__u32	tail;
	__u32	ring_mask;
	__u32	ring_entries;
	__u32	overflow;
	__u32	cqes;
	__u32	flags;
	__u32	resv1;
	__u64	resv2;
};
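
/*
 * Illustrative sketch: draining completions from the mmap'ed CQ ring.
 * cq_head, cq_tail, cq_ring_mask and cqes are assumed to have been derived
 * from the offsets above after mmap(2); handle_cqe() is hypothetical. Real
 * code needs an acquire load of the tail and a release store of the head
 * (<stdatomic.h> is assumed here).
 */
#if 0
	unsigned head = *cq_head;

	while (head != atomic_load_explicit((_Atomic unsigned *)cq_tail,
					    memory_order_acquire)) {
		struct io_uring_cqe *cqe = &cqes[head & *cq_ring_mask];

		handle_cqe(cqe->user_data, cqe->res, cqe->flags);
		head++;
	}
	/* publish the new head so the kernel can reuse the CQE slots */
	atomic_store_explicit((_Atomic unsigned *)cq_head, head,
			      memory_order_release);
#endif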

/*
 * cq_ring->flags
 */

/* disable eventfd notifications */
#define IORING_CQ_EVENTFD_DISABLED	(1U << 0)

/*
 * io_uring_enter(2) flags
 */
#define IORING_ENTER_GETEVENTS		(1U << 0)
#define IORING_ENTER_SQ_WAKEUP		(1U << 1)
#define IORING_ENTER_SQ_WAIT		(1U << 2)
#define IORING_ENTER_EXT_ARG		(1U << 3)
#define IORING_ENTER_REGISTERED_RING	(1U << 4)
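
/*
 * Illustrative sketch: one io_uring_enter(2) call that submits "to_submit"
 * queued SQEs and blocks until at least one completion is posted.
 * "__sys_io_uring_enter" stands in for a raw syscall(2) wrapper and is
 * hypothetical.
 */
#if 0
	ret = __sys_io_uring_enter(ring_fd, to_submit, 1,
				   IORING_ENTER_GETEVENTS, NULL);
#endif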

/*
 * Passed in for io_uring_setup(2). Copied back with updated info on success
 */
struct io_uring_params {
	__u32 sq_entries;
	__u32 cq_entries;
	__u32 flags;
	__u32 sq_thread_cpu;
	__u32 sq_thread_idle;
	__u32 features;
	__u32 wq_fd;
	__u32 resv[3];
	struct io_sqring_offsets sq_off;
	struct io_cqring_offsets cq_off;
};
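
/*
 * Illustrative sketch: creating a ring and mapping the SQ ring memory.
 * "__sys_io_uring_setup" stands in for a raw syscall(2) wrapper and is
 * hypothetical; mmap(2) is assumed from <sys/mman.h> and error handling is
 * omitted. The offsets and IORING_OFF_SQ_RING come from this header.
 */
#if 0
	struct io_uring_params p = { };
	int ring_fd;
	void *sq_ptr;
	unsigned *sq_head, *sq_tail, *sq_array;

	ring_fd = __sys_io_uring_setup(256, &p);	/* 256-entry SQ */
	sq_ptr = mmap(NULL, p.sq_off.array + p.sq_entries * sizeof(__u32),
		      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
		      ring_fd, IORING_OFF_SQ_RING);
	sq_head = (unsigned *)((char *)sq_ptr + p.sq_off.head);
	sq_tail = (unsigned *)((char *)sq_ptr + p.sq_off.tail);
	/* sq_array[] maps ring slots to indexes into the SQE array, which
	 * itself is a second mmap at IORING_OFF_SQES (not shown) */
	sq_array = (unsigned *)((char *)sq_ptr + p.sq_off.array);
#endif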

/*
 * io_uring_params->features flags
 */
#define IORING_FEAT_SINGLE_MMAP		(1U << 0)
#define IORING_FEAT_NODROP		(1U << 1)
#define IORING_FEAT_SUBMIT_STABLE	(1U << 2)
#define IORING_FEAT_RW_CUR_POS		(1U << 3)
#define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
#define IORING_FEAT_FAST_POLL		(1U << 5)
#define IORING_FEAT_POLL_32BITS		(1U << 6)
#define IORING_FEAT_SQPOLL_NONFIXED	(1U << 7)
#define IORING_FEAT_EXT_ARG		(1U << 8)
#define IORING_FEAT_NATIVE_WORKERS	(1U << 9)
#define IORING_FEAT_RSRC_TAGS		(1U << 10)
#define IORING_FEAT_CQE_SKIP		(1U << 11)

/*
 * io_uring_register(2) opcodes and arguments
 */
enum {
	IORING_REGISTER_BUFFERS			= 0,
	IORING_UNREGISTER_BUFFERS		= 1,
	IORING_REGISTER_FILES			= 2,
	IORING_UNREGISTER_FILES			= 3,
	IORING_REGISTER_EVENTFD			= 4,
	IORING_UNREGISTER_EVENTFD		= 5,
	IORING_REGISTER_FILES_UPDATE		= 6,
	IORING_REGISTER_EVENTFD_ASYNC		= 7,
	IORING_REGISTER_PROBE			= 8,
	IORING_REGISTER_PERSONALITY		= 9,
	IORING_UNREGISTER_PERSONALITY		= 10,
	IORING_REGISTER_RESTRICTIONS		= 11,
	IORING_REGISTER_ENABLE_RINGS		= 12,

	/* extended with tagging */
	IORING_REGISTER_FILES2			= 13,
	IORING_REGISTER_FILES_UPDATE2		= 14,
	IORING_REGISTER_BUFFERS2		= 15,
	IORING_REGISTER_BUFFERS_UPDATE		= 16,

	/* set/clear io-wq thread affinities */
	IORING_REGISTER_IOWQ_AFF		= 17,
	IORING_UNREGISTER_IOWQ_AFF		= 18,

	/* set/get max number of io-wq workers */
	IORING_REGISTER_IOWQ_MAX_WORKERS	= 19,

	/* register/unregister io_uring fd with the ring */
	IORING_REGISTER_RING_FDS		= 20,
	IORING_UNREGISTER_RING_FDS		= 21,

	/* this goes last */
	IORING_REGISTER_LAST
};

/* io-wq worker categories */
enum {
	IO_WQ_BOUND,
	IO_WQ_UNBOUND,
};

/* deprecated, see struct io_uring_rsrc_update */
struct io_uring_files_update {
	__u32 offset;
	__u32 resv;
	__aligned_u64 /* __s32 * */ fds;
};

struct io_uring_rsrc_register {
	__u32 nr;
	__u32 resv;
	__u64 resv2;
	__aligned_u64 data;
	__aligned_u64 tags;
};

struct io_uring_rsrc_update {
	__u32 offset;
	__u32 resv;
	__aligned_u64 data;
};

struct io_uring_rsrc_update2 {
	__u32 offset;
	__u32 resv;
	__aligned_u64 data;
	__aligned_u64 tags;
	__u32 nr;
	__u32 resv2;
};

/* Skip updating fd indexes set to this value in the fd table */
#define IORING_REGISTER_FILES_SKIP	(-2)

#define IO_URING_OP_SUPPORTED	(1U << 0)

struct io_uring_probe_op {
	__u8 op;
	__u8 resv;
	__u16 flags;	/* IO_URING_OP_* flags */
	__u32 resv2;
};

struct io_uring_probe {
	__u8 last_op;	/* last opcode supported */
	__u8 ops_len;	/* length of ops[] array below */
	__u16 resv;
	__u32 resv2[3];
	struct io_uring_probe_op ops[0];
};
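
/*
 * Illustrative sketch: probing which opcodes the running kernel supports.
 * "__sys_io_uring_register" stands in for a raw syscall(2) wrapper and
 * mark_supported() is hypothetical; calloc(3) is assumed from <stdlib.h>.
 */
#if 0
	struct io_uring_probe *probe;
	unsigned i;

	probe = calloc(1, sizeof(*probe) +
			  IORING_OP_LAST * sizeof(struct io_uring_probe_op));
	__sys_io_uring_register(ring_fd, IORING_REGISTER_PROBE, probe,
				IORING_OP_LAST);
	for (i = 0; i < probe->ops_len; i++)
		if (probe->ops[i].flags & IO_URING_OP_SUPPORTED)
			mark_supported(probe->ops[i].op);
#endif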

struct io_uring_restriction {
	__u16 opcode;
	union {
		__u8 register_op;	/* IORING_RESTRICTION_REGISTER_OP */
		__u8 sqe_op;		/* IORING_RESTRICTION_SQE_OP */
		__u8 sqe_flags;		/* IORING_RESTRICTION_SQE_FLAGS_* */
	};
	__u8 resv;
	__u32 resv2[3];
};

/*
 * io_uring_restriction->opcode values
 */
enum {
	/* Allow an io_uring_register(2) opcode */
	IORING_RESTRICTION_REGISTER_OP		= 0,

	/* Allow an sqe opcode */
	IORING_RESTRICTION_SQE_OP		= 1,

	/* Allow sqe flags */
	IORING_RESTRICTION_SQE_FLAGS_ALLOWED	= 2,

	/* Require sqe flags (these flags must be set on each submission) */
	IORING_RESTRICTION_SQE_FLAGS_REQUIRED	= 3,

	IORING_RESTRICTION_LAST
};

struct io_uring_getevents_arg {
	__u64 sigmask;
	__u32 sigmask_sz;
	__u32 pad;
	__u64 ts;
};
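
/*
 * Illustrative sketch: waiting with a relative timeout via
 * IORING_ENTER_EXT_ARG. Pointers are passed as integers to match the
 * __u64 fields above; "__sys_io_uring_enter2" (the six-argument enter
 * variant) is a hypothetical raw-syscall wrapper.
 */
#if 0
	struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
	struct io_uring_getevents_arg arg = {
		.sigmask	= 0,	/* no signal mask change */
		.sigmask_sz	= 0,
		.ts		= (unsigned long) &ts,	/* wait at most 1s */
	};

	__sys_io_uring_enter2(ring_fd, 0, 1,
			      IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG,
			      &arg, sizeof(arg));
#endif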

#endif