Commit | Line | Data |
---|---|---|
9f5834c8 | 1 | /* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ |
2b188cc1 JA |
2 | /* |
3 | * Header file for the io_uring interface. | |
4 | * | |
5 | * Copyright (C) 2019 Jens Axboe | |
6 | * Copyright (C) 2019 Christoph Hellwig | |
7 | */ | |
8 | #ifndef LINUX_IO_URING_H | |
9 | #define LINUX_IO_URING_H | |
10 | ||
11 | #include <linux/fs.h> | |
12 | #include <linux/types.h> | |
9eb80340 SM |
13 | /* |
14 | * this file is shared with liburing and that has to autodetect | |
15 | * if linux/time_types.h is available or not, it can | |
16 | * define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H | |
17 | * if linux/time_types.h is not available | |
18 | */ | |
19 | #ifndef UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H | |
78a861b9 | 20 | #include <linux/time_types.h> |
9eb80340 | 21 | #endif |
2b188cc1 | 22 | |
e1d0c6d0 AF |
23 | #ifdef __cplusplus |
24 | extern "C" { | |
25 | #endif | |
26 | ||
2b188cc1 JA |
27 | /* |
28 | * IO submission data structure (Submission Queue Entry) | |
29 | */ | |
30 | struct io_uring_sqe { | |
31 | __u8 opcode; /* type of operation for this sqe */ | |
6b06314c | 32 | __u8 flags; /* IOSQE_ flags */ |
2b188cc1 JA |
33 | __u16 ioprio; /* ioprio for the request */ |
34 | __s32 fd; /* file descriptor to do IO on */ | |
17f2fe35 JA |
35 | union { |
36 | __u64 off; /* offset into file */ | |
37 | __u64 addr2; | |
bdb2c48e PB |
38 | struct { |
39 | __u32 cmd_op; | |
40 | __u32 __pad1; | |
41 | }; | |
17f2fe35 | 42 | }; |
7d67af2c PB |
43 | union { |
44 | __u64 addr; /* pointer to buffer or iovecs */ | |
45 | __u64 splice_off_in; | |
a5d2f99a BL |
46 | struct { |
47 | __u32 level; | |
48 | __u32 optname; | |
49 | }; | |
7d67af2c | 50 | }; |
2b188cc1 JA |
51 | __u32 len; /* buffer size or number of iovecs */ |
52 | union { | |
53 | __kernel_rwf_t rw_flags; | |
c992fe29 | 54 | __u32 fsync_flags; |
5769a351 JX |
55 | __u16 poll_events; /* compatibility */ |
56 | __u32 poll32_events; /* word-reversed for BE */ | |
5d17b4a4 | 57 | __u32 sync_range_flags; |
0fa03c62 | 58 | __u32 msg_flags; |
5262f567 | 59 | __u32 timeout_flags; |
17f2fe35 | 60 | __u32 accept_flags; |
62755e35 | 61 | __u32 cancel_flags; |
15b71abe | 62 | __u32 open_flags; |
eddc7ef5 | 63 | __u32 statx_flags; |
4840e418 | 64 | __u32 fadvise_advice; |
7d67af2c | 65 | __u32 splice_flags; |
80a261fd | 66 | __u32 rename_flags; |
14a1143b | 67 | __u32 unlink_flags; |
cf30da90 | 68 | __u32 hardlink_flags; |
e9621e2b | 69 | __u32 xattr_flags; |
e6130eba | 70 | __u32 msg_ring_flags; |
9cda70f6 | 71 | __u32 uring_cmd_flags; |
f31ecf67 | 72 | __u32 waitid_flags; |
194bb58c | 73 | __u32 futex_flags; |
dc18b89a | 74 | __u32 install_fd_flags; |
2b188cc1 JA |
75 | }; |
76 | __u64 user_data; /* data to be passed back at completion time */ | |
9ba6a1c0 | 77 | /* pack this to avoid bogus arm OABI complaints */ |
edafccee | 78 | union { |
9ba6a1c0 PB |
79 | /* index into fixed buffers, if used */ |
80 | __u16 buf_index; | |
81 | /* for grouped buffer selection */ | |
82 | __u16 buf_group; | |
83 | } __attribute__((packed)); | |
84 | /* personality to use, if used */ | |
85 | __u16 personality; | |
b9445598 PB |
86 | union { |
87 | __s32 splice_fd_in; | |
88 | __u32 file_index; | |
a5d2f99a | 89 | __u32 optlen; |
06a5464b | 90 | struct { |
092aeedb | 91 | __u16 addr_len; |
b48c312b | 92 | __u16 __pad3[1]; |
06a5464b | 93 | }; |
b9445598 | 94 | }; |
ee692a21 JA |
95 | union { |
96 | struct { | |
97 | __u64 addr3; | |
98 | __u64 __pad2[1]; | |
99 | }; | |
a5d2f99a | 100 | __u64 optval; |
ee692a21 JA |
101 | /* |
102 | * If the ring is initialized with IORING_SETUP_SQE128, then | |
103 | * this field is used for 80 bytes of arbitrary command data | |
104 | */ | |
105 | __u8 cmd[0]; | |
106 | }; | |
2b188cc1 JA |
107 | }; |
108 | ||
1339f24b JA |
109 | /* |
110 | * If sqe->file_index is set to this for opcodes that instantiate a new | |
111 | * direct descriptor (like openat/openat2/accept), then io_uring will allocate | |
112 | * an available direct descriptor instead of having the application pass one | |
113 | * in. The picked direct descriptor will be returned in cqe->res, or -ENFILE | |
114 | * if the space is full. | |
115 | */ | |
116 | #define IORING_FILE_INDEX_ALLOC (~0U) | |
117 | ||
6b47ee6e PB |
118 | enum { |
119 | IOSQE_FIXED_FILE_BIT, | |
120 | IOSQE_IO_DRAIN_BIT, | |
121 | IOSQE_IO_LINK_BIT, | |
122 | IOSQE_IO_HARDLINK_BIT, | |
123 | IOSQE_ASYNC_BIT, | |
bcda7baa | 124 | IOSQE_BUFFER_SELECT_BIT, |
04c76b41 | 125 | IOSQE_CQE_SKIP_SUCCESS_BIT, |
6b47ee6e PB |
126 | }; |
127 | ||
6b06314c JA |
128 | /* |
129 | * sqe->flags | |
130 | */ | |
6b47ee6e PB |
131 | /* use fixed fileset */ |
132 | #define IOSQE_FIXED_FILE (1U << IOSQE_FIXED_FILE_BIT) | |
133 | /* issue after inflight IO */ | |
134 | #define IOSQE_IO_DRAIN (1U << IOSQE_IO_DRAIN_BIT) | |
135 | /* links next sqe */ | |
136 | #define IOSQE_IO_LINK (1U << IOSQE_IO_LINK_BIT) | |
137 | /* like LINK, but stronger */ | |
138 | #define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT) | |
139 | /* always go async */ | |
140 | #define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT) | |
bcda7baa JA |
141 | /* select buffer from sqe->buf_group */ |
142 | #define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT) | |
04c76b41 PB |
143 | /* don't post CQE if request succeeded */ |
144 | #define IOSQE_CQE_SKIP_SUCCESS (1U << IOSQE_CQE_SKIP_SUCCESS_BIT) | |
6b06314c | 145 | |
def596e9 JA |
146 | /* |
147 | * io_uring_setup() flags | |
148 | */ | |
149 | #define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */ | |
6c271ce2 JA |
150 | #define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */ |
151 | #define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */ | |
33a107f0 | 152 | #define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ |
8110c1a6 | 153 | #define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ |
24369c2e | 154 | #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ |
7e84e1c7 | 155 | #define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ |
bcbb7bf6 | 156 | #define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */ |
e1169f06 JA |
157 | /* |
158 | * Cooperative task running. When requests complete, they often require | |
159 | * forcing the submitter to transition to the kernel to complete. If this | |
160 | * flag is set, work will be done when the task transitions anyway, rather | |
161 | * than force an inter-processor interrupt reschedule. This avoids interrupting | |
162 | * a task running in userspace, and saves an IPI. | |
163 | */ | |
164 | #define IORING_SETUP_COOP_TASKRUN (1U << 8) | |
ef060ea9 JA |
165 | /* |
166 | * If COOP_TASKRUN is set, get notified if task work is available for | |
167 | * running and a kernel transition would be needed to run it. This sets | |
168 | * IORING_SQ_TASKRUN in the sq ring flags. Not valid without COOP_TASKRUN or DEFER_TASKRUN. | |
169 | */ | |
170 | #define IORING_SETUP_TASKRUN_FLAG (1U << 9) | |
ebdeb7c0 | 171 | #define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */ |
7a51e5b4 | 172 | #define IORING_SETUP_CQE32 (1U << 11) /* CQEs are 32 byte */ |
97bbdc06 PB |
173 | /* |
174 | * Only one task is allowed to submit requests | |
175 | */ | |
176 | #define IORING_SETUP_SINGLE_ISSUER (1U << 12) | |
ebdeb7c0 | 177 | |
c0e0d6ba DY |
178 | /* |
179 | * Defer running task work to get events. | |
180 | * Rather than running bits of task work whenever the task transitions | |
181 | * try to do it just before it is needed. | |
182 | */ | |
183 | #define IORING_SETUP_DEFER_TASKRUN (1U << 13) | |
184 | ||
03d89a2d JA |
185 | /* |
186 | * Application provides the memory for the rings | |
187 | */ | |
188 | #define IORING_SETUP_NO_MMAP (1U << 14) | |
189 | ||
6e76ac59 JT |
190 | /* |
191 | * Register the ring fd in itself for use with | |
192 | * IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather | |
193 | * than an fd. | |
194 | */ | |
195 | #define IORING_SETUP_REGISTERED_FD_ONLY (1U << 15) | |
196 | ||
2af89abd PB |
197 | /* |
198 | * Removes indirection through the SQ index array. | |
199 | */ | |
200 | #define IORING_SETUP_NO_SQARRAY (1U << 16) | |
201 | ||
cc51eaa8 | 202 | enum io_uring_op { |
9e3aa61a JA |
203 | IORING_OP_NOP, |
204 | IORING_OP_READV, | |
205 | IORING_OP_WRITEV, | |
206 | IORING_OP_FSYNC, | |
207 | IORING_OP_READ_FIXED, | |
208 | IORING_OP_WRITE_FIXED, | |
209 | IORING_OP_POLL_ADD, | |
210 | IORING_OP_POLL_REMOVE, | |
211 | IORING_OP_SYNC_FILE_RANGE, | |
212 | IORING_OP_SENDMSG, | |
213 | IORING_OP_RECVMSG, | |
214 | IORING_OP_TIMEOUT, | |
215 | IORING_OP_TIMEOUT_REMOVE, | |
216 | IORING_OP_ACCEPT, | |
217 | IORING_OP_ASYNC_CANCEL, | |
218 | IORING_OP_LINK_TIMEOUT, | |
219 | IORING_OP_CONNECT, | |
d63d1b5e | 220 | IORING_OP_FALLOCATE, |
15b71abe | 221 | IORING_OP_OPENAT, |
b5dba59e | 222 | IORING_OP_CLOSE, |
d9808ceb | 223 | IORING_OP_FILES_UPDATE, |
eddc7ef5 | 224 | IORING_OP_STATX, |
3a6820f2 JA |
225 | IORING_OP_READ, |
226 | IORING_OP_WRITE, | |
4840e418 | 227 | IORING_OP_FADVISE, |
c1ca757b | 228 | IORING_OP_MADVISE, |
fddaface JA |
229 | IORING_OP_SEND, |
230 | IORING_OP_RECV, | |
cebdb986 | 231 | IORING_OP_OPENAT2, |
3e4827b0 | 232 | IORING_OP_EPOLL_CTL, |
7d67af2c | 233 | IORING_OP_SPLICE, |
ddf0322d | 234 | IORING_OP_PROVIDE_BUFFERS, |
067524e9 | 235 | IORING_OP_REMOVE_BUFFERS, |
f2a8d5c7 | 236 | IORING_OP_TEE, |
36f4fa68 | 237 | IORING_OP_SHUTDOWN, |
80a261fd | 238 | IORING_OP_RENAMEAT, |
14a1143b | 239 | IORING_OP_UNLINKAT, |
e34a02dc | 240 | IORING_OP_MKDIRAT, |
7a8721f8 | 241 | IORING_OP_SYMLINKAT, |
cf30da90 | 242 | IORING_OP_LINKAT, |
4f57f06c | 243 | IORING_OP_MSG_RING, |
e9621e2b SR |
244 | IORING_OP_FSETXATTR, |
245 | IORING_OP_SETXATTR, | |
a56834e0 SR |
246 | IORING_OP_FGETXATTR, |
247 | IORING_OP_GETXATTR, | |
1374e08e | 248 | IORING_OP_SOCKET, |
ee692a21 | 249 | IORING_OP_URING_CMD, |
b48c312b | 250 | IORING_OP_SEND_ZC, |
493108d9 | 251 | IORING_OP_SENDMSG_ZC, |
fc68fcda | 252 | IORING_OP_READ_MULTISHOT, |
f31ecf67 | 253 | IORING_OP_WAITID, |
194bb58c JA |
254 | IORING_OP_FUTEX_WAIT, |
255 | IORING_OP_FUTEX_WAKE, | |
8f350194 | 256 | IORING_OP_FUTEX_WAITV, |
dc18b89a | 257 | IORING_OP_FIXED_FD_INSTALL, |
b4bb1900 | 258 | IORING_OP_FTRUNCATE, |
9e3aa61a JA |
259 | |
260 | /* this goes last, obviously */ | |
261 | IORING_OP_LAST, | |
262 | }; | |
c992fe29 | 263 | |
9cda70f6 | 264 | /* |
528ce678 | 265 | * sqe->uring_cmd_flags top 8 bits aren't available for userspace |
6dcabcd3 | 266 | * IORING_URING_CMD_FIXED use registered buffer; pass this flag |
9cda70f6 AG |
267 | * along with setting sqe->buf_index. |
268 | */ | |
269 | #define IORING_URING_CMD_FIXED (1U << 0) | |
528ce678 | 270 | #define IORING_URING_CMD_MASK IORING_URING_CMD_FIXED |
9cda70f6 AG |
271 | |
272 | ||
c992fe29 CH |
273 | /* |
274 | * sqe->fsync_flags | |
275 | */ | |
276 | #define IORING_FSYNC_DATASYNC (1U << 0) | |
2b188cc1 | 277 | |
a41525ab JA |
278 | /* |
279 | * sqe->timeout_flags | |
280 | */ | |
f1042b6c PB |
281 | #define IORING_TIMEOUT_ABS (1U << 0) |
282 | #define IORING_TIMEOUT_UPDATE (1U << 1) | |
283 | #define IORING_TIMEOUT_BOOTTIME (1U << 2) | |
284 | #define IORING_TIMEOUT_REALTIME (1U << 3) | |
285 | #define IORING_LINK_TIMEOUT_UPDATE (1U << 4) | |
6224590d | 286 | #define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5) |
ea97f6c8 | 287 | #define IORING_TIMEOUT_MULTISHOT (1U << 6) |
50c1df2b | 288 | #define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME) |
f1042b6c | 289 | #define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE) |
7d67af2c PB |
290 | /* |
291 | * sqe->splice_flags | |
292 | * extends splice(2) flags | |
293 | */ | |
294 | #define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */ | |
295 | ||
88e41cf9 JA |
296 | /* |
297 | * POLL_ADD flags. Note that since sqe->poll_events is the flag space, the | |
298 | * command flags for POLL_ADD are stored in sqe->len. | |
299 | * | |
300 | * IORING_POLL_ADD_MULTI Multishot poll. Sets IORING_CQE_F_MORE if | |
301 | * the poll handler will continue to report | |
302 | * CQEs on behalf of the same SQE. | |
b69de288 JA |
303 | * |
304 | * IORING_POLL_UPDATE Update existing poll request, matching | |
305 | * sqe->addr as the old user_data field. | |
b9ba8a44 JA |
306 | * |
307 | * IORING_POLL_ADD_LEVEL Level triggered poll. | |
88e41cf9 JA |
308 | */ |
309 | #define IORING_POLL_ADD_MULTI (1U << 0) | |
b69de288 JA |
310 | #define IORING_POLL_UPDATE_EVENTS (1U << 1) |
311 | #define IORING_POLL_UPDATE_USER_DATA (1U << 2) | |
b9ba8a44 | 312 | #define IORING_POLL_ADD_LEVEL (1U << 3) |
88e41cf9 | 313 | |
8e29da69 JA |
314 | /* |
315 | * ASYNC_CANCEL flags. | |
316 | * | |
317 | * IORING_ASYNC_CANCEL_ALL Cancel all requests that match the given key | |
4bf94615 JA |
318 | * IORING_ASYNC_CANCEL_FD Key off 'fd' for cancelation rather than the |
319 | * request 'user_data' | |
970f256e | 320 | * IORING_ASYNC_CANCEL_ANY Match any request |
7d8ca725 | 321 | * IORING_ASYNC_CANCEL_FD_FIXED 'fd' passed in is a fixed descriptor |
8165b566 | 322 | * IORING_ASYNC_CANCEL_USERDATA Match on user_data, default for no other key |
d7b8b079 | 323 | * IORING_ASYNC_CANCEL_OP Match request based on opcode |
8e29da69 JA |
324 | */ |
325 | #define IORING_ASYNC_CANCEL_ALL (1U << 0) | |
4bf94615 | 326 | #define IORING_ASYNC_CANCEL_FD (1U << 1) |
970f256e | 327 | #define IORING_ASYNC_CANCEL_ANY (1U << 2) |
7d8ca725 | 328 | #define IORING_ASYNC_CANCEL_FD_FIXED (1U << 3) |
8165b566 | 329 | #define IORING_ASYNC_CANCEL_USERDATA (1U << 4) |
d7b8b079 | 330 | #define IORING_ASYNC_CANCEL_OP (1U << 5) |
8e29da69 | 331 | |
0455d4cc | 332 | /* |
29c1ac23 | 333 | * send/sendmsg and recv/recvmsg flags (sqe->ioprio) |
0455d4cc JA |
334 | * |
335 | * IORING_RECVSEND_POLL_FIRST If set, instead of first attempting to send | |
336 | * or receive and arm poll if that yields an | |
337 | * -EAGAIN result, arm poll upfront and skip | |
338 | * the initial transfer attempt. | |
b3fdea6e DY |
339 | * |
340 | * IORING_RECV_MULTISHOT Multishot recv. Sets IORING_CQE_F_MORE if | |
341 | * the handler will continue to report | |
342 | * CQEs on behalf of the same SQE. | |
10c7d33e PB |
343 | * |
344 | * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in | |
345 | * the buf_index field. | |
e307e669 SM |
346 | * |
347 | * IORING_SEND_ZC_REPORT_USAGE | |
348 | * If set, SEND[MSG]_ZC should report | |
349 | * the zerocopy usage in cqe.res | |
350 | * for the IORING_CQE_F_NOTIF cqe. | |
351 | * 0 is reported if zerocopy was actually possible. | |
352 | * IORING_NOTIF_USAGE_ZC_COPIED if data was copied | |
353 | * (at least partially). | |
0455d4cc JA |
354 | */ |
355 | #define IORING_RECVSEND_POLL_FIRST (1U << 0) | |
10c7d33e PB |
356 | #define IORING_RECV_MULTISHOT (1U << 1) |
357 | #define IORING_RECVSEND_FIXED_BUF (1U << 2) | |
e307e669 SM |
358 | #define IORING_SEND_ZC_REPORT_USAGE (1U << 3) |
359 | ||
360 | /* | |
361 | * cqe.res for IORING_CQE_F_NOTIF if | |
362 | * IORING_SEND_ZC_REPORT_USAGE was requested | |
363 | * | |
364 | * It should be treated as a flag, all other | |
365 | * bits of cqe.res should be treated as reserved! | |
366 | */ | |
367 | #define IORING_NOTIF_USAGE_ZC_COPIED (1U << 31) | |
0455d4cc | 368 | |
390ed29b HX |
369 | /* |
370 | * accept flags stored in sqe->ioprio | |
371 | */ | |
372 | #define IORING_ACCEPT_MULTISHOT (1U << 0) | |
373 | ||
e6130eba JA |
374 | /* |
375 | * IORING_OP_MSG_RING command types, stored in sqe->addr | |
376 | */ | |
377 | enum { | |
378 | IORING_MSG_DATA, /* pass sqe->len as 'res' and off as user_data */ | |
379 | IORING_MSG_SEND_FD, /* send a registered fd to another ring */ | |
380 | }; | |
381 | ||
382 | /* | |
383 | * IORING_OP_MSG_RING flags (sqe->msg_ring_flags) | |
384 | * | |
385 | * IORING_MSG_RING_CQE_SKIP Don't post a CQE to the target ring. Not | |
386 | * applicable for IORING_MSG_DATA, obviously. | |
387 | */ | |
388 | #define IORING_MSG_RING_CQE_SKIP (1U << 0) | |
cbeb47a7 BL |
389 | /* Pass through the flags from sqe->file_index to cqe->flags */ |
390 | #define IORING_MSG_RING_FLAGS_PASS (1U << 1) | |
e6130eba | 391 | |
dc18b89a JA |
392 | /* |
393 | * IORING_OP_FIXED_FD_INSTALL flags (sqe->install_fd_flags) | |
394 | * | |
395 | * IORING_FIXED_FD_NO_CLOEXEC Don't mark the fd as O_CLOEXEC | |
396 | */ | |
397 | #define IORING_FIXED_FD_NO_CLOEXEC (1U << 0) | |
398 | ||
2b188cc1 JA |
399 | /* |
400 | * IO completion data structure (Completion Queue Entry) | |
401 | */ | |
402 | struct io_uring_cqe { | |
403 | __u64 user_data; /* sqe->user_data value passed back */ | |
404 | __s32 res; /* result code for this event */ | |
405 | __u32 flags; | |
7a51e5b4 SR |
406 | |
407 | /* | |
408 | * If the ring is initialized with IORING_SETUP_CQE32, then this field | |
409 | * contains 16 bytes of padding, doubling the size of the CQE. | |
410 | */ | |
411 | __u64 big_cqe[]; | |
2b188cc1 JA |
412 | }; |
413 | ||
bcda7baa JA |
414 | /* |
415 | * cqe->flags | |
416 | * | |
417 | * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID | |
88e41cf9 | 418 | * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries |
f548a12e | 419 | * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv |
b48c312b PB |
420 | * IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to distinguish |
421 | * them from sends. | |
bcda7baa JA |
422 | */ |
423 | #define IORING_CQE_F_BUFFER (1U << 0) | |
88e41cf9 | 424 | #define IORING_CQE_F_MORE (1U << 1) |
f548a12e | 425 | #define IORING_CQE_F_SOCK_NONEMPTY (1U << 2) |
b48c312b | 426 | #define IORING_CQE_F_NOTIF (1U << 3) |
bcda7baa JA |
427 | |
428 | enum { | |
429 | IORING_CQE_BUFFER_SHIFT = 16, | |
430 | }; | |
431 | ||
2b188cc1 JA |
432 | /* |
433 | * Magic offsets for the application to mmap the data it needs | |
434 | */ | |
435 | #define IORING_OFF_SQ_RING 0ULL | |
436 | #define IORING_OFF_CQ_RING 0x8000000ULL | |
437 | #define IORING_OFF_SQES 0x10000000ULL | |
c56e022c JA |
438 | #define IORING_OFF_PBUF_RING 0x80000000ULL |
439 | #define IORING_OFF_PBUF_SHIFT 16 | |
440 | #define IORING_OFF_MMAP_MASK 0xf8000000ULL | |
2b188cc1 JA |
441 | |
442 | /* | |
443 | * Filled with the offset for mmap(2) | |
444 | */ | |
445 | struct io_sqring_offsets { | |
446 | __u32 head; | |
447 | __u32 tail; | |
448 | __u32 ring_mask; | |
449 | __u32 ring_entries; | |
450 | __u32 flags; | |
451 | __u32 dropped; | |
452 | __u32 array; | |
453 | __u32 resv1; | |
03d89a2d | 454 | __u64 user_addr; |
2b188cc1 JA |
455 | }; |
456 | ||
6c271ce2 JA |
457 | /* |
458 | * sq_ring->flags | |
459 | */ | |
460 | #define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */ | |
6d5f9049 | 461 | #define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring has overflowed */ |
ef060ea9 | 462 | #define IORING_SQ_TASKRUN (1U << 2) /* task should enter the kernel */ |
6c271ce2 | 463 | |
2b188cc1 JA |
464 | struct io_cqring_offsets { |
465 | __u32 head; | |
466 | __u32 tail; | |
467 | __u32 ring_mask; | |
468 | __u32 ring_entries; | |
469 | __u32 overflow; | |
470 | __u32 cqes; | |
0d9b5b3a SG |
471 | __u32 flags; |
472 | __u32 resv1; | |
03d89a2d | 473 | __u64 user_addr; |
2b188cc1 JA |
474 | }; |
475 | ||
7e55a19c SG |
476 | /* |
477 | * cq_ring->flags | |
478 | */ | |
479 | ||
480 | /* disable eventfd notifications */ | |
481 | #define IORING_CQ_EVENTFD_DISABLED (1U << 0) | |
482 | ||
2b188cc1 JA |
483 | /* |
484 | * io_uring_enter(2) flags | |
485 | */ | |
e7a6c00d JA |
486 | #define IORING_ENTER_GETEVENTS (1U << 0) |
487 | #define IORING_ENTER_SQ_WAKEUP (1U << 1) | |
488 | #define IORING_ENTER_SQ_WAIT (1U << 2) | |
489 | #define IORING_ENTER_EXT_ARG (1U << 3) | |
490 | #define IORING_ENTER_REGISTERED_RING (1U << 4) | |
2b188cc1 JA |
491 | |
492 | /* | |
493 | * Passed in for io_uring_setup(2). Copied back with updated info on success | |
494 | */ | |
495 | struct io_uring_params { | |
496 | __u32 sq_entries; | |
497 | __u32 cq_entries; | |
498 | __u32 flags; | |
6c271ce2 JA |
499 | __u32 sq_thread_cpu; |
500 | __u32 sq_thread_idle; | |
ac90f249 | 501 | __u32 features; |
24369c2e PB |
502 | __u32 wq_fd; |
503 | __u32 resv[3]; | |
2b188cc1 JA |
504 | struct io_sqring_offsets sq_off; |
505 | struct io_cqring_offsets cq_off; | |
506 | }; | |
507 | ||
ac90f249 JA |
508 | /* |
509 | * io_uring_params->features flags | |
510 | */ | |
511 | #define IORING_FEAT_SINGLE_MMAP (1U << 0) | |
1d7bb1d5 | 512 | #define IORING_FEAT_NODROP (1U << 1) |
da8c9690 | 513 | #define IORING_FEAT_SUBMIT_STABLE (1U << 2) |
ba04291e | 514 | #define IORING_FEAT_RW_CUR_POS (1U << 3) |
cccf0ee8 | 515 | #define IORING_FEAT_CUR_PERSONALITY (1U << 4) |
d7718a9d | 516 | #define IORING_FEAT_FAST_POLL (1U << 5) |
5769a351 | 517 | #define IORING_FEAT_POLL_32BITS (1U << 6) |
28cea78a | 518 | #define IORING_FEAT_SQPOLL_NONFIXED (1U << 7) |
c73ebb68 | 519 | #define IORING_FEAT_EXT_ARG (1U << 8) |
1c0aa1fa | 520 | #define IORING_FEAT_NATIVE_WORKERS (1U << 9) |
9690557e | 521 | #define IORING_FEAT_RSRC_TAGS (1U << 10) |
04c76b41 | 522 | #define IORING_FEAT_CQE_SKIP (1U << 11) |
c4212f3e | 523 | #define IORING_FEAT_LINKED_FILE (1U << 12) |
7d3fd88d | 524 | #define IORING_FEAT_REG_REG_RING (1U << 13) |
ac90f249 | 525 | |
edafccee JA |
526 | /* |
527 | * io_uring_register(2) opcodes and arguments | |
528 | */ | |
9d4a75ef SG |
529 | enum { |
530 | IORING_REGISTER_BUFFERS = 0, | |
531 | IORING_UNREGISTER_BUFFERS = 1, | |
532 | IORING_REGISTER_FILES = 2, | |
533 | IORING_UNREGISTER_FILES = 3, | |
534 | IORING_REGISTER_EVENTFD = 4, | |
535 | IORING_UNREGISTER_EVENTFD = 5, | |
536 | IORING_REGISTER_FILES_UPDATE = 6, | |
537 | IORING_REGISTER_EVENTFD_ASYNC = 7, | |
538 | IORING_REGISTER_PROBE = 8, | |
539 | IORING_REGISTER_PERSONALITY = 9, | |
540 | IORING_UNREGISTER_PERSONALITY = 10, | |
21b55dbc | 541 | IORING_REGISTER_RESTRICTIONS = 11, |
7e84e1c7 | 542 | IORING_REGISTER_ENABLE_RINGS = 12, |
992da01a PB |
543 | |
544 | /* extended with tagging */ | |
545 | IORING_REGISTER_FILES2 = 13, | |
546 | IORING_REGISTER_FILES_UPDATE2 = 14, | |
547 | IORING_REGISTER_BUFFERS2 = 15, | |
548 | IORING_REGISTER_BUFFERS_UPDATE = 16, | |
9d4a75ef | 549 | |
fe76421d JA |
550 | /* set/clear io-wq thread affinities */ |
551 | IORING_REGISTER_IOWQ_AFF = 17, | |
552 | IORING_UNREGISTER_IOWQ_AFF = 18, | |
553 | ||
dd47c104 | 554 | /* set/get max number of io-wq workers */ |
2e480058 JA |
555 | IORING_REGISTER_IOWQ_MAX_WORKERS = 19, |
556 | ||
e7a6c00d JA |
557 | /* register/unregister io_uring fd with the ring */ |
558 | IORING_REGISTER_RING_FDS = 20, | |
559 | IORING_UNREGISTER_RING_FDS = 21, | |
560 | ||
c7fb1942 JA |
561 | /* register ring based provide buffer group */ |
562 | IORING_REGISTER_PBUF_RING = 22, | |
563 | IORING_UNREGISTER_PBUF_RING = 23, | |
564 | ||
78a861b9 JA |
565 | /* sync cancelation API */ |
566 | IORING_REGISTER_SYNC_CANCEL = 24, | |
567 | ||
6e73dffb PB |
568 | /* register a range of fixed file slots for automatic slot allocation */ |
569 | IORING_REGISTER_FILE_ALLOC_RANGE = 25, | |
570 | ||
d293b1a8 JA |
571 | /* return status information for a buffer group */ |
572 | IORING_REGISTER_PBUF_STATUS = 26, | |
573 | ||
ef1186c1 SR |
574 | /* set/clear busy poll settings */ |
575 | IORING_REGISTER_NAPI = 27, | |
576 | IORING_UNREGISTER_NAPI = 28, | |
577 | ||
9d4a75ef | 578 | /* this goes last */ |
7d3fd88d JT |
579 | IORING_REGISTER_LAST, |
580 | ||
581 | /* flag added to the opcode to use a registered ring fd */ | |
582 | IORING_REGISTER_USE_REGISTERED_RING = 1U << 31 | |
9d4a75ef | 583 | }; |
c3a31e60 | 584 | |
dd47c104 ES |
585 | /* io-wq worker categories */ |
586 | enum { | |
587 | IO_WQ_BOUND, | |
588 | IO_WQ_UNBOUND, | |
589 | }; | |
590 | ||
269bbe5f | 591 | /* deprecated, see struct io_uring_rsrc_update */ |
c3a31e60 JA |
592 | struct io_uring_files_update { |
593 | __u32 offset; | |
1292e972 ES |
594 | __u32 resv; |
595 | __aligned_u64 /* __s32 * */ fds; | |
c3a31e60 | 596 | }; |
edafccee | 597 | |
a8da73a3 JA |
598 | /* |
599 | * Register a fully sparse file space, rather than pass in an array of all | |
600 | * -1 file descriptors. | |
601 | */ | |
602 | #define IORING_RSRC_REGISTER_SPARSE (1U << 0) | |
603 | ||
792e3582 | 604 | struct io_uring_rsrc_register { |
792e3582 | 605 | __u32 nr; |
a8da73a3 | 606 | __u32 flags; |
992da01a | 607 | __u64 resv2; |
792e3582 PB |
608 | __aligned_u64 data; |
609 | __aligned_u64 tags; | |
610 | }; | |
611 | ||
c3bdad02 PB |
612 | struct io_uring_rsrc_update { |
613 | __u32 offset; | |
614 | __u32 resv; | |
615 | __aligned_u64 data; | |
616 | }; | |
617 | ||
618 | struct io_uring_rsrc_update2 { | |
619 | __u32 offset; | |
620 | __u32 resv; | |
621 | __aligned_u64 data; | |
622 | __aligned_u64 tags; | |
c3bdad02 | 623 | __u32 nr; |
992da01a | 624 | __u32 resv2; |
c3bdad02 PB |
625 | }; |
626 | ||
4e0377a1 | 627 | /* Skip updating fd indexes set to this value in the fd table */ |
628 | #define IORING_REGISTER_FILES_SKIP (-2) | |
629 | ||
66f4af93 JA |
630 | #define IO_URING_OP_SUPPORTED (1U << 0) |
631 | ||
632 | struct io_uring_probe_op { | |
633 | __u8 op; | |
634 | __u8 resv; | |
635 | __u16 flags; /* IO_URING_OP_* flags */ | |
636 | __u32 resv2; | |
637 | }; | |
638 | ||
639 | struct io_uring_probe { | |
640 | __u8 last_op; /* last opcode supported */ | |
641 | __u8 ops_len; /* length of ops[] array below */ | |
642 | __u16 resv; | |
643 | __u32 resv2[3]; | |
8fcf4c48 | 644 | struct io_uring_probe_op ops[]; |
66f4af93 JA |
645 | }; |
646 | ||
21b55dbc SG |
647 | struct io_uring_restriction { |
648 | __u16 opcode; | |
649 | union { | |
650 | __u8 register_op; /* IORING_RESTRICTION_REGISTER_OP */ | |
651 | __u8 sqe_op; /* IORING_RESTRICTION_SQE_OP */ | |
652 | __u8 sqe_flags; /* IORING_RESTRICTION_SQE_FLAGS_* */ | |
653 | }; | |
654 | __u8 resv; | |
655 | __u32 resv2[3]; | |
656 | }; | |
657 | ||
c7fb1942 JA |
658 | struct io_uring_buf { |
659 | __u64 addr; | |
660 | __u32 len; | |
661 | __u16 bid; | |
662 | __u16 resv; | |
663 | }; | |
664 | ||
665 | struct io_uring_buf_ring { | |
666 | union { | |
667 | /* | |
668 | * To avoid spilling into more pages than we need to, the | |
669 | * ring tail is overlaid with the io_uring_buf->resv field. | |
670 | */ | |
671 | struct { | |
672 | __u64 resv1; | |
673 | __u32 resv2; | |
674 | __u16 resv3; | |
675 | __u16 tail; | |
676 | }; | |
36632d06 | 677 | __DECLARE_FLEX_ARRAY(struct io_uring_buf, bufs); |
c7fb1942 JA |
678 | }; |
679 | }; | |
680 | ||
c56e022c JA |
681 | /* |
682 | * Flags for IORING_REGISTER_PBUF_RING. | |
683 | * | |
684 | * IOU_PBUF_RING_MMAP: If set, kernel will allocate the memory for the ring. | |
685 | * The application must not set a ring_addr in struct | |
686 | * io_uring_buf_reg, instead it must subsequently call | |
687 | * mmap(2) with the offset set as: | |
688 | * IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT) | |
689 | * to get a virtual mapping for the ring. | |
690 | */ | |
691 | enum { | |
692 | IOU_PBUF_RING_MMAP = 1, | |
693 | }; | |
694 | ||
c7fb1942 JA |
695 | /* argument for IORING_(UN)REGISTER_PBUF_RING */ |
696 | struct io_uring_buf_reg { | |
697 | __u64 ring_addr; | |
698 | __u32 ring_entries; | |
699 | __u16 bgid; | |
81cf17cd | 700 | __u16 flags; |
c7fb1942 JA |
701 | __u64 resv[3]; |
702 | }; | |
703 | ||
d293b1a8 JA |
704 | /* argument for IORING_REGISTER_PBUF_STATUS */ |
705 | struct io_uring_buf_status { | |
706 | __u32 buf_group; /* input */ | |
707 | __u32 head; /* output */ | |
708 | __u32 resv[8]; | |
709 | }; | |
710 | ||
ef1186c1 SR |
711 | /* argument for IORING_(UN)REGISTER_NAPI */ |
712 | struct io_uring_napi { | |
713 | __u32 busy_poll_to; | |
714 | __u8 prefer_busy_poll; | |
715 | __u8 pad[3]; | |
716 | __u64 resv; | |
717 | }; | |
718 | ||
21b55dbc SG |
719 | /* |
720 | * io_uring_restriction->opcode values | |
721 | */ | |
722 | enum { | |
723 | /* Allow an io_uring_register(2) opcode */ | |
724 | IORING_RESTRICTION_REGISTER_OP = 0, | |
725 | ||
726 | /* Allow an sqe opcode */ | |
727 | IORING_RESTRICTION_SQE_OP = 1, | |
728 | ||
729 | /* Allow sqe flags */ | |
730 | IORING_RESTRICTION_SQE_FLAGS_ALLOWED = 2, | |
731 | ||
732 | /* Require sqe flags (these flags must be set on each submission) */ | |
733 | IORING_RESTRICTION_SQE_FLAGS_REQUIRED = 3, | |
734 | ||
735 | IORING_RESTRICTION_LAST | |
736 | }; | |
737 | ||
c73ebb68 HX |
738 | struct io_uring_getevents_arg { |
739 | __u64 sigmask; | |
740 | __u32 sigmask_sz; | |
741 | __u32 pad; | |
742 | __u64 ts; | |
743 | }; | |
744 | ||
78a861b9 JA |
745 | /* |
746 | * Argument for IORING_REGISTER_SYNC_CANCEL | |
747 | */ | |
748 | struct io_uring_sync_cancel_reg { | |
749 | __u64 addr; | |
750 | __s32 fd; | |
751 | __u32 flags; | |
752 | struct __kernel_timespec timeout; | |
f77569d2 JA |
753 | __u8 opcode; |
754 | __u8 pad[7]; | |
755 | __u64 pad2[3]; | |
78a861b9 JA |
756 | }; |
757 | ||
6e73dffb PB |
758 | /* |
759 | * Argument for IORING_REGISTER_FILE_ALLOC_RANGE | |
760 | * The range is specified as [off, off + len) | |
761 | */ | |
762 | struct io_uring_file_index_range { | |
763 | __u32 off; | |
764 | __u32 len; | |
765 | __u64 resv; | |
766 | }; | |
767 | ||
9bb66906 DY |
768 | struct io_uring_recvmsg_out { |
769 | __u32 namelen; | |
770 | __u32 controllen; | |
771 | __u32 payloadlen; | |
772 | __u32 flags; | |
773 | }; | |
774 | ||
8e9fad0e BL |
775 | /* |
776 | * Argument for IORING_OP_URING_CMD when file is a socket | |
777 | */ | |
778 | enum { | |
779 | SOCKET_URING_OP_SIOCINQ = 0, | |
780 | SOCKET_URING_OP_SIOCOUTQ, | |
a5d2f99a | 781 | SOCKET_URING_OP_GETSOCKOPT, |
4232c6e3 | 782 | SOCKET_URING_OP_SETSOCKOPT, |
8e9fad0e BL |
783 | }; |
784 | ||
e1d0c6d0 AF |
785 | #ifdef __cplusplus |
786 | } | |
787 | #endif | |
788 | ||
2b188cc1 | 789 | #endif |