summaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2019-07-24 09:24:50 +0100
committerJens Axboe <axboe@kernel.dk>2019-07-24 09:11:44 -0600
commitc31c7ec4bcd7bb0d7b28897d730431c02b9d4ea1 (patch)
treebaffe189e6dc69fa17adc8fe08ad1d133b328ade /src/include
parentb42c59d7afc79370cf0140d5b9978b1e8c350408 (diff)
downloadliburing-c31c7ec4bcd7bb0d7b28897d730431c02b9d4ea1.tar.gz
liburing-c31c7ec4bcd7bb0d7b28897d730431c02b9d4ea1.tar.bz2
src/Makefile: keep private headers in <liburing/*.h>
It is not possible to install barrier.h and compat.h into the top-level /usr/include directly since they are likely to conflict with other software. io_uring.h could be confused with the system's kernel header file. Put liburing headers into <liburing/*.h> so there is no chance of conflicts or confusion. Existing applications continue to build successfully since the location of <liburing.h> is unchanged. In-tree examples and tests require modification because src/liburing.h is moved to src/include/liburing.h. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'src/include')
-rw-r--r--src/include/liburing.h221
-rw-r--r--src/include/liburing/barrier.h87
-rw-r--r--src/include/liburing/compat.h8
-rw-r--r--src/include/liburing/io_uring.h146
4 files changed, 462 insertions, 0 deletions
diff --git a/src/include/liburing.h b/src/include/liburing.h
new file mode 100644
index 0000000..fb78cd3
--- /dev/null
+++ b/src/include/liburing.h
@@ -0,0 +1,221 @@
+#ifndef LIB_URING_H
+#define LIB_URING_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/uio.h>
+#include <signal.h>
+#include <string.h>
+#include <inttypes.h>
+#include "liburing/compat.h"
+#include "liburing/io_uring.h"
+#include "liburing/barrier.h"
+
+/*
+ * Library interface to io_uring
+ */
+struct io_uring_sq { /* submission-queue state; k* names match the members of struct io_sqring_offsets */
+ unsigned *khead;
+ unsigned *ktail;
+ unsigned *kring_mask;
+ unsigned *kring_entries;
+ unsigned *kflags;
+ unsigned *kdropped;
+ unsigned *array;
+ struct io_uring_sqe *sqes; /* NOTE(review): presumably the IORING_OFF_SQES mapping -- confirm in the setup code */
+
+ unsigned sqe_head; /* plain values, not pointers; how they are advanced is not visible in this header */
+ unsigned sqe_tail;
+
+ size_t ring_sz; /* NOTE(review): presumably size and base of the mmap'ed ring -- confirm */
+ void *ring_ptr;
+};
+
+struct io_uring_cq { /* completion-queue state; k* names match the members of struct io_cqring_offsets */
+ unsigned *khead; /* start index for io_uring_for_each_cqe(); advanced by io_uring_cq_advance() */
+ unsigned *ktail; /* read with smp_load_acquire() in io_uring_for_each_cqe() */
+ unsigned *kring_mask; /* ANDed with the head index to select a slot in cqes[] */
+ unsigned *kring_entries;
+ unsigned *koverflow;
+ struct io_uring_cqe *cqes; /* CQE array indexed by (head & *kring_mask) */
+
+ size_t ring_sz; /* NOTE(review): presumably size and base of the mmap'ed ring -- confirm */
+ void *ring_ptr;
+};
+
+struct io_uring { /* one ring instance: both queues plus the handle the library calls operate on */
+ struct io_uring_sq sq;
+ struct io_uring_cq cq;
+ unsigned flags; /* NOTE(review): presumably the IORING_SETUP_ flags given at init -- confirm */
+ int ring_fd; /* NOTE(review): presumably the descriptor returned by io_uring_setup() -- confirm */
+};
+
+/*
+ * System calls
+ */
+extern int io_uring_setup(unsigned entries, struct io_uring_params *p);
+extern int io_uring_enter(unsigned fd, unsigned to_submit,
+ unsigned min_complete, unsigned flags, sigset_t *sig);
+extern int io_uring_register(int fd, unsigned int opcode, const void *arg,
+ unsigned int nr_args);
+
+/*
+ * Library interface
+ */
+extern int io_uring_queue_init(unsigned entries, struct io_uring *ring,
+ unsigned flags);
+extern int io_uring_queue_mmap(int fd, struct io_uring_params *p,
+ struct io_uring *ring);
+extern void io_uring_queue_exit(struct io_uring *ring);
+extern int io_uring_peek_cqe(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr);
+extern int io_uring_wait_cqe(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr);
+extern int io_uring_submit(struct io_uring *ring);
+extern int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr);
+extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);
+
+extern int io_uring_register_buffers(struct io_uring *ring,
+ const struct iovec *iovecs,
+ unsigned nr_iovecs);
+extern int io_uring_unregister_buffers(struct io_uring *ring);
+extern int io_uring_register_files(struct io_uring *ring, const int *files,
+ unsigned nr_files);
+extern int io_uring_unregister_files(struct io_uring *ring);
+extern int io_uring_register_eventfd(struct io_uring *ring, int fd);
+extern int io_uring_unregister_eventfd(struct io_uring *ring);
+
+/* Walk the CQEs between cq.khead and cq.ktail; smp_load_acquire() orders the tail read before the CQE reads. */
+#define io_uring_for_each_cqe(ring, head, cqe) \
+ for ((head) = *(ring)->cq.khead; \
+ ((cqe) = ((head) != smp_load_acquire((ring)->cq.ktail) ? \
+ &(ring)->cq.cqes[(head) & (*(ring)->cq.kring_mask)] : NULL)); \
+ (head)++)
+
+
+/*
+ * Must be called after io_uring_for_each_cqe(); moves the CQ head forward by nr entries (nr == 0 does nothing).
+ */
+static inline void io_uring_cq_advance(struct io_uring *ring,
+ unsigned nr)
+{
+ if (nr) {
+ struct io_uring_cq *cq = &ring->cq;
+
+ /*
+ * Ensure that the kernel only sees the new value of the head
+ * index after the CQEs have been read.
+ */
+ smp_store_release(cq->khead, *cq->khead + nr);
+ }
+}
+
+/*
+ * Must be called after io_uring_{peek,wait}_cqe() after the cqe has
+ * been processed by the application. Advances the CQ head by one; a NULL cqe does nothing.
+ */
+static inline void io_uring_cqe_seen(struct io_uring *ring,
+ struct io_uring_cqe *cqe)
+{
+ if (cqe)
+ io_uring_cq_advance(ring, 1);
+}
+
+/*
+ * Command prep helpers
+ */
+static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
+{ /* Store an application pointer in sqe->user_data ("data to be passed back at completion time"). */
+ sqe->user_data = (uintptr_t) data; /* same integer type io_uring_cqe_get_data() casts through */
+}
+
+static inline void *io_uring_cqe_get_data(struct io_uring_cqe *cqe)
+{ /* Return cqe->user_data converted back to a pointer. */
+ return (void *) (uintptr_t) cqe->user_data; /* __u64 -> pointer-sized integer -> pointer */
+}
+
+static inline void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
+ unsigned flags)
+{ /* Replace (not OR into) the IOSQE_ flags of sqe. */
+ sqe->flags = flags; /* sqe->flags is a __u8: bits above the low 8 are dropped */
+}
+
+static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
+ const void *addr, unsigned len,
+ off_t offset)
+{ /* Common filler for the read/write helpers below: resets *sqe, then sets opcode, fd, off, addr and len. */
+ memset(sqe, 0, sizeof(*sqe)); /* also clears flags and user_data, so set those after this call */
+ sqe->opcode = op; /* stored in a __u8; callers in this header pass IORING_OP_ constants */
+ sqe->fd = fd;
+ sqe->off = offset;
+ sqe->addr = (uintptr_t) addr; /* pointer carried as an integer, cast as in io_uring_cqe_get_data() */
+ sqe->len = len; /* buffer size or number of iovecs */
+}
+
+static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
+ const struct iovec *iovecs,
+ unsigned nr_vecs, off_t offset)
+{ /* Fill *sqe as an IORING_OP_READV request via io_uring_prep_rw(), which first zeroes the whole sqe. */
+ io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset); /* addr = iovecs, len = nr_vecs */
+}
+
+static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
+ void *buf, unsigned nbytes,
+ off_t offset)
+{ /* Fill *sqe as an IORING_OP_READ_FIXED request; buf_index is not set here and stays 0 from the memset. */
+ io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset); /* addr = buf, len = nbytes */
+}
+
+static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
+ const struct iovec *iovecs,
+ unsigned nr_vecs, off_t offset)
+{ /* Fill *sqe as an IORING_OP_WRITEV request via io_uring_prep_rw(), which first zeroes the whole sqe. */
+ io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset); /* addr = iovecs, len = nr_vecs */
+}
+
+static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
+ const void *buf, unsigned nbytes,
+ off_t offset)
+{ /* Fill *sqe as an IORING_OP_WRITE_FIXED request; buf_index is not set here and stays 0 from the memset. */
+ io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset); /* addr = buf, len = nbytes */
+}
+
+static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
+ short poll_mask)
+{ /* Fill *sqe as an IORING_OP_POLL_ADD request on fd. */
+ memset(sqe, 0, sizeof(*sqe)); /* clears flags and user_data too, so set those after this call */
+ sqe->opcode = IORING_OP_POLL_ADD;
+ sqe->fd = fd;
+ sqe->poll_events = poll_mask; /* short converted to the __u16 poll_events member */
+}
+
+static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
+ void *user_data)
+{ /* Fill *sqe as an IORING_OP_POLL_REMOVE request; no fd is set. */
+ memset(sqe, 0, sizeof(*sqe)); /* clears this sqe's own user_data too, so set it after this call */
+ sqe->opcode = IORING_OP_POLL_REMOVE;
+ sqe->addr = (uintptr_t) user_data; /* NOTE(review): presumably the user_data of the poll request to cancel -- confirm */
+}
+
+static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
+ unsigned fsync_flags)
+{ /* Fill *sqe as an IORING_OP_FSYNC request on fd. */
+ memset(sqe, 0, sizeof(*sqe)); /* clears flags and user_data too, so set those after this call */
+ sqe->opcode = IORING_OP_FSYNC;
+ sqe->fd = fd;
+ sqe->fsync_flags = fsync_flags; /* IORING_FSYNC_DATASYNC is the only fsync flag liburing/io_uring.h defines */
+}
+
+static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
+{ /* Fill *sqe as an IORING_OP_NOP request; every other field is zero. */
+ memset(sqe, 0, sizeof(*sqe)); /* clears flags and user_data too, so set those after this call */
+ sqe->opcode = IORING_OP_NOP;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/include/liburing/barrier.h b/src/include/liburing/barrier.h
new file mode 100644
index 0000000..98be9e5
--- /dev/null
+++ b/src/include/liburing/barrier.h
@@ -0,0 +1,87 @@
+#ifndef LIBURING_BARRIER_H
+#define LIBURING_BARRIER_H
+
+/*
+From the kernel documentation file refcount-vs-atomic.rst:
+
+A RELEASE memory ordering guarantees that all prior loads and
+stores (all po-earlier instructions) on the same CPU are completed
+before the operation. It also guarantees that all po-earlier
+stores on the same CPU and all propagated stores from other CPUs
+must propagate to all other CPUs before the release operation
+(A-cumulative property). This is implemented using
+:c:func:`smp_store_release`.
+
+An ACQUIRE memory ordering guarantees that all post loads and
+stores (all po-later instructions) on the same CPU are
+completed after the acquire operation. It also guarantees that all
+po-later stores on the same CPU must propagate to all other CPUs
+after the acquire operation executes. This is implemented using
+:c:func:`smp_acquire__after_ctrl_dep`.
+*/
+
+/* From tools/include/linux/compiler.h */
+/* Optimization barrier */
+/* The "volatile" is due to gcc bugs */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+/* Adapted from tools/virtio/linux/compiler.h; the volatile access is typed after var, so its width never depends on val */
+#define WRITE_ONCE(var, val) \
+ (*((volatile __typeof(var) *)(&(var))) = (val))
+#define READ_ONCE(var) (*((volatile __typeof(var) *)(&(var))))
+
+
+#if defined(__x86_64__) || defined(__i386__)
+/* Adapted from arch/x86/include/asm/barrier.h; __asm__ (not asm) so the macros also expand under -std=c99/-std=c11 */
+#define mb() __asm__ __volatile__("mfence" ::: "memory")
+#define rmb() __asm__ __volatile__("lfence" ::: "memory")
+#define wmb() __asm__ __volatile__("sfence" ::: "memory")
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#if defined(__i386__)
+#define smp_mb() __asm__ __volatile__("lock; addl $0,0(%%esp)" ::: "memory", "cc")
+#else
+#define smp_mb() __asm__ __volatile__("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
+#endif
+
+#define smp_store_release(p, v) \
+do { \
+ barrier(); /* compiler-only barrier: no fence instruction is emitted in this variant */ \
+ WRITE_ONCE(*(p), (v)); \
+} while (0)
+
+#define smp_load_acquire(p) \
+({ \
+ __typeof(*(p)) ___p1 = READ_ONCE(*(p)); \
+ barrier(); /* compiler-only barrier after the load; the loaded value is the macro's result */ \
+ ___p1; \
+})
+#else /* defined(__x86_64__) || defined(__i386__) */
+/*
+ * No arch-specific definitions: be safe and use full barriers. smp_mb() is needed by the generic macros below.
+ */
+#define smp_mb() __sync_synchronize()
+#define smp_rmb() __sync_synchronize()
+#define smp_wmb() __sync_synchronize()
+#endif /* defined(__x86_64__) || defined(__i386__) */
+
+/* From tools/include/asm/barrier.h: generic versions, compiled only when no definition exists yet */
+
+#ifndef smp_store_release
+# define smp_store_release(p, v) \
+do { \
+ smp_mb(); /* full barrier issued before the store */ \
+ WRITE_ONCE(*(p), (v)); \
+} while (0)
+#endif
+
+#ifndef smp_load_acquire
+# define smp_load_acquire(p) \
+({ \
+ __typeof(*(p)) ___p1 = READ_ONCE(*(p)); \
+ smp_mb(); /* full barrier issued after the load; the loaded value is the macro's result */ \
+ ___p1; \
+})
+#endif
+
+#endif /* defined(LIBURING_BARRIER_H) */
diff --git a/src/include/liburing/compat.h b/src/include/liburing/compat.h
new file mode 100644
index 0000000..d322499
--- /dev/null
+++ b/src/include/liburing/compat.h
@@ -0,0 +1,8 @@
+#ifndef LIBURING_COMPAT_H
+#define LIBURING_COMPAT_H
+
+#if !defined(CONFIG_HAVE_KERNEL_RWF_T) /* NOTE(review): presumably defined by the build when the kernel headers provide the type -- confirm */
+typedef int __kernel_rwf_t; /* fallback so the rw_flags member of struct io_uring_sqe has a type */
+#endif
+
+#endif
diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
new file mode 100644
index 0000000..a61c4a6
--- /dev/null
+++ b/src/include/liburing/io_uring.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Header file for the io_uring interface.
+ *
+ * Copyright (C) 2019 Jens Axboe
+ * Copyright (C) 2019 Christoph Hellwig
+ */
+#ifndef LINUX_IO_URING_H
+#define LINUX_IO_URING_H
+
+#include <linux/fs.h>
+#include <linux/types.h>
+
+/*
+ * IO submission data structure (Submission Queue Entry)
+ */
+struct io_uring_sqe {
+ __u8 opcode; /* type of operation for this sqe */
+ __u8 flags; /* IOSQE_ flags */
+ __u16 ioprio; /* ioprio for the request */
+ __s32 fd; /* file descriptor to do IO on */
+ __u64 off; /* offset into file */
+ __u64 addr; /* pointer to buffer or iovecs */
+ __u32 len; /* buffer size or number of iovecs */
+ union { /* members overlap; NOTE(review): presumably selected by opcode -- confirm */
+ __kernel_rwf_t rw_flags;
+ __u32 fsync_flags; /* IORING_FSYNC_ flags */
+ __u16 poll_events;
+ __u32 sync_range_flags;
+ __u32 msg_flags;
+ };
+ __u64 user_data; /* data to be passed back at completion time */
+ union {
+ __u16 buf_index; /* index into fixed buffers, if used */
+ __u64 __pad2[3]; /* 3 x 8 bytes: fixes the size of this trailing union */
+ };
+};
+
+/*
+ * sqe->flags
+ */
+#define IOSQE_FIXED_FILE (1U << 0) /* use fixed fileset */
+#define IOSQE_IO_DRAIN (1U << 1) /* issue after inflight IO */
+#define IOSQE_IO_LINK (1U << 2) /* next IO depends on this one */
+
+/*
+ * io_uring_setup() flags
+ */
+#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */
+#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */
+#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */
+
+#define IORING_OP_NOP 0
+#define IORING_OP_READV 1
+#define IORING_OP_WRITEV 2
+#define IORING_OP_FSYNC 3
+#define IORING_OP_READ_FIXED 4
+#define IORING_OP_WRITE_FIXED 5
+#define IORING_OP_POLL_ADD 6
+#define IORING_OP_POLL_REMOVE 7
+#define IORING_OP_SYNC_FILE_RANGE 8
+#define IORING_OP_SENDMSG 9
+#define IORING_OP_RECVMSG 10
+
+/*
+ * sqe->fsync_flags
+ */
+#define IORING_FSYNC_DATASYNC (1U << 0)
+
+/*
+ * IO completion data structure (Completion Queue Entry)
+ */
+struct io_uring_cqe {
+ __u64 user_data; /* sqe->user_data of the submission, passed back */
+ __s32 res; /* result code for this event */
+ __u32 flags; /* NOTE(review): meaning is not documented in this header */
+};
+
+/*
+ * Magic offsets for the application to mmap the data it needs
+ */
+#define IORING_OFF_SQ_RING 0ULL
+#define IORING_OFF_CQ_RING 0x8000000ULL
+#define IORING_OFF_SQES 0x10000000ULL
+
+/*
+ * Filled with the offset for mmap(2)
+ */
+struct io_sqring_offsets { /* every member is an offset, not a value; names match the k* pointers of struct io_uring_sq */
+ __u32 head;
+ __u32 tail;
+ __u32 ring_mask;
+ __u32 ring_entries;
+ __u32 flags; /* offset of the word holding the sq_ring->flags bits (IORING_SQ_NEED_WAKEUP) */
+ __u32 dropped;
+ __u32 array;
+ __u32 resv1; /* NOTE(review): resv* look like reserved padding -- confirm */
+ __u64 resv2;
+};
+
+/*
+ * sq_ring->flags
+ */
+#define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */
+
+struct io_cqring_offsets { /* CQ counterpart of struct io_sqring_offsets; names match the members of struct io_uring_cq */
+ __u32 head;
+ __u32 tail;
+ __u32 ring_mask;
+ __u32 ring_entries;
+ __u32 overflow;
+ __u32 cqes; /* NOTE(review): presumably the offset of the struct io_uring_cqe array -- confirm */
+ __u64 resv[2];
+};
+
+/*
+ * io_uring_enter(2) flags
+ */
+#define IORING_ENTER_GETEVENTS (1U << 0)
+#define IORING_ENTER_SQ_WAKEUP (1U << 1)
+
+/*
+ * Passed in for io_uring_setup(2). Copied back with updated info on success
+ */
+struct io_uring_params {
+ __u32 sq_entries;
+ __u32 cq_entries;
+ __u32 flags; /* IORING_SETUP_ flags */
+ __u32 sq_thread_cpu; /* valid when IORING_SETUP_SQ_AFF is set */
+ __u32 sq_thread_idle;
+ __u32 resv[5];
+ struct io_sqring_offsets sq_off; /* offsets for mmap(2) of the SQ ring */
+ struct io_cqring_offsets cq_off; /* CQ ring offsets, same scheme as sq_off */
+};
+
+/*
+ * io_uring_register(2) opcodes and arguments
+ */
+#define IORING_REGISTER_BUFFERS 0
+#define IORING_UNREGISTER_BUFFERS 1
+#define IORING_REGISTER_FILES 2
+#define IORING_UNREGISTER_FILES 3
+#define IORING_REGISTER_EVENTFD 4
+#define IORING_UNREGISTER_EVENTFD 5
+
+#endif