summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2019-09-26 06:41:05 -0600
committerJens Axboe <axboe@kernel.dk>2020-07-07 09:02:24 -0600
commit4aad4291b7f7b346fd71b0a69ab74bbf2723ca22 (patch)
treef8e89941a3774261f0841a83c9c04ec67a46edc0
parentcd947148112b7bd71f1478d1c1b6c75eeaa16f6e (diff)
downloadliburing-4aad4291b7f7b346fd71b0a69ab74bbf2723ca22.tar.gz
liburing-4aad4291b7f7b346fd71b0a69ab74bbf2723ca22.tar.bz2
Add libaio wrapper
We can wrap the aio interface with io_uring. Very basic wrapper, but seems to work. To test with fio, for example, just change the following in configure where it adds libaio: LIBS="-laio $LIBS" to LIBS="-luring $LIBS" which will cause fio to link with just liburing and use the compat wrappers instead of actual aio. This should be good enough for testing, though I'm sure there are corner cases to iron out. Most notably, this doesn't support the aio user-mapped ring for completions. We could add support for that, but that requires kernel side additions too. Totally doable, but not sure how far to take this rather than just converting folks to use io_uring instead. Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--src/Makefile2
-rw-r--r--src/compat_libaio.c276
-rw-r--r--src/compat_libaio.h154
-rw-r--r--src/liburing.map8
4 files changed, 439 insertions, 1 deletions
diff --git a/src/Makefile b/src/Makefile
index 44a95ad..c6a65ff 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -30,7 +30,7 @@ endif
all: $(all_targets)
-liburing_srcs := setup.c queue.c syscall.c register.c
+liburing_srcs := setup.c queue.c syscall.c register.c compat_libaio.c
liburing_objs := $(patsubst %.c,%.ol,$(liburing_srcs))
liburing_sobjs := $(patsubst %.c,%.os,$(liburing_srcs))
diff --git a/src/compat_libaio.c b/src/compat_libaio.c
new file mode 100644
index 0000000..250460f
--- /dev/null
+++ b/src/compat_libaio.c
@@ -0,0 +1,276 @@
+/*
+ * Simple wrapper for libaio support
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "liburing/compat.h"
+#include "liburing/io_uring.h"
+#include "liburing.h"
+
+/*
+ * Per-request tracking entry; a pointer to it is stored as the sqe user data
+ * at submit time and read back from the cqe at completion time.
+ */
+struct iocb_data {
+ void *data; /* copy of iocb->data, reported back as io_event.data */
+ struct iocb *iocb; /* the submitted iocb, reported back as io_event.obj */
+ struct iovec vec; /* one-segment iovec filled in for PREAD/PWRITE */
+ struct iocb_data *next; /* link used while the entry sits on the free list */
+};
+
+/*
+ * Object behind the io_context_t handle: one io_uring plus the tracking
+ * entries used to map completions back to their iocbs.
+ */
+struct io_context {
+ struct io_uring ring; /* ring all requests are queued on */
+ struct iocb_data *data; /* base of the tracking-entry array (freed as one block) */
+ struct iocb_data *free_list; /* head of the list of unused tracking entries */
+};
+
+#include "compat_libaio.h"
+
+/*
+ * Translate one libaio iocb into an io_uring sqe that the caller has already
+ * acquired.
+ *
+ * A tracking entry from ctx->free_list is attached to the sqe as user data
+ * and records iocb->data and the iocb pointer for the completion side. The
+ * entry is only unlinked from the free list once the opcode has been
+ * accepted, so a rejected iocb does not consume an entry.
+ *
+ * Returns 0 on success, -EAGAIN if no tracking entry is available, or
+ * -EINVAL for IO_CMD_NOOP and unknown opcodes.
+ */
+static int iocb_to_sqe(io_context_t ctx, struct io_uring_sqe *sqe,
+		       struct iocb *iocb)
+{
+	struct iocb_data *data = ctx->free_list;
+	unsigned fsync_flags = 0;
+	int is_write = 1;
+
+	/* no tracking entry left to attach to this request */
+	if (!data)
+		return -EAGAIN;
+
+	switch (iocb->aio_lio_opcode) {
+	case IO_CMD_PREAD:
+		is_write = 0;
+		/* fallthrough */
+	case IO_CMD_PWRITE: {
+		/*
+		 * Single-buffer I/O is issued as a one-segment vectored
+		 * request; the iovec is kept in the tracking entry.
+		 */
+		struct iovec *vec = &data->vec;
+
+		vec->iov_base = iocb->u.c.buf;
+		vec->iov_len = iocb->u.c.nbytes;
+		if (is_write)
+			io_uring_prep_writev(sqe, iocb->aio_fildes, vec, 1,
+					     iocb->u.c.offset);
+		else
+			io_uring_prep_readv(sqe, iocb->aio_fildes, vec, 1,
+					    iocb->u.c.offset);
+		sqe->ioprio = iocb->aio_reqprio;
+		sqe->rw_flags = iocb->aio_rw_flags;
+		break;
+	}
+	case IO_CMD_FDSYNC:
+		fsync_flags = IORING_FSYNC_DATASYNC;
+		/* fallthrough */
+	case IO_CMD_FSYNC:
+		io_uring_prep_fsync(sqe, iocb->aio_fildes, fsync_flags);
+		break;
+	case IO_CMD_POLL:
+		io_uring_prep_poll_add(sqe, iocb->aio_fildes,
+				       iocb->u.poll.events);
+		break;
+	case IO_CMD_NOOP:
+		/* Don't turn this into an io_uring nop, as aio errors them */
+		return -EINVAL;
+	case IO_CMD_PREADV:
+		is_write = 0;
+		/* fallthrough */
+	case IO_CMD_PWRITEV:
+		if (is_write)
+			io_uring_prep_writev(sqe, iocb->aio_fildes,
+					     iocb->u.v.vec, iocb->u.v.nr,
+					     iocb->u.v.offset);
+		else
+			io_uring_prep_readv(sqe, iocb->aio_fildes,
+					    iocb->u.v.vec, iocb->u.v.nr,
+					    iocb->u.v.offset);
+		sqe->ioprio = iocb->aio_reqprio;
+		sqe->rw_flags = iocb->aio_rw_flags;
+		break;
+	default:
+		fprintf(stderr, "aio: unknown op %d\n", iocb->aio_lio_opcode);
+		return -EINVAL;
+	}
+
+	/* opcode accepted: take the entry off the free list and fill it in */
+	ctx->free_list = data->next;
+	data->data = iocb->data;
+	data->iocb = iocb;
+	io_uring_sqe_set_data(sqe, data);
+	return 0;
+}
+
+/*
+ * Queue up to 'nr' iocbs on the context's ring and submit them.
+ *
+ * Preparation stops at the first iocb that cannot be queued, either because
+ * the submission queue has no free sqe or because iocb_to_sqe() rejected it.
+ * If at least one iocb was prepared, the result of io_uring_submit() is
+ * returned; otherwise the error that stopped preparation is returned (0 when
+ * nr is 0). A negative 'nr' yields -EINVAL.
+ */
+int io_submit(io_context_t ctx, long nr, struct iocb *iocbs[])
+{
+	struct io_uring *ring = &ctx->ring;
+	long queued;
+	int ret = 0;
+
+	if (nr < 0)
+		return -EINVAL;
+
+	for (queued = 0; queued < nr; queued++) {
+		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
+
+		/* submission queue is full: nothing to hand to the prep code */
+		if (!sqe) {
+			ret = -EAGAIN;
+			break;
+		}
+
+		ret = iocb_to_sqe(ctx, sqe, iocbs[queued]);
+		if (ret) {
+			/* give the unused sqe back; should probably be a helper */
+			ring->sq.sqe_tail--;
+			break;
+		}
+	}
+
+	if (queued)
+		return io_uring_submit(ring);
+
+	return ret;
+}
+
+/*
+ * Reap up to 'nr' completions into 'events'.
+ *
+ * While 'min_nr' is non-zero the ring is waited on (honouring 'ts' and
+ * 'sigmask'); once the minimum has been met, remaining completions are only
+ * peeked for. Each reaped completion fills one io_event from its tracking
+ * entry and returns that entry to the context's free list.
+ *
+ * Returns the number of events stored. Like libaio, "nothing ready" and an
+ * expired timeout are reported as 0 events rather than as an error. Other
+ * failures are returned as a negative errno when no event was reaped, and
+ * negative counts yield -EINVAL.
+ */
+static int __io_getevents(io_context_t ctx, long min_nr, long nr,
+			  struct io_event *events, struct __kernel_timespec *ts,
+			  sigset_t *sigmask)
+{
+	struct io_uring *ring = &ctx->ring;
+	struct io_uring_cqe *cqe;
+	int ret = 0, total = 0;
+
+	if (min_nr < 0 || nr < 0)
+		return -EINVAL;
+	if (!nr)
+		return 0;
+	if (min_nr > nr)
+		min_nr = nr;
+
+	while (nr) {
+		struct io_event *ev = &events[total];
+		struct iocb_data *data;
+
+		if (min_nr)
+			ret = io_uring_wait_cqes(ring, &cqe, min_nr, ts, sigmask);
+		else
+			ret = io_uring_peek_cqe(ring, &cqe);
+		if (ret)
+			break;
+
+		data = io_uring_cqe_get_data(cqe);
+		ev->data = data->data;
+		ev->obj = data->iocb;
+		ev->res = cqe->res;
+		ev->res2 = 0;
+		io_uring_cqe_seen(ring, cqe);
+
+		/* request is done: recycle its tracking entry */
+		data->next = ctx->free_list;
+		ctx->free_list = data;
+
+		total++;
+		nr--;
+		if (min_nr)
+			min_nr--;
+	}
+
+	if (total)
+		return total;
+
+	/*
+	 * Nothing was reaped, so min_nr still holds the caller's value: a
+	 * zero min_nr means the failure came from the non-blocking peek.
+	 */
+	if (ret == -ETIME || (!min_nr && ret == -EAGAIN))
+		return 0;
+
+	return ret;
+}
+
+/*
+ * libaio io_getevents(): convert the optional timespec into the kernel
+ * timespec layout and reap events without a signal mask.
+ */
+int io_getevents(io_context_t ctx, long min_nr, long nr,
+		 struct io_event *events, struct timespec *ts)
+{
+	struct __kernel_timespec timeout;
+
+	/* no timeout supplied: pass NULL straight through */
+	if (!ts)
+		return __io_getevents(ctx, min_nr, nr, events, NULL, NULL);
+
+	timeout.tv_sec = ts->tv_sec;
+	timeout.tv_nsec = ts->tv_nsec;
+	return __io_getevents(ctx, min_nr, nr, events, &timeout, NULL);
+}
+
+/*
+ * libaio io_pgetevents(): same as io_getevents(), but additionally forwards
+ * the caller's signal mask to the wait.
+ */
+int io_pgetevents(io_context_t ctx, long min_nr, long nr,
+		  struct io_event *events, struct timespec *ts,
+		  sigset_t *sigmask)
+{
+	struct __kernel_timespec timeout;
+
+	/* no timeout supplied: pass NULL straight through */
+	if (!ts)
+		return __io_getevents(ctx, min_nr, nr, events, NULL, sigmask);
+
+	timeout.tv_sec = ts->tv_sec;
+	timeout.tv_nsec = ts->tv_nsec;
+	return __io_getevents(ctx, min_nr, nr, events, &timeout, sigmask);
+}
+
+/*
+ * Cancellation is not implemented: every call fails with -EINVAL.
+ *
+ * It should be implemented for POLL requests at least, as those are the only
+ * requests where it makes sense, nothing else being supported for libaio.
+ * It could also be made to work in general, since io_uring does support
+ * cancel.
+ */
+int io_cancel(io_context_t ctx, struct iocb *iocb, struct io_event *evt)
+{
+	const int unsupported = -EINVAL;
+
+	return unsupported;
+}
+
+/*
+ * Round 'val' up to the nearest power of two.
+ *
+ * Returns 0 for val <= 0, and val itself when it already is a power of two.
+ * Values above 2^30 cannot be rounded up within an int and are clamped to
+ * 2^30.
+ */
+static int roundup_pow2(int val)
+{
+	const unsigned int max_pow2 = 1U << 30;
+	unsigned int pow2 = 1;
+
+	if (val <= 0)
+		return 0;
+	if ((unsigned int) val >= max_pow2)
+		return max_pow2;
+
+	/* unsigned arithmetic keeps the shift well defined */
+	while (pow2 < (unsigned int) val)
+		pow2 <<= 1;
+
+	return pow2;
+}
+
+/*
+ * Create an aio context backed by an io_uring.
+ *
+ * 'maxevents' is passed through roundup_pow2() and the result is used both
+ * as the ring size and as the number of request tracking entries, all of
+ * which start out on the free list.
+ *
+ * On success stores the new context in *ctxptr and returns 0. On failure
+ * *ctxptr is set to NULL and a negative errno is returned: -EINVAL for a
+ * non-positive 'maxevents', -ENOMEM if an allocation fails, or the error
+ * from io_uring_queue_init().
+ */
+int io_queue_init(int maxevents, io_context_t *ctxptr)
+{
+	io_context_t ctx;
+	int i, ret;
+
+	*ctxptr = NULL;
+	if (maxevents <= 0)
+		return -EINVAL;
+
+	maxevents = roundup_pow2(maxevents);
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (!ctx)
+		return -ENOMEM;
+
+	ret = io_uring_queue_init(maxevents, &ctx->ring, 0);
+	if (ret)
+		goto err_free;
+
+	ctx->data = calloc(maxevents, sizeof(*ctx->data));
+	if (!ctx->data) {
+		ret = -ENOMEM;
+		goto err_exit;
+	}
+
+	/* thread every tracking entry onto the free list */
+	for (i = 0; i < maxevents; i++) {
+		struct iocb_data *data = &ctx->data[i];
+
+		data->next = ctx->free_list;
+		ctx->free_list = data;
+	}
+
+	*ctxptr = ctx;
+	return 0;
+
+err_exit:
+	io_uring_queue_exit(&ctx->ring);
+err_free:
+	free(ctx);
+	return ret;
+}
+
+/*
+ * libaio io_setup(): thin alias for io_queue_init().
+ */
+int io_setup(unsigned maxevents, io_context_t *ctxp)
+{
+	int depth = maxevents;
+
+	return io_queue_init(depth, ctxp);
+}
+
+/*
+ * Tear down a context created by io_queue_init()/io_setup(): shut down the
+ * ring, then release the tracking-entry array and the context itself.
+ *
+ * Returns 0 on success, or -EINVAL if 'ctx' is NULL.
+ */
+int io_destroy(io_context_t ctx)
+{
+	if (!ctx)
+		return -EINVAL;
+
+	io_uring_queue_exit(&ctx->ring);
+	free(ctx->data);
+	free(ctx);
+	return 0;
+}
diff --git a/src/compat_libaio.h b/src/compat_libaio.h
new file mode 100644
index 0000000..cdc637b
--- /dev/null
+++ b/src/compat_libaio.h
@@ -0,0 +1,154 @@
+/* /usr/include/libaio.h
+ *
+ * Copyright 2000,2001,2002 Red Hat, Inc.
+ *
+ * Written by Benjamin LaHaise <bcrl@redhat.com>
+ *
+ * libaio Linux async I/O interface
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef __LIBAIO_H
+#define __LIBAIO_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <signal.h>
+
+struct timespec;
+struct sockaddr;
+struct iovec;
+
+typedef struct io_context *io_context_t;
+
+/*
+ * Request opcodes, stored in iocb.aio_lio_opcode. The value 4 is not
+ * assigned to any opcode here.
+ */
+typedef enum io_iocb_cmd {
+ IO_CMD_PREAD = 0,
+ IO_CMD_PWRITE = 1,
+
+ IO_CMD_FSYNC = 2,
+ IO_CMD_FDSYNC = 3,
+
+ IO_CMD_POLL = 5,
+ IO_CMD_NOOP = 6,
+ IO_CMD_PREADV = 7,
+ IO_CMD_PWRITEV = 8,
+} io_iocb_cmd_t;
+
+/*
+ * Select the PADDED/PADDEDptr/PADDEDul field-declaration helpers by byte
+ * order and long size. Each takes a field (x) and a padding name (y):
+ * on 32-bit targets an extra 'unsigned y' is declared after the field
+ * (little endian) or before it (big endian); on 64-bit targets pointer and
+ * unsigned long fields get no padding member. Unrecognised targets fail
+ * the build.
+ */
+/* little endian, 32 bits */
+#if defined(__i386__) || (defined(__arm__) && !defined(__ARMEB__)) || \
+ defined(__sh__) || defined(__bfin__) || defined(__MIPSEL__) || \
+ defined(__cris__) || (defined(__riscv) && __riscv_xlen == 32) || \
+ (defined(__GNUC__) && defined(__BYTE_ORDER__) && \
+ __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_LONG__ == 4)
+#define PADDED(x, y) x; unsigned y
+#define PADDEDptr(x, y) x; unsigned y
+#define PADDEDul(x, y) unsigned long x; unsigned y
+
+/* little endian, 64 bits */
+#elif defined(__ia64__) || defined(__x86_64__) || defined(__alpha__) || \
+ (defined(__aarch64__) && defined(__AARCH64EL__)) || \
+ (defined(__riscv) && __riscv_xlen == 64) || \
+ (defined(__GNUC__) && defined(__BYTE_ORDER__) && \
+ __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && __SIZEOF_LONG__ == 8)
+#define PADDED(x, y) x, y
+#define PADDEDptr(x, y) x
+#define PADDEDul(x, y) unsigned long x
+
+/* big endian, 64 bits */
+#elif defined(__powerpc64__) || defined(__s390x__) || \
+ (defined(__sparc__) && defined(__arch64__)) || \
+ (defined(__aarch64__) && defined(__AARCH64EB__)) || \
+ (defined(__GNUC__) && defined(__BYTE_ORDER__) && \
+ __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_LONG__ == 8)
+#define PADDED(x, y) unsigned y; x
+#define PADDEDptr(x,y) x
+#define PADDEDul(x, y) unsigned long x
+
+/* big endian, 32 bits */
+#elif defined(__PPC__) || defined(__s390__) || \
+ (defined(__arm__) && defined(__ARMEB__)) || \
+ defined(__sparc__) || defined(__MIPSEB__) || defined(__m68k__) || \
+ defined(__hppa__) || defined(__frv__) || defined(__avr32__) || \
+ (defined(__GNUC__) && defined(__BYTE_ORDER__) && \
+ __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && __SIZEOF_LONG__ == 4)
+#define PADDED(x, y) unsigned y; x
+#define PADDEDptr(x, y) unsigned y; x
+#define PADDEDul(x, y) unsigned y; unsigned long x
+
+#else
+#error endian?
+#endif
+
+/* Arguments for IO_CMD_POLL: the event mask to poll for. */
+struct io_iocb_poll {
+ PADDED(int events, __pad1);
+}; /* result code is the set of result flags or -'ve errno */
+
+/*
+ * Socket-address arguments; member 'saddr' of the iocb union.
+ * NOTE(review): no opcode in io_iocb_cmd above appears to use it - confirm.
+ */
+struct io_iocb_sockaddr {
+ struct sockaddr *addr;
+ int len;
+}; /* result code is the length of the sockaddr, or -'ve errno */
+
+/*
+ * Arguments for single-buffer requests (IO_CMD_PREAD / IO_CMD_PWRITE):
+ * buffer address, byte count and file offset.
+ */
+struct io_iocb_common {
+ PADDEDptr(void *buf, __pad1);
+ PADDEDul(nbytes, __pad2);
+ long long offset;
+ long long __pad3;
+ unsigned flags;
+ unsigned resfd;
+}; /* result code is the amount read or -'ve errno */
+
+/*
+ * Arguments for vectored requests (IO_CMD_PREADV / IO_CMD_PWRITEV):
+ * iovec array, number of segments and file offset.
+ */
+struct io_iocb_vector {
+ const struct iovec *vec;
+ int nr;
+ long long offset;
+}; /* result code is the amount read or -'ve errno */
+
+/*
+ * One I/O request as handed to io_submit(). The union member that applies
+ * is chosen by aio_lio_opcode.
+ */
+struct iocb {
+ PADDEDptr(void *data, __pad1); /* Return in the io completion event */
+ /* key: For use in identifying io requests */
+ /* aio_rw_flags: RWF_* flags (such as RWF_NOWAIT) */
+ PADDED(unsigned key, aio_rw_flags);
+
+ short aio_lio_opcode; /* one of the IO_CMD_* values */
+ short aio_reqprio; /* request priority */
+ int aio_fildes; /* file descriptor the request operates on */
+
+ union {
+ struct io_iocb_common c;
+ struct io_iocb_vector v;
+ struct io_iocb_poll poll;
+ struct io_iocb_sockaddr saddr;
+ } u;
+};
+
+/* One completion record, as filled in by io_getevents()/io_pgetevents(). */
+struct io_event {
+ PADDEDptr(void *data, __pad1); /* the data field of the completed iocb */
+ PADDEDptr(struct iocb *obj, __pad2); /* the iocb that completed */
+ PADDEDul(res, __pad3); /* result code of the request */
+ PADDEDul(res2, __pad4); /* secondary result */
+};
+
+#undef PADDED
+#undef PADDEDptr
+#undef PADDEDul
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __LIBAIO_H */
diff --git a/src/liburing.map b/src/liburing.map
index 38bd558..1c432da 100644
--- a/src/liburing.map
+++ b/src/liburing.map
@@ -56,4 +56,12 @@ LIBURING_0.6 {
} LIBURING_0.5;
LIBURING_0.7 {
+ global:
+ io_queue_init;
+ io_setup;
+ io_destroy;
+ io_submit;
+ io_getevents;
+ io_pgetevents;
+ io_cancel;
} LIBURING_0.6;