author     Jens Axboe <axboe@kernel.dk>    2021-04-10 16:39:38 -0600
committer  Jens Axboe <axboe@kernel.dk>    2021-04-10 16:39:38 -0600
commit     ea0fd51b78b3c089add37e8c4ee61abd460beb7a (patch)
tree       d9891448925c7d77de78047881d770e832876aab
parent     d3eb6af47886656e811e8f4e274ac361d0efa5bf (diff)
setup: provide helpers to inquire about necessary mlock sizes
5.12 and up will not require rlimit memlock allocations, but older kernels do. It can be hard for applications to know how much they need without trial and error, so provide a set of helpers for them to query it instead.

It's a bit ugly as it's mostly a duplication of what the kernel does, but since that code is now static and won't be updated (no new kernels require memlock), that's probably not a huge issue.

Fixes: https://github.com/axboe/liburing/issues/246
Signed-off-by: Jens Axboe <axboe@kernel.dk>
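For context, an application would use the new helpers roughly as sketched below. This is illustrative only and not part of the patch: the 64-entry depth and the printf reporting are assumptions.

#include <liburing.h>
#include <stdio.h>

int main(void)
{
        /* Query how much RLIMIT_MEMLOCK a 64-entry ring would consume. */
        ssize_t size = io_uring_mlock_size(64, 0);

        if (size < 0) {
                fprintf(stderr, "io_uring_mlock_size: %zd\n", size);
                return 1;
        }
        /* 0 means the kernel accounts ring memory via memcg, not memlock. */
        printf("ring needs %zd bytes of locked memory\n", size);
        return 0;
}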
-rw-r--r--  src/include/liburing.h  |   3
-rw-r--r--  src/liburing.map        |   6
-rw-r--r--  src/setup.c             | 120
3 files changed, 129 insertions(+), 0 deletions(-)
diff --git a/src/include/liburing.h b/src/include/liburing.h
index 5b96e02..a111c54 100644
--- a/src/include/liburing.h
+++ b/src/include/liburing.h
@@ -655,6 +655,9 @@ static inline int io_uring_wait_cqe(struct io_uring *ring,
        return io_uring_wait_cqe_nr(ring, cqe_ptr, 1);
}
+ssize_t io_uring_mlock_size(unsigned entries, unsigned flags);
+ssize_t io_uring_mlock_size_params(unsigned entries, struct io_uring_params *p);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/liburing.map b/src/liburing.map
index dfdc7ba..012ac4e 100644
--- a/src/liburing.map
+++ b/src/liburing.map
@@ -31,3 +31,9 @@ LIBURING_2.0 {
        local:
                *;
};
+
+LIBURING_2.1 {
+        global:
+                io_uring_mlock_size_params;
+                io_uring_mlock_size;
+} LIBURING_2.0;
diff --git a/src/setup.c b/src/setup.c
index c01a933..111db61 100644
--- a/src/setup.c
+++ b/src/setup.c
@@ -217,3 +217,123 @@ void io_uring_free_probe(struct io_uring_probe *probe)
{
        free(probe);
}
+
+static int __fls(int x)
+{
+        int r = 32;
+
+        if (!x)
+                return 0;
+        if (!(x & 0xffff0000u)) {
+                x <<= 16;
+                r -= 16;
+        }
+        if (!(x & 0xff000000u)) {
+                x <<= 8;
+                r -= 8;
+        }
+        if (!(x & 0xf0000000u)) {
+                x <<= 4;
+                r -= 4;
+        }
+        if (!(x & 0xc0000000u)) {
+                x <<= 2;
+                r -= 2;
+        }
+        if (!(x & 0x80000000u)) {
+                x <<= 1;
+                r -= 1;
+        }
+        return r;
+}
+
+static unsigned roundup_pow2(unsigned depth)
+{
+        return 1UL << __fls(depth - 1);
+}
+
+static size_t npages(size_t size, unsigned page_size)
+{
+        size--;
+        size /= page_size;
+        return __fls(size);
+}
+
+#define KRING_SIZE 320
+
+static size_t rings_size(unsigned entries, unsigned cq_entries, unsigned page_size)
+{
+        size_t pages, sq_size, cq_size;
+
+        cq_size = KRING_SIZE;
+        cq_size += cq_entries * sizeof(struct io_uring_cqe);
+        cq_size = (cq_size + 63) & ~63UL;
+        pages = (size_t) 1 << npages(cq_size, page_size);
+
+        sq_size = sizeof(struct io_uring_sqe) * entries;
+        pages += (size_t) 1 << npages(sq_size, page_size);
+        return pages * page_size;
+}
+
+#define KERN_MAX_ENTRIES 32768
+#define KERN_MAX_CQ_ENTRIES (2 * KERN_MAX_ENTRIES)
+
+ssize_t io_uring_mlock_size_params(unsigned entries, struct io_uring_params *p)
+{
+        struct io_uring_params lp = { };
+        struct io_uring ring;
+        unsigned cq_entries;
+        long page_size;
+        ssize_t ret;
+
+        ret = io_uring_queue_init_params(entries, &ring, &lp);
+        if (ret < 0)
+                return ret;
+
+        io_uring_queue_exit(&ring);
+
+        /*
+         * Native workers imply using cgroup memory accounting, and hence no
+         * memlock memory is needed for the ring allocations.
+         */
+        if (lp.features & IORING_FEAT_NATIVE_WORKERS)
+                return 0;
+
+        if (!entries)
+                return -EINVAL;
+        if (entries > KERN_MAX_ENTRIES) {
+                if (!(p->flags & IORING_SETUP_CLAMP))
+                        return -EINVAL;
+                entries = KERN_MAX_ENTRIES;
+        }
+
+        entries = roundup_pow2(entries);
+        if (p->flags & IORING_SETUP_CQSIZE) {
+                if (!p->cq_entries)
+                        return -EINVAL;
+                cq_entries = p->cq_entries;
+                if (cq_entries > KERN_MAX_CQ_ENTRIES) {
+                        if (!(p->flags & IORING_SETUP_CLAMP))
+                                return -EINVAL;
+                        cq_entries = KERN_MAX_CQ_ENTRIES;
+                }
+                cq_entries = roundup_pow2(cq_entries);
+                if (cq_entries < entries)
+                        return -EINVAL;
+        } else {
+                cq_entries = 2 * entries;
+        }
+
+        page_size = sysconf(_SC_PAGESIZE);
+        if (page_size < 0)
+                page_size = 4096;
+
+        return rings_size(entries, cq_entries, page_size);
+}
+
+ssize_t io_uring_mlock_size(unsigned entries, unsigned flags)
+{
+        struct io_uring_params p = { .flags = flags, };
+
+        return io_uring_mlock_size_params(entries, &p);
+}