xsk: allow remap of fill and/or completion rings
author: Nuno Gonçalves <nunog@fr24.com>
Fri, 24 Mar 2023 10:02:22 +0000 (10:02 +0000)
committer: Alexei Starovoitov <ast@kernel.org>
Sun, 26 Mar 2023 04:07:35 +0000 (21:07 -0700)
Remapping the fill and completion rings was frowned upon because they
control the usage of the UMEM, which does not support concurrent use.
However, that restriction also disallows remapping these rings
into another process.

A possible use case is that the user wants to transfer the socket/
UMEM ownership to another process (via SYS_pidfd_getfd) and so
would need to also remap these rings.

This will have no impact on current usages and just relaxes the
remap limitation.

Signed-off-by: Nuno Gonçalves <nunog@fr24.com>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/r/20230324100222.13434-1-nunog@fr24.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
net/xdp/xsk.c

index 2ac58b282b5eb29c8358f8366df4ff3e7c937c4d..cc1e7f15fa731483b10e5000e25eaf77a15316cb 100644 (file)
@@ -1301,9 +1301,10 @@ static int xsk_mmap(struct file *file, struct socket *sock,
        loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
        unsigned long size = vma->vm_end - vma->vm_start;
        struct xdp_sock *xs = xdp_sk(sock->sk);
+       int state = READ_ONCE(xs->state);
        struct xsk_queue *q = NULL;
 
-       if (READ_ONCE(xs->state) != XSK_READY)
+       if (state != XSK_READY && state != XSK_BOUND)
                return -EBUSY;
 
        if (offset == XDP_PGOFF_RX_RING) {
@@ -1314,9 +1315,11 @@ static int xsk_mmap(struct file *file, struct socket *sock,
                /* Matches the smp_wmb() in XDP_UMEM_REG */
                smp_rmb();
                if (offset == XDP_UMEM_PGOFF_FILL_RING)
-                       q = READ_ONCE(xs->fq_tmp);
+                       q = state == XSK_READY ? READ_ONCE(xs->fq_tmp) :
+                                                READ_ONCE(xs->pool->fq);
                else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
-                       q = READ_ONCE(xs->cq_tmp);
+                       q = state == XSK_READY ? READ_ONCE(xs->cq_tmp) :
+                                                READ_ONCE(xs->pool->cq);
        }
 
        if (!q)