svcrdma: Handle device removal outside of the CM event handler
author: Chuck Lever <chuck.lever@oracle.com>
Mon, 29 Jul 2024 20:52:32 +0000 (16:52 -0400)
committer: Chuck Lever <chuck.lever@oracle.com>
Fri, 20 Sep 2024 23:31:03 +0000 (19:31 -0400)
Synchronously wait for all disconnects to complete to ensure the
transports have divested all hardware resources before the
underlying RDMA device can safely be removed.

Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
include/linux/sunrpc/svc_rdma.h
include/trace/events/rpcrdma.h
net/sunrpc/xprtrdma/svc_rdma_transport.c

index d33bab33099ab0fe4db1a889117307510e1035fa..619fc0bd837a8e3825f922db9d66c3c4674d9813 100644 (file)
@@ -48,6 +48,7 @@
 #include <linux/sunrpc/rpc_rdma.h>
 #include <linux/sunrpc/rpc_rdma_cid.h>
 #include <linux/sunrpc/svc_rdma_pcl.h>
+#include <linux/sunrpc/rdma_rn.h>
 
 #include <linux/percpu_counter.h>
 #include <rdma/ib_verbs.h>
@@ -76,6 +77,7 @@ struct svcxprt_rdma {
        struct svc_xprt      sc_xprt;           /* SVC transport structure */
        struct rdma_cm_id    *sc_cm_id;         /* RDMA connection id */
        struct list_head     sc_accept_q;       /* Conn. waiting accept */
+       struct rpcrdma_notification sc_rn;      /* removal notification */
        int                  sc_ord;            /* RDMA read limit */
        int                  sc_max_send_sges;
        bool                 sc_snd_w_inv;      /* OK to use Send With Invalidate */
index a96a985c49b3d76338d6a9ec55ff150fb2bf93c1..e6a72646c50798c6ed6eaf7165e85bfc556b88ab 100644 (file)
@@ -2172,6 +2172,29 @@ TRACE_EVENT(svcrdma_qp_error,
        )
 );
 
+/*
+ * svcrdma_device_removal - a connection is being torn down because its
+ * device is about to be removed.
+ *
+ * @id: the connection's rdma_cm_id; the event records id->device->name
+ *      and a sockaddr_in6-sized copy of id->route.addr.dst_addr.
+ *
+ * The format string carries no trailing newline: the trace output layer
+ * terminates each event line itself, so an explicit "\n" here would put
+ * a blank line after every event.
+ */
+TRACE_EVENT(svcrdma_device_removal,
+       TP_PROTO(
+               const struct rdma_cm_id *id
+       ),
+
+       TP_ARGS(id),
+
+       TP_STRUCT__entry(
+               __string(name, id->device->name)
+               __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+       ),
+
+       TP_fast_assign(
+               __assign_str(name);
+               memcpy(__entry->addr, &id->route.addr.dst_addr,
+                      sizeof(struct sockaddr_in6));
+       ),
+
+       TP_printk("device %s to be removed, disconnecting %pISpc",
+               __get_str(name), __entry->addr
+       )
+);
+
 DECLARE_EVENT_CLASS(svcrdma_sendqueue_class,
        TP_PROTO(
                const struct svcxprt_rdma *rdma,
index f15750cacacff4ee7b32e95f08583e73ace0e950..581cc5ed7c0cb59d428b70936de708b0ea2221e1 100644 (file)
@@ -339,7 +339,6 @@ static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id,
                svc_xprt_enqueue(xprt);
                break;
        case RDMA_CM_EVENT_DISCONNECTED:
-       case RDMA_CM_EVENT_DEVICE_REMOVAL:
                svc_xprt_deferred_close(xprt);
                break;
        default:
@@ -384,6 +383,16 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
        return &cma_xprt->sc_xprt;
 }
 
+/*
+ * Notification callback handed to rpcrdma_rn_register() for each
+ * accepted transport. @rn is the sc_rn member embedded in the
+ * transport's svcxprt_rdma; the callback records the
+ * svcrdma_device_removal trace event for the transport's CM ID and
+ * then closes the transport.
+ */
+static void svc_rdma_xprt_done(struct rpcrdma_notification *rn)
+{
+       struct svcxprt_rdma *xprt_rdma;
+
+       /* Recover the enclosing transport from its embedded notification. */
+       xprt_rdma = container_of(rn, struct svcxprt_rdma, sc_rn);
+
+       trace_svcrdma_device_removal(xprt_rdma->sc_cm_id);
+       svc_xprt_close(&xprt_rdma->sc_xprt);
+}
+
 /*
  * This is the xpo_recvfrom function for listening endpoints. Its
  * purpose is to accept incoming connections. The CMA callback handler
@@ -425,6 +434,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        dev = newxprt->sc_cm_id->device;
        newxprt->sc_port_num = newxprt->sc_cm_id->port_num;
 
+       if (rpcrdma_rn_register(dev, &newxprt->sc_rn, svc_rdma_xprt_done))
+               goto errout;
+
        newxprt->sc_max_req_size = svcrdma_max_req_size;
        newxprt->sc_max_requests = svcrdma_max_requests;
        newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
@@ -580,6 +592,7 @@ static void __svc_rdma_free(struct work_struct *work)
 {
        struct svcxprt_rdma *rdma =
                container_of(work, struct svcxprt_rdma, sc_work);
+       struct ib_device *device = rdma->sc_cm_id->device;
 
        /* This blocks until the Completion Queues are empty */
        if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -608,6 +621,7 @@ static void __svc_rdma_free(struct work_struct *work)
        /* Destroy the CM ID */
        rdma_destroy_id(rdma->sc_cm_id);
 
+       rpcrdma_rn_unregister(device, &rdma->sc_rn);
        kfree(rdma);
 }