From: Chuck Lever Date: Tue, 4 Jun 2024 19:45:25 +0000 (-0400) Subject: xprtrdma: Handle device removal outside of the CM event handler X-Git-Tag: io_uring-6.11-20240722~29^2~57 X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=3f4eb9ff923413cdb4c7e171c06d3564f6286712;p=linux-2.6-block.git xprtrdma: Handle device removal outside of the CM event handler Wait for all disconnects to complete to ensure the transport has divested all of its hardware resources before the underlying RDMA device can be removed. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index ecdaf088219d..ba2d6a0e41cc 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -669,6 +669,29 @@ TRACE_EVENT(xprtrdma_inline_thresh, DEFINE_CONN_EVENT(connect); DEFINE_CONN_EVENT(disconnect); +TRACE_EVENT(xprtrdma_device_removal, + TP_PROTO( + const struct rdma_cm_id *id + ), + + TP_ARGS(id), + + TP_STRUCT__entry( + __string(name, id->device->name) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __assign_str(name); + memcpy(__entry->addr, &id->route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("device %s to be removed, disconnecting %pISpc\n", + __get_str(name), __entry->addr + ) +); + DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); TRACE_EVENT(xprtrdma_op_connect, diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 39319539d8af..2904454adb06 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -222,7 +222,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep, static int rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { - struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr; struct rpcrdma_ep *ep = id->context; might_sleep(); @@ -241,14 +240,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) ep->re_async_rc = -ENETUNREACH; complete(&ep->re_done); return 0; - case RDMA_CM_EVENT_DEVICE_REMOVAL: - pr_info("rpcrdma: removing device %s for %pISpc\n", - ep->re_id->device->name, sap); - switch (xchg(&ep->re_connect_status, -ENODEV)) { - case 0: goto wake_connect_worker; - case 1: goto disconnected; - } - return 0; case RDMA_CM_EVENT_ADDR_CHANGE: ep->re_connect_status = -ENODEV; goto disconnected; @@ -284,6 +275,14 @@ disconnected: return 0; } +static void rpcrdma_ep_removal_done(struct rpcrdma_notification *rn) +{ + struct rpcrdma_ep *ep = container_of(rn, struct rpcrdma_ep, re_rn); + + trace_xprtrdma_device_removal(ep->re_id); + xprt_force_disconnect(ep->re_xprt); +} + static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep) { @@ -323,6 +322,10 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, if (rc) goto out; + rc = rpcrdma_rn_register(id->device, &ep->re_rn, rpcrdma_ep_removal_done); + if (rc) + goto out; + return id; out: @@ -350,6 +353,8 @@ static void rpcrdma_ep_destroy(struct kref *kref) ib_dealloc_pd(ep->re_pd); ep->re_pd = NULL; + rpcrdma_rn_unregister(ep->re_id->device, &ep->re_rn); + kfree(ep); module_put(THIS_MODULE); } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index da409450dfc0..341725c66ec8 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -56,6 +56,7 @@ #include /* completion IDs */ #include /* RPC/RDMA protocol */ #include /* xprt parameters */ +#include /* removal notifications */ #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ @@ -92,6 +93,7 @@ struct rpcrdma_ep { struct rpcrdma_connect_private re_cm_private; struct rdma_conn_param re_remote_cma; + struct rpcrdma_notification re_rn; int re_receive_count; unsigned int re_max_requests; /* depends on device */ unsigned int re_inline_send; /* negotiated */