xprtrdma: Avoid Send Queue wrapping
author Chuck Lever <chuck.lever@oracle.com>
Mon, 19 Apr 2021 18:03:25 +0000 (14:03 -0400)
committer Trond Myklebust <trond.myklebust@hammerspace.com>
Mon, 26 Apr 2021 13:25:43 +0000 (09:25 -0400)
Send WRs can be signalled or unsignalled. A signalled Send WR
always has a matching Send completion, while an unsignalled Send
has a completion only if the Send WR fails.

xprtrdma has a Send accounting mechanism that is designed to reduce
the number of signalled Send WRs. This in turn mitigates the
interrupt rate of the underlying device.

RDMA consumers can't leave all Sends unsignalled, however, because
providers rely on Send completions to maintain their Send Queue head
and tail pointers. xprtrdma counts the number of unsignalled Send WRs
that have been posted to ensure that Sends are signalled often
enough to prevent the Send Queue from wrapping.

This mechanism neglected to account for FastReg WRs, which are
posted on the Send Queue but never signalled. As a result, the
Send Queue wrapped on occasion, resulting in duplicate completions
of FastReg and LocalInv WRs.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/verbs.c

index 951ae20485f34cac173b09134034c9f17db2f1ab..43a412ea337ad25d5d05856fb214895f2540b405 100644 (file)
@@ -390,11 +390,13 @@ static void frwr_cid_init(struct rpcrdma_ep *ep,
  */
 int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 {
+       struct ib_send_wr *post_wr, *send_wr = &req->rl_wr;
        struct rpcrdma_ep *ep = r_xprt->rx_ep;
-       struct ib_send_wr *post_wr;
        struct rpcrdma_mr *mr;
+       unsigned int num_wrs;
 
-       post_wr = &req->rl_wr;
+       num_wrs = 1;
+       post_wr = send_wr;
        list_for_each_entry(mr, &req->rl_registered, mr_list) {
                struct rpcrdma_frwr *frwr;
 
@@ -409,8 +411,19 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
                frwr->fr_regwr.wr.send_flags = 0;
 
                post_wr = &frwr->fr_regwr.wr;
+               ++num_wrs;
        }
 
+       if ((kref_read(&req->rl_kref) > 1) || num_wrs > ep->re_send_count) {
+               send_wr->send_flags |= IB_SEND_SIGNALED;
+               ep->re_send_count = min_t(unsigned int, ep->re_send_batch,
+                                         num_wrs - ep->re_send_count);
+       } else {
+               send_wr->send_flags &= ~IB_SEND_SIGNALED;
+               ep->re_send_count -= num_wrs;
+       }
+
+       trace_xprtrdma_post_send(req);
        return ib_post_send(ep->re_id->qp, post_wr, NULL);
 }
 
index f97b03129d99a327edf8ca312f38e0c23d9cbfde..72b24dca96c1ce712f0d6e5c64c0934566fec392 100644 (file)
@@ -1365,21 +1365,7 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
  */
 int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 {
-       struct ib_send_wr *send_wr = &req->rl_wr;
-       struct rpcrdma_ep *ep = r_xprt->rx_ep;
-       int rc;
-
-       if (!ep->re_send_count || kref_read(&req->rl_kref) > 1) {
-               send_wr->send_flags |= IB_SEND_SIGNALED;
-               ep->re_send_count = ep->re_send_batch;
-       } else {
-               send_wr->send_flags &= ~IB_SEND_SIGNALED;
-               --ep->re_send_count;
-       }
-
-       trace_xprtrdma_post_send(req);
-       rc = frwr_send(r_xprt, req);
-       if (rc)
+       if (frwr_send(r_xprt, req))
                return -ENOTCONN;
        return 0;
 }