// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2015-2018 Oracle. All rights reserved.
 *
 * Support for backward direction RPCs on RPC/RDMA (server-side).
 */

#include <linux/module.h>

#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

#define RPCDBG_FACILITY	RPCDBG_SVCXPRT

#undef SVCRDMA_BACKCHANNEL_DEBUG
/**
 * svc_rdma_handle_bc_reply - Process incoming backchannel reply
 * @xprt: controlling backchannel transport
 * @rdma_resp: pointer to incoming transport header
 * @rcvbuf: XDR buffer into which to decode the reply
 *
 * Returns:
 *	%0 if @rcvbuf is filled in, xprt_complete_rqst called,
 *	%-EAGAIN if server should call ->recvfrom again.
 */
int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
			     struct xdr_buf *rcvbuf)
{
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	struct kvec *dst, *src = &rcvbuf->head[0];
	struct rpc_rqst *req;
	u32 credits;
	size_t len;
	__be32 xid;
	__be32 *p;
	int ret;

	p = (__be32 *)src->iov_base;
	len = src->iov_len;
	/* The XID is the first word of the RPC/RDMA transport header */
	xid = *rdma_resp;

#ifdef SVCRDMA_BACKCHANNEL_DEBUG
	pr_info("%s: xid=%08x, length=%zu\n",
		__func__, be32_to_cpu(xid), len);
	pr_info("%s: RPC/RDMA: %*ph\n",
		__func__, (int)RPCRDMA_HDRLEN_MIN, rdma_resp);
	pr_info("%s: RPC: %*ph\n",
		__func__, (int)len, p);
#endif

	/* Reject payloads too short to carry an RPC reply header
	 * (24 octets); the server will simply try recvfrom again.
	 */
	ret = -EAGAIN;
	if (src->iov_len < 24)
		goto out_shortreply;

	spin_lock(&xprt->recv_lock);
	req = xprt_lookup_rqst(xprt, xid);
	if (!req)
		goto out_notfound;

	/* Copy the reply payload into the waiting request's private
	 * receive buffer, after refreshing it from rq_rcv_buf.
	 */
	dst = &req->rq_private_buf.head[0];
	memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
	if (dst->iov_len < len)
		goto out_unlock;
	memcpy(dst->iov_base, p, len);
	/* Pin @req so it cannot disappear while recv_lock is dropped
	 * below to update the congestion window.
	 */
	xprt_pin_rqst(req);
	spin_unlock(&xprt->recv_lock);

	/* Credits are the third word of the transport header */
	credits = be32_to_cpup(rdma_resp + 2);
	if (credits == 0)
		credits = 1;	/* don't deadlock */
	else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
		credits = r_xprt->rx_buf.rb_bc_max_requests;

	/* transport_lock is taken without recv_lock held; recv_lock is
	 * re-acquired afterwards to complete and unpin the request.
	 */
	spin_lock_bh(&xprt->transport_lock);
	xprt->cwnd = credits << RPC_CWNDSHIFT;
	spin_unlock_bh(&xprt->transport_lock);

	spin_lock(&xprt->recv_lock);
	ret = 0;
	xprt_complete_rqst(req->rq_task, rcvbuf->len);
	xprt_unpin_rqst(req);
	rcvbuf->len = 0;

out_unlock:
	spin_unlock(&xprt->recv_lock);
out:
	return ret;

out_shortreply:
	dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n",
		xprt, src->iov_len);
	goto out;

out_notfound:
	dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n",
		xprt, be32_to_cpu(xid));
	goto out_unlock;
}
102 | ||
/* Send a backwards direction RPC call.
 *
 * Caller holds the connection's mutex and has already marshaled
 * the RPC/RDMA request.
 *
 * This is similar to svc_rdma_send_reply_msg, but takes a struct
 * rpc_rqst instead, does not support chunks, and avoids blocking
 * memory allocation.
 *
 * XXX: There is still an opportunity to block in svc_rdma_send()
 * if there are no SQ entries to post the Send. This may occur if
 * the adapter has a small maximum SQ depth.
 */
static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
			      struct rpc_rqst *rqst,
			      struct svc_rdma_send_ctxt *ctxt)
{
	int ret;

	/* DMA-map the marshaled call in rq_snd_buf; any mapping
	 * failure is reported to the caller as a generic I/O error.
	 */
	ret = svc_rdma_map_reply_msg(rdma, ctxt, &rqst->rq_snd_buf, NULL);
	if (ret < 0)
		return -EIO;

	/* Bump page refcnt so Send completion doesn't release
	 * the rq_buffer before all retransmits are complete.
	 */
	get_page(virt_to_page(rqst->rq_buffer));
	ctxt->sc_send_wr.opcode = IB_WR_SEND;
	return svc_rdma_send(rdma, &ctxt->sc_send_wr);
}
133 | ||
134 | /* Server-side transport endpoint wants a whole page for its send | |
135 | * buffer. The client RPC code constructs the RPC header in this | |
136 | * buffer before it invokes ->send_request. | |
5d252f90 | 137 | */ |
5fe6eaa1 CL |
138 | static int |
139 | xprt_rdma_bc_allocate(struct rpc_task *task) | |
5d252f90 CL |
140 | { |
141 | struct rpc_rqst *rqst = task->tk_rqstp; | |
5fe6eaa1 | 142 | size_t size = rqst->rq_callsize; |
5d252f90 CL |
143 | struct page *page; |
144 | ||
5fe6eaa1 | 145 | if (size > PAGE_SIZE) { |
5d252f90 CL |
146 | WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n", |
147 | size); | |
5fe6eaa1 CL |
148 | return -EINVAL; |
149 | } | |
5d252f90 CL |
150 | |
151 | page = alloc_page(RPCRDMA_DEF_GFP); | |
152 | if (!page) | |
5fe6eaa1 | 153 | return -ENOMEM; |
5fe6eaa1 | 154 | rqst->rq_buffer = page_address(page); |
8d42629b CL |
155 | |
156 | rqst->rq_rbuffer = kmalloc(rqst->rq_rcvsize, RPCRDMA_DEF_GFP); | |
157 | if (!rqst->rq_rbuffer) { | |
158 | put_page(page); | |
159 | return -ENOMEM; | |
160 | } | |
5fe6eaa1 | 161 | return 0; |
5d252f90 CL |
162 | } |
163 | ||
164 | static void | |
3435c74a | 165 | xprt_rdma_bc_free(struct rpc_task *task) |
5d252f90 | 166 | { |
8d42629b CL |
167 | struct rpc_rqst *rqst = task->tk_rqstp; |
168 | ||
0bad47ca | 169 | put_page(virt_to_page(rqst->rq_buffer)); |
8d42629b | 170 | kfree(rqst->rq_rbuffer); |
5d252f90 CL |
171 | } |
172 | ||
/* Marshal the RPC/RDMA transport header for one backchannel call
 * and post it for transmission.
 *
 * Returns zero on success. On any failure the connection is
 * dropped and %-ENOTCONN is returned.
 */
static int
rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
{
	struct rpc_xprt *xprt = rqst->rq_xprt;
	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
	struct svc_rdma_send_ctxt *ctxt;
	__be32 *p;
	int rc;

	ctxt = svc_rdma_send_ctxt_get(rdma);
	if (!ctxt)
		goto drop_connection;

	/* Transport header: XID, version, credit grant, rdma_msg,
	 * then three empty (xdr_zero) chunk lists.
	 */
	p = ctxt->sc_xprt_buf;
	*p++ = rqst->rq_xid;
	*p++ = rpcrdma_version;
	*p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
	*p++ = rdma_msg;
	*p++ = xdr_zero;
	*p++ = xdr_zero;
	*p = xdr_zero;
	svc_rdma_sync_reply_hdr(rdma, ctxt, RPCRDMA_HDRLEN_MIN);

#ifdef SVCRDMA_BACKCHANNEL_DEBUG
	pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
#endif

	rc = svc_rdma_bc_sendto(rdma, rqst, ctxt);
	if (rc) {
		/* Send failed: return the ctxt before disconnecting */
		svc_rdma_send_ctxt_put(rdma, ctxt);
		goto drop_connection;
	}
	return rc;

drop_connection:
	dprintk("svcrdma: failed to send bc call\n");
	xprt_disconnect_done(xprt);
	return -ENOTCONN;
}
212 | ||
/* Send an RPC call on the passive end of a transport
 * connection.
 */
static int
xprt_rdma_bc_send_request(struct rpc_task *task)
{
	struct rpc_rqst *rqst = task->tk_rqstp;
	struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
	struct svcxprt_rdma *rdma;
	int ret;

	dprintk("svcrdma: sending bc call with xid: %08x\n",
		be32_to_cpu(rqst->rq_xid));

	/* Acquire xpt_mutex without sleeping on it directly: queue
	 * the task on xpt_bc_pending, then retry the trylock once in
	 * case the holder released the mutex in the meantime. If the
	 * second attempt wins, take the task back off the wait queue.
	 */
	if (!mutex_trylock(&sxprt->xpt_mutex)) {
		rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL);
		if (!mutex_trylock(&sxprt->xpt_mutex))
			return -EAGAIN;
		rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task);
	}

	/* Only send if the transport is still alive */
	ret = -ENOTCONN;
	rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
	if (!test_bit(XPT_DEAD, &sxprt->xpt_flags))
		ret = rpcrdma_bc_send_request(rdma, rqst);

	mutex_unlock(&sxprt->xpt_mutex);

	if (ret < 0)
		return ret;
	return 0;
}
245 | ||
/* ->close handler. Only emits a debug message; no connection
 * state is torn down here.
 */
static void
xprt_rdma_bc_close(struct rpc_xprt *xprt)
{
	dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
}
251 | ||
/* ->destroy handler: free the rpc_xprt, then drop the module
 * reference taken in xprt_setup_rdma_bc(). The module_put() is
 * intentionally last.
 */
static void
xprt_rdma_bc_put(struct rpc_xprt *xprt)
{
	dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);

	xprt_free(xprt);
	module_put(THIS_MODULE);
}
260 | ||
/* Transport operations for the RPC/RDMA backchannel. Slot
 * reservation and request release use the generic congestion-
 * aware ("_cong") helpers.
 */
static const struct rpc_xprt_ops xprt_rdma_bc_procs = {
	.reserve_xprt		= xprt_reserve_xprt_cong,
	.release_xprt		= xprt_release_xprt_cong,
	.alloc_slot		= xprt_alloc_slot,
	.free_slot		= xprt_free_slot,
	.release_request	= xprt_release_rqst_cong,
	.buf_alloc		= xprt_rdma_bc_allocate,
	.buf_free		= xprt_rdma_bc_free,
	.send_request		= xprt_rdma_bc_send_request,
	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
	.close			= xprt_rdma_bc_close,
	.destroy		= xprt_rdma_bc_put,
	.print_stats		= xprt_rdma_print_stats
};
275 | ||
/* Both the initial and maximum retransmit timeout are 60 seconds */
static const struct rpc_timeout xprt_rdma_bc_timeout = {
	.to_initval = 60 * HZ,
	.to_maxval = 60 * HZ,
};
280 | ||
/* It shouldn't matter if the number of backchannel session slots
 * doesn't match the number of RPC/RDMA credits. That just means
 * one or the other will have extra slots that aren't used.
 */
static struct rpc_xprt *
xprt_setup_rdma_bc(struct xprt_create *args)
{
	struct rpc_xprt *xprt;
	struct rpcrdma_xprt *new_xprt;

	if (args->addrlen > sizeof(xprt->addr)) {
		dprintk("RPC: %s: address too large\n", __func__);
		return ERR_PTR(-EBADF);
	}

	xprt = xprt_alloc(args->net, sizeof(*new_xprt),
			  RPCRDMA_MAX_BC_REQUESTS,
			  RPCRDMA_MAX_BC_REQUESTS);
	if (!xprt) {
		dprintk("RPC: %s: couldn't allocate rpc_xprt\n",
			__func__);
		return ERR_PTR(-ENOMEM);
	}

	xprt->timeout = &xprt_rdma_bc_timeout;
	/* Backchannel xprt rides an already-established connection:
	 * mark it bound and connected from the start.
	 */
	xprt_set_bound(xprt);
	xprt_set_connected(xprt);
	xprt->bind_timeout = RPCRDMA_BIND_TO;
	xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
	xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;

	xprt->prot = XPRT_TRANSPORT_BC_RDMA;
	xprt->tsh_size = 0;
	xprt->ops = &xprt_rdma_bc_procs;

	memcpy(&xprt->addr, args->dstaddr, args->addrlen);
	xprt->addrlen = args->addrlen;
	xprt_rdma_format_addresses(xprt, (struct sockaddr *)&xprt->addr);
	xprt->resvport = 0;

	xprt->max_payload = xprt_rdma_max_inline_read;

	new_xprt = rpcx_to_rdmax(xprt);
	new_xprt->rx_buf.rb_bc_max_requests = xprt->max_reqs;

	/* Reference held on behalf of the xpt_bc_xprt pointer;
	 * undone by xprt_put() on the failure path below.
	 */
	xprt_get(xprt);
	args->bc_xprt->xpt_bc_xprt = xprt;
	xprt->bc_xprt = args->bc_xprt;

	if (!try_module_get(THIS_MODULE))
		goto out_fail;

	/* Final put for backchannel xprt is in __svc_rdma_free */
	xprt_get(xprt);
	return xprt;

out_fail:
	xprt_rdma_free_addresses(xprt);
	args->bc_xprt->xpt_bc_xprt = NULL;
	args->bc_xprt->xpt_bc_xps = NULL;
	xprt_put(xprt);
	xprt_free(xprt);
	return ERR_PTR(-EINVAL);
}
345 | ||
/* Transport class describing the server-side RPC/RDMA backchannel;
 * new instances are created via xprt_setup_rdma_bc().
 */
struct xprt_class xprt_rdma_bc = {
	.list = LIST_HEAD_INIT(xprt_rdma_bc.list),
	.name = "rdma backchannel",
	.owner = THIS_MODULE,
	.ident = XPRT_TRANSPORT_BC_RDMA,
	.setup = xprt_setup_rdma_bc,
};