Commit | Line | Data |
---|---|---|
f58851e6 | 1 | /* |
62b56a67 | 2 | * Copyright (c) 2014-2017 Oracle. All rights reserved. |
f58851e6 TT |
3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. |
4 | * | |
5 | * This software is available to you under a choice of one of two | |
6 | * licenses. You may choose to be licensed under the terms of the GNU | |
7 | * General Public License (GPL) Version 2, available from the file | |
8 | * COPYING in the main directory of this source tree, or the BSD-type | |
9 | * license below: | |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | |
14 | * | |
15 | * Redistributions of source code must retain the above copyright | |
16 | * notice, this list of conditions and the following disclaimer. | |
17 | * | |
18 | * Redistributions in binary form must reproduce the above | |
19 | * copyright notice, this list of conditions and the following | |
20 | * disclaimer in the documentation and/or other materials provided | |
21 | * with the distribution. | |
22 | * | |
23 | * Neither the name of the Network Appliance, Inc. nor the names of | |
24 | * its contributors may be used to endorse or promote products | |
25 | * derived from this software without specific prior written | |
26 | * permission. | |
27 | * | |
28 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
29 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
30 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
31 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
32 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
33 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
34 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
35 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
36 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
37 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
38 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
39 | */ | |
40 | ||
41 | /* | |
42 | * transport.c | |
43 | * | |
44 | * This file contains the top-level implementation of an RPC RDMA | |
45 | * transport. | |
46 | * | |
47 | * Naming convention: functions beginning with xprt_ are part of the | |
48 | * transport switch. All others are RPC RDMA internal. | |
49 | */ | |
50 | ||
51 | #include <linux/module.h> | |
5a0e3ad6 | 52 | #include <linux/slab.h> |
f58851e6 | 53 | #include <linux/seq_file.h> |
5976687a | 54 | #include <linux/sunrpc/addr.h> |
ccede759 | 55 | #include <linux/smp.h> |
f58851e6 TT |
56 | |
57 | #include "xprt_rdma.h" | |
58 | ||
f895b252 | 59 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
f58851e6 TT |
60 | # define RPCDBG_FACILITY RPCDBG_TRANS |
61 | #endif | |
62 | ||
f58851e6 TT |
63 | /* |
64 | * tunables | |
65 | */ | |
66 | ||
67 | static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; | |
5d252f90 | 68 | unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; |
f58851e6 | 69 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; |
ce5b3717 | 70 | unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR; |
fff09594 | 71 | int xprt_rdma_pad_optimize; |
f58851e6 | 72 | |
f895b252 | 73 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
f58851e6 TT |
74 | |
75 | static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE; | |
76 | static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; | |
29c55422 CL |
77 | static unsigned int min_inline_size = RPCRDMA_MIN_INLINE; |
78 | static unsigned int max_inline_size = RPCRDMA_MAX_INLINE; | |
f58851e6 TT |
79 | static unsigned int zero; |
80 | static unsigned int max_padding = PAGE_SIZE; | |
81 | static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; | |
82 | static unsigned int max_memreg = RPCRDMA_LAST - 1; | |
10492704 | 83 | static unsigned int dummy; |
f58851e6 TT |
84 | |
85 | static struct ctl_table_header *sunrpc_table_header; | |
86 | ||
fe2c6338 | 87 | static struct ctl_table xr_tunables_table[] = { |
f58851e6 | 88 | { |
f58851e6 TT |
89 | .procname = "rdma_slot_table_entries", |
90 | .data = &xprt_rdma_slot_table_entries, | |
91 | .maxlen = sizeof(unsigned int), | |
92 | .mode = 0644, | |
6d456111 | 93 | .proc_handler = proc_dointvec_minmax, |
f58851e6 TT |
94 | .extra1 = &min_slot_table_size, |
95 | .extra2 = &max_slot_table_size | |
96 | }, | |
97 | { | |
f58851e6 TT |
98 | .procname = "rdma_max_inline_read", |
99 | .data = &xprt_rdma_max_inline_read, | |
100 | .maxlen = sizeof(unsigned int), | |
101 | .mode = 0644, | |
44829d02 | 102 | .proc_handler = proc_dointvec_minmax, |
29c55422 CL |
103 | .extra1 = &min_inline_size, |
104 | .extra2 = &max_inline_size, | |
f58851e6 TT |
105 | }, |
106 | { | |
f58851e6 TT |
107 | .procname = "rdma_max_inline_write", |
108 | .data = &xprt_rdma_max_inline_write, | |
109 | .maxlen = sizeof(unsigned int), | |
110 | .mode = 0644, | |
44829d02 | 111 | .proc_handler = proc_dointvec_minmax, |
29c55422 CL |
112 | .extra1 = &min_inline_size, |
113 | .extra2 = &max_inline_size, | |
f58851e6 TT |
114 | }, |
115 | { | |
f58851e6 | 116 | .procname = "rdma_inline_write_padding", |
10492704 | 117 | .data = &dummy, |
f58851e6 TT |
118 | .maxlen = sizeof(unsigned int), |
119 | .mode = 0644, | |
6d456111 | 120 | .proc_handler = proc_dointvec_minmax, |
f58851e6 TT |
121 | .extra1 = &zero, |
122 | .extra2 = &max_padding, | |
123 | }, | |
124 | { | |
f58851e6 TT |
125 | .procname = "rdma_memreg_strategy", |
126 | .data = &xprt_rdma_memreg_strategy, | |
127 | .maxlen = sizeof(unsigned int), | |
128 | .mode = 0644, | |
6d456111 | 129 | .proc_handler = proc_dointvec_minmax, |
f58851e6 TT |
130 | .extra1 = &min_memreg, |
131 | .extra2 = &max_memreg, | |
132 | }, | |
9191ca3b | 133 | { |
9191ca3b TT |
134 | .procname = "rdma_pad_optimize", |
135 | .data = &xprt_rdma_pad_optimize, | |
136 | .maxlen = sizeof(unsigned int), | |
137 | .mode = 0644, | |
6d456111 | 138 | .proc_handler = proc_dointvec, |
9191ca3b | 139 | }, |
f8572d8f | 140 | { }, |
f58851e6 TT |
141 | }; |
142 | ||
fe2c6338 | 143 | static struct ctl_table sunrpc_table[] = { |
f58851e6 | 144 | { |
f58851e6 TT |
145 | .procname = "sunrpc", |
146 | .mode = 0555, | |
147 | .child = xr_tunables_table | |
148 | }, | |
f8572d8f | 149 | { }, |
f58851e6 TT |
150 | }; |
151 | ||
152 | #endif | |
153 | ||
d31ae254 | 154 | static const struct rpc_xprt_ops xprt_rdma_procs; |
f58851e6 | 155 | |
0dd39cae CL |
156 | static void |
157 | xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap) | |
158 | { | |
159 | struct sockaddr_in *sin = (struct sockaddr_in *)sap; | |
160 | char buf[20]; | |
161 | ||
162 | snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); | |
163 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | |
164 | ||
165 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA; | |
166 | } | |
167 | ||
168 | static void | |
169 | xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap) | |
170 | { | |
171 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; | |
172 | char buf[40]; | |
173 | ||
174 | snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); | |
175 | xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); | |
176 | ||
177 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6; | |
178 | } | |
179 | ||
5d252f90 | 180 | void |
5231eb97 | 181 | xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap) |
f58851e6 | 182 | { |
0dd39cae CL |
183 | char buf[128]; |
184 | ||
185 | switch (sap->sa_family) { | |
186 | case AF_INET: | |
187 | xprt_rdma_format_addresses4(xprt, sap); | |
188 | break; | |
189 | case AF_INET6: | |
190 | xprt_rdma_format_addresses6(xprt, sap); | |
191 | break; | |
192 | default: | |
193 | pr_err("rpcrdma: Unrecognized address family\n"); | |
194 | return; | |
195 | } | |
f58851e6 | 196 | |
c877b849 CL |
197 | (void)rpc_ntop(sap, buf, sizeof(buf)); |
198 | xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); | |
f58851e6 | 199 | |
81160e66 | 200 | snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); |
c877b849 | 201 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); |
f58851e6 | 202 | |
81160e66 | 203 | snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); |
c877b849 | 204 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); |
f58851e6 | 205 | |
0dd39cae | 206 | xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; |
f58851e6 TT |
207 | } |
208 | ||
5d252f90 | 209 | void |
f58851e6 TT |
210 | xprt_rdma_free_addresses(struct rpc_xprt *xprt) |
211 | { | |
33e01dc7 CL |
212 | unsigned int i; |
213 | ||
214 | for (i = 0; i < RPC_DISPLAY_MAX; i++) | |
215 | switch (i) { | |
216 | case RPC_DISPLAY_PROTO: | |
217 | case RPC_DISPLAY_NETID: | |
218 | continue; | |
219 | default: | |
220 | kfree(xprt->address_strings[i]); | |
221 | } | |
f58851e6 TT |
222 | } |
223 | ||
3a72dc77 CL |
224 | void |
225 | rpcrdma_conn_func(struct rpcrdma_ep *ep) | |
226 | { | |
227 | schedule_delayed_work(&ep->rep_connect_worker, 0); | |
228 | } | |
229 | ||
230 | void | |
231 | rpcrdma_connect_worker(struct work_struct *work) | |
232 | { | |
233 | struct rpcrdma_ep *ep = | |
234 | container_of(work, struct rpcrdma_ep, rep_connect_worker.work); | |
235 | struct rpcrdma_xprt *r_xprt = | |
236 | container_of(ep, struct rpcrdma_xprt, rx_ep); | |
237 | struct rpc_xprt *xprt = &r_xprt->rx_xprt; | |
238 | ||
239 | spin_lock_bh(&xprt->transport_lock); | |
240 | if (++xprt->connect_cookie == 0) /* maintain a reserved value */ | |
241 | ++xprt->connect_cookie; | |
242 | if (ep->rep_connected > 0) { | |
243 | if (!xprt_test_and_set_connected(xprt)) | |
244 | xprt_wake_pending_tasks(xprt, 0); | |
245 | } else { | |
246 | if (xprt_test_and_clear_connected(xprt)) | |
247 | xprt_wake_pending_tasks(xprt, -ENOTCONN); | |
248 | } | |
249 | spin_unlock_bh(&xprt->transport_lock); | |
250 | } | |
251 | ||
f58851e6 TT |
252 | static void |
253 | xprt_rdma_connect_worker(struct work_struct *work) | |
254 | { | |
5abefb86 CL |
255 | struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt, |
256 | rx_connect_worker.work); | |
257 | struct rpc_xprt *xprt = &r_xprt->rx_xprt; | |
f58851e6 TT |
258 | int rc = 0; |
259 | ||
d19751e7 TM |
260 | xprt_clear_connected(xprt); |
261 | ||
d19751e7 TM |
262 | rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); |
263 | if (rc) | |
264 | xprt_wake_pending_tasks(xprt, rc); | |
265 | ||
f58851e6 TT |
266 | xprt_clear_connecting(xprt); |
267 | } | |
268 | ||
4a068258 CL |
269 | static void |
270 | xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) | |
271 | { | |
272 | struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, | |
273 | rx_xprt); | |
274 | ||
b4744e00 | 275 | trace_xprtrdma_inject_dsc(r_xprt); |
4a068258 CL |
276 | rdma_disconnect(r_xprt->rx_ia.ri_id); |
277 | } | |
278 | ||
f58851e6 TT |
279 | /* |
280 | * xprt_rdma_destroy | |
281 | * | |
282 | * Destroy the xprt. | |
283 | * Free all memory associated with the object, including its own. | |
284 | * NOTE: none of the *destroy methods free memory for their top-level | |
285 | * objects, even though they may have allocated it (they do free | |
286 | * private memory). It's up to the caller to handle it. In this | |
287 | * case (RDMA transport), all structure memory is inlined with the | |
288 | * struct rpcrdma_xprt. | |
289 | */ | |
290 | static void | |
291 | xprt_rdma_destroy(struct rpc_xprt *xprt) | |
292 | { | |
293 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | |
f58851e6 | 294 | |
b4744e00 | 295 | trace_xprtrdma_destroy(r_xprt); |
f58851e6 | 296 | |
5abefb86 | 297 | cancel_delayed_work_sync(&r_xprt->rx_connect_worker); |
f58851e6 TT |
298 | |
299 | xprt_clear_connected(xprt); | |
300 | ||
7f1d5419 | 301 | rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); |
72c02173 | 302 | rpcrdma_buffer_destroy(&r_xprt->rx_buf); |
f58851e6 TT |
303 | rpcrdma_ia_close(&r_xprt->rx_ia); |
304 | ||
305 | xprt_rdma_free_addresses(xprt); | |
e204e621 | 306 | xprt_free(xprt); |
f58851e6 | 307 | |
f58851e6 TT |
308 | module_put(THIS_MODULE); |
309 | } | |
310 | ||
2881ae74 TM |
311 | static const struct rpc_timeout xprt_rdma_default_timeout = { |
312 | .to_initval = 60 * HZ, | |
313 | .to_maxval = 60 * HZ, | |
314 | }; | |
315 | ||
f58851e6 TT |
316 | /** |
317 | * xprt_setup_rdma - Set up transport to use RDMA | |
318 | * | |
319 | * @args: rpc transport arguments | |
320 | */ | |
321 | static struct rpc_xprt * | |
322 | xprt_setup_rdma(struct xprt_create *args) | |
323 | { | |
324 | struct rpcrdma_create_data_internal cdata; | |
325 | struct rpc_xprt *xprt; | |
326 | struct rpcrdma_xprt *new_xprt; | |
327 | struct rpcrdma_ep *new_ep; | |
5231eb97 | 328 | struct sockaddr *sap; |
f58851e6 TT |
329 | int rc; |
330 | ||
331 | if (args->addrlen > sizeof(xprt->addr)) { | |
332 | dprintk("RPC: %s: address too large\n", __func__); | |
333 | return ERR_PTR(-EBADF); | |
334 | } | |
335 | ||
37aa2133 | 336 | xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), |
d9ba131d | 337 | xprt_rdma_slot_table_entries, |
bd1722d4 | 338 | xprt_rdma_slot_table_entries); |
f58851e6 TT |
339 | if (xprt == NULL) { |
340 | dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", | |
341 | __func__); | |
342 | return ERR_PTR(-ENOMEM); | |
343 | } | |
344 | ||
f58851e6 | 345 | /* 60 second timeout, no retries */ |
ba7392bb | 346 | xprt->timeout = &xprt_rdma_default_timeout; |
bfaee096 CL |
347 | xprt->bind_timeout = RPCRDMA_BIND_TO; |
348 | xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; | |
349 | xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; | |
f58851e6 TT |
350 | |
351 | xprt->resvport = 0; /* privileged port not needed */ | |
352 | xprt->tsh_size = 0; /* RPC-RDMA handles framing */ | |
f58851e6 TT |
353 | xprt->ops = &xprt_rdma_procs; |
354 | ||
355 | /* | |
356 | * Set up RDMA-specific connect data. | |
357 | */ | |
dd229cee | 358 | sap = args->dstaddr; |
f58851e6 TT |
359 | |
360 | /* Ensure xprt->addr holds valid server TCP (not RDMA) | |
361 | * address, for any side protocols which peek at it */ | |
362 | xprt->prot = IPPROTO_TCP; | |
363 | xprt->addrlen = args->addrlen; | |
5231eb97 | 364 | memcpy(&xprt->addr, sap, xprt->addrlen); |
f58851e6 | 365 | |
5231eb97 | 366 | if (rpc_get_port(sap)) |
f58851e6 | 367 | xprt_set_bound(xprt); |
d461f1f2 | 368 | xprt_rdma_format_addresses(xprt, sap); |
f58851e6 | 369 | |
f58851e6 TT |
370 | cdata.max_requests = xprt->max_reqs; |
371 | ||
f58851e6 TT |
372 | cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */ |
373 | cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */ | |
374 | ||
375 | cdata.inline_wsize = xprt_rdma_max_inline_write; | |
376 | if (cdata.inline_wsize > cdata.wsize) | |
377 | cdata.inline_wsize = cdata.wsize; | |
378 | ||
379 | cdata.inline_rsize = xprt_rdma_max_inline_read; | |
380 | if (cdata.inline_rsize > cdata.rsize) | |
381 | cdata.inline_rsize = cdata.rsize; | |
382 | ||
f58851e6 TT |
383 | /* |
384 | * Create new transport instance, which includes initialized | |
385 | * o ia | |
386 | * o endpoint | |
387 | * o buffers | |
388 | */ | |
389 | ||
390 | new_xprt = rpcx_to_rdmax(xprt); | |
391 | ||
dd229cee | 392 | rc = rpcrdma_ia_open(new_xprt); |
f58851e6 TT |
393 | if (rc) |
394 | goto out1; | |
395 | ||
396 | /* | |
397 | * initialize and create ep | |
398 | */ | |
399 | new_xprt->rx_data = cdata; | |
400 | new_ep = &new_xprt->rx_ep; | |
f58851e6 TT |
401 | |
402 | rc = rpcrdma_ep_create(&new_xprt->rx_ep, | |
403 | &new_xprt->rx_ia, &new_xprt->rx_data); | |
404 | if (rc) | |
405 | goto out2; | |
406 | ||
ac920d04 | 407 | rc = rpcrdma_buffer_create(new_xprt); |
f58851e6 TT |
408 | if (rc) |
409 | goto out3; | |
410 | ||
5abefb86 CL |
411 | INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, |
412 | xprt_rdma_connect_worker); | |
f58851e6 | 413 | |
1c9351ee CL |
414 | xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); |
415 | if (xprt->max_payload == 0) | |
416 | goto out4; | |
417 | xprt->max_payload <<= PAGE_SHIFT; | |
43e95988 CL |
418 | dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", |
419 | __func__, xprt->max_payload); | |
f58851e6 TT |
420 | |
421 | if (!try_module_get(THIS_MODULE)) | |
422 | goto out4; | |
423 | ||
5231eb97 CL |
424 | dprintk("RPC: %s: %s:%s\n", __func__, |
425 | xprt->address_strings[RPC_DISPLAY_ADDR], | |
426 | xprt->address_strings[RPC_DISPLAY_PORT]); | |
b4744e00 | 427 | trace_xprtrdma_create(new_xprt); |
f58851e6 TT |
428 | return xprt; |
429 | ||
430 | out4: | |
03ac1a76 | 431 | rpcrdma_buffer_destroy(&new_xprt->rx_buf); |
03ac1a76 | 432 | rc = -ENODEV; |
f58851e6 | 433 | out3: |
7f1d5419 | 434 | rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); |
f58851e6 TT |
435 | out2: |
436 | rpcrdma_ia_close(&new_xprt->rx_ia); | |
437 | out1: | |
b4744e00 | 438 | trace_xprtrdma_destroy(new_xprt); |
d461f1f2 | 439 | xprt_rdma_free_addresses(xprt); |
e204e621 | 440 | xprt_free(xprt); |
f58851e6 TT |
441 | return ERR_PTR(rc); |
442 | } | |
443 | ||
bebd0318 CL |
444 | /** |
445 | * xprt_rdma_close - Close down RDMA connection | |
446 | * @xprt: generic transport to be closed | |
447 | * | |
448 | * Called during transport shutdown reconnect, or device | |
449 | * removal. Caller holds the transport's write lock. | |
f58851e6 TT |
450 | */ |
451 | static void | |
452 | xprt_rdma_close(struct rpc_xprt *xprt) | |
453 | { | |
454 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | |
bebd0318 CL |
455 | struct rpcrdma_ep *ep = &r_xprt->rx_ep; |
456 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | |
457 | ||
458 | dprintk("RPC: %s: closing xprt %p\n", __func__, xprt); | |
f58851e6 | 459 | |
bebd0318 CL |
460 | if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) { |
461 | xprt_clear_connected(xprt); | |
462 | rpcrdma_ia_remove(ia); | |
463 | return; | |
464 | } | |
465 | if (ep->rep_connected == -ENODEV) | |
466 | return; | |
467 | if (ep->rep_connected > 0) | |
08ca0dce | 468 | xprt->reestablish_timeout = 0; |
62da3b24 | 469 | xprt_disconnect_done(xprt); |
bebd0318 | 470 | rpcrdma_ep_disconnect(ep, ia); |
f58851e6 TT |
471 | } |
472 | ||
20035edf CL |
473 | /** |
474 | * xprt_rdma_set_port - update server port with rpcbind result | |
475 | * @xprt: controlling RPC transport | |
476 | * @port: new port value | |
477 | * | |
478 | * Transport connect status is unchanged. | |
479 | */ | |
f58851e6 TT |
480 | static void |
481 | xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) | |
482 | { | |
20035edf CL |
483 | struct sockaddr *sap = (struct sockaddr *)&xprt->addr; |
484 | char buf[8]; | |
485 | ||
486 | dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n", | |
487 | __func__, xprt, | |
488 | xprt->address_strings[RPC_DISPLAY_ADDR], | |
489 | xprt->address_strings[RPC_DISPLAY_PORT], | |
490 | port); | |
491 | ||
492 | rpc_set_port(sap, port); | |
f58851e6 | 493 | |
20035edf CL |
494 | kfree(xprt->address_strings[RPC_DISPLAY_PORT]); |
495 | snprintf(buf, sizeof(buf), "%u", port); | |
496 | xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); | |
497 | ||
498 | kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); | |
499 | snprintf(buf, sizeof(buf), "%4hx", port); | |
500 | xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); | |
f58851e6 TT |
501 | } |
502 | ||
/**
 * xprt_rdma_timer - invoked when an RPC times out
 * @xprt: controlling RPC transport
 * @task: RPC task that timed out (not examined here)
 *
 * Invoked when the transport is still connected, but an RPC
 * retransmit timeout occurs.
 *
 * RDMA connections have no keep-alive, so the response to a
 * timeout is to force a disconnect and let the transport
 * reconnect. That exercises the whole network path and causes
 * every pending RPC to be retransmitted.
 */
static void
xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
{
	xprt_force_disconnect(xprt);
}
521 | ||
f58851e6 | 522 | static void |
1b092092 | 523 | xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) |
f58851e6 | 524 | { |
f58851e6 TT |
525 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
526 | ||
0b9e7943 TM |
527 | if (r_xprt->rx_ep.rep_connected != 0) { |
528 | /* Reconnect */ | |
5abefb86 CL |
529 | schedule_delayed_work(&r_xprt->rx_connect_worker, |
530 | xprt->reestablish_timeout); | |
0b9e7943 | 531 | xprt->reestablish_timeout <<= 1; |
bfaee096 CL |
532 | if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO) |
533 | xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO; | |
534 | else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) | |
535 | xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; | |
0b9e7943 | 536 | } else { |
5abefb86 | 537 | schedule_delayed_work(&r_xprt->rx_connect_worker, 0); |
0b9e7943 | 538 | if (!RPC_IS_ASYNC(task)) |
5abefb86 | 539 | flush_delayed_work(&r_xprt->rx_connect_worker); |
f58851e6 TT |
540 | } |
541 | } | |
542 | ||
9c40c49f CL |
543 | /* Allocate a fixed-size buffer in which to construct and send the |
544 | * RPC-over-RDMA header for this request. | |
545 | */ | |
546 | static bool | |
547 | rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |
548 | gfp_t flags) | |
549 | { | |
08cf2efd | 550 | size_t size = RPCRDMA_HDRBUF_SIZE; |
9c40c49f CL |
551 | struct rpcrdma_regbuf *rb; |
552 | ||
553 | if (req->rl_rdmabuf) | |
554 | return true; | |
555 | ||
13650c23 | 556 | rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags); |
9c40c49f CL |
557 | if (IS_ERR(rb)) |
558 | return false; | |
559 | ||
560 | r_xprt->rx_stats.hardway_register_count += size; | |
561 | req->rl_rdmabuf = rb; | |
7a80f3f0 | 562 | xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb)); |
9c40c49f CL |
563 | return true; |
564 | } | |
565 | ||
9c40c49f CL |
566 | static bool |
567 | rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |
568 | size_t size, gfp_t flags) | |
569 | { | |
570 | struct rpcrdma_regbuf *rb; | |
9c40c49f CL |
571 | |
572 | if (req->rl_sendbuf && rdmab_length(req->rl_sendbuf) >= size) | |
573 | return true; | |
574 | ||
655fec69 | 575 | rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags); |
9c40c49f CL |
576 | if (IS_ERR(rb)) |
577 | return false; | |
578 | ||
13650c23 | 579 | rpcrdma_free_regbuf(req->rl_sendbuf); |
655fec69 | 580 | r_xprt->rx_stats.hardway_register_count += size; |
9c40c49f CL |
581 | req->rl_sendbuf = rb; |
582 | return true; | |
583 | } | |
584 | ||
585 | /* The rq_rcv_buf is used only if a Reply chunk is necessary. | |
586 | * The decision to use a Reply chunk is made later in | |
587 | * rpcrdma_marshal_req. This buffer is registered at that time. | |
588 | * | |
589 | * Otherwise, the associated RPC Reply arrives in a separate | |
590 | * Receive buffer, arbitrarily chosen by the HCA. The buffer | |
591 | * allocated here for the RPC Reply is not utilized in that | |
592 | * case. See rpcrdma_inline_fixup. | |
593 | * | |
594 | * A regbuf is used here to remember the buffer size. | |
595 | */ | |
596 | static bool | |
597 | rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |
598 | size_t size, gfp_t flags) | |
599 | { | |
600 | struct rpcrdma_regbuf *rb; | |
601 | ||
602 | if (req->rl_recvbuf && rdmab_length(req->rl_recvbuf) >= size) | |
603 | return true; | |
604 | ||
13650c23 | 605 | rb = rpcrdma_alloc_regbuf(size, DMA_NONE, flags); |
9c40c49f CL |
606 | if (IS_ERR(rb)) |
607 | return false; | |
608 | ||
13650c23 | 609 | rpcrdma_free_regbuf(req->rl_recvbuf); |
9c40c49f CL |
610 | r_xprt->rx_stats.hardway_register_count += size; |
611 | req->rl_recvbuf = rb; | |
612 | return true; | |
613 | } | |
614 | ||
5fe6eaa1 CL |
615 | /** |
616 | * xprt_rdma_allocate - allocate transport resources for an RPC | |
617 | * @task: RPC task | |
618 | * | |
619 | * Return values: | |
620 | * 0: Success; rq_buffer points to RPC buffer to use | |
621 | * ENOMEM: Out of memory, call again later | |
622 | * EIO: A permanent error occurred, do not retry | |
623 | * | |
f58851e6 | 624 | * The RDMA allocate/free functions need the task structure as a place |
9c40c49f CL |
625 | * to hide the struct rpcrdma_req, which is necessary for the actual |
626 | * send/recv sequence. | |
0ca77dc3 | 627 | * |
9c40c49f CL |
628 | * xprt_rdma_allocate provides buffers that are already mapped for |
629 | * DMA, and a local DMA lkey is provided for each. | |
f58851e6 | 630 | */ |
5fe6eaa1 CL |
631 | static int |
632 | xprt_rdma_allocate(struct rpc_task *task) | |
f58851e6 | 633 | { |
5fe6eaa1 | 634 | struct rpc_rqst *rqst = task->tk_rqstp; |
5fe6eaa1 | 635 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); |
0ca77dc3 | 636 | struct rpcrdma_req *req; |
a0a1d50c | 637 | gfp_t flags; |
f58851e6 | 638 | |
0ca77dc3 | 639 | req = rpcrdma_buffer_get(&r_xprt->rx_buf); |
c977dea2 | 640 | if (req == NULL) |
ae724676 | 641 | goto out_get; |
f58851e6 | 642 | |
5d252f90 | 643 | flags = RPCRDMA_DEF_GFP; |
a0a1d50c CL |
644 | if (RPC_IS_SWAPPER(task)) |
645 | flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; | |
646 | ||
9c40c49f CL |
647 | if (!rpcrdma_get_rdmabuf(r_xprt, req, flags)) |
648 | goto out_fail; | |
649 | if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags)) | |
650 | goto out_fail; | |
651 | if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) | |
652 | goto out_fail; | |
653 | ||
ccede759 | 654 | req->rl_cpu = smp_processor_id(); |
575448bd | 655 | req->rl_connect_cookie = 0; /* our reserved value */ |
5a6d1db4 | 656 | rpcrdma_set_xprtdata(rqst, req); |
5fe6eaa1 | 657 | rqst->rq_buffer = req->rl_sendbuf->rg_base; |
9c40c49f | 658 | rqst->rq_rbuffer = req->rl_recvbuf->rg_base; |
ae724676 | 659 | trace_xprtrdma_allocate(task, req); |
5fe6eaa1 | 660 | return 0; |
0ca77dc3 | 661 | |
0ca77dc3 | 662 | out_fail: |
f58851e6 | 663 | rpcrdma_buffer_put(req); |
ae724676 CL |
664 | out_get: |
665 | trace_xprtrdma_allocate(task, NULL); | |
5fe6eaa1 | 666 | return -ENOMEM; |
f58851e6 TT |
667 | } |
668 | ||
3435c74a CL |
669 | /** |
670 | * xprt_rdma_free - release resources allocated by xprt_rdma_allocate | |
671 | * @task: RPC task | |
672 | * | |
673 | * Caller guarantees rqst->rq_buffer is non-NULL. | |
f58851e6 TT |
674 | */ |
675 | static void | |
3435c74a | 676 | xprt_rdma_free(struct rpc_task *task) |
f58851e6 | 677 | { |
3435c74a CL |
678 | struct rpc_rqst *rqst = task->tk_rqstp; |
679 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); | |
680 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | |
f58851e6 | 681 | |
0ba6f370 CL |
682 | if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) |
683 | rpcrdma_release_rqst(r_xprt, req); | |
ae724676 | 684 | trace_xprtrdma_rpc_done(task, req); |
f58851e6 TT |
685 | rpcrdma_buffer_put(req); |
686 | } | |
687 | ||
7a89f9c6 CL |
688 | /** |
689 | * xprt_rdma_send_request - marshal and send an RPC request | |
690 | * @task: RPC task with an RPC message in rq_snd_buf | |
691 | * | |
bebd0318 CL |
692 | * Caller holds the transport's write lock. |
693 | * | |
cf73daf5 CL |
694 | * Returns: |
695 | * %0 if the RPC message has been sent | |
696 | * %-ENOTCONN if the caller should reconnect and call again | |
697 | * %-ENOBUFS if the caller should call again later | |
698 | * %-EIO if a permanent error occurred and the request was not | |
699 | * sent. Do not try to send this message again. | |
f58851e6 | 700 | */ |
f58851e6 TT |
701 | static int |
702 | xprt_rdma_send_request(struct rpc_task *task) | |
703 | { | |
704 | struct rpc_rqst *rqst = task->tk_rqstp; | |
a4f0835c | 705 | struct rpc_xprt *xprt = rqst->rq_xprt; |
f58851e6 TT |
706 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
707 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | |
6ab59945 | 708 | int rc = 0; |
f58851e6 | 709 | |
cf73daf5 CL |
710 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) |
711 | if (unlikely(!rqst->rq_buffer)) | |
712 | return xprt_rdma_bc_send_reply(rqst); | |
713 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ | |
714 | ||
bebd0318 CL |
715 | if (!xprt_connected(xprt)) |
716 | goto drop_connection; | |
717 | ||
09e60641 | 718 | rc = rpcrdma_marshal_req(r_xprt, rqst); |
6ab59945 CL |
719 | if (rc < 0) |
720 | goto failed_marshal; | |
f58851e6 TT |
721 | |
722 | if (req->rl_reply == NULL) /* e.g. reconnection */ | |
723 | rpcrdma_recv_buffer_get(req); | |
724 | ||
575448bd TT |
725 | /* Must suppress retransmit to maintain credits */ |
726 | if (req->rl_connect_cookie == xprt->connect_cookie) | |
727 | goto drop_connection; | |
728 | req->rl_connect_cookie = xprt->connect_cookie; | |
729 | ||
42b9f5c5 | 730 | __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); |
575448bd TT |
731 | if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) |
732 | goto drop_connection; | |
f58851e6 | 733 | |
d60dbb20 | 734 | rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; |
f58851e6 TT |
735 | rqst->rq_bytes_sent = 0; |
736 | return 0; | |
575448bd | 737 | |
c93c6223 | 738 | failed_marshal: |
7a89f9c6 CL |
739 | if (rc != -ENOTCONN) |
740 | return rc; | |
575448bd TT |
741 | drop_connection: |
742 | xprt_disconnect_done(xprt); | |
743 | return -ENOTCONN; /* implies disconnect */ | |
f58851e6 TT |
744 | } |
745 | ||
5d252f90 | 746 | void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
f58851e6 TT |
747 | { |
748 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | |
749 | long idle_time = 0; | |
750 | ||
751 | if (xprt_connected(xprt)) | |
752 | idle_time = (long)(jiffies - xprt->last_used) / HZ; | |
753 | ||
763f7e4e CL |
754 | seq_puts(seq, "\txprt:\trdma "); |
755 | seq_printf(seq, "%u %lu %lu %lu %ld %lu %lu %lu %llu %llu ", | |
756 | 0, /* need a local port? */ | |
757 | xprt->stat.bind_count, | |
758 | xprt->stat.connect_count, | |
759 | xprt->stat.connect_time, | |
760 | idle_time, | |
761 | xprt->stat.sends, | |
762 | xprt->stat.recvs, | |
763 | xprt->stat.bad_xids, | |
764 | xprt->stat.req_u, | |
765 | xprt->stat.bklog_u); | |
505bbe64 | 766 | seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %lu %lu %lu %lu ", |
763f7e4e CL |
767 | r_xprt->rx_stats.read_chunk_count, |
768 | r_xprt->rx_stats.write_chunk_count, | |
769 | r_xprt->rx_stats.reply_chunk_count, | |
770 | r_xprt->rx_stats.total_rdma_request, | |
771 | r_xprt->rx_stats.total_rdma_reply, | |
772 | r_xprt->rx_stats.pullup_copy_count, | |
773 | r_xprt->rx_stats.fixup_copy_count, | |
774 | r_xprt->rx_stats.hardway_register_count, | |
775 | r_xprt->rx_stats.failed_marshal_count, | |
860477d1 CL |
776 | r_xprt->rx_stats.bad_reply_count, |
777 | r_xprt->rx_stats.nomsg_call_count); | |
01bb35c8 | 778 | seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n", |
505bbe64 | 779 | r_xprt->rx_stats.mrs_recovered, |
e2ac236c | 780 | r_xprt->rx_stats.mrs_orphaned, |
c8b920bb | 781 | r_xprt->rx_stats.mrs_allocated, |
ae72950a | 782 | r_xprt->rx_stats.local_inv_needed, |
01bb35c8 CL |
783 | r_xprt->rx_stats.empty_sendctx_q, |
784 | r_xprt->rx_stats.reply_waits_for_send); | |
f58851e6 TT |
785 | } |
786 | ||
/* ->enable_swap method: no transport-side work is done; always returns 0. */
static int
xprt_rdma_enable_swap(struct rpc_xprt *xprt)
{
	return 0;
}
792 | ||
/* ->disable_swap method: intentionally empty. */
static void
xprt_rdma_disable_swap(struct rpc_xprt *xprt)
{
}
797 | ||
f58851e6 TT |
798 | /* |
799 | * Plumbing for rpc transport switch and kernel module | |
800 | */ | |
801 | ||
d31ae254 | 802 | static const struct rpc_xprt_ops xprt_rdma_procs = { |
e7ce710a | 803 | .reserve_xprt = xprt_reserve_xprt_cong, |
f58851e6 | 804 | .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ |
f39c1bfb | 805 | .alloc_slot = xprt_alloc_slot, |
f58851e6 TT |
806 | .release_request = xprt_release_rqst_cong, /* ditto */ |
807 | .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ | |
33849792 | 808 | .timer = xprt_rdma_timer, |
f58851e6 TT |
809 | .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ |
810 | .set_port = xprt_rdma_set_port, | |
811 | .connect = xprt_rdma_connect, | |
812 | .buf_alloc = xprt_rdma_allocate, | |
813 | .buf_free = xprt_rdma_free, | |
814 | .send_request = xprt_rdma_send_request, | |
815 | .close = xprt_rdma_close, | |
816 | .destroy = xprt_rdma_destroy, | |
d67fa4d8 JL |
817 | .print_stats = xprt_rdma_print_stats, |
818 | .enable_swap = xprt_rdma_enable_swap, | |
819 | .disable_swap = xprt_rdma_disable_swap, | |
f531a5db CL |
820 | .inject_disconnect = xprt_rdma_inject_disconnect, |
821 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | |
822 | .bc_setup = xprt_rdma_bc_setup, | |
76566773 | 823 | .bc_up = xprt_rdma_bc_up, |
6b26cc8c | 824 | .bc_maxpayload = xprt_rdma_bc_maxpayload, |
f531a5db CL |
825 | .bc_free_rqst = xprt_rdma_bc_free_rqst, |
826 | .bc_destroy = xprt_rdma_bc_destroy, | |
827 | #endif | |
f58851e6 TT |
828 | }; |
829 | ||
830 | static struct xprt_class xprt_rdma = { | |
831 | .list = LIST_HEAD_INIT(xprt_rdma.list), | |
832 | .name = "rdma", | |
833 | .owner = THIS_MODULE, | |
834 | .ident = XPRT_TRANSPORT_RDMA, | |
835 | .setup = xprt_setup_rdma, | |
836 | }; | |
837 | ||
ffe1f0df | 838 | void xprt_rdma_cleanup(void) |
f58851e6 TT |
839 | { |
840 | int rc; | |
841 | ||
3a0799a9 | 842 | dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); |
f895b252 | 843 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
f58851e6 TT |
844 | if (sunrpc_table_header) { |
845 | unregister_sysctl_table(sunrpc_table_header); | |
846 | sunrpc_table_header = NULL; | |
847 | } | |
848 | #endif | |
849 | rc = xprt_unregister_transport(&xprt_rdma); | |
850 | if (rc) | |
851 | dprintk("RPC: %s: xprt_unregister returned %i\n", | |
852 | __func__, rc); | |
951e721c | 853 | |
fe97b47c | 854 | rpcrdma_destroy_wq(); |
5d252f90 CL |
855 | |
856 | rc = xprt_unregister_transport(&xprt_rdma_bc); | |
857 | if (rc) | |
858 | dprintk("RPC: %s: xprt_unregister(bc) returned %i\n", | |
859 | __func__, rc); | |
f58851e6 TT |
860 | } |
861 | ||
ffe1f0df | 862 | int xprt_rdma_init(void) |
f58851e6 TT |
863 | { |
864 | int rc; | |
865 | ||
fe97b47c | 866 | rc = rpcrdma_alloc_wq(); |
505bbe64 | 867 | if (rc) |
fe97b47c | 868 | return rc; |
fe97b47c | 869 | |
951e721c CL |
870 | rc = xprt_register_transport(&xprt_rdma); |
871 | if (rc) { | |
fe97b47c | 872 | rpcrdma_destroy_wq(); |
951e721c CL |
873 | return rc; |
874 | } | |
875 | ||
5d252f90 CL |
876 | rc = xprt_register_transport(&xprt_rdma_bc); |
877 | if (rc) { | |
878 | xprt_unregister_transport(&xprt_rdma); | |
879 | rpcrdma_destroy_wq(); | |
5d252f90 CL |
880 | return rc; |
881 | } | |
882 | ||
3a0799a9 | 883 | dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); |
f58851e6 | 884 | |
3a0799a9 CL |
885 | dprintk("Defaults:\n"); |
886 | dprintk("\tSlots %d\n" | |
f58851e6 TT |
887 | "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", |
888 | xprt_rdma_slot_table_entries, | |
889 | xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); | |
10492704 | 890 | dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy); |
f58851e6 | 891 | |
f895b252 | 892 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
f58851e6 TT |
893 | if (!sunrpc_table_header) |
894 | sunrpc_table_header = register_sysctl_table(sunrpc_table); | |
895 | #endif | |
896 | return 0; | |
897 | } |