Commit | Line | Data |
---|---|---|
a2268cfb | 1 | /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ |
f58851e6 | 2 | /* |
62b56a67 | 3 | * Copyright (c) 2014-2017 Oracle. All rights reserved. |
f58851e6 TT |
4 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. |
5 | * | |
6 | * This software is available to you under a choice of one of two | |
7 | * licenses. You may choose to be licensed under the terms of the GNU | |
8 | * General Public License (GPL) Version 2, available from the file | |
9 | * COPYING in the main directory of this source tree, or the BSD-type | |
10 | * license below: | |
11 | * | |
12 | * Redistribution and use in source and binary forms, with or without | |
13 | * modification, are permitted provided that the following conditions | |
14 | * are met: | |
15 | * | |
16 | * Redistributions of source code must retain the above copyright | |
17 | * notice, this list of conditions and the following disclaimer. | |
18 | * | |
19 | * Redistributions in binary form must reproduce the above | |
20 | * copyright notice, this list of conditions and the following | |
21 | * disclaimer in the documentation and/or other materials provided | |
22 | * with the distribution. | |
23 | * | |
24 | * Neither the name of the Network Appliance, Inc. nor the names of | |
25 | * its contributors may be used to endorse or promote products | |
26 | * derived from this software without specific prior written | |
27 | * permission. | |
28 | * | |
29 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
30 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
31 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
32 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
33 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
34 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
35 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
36 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
37 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
38 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
39 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
40 | */ | |
41 | ||
42 | #ifndef _LINUX_SUNRPC_XPRT_RDMA_H | |
43 | #define _LINUX_SUNRPC_XPRT_RDMA_H | |
44 | ||
45 | #include <linux/wait.h> /* wait_queue_head_t, etc */ | |
46 | #include <linux/spinlock.h> /* spinlock_t, etc */ | |
60063497 | 47 | #include <linux/atomic.h> /* atomic_t, etc */ |
254f91e2 | 48 | #include <linux/workqueue.h> /* struct work_struct */ |
f58851e6 TT |
49 | |
50 | #include <rdma/rdma_cm.h> /* RDMA connection api */ | |
51 | #include <rdma/ib_verbs.h> /* RDMA verbs api */ | |
52 | ||
53 | #include <linux/sunrpc/clnt.h> /* rpc_xprt */ | |
54 | #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ | |
55 | #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ | |
56 | ||
5675add3 TT |
57 | #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ |
58 | #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ | |
59 | ||
5d252f90 CL |
60 | #define RPCRDMA_BIND_TO (60U * HZ) |
61 | #define RPCRDMA_INIT_REEST_TO (5U * HZ) | |
62 | #define RPCRDMA_MAX_REEST_TO (30U * HZ) | |
63 | #define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ) | |
64 | ||
f58851e6 TT |
65 | /* |
66 | * Interface Adapter -- one per transport instance | |
67 | */ | |
68 | struct rpcrdma_ia { | |
a0ce85f5 | 69 | const struct rpcrdma_memreg_ops *ri_ops; |
89e0d112 | 70 | struct ib_device *ri_device; |
f58851e6 TT |
71 | struct rdma_cm_id *ri_id; |
72 | struct ib_pd *ri_pd; | |
f58851e6 | 73 | struct completion ri_done; |
bebd0318 | 74 | struct completion ri_remove_done; |
f58851e6 | 75 | int ri_async_rc; |
87cfb9a0 | 76 | unsigned int ri_max_segs; |
ce5b3717 | 77 | unsigned int ri_max_frwr_depth; |
302d3deb CL |
78 | unsigned int ri_max_inline_write; |
79 | unsigned int ri_max_inline_read; | |
16f906d6 | 80 | unsigned int ri_max_send_sges; |
b5f0afbe | 81 | bool ri_implicit_roundup; |
5e9fc6a0 | 82 | enum ib_mr_type ri_mrtype; |
bebd0318 | 83 | unsigned long ri_flags; |
ce1ab9ab CL |
84 | struct ib_qp_attr ri_qp_attr; |
85 | struct ib_qp_init_attr ri_qp_init_attr; | |
f58851e6 TT |
86 | }; |
87 | ||
bebd0318 CL |
88 | enum { |
89 | RPCRDMA_IAF_REMOVING = 0, | |
90 | }; | |
91 | ||
f58851e6 TT |
92 | /* |
93 | * RDMA Endpoint -- one per transport instance | |
94 | */ | |
95 | ||
96 | struct rpcrdma_ep { | |
ae72950a CL |
97 | unsigned int rep_send_count; |
98 | unsigned int rep_send_batch; | |
f58851e6 | 99 | int rep_connected; |
f58851e6 TT |
100 | struct ib_qp_init_attr rep_attr; |
101 | wait_queue_head_t rep_connect_wait; | |
87cfb9a0 | 102 | struct rpcrdma_connect_private rep_cm_private; |
f58851e6 | 103 | struct rdma_conn_param rep_remote_cma; |
6ceea368 | 104 | int rep_receive_count; |
f58851e6 TT |
105 | }; |
106 | ||
124fa17d CL |
107 | /* Pre-allocate extra Work Requests for handling backward receives |
108 | * and sends. This is a fixed value because the Work Queues are | |
109 | * allocated when the forward channel is set up. | |
110 | */ | |
111 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | |
112 | #define RPCRDMA_BACKWARD_WRS (8) | |
113 | #else | |
114 | #define RPCRDMA_BACKWARD_WRS (0) | |
115 | #endif | |
116 | ||
9128c3e7 CL |
117 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV |
118 | * | |
119 | * The below structure appears at the front of a large region of kmalloc'd | |
120 | * memory, which always starts on a good alignment boundary. | |
121 | */ | |
122 | ||
123 | struct rpcrdma_regbuf { | |
9128c3e7 | 124 | struct ib_sge rg_iov; |
54cbd6b0 | 125 | struct ib_device *rg_device; |
99ef4db3 | 126 | enum dma_data_direction rg_direction; |
9128c3e7 CL |
127 | __be32 rg_base[0] __attribute__ ((aligned(256))); |
128 | }; | |
129 | ||
130 | static inline u64 | |
131 | rdmab_addr(struct rpcrdma_regbuf *rb) | |
132 | { | |
133 | return rb->rg_iov.addr; | |
134 | } | |
135 | ||
136 | static inline u32 | |
137 | rdmab_length(struct rpcrdma_regbuf *rb) | |
138 | { | |
139 | return rb->rg_iov.length; | |
140 | } | |
141 | ||
142 | static inline u32 | |
143 | rdmab_lkey(struct rpcrdma_regbuf *rb) | |
144 | { | |
145 | return rb->rg_iov.lkey; | |
146 | } | |
147 | ||
91a10c52 CL |
148 | static inline struct ib_device * |
149 | rdmab_device(struct rpcrdma_regbuf *rb) | |
150 | { | |
151 | return rb->rg_device; | |
152 | } | |
153 | ||
5d252f90 CL |
154 | #define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) |
155 | ||
94931746 CL |
156 | /* To ensure a transport can always make forward progress, |
157 | * the number of RDMA segments allowed in header chunk lists | |
158 | * is capped at 8. This prevents less-capable devices and | |
159 | * memory registrations from overrunning the Send buffer | |
160 | * while building chunk lists. | |
161 | * | |
162 | * Elements of the Read list take up more room than the | |
163 | * Write list or Reply chunk. 8 read segments means the Read | |
164 | * list (or Write list or Reply chunk) cannot consume more | |
165 | * than | |
166 | * | |
167 | * ((8 + 2) * read segment size) + 1 XDR words, or 244 bytes. | |
168 | * | |
169 | * And the fixed part of the header is another 24 bytes. | |
170 | * | |
171 | * The smallest inline threshold is 1024 bytes, ensuring that | |
172 | * at least 750 bytes are available for RPC messages. | |
173 | */ | |
08cf2efd CL |
174 | enum { |
175 | RPCRDMA_MAX_HDR_SEGS = 8, | |
176 | RPCRDMA_HDRBUF_SIZE = 256, | |
177 | }; | |
94931746 | 178 | |
f58851e6 | 179 | /* |
e1352c96 CL |
180 | * struct rpcrdma_rep -- this structure encapsulates state required |
181 | * to receive and complete an RPC Reply, asynchronously. It needs | |
182 | * several pieces of state: | |
f58851e6 | 183 | * |
e1352c96 CL |
184 | * o receive buffer and ib_sge (donated to provider) |
185 | * o status of receive (success or not, length, inv rkey) | |
186 | * o bookkeeping state to get run by reply handler (XDR stream) | |
f58851e6 | 187 | * |
e1352c96 CL |
188 | * These structures are allocated during transport initialization. |
189 | * N of these are associated with a transport instance, managed by | |
190 | * struct rpcrdma_buffer. N is the max number of outstanding RPCs. | |
f58851e6 TT |
191 | */ |
192 | ||
f58851e6 | 193 | struct rpcrdma_rep { |
552bf225 | 194 | struct ib_cqe rr_cqe; |
5381e0ec CL |
195 | __be32 rr_xid; |
196 | __be32 rr_vers; | |
197 | __be32 rr_proc; | |
c8b920bb CL |
198 | int rr_wc_flags; |
199 | u32 rr_inv_rkey; | |
7c8d9e7c | 200 | bool rr_temp; |
c1bcb68e | 201 | struct rpcrdma_regbuf *rr_rdmabuf; |
fed171b3 | 202 | struct rpcrdma_xprt *rr_rxprt; |
fe97b47c | 203 | struct work_struct rr_work; |
96f8778f CL |
204 | struct xdr_buf rr_hdrbuf; |
205 | struct xdr_stream rr_stream; | |
e1352c96 | 206 | struct rpc_rqst *rr_rqst; |
6b1184cd | 207 | struct list_head rr_list; |
6ea8e711 | 208 | struct ib_recv_wr rr_recv_wr; |
f58851e6 TT |
209 | }; |
210 | ||
ae72950a CL |
211 | /* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes |
212 | */ | |
01bb35c8 | 213 | struct rpcrdma_req; |
ae72950a CL |
214 | struct rpcrdma_xprt; |
215 | struct rpcrdma_sendctx { | |
216 | struct ib_send_wr sc_wr; | |
217 | struct ib_cqe sc_cqe; | |
218 | struct rpcrdma_xprt *sc_xprt; | |
01bb35c8 | 219 | struct rpcrdma_req *sc_req; |
ae72950a CL |
220 | unsigned int sc_unmap_count; |
221 | struct ib_sge sc_sges[]; | |
222 | }; | |
223 | ||
224 | /* Limit the number of SGEs that can be unmapped during one | |
225 | * Send completion. This caps the amount of work a single | |
226 | * completion can do before returning to the provider. | |
227 | * | |
228 | * Setting this to zero disables Send completion batching. | |
229 | */ | |
230 | enum { | |
231 | RPCRDMA_MAX_SEND_BATCH = 7, | |
232 | }; | |
233 | ||
0dbb4108 | 234 | /* |
96ceddea | 235 | * struct rpcrdma_mr - external memory region metadata |
0dbb4108 CL |
236 | * |
237 | * An external memory region is any buffer or page that is registered | |
238 | * on the fly (ie, not pre-registered). | |
239 | * | |
96ceddea | 240 | * Each rpcrdma_buffer has a list of free MRs anchored in rb_mrs. During |
0dbb4108 CL |
241 | * call_allocate, rpcrdma_buffer_get() assigns one to each segment in |
242 | * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep | |
243 | * track of registration metadata while each RPC is pending. | |
244 | * rpcrdma_deregister_external() uses this metadata to unmap and | |
245 | * release these resources when an RPC is complete. | |
246 | */ | |
ce5b3717 CL |
247 | enum rpcrdma_frwr_state { |
248 | FRWR_IS_INVALID, /* ready to be used */ | |
249 | FRWR_IS_VALID, /* in use */ | |
250 | FRWR_FLUSHED_FR, /* flushed FASTREG WR */ | |
251 | FRWR_FLUSHED_LI, /* flushed LOCALINV WR */ | |
0dbb4108 CL |
252 | }; |
253 | ||
ce5b3717 | 254 | struct rpcrdma_frwr { |
0dbb4108 | 255 | struct ib_mr *fr_mr; |
2fa8f88d | 256 | struct ib_cqe fr_cqe; |
ce5b3717 | 257 | enum rpcrdma_frwr_state fr_state; |
2fa8f88d | 258 | struct completion fr_linv_done; |
3cf4e169 CL |
259 | union { |
260 | struct ib_reg_wr fr_regwr; | |
261 | struct ib_send_wr fr_invwr; | |
262 | }; | |
0dbb4108 CL |
263 | }; |
264 | ||
acb9da7a | 265 | struct rpcrdma_fmr { |
88975ebe CL |
266 | struct ib_fmr *fm_mr; |
267 | u64 *fm_physaddrs; | |
0dbb4108 CL |
268 | }; |
269 | ||
96ceddea CL |
270 | struct rpcrdma_mr { |
271 | struct list_head mr_list; | |
272 | struct scatterlist *mr_sg; | |
273 | int mr_nents; | |
274 | enum dma_data_direction mr_dir; | |
0dbb4108 | 275 | union { |
acb9da7a | 276 | struct rpcrdma_fmr fmr; |
ce5b3717 | 277 | struct rpcrdma_frwr frwr; |
c882a655 | 278 | }; |
96ceddea CL |
279 | struct rpcrdma_xprt *mr_xprt; |
280 | u32 mr_handle; | |
281 | u32 mr_length; | |
282 | u64 mr_offset; | |
61da886b | 283 | struct work_struct mr_recycle; |
96ceddea | 284 | struct list_head mr_all; |
0dbb4108 CL |
285 | }; |
286 | ||
f58851e6 TT |
287 | /* |
288 | * struct rpcrdma_req -- structure central to the request/reply sequence. | |
289 | * | |
290 | * N of these are associated with a transport instance, and stored in | |
291 | * struct rpcrdma_buffer. N is the max number of outstanding requests. | |
292 | * | |
293 | * It includes pre-registered buffer memory for send AND recv. | |
294 | * The recv buffer, however, is not owned by this structure, and | |
295 | * is "donated" to the hardware when a recv is posted. When a | |
296 | * reply is handled, the recv buffer used is given back to the | |
297 | * struct rpcrdma_req associated with the request. | |
298 | * | |
299 | * In addition to the basic memory, this structure includes an array | |
300 | * of iovs for send operations. The reason is that the iovs passed to | |
301 | * ib_post_{send,recv} must not be modified until the work request | |
302 | * completes. | |
f58851e6 TT |
303 | */ |
304 | ||
5ab81428 CL |
305 | /* Maximum number of page-sized "segments" per chunk list to be |
306 | * registered or invalidated. Must handle a Reply chunk: | |
307 | */ | |
308 | enum { | |
309 | RPCRDMA_MAX_IOV_SEGS = 3, | |
310 | RPCRDMA_MAX_DATA_SEGS = ((1 * 1024 * 1024) / PAGE_SIZE) + 1, | |
311 | RPCRDMA_MAX_SEGS = RPCRDMA_MAX_DATA_SEGS + | |
312 | RPCRDMA_MAX_IOV_SEGS, | |
313 | }; | |
314 | ||
f58851e6 | 315 | struct rpcrdma_mr_seg { /* chunk descriptors */ |
f58851e6 | 316 | u32 mr_len; /* length of chunk or segment */ |
f58851e6 TT |
317 | struct page *mr_page; /* owning page, if any */ |
318 | char *mr_offset; /* kva if no page, else offset */ | |
319 | }; | |
320 | ||
c6f5b47f CL |
321 | /* The Send SGE array is provisioned to send a maximum size |
322 | * inline request: | |
655fec69 CL |
323 | * - RPC-over-RDMA header |
324 | * - xdr_buf head iovec | |
c6f5b47f | 325 | * - RPCRDMA_MAX_INLINE bytes, in pages |
655fec69 | 326 | * - xdr_buf tail iovec |
c6f5b47f CL |
327 | * |
328 | * The actual number of array elements consumed by each RPC | |
329 | * depends on the device's max_sge limit. | |
655fec69 CL |
330 | */ |
331 | enum { | |
16f906d6 | 332 | RPCRDMA_MIN_SEND_SGES = 3, |
c6f5b47f | 333 | RPCRDMA_MAX_PAGE_SGES = RPCRDMA_MAX_INLINE >> PAGE_SHIFT, |
655fec69 CL |
334 | RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1, |
335 | }; | |
b3221d6a | 336 | |
5ab81428 | 337 | struct rpcrdma_buffer; |
f58851e6 | 338 | struct rpcrdma_req { |
a80d66c9 | 339 | struct list_head rl_list; |
edb41e61 | 340 | struct rpc_rqst rl_slot; |
b3221d6a | 341 | struct rpcrdma_buffer *rl_buffer; |
90aab602 | 342 | struct rpcrdma_rep *rl_reply; |
7a80f3f0 CL |
343 | struct xdr_stream rl_stream; |
344 | struct xdr_buf rl_hdrbuf; | |
ae72950a | 345 | struct rpcrdma_sendctx *rl_sendctx; |
9c40c49f CL |
346 | struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */ |
347 | struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */ | |
348 | struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */ | |
f531a5db CL |
349 | |
350 | struct list_head rl_all; | |
531cca0c | 351 | unsigned long rl_flags; |
5ab81428 CL |
352 | |
353 | struct list_head rl_registered; /* registered segments */ | |
354 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; | |
f58851e6 | 355 | }; |
0ca77dc3 | 356 | |
531cca0c CL |
357 | /* rl_flags */ |
358 | enum { | |
6c537f2c | 359 | RPCRDMA_REQ_F_PENDING = 0, |
01bb35c8 | 360 | RPCRDMA_REQ_F_TX_RESOURCES, |
531cca0c CL |
361 | }; |
362 | ||
0ca77dc3 | 363 | static inline struct rpcrdma_req * |
fc1eb807 | 364 | rpcr_to_rdmar(const struct rpc_rqst *rqst) |
0ca77dc3 | 365 | { |
edb41e61 | 366 | return container_of(rqst, struct rpcrdma_req, rl_slot); |
0ca77dc3 | 367 | } |
f58851e6 | 368 | |
9a5c63e9 | 369 | static inline void |
96ceddea | 370 | rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list) |
9a5c63e9 | 371 | { |
96ceddea | 372 | list_add_tail(&mr->mr_list, list); |
9a5c63e9 CL |
373 | } |
374 | ||
96ceddea CL |
375 | static inline struct rpcrdma_mr * |
376 | rpcrdma_mr_pop(struct list_head *list) | |
9a5c63e9 | 377 | { |
96ceddea | 378 | struct rpcrdma_mr *mr; |
9a5c63e9 | 379 | |
96ceddea | 380 | mr = list_first_entry(list, struct rpcrdma_mr, mr_list); |
054f1557 | 381 | list_del_init(&mr->mr_list); |
96ceddea | 382 | return mr; |
9a5c63e9 CL |
383 | } |
384 | ||
f58851e6 TT |
385 | /* |
386 | * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for | |
387 | * inline requests/replies, and client/server credits. | |
388 | * | |
389 | * One of these is associated with a transport instance | |
390 | */ | |
391 | struct rpcrdma_buffer { | |
96ceddea CL |
392 | spinlock_t rb_mrlock; /* protect rb_mrs list */ |
393 | struct list_head rb_mrs; | |
58d1dcf5 | 394 | struct list_head rb_all; |
58d1dcf5 | 395 | |
ae72950a CL |
396 | unsigned long rb_sc_head; |
397 | unsigned long rb_sc_tail; | |
398 | unsigned long rb_sc_last; | |
399 | struct rpcrdma_sendctx **rb_sc_ctxs; | |
400 | ||
1e465fd4 CL |
401 | spinlock_t rb_lock; /* protect buf lists */ |
402 | struct list_head rb_send_bufs; | |
403 | struct list_head rb_recv_bufs; | |
2fad6592 | 404 | unsigned long rb_flags; |
58d1dcf5 | 405 | u32 rb_max_requests; |
be798f90 | 406 | u32 rb_credits; /* most recent credit grant */ |
f531a5db CL |
407 | |
408 | u32 rb_bc_srv_max_requests; | |
409 | spinlock_t rb_reqslock; /* protect rb_allreqs */ | |
410 | struct list_head rb_allreqs; | |
5d252f90 CL |
411 | |
412 | u32 rb_bc_max_requests; | |
505bbe64 | 413 | |
6d2d0ee2 | 414 | struct workqueue_struct *rb_completion_wq; |
e2ac236c | 415 | struct delayed_work rb_refresh_worker; |
f58851e6 TT |
416 | }; |
417 | #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) | |
418 | ||
2fad6592 CL |
419 | /* rb_flags */ |
420 | enum { | |
421 | RPCRDMA_BUF_F_EMPTY_SCQ = 0, | |
422 | }; | |
423 | ||
f58851e6 TT |
424 | /* |
425 | * Internal structure for transport instance creation. This | |
426 | * exists primarily for modularity. | |
427 | * | |
428 | * This data should be set with mount options | |
429 | */ | |
430 | struct rpcrdma_create_data_internal { | |
f58851e6 TT |
431 | unsigned int max_requests; /* max requests (slots) in flight */ |
432 | unsigned int rsize; /* mount rsize - max read hdr+data */ | |
433 | unsigned int wsize; /* mount wsize - max write hdr+data */ | |
434 | unsigned int inline_rsize; /* max non-rdma read data payload */ | |
435 | unsigned int inline_wsize; /* max non-rdma write data payload */ | |
f58851e6 TT |
436 | }; |
437 | ||
f58851e6 TT |
438 | /* |
439 | * Statistics for RPCRDMA | |
440 | */ | |
441 | struct rpcrdma_stats { | |
67af6f65 | 442 | /* accessed when sending a call */ |
f58851e6 TT |
443 | unsigned long read_chunk_count; |
444 | unsigned long write_chunk_count; | |
445 | unsigned long reply_chunk_count; | |
f58851e6 | 446 | unsigned long long total_rdma_request; |
f58851e6 | 447 | |
67af6f65 | 448 | /* rarely accessed error counters */ |
f58851e6 | 449 | unsigned long long pullup_copy_count; |
f58851e6 TT |
450 | unsigned long hardway_register_count; |
451 | unsigned long failed_marshal_count; | |
452 | unsigned long bad_reply_count; | |
61da886b | 453 | unsigned long mrs_recycled; |
505bbe64 | 454 | unsigned long mrs_orphaned; |
e2ac236c | 455 | unsigned long mrs_allocated; |
ae72950a | 456 | unsigned long empty_sendctx_q; |
67af6f65 CL |
457 | |
458 | /* accessed when receiving a reply */ | |
459 | unsigned long long total_rdma_reply; | |
460 | unsigned long long fixup_copy_count; | |
01bb35c8 | 461 | unsigned long reply_waits_for_send; |
c8b920bb | 462 | unsigned long local_inv_needed; |
67af6f65 CL |
463 | unsigned long nomsg_call_count; |
464 | unsigned long bcall_count; | |
f58851e6 TT |
465 | }; |
466 | ||
a0ce85f5 CL |
467 | /* |
468 | * Per-registration mode operations | |
469 | */ | |
1c9351ee | 470 | struct rpcrdma_xprt; |
a0ce85f5 | 471 | struct rpcrdma_memreg_ops { |
6748b0ca CL |
472 | struct rpcrdma_mr_seg * |
473 | (*ro_map)(struct rpcrdma_xprt *, | |
9d6b0409 | 474 | struct rpcrdma_mr_seg *, int, bool, |
96ceddea | 475 | struct rpcrdma_mr **); |
f2877623 CL |
476 | int (*ro_send)(struct rpcrdma_ia *ia, |
477 | struct rpcrdma_req *req); | |
c3441618 | 478 | void (*ro_reminv)(struct rpcrdma_rep *rep, |
96ceddea | 479 | struct list_head *mrs); |
32d0ceec | 480 | void (*ro_unmap_sync)(struct rpcrdma_xprt *, |
451d26e1 | 481 | struct list_head *); |
3968cb58 CL |
482 | int (*ro_open)(struct rpcrdma_ia *, |
483 | struct rpcrdma_ep *, | |
484 | struct rpcrdma_create_data_internal *); | |
1c9351ee | 485 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); |
e2ac236c | 486 | int (*ro_init_mr)(struct rpcrdma_ia *, |
96ceddea CL |
487 | struct rpcrdma_mr *); |
488 | void (*ro_release_mr)(struct rpcrdma_mr *mr); | |
a0ce85f5 | 489 | const char *ro_displayname; |
c8b920bb | 490 | const int ro_send_w_inv_ok; |
a0ce85f5 CL |
491 | }; |
492 | ||
493 | extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops; | |
494 | extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops; | |
a0ce85f5 | 495 | |
f58851e6 TT |
496 | /* |
497 | * RPCRDMA transport -- encapsulates the structures above for | |
498 | * integration with RPC. | |
499 | * | |
500 | * The contained structures are embedded, not pointers, | |
501 | * for convenience. This structure need not be visible externally. | |
502 | * | |
503 | * It is allocated and initialized during mount, and released | |
504 | * during unmount. | |
505 | */ | |
506 | struct rpcrdma_xprt { | |
5abefb86 | 507 | struct rpc_xprt rx_xprt; |
f58851e6 TT |
508 | struct rpcrdma_ia rx_ia; |
509 | struct rpcrdma_ep rx_ep; | |
510 | struct rpcrdma_buffer rx_buf; | |
511 | struct rpcrdma_create_data_internal rx_data; | |
5abefb86 | 512 | struct delayed_work rx_connect_worker; |
f58851e6 TT |
513 | struct rpcrdma_stats rx_stats; |
514 | }; | |
515 | ||
5abefb86 | 516 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) |
f58851e6 TT |
517 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) |
518 | ||
d461f1f2 CL |
519 | static inline const char * |
520 | rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt) | |
521 | { | |
522 | return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]; | |
523 | } | |
524 | ||
525 | static inline const char * | |
526 | rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt) | |
527 | { | |
528 | return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT]; | |
529 | } | |
530 | ||
9191ca3b TT |
531 | /* Setting this to 0 ensures interoperability with early servers. |
532 | * Setting this to 1 enhances certain unaligned read/write performance. | |
533 | * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ | |
534 | extern int xprt_rdma_pad_optimize; | |
535 | ||
fff09594 CL |
536 | /* This setting controls the hunt for a supported memory |
537 | * registration strategy. | |
538 | */ | |
539 | extern unsigned int xprt_rdma_memreg_strategy; | |
540 | ||
f58851e6 TT |
541 | /* |
542 | * Interface Adapter calls - xprtrdma/verbs.c | |
543 | */ | |
dd229cee | 544 | int rpcrdma_ia_open(struct rpcrdma_xprt *xprt); |
bebd0318 | 545 | void rpcrdma_ia_remove(struct rpcrdma_ia *ia); |
f58851e6 | 546 | void rpcrdma_ia_close(struct rpcrdma_ia *); |
b54054ca CL |
547 | bool frwr_is_supported(struct rpcrdma_ia *); |
548 | bool fmr_is_supported(struct rpcrdma_ia *); | |
f58851e6 TT |
549 | |
550 | /* | |
551 | * Endpoint calls - xprtrdma/verbs.c | |
552 | */ | |
553 | int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, | |
554 | struct rpcrdma_create_data_internal *); | |
7f1d5419 | 555 | void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); |
f58851e6 | 556 | int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); |
282191cb | 557 | void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); |
f58851e6 TT |
558 | |
559 | int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, | |
560 | struct rpcrdma_req *); | |
f58851e6 TT |
561 | |
562 | /* | |
563 | * Buffer calls - xprtrdma/verbs.c | |
564 | */ | |
f531a5db | 565 | struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); |
13650c23 | 566 | void rpcrdma_destroy_req(struct rpcrdma_req *); |
ac920d04 | 567 | int rpcrdma_buffer_create(struct rpcrdma_xprt *); |
f58851e6 | 568 | void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); |
ae72950a | 569 | struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); |
f58851e6 | 570 | |
96ceddea CL |
571 | struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt); |
572 | void rpcrdma_mr_put(struct rpcrdma_mr *mr); | |
ec12e479 | 573 | void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr); |
61da886b CL |
574 | |
575 | static inline void | |
576 | rpcrdma_mr_recycle(struct rpcrdma_mr *mr) | |
577 | { | |
578 | schedule_work(&mr->mr_recycle); | |
579 | } | |
96ceddea | 580 | |
f58851e6 TT |
581 | struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); |
582 | void rpcrdma_buffer_put(struct rpcrdma_req *); | |
f58851e6 TT |
583 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); |
584 | ||
13650c23 | 585 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, |
99ef4db3 | 586 | gfp_t); |
54cbd6b0 | 587 | bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); |
13650c23 | 588 | void rpcrdma_free_regbuf(struct rpcrdma_regbuf *); |
9128c3e7 | 589 | |
54cbd6b0 CL |
590 | static inline bool |
591 | rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb) | |
592 | { | |
593 | return rb->rg_device != NULL; | |
594 | } | |
595 | ||
596 | static inline bool | |
597 | rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) | |
598 | { | |
599 | if (likely(rpcrdma_regbuf_is_mapped(rb))) | |
600 | return true; | |
601 | return __rpcrdma_dma_map_regbuf(ia, rb); | |
602 | } | |
603 | ||
d654788e CL |
604 | /* |
605 | * Wrappers for chunk registration, shared by read/write chunk code. | |
606 | */ | |
607 | ||
d654788e CL |
608 | static inline enum dma_data_direction |
609 | rpcrdma_data_dir(bool writing) | |
610 | { | |
611 | return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | |
612 | } | |
613 | ||
f58851e6 TT |
614 | /* |
615 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c | |
616 | */ | |
655fec69 CL |
617 | |
618 | enum rpcrdma_chunktype { | |
619 | rpcrdma_noch = 0, | |
620 | rpcrdma_readch, | |
621 | rpcrdma_areadch, | |
622 | rpcrdma_writech, | |
623 | rpcrdma_replych | |
624 | }; | |
625 | ||
857f9aca CL |
626 | int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, |
627 | struct rpcrdma_req *req, u32 hdrlen, | |
628 | struct xdr_buf *xdr, | |
629 | enum rpcrdma_chunktype rtype); | |
ae72950a | 630 | void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc); |
09e60641 | 631 | int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst); |
87cfb9a0 | 632 | void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); |
e1352c96 | 633 | void rpcrdma_complete_rqst(struct rpcrdma_rep *rep); |
d8f532d2 | 634 | void rpcrdma_reply_handler(struct rpcrdma_rep *rep); |
0ba6f370 CL |
635 | void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, |
636 | struct rpcrdma_req *req); | |
d8f532d2 | 637 | void rpcrdma_deferred_completion(struct work_struct *work); |
f58851e6 | 638 | |
96f8778f CL |
639 | static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len) |
640 | { | |
641 | xdr->head[0].iov_len = len; | |
642 | xdr->len = len; | |
643 | } | |
644 | ||
ffe1f0df CL |
645 | /* RPC/RDMA module init - xprtrdma/transport.c |
646 | */ | |
5d252f90 CL |
647 | extern unsigned int xprt_rdma_max_inline_read; |
648 | void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); | |
649 | void xprt_rdma_free_addresses(struct rpc_xprt *xprt); | |
650 | void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq); | |
ffe1f0df CL |
651 | int xprt_rdma_init(void); |
652 | void xprt_rdma_cleanup(void); | |
653 | ||
f531a5db CL |
654 | /* Backchannel calls - xprtrdma/backchannel.c |
655 | */ | |
656 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | |
657 | int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int); | |
76566773 | 658 | int xprt_rdma_bc_up(struct svc_serv *, struct net *); |
6b26cc8c | 659 | size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); |
f531a5db | 660 | int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); |
63cae470 | 661 | void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); |
cf73daf5 | 662 | int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst); |
f531a5db CL |
663 | void xprt_rdma_bc_free_rqst(struct rpc_rqst *); |
664 | void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); | |
665 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ | |
666 | ||
5d252f90 | 667 | extern struct xprt_class xprt_rdma_bc; |
cec56c8f | 668 | |
f58851e6 | 669 | #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ |