Commit | Line | Data |
---|---|---|
f58851e6 | 1 | /* |
62b56a67 | 2 | * Copyright (c) 2014-2017 Oracle. All rights reserved. |
f58851e6 TT |
3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. |
4 | * | |
5 | * This software is available to you under a choice of one of two | |
6 | * licenses. You may choose to be licensed under the terms of the GNU | |
7 | * General Public License (GPL) Version 2, available from the file | |
8 | * COPYING in the main directory of this source tree, or the BSD-type | |
9 | * license below: | |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | |
14 | * | |
15 | * Redistributions of source code must retain the above copyright | |
16 | * notice, this list of conditions and the following disclaimer. | |
17 | * | |
18 | * Redistributions in binary form must reproduce the above | |
19 | * copyright notice, this list of conditions and the following | |
20 | * disclaimer in the documentation and/or other materials provided | |
21 | * with the distribution. | |
22 | * | |
23 | * Neither the name of the Network Appliance, Inc. nor the names of | |
24 | * its contributors may be used to endorse or promote products | |
25 | * derived from this software without specific prior written | |
26 | * permission. | |
27 | * | |
28 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
29 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
30 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
31 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
32 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
33 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
34 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
35 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
36 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
37 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
38 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
39 | */ | |
40 | ||
41 | #ifndef _LINUX_SUNRPC_XPRT_RDMA_H | |
42 | #define _LINUX_SUNRPC_XPRT_RDMA_H | |
43 | ||
44 | #include <linux/wait.h> /* wait_queue_head_t, etc */ | |
45 | #include <linux/spinlock.h> /* spinlock_t, etc */ | |
60063497 | 46 | #include <linux/atomic.h> /* atomic_t, etc */ |
254f91e2 | 47 | #include <linux/workqueue.h> /* struct work_struct */ |
f58851e6 TT |
48 | |
49 | #include <rdma/rdma_cm.h> /* RDMA connection api */ | |
50 | #include <rdma/ib_verbs.h> /* RDMA verbs api */ | |
51 | ||
52 | #include <linux/sunrpc/clnt.h> /* rpc_xprt */ | |
53 | #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ | |
54 | #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ | |
55 | ||
5675add3 TT |
56 | #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ |
57 | #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ | |
58 | ||
5d252f90 CL |
59 | #define RPCRDMA_BIND_TO (60U * HZ) |
60 | #define RPCRDMA_INIT_REEST_TO (5U * HZ) | |
61 | #define RPCRDMA_MAX_REEST_TO (30U * HZ) | |
62 | #define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ) | |
63 | ||
f58851e6 TT |
64 | /* |
65 | * Interface Adapter -- one per transport instance | |
66 | */ | |
67 | struct rpcrdma_ia { | |
a0ce85f5 | 68 | const struct rpcrdma_memreg_ops *ri_ops; |
89e0d112 | 69 | struct ib_device *ri_device; |
f58851e6 TT |
70 | struct rdma_cm_id *ri_id; |
71 | struct ib_pd *ri_pd; | |
f58851e6 | 72 | struct completion ri_done; |
bebd0318 | 73 | struct completion ri_remove_done; |
f58851e6 | 74 | int ri_async_rc; |
87cfb9a0 | 75 | unsigned int ri_max_segs; |
0fc6c4e7 | 76 | unsigned int ri_max_frmr_depth; |
302d3deb CL |
77 | unsigned int ri_max_inline_write; |
78 | unsigned int ri_max_inline_read; | |
16f906d6 | 79 | unsigned int ri_max_send_sges; |
b5f0afbe | 80 | bool ri_implicit_roundup; |
5e9fc6a0 | 81 | enum ib_mr_type ri_mrtype; |
bebd0318 | 82 | unsigned long ri_flags; |
ce1ab9ab CL |
83 | struct ib_qp_attr ri_qp_attr; |
84 | struct ib_qp_init_attr ri_qp_init_attr; | |
f58851e6 TT |
85 | }; |
86 | ||
bebd0318 CL |
87 | enum { |
88 | RPCRDMA_IAF_REMOVING = 0, /* NOTE(review): presumably a bit number within rpcrdma_ia.ri_flags -- confirm against users */ | |
89 | }; | |
90 | ||
f58851e6 TT |
91 | /* |
92 | * RDMA Endpoint -- one per transport instance | |
93 | */ | |
94 | ||
95 | struct rpcrdma_ep { | |
ae72950a CL |
96 | unsigned int rep_send_count; |
97 | unsigned int rep_send_batch; | |
f58851e6 | 98 | int rep_connected; |
f58851e6 TT |
99 | struct ib_qp_init_attr rep_attr; |
100 | wait_queue_head_t rep_connect_wait; | |
87cfb9a0 | 101 | struct rpcrdma_connect_private rep_cm_private; |
f58851e6 | 102 | struct rdma_conn_param rep_remote_cma; |
254f91e2 | 103 | struct delayed_work rep_connect_worker; |
f58851e6 TT |
104 | }; |
105 | ||
124fa17d CL |
106 | /* Pre-allocate extra Work Requests for handling backward receives |
107 | * and sends. This is a fixed value because the Work Queues are | |
108 | * allocated when the forward channel is set up. | |
109 | */ | |
110 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | |
111 | #define RPCRDMA_BACKWARD_WRS (8) | |
112 | #else | |
113 | #define RPCRDMA_BACKWARD_WRS (0) | |
114 | #endif | |
115 | ||
9128c3e7 CL |
116 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV |
117 | * | |
118 | * The below structure appears at the front of a large region of kmalloc'd | |
119 | * memory, which always starts on a good alignment boundary. | |
120 | */ | |
121 | ||
122 | struct rpcrdma_regbuf { | |
9128c3e7 | 123 | struct ib_sge rg_iov; /* read by rdmab_addr(), rdmab_length() and rdmab_lkey() */ |
54cbd6b0 | 124 | struct ib_device *rg_device; /* non-NULL means mapped; see rpcrdma_regbuf_is_mapped() */ |
99ef4db3 | 125 | enum dma_data_direction rg_direction; |
9128c3e7 CL |
126 | __be32 rg_base[0] __attribute__ ((aligned(256))); /* zero-length, 256-byte-aligned trailing member: marks where the memory after this header begins */ |
127 | }; | |
128 | ||
129 | static inline u64 /* Accessor: return the address recorded in @rb's SGE (rg_iov.addr). */ | |
130 | rdmab_addr(struct rpcrdma_regbuf *rb) | |
131 | { | |
132 | return rb->rg_iov.addr; | |
133 | } | |
134 | ||
135 | static inline u32 /* Accessor: return the length recorded in @rb's SGE (rg_iov.length). */ | |
136 | rdmab_length(struct rpcrdma_regbuf *rb) | |
137 | { | |
138 | return rb->rg_iov.length; | |
139 | } | |
140 | ||
141 | static inline u32 /* Accessor: return the lkey recorded in @rb's SGE (rg_iov.lkey). */ | |
142 | rdmab_lkey(struct rpcrdma_regbuf *rb) | |
143 | { | |
144 | return rb->rg_iov.lkey; | |
145 | } | |
146 | ||
91a10c52 CL |
147 | static inline struct ib_device * /* Accessor: return @rb->rg_device; NULL when the regbuf is not mapped (see rpcrdma_regbuf_is_mapped()). */ |
148 | rdmab_device(struct rpcrdma_regbuf *rb) | |
149 | { | |
150 | return rb->rg_device; | |
151 | } | |
152 | ||
5d252f90 CL |
153 | #define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) |
154 | ||
94931746 CL |
155 | /* To ensure a transport can always make forward progress, |
156 | * the number of RDMA segments allowed in header chunk lists | |
157 | * is capped at 8. This prevents less-capable devices and | |
158 | * memory registrations from overrunning the Send buffer | |
159 | * while building chunk lists. | |
160 | * | |
161 | * Elements of the Read list take up more room than the | |
162 | * Write list or Reply chunk. 8 read segments means the Read | |
163 | * list (or Write list or Reply chunk) cannot consume more | |
164 | * than | |
165 | * | |
166 | * ((8 + 2) * read segment size) + 1 XDR words, or 244 bytes. | |
167 | * | |
168 | * And the fixed part of the header is another 24 bytes. | |
169 | * | |
170 | * The smallest inline threshold is 1024 bytes, ensuring that | |
171 | * at least 750 bytes are available for RPC messages. | |
172 | */ | |
08cf2efd CL |
173 | enum { |
174 | RPCRDMA_MAX_HDR_SEGS = 8, | |
175 | RPCRDMA_HDRBUF_SIZE = 256, | |
176 | }; | |
94931746 | 177 | |
f58851e6 | 178 | /* |
e1352c96 CL |
179 | * struct rpcrdma_rep -- this structure encapsulates state required |
180 | * to receive and complete an RPC Reply, asynchronously. It needs | |
181 | * several pieces of state: | |
f58851e6 | 182 | * |
e1352c96 CL |
183 | * o receive buffer and ib_sge (donated to provider) |
184 | * o status of receive (success or not, length, inv rkey) | |
185 | * o bookkeeping state to get run by reply handler (XDR stream) | |
f58851e6 | 186 | * |
e1352c96 CL |
187 | * These structures are allocated during transport initialization. |
188 | * N of these are associated with a transport instance, managed by | |
189 | * struct rpcrdma_buffer. N is the max number of outstanding RPCs. | |
f58851e6 TT |
190 | */ |
191 | ||
f58851e6 | 192 | struct rpcrdma_rep { |
552bf225 | 193 | struct ib_cqe rr_cqe; |
5381e0ec CL |
194 | __be32 rr_xid; |
195 | __be32 rr_vers; | |
196 | __be32 rr_proc; | |
c8b920bb CL |
197 | int rr_wc_flags; |
198 | u32 rr_inv_rkey; | |
c1bcb68e | 199 | struct rpcrdma_regbuf *rr_rdmabuf; |
fed171b3 | 200 | struct rpcrdma_xprt *rr_rxprt; |
fe97b47c | 201 | struct work_struct rr_work; |
96f8778f CL |
202 | struct xdr_buf rr_hdrbuf; |
203 | struct xdr_stream rr_stream; | |
e1352c96 | 204 | struct rpc_rqst *rr_rqst; |
6b1184cd | 205 | struct list_head rr_list; |
6ea8e711 | 206 | struct ib_recv_wr rr_recv_wr; |
f58851e6 TT |
207 | }; |
208 | ||
ae72950a CL |
209 | /* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes |
210 | */ | |
01bb35c8 | 211 | struct rpcrdma_req; |
ae72950a CL |
212 | struct rpcrdma_xprt; |
213 | struct rpcrdma_sendctx { | |
214 | struct ib_send_wr sc_wr; | |
215 | struct ib_cqe sc_cqe; | |
216 | struct rpcrdma_xprt *sc_xprt; | |
01bb35c8 | 217 | struct rpcrdma_req *sc_req; |
ae72950a CL |
218 | unsigned int sc_unmap_count; |
219 | struct ib_sge sc_sges[]; | |
220 | }; | |
221 | ||
222 | /* Limit the number of SGEs that can be unmapped during one | |
223 | * Send completion. This caps the amount of work a single | |
224 | * completion can do before returning to the provider. | |
225 | * | |
226 | * Setting this to zero disables Send completion batching. | |
227 | */ | |
228 | enum { | |
229 | RPCRDMA_MAX_SEND_BATCH = 7, | |
230 | }; | |
231 | ||
0dbb4108 CL |
232 | /* |
233 | * struct rpcrdma_mw - external memory region metadata | |
234 | * | |
235 | * An external memory region is any buffer or page that is registered | |
236 | * on the fly (ie, not pre-registered). | |
237 | * | |
3111d72c | 238 | * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During |
0dbb4108 CL |
239 | * call_allocate, rpcrdma_buffer_get() assigns one to each segment in |
240 | * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep | |
241 | * track of registration metadata while each RPC is pending. | |
242 | * rpcrdma_deregister_external() uses this metadata to unmap and | |
243 | * release these resources when an RPC is complete. | |
244 | */ | |
245 | enum rpcrdma_frmr_state { | |
246 | FRMR_IS_INVALID, /* ready to be used */ | |
247 | FRMR_IS_VALID, /* in use */ | |
62bdf94a CL |
248 | FRMR_FLUSHED_FR, /* flushed FASTREG WR */ |
249 | FRMR_FLUSHED_LI, /* flushed LOCALINV WR */ | |
0dbb4108 CL |
250 | }; |
251 | ||
252 | struct rpcrdma_frmr { | |
0dbb4108 | 253 | struct ib_mr *fr_mr; |
2fa8f88d | 254 | struct ib_cqe fr_cqe; |
0dbb4108 | 255 | enum rpcrdma_frmr_state fr_state; |
2fa8f88d | 256 | struct completion fr_linv_done; |
3cf4e169 CL |
257 | union { |
258 | struct ib_reg_wr fr_regwr; | |
259 | struct ib_send_wr fr_invwr; | |
260 | }; | |
0dbb4108 CL |
261 | }; |
262 | ||
acb9da7a | 263 | struct rpcrdma_fmr { |
88975ebe CL |
264 | struct ib_fmr *fm_mr; |
265 | u64 *fm_physaddrs; | |
0dbb4108 CL |
266 | }; |
267 | ||
268 | struct rpcrdma_mw { | |
564471d2 CL |
269 | struct list_head mw_list; /* link used by rpcrdma_push_mw() and rpcrdma_pop_mw() */ |
270 | struct scatterlist *mw_sg; | |
271 | int mw_nents; /* NOTE(review): presumably the number of entries in mw_sg -- confirm */ | |
272 | enum dma_data_direction mw_dir; | |
0dbb4108 | 273 | union { /* anonymous union: holds either the FMR state or the FRMR state, never both */ |
acb9da7a | 274 | struct rpcrdma_fmr fmr; |
0dbb4108 | 275 | struct rpcrdma_frmr frmr; |
c882a655 | 276 | }; |
766656b0 | 277 | struct rpcrdma_xprt *mw_xprt; |
9d6b0409 CL |
278 | u32 mw_handle; |
279 | u32 mw_length; | |
280 | u64 mw_offset; | |
3111d72c | 281 | struct list_head mw_all; /* NOTE(review): presumably links this MW into rpcrdma_buffer.rb_all -- confirm */ |
0dbb4108 CL |
282 | }; |
283 | ||
f58851e6 TT |
284 | /* |
285 | * struct rpcrdma_req -- structure central to the request/reply sequence. | |
286 | * | |
287 | * N of these are associated with a transport instance, and stored in | |
288 | * struct rpcrdma_buffer. N is the max number of outstanding requests. | |
289 | * | |
290 | * It includes pre-registered buffer memory for send AND recv. | |
291 | * The recv buffer, however, is not owned by this structure, and | |
292 | * is "donated" to the hardware when a recv is posted. When a | |
293 | * reply is handled, the recv buffer used is given back to the | |
294 | * struct rpcrdma_req associated with the request. | |
295 | * | |
296 | * In addition to the basic memory, this structure includes an array | |
297 | * of iovs for send operations. The reason is that the iovs passed to | |
298 | * ib_post_{send,recv} must not be modified until the work request | |
299 | * completes. | |
f58851e6 TT |
300 | */ |
301 | ||
5ab81428 CL |
302 | /* Maximum number of page-sized "segments" per chunk list to be |
303 | * registered or invalidated. Must handle a Reply chunk: | |
304 | */ | |
305 | enum { | |
306 | RPCRDMA_MAX_IOV_SEGS = 3, | |
307 | RPCRDMA_MAX_DATA_SEGS = ((1 * 1024 * 1024) / PAGE_SIZE) + 1, | |
308 | RPCRDMA_MAX_SEGS = RPCRDMA_MAX_DATA_SEGS + | |
309 | RPCRDMA_MAX_IOV_SEGS, | |
310 | }; | |
311 | ||
f58851e6 | 312 | struct rpcrdma_mr_seg { /* chunk descriptors */ |
f58851e6 | 313 | u32 mr_len; /* length of chunk or segment */ |
f58851e6 TT |
314 | struct page *mr_page; /* owning page, if any */ |
315 | char *mr_offset; /* kva if no page, else offset */ | |
316 | }; | |
317 | ||
c6f5b47f CL |
318 | /* The Send SGE array is provisioned to send a maximum size |
319 | * inline request: | |
655fec69 CL |
320 | * - RPC-over-RDMA header |
321 | * - xdr_buf head iovec | |
c6f5b47f | 322 | * - RPCRDMA_MAX_INLINE bytes, in pages |
655fec69 | 323 | * - xdr_buf tail iovec |
c6f5b47f CL |
324 | * |
325 | * The actual number of array elements consumed by each RPC | |
326 | * depends on the device's max_sge limit. | |
655fec69 CL |
327 | */ |
328 | enum { | |
16f906d6 | 329 | RPCRDMA_MIN_SEND_SGES = 3, |
c6f5b47f | 330 | RPCRDMA_MAX_PAGE_SGES = RPCRDMA_MAX_INLINE >> PAGE_SHIFT, |
655fec69 CL |
331 | RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1, |
332 | }; | |
b3221d6a | 333 | |
5ab81428 | 334 | struct rpcrdma_buffer; |
f58851e6 | 335 | struct rpcrdma_req { |
a80d66c9 | 336 | struct list_head rl_list; |
ccede759 | 337 | int rl_cpu; |
b3221d6a CL |
338 | unsigned int rl_connect_cookie; |
339 | struct rpcrdma_buffer *rl_buffer; | |
90aab602 | 340 | struct rpcrdma_rep *rl_reply; |
7a80f3f0 CL |
341 | struct xdr_stream rl_stream; |
342 | struct xdr_buf rl_hdrbuf; | |
ae72950a | 343 | struct rpcrdma_sendctx *rl_sendctx; |
9c40c49f CL |
344 | struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */ |
345 | struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */ | |
346 | struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */ | |
f531a5db CL |
347 | |
348 | struct list_head rl_all; | |
531cca0c | 349 | unsigned long rl_flags; |
5ab81428 CL |
350 | |
351 | struct list_head rl_registered; /* registered segments */ | |
352 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; | |
f58851e6 | 353 | }; |
0ca77dc3 | 354 | |
531cca0c CL |
355 | /* rl_flags */ |
356 | enum { | |
6c537f2c | 357 | RPCRDMA_REQ_F_PENDING = 0, |
01bb35c8 | 358 | RPCRDMA_REQ_F_TX_RESOURCES, /* implicitly 1 */ |
531cca0c CL |
359 | }; |
360 | ||
5a6d1db4 CL |
361 | static inline void /* Store @req in @rqst->rq_xprtdata; rpcr_to_rdmar() reads the same field back. */ |
362 | rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) | |
363 | { | |
364 | rqst->rq_xprtdata = req; | |
365 | } | |
366 | ||
0ca77dc3 CL |
367 | static inline struct rpcrdma_req * /* Return the rpcrdma_req held in @rqst->rq_xprtdata (the field rpcrdma_set_xprtdata() writes). */ |
368 | rpcr_to_rdmar(struct rpc_rqst *rqst) | |
369 | { | |
5a6d1db4 | 370 | return rqst->rq_xprtdata; |
0ca77dc3 | 371 | } |
f58851e6 | 372 | |
9a5c63e9 CL |
373 | static inline void /* Append @mw to the tail of @list, linked through mw->mw_list. No locking is done here. */ |
374 | rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list) | |
375 | { | |
376 | list_add_tail(&mw->mw_list, list); | |
377 | } | |
378 | ||
379 | static inline struct rpcrdma_mw * /* Unlink and return the first MW on @list. No locking is done here. */ | |
380 | rpcrdma_pop_mw(struct list_head *list) | |
381 | { | |
382 | struct rpcrdma_mw *mw; | |
383 | ||
384 | mw = list_first_entry(list, struct rpcrdma_mw, mw_list); /* no emptiness check: callers must ensure @list is not empty */ | |
385 | list_del(&mw->mw_list); | |
386 | return mw; | |
387 | } | |
388 | ||
f58851e6 TT |
389 | /* |
390 | * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for | |
391 | * inline requests/replies, and client/server credits. | |
392 | * | |
393 | * One of these is associated with a transport instance | |
394 | */ | |
395 | struct rpcrdma_buffer { | |
58d1dcf5 CL |
396 | spinlock_t rb_mwlock; /* protect rb_mws list */ |
397 | struct list_head rb_mws; | |
398 | struct list_head rb_all; | |
58d1dcf5 | 399 | |
ae72950a CL |
400 | unsigned long rb_sc_head; |
401 | unsigned long rb_sc_tail; | |
402 | unsigned long rb_sc_last; | |
403 | struct rpcrdma_sendctx **rb_sc_ctxs; | |
404 | ||
1e465fd4 | 405 | spinlock_t rb_lock; /* protect buf lists */ |
05c97466 | 406 | int rb_send_count, rb_recv_count; |
1e465fd4 CL |
407 | struct list_head rb_send_bufs; |
408 | struct list_head rb_recv_bufs; | |
58d1dcf5 | 409 | u32 rb_max_requests; |
be798f90 | 410 | u32 rb_credits; /* most recent credit grant */ |
f531a5db CL |
411 | |
412 | u32 rb_bc_srv_max_requests; | |
413 | spinlock_t rb_reqslock; /* protect rb_allreqs */ | |
414 | struct list_head rb_allreqs; | |
5d252f90 CL |
415 | |
416 | u32 rb_bc_max_requests; | |
505bbe64 CL |
417 | |
418 | spinlock_t rb_recovery_lock; /* protect rb_stale_mrs */ | |
419 | struct list_head rb_stale_mrs; | |
420 | struct delayed_work rb_recovery_worker; | |
e2ac236c | 421 | struct delayed_work rb_refresh_worker; |
f58851e6 TT |
422 | }; |
423 | #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) | |
424 | ||
425 | /* | |
426 | * Internal structure for transport instance creation. This | |
427 | * exists primarily for modularity. | |
428 | * | |
429 | * This data should be set with mount options | |
430 | */ | |
431 | struct rpcrdma_create_data_internal { | |
f58851e6 TT |
432 | unsigned int max_requests; /* max requests (slots) in flight */ |
433 | unsigned int rsize; /* mount rsize - max read hdr+data */ | |
434 | unsigned int wsize; /* mount wsize - max write hdr+data */ | |
435 | unsigned int inline_rsize; /* max non-rdma read data payload */ | |
436 | unsigned int inline_wsize; /* max non-rdma write data payload */ | |
f58851e6 TT |
437 | }; |
438 | ||
f58851e6 TT |
439 | /* |
440 | * Statistics for RPCRDMA | |
441 | */ | |
442 | struct rpcrdma_stats { | |
67af6f65 | 443 | /* accessed when sending a call */ |
f58851e6 TT |
444 | unsigned long read_chunk_count; |
445 | unsigned long write_chunk_count; | |
446 | unsigned long reply_chunk_count; | |
f58851e6 | 447 | unsigned long long total_rdma_request; |
f58851e6 | 448 | |
67af6f65 | 449 | /* rarely accessed error counters */ |
f58851e6 | 450 | unsigned long long pullup_copy_count; |
f58851e6 TT |
451 | unsigned long hardway_register_count; |
452 | unsigned long failed_marshal_count; | |
453 | unsigned long bad_reply_count; | |
505bbe64 CL |
454 | unsigned long mrs_recovered; |
455 | unsigned long mrs_orphaned; | |
e2ac236c | 456 | unsigned long mrs_allocated; |
ae72950a | 457 | unsigned long empty_sendctx_q; |
67af6f65 CL |
458 | |
459 | /* accessed when receiving a reply */ | |
460 | unsigned long long total_rdma_reply; | |
461 | unsigned long long fixup_copy_count; | |
01bb35c8 | 462 | unsigned long reply_waits_for_send; |
c8b920bb | 463 | unsigned long local_inv_needed; |
67af6f65 CL |
464 | unsigned long nomsg_call_count; |
465 | unsigned long bcall_count; | |
f58851e6 TT |
466 | }; |
467 | ||
a0ce85f5 CL |
468 | /* |
469 | * Per-registration mode operations | |
470 | */ | |
1c9351ee | 471 | struct rpcrdma_xprt; |
a0ce85f5 | 472 | struct rpcrdma_memreg_ops { |
6748b0ca CL |
473 | struct rpcrdma_mr_seg * |
474 | (*ro_map)(struct rpcrdma_xprt *, | |
9d6b0409 CL |
475 | struct rpcrdma_mr_seg *, int, bool, |
476 | struct rpcrdma_mw **); | |
c3441618 CL |
477 | void (*ro_reminv)(struct rpcrdma_rep *rep, |
478 | struct list_head *mws); | |
32d0ceec | 479 | void (*ro_unmap_sync)(struct rpcrdma_xprt *, |
451d26e1 | 480 | struct list_head *); |
505bbe64 | 481 | void (*ro_recover_mr)(struct rpcrdma_mw *); |
3968cb58 CL |
482 | int (*ro_open)(struct rpcrdma_ia *, |
483 | struct rpcrdma_ep *, | |
484 | struct rpcrdma_create_data_internal *); | |
1c9351ee | 485 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); |
e2ac236c CL |
486 | int (*ro_init_mr)(struct rpcrdma_ia *, |
487 | struct rpcrdma_mw *); | |
488 | void (*ro_release_mr)(struct rpcrdma_mw *); | |
a0ce85f5 | 489 | const char *ro_displayname; |
c8b920bb | 490 | const int ro_send_w_inv_ok; |
a0ce85f5 CL |
491 | }; |
492 | ||
493 | extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops; | |
494 | extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops; | |
a0ce85f5 | 495 | |
f58851e6 TT |
496 | /* |
497 | * RPCRDMA transport -- encapsulates the structures above for | |
498 | * integration with RPC. | |
499 | * | |
500 | * The contained structures are embedded, not pointers, | |
501 | * for convenience. This structure need not be visible externally. | |
502 | * | |
503 | * It is allocated and initialized during mount, and released | |
504 | * during unmount. | |
505 | */ | |
506 | struct rpcrdma_xprt { | |
5abefb86 | 507 | struct rpc_xprt rx_xprt; /* rpcx_to_rdmax() uses container_of() on this member */ |
f58851e6 TT |
508 | struct rpcrdma_ia rx_ia; /* returned by rdmab_to_ia() */ |
509 | struct rpcrdma_ep rx_ep; | |
510 | struct rpcrdma_buffer rx_buf; /* rdmab_to_ia() uses container_of() on this member */ | |
511 | struct rpcrdma_create_data_internal rx_data; /* yielded by rpcx_to_rdmad() */ | |
5abefb86 | 512 | struct delayed_work rx_connect_worker; |
f58851e6 TT |
513 | struct rpcrdma_stats rx_stats; |
514 | }; | |
515 | ||
5abefb86 | 516 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) |
f58851e6 TT |
517 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) |
518 | ||
d461f1f2 CL |
519 | static inline const char * /* Return the RPC_DISPLAY_ADDR entry of the embedded rpc_xprt's address_strings[]. */ |
520 | rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt) | |
521 | { | |
522 | return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]; | |
523 | } | |
524 | ||
525 | static inline const char * /* Return the RPC_DISPLAY_PORT entry of the embedded rpc_xprt's address_strings[]. */ | |
526 | rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt) | |
527 | { | |
528 | return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT]; | |
529 | } | |
530 | ||
9191ca3b TT |
531 | /* Setting this to 0 ensures interoperability with early servers. |
532 | * Setting this to 1 enhances certain unaligned read/write performance. | |
533 | * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ | |
534 | extern int xprt_rdma_pad_optimize; | |
535 | ||
fff09594 CL |
536 | /* This setting controls the hunt for a supported memory |
537 | * registration strategy. | |
538 | */ | |
539 | extern unsigned int xprt_rdma_memreg_strategy; | |
540 | ||
f58851e6 TT |
541 | /* |
542 | * Interface Adapter calls - xprtrdma/verbs.c | |
543 | */ | |
dd229cee | 544 | int rpcrdma_ia_open(struct rpcrdma_xprt *xprt); |
bebd0318 | 545 | void rpcrdma_ia_remove(struct rpcrdma_ia *ia); |
f58851e6 | 546 | void rpcrdma_ia_close(struct rpcrdma_ia *); |
b54054ca CL |
547 | bool frwr_is_supported(struct rpcrdma_ia *); |
548 | bool fmr_is_supported(struct rpcrdma_ia *); | |
f58851e6 | 549 | |
d8f532d2 CL |
550 | extern struct workqueue_struct *rpcrdma_receive_wq; |
551 | ||
f58851e6 TT |
552 | /* |
553 | * Endpoint calls - xprtrdma/verbs.c | |
554 | */ | |
555 | int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, | |
556 | struct rpcrdma_create_data_internal *); | |
7f1d5419 | 557 | void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); |
f58851e6 | 558 | int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); |
3a72dc77 | 559 | void rpcrdma_conn_func(struct rpcrdma_ep *ep); |
282191cb | 560 | void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); |
f58851e6 TT |
561 | |
562 | int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, | |
563 | struct rpcrdma_req *); | |
b157380a | 564 | int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *); |
f58851e6 TT |
565 | |
566 | /* | |
567 | * Buffer calls - xprtrdma/verbs.c | |
568 | */ | |
f531a5db | 569 | struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); |
13650c23 | 570 | void rpcrdma_destroy_req(struct rpcrdma_req *); |
d698c4a0 | 571 | int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt); |
ac920d04 | 572 | int rpcrdma_buffer_create(struct rpcrdma_xprt *); |
f58851e6 | 573 | void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); |
ae72950a CL |
574 | struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); |
575 | void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); | |
f58851e6 | 576 | |
346aa66b CL |
577 | struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); |
578 | void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); | |
f58851e6 TT |
579 | struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); |
580 | void rpcrdma_buffer_put(struct rpcrdma_req *); | |
581 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); | |
582 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); | |
583 | ||
505bbe64 CL |
584 | void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *); |
585 | ||
13650c23 | 586 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, |
99ef4db3 | 587 | gfp_t); |
54cbd6b0 | 588 | bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); |
13650c23 | 589 | void rpcrdma_free_regbuf(struct rpcrdma_regbuf *); |
9128c3e7 | 590 | |
54cbd6b0 CL |
591 | static inline bool /* Return true when @rb->rg_device is non-NULL, which this header treats as "mapped". */ |
592 | rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb) | |
593 | { | |
594 | return rb->rg_device != NULL; | |
595 | } | |
596 | ||
597 | static inline bool /* Ensure @rb is mapped: true if it already is, otherwise the result of __rpcrdma_dma_map_regbuf(). */ | |
598 | rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) | |
599 | { | |
600 | if (likely(rpcrdma_regbuf_is_mapped(rb))) /* already-mapped case is hinted as the common one */ | |
601 | return true; | |
602 | return __rpcrdma_dma_map_regbuf(ia, rb); /* out-of-line mapping path, declared above */ | |
603 | } | |
604 | ||
f531a5db | 605 | int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int); |
d654788e | 606 | |
fe97b47c CL |
607 | int rpcrdma_alloc_wq(void); |
608 | void rpcrdma_destroy_wq(void); | |
609 | ||
d654788e CL |
610 | /* |
611 | * Wrappers for chunk registration, shared by read/write chunk code. | |
612 | */ | |
613 | ||
d654788e CL |
614 | static inline enum dma_data_direction /* Map @writing to a DMA direction: true gives DMA_FROM_DEVICE, false gives DMA_TO_DEVICE. */ |
615 | rpcrdma_data_dir(bool writing) | |
616 | { | |
617 | return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | |
618 | } | |
619 | ||
f58851e6 TT |
620 | /* |
621 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c | |
622 | */ | |
655fec69 CL |
623 | |
624 | enum rpcrdma_chunktype { | |
625 | rpcrdma_noch = 0, | |
626 | rpcrdma_readch, | |
627 | rpcrdma_areadch, | |
628 | rpcrdma_writech, | |
629 | rpcrdma_replych | |
630 | }; | |
631 | ||
857f9aca CL |
632 | int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, |
633 | struct rpcrdma_req *req, u32 hdrlen, | |
634 | struct xdr_buf *xdr, | |
635 | enum rpcrdma_chunktype rtype); | |
ae72950a | 636 | void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc); |
09e60641 | 637 | int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst); |
87cfb9a0 | 638 | void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); |
e1352c96 | 639 | void rpcrdma_complete_rqst(struct rpcrdma_rep *rep); |
d8f532d2 | 640 | void rpcrdma_reply_handler(struct rpcrdma_rep *rep); |
0ba6f370 CL |
641 | void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, |
642 | struct rpcrdma_req *req); | |
d8f532d2 | 643 | void rpcrdma_deferred_completion(struct work_struct *work); |
f58851e6 | 644 | |
96f8778f CL |
645 | static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len) /* Set both the head iovec length and the total length of @xdr to @len. */ |
646 | { | |
647 | xdr->head[0].iov_len = len; | |
648 | xdr->len = len; /* no other xdr_buf fields are modified here */ | |
649 | } | |
650 | ||
ffe1f0df CL |
651 | /* RPC/RDMA module init - xprtrdma/transport.c |
652 | */ | |
5d252f90 CL |
653 | extern unsigned int xprt_rdma_max_inline_read; |
654 | void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); | |
655 | void xprt_rdma_free_addresses(struct rpc_xprt *xprt); | |
3a72dc77 | 656 | void rpcrdma_connect_worker(struct work_struct *work); |
5d252f90 | 657 | void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq); |
ffe1f0df CL |
658 | int xprt_rdma_init(void); |
659 | void xprt_rdma_cleanup(void); | |
660 | ||
f531a5db CL |
661 | /* Backchannel calls - xprtrdma/backchannel.c |
662 | */ | |
663 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | |
664 | int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int); | |
76566773 | 665 | int xprt_rdma_bc_up(struct svc_serv *, struct net *); |
6b26cc8c | 666 | size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); |
f531a5db | 667 | int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); |
63cae470 | 668 | void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); |
cf73daf5 | 669 | int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst); |
f531a5db CL |
670 | void xprt_rdma_bc_free_rqst(struct rpc_rqst *); |
671 | void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); | |
672 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ | |
673 | ||
5d252f90 | 674 | extern struct xprt_class xprt_rdma_bc; |
cec56c8f | 675 | |
f58851e6 | 676 | #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ |