Commit | Line | Data |
---|---|---|
f58851e6 | 1 | /* |
62b56a67 | 2 | * Copyright (c) 2014-2017 Oracle. All rights reserved. |
f58851e6 TT |
3 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. |
4 | * | |
5 | * This software is available to you under a choice of one of two | |
6 | * licenses. You may choose to be licensed under the terms of the GNU | |
7 | * General Public License (GPL) Version 2, available from the file | |
8 | * COPYING in the main directory of this source tree, or the BSD-type | |
9 | * license below: | |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | |
14 | * | |
15 | * Redistributions of source code must retain the above copyright | |
16 | * notice, this list of conditions and the following disclaimer. | |
17 | * | |
18 | * Redistributions in binary form must reproduce the above | |
19 | * copyright notice, this list of conditions and the following | |
20 | * disclaimer in the documentation and/or other materials provided | |
21 | * with the distribution. | |
22 | * | |
23 | * Neither the name of the Network Appliance, Inc. nor the names of | |
24 | * its contributors may be used to endorse or promote products | |
25 | * derived from this software without specific prior written | |
26 | * permission. | |
27 | * | |
28 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
29 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
30 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
31 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
32 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
33 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
34 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
35 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
36 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
37 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
38 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
39 | */ | |
40 | ||
41 | #ifndef _LINUX_SUNRPC_XPRT_RDMA_H | |
42 | #define _LINUX_SUNRPC_XPRT_RDMA_H | |
43 | ||
44 | #include <linux/wait.h> /* wait_queue_head_t, etc */ | |
45 | #include <linux/spinlock.h> /* spinlock_t, etc */ | |
60063497 | 46 | #include <linux/atomic.h> /* atomic_t, etc */ |
254f91e2 | 47 | #include <linux/workqueue.h> /* struct work_struct */ |
f58851e6 TT |
48 | |
49 | #include <rdma/rdma_cm.h> /* RDMA connection api */ | |
50 | #include <rdma/ib_verbs.h> /* RDMA verbs api */ | |
51 | ||
52 | #include <linux/sunrpc/clnt.h> /* rpc_xprt */ | |
53 | #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ | |
54 | #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ | |
55 | ||
5675add3 TT |
/* Connection-manager tunables */
#define RDMA_RESOLVE_TIMEOUT	(5000)	/* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX	(2)	/* retries if no listener backlog */
58 | ||
5d252f90 CL |
/* Transport timeouts; each is a multiple of HZ. */
#define RPCRDMA_BIND_TO		(60U * HZ)
#define RPCRDMA_INIT_REEST_TO	(5U * HZ)
#define RPCRDMA_MAX_REEST_TO	(30U * HZ)
#define RPCRDMA_IDLE_DISC_TO	(5U * 60 * HZ)
63 | ||
f58851e6 TT |
64 | /* |
65 | * Interface Adapter -- one per transport instance | |
66 | */ | |
67 | struct rpcrdma_ia { | |
a0ce85f5 | 68 | const struct rpcrdma_memreg_ops *ri_ops; |
89e0d112 | 69 | struct ib_device *ri_device; |
f58851e6 TT |
70 | struct rdma_cm_id *ri_id; |
71 | struct ib_pd *ri_pd; | |
f58851e6 | 72 | struct completion ri_done; |
bebd0318 | 73 | struct completion ri_remove_done; |
f58851e6 | 74 | int ri_async_rc; |
87cfb9a0 | 75 | unsigned int ri_max_segs; |
0fc6c4e7 | 76 | unsigned int ri_max_frmr_depth; |
302d3deb CL |
77 | unsigned int ri_max_inline_write; |
78 | unsigned int ri_max_inline_read; | |
16f906d6 | 79 | unsigned int ri_max_send_sges; |
c8b920bb | 80 | bool ri_reminv_expected; |
b5f0afbe | 81 | bool ri_implicit_roundup; |
5e9fc6a0 | 82 | enum ib_mr_type ri_mrtype; |
bebd0318 | 83 | unsigned long ri_flags; |
ce1ab9ab CL |
84 | struct ib_qp_attr ri_qp_attr; |
85 | struct ib_qp_init_attr ri_qp_init_attr; | |
f58851e6 TT |
86 | }; |
87 | ||
bebd0318 CL |
/* NOTE(review): presumably a bit number within rpcrdma_ia.ri_flags -- confirm */
enum {
	RPCRDMA_IAF_REMOVING = 0,
};
91 | ||
f58851e6 TT |
92 | /* |
93 | * RDMA Endpoint -- one per transport instance | |
94 | */ | |
95 | ||
96 | struct rpcrdma_ep { | |
ae72950a CL |
97 | unsigned int rep_send_count; |
98 | unsigned int rep_send_batch; | |
f58851e6 | 99 | int rep_connected; |
f58851e6 TT |
100 | struct ib_qp_init_attr rep_attr; |
101 | wait_queue_head_t rep_connect_wait; | |
87cfb9a0 | 102 | struct rpcrdma_connect_private rep_cm_private; |
f58851e6 TT |
103 | struct rdma_conn_param rep_remote_cma; |
104 | struct sockaddr_storage rep_remote_addr; | |
254f91e2 | 105 | struct delayed_work rep_connect_worker; |
f58851e6 TT |
106 | }; |
107 | ||
124fa17d CL |
108 | /* Pre-allocate extra Work Requests for handling backward receives |
109 | * and sends. This is a fixed value because the Work Queues are | |
110 | * allocated when the forward channel is set up. | |
111 | */ | |
/* Extra Work Requests: 8 when the backchannel is configured, else none */
#ifdef CONFIG_SUNRPC_BACKCHANNEL
#define RPCRDMA_BACKWARD_WRS		(8)
#else
#define RPCRDMA_BACKWARD_WRS		(0)
#endif
117 | ||
9128c3e7 CL |
118 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV |
119 | * | |
120 | * The below structure appears at the front of a large region of kmalloc'd | |
121 | * memory, which always starts on a good alignment boundary. | |
122 | */ | |
123 | ||
124 | struct rpcrdma_regbuf { | |
9128c3e7 | 125 | struct ib_sge rg_iov; |
54cbd6b0 | 126 | struct ib_device *rg_device; |
99ef4db3 | 127 | enum dma_data_direction rg_direction; |
9128c3e7 CL |
128 | __be32 rg_base[0] __attribute__ ((aligned(256))); |
129 | }; | |
130 | ||
131 | static inline u64 | |
132 | rdmab_addr(struct rpcrdma_regbuf *rb) | |
133 | { | |
134 | return rb->rg_iov.addr; | |
135 | } | |
136 | ||
137 | static inline u32 | |
138 | rdmab_length(struct rpcrdma_regbuf *rb) | |
139 | { | |
140 | return rb->rg_iov.length; | |
141 | } | |
142 | ||
143 | static inline u32 | |
144 | rdmab_lkey(struct rpcrdma_regbuf *rb) | |
145 | { | |
146 | return rb->rg_iov.lkey; | |
147 | } | |
148 | ||
91a10c52 CL |
149 | static inline struct ib_device * |
150 | rdmab_device(struct rpcrdma_regbuf *rb) | |
151 | { | |
152 | return rb->rg_device; | |
153 | } | |
154 | ||
5d252f90 CL |
155 | #define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) |
156 | ||
94931746 CL |
157 | /* To ensure a transport can always make forward progress, |
158 | * the number of RDMA segments allowed in header chunk lists | |
159 | * is capped at 8. This prevents less-capable devices and | |
160 | * memory registrations from overrunning the Send buffer | |
161 | * while building chunk lists. | |
162 | * | |
163 | * Elements of the Read list take up more room than the | |
164 | * Write list or Reply chunk. 8 read segments means the Read | |
165 | * list (or Write list or Reply chunk) cannot consume more | |
166 | * than | |
167 | * | |
168 | * ((8 + 2) * read segment size) + 1 XDR words, or 244 bytes. | |
169 | * | |
170 | * And the fixed part of the header is another 24 bytes. | |
171 | * | |
172 | * The smallest inline threshold is 1024 bytes, ensuring that | |
173 | * at least 750 bytes are available for RPC messages. | |
174 | */ | |
08cf2efd CL |
enum {
	RPCRDMA_MAX_HDR_SEGS	= 8,	/* cap on segments in header chunk lists */
	RPCRDMA_HDRBUF_SIZE	= 256,
};
94931746 | 179 | |
f58851e6 | 180 | /* |
e1352c96 CL |
181 | * struct rpcrdma_rep -- this structure encapsulates state required |
182 | * to receive and complete an RPC Reply, asynchronously. It needs | |
183 | * several pieces of state: | |
f58851e6 | 184 | * |
e1352c96 CL |
185 | * o receive buffer and ib_sge (donated to provider) |
186 | * o status of receive (success or not, length, inv rkey) | |
187 | * o bookkeeping state to get run by reply handler (XDR stream) | |
f58851e6 | 188 | * |
e1352c96 CL |
189 | * These structures are allocated during transport initialization. |
190 | * N of these are associated with a transport instance, managed by | |
191 | * struct rpcrdma_buffer. N is the max number of outstanding RPCs. | |
f58851e6 TT |
192 | */ |
193 | ||
f58851e6 | 194 | struct rpcrdma_rep { |
552bf225 | 195 | struct ib_cqe rr_cqe; |
5381e0ec CL |
196 | __be32 rr_xid; |
197 | __be32 rr_vers; | |
198 | __be32 rr_proc; | |
c8b920bb CL |
199 | int rr_wc_flags; |
200 | u32 rr_inv_rkey; | |
c1bcb68e | 201 | struct rpcrdma_regbuf *rr_rdmabuf; |
fed171b3 | 202 | struct rpcrdma_xprt *rr_rxprt; |
fe97b47c | 203 | struct work_struct rr_work; |
96f8778f CL |
204 | struct xdr_buf rr_hdrbuf; |
205 | struct xdr_stream rr_stream; | |
e1352c96 | 206 | struct rpc_rqst *rr_rqst; |
6b1184cd | 207 | struct list_head rr_list; |
6ea8e711 | 208 | struct ib_recv_wr rr_recv_wr; |
f58851e6 TT |
209 | }; |
210 | ||
ae72950a CL |
211 | /* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes |
212 | */ | |
01bb35c8 | 213 | struct rpcrdma_req; |
ae72950a CL |
214 | struct rpcrdma_xprt; |
215 | struct rpcrdma_sendctx { | |
216 | struct ib_send_wr sc_wr; | |
217 | struct ib_cqe sc_cqe; | |
218 | struct rpcrdma_xprt *sc_xprt; | |
01bb35c8 | 219 | struct rpcrdma_req *sc_req; |
ae72950a CL |
220 | unsigned int sc_unmap_count; |
221 | struct ib_sge sc_sges[]; | |
222 | }; | |
223 | ||
224 | /* Limit the number of SGEs that can be unmapped during one | |
225 | * Send completion. This caps the amount of work a single | |
226 | * completion can do before returning to the provider. | |
227 | * | |
228 | * Setting this to zero disables Send completion batching. | |
229 | */ | |
enum {
	RPCRDMA_MAX_SEND_BATCH	= 7,	/* zero would disable batching */
};
233 | ||
0dbb4108 CL |
234 | /* |
235 | * struct rpcrdma_mw - external memory region metadata | |
236 | * | |
237 | * An external memory region is any buffer or page that is registered | |
238 | * on the fly (ie, not pre-registered). | |
239 | * | |
3111d72c | 240 | * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During |
0dbb4108 CL |
241 | * call_allocate, rpcrdma_buffer_get() assigns one to each segment in |
242 | * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep | |
243 | * track of registration metadata while each RPC is pending. | |
244 | * rpcrdma_deregister_external() uses this metadata to unmap and | |
245 | * release these resources when an RPC is complete. | |
246 | */ | |
/* Values are spelled out; they match the implicit numbering. */
enum rpcrdma_frmr_state {
	FRMR_IS_INVALID	= 0,	/* ready to be used */
	FRMR_IS_VALID	= 1,	/* in use */
	FRMR_FLUSHED_FR	= 2,	/* flushed FASTREG WR */
	FRMR_FLUSHED_LI	= 3,	/* flushed LOCALINV WR */
};
253 | ||
254 | struct rpcrdma_frmr { | |
0dbb4108 | 255 | struct ib_mr *fr_mr; |
2fa8f88d | 256 | struct ib_cqe fr_cqe; |
0dbb4108 | 257 | enum rpcrdma_frmr_state fr_state; |
2fa8f88d | 258 | struct completion fr_linv_done; |
3cf4e169 CL |
259 | union { |
260 | struct ib_reg_wr fr_regwr; | |
261 | struct ib_send_wr fr_invwr; | |
262 | }; | |
0dbb4108 CL |
263 | }; |
264 | ||
acb9da7a | 265 | struct rpcrdma_fmr { |
88975ebe CL |
266 | struct ib_fmr *fm_mr; |
267 | u64 *fm_physaddrs; | |
0dbb4108 CL |
268 | }; |
269 | ||
270 | struct rpcrdma_mw { | |
564471d2 CL |
271 | struct list_head mw_list; |
272 | struct scatterlist *mw_sg; | |
273 | int mw_nents; | |
274 | enum dma_data_direction mw_dir; | |
4b196dc6 | 275 | unsigned long mw_flags; |
0dbb4108 | 276 | union { |
acb9da7a | 277 | struct rpcrdma_fmr fmr; |
0dbb4108 | 278 | struct rpcrdma_frmr frmr; |
c882a655 | 279 | }; |
766656b0 | 280 | struct rpcrdma_xprt *mw_xprt; |
9d6b0409 CL |
281 | u32 mw_handle; |
282 | u32 mw_length; | |
283 | u64 mw_offset; | |
3111d72c | 284 | struct list_head mw_all; |
0dbb4108 CL |
285 | }; |
286 | ||
4b196dc6 CL |
287 | /* mw_flags */ |
/* Values for rpcrdma_mw.mw_flags */
enum {
	RPCRDMA_MW_F_RI		= 1,
};
291 | ||
f58851e6 TT |
292 | /* |
293 | * struct rpcrdma_req -- structure central to the request/reply sequence. | |
294 | * | |
295 | * N of these are associated with a transport instance, and stored in | |
296 | * struct rpcrdma_buffer. N is the max number of outstanding requests. | |
297 | * | |
298 | * It includes pre-registered buffer memory for send AND recv. | |
299 | * The recv buffer, however, is not owned by this structure, and | |
300 | * is "donated" to the hardware when a recv is posted. When a | |
301 | * reply is handled, the recv buffer used is given back to the | |
302 | * struct rpcrdma_req associated with the request. | |
303 | * | |
304 | * In addition to the basic memory, this structure includes an array | |
305 | * of iovs for send operations. The reason is that the iovs passed to | |
306 | * ib_post_{send,recv} must not be modified until the work request | |
307 | * completes. | |
f58851e6 TT |
308 | */ |
309 | ||
5ab81428 CL |
310 | /* Maximum number of page-sized "segments" per chunk list to be |
311 | * registered or invalidated. Must handle a Reply chunk: | |
312 | */ | |
313 | enum { | |
314 | RPCRDMA_MAX_IOV_SEGS = 3, | |
315 | RPCRDMA_MAX_DATA_SEGS = ((1 * 1024 * 1024) / PAGE_SIZE) + 1, | |
316 | RPCRDMA_MAX_SEGS = RPCRDMA_MAX_DATA_SEGS + | |
317 | RPCRDMA_MAX_IOV_SEGS, | |
318 | }; | |
319 | ||
f58851e6 | 320 | struct rpcrdma_mr_seg { /* chunk descriptors */ |
f58851e6 | 321 | u32 mr_len; /* length of chunk or segment */ |
f58851e6 TT |
322 | struct page *mr_page; /* owning page, if any */ |
323 | char *mr_offset; /* kva if no page, else offset */ | |
324 | }; | |
325 | ||
c6f5b47f CL |
326 | /* The Send SGE array is provisioned to send a maximum size |
327 | * inline request: | |
655fec69 CL |
328 | * - RPC-over-RDMA header |
329 | * - xdr_buf head iovec | |
c6f5b47f | 330 | * - RPCRDMA_MAX_INLINE bytes, in pages |
655fec69 | 331 | * - xdr_buf tail iovec |
c6f5b47f CL |
332 | * |
333 | * The actual number of array elements consumed by each RPC | |
334 | * depends on the device's max_sge limit. | |
655fec69 CL |
335 | */ |
336 | enum { | |
16f906d6 | 337 | RPCRDMA_MIN_SEND_SGES = 3, |
c6f5b47f | 338 | RPCRDMA_MAX_PAGE_SGES = RPCRDMA_MAX_INLINE >> PAGE_SHIFT, |
655fec69 CL |
339 | RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1, |
340 | }; | |
b3221d6a | 341 | |
5ab81428 | 342 | struct rpcrdma_buffer; |
f58851e6 | 343 | struct rpcrdma_req { |
a80d66c9 | 344 | struct list_head rl_list; |
ccede759 | 345 | int rl_cpu; |
b3221d6a CL |
346 | unsigned int rl_connect_cookie; |
347 | struct rpcrdma_buffer *rl_buffer; | |
90aab602 | 348 | struct rpcrdma_rep *rl_reply; |
7a80f3f0 CL |
349 | struct xdr_stream rl_stream; |
350 | struct xdr_buf rl_hdrbuf; | |
ae72950a | 351 | struct rpcrdma_sendctx *rl_sendctx; |
9c40c49f CL |
352 | struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */ |
353 | struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */ | |
354 | struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */ | |
f531a5db CL |
355 | |
356 | struct list_head rl_all; | |
531cca0c | 357 | unsigned long rl_flags; |
5ab81428 CL |
358 | |
359 | struct list_head rl_registered; /* registered segments */ | |
360 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; | |
f58851e6 | 361 | }; |
0ca77dc3 | 362 | |
531cca0c CL |
363 | /* rl_flags */ |
/* Values are spelled out; they match the implicit numbering. */
enum {
	RPCRDMA_REQ_F_BACKCHANNEL	= 0,
	RPCRDMA_REQ_F_PENDING		= 1,
	RPCRDMA_REQ_F_TX_RESOURCES	= 2,
};
369 | ||
5a6d1db4 CL |
370 | static inline void |
371 | rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) | |
372 | { | |
373 | rqst->rq_xprtdata = req; | |
374 | } | |
375 | ||
0ca77dc3 CL |
376 | static inline struct rpcrdma_req * |
377 | rpcr_to_rdmar(struct rpc_rqst *rqst) | |
378 | { | |
5a6d1db4 | 379 | return rqst->rq_xprtdata; |
0ca77dc3 | 380 | } |
f58851e6 | 381 | |
9a5c63e9 CL |
382 | static inline void |
383 | rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list) | |
384 | { | |
385 | list_add_tail(&mw->mw_list, list); | |
386 | } | |
387 | ||
388 | static inline struct rpcrdma_mw * | |
389 | rpcrdma_pop_mw(struct list_head *list) | |
390 | { | |
391 | struct rpcrdma_mw *mw; | |
392 | ||
393 | mw = list_first_entry(list, struct rpcrdma_mw, mw_list); | |
394 | list_del(&mw->mw_list); | |
395 | return mw; | |
396 | } | |
397 | ||
f58851e6 TT |
398 | /* |
399 | * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for | |
400 | * inline requests/replies, and client/server credits. | |
401 | * | |
402 | * One of these is associated with a transport instance | |
403 | */ | |
404 | struct rpcrdma_buffer { | |
58d1dcf5 CL |
405 | spinlock_t rb_mwlock; /* protect rb_mws list */ |
406 | struct list_head rb_mws; | |
407 | struct list_head rb_all; | |
58d1dcf5 | 408 | |
ae72950a CL |
409 | unsigned long rb_sc_head; |
410 | unsigned long rb_sc_tail; | |
411 | unsigned long rb_sc_last; | |
412 | struct rpcrdma_sendctx **rb_sc_ctxs; | |
413 | ||
1e465fd4 | 414 | spinlock_t rb_lock; /* protect buf lists */ |
05c97466 | 415 | int rb_send_count, rb_recv_count; |
1e465fd4 CL |
416 | struct list_head rb_send_bufs; |
417 | struct list_head rb_recv_bufs; | |
58d1dcf5 | 418 | u32 rb_max_requests; |
be798f90 | 419 | u32 rb_credits; /* most recent credit grant */ |
f531a5db CL |
420 | |
421 | u32 rb_bc_srv_max_requests; | |
422 | spinlock_t rb_reqslock; /* protect rb_allreqs */ | |
423 | struct list_head rb_allreqs; | |
5d252f90 CL |
424 | |
425 | u32 rb_bc_max_requests; | |
505bbe64 CL |
426 | |
427 | spinlock_t rb_recovery_lock; /* protect rb_stale_mrs */ | |
428 | struct list_head rb_stale_mrs; | |
429 | struct delayed_work rb_recovery_worker; | |
e2ac236c | 430 | struct delayed_work rb_refresh_worker; |
f58851e6 TT |
431 | }; |
432 | #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) | |
433 | ||
434 | /* | |
435 | * Internal structure for transport instance creation. This | |
436 | * exists primarily for modularity. | |
437 | * | |
438 | * This data should be set with mount options | |
439 | */ | |
440 | struct rpcrdma_create_data_internal { | |
441 | struct sockaddr_storage addr; /* RDMA server address */ | |
442 | unsigned int max_requests; /* max requests (slots) in flight */ | |
443 | unsigned int rsize; /* mount rsize - max read hdr+data */ | |
444 | unsigned int wsize; /* mount wsize - max write hdr+data */ | |
445 | unsigned int inline_rsize; /* max non-rdma read data payload */ | |
446 | unsigned int inline_wsize; /* max non-rdma write data payload */ | |
447 | unsigned int padding; /* non-rdma write header padding */ | |
448 | }; | |
449 | ||
f58851e6 TT |
450 | /* |
451 | * Statistics for RPCRDMA | |
452 | */ | |
struct rpcrdma_stats {
	/* accessed when sending a call */
	unsigned long		read_chunk_count;
	unsigned long		write_chunk_count;
	unsigned long		reply_chunk_count;
	unsigned long long	total_rdma_request;

	/* rarely accessed error counters */
	unsigned long long	pullup_copy_count;
	unsigned long		hardway_register_count;
	unsigned long		failed_marshal_count;
	unsigned long		bad_reply_count;
	unsigned long		mrs_recovered;
	unsigned long		mrs_orphaned;
	unsigned long		mrs_allocated;
	unsigned long		empty_sendctx_q;

	/* accessed when receiving a reply */
	unsigned long long	total_rdma_reply;
	unsigned long long	fixup_copy_count;
	unsigned long		reply_waits_for_send;
	unsigned long		local_inv_needed;
	unsigned long		nomsg_call_count;
	unsigned long		bcall_count;
};
478 | ||
a0ce85f5 CL |
479 | /* |
480 | * Per-registration mode operations | |
481 | */ | |
1c9351ee | 482 | struct rpcrdma_xprt; |
a0ce85f5 | 483 | struct rpcrdma_memreg_ops { |
6748b0ca CL |
484 | struct rpcrdma_mr_seg * |
485 | (*ro_map)(struct rpcrdma_xprt *, | |
9d6b0409 CL |
486 | struct rpcrdma_mr_seg *, int, bool, |
487 | struct rpcrdma_mw **); | |
32d0ceec | 488 | void (*ro_unmap_sync)(struct rpcrdma_xprt *, |
451d26e1 | 489 | struct list_head *); |
505bbe64 | 490 | void (*ro_recover_mr)(struct rpcrdma_mw *); |
3968cb58 CL |
491 | int (*ro_open)(struct rpcrdma_ia *, |
492 | struct rpcrdma_ep *, | |
493 | struct rpcrdma_create_data_internal *); | |
1c9351ee | 494 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); |
e2ac236c CL |
495 | int (*ro_init_mr)(struct rpcrdma_ia *, |
496 | struct rpcrdma_mw *); | |
497 | void (*ro_release_mr)(struct rpcrdma_mw *); | |
a0ce85f5 | 498 | const char *ro_displayname; |
c8b920bb | 499 | const int ro_send_w_inv_ok; |
a0ce85f5 CL |
500 | }; |
501 | ||
/* The two memory registration method tables */
extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops;
extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops;
a0ce85f5 | 504 | |
f58851e6 TT |
505 | /* |
506 | * RPCRDMA transport -- encapsulates the structures above for | |
507 | * integration with RPC. | |
508 | * | |
509 | * The contained structures are embedded, not pointers, | |
510 | * for convenience. This structure need not be visible externally. | |
511 | * | |
512 | * It is allocated and initialized during mount, and released | |
513 | * during unmount. | |
514 | */ | |
515 | struct rpcrdma_xprt { | |
5abefb86 | 516 | struct rpc_xprt rx_xprt; |
f58851e6 TT |
517 | struct rpcrdma_ia rx_ia; |
518 | struct rpcrdma_ep rx_ep; | |
519 | struct rpcrdma_buffer rx_buf; | |
520 | struct rpcrdma_create_data_internal rx_data; | |
5abefb86 | 521 | struct delayed_work rx_connect_worker; |
f58851e6 TT |
522 | struct rpcrdma_stats rx_stats; |
523 | }; | |
524 | ||
/* Convert a struct rpc_xprt pointer to its enclosing rpcrdma_xprt,
 * and to that transport's rx_data member.
 */
#define rpcx_to_rdmax(x)	container_of(x, struct rpcrdma_xprt, rx_xprt)
#define rpcx_to_rdmad(x)	(rpcx_to_rdmax(x)->rx_data)
527 | ||
9191ca3b TT |
/* Setting this to 0 ensures interoperability with early servers.
 * Setting this to 1 enhances certain unaligned read/write performance.
 * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs()
 */
extern int xprt_rdma_pad_optimize;

/* This setting controls the hunt for a supported memory
 * registration strategy.
 */
extern unsigned int xprt_rdma_memreg_strategy;
537 | ||
f58851e6 TT |
538 | /* |
539 | * Interface Adapter calls - xprtrdma/verbs.c | |
540 | */ | |
fff09594 | 541 | int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr); |
bebd0318 | 542 | void rpcrdma_ia_remove(struct rpcrdma_ia *ia); |
f58851e6 | 543 | void rpcrdma_ia_close(struct rpcrdma_ia *); |
b54054ca CL |
544 | bool frwr_is_supported(struct rpcrdma_ia *); |
545 | bool fmr_is_supported(struct rpcrdma_ia *); | |
f58851e6 | 546 | |
d8f532d2 CL |
547 | extern struct workqueue_struct *rpcrdma_receive_wq; |
548 | ||
f58851e6 TT |
549 | /* |
550 | * Endpoint calls - xprtrdma/verbs.c | |
551 | */ | |
int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
		      struct rpcrdma_create_data_internal *cdata);
void rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia);
int rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia);
void rpcrdma_conn_func(struct rpcrdma_ep *ep);
void rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia);

int rpcrdma_ep_post(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
		    struct rpcrdma_req *req);
int rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep);
f58851e6 TT |
562 | |
563 | /* | |
564 | * Buffer calls - xprtrdma/verbs.c | |
565 | */ | |
f531a5db CL |
566 | struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); |
567 | struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *); | |
13650c23 | 568 | void rpcrdma_destroy_req(struct rpcrdma_req *); |
ac920d04 | 569 | int rpcrdma_buffer_create(struct rpcrdma_xprt *); |
f58851e6 | 570 | void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); |
ae72950a CL |
571 | struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); |
572 | void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); | |
f58851e6 | 573 | |
346aa66b CL |
574 | struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); |
575 | void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); | |
f58851e6 TT |
576 | struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); |
577 | void rpcrdma_buffer_put(struct rpcrdma_req *); | |
578 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); | |
579 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); | |
580 | ||
505bbe64 CL |
581 | void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *); |
582 | ||
13650c23 | 583 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, |
99ef4db3 | 584 | gfp_t); |
54cbd6b0 | 585 | bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); |
13650c23 | 586 | void rpcrdma_free_regbuf(struct rpcrdma_regbuf *); |
9128c3e7 | 587 | |
54cbd6b0 CL |
588 | static inline bool |
589 | rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb) | |
590 | { | |
591 | return rb->rg_device != NULL; | |
592 | } | |
593 | ||
594 | static inline bool | |
595 | rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) | |
596 | { | |
597 | if (likely(rpcrdma_regbuf_is_mapped(rb))) | |
598 | return true; | |
599 | return __rpcrdma_dma_map_regbuf(ia, rb); | |
600 | } | |
601 | ||
int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count);

int rpcrdma_alloc_wq(void);
void rpcrdma_destroy_wq(void);
606 | ||
d654788e CL |
607 | /* |
608 | * Wrappers for chunk registration, shared by read/write chunk code. | |
609 | */ | |
610 | ||
d654788e CL |
611 | static inline enum dma_data_direction |
612 | rpcrdma_data_dir(bool writing) | |
613 | { | |
614 | return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | |
615 | } | |
616 | ||
f58851e6 TT |
617 | /* |
618 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c | |
619 | */ | |
655fec69 CL |
620 | |
/* Values are spelled out; they match the implicit numbering. */
enum rpcrdma_chunktype {
	rpcrdma_noch	= 0,
	rpcrdma_readch	= 1,
	rpcrdma_areadch	= 2,
	rpcrdma_writech	= 3,
	rpcrdma_replych	= 4
};
628 | ||
857f9aca CL |
629 | int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, |
630 | struct rpcrdma_req *req, u32 hdrlen, | |
631 | struct xdr_buf *xdr, | |
632 | enum rpcrdma_chunktype rtype); | |
ae72950a | 633 | void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc); |
09e60641 | 634 | int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst); |
87cfb9a0 | 635 | void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); |
e1352c96 | 636 | void rpcrdma_complete_rqst(struct rpcrdma_rep *rep); |
d8f532d2 | 637 | void rpcrdma_reply_handler(struct rpcrdma_rep *rep); |
0ba6f370 CL |
638 | void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, |
639 | struct rpcrdma_req *req); | |
d8f532d2 | 640 | void rpcrdma_deferred_completion(struct work_struct *work); |
f58851e6 | 641 | |
96f8778f CL |
642 | static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len) |
643 | { | |
644 | xdr->head[0].iov_len = len; | |
645 | xdr->len = len; | |
646 | } | |
647 | ||
ffe1f0df CL |
648 | /* RPC/RDMA module init - xprtrdma/transport.c |
649 | */ | |
5d252f90 CL |
extern unsigned int xprt_rdma_max_inline_read;

void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
void rpcrdma_connect_worker(struct work_struct *work);
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);

int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);
657 | ||
f531a5db CL |
658 | /* Backchannel calls - xprtrdma/backchannel.c |
659 | */ | |
#ifdef CONFIG_SUNRPC_BACKCHANNEL
int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs);
int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *r_xprt, unsigned int count);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
			     struct rpcrdma_rep *rep);
int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst);
void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs);
#endif	/* CONFIG_SUNRPC_BACKCHANNEL */

/* Declared whether or not the backchannel is configured */
extern struct xprt_class xprt_rdma_bc;
cec56c8f | 672 | |
f58851e6 | 673 | #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ |