Commit | Line | Data |
---|---|---|
f58851e6 TT |
1 | /* |
2 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | |
3 | * | |
4 | * This software is available to you under a choice of one of two | |
5 | * licenses. You may choose to be licensed under the terms of the GNU | |
6 | * General Public License (GPL) Version 2, available from the file | |
7 | * COPYING in the main directory of this source tree, or the BSD-type | |
8 | * license below: | |
9 | * | |
10 | * Redistribution and use in source and binary forms, with or without | |
11 | * modification, are permitted provided that the following conditions | |
12 | * are met: | |
13 | * | |
14 | * Redistributions of source code must retain the above copyright | |
15 | * notice, this list of conditions and the following disclaimer. | |
16 | * | |
17 | * Redistributions in binary form must reproduce the above | |
18 | * copyright notice, this list of conditions and the following | |
19 | * disclaimer in the documentation and/or other materials provided | |
20 | * with the distribution. | |
21 | * | |
22 | * Neither the name of the Network Appliance, Inc. nor the names of | |
23 | * its contributors may be used to endorse or promote products | |
24 | * derived from this software without specific prior written | |
25 | * permission. | |
26 | * | |
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
38 | */ | |
39 | ||
40 | #ifndef _LINUX_SUNRPC_XPRT_RDMA_H | |
41 | #define _LINUX_SUNRPC_XPRT_RDMA_H | |
42 | ||
43 | #include <linux/wait.h> /* wait_queue_head_t, etc */ | |
44 | #include <linux/spinlock.h> /* spinlock_t, etc */ | |
60063497 | 45 | #include <linux/atomic.h> /* atomic_t, etc */ |
254f91e2 | 46 | #include <linux/workqueue.h> /* struct work_struct */ |
f58851e6 TT |
47 | |
48 | #include <rdma/rdma_cm.h> /* RDMA connection api */ | |
49 | #include <rdma/ib_verbs.h> /* RDMA verbs api */ | |
50 | ||
51 | #include <linux/sunrpc/clnt.h> /* rpc_xprt */ | |
52 | #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ | |
53 | #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ | |
54 | ||
5675add3 TT |
55 | #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ |
56 | #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ | |
57 | ||
5d252f90 CL |
58 | #define RPCRDMA_BIND_TO (60U * HZ) |
59 | #define RPCRDMA_INIT_REEST_TO (5U * HZ) | |
60 | #define RPCRDMA_MAX_REEST_TO (30U * HZ) | |
61 | #define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ) | |
62 | ||
f58851e6 TT |
63 | /* |
64 | * Interface Adapter -- one per transport instance | |
65 | */ | |
66 | struct rpcrdma_ia { | |
a0ce85f5 | 67 | const struct rpcrdma_memreg_ops *ri_ops; |
89e0d112 | 68 | struct ib_device *ri_device; |
f58851e6 TT |
69 | struct rdma_cm_id *ri_id; |
70 | struct ib_pd *ri_pd; | |
f58851e6 | 71 | struct completion ri_done; |
bebd0318 | 72 | struct completion ri_remove_done; |
f58851e6 | 73 | int ri_async_rc; |
87cfb9a0 | 74 | unsigned int ri_max_segs; |
0fc6c4e7 | 75 | unsigned int ri_max_frmr_depth; |
302d3deb CL |
76 | unsigned int ri_max_inline_write; |
77 | unsigned int ri_max_inline_read; | |
16f906d6 | 78 | unsigned int ri_max_send_sges; |
c8b920bb | 79 | bool ri_reminv_expected; |
b5f0afbe | 80 | bool ri_implicit_roundup; |
5e9fc6a0 | 81 | enum ib_mr_type ri_mrtype; |
bebd0318 | 82 | unsigned long ri_flags; |
ce1ab9ab CL |
83 | struct ib_qp_attr ri_qp_attr; |
84 | struct ib_qp_init_attr ri_qp_init_attr; | |
f58851e6 TT |
85 | }; |
86 | ||
bebd0318 CL |
/* NOTE(review): looks like a bit number for rpcrdma_ia::ri_flags -- confirm */
enum {
	RPCRDMA_IAF_REMOVING	= 0,
};
90 | ||
f58851e6 TT |
91 | /* |
92 | * RDMA Endpoint -- one per transport instance | |
93 | */ | |
94 | ||
95 | struct rpcrdma_ep { | |
96 | atomic_t rep_cqcount; | |
97 | int rep_cqinit; | |
98 | int rep_connected; | |
f58851e6 TT |
99 | struct ib_qp_init_attr rep_attr; |
100 | wait_queue_head_t rep_connect_wait; | |
87cfb9a0 | 101 | struct rpcrdma_connect_private rep_cm_private; |
f58851e6 TT |
102 | struct rdma_conn_param rep_remote_cma; |
103 | struct sockaddr_storage rep_remote_addr; | |
254f91e2 | 104 | struct delayed_work rep_connect_worker; |
f58851e6 TT |
105 | }; |
106 | ||
8d38de65 CL |
107 | static inline void |
108 | rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count) | |
109 | { | |
110 | atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count); | |
111 | } | |
112 | ||
113 | /* To update send queue accounting, provider must take a | |
114 | * send completion every now and then. | |
115 | */ | |
116 | static inline void | |
117 | rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr) | |
118 | { | |
119 | send_wr->send_flags = 0; | |
120 | if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) { | |
121 | rpcrdma_init_cqcount(ep, 0); | |
122 | send_wr->send_flags = IB_SEND_SIGNALED; | |
123 | } | |
124 | } | |
f58851e6 | 125 | |
124fa17d CL |
126 | /* Pre-allocate extra Work Requests for handling backward receives |
127 | * and sends. This is a fixed value because the Work Queues are | |
128 | * allocated when the forward channel is set up. | |
129 | */ | |
130 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | |
131 | #define RPCRDMA_BACKWARD_WRS (8) | |
132 | #else | |
133 | #define RPCRDMA_BACKWARD_WRS (0) | |
134 | #endif | |
135 | ||
9128c3e7 CL |
136 | /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV |
137 | * | |
138 | * The below structure appears at the front of a large region of kmalloc'd | |
139 | * memory, which always starts on a good alignment boundary. | |
140 | */ | |
141 | ||
142 | struct rpcrdma_regbuf { | |
9128c3e7 | 143 | struct ib_sge rg_iov; |
54cbd6b0 | 144 | struct ib_device *rg_device; |
99ef4db3 | 145 | enum dma_data_direction rg_direction; |
9128c3e7 CL |
146 | __be32 rg_base[0] __attribute__ ((aligned(256))); |
147 | }; | |
148 | ||
149 | static inline u64 | |
150 | rdmab_addr(struct rpcrdma_regbuf *rb) | |
151 | { | |
152 | return rb->rg_iov.addr; | |
153 | } | |
154 | ||
155 | static inline u32 | |
156 | rdmab_length(struct rpcrdma_regbuf *rb) | |
157 | { | |
158 | return rb->rg_iov.length; | |
159 | } | |
160 | ||
161 | static inline u32 | |
162 | rdmab_lkey(struct rpcrdma_regbuf *rb) | |
163 | { | |
164 | return rb->rg_iov.lkey; | |
165 | } | |
166 | ||
167 | static inline struct rpcrdma_msg * | |
168 | rdmab_to_msg(struct rpcrdma_regbuf *rb) | |
169 | { | |
170 | return (struct rpcrdma_msg *)rb->rg_base; | |
171 | } | |
172 | ||
91a10c52 CL |
173 | static inline struct ib_device * |
174 | rdmab_device(struct rpcrdma_regbuf *rb) | |
175 | { | |
176 | return rb->rg_device; | |
177 | } | |
178 | ||
5d252f90 CL |
179 | #define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) |
180 | ||
94931746 CL |
181 | /* To ensure a transport can always make forward progress, |
182 | * the number of RDMA segments allowed in header chunk lists | |
183 | * is capped at 8. This prevents less-capable devices and | |
184 | * memory registrations from overrunning the Send buffer | |
185 | * while building chunk lists. | |
186 | * | |
187 | * Elements of the Read list take up more room than the | |
188 | * Write list or Reply chunk. 8 read segments means the Read | |
189 | * list (or Write list or Reply chunk) cannot consume more | |
190 | * than | |
191 | * | |
192 | * ((8 + 2) * read segment size) + 1 XDR words, or 244 bytes. | |
193 | * | |
194 | * And the fixed part of the header is another 24 bytes. | |
195 | * | |
196 | * The smallest inline threshold is 1024 bytes, ensuring that | |
197 | * at least 750 bytes are available for RPC messages. | |
198 | */ | |
08cf2efd CL |
enum {
	/* Cap on RDMA segments in a header chunk list */
	RPCRDMA_MAX_HDR_SEGS	= 8,

	RPCRDMA_HDRBUF_SIZE	= 256,
};
94931746 | 203 | |
f58851e6 TT |
204 | /* |
205 | * struct rpcrdma_rep -- this structure encapsulates state required to recv | |
206 | * and complete a reply, asynchronously. It needs several pieces of |
207 | * state: | |
208 | * o recv buffer (posted to provider) | |
209 | * o ib_sge (also donated to provider) | |
210 | * o status of reply (length, success or not) | |
5ab81428 | 211 | * o bookkeeping state to get run by reply handler (list, etc) |
f58851e6 | 212 | * |
5ab81428 | 213 | * These are allocated during initialization, per-transport instance. |
f58851e6 TT |
214 | * |
215 | * N of these are associated with a transport instance, and stored in | |
216 | * struct rpcrdma_buffer. N is the max number of outstanding requests. | |
217 | */ | |
218 | ||
f58851e6 | 219 | struct rpcrdma_rep { |
552bf225 | 220 | struct ib_cqe rr_cqe; |
c8b920bb CL |
221 | int rr_wc_flags; |
222 | u32 rr_inv_rkey; | |
c1bcb68e | 223 | struct rpcrdma_regbuf *rr_rdmabuf; |
fed171b3 | 224 | struct rpcrdma_xprt *rr_rxprt; |
fe97b47c | 225 | struct work_struct rr_work; |
96f8778f CL |
226 | struct xdr_buf rr_hdrbuf; |
227 | struct xdr_stream rr_stream; | |
6b1184cd | 228 | struct list_head rr_list; |
6ea8e711 | 229 | struct ib_recv_wr rr_recv_wr; |
f58851e6 TT |
230 | }; |
231 | ||
0dbb4108 CL |
232 | /* |
233 | * struct rpcrdma_mw - external memory region metadata | |
234 | * | |
235 | * An external memory region is any buffer or page that is registered | |
236 | * on the fly (i.e., not pre-registered). | |
237 | * | |
3111d72c | 238 | * Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During |
0dbb4108 CL |
239 | * call_allocate, rpcrdma_buffer_get() assigns one to each segment in |
240 | * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep | |
241 | * track of registration metadata while each RPC is pending. | |
242 | * rpcrdma_deregister_external() uses this metadata to unmap and | |
243 | * release these resources when an RPC is complete. | |
244 | */ | |
enum rpcrdma_frmr_state {
	FRMR_IS_INVALID	= 0,	/* ready to be used */
	FRMR_IS_VALID	= 1,	/* in use */
	FRMR_FLUSHED_FR	= 2,	/* flushed FASTREG WR */
	FRMR_FLUSHED_LI	= 3,	/* flushed LOCALINV WR */
};
251 | ||
252 | struct rpcrdma_frmr { | |
0dbb4108 | 253 | struct ib_mr *fr_mr; |
2fa8f88d | 254 | struct ib_cqe fr_cqe; |
0dbb4108 | 255 | enum rpcrdma_frmr_state fr_state; |
2fa8f88d | 256 | struct completion fr_linv_done; |
3cf4e169 CL |
257 | union { |
258 | struct ib_reg_wr fr_regwr; | |
259 | struct ib_send_wr fr_invwr; | |
260 | }; | |
0dbb4108 CL |
261 | }; |
262 | ||
acb9da7a | 263 | struct rpcrdma_fmr { |
88975ebe CL |
264 | struct ib_fmr *fm_mr; |
265 | u64 *fm_physaddrs; | |
0dbb4108 CL |
266 | }; |
267 | ||
268 | struct rpcrdma_mw { | |
564471d2 CL |
269 | struct list_head mw_list; |
270 | struct scatterlist *mw_sg; | |
271 | int mw_nents; | |
272 | enum dma_data_direction mw_dir; | |
4b196dc6 | 273 | unsigned long mw_flags; |
0dbb4108 | 274 | union { |
acb9da7a | 275 | struct rpcrdma_fmr fmr; |
0dbb4108 | 276 | struct rpcrdma_frmr frmr; |
c882a655 | 277 | }; |
766656b0 | 278 | struct rpcrdma_xprt *mw_xprt; |
9d6b0409 CL |
279 | u32 mw_handle; |
280 | u32 mw_length; | |
281 | u64 mw_offset; | |
3111d72c | 282 | struct list_head mw_all; |
0dbb4108 CL |
283 | }; |
284 | ||
4b196dc6 CL |
285 | /* mw_flags */ |
enum {
	RPCRDMA_MW_F_RI		= 1,
};
289 | ||
f58851e6 TT |
290 | /* |
291 | * struct rpcrdma_req -- structure central to the request/reply sequence. | |
292 | * | |
293 | * N of these are associated with a transport instance, and stored in | |
294 | * struct rpcrdma_buffer. N is the max number of outstanding requests. | |
295 | * | |
296 | * It includes pre-registered buffer memory for send AND recv. | |
297 | * The recv buffer, however, is not owned by this structure, and | |
298 | * is "donated" to the hardware when a recv is posted. When a | |
299 | * reply is handled, the recv buffer used is given back to the | |
300 | * struct rpcrdma_req associated with the request. | |
301 | * | |
302 | * In addition to the basic memory, this structure includes an array | |
303 | * of iovs for send operations. The reason is that the iovs passed to | |
304 | * ib_post_{send,recv} must not be modified until the work request | |
305 | * completes. | |
f58851e6 TT |
306 | */ |
307 | ||
5ab81428 CL |
308 | /* Maximum number of page-sized "segments" per chunk list to be |
309 | * registered or invalidated. Must handle a Reply chunk: | |
310 | */ | |
311 | enum { | |
312 | RPCRDMA_MAX_IOV_SEGS = 3, | |
313 | RPCRDMA_MAX_DATA_SEGS = ((1 * 1024 * 1024) / PAGE_SIZE) + 1, | |
314 | RPCRDMA_MAX_SEGS = RPCRDMA_MAX_DATA_SEGS + | |
315 | RPCRDMA_MAX_IOV_SEGS, | |
316 | }; | |
317 | ||
f58851e6 | 318 | struct rpcrdma_mr_seg { /* chunk descriptors */ |
f58851e6 | 319 | u32 mr_len; /* length of chunk or segment */ |
f58851e6 TT |
320 | struct page *mr_page; /* owning page, if any */ |
321 | char *mr_offset; /* kva if no page, else offset */ | |
322 | }; | |
323 | ||
c6f5b47f CL |
324 | /* The Send SGE array is provisioned to send a maximum size |
325 | * inline request: | |
655fec69 CL |
326 | * - RPC-over-RDMA header |
327 | * - xdr_buf head iovec | |
c6f5b47f | 328 | * - RPCRDMA_MAX_INLINE bytes, in pages |
655fec69 | 329 | * - xdr_buf tail iovec |
c6f5b47f CL |
330 | * |
331 | * The actual number of array elements consumed by each RPC | |
332 | * depends on the device's max_sge limit. | |
655fec69 CL |
333 | */ |
334 | enum { | |
16f906d6 | 335 | RPCRDMA_MIN_SEND_SGES = 3, |
c6f5b47f | 336 | RPCRDMA_MAX_PAGE_SGES = RPCRDMA_MAX_INLINE >> PAGE_SHIFT, |
655fec69 CL |
337 | RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1, |
338 | }; | |
b3221d6a | 339 | |
5ab81428 | 340 | struct rpcrdma_buffer; |
f58851e6 | 341 | struct rpcrdma_req { |
a80d66c9 | 342 | struct list_head rl_list; |
655fec69 | 343 | unsigned int rl_mapped_sges; |
b3221d6a CL |
344 | unsigned int rl_connect_cookie; |
345 | struct rpcrdma_buffer *rl_buffer; | |
90aab602 | 346 | struct rpcrdma_rep *rl_reply; |
7a80f3f0 CL |
347 | struct xdr_stream rl_stream; |
348 | struct xdr_buf rl_hdrbuf; | |
90aab602 | 349 | struct ib_send_wr rl_send_wr; |
655fec69 | 350 | struct ib_sge rl_send_sge[RPCRDMA_MAX_SEND_SGES]; |
9c40c49f CL |
351 | struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */ |
352 | struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */ | |
353 | struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */ | |
f531a5db | 354 | |
2fa8f88d | 355 | struct ib_cqe rl_cqe; |
f531a5db CL |
356 | struct list_head rl_all; |
357 | bool rl_backchannel; | |
5ab81428 CL |
358 | |
359 | struct list_head rl_registered; /* registered segments */ | |
360 | struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; | |
f58851e6 | 361 | }; |
0ca77dc3 | 362 | |
5a6d1db4 CL |
363 | static inline void |
364 | rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) | |
365 | { | |
366 | rqst->rq_xprtdata = req; | |
367 | } | |
368 | ||
0ca77dc3 CL |
369 | static inline struct rpcrdma_req * |
370 | rpcr_to_rdmar(struct rpc_rqst *rqst) | |
371 | { | |
5a6d1db4 | 372 | return rqst->rq_xprtdata; |
0ca77dc3 | 373 | } |
f58851e6 | 374 | |
9a5c63e9 CL |
375 | static inline void |
376 | rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list) | |
377 | { | |
378 | list_add_tail(&mw->mw_list, list); | |
379 | } | |
380 | ||
381 | static inline struct rpcrdma_mw * | |
382 | rpcrdma_pop_mw(struct list_head *list) | |
383 | { | |
384 | struct rpcrdma_mw *mw; | |
385 | ||
386 | mw = list_first_entry(list, struct rpcrdma_mw, mw_list); | |
387 | list_del(&mw->mw_list); | |
388 | return mw; | |
389 | } | |
390 | ||
f58851e6 TT |
391 | /* |
392 | * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for | |
393 | * inline requests/replies, and client/server credits. | |
394 | * | |
395 | * One of these is associated with a transport instance | |
396 | */ | |
397 | struct rpcrdma_buffer { | |
58d1dcf5 CL |
398 | spinlock_t rb_mwlock; /* protect rb_mws list */ |
399 | struct list_head rb_mws; | |
400 | struct list_head rb_all; | |
58d1dcf5 | 401 | |
1e465fd4 | 402 | spinlock_t rb_lock; /* protect buf lists */ |
05c97466 | 403 | int rb_send_count, rb_recv_count; |
1e465fd4 CL |
404 | struct list_head rb_send_bufs; |
405 | struct list_head rb_recv_bufs; | |
58d1dcf5 | 406 | u32 rb_max_requests; |
23826c7a | 407 | atomic_t rb_credits; /* most recent credit grant */ |
f531a5db CL |
408 | |
409 | u32 rb_bc_srv_max_requests; | |
410 | spinlock_t rb_reqslock; /* protect rb_allreqs */ | |
411 | struct list_head rb_allreqs; | |
5d252f90 CL |
412 | |
413 | u32 rb_bc_max_requests; | |
505bbe64 CL |
414 | |
415 | spinlock_t rb_recovery_lock; /* protect rb_stale_mrs */ | |
416 | struct list_head rb_stale_mrs; | |
417 | struct delayed_work rb_recovery_worker; | |
e2ac236c | 418 | struct delayed_work rb_refresh_worker; |
f58851e6 TT |
419 | }; |
420 | #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) | |
421 | ||
422 | /* | |
423 | * Internal structure for transport instance creation. This | |
424 | * exists primarily for modularity. | |
425 | * | |
426 | * This data should be set with mount options | |
427 | */ | |
struct rpcrdma_create_data_internal {
	/* RDMA server address */
	struct sockaddr_storage	addr;
	/* max requests (slots) in flight */
	unsigned int		max_requests;
	/* mount rsize - max read hdr+data */
	unsigned int		rsize;
	/* mount wsize - max write hdr+data */
	unsigned int		wsize;
	/* max non-rdma read data payload */
	unsigned int		inline_rsize;
	/* max non-rdma write data payload */
	unsigned int		inline_wsize;
	/* non-rdma write header padding */
	unsigned int		padding;
};
437 | ||
f58851e6 TT |
438 | /* |
439 | * Statistics for RPCRDMA | |
440 | */ | |
struct rpcrdma_stats {
	/*
	 * accessed when sending a call
	 */
	unsigned long		read_chunk_count;
	unsigned long		write_chunk_count;
	unsigned long		reply_chunk_count;
	unsigned long long	total_rdma_request;

	/*
	 * rarely accessed error counters
	 */
	unsigned long long	pullup_copy_count;
	unsigned long		hardway_register_count;
	unsigned long		failed_marshal_count;
	unsigned long		bad_reply_count;
	unsigned long		mrs_recovered;
	unsigned long		mrs_orphaned;
	unsigned long		mrs_allocated;

	/*
	 * accessed when receiving a reply
	 */
	unsigned long long	total_rdma_reply;
	unsigned long long	fixup_copy_count;
	unsigned long		local_inv_needed;
	unsigned long		nomsg_call_count;
	unsigned long		bcall_count;
};
464 | ||
a0ce85f5 CL |
465 | /* |
466 | * Per-registration mode operations | |
467 | */ | |
1c9351ee | 468 | struct rpcrdma_xprt; |
struct rpcrdma_memreg_ops {
	struct rpcrdma_mr_seg *(*ro_map)(struct rpcrdma_xprt *,
					 struct rpcrdma_mr_seg *,
					 int, bool,
					 struct rpcrdma_mw **);
	void (*ro_unmap_sync)(struct rpcrdma_xprt *, struct list_head *);
	void (*ro_recover_mr)(struct rpcrdma_mw *);
	int (*ro_open)(struct rpcrdma_ia *, struct rpcrdma_ep *,
		       struct rpcrdma_create_data_internal *);
	size_t (*ro_maxpages)(struct rpcrdma_xprt *);
	int (*ro_init_mr)(struct rpcrdma_ia *, struct rpcrdma_mw *);
	void (*ro_release_mr)(struct rpcrdma_mw *);

	const char	*ro_displayname;
	const int	ro_send_w_inv_ok;
};
487 | ||
488 | extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops; | |
489 | extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops; | |
a0ce85f5 | 490 | |
f58851e6 TT |
491 | /* |
492 | * RPCRDMA transport -- encapsulates the structures above for | |
493 | * integration with RPC. | |
494 | * | |
495 | * The contained structures are embedded, not pointers, | |
496 | * for convenience. This structure need not be visible externally. | |
497 | * | |
498 | * It is allocated and initialized during mount, and released | |
499 | * during unmount. | |
500 | */ | |
501 | struct rpcrdma_xprt { | |
5abefb86 | 502 | struct rpc_xprt rx_xprt; |
f58851e6 TT |
503 | struct rpcrdma_ia rx_ia; |
504 | struct rpcrdma_ep rx_ep; | |
505 | struct rpcrdma_buffer rx_buf; | |
506 | struct rpcrdma_create_data_internal rx_data; | |
5abefb86 | 507 | struct delayed_work rx_connect_worker; |
f58851e6 TT |
508 | struct rpcrdma_stats rx_stats; |
509 | }; | |
510 | ||
5abefb86 | 511 | #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) |
f58851e6 TT |
512 | #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) |
513 | ||
9191ca3b TT |
514 | /* Setting this to 0 ensures interoperability with early servers. |
515 | * Setting this to 1 enhances certain unaligned read/write performance. | |
516 | * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ | |
517 | extern int xprt_rdma_pad_optimize; | |
518 | ||
fff09594 CL |
519 | /* This setting controls the hunt for a supported memory |
520 | * registration strategy. | |
521 | */ | |
522 | extern unsigned int xprt_rdma_memreg_strategy; | |
523 | ||
f58851e6 TT |
524 | /* |
525 | * Interface Adapter calls - xprtrdma/verbs.c | |
526 | */ | |
fff09594 | 527 | int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr); |
bebd0318 | 528 | void rpcrdma_ia_remove(struct rpcrdma_ia *ia); |
f58851e6 | 529 | void rpcrdma_ia_close(struct rpcrdma_ia *); |
b54054ca CL |
530 | bool frwr_is_supported(struct rpcrdma_ia *); |
531 | bool fmr_is_supported(struct rpcrdma_ia *); | |
f58851e6 TT |
532 | |
533 | /* | |
534 | * Endpoint calls - xprtrdma/verbs.c | |
535 | */ | |
536 | int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, | |
537 | struct rpcrdma_create_data_internal *); | |
7f1d5419 | 538 | void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); |
f58851e6 | 539 | int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); |
3a72dc77 | 540 | void rpcrdma_conn_func(struct rpcrdma_ep *ep); |
282191cb | 541 | void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); |
f58851e6 TT |
542 | |
543 | int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, | |
544 | struct rpcrdma_req *); | |
b157380a | 545 | int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *); |
f58851e6 TT |
546 | |
547 | /* | |
548 | * Buffer calls - xprtrdma/verbs.c | |
549 | */ | |
f531a5db CL |
550 | struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); |
551 | struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *); | |
13650c23 | 552 | void rpcrdma_destroy_req(struct rpcrdma_req *); |
ac920d04 | 553 | int rpcrdma_buffer_create(struct rpcrdma_xprt *); |
f58851e6 TT |
554 | void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); |
555 | ||
346aa66b CL |
556 | struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); |
557 | void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); | |
f58851e6 TT |
558 | struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); |
559 | void rpcrdma_buffer_put(struct rpcrdma_req *); | |
560 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); | |
561 | void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); | |
562 | ||
505bbe64 CL |
563 | void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *); |
564 | ||
13650c23 | 565 | struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, |
99ef4db3 | 566 | gfp_t); |
54cbd6b0 | 567 | bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); |
13650c23 | 568 | void rpcrdma_free_regbuf(struct rpcrdma_regbuf *); |
9128c3e7 | 569 | |
54cbd6b0 CL |
570 | static inline bool |
571 | rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb) | |
572 | { | |
573 | return rb->rg_device != NULL; | |
574 | } | |
575 | ||
/* Ensure @rb is mapped: returns true immediately when it already is,
 * otherwise returns the result of __rpcrdma_dma_map_regbuf().
 */
static inline bool
rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
	if (unlikely(!rpcrdma_regbuf_is_mapped(rb)))
		return __rpcrdma_dma_map_regbuf(ia, rb);

	return true;
}
583 | ||
f531a5db | 584 | int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int); |
d654788e | 585 | |
fe97b47c CL |
586 | int rpcrdma_alloc_wq(void); |
587 | void rpcrdma_destroy_wq(void); | |
588 | ||
d654788e CL |
589 | /* |
590 | * Wrappers for chunk registration, shared by read/write chunk code. | |
591 | */ | |
592 | ||
d654788e CL |
593 | static inline enum dma_data_direction |
594 | rpcrdma_data_dir(bool writing) | |
595 | { | |
596 | return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | |
597 | } | |
598 | ||
f58851e6 TT |
599 | /* |
600 | * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c | |
601 | */ | |
655fec69 CL |
602 | |
enum rpcrdma_chunktype {
	rpcrdma_noch	= 0,
	rpcrdma_readch	= 1,
	rpcrdma_areadch	= 2,
	rpcrdma_writech	= 3,
	rpcrdma_replych	= 4
};
610 | ||
611 | bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *, | |
612 | u32, struct xdr_buf *, enum rpcrdma_chunktype); | |
613 | void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *); | |
09e60641 | 614 | int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst); |
87cfb9a0 | 615 | void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); |
3a72dc77 | 616 | void rpcrdma_reply_handler(struct work_struct *work); |
f58851e6 | 617 | |
96f8778f CL |
618 | static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len) |
619 | { | |
620 | xdr->head[0].iov_len = len; | |
621 | xdr->len = len; | |
622 | } | |
623 | ||
ffe1f0df CL |
624 | /* RPC/RDMA module init - xprtrdma/transport.c |
625 | */ | |
5d252f90 CL |
626 | extern unsigned int xprt_rdma_max_inline_read; |
627 | void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); | |
628 | void xprt_rdma_free_addresses(struct rpc_xprt *xprt); | |
3a72dc77 | 629 | void rpcrdma_connect_worker(struct work_struct *work); |
5d252f90 | 630 | void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq); |
ffe1f0df CL |
631 | int xprt_rdma_init(void); |
632 | void xprt_rdma_cleanup(void); | |
633 | ||
f531a5db CL |
634 | /* Backchannel calls - xprtrdma/backchannel.c |
635 | */ | |
636 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | |
637 | int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int); | |
76566773 | 638 | int xprt_rdma_bc_up(struct svc_serv *, struct net *); |
6b26cc8c | 639 | size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); |
f531a5db | 640 | int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); |
63cae470 | 641 | void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); |
83128a60 | 642 | int rpcrdma_bc_marshal_reply(struct rpc_rqst *); |
f531a5db CL |
643 | void xprt_rdma_bc_free_rqst(struct rpc_rqst *); |
644 | void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); | |
645 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ | |
646 | ||
5d252f90 | 647 | extern struct xprt_class xprt_rdma_bc; |
cec56c8f | 648 | |
f58851e6 | 649 | #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ |