1 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
3 * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the BSD-type
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
15 * Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials provided
21 * with the distribution.
23 * Neither the name of the Network Appliance, Inc. nor the names of
24 * its contributors may be used to endorse or promote products
25 * derived from this software without specific prior written
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 * Author: Tom Tucker <tom@opengridcomputing.com>
45 #include <linux/llist.h>
46 #include <linux/sunrpc/xdr.h>
47 #include <linux/sunrpc/svcsock.h>
48 #include <linux/sunrpc/rpc_rdma.h>
49 #include <linux/sunrpc/rpc_rdma_cid.h>
50 #include <linux/sunrpc/svc_rdma_pcl.h>
52 #include <linux/percpu_counter.h>
53 #include <rdma/ib_verbs.h>
54 #include <rdma/rdma_cm.h>
56 /* Default and maximum inline threshold sizes */
/* Pull-up threshold: half of RPCRDMA_V1_DEF_INLINE_SIZE (right shift by one) */
58 RPCRDMA_PULLUP_THRESH = RPCRDMA_V1_DEF_INLINE_SIZE >> 1,
/* Default inline threshold (4096; presumably bytes -- confirm) */
59 RPCRDMA_DEF_INLINE_THRESH = 4096,
/* Largest inline threshold (65536; presumably bytes -- confirm) */
60 RPCRDMA_MAX_INLINE_THRESH = 65536
63 /* RPC/RDMA parameters and stats */
/* Module-wide settings; only declared here, the definitions live elsewhere */
64 extern unsigned int svcrdma_ord;
65 extern unsigned int svcrdma_max_requests;
66 extern unsigned int svcrdma_max_bc_requests;
67 extern unsigned int svcrdma_max_req_size;
68 extern struct workqueue_struct *svcrdma_wq;
/* Module-wide statistics, each kept as a struct percpu_counter */
70 extern struct percpu_counter svcrdma_stat_read;
71 extern struct percpu_counter svcrdma_stat_recv;
72 extern struct percpu_counter svcrdma_stat_sq_starve;
73 extern struct percpu_counter svcrdma_stat_write;
/* Embedded generic transport; svc_rdma_rqst_rdma() applies container_of() to this member */
76 struct svc_xprt sc_xprt; /* SVC transport structure */
77 struct rdma_cm_id *sc_cm_id; /* RDMA connection id */
78 struct list_head sc_accept_q; /* Conn. waiting accept */
79 int sc_ord; /* RDMA read limit */
81 bool sc_snd_w_inv; /* OK to use Send With Invalidate */
83 atomic_t sc_sq_avail; /* SQEs ready to be consumed */
84 unsigned int sc_sq_depth; /* Depth of SQ */
/* Typed __be32, i.e. held in big-endian form */
85 __be32 sc_fc_credits; /* Forward credits */
86 u32 sc_max_requests; /* Max requests */
87 u32 sc_max_bc_requests;/* Backward credits */
88 int sc_max_req_size; /* Size of each RQ WR buf */
/* NOTE(review): each spinlock presumably guards the llist declared right after it -- confirm */
93 spinlock_t sc_send_lock;
94 struct llist_head sc_send_ctxts;
95 spinlock_t sc_rw_ctxt_lock;
96 struct llist_head sc_rw_ctxts;
100 struct list_head sc_rq_dto_q;
101 struct list_head sc_read_complete_q;
102 spinlock_t sc_rq_dto_lock;
/* Receive and Send completion queues; their res.id values become ci_queue_id in the *_cid_init() helpers */
104 struct ib_cq *sc_rq_cq;
105 struct ib_cq *sc_sq_cq;
107 spinlock_t sc_lock; /* transport lock */
109 wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */
110 unsigned long sc_flags;
111 struct work_struct sc_work;
113 struct llist_head sc_recv_ctxts;
/* Counter bumped with atomic_inc_return() to produce ci_completion_id for both Receive and Send IDs */
115 atomic_t sc_completion_ids;
/* NOTE(review): presumably a bit number within svcxprt_rdma.sc_flags -- confirm against its users */
118 #define RDMAXPRT_CONN_PENDING 3
/**
 * svc_rdma_rqst_rdma - Find the RDMA transport behind an svc_rqst
 * @rqstp: request whose rq_xprt is the sc_xprt member of a struct svcxprt_rdma
 *
 * Returns the struct svcxprt_rdma that contains rqstp->rq_xprt. No check
 * is made that rq_xprt is actually embedded in an svcxprt_rdma; the
 * caller must already know that it is.
 */
120 static inline struct svcxprt_rdma *svc_rdma_rqst_rdma(struct svc_rqst *rqstp)
122 struct svc_xprt *xprt = rqstp->rq_xprt;
/* Step back from the embedded sc_xprt member to the enclosing structure */
124 return container_of(xprt, struct svcxprt_rdma, sc_xprt);
128 * Default connection parameters
131 RPCRDMA_LISTEN_BACKLOG = 10,
/* NOTE(review): presumably the initial values of svcrdma_max_requests and svcrdma_max_bc_requests -- confirm */
132 RPCRDMA_MAX_REQUESTS = 64,
133 RPCRDMA_MAX_BC_REQUESTS = 2,
/* The RDMA transport's payload limit is simply the generic RPCSVC_MAXPAYLOAD */
136 #define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
139 * svc_rdma_recv_cid_init - Initialize a Receive Queue completion ID
140 * @rdma: controlling transport
141 * @cid: completion ID to initialize
143 static inline void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,
144 struct rpc_rdma_cid *cid)
/* Queue ID: the id stored in the res member of the transport's Receive CQ */
146 cid->ci_queue_id = rdma->sc_rq_cq->res.id;
/* Completion ID: next value of the per-transport counter that svc_rdma_send_cid_init() also draws from */
147 cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
151 * svc_rdma_send_cid_init - Initialize a Send Queue completion ID
152 * @rdma: controlling transport
153 * @cid: completion ID to initialize
155 static inline void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
156 struct rpc_rdma_cid *cid)
/* Queue ID: the id stored in the res member of the transport's Send CQ */
158 cid->ci_queue_id = rdma->sc_sq_cq->res.id;
/* Completion ID: next value of the per-transport counter that svc_rdma_recv_cid_init() also draws from */
159 cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
163 * A chunk context tracks all I/O for moving one Read or Write
164 * chunk. This is a set of rdma_rw's that handle data movement
165 * for all segments of one chunk.
167 struct svc_rdma_chunk_ctxt {
/* Completion ID; a struct rpc_rdma_cid carries ci_queue_id and ci_completion_id */
168 struct rpc_rdma_cid cc_cid;
169 struct ib_cqe cc_cqe;
/* NOTE(review): presumably the list of rdma_rw contexts that make up this chunk -- confirm */
170 struct list_head cc_rwctxts;
175 struct svc_rdma_recv_ctxt {
/* NOTE(review): llist linkage, presumably for svcxprt_rdma.sc_recv_ctxts -- confirm */
176 struct llist_node rc_node;
177 struct list_head rc_list;
178 struct ib_recv_wr rc_recv_wr;
179 struct ib_cqe rc_cqe;
/* Completion ID; a struct rpc_rdma_cid carries ci_queue_id and ci_completion_id */
180 struct rpc_rdma_cid rc_cid;
181 struct ib_sge rc_recv_sge;
183 struct xdr_stream rc_stream;
188 /* State for pulling a Read chunk */
189 unsigned int rc_pageoff;
190 unsigned int rc_curpage;
191 unsigned int rc_readbytes;
192 struct xdr_buf rc_saved_arg;
/* Chunk context embedded by value, not referenced through a pointer */
193 struct svc_rdma_chunk_ctxt rc_cc;
/* Parsed chunk lists (struct svc_rdma_pcl), one per list kind named in the field */
195 struct svc_rdma_pcl rc_call_pcl;
197 struct svc_rdma_pcl rc_read_pcl;
198 struct svc_rdma_chunk *rc_cur_result_payload;
199 struct svc_rdma_pcl rc_write_pcl;
200 struct svc_rdma_pcl rc_reply_pcl;
/* NOTE(review): rc_page_count presumably counts the rc_pages[] slots in use -- confirm */
202 unsigned int rc_page_count;
/* Fixed-size array of RPCSVC_MAXPAGES page pointers */
203 struct page *rc_pages[RPCSVC_MAXPAGES];
207 * State for sending a Write chunk.
208 * - Tracks progress of writing one chunk over all its segments
209 * - Stores arguments for the SGL constructor functions
211 struct svc_rdma_write_info {
212 struct svcxprt_rdma *wi_rdma;
/* NOTE(review): presumably links this entry into svc_rdma_send_ctxt.sc_write_info_list -- confirm */
213 struct list_head wi_list;
/* Chunk being written; const, so it is not modified through this pointer */
215 const struct svc_rdma_chunk *wi_chunk;
217 /* write state of this chunk */
218 unsigned int wi_seg_off;
219 unsigned int wi_seg_no;
221 /* SGL constructor arguments */
222 const struct xdr_buf *wi_xdr;
223 unsigned char *wi_base;
224 unsigned int wi_next_off;
/* Chunk context embedded by value, not referenced through a pointer */
226 struct svc_rdma_chunk_ctxt wi_cc;
227 struct work_struct wi_work;
230 struct svc_rdma_send_ctxt {
/* NOTE(review): llist linkage, presumably for svcxprt_rdma.sc_send_ctxts -- confirm */
231 struct llist_node sc_node;
/* Completion ID; a struct rpc_rdma_cid carries ci_queue_id and ci_completion_id */
232 struct rpc_rdma_cid sc_cid;
233 struct work_struct sc_work;
235 struct svcxprt_rdma *sc_rdma;
236 struct ib_send_wr sc_send_wr;
237 struct ib_send_wr *sc_wr_chain;
239 struct ib_cqe sc_cqe;
240 struct xdr_buf sc_hdrbuf;
241 struct xdr_stream sc_stream;
243 struct list_head sc_write_info_list;
/* Write info embedded by value, not referenced through a pointer */
244 struct svc_rdma_write_info sc_reply_info;
/* Fixed-size array of RPCSVC_MAXPAGES page pointers */
249 struct page *sc_pages[RPCSVC_MAXPAGES];
/* Flexible array member: must remain the last field; its length is fixed when the object is allocated */
250 struct ib_sge sc_sges[];
253 /* svc_rdma_backchannel.c */
254 extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp,
255 struct svc_rdma_recv_ctxt *rctxt);
257 /* svc_rdma_recvfrom.c */
258 extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma);
259 extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma);
260 extern struct svc_rdma_recv_ctxt *
261 svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma);
262 extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
263 struct svc_rdma_recv_ctxt *ctxt);
264 extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma);
265 extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt);
/* @rqstp is named as in the other prototypes here that take a struct svc_rqst * */
266 extern int svc_rdma_recvfrom(struct svc_rqst *rqstp);
/* One prototype per function: svc_rdma_cc_init() takes the transport and the chunk context to set up */
269 extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
270 struct svc_rdma_chunk_ctxt *cc);
271 extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
274 extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
275 struct svc_rdma_chunk_ctxt *cc,
276 enum dma_data_direction dir);
277 extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
278 struct svc_rdma_send_ctxt *ctxt);
279 extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
280 struct svc_rdma_send_ctxt *ctxt);
281 extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
282 const struct svc_rdma_pcl *write_pcl,
283 struct svc_rdma_send_ctxt *sctxt,
284 const struct xdr_buf *xdr);
285 extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
286 const struct svc_rdma_pcl *write_pcl,
287 const struct svc_rdma_pcl *reply_pcl,
288 struct svc_rdma_send_ctxt *sctxt,
289 const struct xdr_buf *xdr);
290 extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
291 struct svc_rqst *rqstp,
292 struct svc_rdma_recv_ctxt *head);
294 /* svc_rdma_sendto.c */
295 extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma);
296 extern struct svc_rdma_send_ctxt *
297 svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma);
298 extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
299 struct svc_rdma_send_ctxt *ctxt);
300 extern int svc_rdma_post_send(struct svcxprt_rdma *rdma,
301 struct svc_rdma_send_ctxt *ctxt);
302 extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
303 struct svc_rdma_send_ctxt *sctxt,
304 const struct svc_rdma_pcl *write_pcl,
305 const struct svc_rdma_pcl *reply_pcl,
306 const struct xdr_buf *xdr);
307 extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
308 struct svc_rdma_send_ctxt *sctxt,
309 struct svc_rdma_recv_ctxt *rctxt,
311 extern void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail);
/* @rqstp is named as in the other prototypes here that take a struct svc_rqst * */
312 extern int svc_rdma_sendto(struct svc_rqst *rqstp);
313 extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
314 unsigned int length);
316 /* svc_rdma_transport.c */
317 extern struct svc_xprt_class svc_rdma_class;
318 #ifdef CONFIG_SUNRPC_BACKCHANNEL
319 extern struct svc_xprt_class svc_rdma_bc_class;
323 extern int svc_rdma_init(void);
324 extern void svc_rdma_cleanup(void);