Commit | Line | Data |
---|---|---|
d5b31be6 | 1 | /* |
0bf48289 | 2 | * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved. |
d5b31be6 TT |
3 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. |
4 | * | |
5 | * This software is available to you under a choice of one of two | |
6 | * licenses. You may choose to be licensed under the terms of the GNU | |
7 | * General Public License (GPL) Version 2, available from the file | |
8 | * COPYING in the main directory of this source tree, or the BSD-type | |
9 | * license below: | |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or without | |
12 | * modification, are permitted provided that the following conditions | |
13 | * are met: | |
14 | * | |
15 | * Redistributions of source code must retain the above copyright | |
16 | * notice, this list of conditions and the following disclaimer. | |
17 | * | |
18 | * Redistributions in binary form must reproduce the above | |
19 | * copyright notice, this list of conditions and the following | |
20 | * disclaimer in the documentation and/or other materials provided | |
21 | * with the distribution. | |
22 | * | |
23 | * Neither the name of the Network Appliance, Inc. nor the names of | |
24 | * its contributors may be used to endorse or promote products | |
25 | * derived from this software without specific prior written | |
26 | * permission. | |
27 | * | |
28 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
29 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
30 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
31 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
32 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
33 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
34 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
35 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
36 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
37 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
38 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
39 | * | |
40 | * Author: Tom Tucker <tom@opengridcomputing.com> | |
41 | */ | |
42 | ||
43 | #include <linux/sunrpc/debug.h> | |
44 | #include <linux/sunrpc/rpc_rdma.h> | |
45 | #include <linux/spinlock.h> | |
46 | #include <asm/unaligned.h> | |
47 | #include <rdma/ib_verbs.h> | |
48 | #include <rdma/rdma_cm.h> | |
49 | #include <linux/sunrpc/svc_rdma.h> | |
50 | ||
51 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | |
52 | ||
53 | /* | |
54 | * Replace the pages in the rq_argpages array with the pages from the SGE in | |
55 | * the RDMA_RECV completion. The SGL should contain full pages up until the | |
56 | * last one. | |
57 | */ | |
58 | static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | |
59 | struct svc_rdma_op_ctxt *ctxt, | |
60 | u32 byte_count) | |
61 | { | |
62 | struct page *page; | |
63 | u32 bc; | |
64 | int sge_no; | |
65 | ||
66 | /* Swap the page in the SGE with the page in argpages */ | |
67 | page = ctxt->pages[0]; | |
68 | put_page(rqstp->rq_pages[0]); | |
69 | rqstp->rq_pages[0] = page; | |
70 | ||
71 | /* Set up the XDR head */ | |
72 | rqstp->rq_arg.head[0].iov_base = page_address(page); | |
0bf48289 SW |
73 | rqstp->rq_arg.head[0].iov_len = |
74 | min_t(size_t, byte_count, ctxt->sge[0].length); | |
d5b31be6 TT |
75 | rqstp->rq_arg.len = byte_count; |
76 | rqstp->rq_arg.buflen = byte_count; | |
77 | ||
78 | /* Compute bytes past head in the SGL */ | |
79 | bc = byte_count - rqstp->rq_arg.head[0].iov_len; | |
80 | ||
81 | /* If data remains, store it in the pagelist */ | |
82 | rqstp->rq_arg.page_len = bc; | |
83 | rqstp->rq_arg.page_base = 0; | |
84 | rqstp->rq_arg.pages = &rqstp->rq_pages[1]; | |
85 | sge_no = 1; | |
86 | while (bc && sge_no < ctxt->count) { | |
87 | page = ctxt->pages[sge_no]; | |
88 | put_page(rqstp->rq_pages[sge_no]); | |
89 | rqstp->rq_pages[sge_no] = page; | |
0bf48289 | 90 | bc -= min_t(u32, bc, ctxt->sge[sge_no].length); |
d5b31be6 TT |
91 | rqstp->rq_arg.buflen += ctxt->sge[sge_no].length; |
92 | sge_no++; | |
93 | } | |
94 | rqstp->rq_respages = &rqstp->rq_pages[sge_no]; | |
7e4359e2 | 95 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
d5b31be6 TT |
96 | |
97 | /* We should never run out of SGE because the limit is defined to | |
98 | * support the max allowed RPC data length | |
99 | */ | |
100 | BUG_ON(bc && (sge_no == ctxt->count)); | |
101 | BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len) | |
102 | != byte_count); | |
103 | BUG_ON(rqstp->rq_arg.len != byte_count); | |
104 | ||
105 | /* If not all pages were used from the SGL, free the remaining ones */ | |
106 | bc = sge_no; | |
107 | while (sge_no < ctxt->count) { | |
108 | page = ctxt->pages[sge_no++]; | |
109 | put_page(page); | |
110 | } | |
111 | ctxt->count = bc; | |
112 | ||
113 | /* Set up tail */ | |
114 | rqstp->rq_arg.tail[0].iov_base = NULL; | |
115 | rqstp->rq_arg.tail[0].iov_len = 0; | |
116 | } | |
117 | ||
0bf48289 | 118 | static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) |
d5b31be6 | 119 | { |
0bf48289 SW |
120 | if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) == |
121 | RDMA_TRANSPORT_IWARP) | |
122 | return 1; | |
123 | else | |
124 | return min_t(int, sge_count, xprt->sc_max_sge); | |
125 | } | |
d5b31be6 | 126 | |
0bf48289 SW |
127 | typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt, |
128 | struct svc_rqst *rqstp, | |
129 | struct svc_rdma_op_ctxt *head, | |
130 | int *page_no, | |
131 | u32 *page_offset, | |
132 | u32 rs_handle, | |
133 | u32 rs_length, | |
134 | u64 rs_offset, | |
135 | int last); | |
136 | ||
137 | /* Issue an RDMA_READ using the local lkey to map the data sink */ | |
138 | static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, | |
139 | struct svc_rqst *rqstp, | |
140 | struct svc_rdma_op_ctxt *head, | |
141 | int *page_no, | |
142 | u32 *page_offset, | |
143 | u32 rs_handle, | |
144 | u32 rs_length, | |
145 | u64 rs_offset, | |
146 | int last) | |
147 | { | |
148 | struct ib_send_wr read_wr; | |
149 | int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; | |
150 | struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); | |
151 | int ret, read, pno; | |
152 | u32 pg_off = *page_offset; | |
153 | u32 pg_no = *page_no; | |
154 | ||
155 | ctxt->direction = DMA_FROM_DEVICE; | |
156 | ctxt->read_hdr = head; | |
157 | pages_needed = | |
158 | min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed)); | |
159 | read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); | |
160 | ||
161 | for (pno = 0; pno < pages_needed; pno++) { | |
162 | int len = min_t(int, rs_length, PAGE_SIZE - pg_off); | |
163 | ||
164 | head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; | |
165 | head->arg.page_len += len; | |
166 | head->arg.len += len; | |
167 | if (!pg_off) | |
168 | head->count++; | |
169 | rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; | |
7e4359e2 | 170 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
0bf48289 SW |
171 | ctxt->sge[pno].addr = |
172 | ib_dma_map_page(xprt->sc_cm_id->device, | |
173 | head->arg.pages[pg_no], pg_off, | |
174 | PAGE_SIZE - pg_off, | |
175 | DMA_FROM_DEVICE); | |
176 | ret = ib_dma_mapping_error(xprt->sc_cm_id->device, | |
177 | ctxt->sge[pno].addr); | |
178 | if (ret) | |
179 | goto err; | |
180 | atomic_inc(&xprt->sc_dma_used); | |
d5b31be6 | 181 | |
0bf48289 SW |
182 | /* The lkey here is either a local dma lkey or a dma_mr lkey */ |
183 | ctxt->sge[pno].lkey = xprt->sc_dma_lkey; | |
184 | ctxt->sge[pno].length = len; | |
185 | ctxt->count++; | |
186 | ||
187 | /* adjust offset and wrap to next page if needed */ | |
188 | pg_off += len; | |
189 | if (pg_off == PAGE_SIZE) { | |
190 | pg_off = 0; | |
191 | pg_no++; | |
d5b31be6 | 192 | } |
0bf48289 | 193 | rs_length -= len; |
d5b31be6 | 194 | } |
0bf48289 SW |
195 | |
196 | if (last && rs_length == 0) | |
197 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
198 | else | |
199 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
200 | ||
201 | memset(&read_wr, 0, sizeof(read_wr)); | |
202 | read_wr.wr_id = (unsigned long)ctxt; | |
203 | read_wr.opcode = IB_WR_RDMA_READ; | |
204 | ctxt->wr_op = read_wr.opcode; | |
205 | read_wr.send_flags = IB_SEND_SIGNALED; | |
206 | read_wr.wr.rdma.rkey = rs_handle; | |
207 | read_wr.wr.rdma.remote_addr = rs_offset; | |
208 | read_wr.sg_list = ctxt->sge; | |
209 | read_wr.num_sge = pages_needed; | |
210 | ||
211 | ret = svc_rdma_send(xprt, &read_wr); | |
212 | if (ret) { | |
213 | pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); | |
214 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | |
215 | goto err; | |
216 | } | |
217 | ||
218 | /* return current location in page array */ | |
219 | *page_no = pg_no; | |
220 | *page_offset = pg_off; | |
221 | ret = read; | |
222 | atomic_inc(&rdma_stat_read); | |
223 | return ret; | |
224 | err: | |
225 | svc_rdma_unmap_dma(ctxt); | |
226 | svc_rdma_put_context(ctxt, 0); | |
227 | return ret; | |
d5b31be6 TT |
228 | } |
229 | ||
0bf48289 SW |
230 | /* Issue an RDMA_READ using an FRMR to map the data sink */ |
231 | static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, | |
146b6df6 TT |
232 | struct svc_rqst *rqstp, |
233 | struct svc_rdma_op_ctxt *head, | |
0bf48289 SW |
234 | int *page_no, |
235 | u32 *page_offset, | |
236 | u32 rs_handle, | |
237 | u32 rs_length, | |
238 | u64 rs_offset, | |
239 | int last) | |
146b6df6 | 240 | { |
0bf48289 SW |
241 | struct ib_send_wr read_wr; |
242 | struct ib_send_wr inv_wr; | |
243 | struct ib_send_wr fastreg_wr; | |
244 | u8 key; | |
245 | int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; | |
246 | struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); | |
247 | struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); | |
248 | int ret, read, pno; | |
249 | u32 pg_off = *page_offset; | |
250 | u32 pg_no = *page_no; | |
146b6df6 | 251 | |
146b6df6 TT |
252 | if (IS_ERR(frmr)) |
253 | return -ENOMEM; | |
254 | ||
0bf48289 SW |
255 | ctxt->direction = DMA_FROM_DEVICE; |
256 | ctxt->frmr = frmr; | |
257 | pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); | |
258 | read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); | |
146b6df6 | 259 | |
0bf48289 | 260 | frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]); |
146b6df6 TT |
261 | frmr->direction = DMA_FROM_DEVICE; |
262 | frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); | |
0bf48289 SW |
263 | frmr->map_len = pages_needed << PAGE_SHIFT; |
264 | frmr->page_list_len = pages_needed; | |
265 | ||
266 | for (pno = 0; pno < pages_needed; pno++) { | |
267 | int len = min_t(int, rs_length, PAGE_SIZE - pg_off); | |
268 | ||
269 | head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; | |
270 | head->arg.page_len += len; | |
271 | head->arg.len += len; | |
272 | if (!pg_off) | |
273 | head->count++; | |
274 | rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; | |
275 | rqstp->rq_next_page = rqstp->rq_respages + 1; | |
276 | frmr->page_list->page_list[pno] = | |
b432e6b3 | 277 | ib_dma_map_page(xprt->sc_cm_id->device, |
0bf48289 | 278 | head->arg.pages[pg_no], 0, |
b432e6b3 | 279 | PAGE_SIZE, DMA_FROM_DEVICE); |
0bf48289 SW |
280 | ret = ib_dma_mapping_error(xprt->sc_cm_id->device, |
281 | frmr->page_list->page_list[pno]); | |
282 | if (ret) | |
283 | goto err; | |
146b6df6 | 284 | atomic_inc(&xprt->sc_dma_used); |
146b6df6 | 285 | |
0bf48289 SW |
286 | /* adjust offset and wrap to next page if needed */ |
287 | pg_off += len; | |
288 | if (pg_off == PAGE_SIZE) { | |
289 | pg_off = 0; | |
290 | pg_no++; | |
291 | } | |
292 | rs_length -= len; | |
146b6df6 TT |
293 | } |
294 | ||
0bf48289 SW |
295 | if (last && rs_length == 0) |
296 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
297 | else | |
298 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | |
d5b31be6 | 299 | |
0bf48289 SW |
300 | /* Bump the key */ |
301 | key = (u8)(frmr->mr->lkey & 0x000000FF); | |
302 | ib_update_fast_reg_key(frmr->mr, ++key); | |
303 | ||
304 | ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset; | |
305 | ctxt->sge[0].lkey = frmr->mr->lkey; | |
306 | ctxt->sge[0].length = read; | |
307 | ctxt->count = 1; | |
308 | ctxt->read_hdr = head; | |
309 | ||
310 | /* Prepare FASTREG WR */ | |
311 | memset(&fastreg_wr, 0, sizeof(fastreg_wr)); | |
312 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; | |
313 | fastreg_wr.send_flags = IB_SEND_SIGNALED; | |
314 | fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; | |
315 | fastreg_wr.wr.fast_reg.page_list = frmr->page_list; | |
316 | fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; | |
317 | fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | |
318 | fastreg_wr.wr.fast_reg.length = frmr->map_len; | |
319 | fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; | |
320 | fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; | |
321 | fastreg_wr.next = &read_wr; | |
322 | ||
323 | /* Prepare RDMA_READ */ | |
324 | memset(&read_wr, 0, sizeof(read_wr)); | |
325 | read_wr.send_flags = IB_SEND_SIGNALED; | |
326 | read_wr.wr.rdma.rkey = rs_handle; | |
327 | read_wr.wr.rdma.remote_addr = rs_offset; | |
328 | read_wr.sg_list = ctxt->sge; | |
329 | read_wr.num_sge = 1; | |
330 | if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { | |
331 | read_wr.opcode = IB_WR_RDMA_READ_WITH_INV; | |
332 | read_wr.wr_id = (unsigned long)ctxt; | |
333 | read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; | |
334 | } else { | |
335 | read_wr.opcode = IB_WR_RDMA_READ; | |
336 | read_wr.next = &inv_wr; | |
337 | /* Prepare invalidate */ | |
338 | memset(&inv_wr, 0, sizeof(inv_wr)); | |
339 | inv_wr.wr_id = (unsigned long)ctxt; | |
340 | inv_wr.opcode = IB_WR_LOCAL_INV; | |
83710fc7 | 341 | inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; |
0bf48289 SW |
342 | inv_wr.ex.invalidate_rkey = frmr->mr->lkey; |
343 | } | |
344 | ctxt->wr_op = read_wr.opcode; | |
345 | ||
346 | /* Post the chain */ | |
347 | ret = svc_rdma_send(xprt, &fastreg_wr); | |
348 | if (ret) { | |
349 | pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); | |
350 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | |
351 | goto err; | |
d5b31be6 | 352 | } |
d5b31be6 | 353 | |
0bf48289 SW |
354 | /* return current location in page array */ |
355 | *page_no = pg_no; | |
356 | *page_offset = pg_off; | |
357 | ret = read; | |
358 | atomic_inc(&rdma_stat_read); | |
359 | return ret; | |
360 | err: | |
361 | svc_rdma_unmap_dma(ctxt); | |
362 | svc_rdma_put_context(ctxt, 0); | |
363 | svc_rdma_put_frmr(xprt, frmr); | |
364 | return ret; | |
d5b31be6 TT |
365 | } |
366 | ||
0bf48289 SW |
367 | static int rdma_read_chunks(struct svcxprt_rdma *xprt, |
368 | struct rpcrdma_msg *rmsgp, | |
369 | struct svc_rqst *rqstp, | |
370 | struct svc_rdma_op_ctxt *head) | |
d5b31be6 | 371 | { |
0bf48289 | 372 | int page_no, ch_count, ret; |
d5b31be6 | 373 | struct rpcrdma_read_chunk *ch; |
0bf48289 SW |
374 | u32 page_offset, byte_count; |
375 | u64 rs_offset; | |
376 | rdma_reader_fn reader; | |
d5b31be6 TT |
377 | |
378 | /* If no read list is present, return 0 */ | |
379 | ch = svc_rdma_get_read_chunk(rmsgp); | |
380 | if (!ch) | |
381 | return 0; | |
382 | ||
d5b31be6 | 383 | svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); |
a6f911c0 TT |
384 | if (ch_count > RPCSVC_MAXPAGES) |
385 | return -EINVAL; | |
146b6df6 | 386 | |
0bf48289 SW |
387 | /* The request is completed when the RDMA_READs complete. The |
388 | * head context keeps all the pages that comprise the | |
389 | * request. | |
390 | */ | |
391 | head->arg.head[0] = rqstp->rq_arg.head[0]; | |
392 | head->arg.tail[0] = rqstp->rq_arg.tail[0]; | |
393 | head->arg.pages = &head->pages[head->count]; | |
394 | head->hdr_count = head->count; | |
395 | head->arg.page_base = 0; | |
396 | head->arg.page_len = 0; | |
397 | head->arg.len = rqstp->rq_arg.len; | |
398 | head->arg.buflen = rqstp->rq_arg.buflen; | |
59fb3066 | 399 | |
0bf48289 SW |
400 | /* Use FRMR if supported */ |
401 | if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) | |
402 | reader = rdma_read_chunk_frmr; | |
146b6df6 | 403 | else |
0bf48289 | 404 | reader = rdma_read_chunk_lcl; |
d5b31be6 | 405 | |
0bf48289 | 406 | page_no = 0; page_offset = 0; |
d5b31be6 | 407 | for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; |
0bf48289 | 408 | ch->rc_discrim != 0; ch++) { |
d5b31be6 | 409 | |
cec56c8f TT |
410 | xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, |
411 | &rs_offset); | |
0bf48289 SW |
412 | byte_count = ntohl(ch->rc_target.rs_length); |
413 | ||
414 | while (byte_count > 0) { | |
415 | ret = reader(xprt, rqstp, head, | |
416 | &page_no, &page_offset, | |
417 | ntohl(ch->rc_target.rs_handle), | |
418 | byte_count, rs_offset, | |
419 | ((ch+1)->rc_discrim == 0) /* last */ | |
420 | ); | |
421 | if (ret < 0) | |
422 | goto err; | |
423 | byte_count -= ret; | |
424 | rs_offset += ret; | |
425 | head->arg.buflen += ret; | |
d5b31be6 | 426 | } |
d5b31be6 | 427 | } |
0bf48289 SW |
428 | ret = 1; |
429 | err: | |
d5b31be6 | 430 | /* Detach arg pages. svc_recv will replenish them */ |
0bf48289 SW |
431 | for (page_no = 0; |
432 | &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++) | |
433 | rqstp->rq_pages[page_no] = NULL; | |
d5b31be6 | 434 | |
0bf48289 | 435 | return ret; |
d5b31be6 TT |
436 | } |
437 | ||
438 | static int rdma_read_complete(struct svc_rqst *rqstp, | |
02e7452d | 439 | struct svc_rdma_op_ctxt *head) |
d5b31be6 | 440 | { |
d5b31be6 TT |
441 | int page_no; |
442 | int ret; | |
443 | ||
444 | BUG_ON(!head); | |
445 | ||
446 | /* Copy RPC pages */ | |
447 | for (page_no = 0; page_no < head->count; page_no++) { | |
448 | put_page(rqstp->rq_pages[page_no]); | |
449 | rqstp->rq_pages[page_no] = head->pages[page_no]; | |
450 | } | |
451 | /* Point rq_arg.pages past header */ | |
f820c57e | 452 | rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; |
d5b31be6 TT |
453 | rqstp->rq_arg.page_len = head->arg.page_len; |
454 | rqstp->rq_arg.page_base = head->arg.page_base; | |
455 | ||
456 | /* rq_respages starts after the last arg page */ | |
457 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; | |
7e4359e2 | 458 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
d5b31be6 TT |
459 | |
460 | /* Rebuild rq_arg head and tail. */ | |
461 | rqstp->rq_arg.head[0] = head->arg.head[0]; | |
462 | rqstp->rq_arg.tail[0] = head->arg.tail[0]; | |
463 | rqstp->rq_arg.len = head->arg.len; | |
464 | rqstp->rq_arg.buflen = head->arg.buflen; | |
465 | ||
02e7452d TT |
466 | /* Free the context */ |
467 | svc_rdma_put_context(head, 0); | |
468 | ||
d5b31be6 TT |
469 | /* XXX: What should this be? */ |
470 | rqstp->rq_prot = IPPROTO_MAX; | |
69500c43 | 471 | svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt); |
d5b31be6 | 472 | |
d5b31be6 TT |
473 | ret = rqstp->rq_arg.head[0].iov_len |
474 | + rqstp->rq_arg.page_len | |
475 | + rqstp->rq_arg.tail[0].iov_len; | |
476 | dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, " | |
477 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", | |
478 | ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, | |
479 | rqstp->rq_arg.head[0].iov_len); | |
480 | ||
d5b31be6 TT |
481 | return ret; |
482 | } | |
483 | ||
484 | /* | |
485 | * Set up the rqstp thread context to point to the RQ buffer. If | |
486 | * necessary, pull additional data from the client with an RDMA_READ | |
487 | * request. | |
488 | */ | |
489 | int svc_rdma_recvfrom(struct svc_rqst *rqstp) | |
490 | { | |
491 | struct svc_xprt *xprt = rqstp->rq_xprt; | |
492 | struct svcxprt_rdma *rdma_xprt = | |
493 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | |
494 | struct svc_rdma_op_ctxt *ctxt = NULL; | |
495 | struct rpcrdma_msg *rmsgp; | |
496 | int ret = 0; | |
497 | int len; | |
498 | ||
499 | dprintk("svcrdma: rqstp=%p\n", rqstp); | |
500 | ||
24b8b447 | 501 | spin_lock_bh(&rdma_xprt->sc_rq_dto_lock); |
d5b31be6 TT |
502 | if (!list_empty(&rdma_xprt->sc_read_complete_q)) { |
503 | ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, | |
504 | struct svc_rdma_op_ctxt, | |
505 | dto_q); | |
506 | list_del_init(&ctxt->dto_q); | |
24b8b447 | 507 | spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); |
d5b31be6 | 508 | return rdma_read_complete(rqstp, ctxt); |
0bf48289 | 509 | } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { |
d5b31be6 TT |
510 | ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, |
511 | struct svc_rdma_op_ctxt, | |
512 | dto_q); | |
513 | list_del_init(&ctxt->dto_q); | |
514 | } else { | |
515 | atomic_inc(&rdma_stat_rq_starve); | |
516 | clear_bit(XPT_DATA, &xprt->xpt_flags); | |
517 | ctxt = NULL; | |
518 | } | |
519 | spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); | |
520 | if (!ctxt) { | |
521 | /* This is the EAGAIN path. The svc_recv routine will | |
522 | * return -EAGAIN, the nfsd thread will go to call into | |
523 | * svc_recv again and we shouldn't be on the active | |
524 | * transport list | |
525 | */ | |
526 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) | |
527 | goto close_out; | |
528 | ||
d5b31be6 TT |
529 | goto out; |
530 | } | |
531 | dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", | |
532 | ctxt, rdma_xprt, rqstp, ctxt->wc_status); | |
533 | BUG_ON(ctxt->wc_status != IB_WC_SUCCESS); | |
534 | atomic_inc(&rdma_stat_recv); | |
535 | ||
536 | /* Build up the XDR from the receive buffers. */ | |
537 | rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len); | |
538 | ||
539 | /* Decode the RDMA header. */ | |
540 | len = svc_rdma_xdr_decode_req(&rmsgp, rqstp); | |
541 | rqstp->rq_xprt_hlen = len; | |
542 | ||
543 | /* If the request is invalid, reply with an error */ | |
544 | if (len < 0) { | |
545 | if (len == -ENOSYS) | |
008fdbc5 | 546 | svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); |
d5b31be6 TT |
547 | goto close_out; |
548 | } | |
549 | ||
d16d4009 | 550 | /* Read read-list data. */ |
0bf48289 | 551 | ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt); |
d16d4009 TT |
552 | if (ret > 0) { |
553 | /* read-list posted, defer until data received from client. */ | |
b1721d2b | 554 | goto defer; |
0bf48289 | 555 | } else if (ret < 0) { |
d16d4009 TT |
556 | /* Post of read-list failed, free context. */ |
557 | svc_rdma_put_context(ctxt, 1); | |
558 | return 0; | |
559 | } | |
d5b31be6 | 560 | |
d5b31be6 TT |
561 | ret = rqstp->rq_arg.head[0].iov_len |
562 | + rqstp->rq_arg.page_len | |
563 | + rqstp->rq_arg.tail[0].iov_len; | |
564 | svc_rdma_put_context(ctxt, 0); | |
565 | out: | |
566 | dprintk("svcrdma: ret = %d, rq_arg.len =%d, " | |
567 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", | |
568 | ret, rqstp->rq_arg.len, | |
569 | rqstp->rq_arg.head[0].iov_base, | |
570 | rqstp->rq_arg.head[0].iov_len); | |
571 | rqstp->rq_prot = IPPROTO_MAX; | |
572 | svc_xprt_copy_addrs(rqstp, xprt); | |
d5b31be6 TT |
573 | return ret; |
574 | ||
575 | close_out: | |
0e7f011a | 576 | if (ctxt) |
d5b31be6 | 577 | svc_rdma_put_context(ctxt, 1); |
d5b31be6 TT |
578 | dprintk("svcrdma: transport %p is closing\n", xprt); |
579 | /* | |
580 | * Set the close bit and enqueue it. svc_recv will see the | |
581 | * close bit and call svc_xprt_delete | |
582 | */ | |
583 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | |
b1721d2b | 584 | defer: |
d5b31be6 TT |
585 | return 0; |
586 | } |