xprtrdma: Simplify rpcrdma_deregister_external() synopsis
[linux-2.6-block.git] net/sunrpc/xprtrdma/verbs.c
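The commit title refers to the function's parameter list. In the version of the file shown in this listing the declaration (around source line 1633 below) reads as follows; the pre-patch form is not included here, so this is only the post-change synopsis:

    int
    rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
                                struct rpcrdma_xprt *r_xprt);

Callers pass the registered segment and its owning transport; as the body below shows, the return value is the number of segments that were unmapped.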
 1/*
2 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */
49
 50#include <linux/interrupt.h>
 51#include <linux/pci.h> /* for Tavor hack below */
 52#include <linux/slab.h>
 53
54#include "xprt_rdma.h"
55
56/*
57 * Globals/Macros
58 */
59
60#ifdef RPC_DEBUG
61# define RPCDBG_FACILITY RPCDBG_TRANS
62#endif
63
64/*
65 * internal functions
66 */
67
68/*
69 * handle replies in tasklet context, using a single, global list
70 * rdma tasklet function -- just turn around and call the func
71 * for all replies on the list
72 */
73
74static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
75static LIST_HEAD(rpcrdma_tasklets_g);
76
77static void
78rpcrdma_run_tasklet(unsigned long data)
79{
80 struct rpcrdma_rep *rep;
81 void (*func)(struct rpcrdma_rep *);
82 unsigned long flags;
83
84 data = data;
85 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
86 while (!list_empty(&rpcrdma_tasklets_g)) {
87 rep = list_entry(rpcrdma_tasklets_g.next,
88 struct rpcrdma_rep, rr_list);
89 list_del(&rep->rr_list);
90 func = rep->rr_func;
91 rep->rr_func = NULL;
92 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
93
94 if (func)
95 func(rep);
96 else
97 rpcrdma_recv_buffer_put(rep);
98
99 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
100 }
101 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
102}
103
104static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
105
106static inline void
107rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
108{
109 unsigned long flags;
110
111 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
112 list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
113 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
114 tasklet_schedule(&rpcrdma_tasklet_g);
115}
116
117static void
118rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
119{
120 struct rpcrdma_ep *ep = context;
121
122 dprintk("RPC: %s: QP error %X on device %s ep %p\n",
123 __func__, event->event, event->device->name, context);
124 if (ep->rep_connected == 1) {
125 ep->rep_connected = -EIO;
126 ep->rep_func(ep);
127 wake_up_all(&ep->rep_connect_wait);
128 }
129}
130
131static void
132rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
133{
134 struct rpcrdma_ep *ep = context;
135
136 dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
137 __func__, event->event, event->device->name, context);
138 if (ep->rep_connected == 1) {
139 ep->rep_connected = -EIO;
140 ep->rep_func(ep);
141 wake_up_all(&ep->rep_connect_wait);
142 }
143}
144
145static inline
146void rpcrdma_event_process(struct ib_wc *wc)
147{
 148 struct rpcrdma_mw *frmr;
149 struct rpcrdma_rep *rep =
150 (struct rpcrdma_rep *)(unsigned long) wc->wr_id;
151
152 dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n",
153 __func__, rep, wc->status, wc->opcode, wc->byte_len);
154
 155 if (!rep) /* send completion that we don't care about */
156 return;
157
158 if (IB_WC_SUCCESS != wc->status) {
159 dprintk("RPC: %s: WC opcode %d status %X, connection lost\n",
160 __func__, wc->opcode, wc->status);
 161 rep->rr_len = ~0U;
162 if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV)
163 rpcrdma_schedule_tasklet(rep);
164 return;
165 }
166
167 switch (wc->opcode) {
168 case IB_WC_FAST_REG_MR:
169 frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
170 frmr->r.frmr.state = FRMR_IS_VALID;
171 break;
172 case IB_WC_LOCAL_INV:
173 frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
174 frmr->r.frmr.state = FRMR_IS_INVALID;
175 break;
176 case IB_WC_RECV:
177 rep->rr_len = wc->byte_len;
178 ib_dma_sync_single_for_cpu(
179 rdmab_to_ia(rep->rr_buffer)->ri_id->device,
180 rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
 181 /* Keep (only) the most recent credits, after checking validity */
182 if (rep->rr_len >= 16) {
183 struct rpcrdma_msg *p =
184 (struct rpcrdma_msg *) rep->rr_base;
185 unsigned int credits = ntohl(p->rm_credit);
186 if (credits == 0) {
187 dprintk("RPC: %s: server"
188 " dropped credits to 0!\n", __func__);
189 /* don't deadlock */
190 credits = 1;
191 } else if (credits > rep->rr_buffer->rb_max_requests) {
192 dprintk("RPC: %s: server"
193 " over-crediting: %d (%d)\n",
194 __func__, credits,
195 rep->rr_buffer->rb_max_requests);
196 credits = rep->rr_buffer->rb_max_requests;
197 }
198 atomic_set(&rep->rr_buffer->rb_credits, credits);
199 }
200 rpcrdma_schedule_tasklet(rep);
201 break;
202 default:
203 dprintk("RPC: %s: unexpected WC event %X\n",
204 __func__, wc->opcode);
205 break;
206 }
207}
208
209static inline int
210rpcrdma_cq_poll(struct ib_cq *cq)
211{
212 struct ib_wc wc;
213 int rc;
214
215 for (;;) {
216 rc = ib_poll_cq(cq, 1, &wc);
217 if (rc < 0) {
218 dprintk("RPC: %s: ib_poll_cq failed %i\n",
219 __func__, rc);
220 return rc;
221 }
222 if (rc == 0)
223 break;
224
225 rpcrdma_event_process(&wc);
226 }
227
228 return 0;
229}
230
231/*
232 * rpcrdma_cq_event_upcall
233 *
 234 * This upcall handles recv and send events.
235 * It is reentrant but processes single events in order to maintain
236 * ordering of receives to keep server credits.
237 *
238 * It is the responsibility of the scheduled tasklet to return
239 * recv buffers to the pool. NOTE: this affects synchronization of
240 * connection shutdown. That is, the structures required for
241 * the completion of the reply handler must remain intact until
242 * all memory has been reclaimed.
243 *
244 * Note that send events are suppressed and do not result in an upcall.
245 */
246static void
247rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context)
248{
249 int rc;
250
251 rc = rpcrdma_cq_poll(cq);
252 if (rc)
253 return;
254
255 rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
256 if (rc) {
257 dprintk("RPC: %s: ib_req_notify_cq failed %i\n",
258 __func__, rc);
259 return;
260 }
261
262 rpcrdma_cq_poll(cq);
263}
264
265#ifdef RPC_DEBUG
266static const char * const conn[] = {
267 "address resolved",
268 "address error",
269 "route resolved",
270 "route error",
271 "connect request",
272 "connect response",
273 "connect error",
274 "unreachable",
275 "rejected",
276 "established",
277 "disconnected",
278 "device removal"
279};
280#endif
281
282static int
283rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
284{
285 struct rpcrdma_xprt *xprt = id->context;
286 struct rpcrdma_ia *ia = &xprt->rx_ia;
287 struct rpcrdma_ep *ep = &xprt->rx_ep;
 288#ifdef RPC_DEBUG
 289 struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
 290#endif
291 struct ib_qp_attr attr;
292 struct ib_qp_init_attr iattr;
293 int connstate = 0;
294
295 switch (event->event) {
296 case RDMA_CM_EVENT_ADDR_RESOLVED:
297 case RDMA_CM_EVENT_ROUTE_RESOLVED:
 298 ia->ri_async_rc = 0;
299 complete(&ia->ri_done);
300 break;
301 case RDMA_CM_EVENT_ADDR_ERROR:
302 ia->ri_async_rc = -EHOSTUNREACH;
303 dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
304 __func__, ep);
305 complete(&ia->ri_done);
306 break;
307 case RDMA_CM_EVENT_ROUTE_ERROR:
308 ia->ri_async_rc = -ENETUNREACH;
309 dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
310 __func__, ep);
311 complete(&ia->ri_done);
312 break;
313 case RDMA_CM_EVENT_ESTABLISHED:
314 connstate = 1;
315 ib_query_qp(ia->ri_id->qp, &attr,
316 IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
317 &iattr);
318 dprintk("RPC: %s: %d responder resources"
319 " (%d initiator)\n",
320 __func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
321 goto connected;
322 case RDMA_CM_EVENT_CONNECT_ERROR:
323 connstate = -ENOTCONN;
324 goto connected;
325 case RDMA_CM_EVENT_UNREACHABLE:
326 connstate = -ENETDOWN;
327 goto connected;
328 case RDMA_CM_EVENT_REJECTED:
329 connstate = -ECONNREFUSED;
330 goto connected;
331 case RDMA_CM_EVENT_DISCONNECTED:
332 connstate = -ECONNABORTED;
333 goto connected;
334 case RDMA_CM_EVENT_DEVICE_REMOVAL:
335 connstate = -ENODEV;
336connected:
 337 dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
338 __func__,
339 (event->event <= 11) ? conn[event->event] :
340 "unknown connection error",
 341 &addr->sin_addr.s_addr,
342 ntohs(addr->sin_port),
343 ep, event->event);
344 atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
345 dprintk("RPC: %s: %sconnected\n",
346 __func__, connstate > 0 ? "" : "dis");
347 ep->rep_connected = connstate;
348 ep->rep_func(ep);
349 wake_up_all(&ep->rep_connect_wait);
350 break;
351 default:
 352 dprintk("RPC: %s: unexpected CM event %d\n",
 353 __func__, event->event);
354 break;
355 }
356
357#ifdef RPC_DEBUG
358 if (connstate == 1) {
359 int ird = attr.max_dest_rd_atomic;
360 int tird = ep->rep_remote_cma.responder_resources;
 361 printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
 362 "on %s, memreg %d slots %d ird %d%s\n",
 363 &addr->sin_addr.s_addr,
364 ntohs(addr->sin_port),
365 ia->ri_id->device->name,
366 ia->ri_memreg_strategy,
367 xprt->rx_buf.rb_max_requests,
368 ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
369 } else if (connstate < 0) {
370 printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
371 &addr->sin_addr.s_addr,
372 ntohs(addr->sin_port),
373 connstate);
374 }
375#endif
376
377 return 0;
378}
379
380static struct rdma_cm_id *
381rpcrdma_create_id(struct rpcrdma_xprt *xprt,
382 struct rpcrdma_ia *ia, struct sockaddr *addr)
383{
384 struct rdma_cm_id *id;
385 int rc;
386
387 init_completion(&ia->ri_done);
388
 389 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
390 if (IS_ERR(id)) {
391 rc = PTR_ERR(id);
392 dprintk("RPC: %s: rdma_create_id() failed %i\n",
393 __func__, rc);
394 return id;
395 }
396
 397 ia->ri_async_rc = -ETIMEDOUT;
398 rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
399 if (rc) {
400 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
401 __func__, rc);
402 goto out;
403 }
404 wait_for_completion_interruptible_timeout(&ia->ri_done,
405 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
406 rc = ia->ri_async_rc;
407 if (rc)
408 goto out;
409
 410 ia->ri_async_rc = -ETIMEDOUT;
411 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
412 if (rc) {
413 dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
414 __func__, rc);
415 goto out;
416 }
417 wait_for_completion_interruptible_timeout(&ia->ri_done,
418 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
419 rc = ia->ri_async_rc;
420 if (rc)
421 goto out;
422
423 return id;
424
425out:
426 rdma_destroy_id(id);
427 return ERR_PTR(rc);
428}
429
430/*
431 * Drain any cq, prior to teardown.
432 */
433static void
434rpcrdma_clean_cq(struct ib_cq *cq)
435{
436 struct ib_wc wc;
437 int count = 0;
438
439 while (1 == ib_poll_cq(cq, 1, &wc))
440 ++count;
441
442 if (count)
443 dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
444 __func__, count, wc.opcode);
445}
446
447/*
448 * Exported functions.
449 */
450
451/*
452 * Open and initialize an Interface Adapter.
453 * o initializes fields of struct rpcrdma_ia, including
454 * interface and provider attributes and protection zone.
455 */
456int
457rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
458{
459 int rc, mem_priv;
460 struct ib_device_attr devattr;
461 struct rpcrdma_ia *ia = &xprt->rx_ia;
462
463 ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
464 if (IS_ERR(ia->ri_id)) {
465 rc = PTR_ERR(ia->ri_id);
466 goto out1;
467 }
468
469 ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
470 if (IS_ERR(ia->ri_pd)) {
471 rc = PTR_ERR(ia->ri_pd);
472 dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
473 __func__, rc);
474 goto out2;
475 }
476
477 /*
478 * Query the device to determine if the requested memory
479 * registration strategy is supported. If it isn't, set the
480 * strategy to a globally supported model.
481 */
482 rc = ib_query_device(ia->ri_id->device, &devattr);
483 if (rc) {
484 dprintk("RPC: %s: ib_query_device failed %d\n",
485 __func__, rc);
486 goto out2;
487 }
488
489 if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
490 ia->ri_have_dma_lkey = 1;
491 ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
492 }
493
 494 if (memreg == RPCRDMA_FRMR) {
495 /* Requires both frmr reg and local dma lkey */
496 if ((devattr.device_cap_flags &
497 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
498 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
 499 dprintk("RPC: %s: FRMR registration "
500 "not supported by HCA\n", __func__);
501 memreg = RPCRDMA_MTHCAFMR;
502 } else {
503 /* Mind the ia limit on FRMR page list depth */
504 ia->ri_max_frmr_depth = min_t(unsigned int,
505 RPCRDMA_MAX_DATA_SEGS,
506 devattr.max_fast_reg_page_list_len);
 507 }
508 }
509 if (memreg == RPCRDMA_MTHCAFMR) {
510 if (!ia->ri_id->device->alloc_fmr) {
511 dprintk("RPC: %s: MTHCAFMR registration "
512 "not supported by HCA\n", __func__);
513#if RPCRDMA_PERSISTENT_REGISTRATION
514 memreg = RPCRDMA_ALLPHYSICAL;
515#else
 516 rc = -ENOMEM;
517 goto out2;
518#endif
519 }
520 }
521
522 /*
523 * Optionally obtain an underlying physical identity mapping in
524 * order to do a memory window-based bind. This base registration
525 * is protected from remote access - that is enabled only by binding
526 * for the specific bytes targeted during each RPC operation, and
527 * revoked after the corresponding completion similar to a storage
528 * adapter.
529 */
 530 switch (memreg) {
 531 case RPCRDMA_FRMR:
 532 break;
 533#if RPCRDMA_PERSISTENT_REGISTRATION
534 case RPCRDMA_ALLPHYSICAL:
535 mem_priv = IB_ACCESS_LOCAL_WRITE |
536 IB_ACCESS_REMOTE_WRITE |
537 IB_ACCESS_REMOTE_READ;
538 goto register_setup;
 539#endif
540 case RPCRDMA_MTHCAFMR:
541 if (ia->ri_have_dma_lkey)
 542 break;
 543 mem_priv = IB_ACCESS_LOCAL_WRITE;
 544#if RPCRDMA_PERSISTENT_REGISTRATION
 545 register_setup:
 546#endif
547 ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
548 if (IS_ERR(ia->ri_bind_mem)) {
549 printk(KERN_ALERT "%s: ib_get_dma_mr for "
 550 "phys register failed with %lX\n",
 551 __func__, PTR_ERR(ia->ri_bind_mem));
552 rc = -ENOMEM;
553 goto out2;
 554 }
555 break;
556 default:
557 printk(KERN_ERR "RPC: Unsupported memory "
558 "registration mode: %d\n", memreg);
559 rc = -ENOMEM;
 560 goto out2;
 561 }
562 dprintk("RPC: %s: memory registration strategy is %d\n",
563 __func__, memreg);
564
565 /* Else will do memory reg/dereg for each chunk */
566 ia->ri_memreg_strategy = memreg;
567
568 return 0;
569out2:
570 rdma_destroy_id(ia->ri_id);
 571 ia->ri_id = NULL;
572out1:
573 return rc;
574}
575
576/*
577 * Clean up/close an IA.
578 * o if event handles and PD have been initialized, free them.
579 * o close the IA
580 */
581void
582rpcrdma_ia_close(struct rpcrdma_ia *ia)
583{
584 int rc;
585
586 dprintk("RPC: %s: entering\n", __func__);
587 if (ia->ri_bind_mem != NULL) {
588 rc = ib_dereg_mr(ia->ri_bind_mem);
589 dprintk("RPC: %s: ib_dereg_mr returned %i\n",
590 __func__, rc);
591 }
592 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
593 if (ia->ri_id->qp)
594 rdma_destroy_qp(ia->ri_id);
595 rdma_destroy_id(ia->ri_id);
596 ia->ri_id = NULL;
597 }
598 if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
599 rc = ib_dealloc_pd(ia->ri_pd);
600 dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
601 __func__, rc);
602 }
603}
604
605/*
606 * Create unconnected endpoint.
607 */
608int
609rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
610 struct rpcrdma_create_data_internal *cdata)
611{
612 struct ib_device_attr devattr;
 613 int rc, err;
614
615 rc = ib_query_device(ia->ri_id->device, &devattr);
616 if (rc) {
617 dprintk("RPC: %s: ib_query_device failed %d\n",
618 __func__, rc);
619 return rc;
620 }
621
622 /* check provider's send/recv wr limits */
623 if (cdata->max_requests > devattr.max_qp_wr)
624 cdata->max_requests = devattr.max_qp_wr;
625
626 ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
627 ep->rep_attr.qp_context = ep;
628 /* send_cq and recv_cq initialized below */
629 ep->rep_attr.srq = NULL;
630 ep->rep_attr.cap.max_send_wr = cdata->max_requests;
631 switch (ia->ri_memreg_strategy) {
632 case RPCRDMA_FRMR: {
633 int depth = 7;
634
635 /* Add room for frmr register and invalidate WRs.
636 * 1. FRMR reg WR for head
637 * 2. FRMR invalidate WR for head
638 * 3. N FRMR reg WRs for pagelist
639 * 4. N FRMR invalidate WRs for pagelist
640 * 5. FRMR reg WR for tail
641 * 6. FRMR invalidate WR for tail
642 * 7. The RDMA_SEND WR
643 */
644
645 /* Calculate N if the device max FRMR depth is smaller than
646 * RPCRDMA_MAX_DATA_SEGS.
647 */
648 if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
649 int delta = RPCRDMA_MAX_DATA_SEGS -
650 ia->ri_max_frmr_depth;
651
652 do {
653 depth += 2; /* FRMR reg + invalidate */
654 delta -= ia->ri_max_frmr_depth;
655 } while (delta > 0);
656
657 }
658 ep->rep_attr.cap.max_send_wr *= depth;
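 /* Worked example (illustrative, not from the original source):
  * assuming RPCRDMA_MAX_DATA_SEGS is 64 and the device limits
  * ri_max_frmr_depth to 16, delta starts at 48 and the loop above
  * runs three times, so depth = 7 + 3 * 2 = 13 and max_send_wr
  * becomes cdata->max_requests * 13 before being capped below.
  */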
 659 if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
 660 cdata->max_requests = devattr.max_qp_wr / depth;
661 if (!cdata->max_requests)
662 return -EINVAL;
663 ep->rep_attr.cap.max_send_wr = cdata->max_requests *
664 depth;
 665 }
 666 break;
 667 }
668 default:
669 break;
670 }
671 ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
672 ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
673 ep->rep_attr.cap.max_recv_sge = 1;
674 ep->rep_attr.cap.max_inline_data = 0;
675 ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
676 ep->rep_attr.qp_type = IB_QPT_RC;
677 ep->rep_attr.port_num = ~0;
678
679 dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
680 "iovs: send %d recv %d\n",
681 __func__,
682 ep->rep_attr.cap.max_send_wr,
683 ep->rep_attr.cap.max_recv_wr,
684 ep->rep_attr.cap.max_send_sge,
685 ep->rep_attr.cap.max_recv_sge);
686
687 /* set trigger for requesting send completion */
688 ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/;
689 if (ep->rep_cqinit <= 2)
690 ep->rep_cqinit = 0;
691 INIT_CQCOUNT(ep);
692 ep->rep_ia = ia;
693 init_waitqueue_head(&ep->rep_connect_wait);
 694 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
 695
696 ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall,
697 rpcrdma_cq_async_error_upcall, NULL,
698 ep->rep_attr.cap.max_recv_wr +
699 ep->rep_attr.cap.max_send_wr + 1, 0);
700 if (IS_ERR(ep->rep_cq)) {
701 rc = PTR_ERR(ep->rep_cq);
702 dprintk("RPC: %s: ib_create_cq failed: %i\n",
703 __func__, rc);
704 goto out1;
705 }
706
707 rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP);
708 if (rc) {
709 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
710 __func__, rc);
711 goto out2;
712 }
713
714 ep->rep_attr.send_cq = ep->rep_cq;
715 ep->rep_attr.recv_cq = ep->rep_cq;
716
717 /* Initialize cma parameters */
718
719 /* RPC/RDMA does not use private data */
720 ep->rep_remote_cma.private_data = NULL;
721 ep->rep_remote_cma.private_data_len = 0;
722
723 /* Client offers RDMA Read but does not initiate */
 724 ep->rep_remote_cma.initiator_depth = 0;
 725 if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
726 ep->rep_remote_cma.responder_resources = 32;
727 else
 728 ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
729
730 ep->rep_remote_cma.retry_count = 7;
731 ep->rep_remote_cma.flow_control = 0;
732 ep->rep_remote_cma.rnr_retry_count = 0;
733
734 return 0;
735
736out2:
737 err = ib_destroy_cq(ep->rep_cq);
738 if (err)
739 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
740 __func__, err);
741out1:
742 return rc;
743}
744
745/*
746 * rpcrdma_ep_destroy
747 *
748 * Disconnect and destroy endpoint. After this, the only
749 * valid operations on the ep are to free it (if dynamically
750 * allocated) or re-create it.
751 *
752 * The caller's error handling must be sure to not leak the endpoint
753 * if this function fails.
754 */
755int
756rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
757{
758 int rc;
759
760 dprintk("RPC: %s: entering, connected is %d\n",
761 __func__, ep->rep_connected);
762
763 cancel_delayed_work_sync(&ep->rep_connect_worker);
764
765 if (ia->ri_id->qp) {
766 rc = rpcrdma_ep_disconnect(ep, ia);
767 if (rc)
768 dprintk("RPC: %s: rpcrdma_ep_disconnect"
769 " returned %i\n", __func__, rc);
770 rdma_destroy_qp(ia->ri_id);
771 ia->ri_id->qp = NULL;
772 }
773
774 /* padding - could be done in rpcrdma_buffer_destroy... */
775 if (ep->rep_pad_mr) {
776 rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
777 ep->rep_pad_mr = NULL;
778 }
779
780 rpcrdma_clean_cq(ep->rep_cq);
781 rc = ib_destroy_cq(ep->rep_cq);
782 if (rc)
783 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
784 __func__, rc);
785
786 return rc;
787}
788
789/*
790 * Connect unconnected endpoint.
791 */
792int
793rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
794{
795 struct rdma_cm_id *id;
796 int rc = 0;
797 int retry_count = 0;
 798
 799 if (ep->rep_connected != 0) {
800 struct rpcrdma_xprt *xprt;
801retry:
802 rc = rpcrdma_ep_disconnect(ep, ia);
803 if (rc && rc != -ENOTCONN)
804 dprintk("RPC: %s: rpcrdma_ep_disconnect"
805 " status %i\n", __func__, rc);
806 rpcrdma_clean_cq(ep->rep_cq);
807
808 xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
809 id = rpcrdma_create_id(xprt, ia,
810 (struct sockaddr *)&xprt->rx_data.addr);
811 if (IS_ERR(id)) {
812 rc = PTR_ERR(id);
813 goto out;
814 }
815 /* TEMP TEMP TEMP - fail if new device:
816 * Deregister/remarshal *all* requests!
817 * Close and recreate adapter, pd, etc!
818 * Re-determine all attributes still sane!
819 * More stuff I haven't thought of!
820 * Rrrgh!
821 */
822 if (ia->ri_id->device != id->device) {
823 printk("RPC: %s: can't reconnect on "
824 "different device!\n", __func__);
825 rdma_destroy_id(id);
826 rc = -ENETDOWN;
827 goto out;
828 }
829 /* END TEMP */
 830 rdma_destroy_qp(ia->ri_id);
831 rdma_destroy_id(ia->ri_id);
832 ia->ri_id = id;
833 }
834
835 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
836 if (rc) {
837 dprintk("RPC: %s: rdma_create_qp failed %i\n",
838 __func__, rc);
839 goto out;
840 }
841
842/* XXX Tavor device performs badly with 2K MTU! */
843if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
844 struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device);
845 if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
846 (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
847 pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
848 struct ib_qp_attr attr = {
849 .path_mtu = IB_MTU_1024
850 };
851 rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
852 }
853}
854
855 ep->rep_connected = 0;
856
857 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
858 if (rc) {
859 dprintk("RPC: %s: rdma_connect() failed with %i\n",
860 __func__, rc);
861 goto out;
862 }
863
864 wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
865
866 /*
867 * Check state. A non-peer reject indicates no listener
868 * (ECONNREFUSED), which may be a transient state. All
869 * others indicate a transport condition which has already
870 * undergone a best-effort.
871 */
872 if (ep->rep_connected == -ECONNREFUSED &&
873 ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
874 dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
875 goto retry;
876 }
877 if (ep->rep_connected <= 0) {
878 /* Sometimes, the only way to reliably connect to remote
879 * CMs is to use same nonzero values for ORD and IRD. */
880 if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
881 (ep->rep_remote_cma.responder_resources == 0 ||
882 ep->rep_remote_cma.initiator_depth !=
883 ep->rep_remote_cma.responder_resources)) {
884 if (ep->rep_remote_cma.responder_resources == 0)
885 ep->rep_remote_cma.responder_resources = 1;
886 ep->rep_remote_cma.initiator_depth =
887 ep->rep_remote_cma.responder_resources;
 888 goto retry;
 889 }
890 rc = ep->rep_connected;
891 } else {
892 dprintk("RPC: %s: connected\n", __func__);
893 }
894
895out:
896 if (rc)
897 ep->rep_connected = rc;
898 return rc;
899}
900
901/*
902 * rpcrdma_ep_disconnect
903 *
904 * This is separate from destroy to facilitate the ability
905 * to reconnect without recreating the endpoint.
906 *
907 * This call is not reentrant, and must not be made in parallel
908 * on the same endpoint.
909 */
910int
911rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
912{
913 int rc;
914
915 rpcrdma_clean_cq(ep->rep_cq);
916 rc = rdma_disconnect(ia->ri_id);
917 if (!rc) {
918 /* returns without wait if not connected */
919 wait_event_interruptible(ep->rep_connect_wait,
920 ep->rep_connected != 1);
921 dprintk("RPC: %s: after wait, %sconnected\n", __func__,
922 (ep->rep_connected == 1) ? "still " : "dis");
923 } else {
924 dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
925 ep->rep_connected = rc;
926 }
927 return rc;
928}
929
930/*
931 * Initialize buffer memory
932 */
933int
934rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
935 struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
936{
937 char *p;
938 size_t len;
939 int i, rc;
 940 struct rpcrdma_mw *r;
941
942 buf->rb_max_requests = cdata->max_requests;
943 spin_lock_init(&buf->rb_lock);
944 atomic_set(&buf->rb_credits, 1);
945
946 /* Need to allocate:
947 * 1. arrays for send and recv pointers
948 * 2. arrays of struct rpcrdma_req to fill in pointers
949 * 3. array of struct rpcrdma_rep for replies
950 * 4. padding, if any
 951 * 5. mw's, fmr's or frmr's, if any
952 * Send/recv buffers in req/rep need to be registered
953 */
954
955 len = buf->rb_max_requests *
956 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
957 len += cdata->padding;
958 switch (ia->ri_memreg_strategy) {
959 case RPCRDMA_FRMR:
960 len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
961 sizeof(struct rpcrdma_mw);
962 break;
963 case RPCRDMA_MTHCAFMR:
964 /* TBD we are perhaps overallocating here */
965 len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
966 sizeof(struct rpcrdma_mw);
967 break;
968 default:
969 break;
970 }
971
972 /* allocate 1, 4 and 5 in one shot */
973 p = kzalloc(len, GFP_KERNEL);
974 if (p == NULL) {
975 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
976 __func__, len);
977 rc = -ENOMEM;
978 goto out;
979 }
980 buf->rb_pool = p; /* for freeing it later */
981
982 buf->rb_send_bufs = (struct rpcrdma_req **) p;
983 p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
984 buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
985 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
986
987 /*
988 * Register the zeroed pad buffer, if any.
989 */
990 if (cdata->padding) {
991 rc = rpcrdma_register_internal(ia, p, cdata->padding,
992 &ep->rep_pad_mr, &ep->rep_pad);
993 if (rc)
994 goto out;
995 }
996 p += cdata->padding;
997
 998 INIT_LIST_HEAD(&buf->rb_mws);
 999 r = (struct rpcrdma_mw *)p;
 1000 switch (ia->ri_memreg_strategy) {
1001 case RPCRDMA_FRMR:
1002 for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
1003 r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
 1004 ia->ri_max_frmr_depth);
1005 if (IS_ERR(r->r.frmr.fr_mr)) {
1006 rc = PTR_ERR(r->r.frmr.fr_mr);
1007 dprintk("RPC: %s: ib_alloc_fast_reg_mr"
1008 " failed %i\n", __func__, rc);
1009 goto out;
1010 }
1011 r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
1012 ia->ri_id->device,
1013 ia->ri_max_frmr_depth);
1014 if (IS_ERR(r->r.frmr.fr_pgl)) {
1015 rc = PTR_ERR(r->r.frmr.fr_pgl);
1016 dprintk("RPC: %s: "
1017 "ib_alloc_fast_reg_page_list "
1018 "failed %i\n", __func__, rc);
1019
1020 ib_dereg_mr(r->r.frmr.fr_mr);
1021 goto out;
1022 }
1023 list_add(&r->mw_list, &buf->rb_mws);
1024 ++r;
1025 }
1026 break;
 1027 case RPCRDMA_MTHCAFMR:
1028 /* TBD we are perhaps overallocating here */
1029 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
1030 static struct ib_fmr_attr fa =
1031 { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
1032 r->r.fmr = ib_alloc_fmr(ia->ri_pd,
1033 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
1034 &fa);
1035 if (IS_ERR(r->r.fmr)) {
1036 rc = PTR_ERR(r->r.fmr);
1037 dprintk("RPC: %s: ib_alloc_fmr"
1038 " failed %i\n", __func__, rc);
1039 goto out;
1040 }
1041 list_add(&r->mw_list, &buf->rb_mws);
1042 ++r;
1043 }
 1044 break;
1045 default:
1046 break;
1047 }
1048
1049 /*
1050 * Allocate/init the request/reply buffers. Doing this
1051 * using kmalloc for now -- one for each buf.
1052 */
1053 for (i = 0; i < buf->rb_max_requests; i++) {
1054 struct rpcrdma_req *req;
1055 struct rpcrdma_rep *rep;
1056
1057 len = cdata->inline_wsize + sizeof(struct rpcrdma_req);
1058 /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
1059 /* Typical ~2400b, so rounding up saves work later */
1060 if (len < 4096)
1061 len = 4096;
1062 req = kmalloc(len, GFP_KERNEL);
1063 if (req == NULL) {
1064 dprintk("RPC: %s: request buffer %d alloc"
1065 " failed\n", __func__, i);
1066 rc = -ENOMEM;
1067 goto out;
1068 }
1069 memset(req, 0, sizeof(struct rpcrdma_req));
1070 buf->rb_send_bufs[i] = req;
1071 buf->rb_send_bufs[i]->rl_buffer = buf;
1072
1073 rc = rpcrdma_register_internal(ia, req->rl_base,
1074 len - offsetof(struct rpcrdma_req, rl_base),
1075 &buf->rb_send_bufs[i]->rl_handle,
1076 &buf->rb_send_bufs[i]->rl_iov);
1077 if (rc)
1078 goto out;
1079
1080 buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req);
1081
1082 len = cdata->inline_rsize + sizeof(struct rpcrdma_rep);
1083 rep = kmalloc(len, GFP_KERNEL);
1084 if (rep == NULL) {
1085 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1086 __func__, i);
1087 rc = -ENOMEM;
1088 goto out;
1089 }
1090 memset(rep, 0, sizeof(struct rpcrdma_rep));
1091 buf->rb_recv_bufs[i] = rep;
1092 buf->rb_recv_bufs[i]->rr_buffer = buf;
1093
1094 rc = rpcrdma_register_internal(ia, rep->rr_base,
1095 len - offsetof(struct rpcrdma_rep, rr_base),
1096 &buf->rb_recv_bufs[i]->rr_handle,
1097 &buf->rb_recv_bufs[i]->rr_iov);
1098 if (rc)
1099 goto out;
1100
1101 }
1102 dprintk("RPC: %s: max_requests %d\n",
1103 __func__, buf->rb_max_requests);
1104 /* done */
1105 return 0;
1106out:
1107 rpcrdma_buffer_destroy(buf);
1108 return rc;
1109}
1110
1111/*
1112 * Unregister and destroy buffer memory. Need to deal with
1113 * partial initialization, so it's callable from failed create.
1114 * Must be called before destroying endpoint, as registrations
1115 * reference it.
1116 */
1117void
1118rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1119{
1120 int rc, i;
1121 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
 1122 struct rpcrdma_mw *r;
1123
1124 /* clean up in reverse order from create
1125 * 1. recv mr memory (mr free, then kfree)
1126 * 2. send mr memory (mr free, then kfree)
1127 * 3. padding (if any) [moved to rpcrdma_ep_destroy]
1128 * 4. arrays
1129 */
1130 dprintk("RPC: %s: entering\n", __func__);
1131
1132 for (i = 0; i < buf->rb_max_requests; i++) {
1133 if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
1134 rpcrdma_deregister_internal(ia,
1135 buf->rb_recv_bufs[i]->rr_handle,
1136 &buf->rb_recv_bufs[i]->rr_iov);
1137 kfree(buf->rb_recv_bufs[i]);
1138 }
1139 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
1140 rpcrdma_deregister_internal(ia,
1141 buf->rb_send_bufs[i]->rl_handle,
1142 &buf->rb_send_bufs[i]->rl_iov);
1143 kfree(buf->rb_send_bufs[i]);
1144 }
1145 }
1146
1147 while (!list_empty(&buf->rb_mws)) {
1148 r = list_entry(buf->rb_mws.next,
1149 struct rpcrdma_mw, mw_list);
1150 list_del(&r->mw_list);
1151 switch (ia->ri_memreg_strategy) {
1152 case RPCRDMA_FRMR:
1153 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1154 if (rc)
1155 dprintk("RPC: %s:"
1156 " ib_dereg_mr"
1157 " failed %i\n",
1158 __func__, rc);
1159 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1160 break;
1161 case RPCRDMA_MTHCAFMR:
1162 rc = ib_dealloc_fmr(r->r.fmr);
1163 if (rc)
1164 dprintk("RPC: %s:"
1165 " ib_dealloc_fmr"
1166 " failed %i\n",
1167 __func__, rc);
1168 break;
1169 default:
1170 break;
1171 }
1172 }
1173
1174 kfree(buf->rb_pool);
1175}
1176
1177/*
1178 * Get a set of request/reply buffers.
1179 *
1180 * Reply buffer (if needed) is attached to send buffer upon return.
1181 * Rule:
1182 * rb_send_index and rb_recv_index MUST always be pointing to the
1183 * *next* available buffer (non-NULL). They are incremented after
1184 * removing buffers, and decremented *before* returning them.
1185 */
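/* Illustrative note (added for clarity, not in the original file):
 * with rb_max_requests == 2, two rpcrdma_buffer_get() calls advance
 * rb_send_index 0 -> 1 -> 2 (pool exhausted), and the matching
 * rpcrdma_buffer_put() calls pre-decrement it 2 -> 1 -> 0, so the
 * index always names the next available slot, as required above.
 */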
1186struct rpcrdma_req *
1187rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1188{
1189 struct rpcrdma_req *req;
1190 unsigned long flags;
1191 int i;
1192 struct rpcrdma_mw *r;
1193
1194 spin_lock_irqsave(&buffers->rb_lock, flags);
1195 if (buffers->rb_send_index == buffers->rb_max_requests) {
1196 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1197 dprintk("RPC: %s: out of request buffers\n", __func__);
1198 return ((struct rpcrdma_req *)NULL);
1199 }
1200
1201 req = buffers->rb_send_bufs[buffers->rb_send_index];
1202 if (buffers->rb_send_index < buffers->rb_recv_index) {
1203 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1204 __func__,
1205 buffers->rb_recv_index - buffers->rb_send_index);
1206 req->rl_reply = NULL;
1207 } else {
1208 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1209 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1210 }
1211 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
1212 if (!list_empty(&buffers->rb_mws)) {
 1213 i = RPCRDMA_MAX_SEGS - 1;
 1214 do {
1215 r = list_entry(buffers->rb_mws.next,
1216 struct rpcrdma_mw, mw_list);
1217 list_del(&r->mw_list);
1218 req->rl_segments[i].mr_chunk.rl_mw = r;
1219 } while (--i >= 0);
1220 }
1221 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1222 return req;
1223}
1224
1225/*
1226 * Put request/reply buffers back into pool.
1227 * Pre-decrement counter/array index.
1228 */
1229void
1230rpcrdma_buffer_put(struct rpcrdma_req *req)
1231{
1232 struct rpcrdma_buffer *buffers = req->rl_buffer;
1233 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
1234 int i;
1235 unsigned long flags;
1236
1237 BUG_ON(req->rl_nchunks != 0);
1238 spin_lock_irqsave(&buffers->rb_lock, flags);
1239 buffers->rb_send_bufs[--buffers->rb_send_index] = req;
1240 req->rl_niovs = 0;
1241 if (req->rl_reply) {
1242 buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
1243 req->rl_reply->rr_func = NULL;
1244 req->rl_reply = NULL;
1245 }
1246 switch (ia->ri_memreg_strategy) {
 1247 case RPCRDMA_FRMR:
 1248 case RPCRDMA_MTHCAFMR:
1249 /*
1250 * Cycle mw's back in reverse order, and "spin" them.
1251 * This delays and scrambles reuse as much as possible.
1252 */
1253 i = 1;
1254 do {
1255 struct rpcrdma_mw **mw;
1256 mw = &req->rl_segments[i].mr_chunk.rl_mw;
1257 list_add_tail(&(*mw)->mw_list, &buffers->rb_mws);
1258 *mw = NULL;
1259 } while (++i < RPCRDMA_MAX_SEGS);
1260 list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list,
1261 &buffers->rb_mws);
1262 req->rl_segments[0].mr_chunk.rl_mw = NULL;
1263 break;
1264 default:
1265 break;
1266 }
1267 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1268}
1269
1270/*
1271 * Recover reply buffers from pool.
1272 * This happens when recovering from error conditions.
1273 * Post-increment counter/array index.
1274 */
1275void
1276rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1277{
1278 struct rpcrdma_buffer *buffers = req->rl_buffer;
1279 unsigned long flags;
1280
1281 if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */
1282 buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
1283 spin_lock_irqsave(&buffers->rb_lock, flags);
1284 if (buffers->rb_recv_index < buffers->rb_max_requests) {
1285 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1286 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1287 }
1288 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1289}
1290
1291/*
1292 * Put reply buffers back into pool when not attached to
 1293 * request. This happens in error conditions.
1294 */
1295void
1296rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1297{
1298 struct rpcrdma_buffer *buffers = rep->rr_buffer;
1299 unsigned long flags;
1300
1301 rep->rr_func = NULL;
1302 spin_lock_irqsave(&buffers->rb_lock, flags);
1303 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
1304 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1305}
1306
1307/*
1308 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
1309 */
1310
1311int
1312rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1313 struct ib_mr **mrp, struct ib_sge *iov)
1314{
1315 struct ib_phys_buf ipb;
1316 struct ib_mr *mr;
1317 int rc;
1318
1319 /*
1320 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
1321 */
1322 iov->addr = ib_dma_map_single(ia->ri_id->device,
1323 va, len, DMA_BIDIRECTIONAL);
1324 iov->length = len;
1325
1326 if (ia->ri_have_dma_lkey) {
1327 *mrp = NULL;
1328 iov->lkey = ia->ri_dma_lkey;
1329 return 0;
1330 } else if (ia->ri_bind_mem != NULL) {
1331 *mrp = NULL;
1332 iov->lkey = ia->ri_bind_mem->lkey;
1333 return 0;
1334 }
1335
1336 ipb.addr = iov->addr;
1337 ipb.size = iov->length;
1338 mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
1339 IB_ACCESS_LOCAL_WRITE, &iov->addr);
1340
1341 dprintk("RPC: %s: phys convert: 0x%llx "
1342 "registered 0x%llx length %d\n",
1343 __func__, (unsigned long long)ipb.addr,
1344 (unsigned long long)iov->addr, len);
1345
1346 if (IS_ERR(mr)) {
1347 *mrp = NULL;
1348 rc = PTR_ERR(mr);
1349 dprintk("RPC: %s: failed with %i\n", __func__, rc);
1350 } else {
1351 *mrp = mr;
1352 iov->lkey = mr->lkey;
1353 rc = 0;
1354 }
1355
1356 return rc;
1357}
1358
1359int
1360rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1361 struct ib_mr *mr, struct ib_sge *iov)
1362{
1363 int rc;
1364
1365 ib_dma_unmap_single(ia->ri_id->device,
1366 iov->addr, iov->length, DMA_BIDIRECTIONAL);
1367
1368 if (NULL == mr)
1369 return 0;
1370
1371 rc = ib_dereg_mr(mr);
1372 if (rc)
1373 dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
1374 return rc;
1375}
1376
1377/*
1378 * Wrappers for chunk registration, shared by read/write chunk code.
1379 */
1380
1381static void
1382rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1383{
1384 seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
1385 seg->mr_dmalen = seg->mr_len;
1386 if (seg->mr_page)
1387 seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
1388 seg->mr_page, offset_in_page(seg->mr_offset),
1389 seg->mr_dmalen, seg->mr_dir);
1390 else
1391 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1392 seg->mr_offset,
1393 seg->mr_dmalen, seg->mr_dir);
1394 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1395 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1396 __func__,
1397 (unsigned long long)seg->mr_dma,
1398 seg->mr_offset, seg->mr_dmalen);
 1399 }
1400}
1401
1402static void
1403rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1404{
1405 if (seg->mr_page)
1406 ib_dma_unmap_page(ia->ri_id->device,
1407 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1408 else
1409 ib_dma_unmap_single(ia->ri_id->device,
1410 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1411}
1412
1413static int
1414rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1415 int *nsegs, int writing, struct rpcrdma_ia *ia,
1416 struct rpcrdma_xprt *r_xprt)
1417{
1418 struct rpcrdma_mr_seg *seg1 = seg;
1419 struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;
1420
1421 u8 key;
1422 int len, pageoff;
1423 int i, rc;
1424 int seg_len;
1425 u64 pa;
1426 int page_no;
1427
1428 pageoff = offset_in_page(seg1->mr_offset);
1429 seg1->mr_offset -= pageoff; /* start of page */
1430 seg1->mr_len += pageoff;
1431 len = -pageoff;
1432 if (*nsegs > ia->ri_max_frmr_depth)
1433 *nsegs = ia->ri_max_frmr_depth;
 1434 for (page_no = i = 0; i < *nsegs;) {
 1435 rpcrdma_map_one(ia, seg, writing);
1436 pa = seg->mr_dma;
1437 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
1438 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->
1439 page_list[page_no++] = pa;
1440 pa += PAGE_SIZE;
1441 }
1442 len += seg->mr_len;
1443 ++seg;
1444 ++i;
1445 /* Check for holes */
1446 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1447 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1448 break;
1449 }
1450 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
1451 __func__, seg1->mr_chunk.rl_mw, i);
1452
1453 if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) {
1454 dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
1455 __func__,
1456 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey);
1457 /* Invalidate before using. */
1458 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1459 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1460 invalidate_wr.next = &frmr_wr;
1461 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1462 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1463 invalidate_wr.ex.invalidate_rkey =
1464 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1465 DECR_CQCOUNT(&r_xprt->rx_ep);
1466 post_wr = &invalidate_wr;
1467 } else
1468 post_wr = &frmr_wr;
1469
1470 /* Bump the key */
1471 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
1472 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
1473
1474 /* Prepare FRMR WR */
1475 memset(&frmr_wr, 0, sizeof frmr_wr);
 1476 frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
 1477 frmr_wr.opcode = IB_WR_FAST_REG_MR;
 1478 frmr_wr.send_flags = IB_SEND_SIGNALED;
 1479 frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
 1480 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
 1481 frmr_wr.wr.fast_reg.page_list_len = page_no;
 1482 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
 1483 frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
 1484 BUG_ON(frmr_wr.wr.fast_reg.length < len);
 1485 frmr_wr.wr.fast_reg.access_flags = (writing ?
1486 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1487 IB_ACCESS_REMOTE_READ);
1488 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1489 DECR_CQCOUNT(&r_xprt->rx_ep);
1490
 1491 rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
1492
1493 if (rc) {
1494 dprintk("RPC: %s: failed ib_post_send for register,"
1495 " status %i\n", __func__, rc);
1496 while (i--)
1497 rpcrdma_unmap_one(ia, --seg);
1498 } else {
1499 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1500 seg1->mr_base = seg1->mr_dma + pageoff;
1501 seg1->mr_nsegs = i;
1502 seg1->mr_len = len;
1503 }
1504 *nsegs = i;
1505 return rc;
1506}
1507
1508static int
1509rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1510 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1511{
1512 struct rpcrdma_mr_seg *seg1 = seg;
1513 struct ib_send_wr invalidate_wr, *bad_wr;
1514 int rc;
1515
1516 while (seg1->mr_nsegs--)
1517 rpcrdma_unmap_one(ia, seg++);
1518
1519 memset(&invalidate_wr, 0, sizeof invalidate_wr);
 1520 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
 1521 invalidate_wr.opcode = IB_WR_LOCAL_INV;
 1522 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1523 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1524 DECR_CQCOUNT(&r_xprt->rx_ep);
1525
1526 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1527 if (rc)
1528 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1529 " status %i\n", __func__, rc);
1530 return rc;
1531}
1532
1533static int
1534rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1535 int *nsegs, int writing, struct rpcrdma_ia *ia)
1536{
1537 struct rpcrdma_mr_seg *seg1 = seg;
1538 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1539 int len, pageoff, i, rc;
1540
1541 pageoff = offset_in_page(seg1->mr_offset);
1542 seg1->mr_offset -= pageoff; /* start of page */
1543 seg1->mr_len += pageoff;
1544 len = -pageoff;
1545 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1546 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1547 for (i = 0; i < *nsegs;) {
1548 rpcrdma_map_one(ia, seg, writing);
1549 physaddrs[i] = seg->mr_dma;
1550 len += seg->mr_len;
1551 ++seg;
1552 ++i;
1553 /* Check for holes */
1554 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1555 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1556 break;
1557 }
1558 rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
1559 physaddrs, i, seg1->mr_dma);
1560 if (rc) {
1561 dprintk("RPC: %s: failed ib_map_phys_fmr "
1562 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1563 len, (unsigned long long)seg1->mr_dma,
1564 pageoff, i, rc);
1565 while (i--)
1566 rpcrdma_unmap_one(ia, --seg);
1567 } else {
1568 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
1569 seg1->mr_base = seg1->mr_dma + pageoff;
1570 seg1->mr_nsegs = i;
1571 seg1->mr_len = len;
1572 }
1573 *nsegs = i;
1574 return rc;
1575}
1576
1577static int
1578rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
1579 struct rpcrdma_ia *ia)
1580{
1581 struct rpcrdma_mr_seg *seg1 = seg;
1582 LIST_HEAD(l);
1583 int rc;
1584
1585 list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
1586 rc = ib_unmap_fmr(&l);
1587 while (seg1->mr_nsegs--)
1588 rpcrdma_unmap_one(ia, seg++);
1589 if (rc)
1590 dprintk("RPC: %s: failed ib_unmap_fmr,"
1591 " status %i\n", __func__, rc);
1592 return rc;
1593}
1594
1595int
1596rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1597 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
1598{
1599 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1600 int rc = 0;
1601
1602 switch (ia->ri_memreg_strategy) {
1603
1604#if RPCRDMA_PERSISTENT_REGISTRATION
1605 case RPCRDMA_ALLPHYSICAL:
1606 rpcrdma_map_one(ia, seg, writing);
1607 seg->mr_rkey = ia->ri_bind_mem->rkey;
1608 seg->mr_base = seg->mr_dma;
1609 seg->mr_nsegs = 1;
1610 nsegs = 1;
1611 break;
1612#endif
1613
1614 /* Registration using frmr registration */
1615 case RPCRDMA_FRMR:
1616 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
1617 break;
1618
 1619 /* Registration using fmr memory registration */
 1620 case RPCRDMA_MTHCAFMR:
 1621 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
1622 break;
1623
 1624 default:
 1625 return -1;
1626 }
1627 if (rc)
1628 return -1;
1629
1630 return nsegs;
1631}
1632
1633int
1634rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
 1635 struct rpcrdma_xprt *r_xprt)
1636{
1637 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1638 int nsegs = seg->mr_nsegs, rc;
1639
1640 switch (ia->ri_memreg_strategy) {
1641
1642#if RPCRDMA_PERSISTENT_REGISTRATION
1643 case RPCRDMA_ALLPHYSICAL:
1644 BUG_ON(nsegs != 1);
1645 rpcrdma_unmap_one(ia, seg);
1646 rc = 0;
1647 break;
1648#endif
1649
1650 case RPCRDMA_FRMR:
1651 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
1652 break;
1653
 1654 case RPCRDMA_MTHCAFMR:
 1655 rc = rpcrdma_deregister_fmr_external(seg, ia);
1656 break;
1657
 1658 default:
1659 break;
1660 }
1661 return nsegs;
1662}
1663
1664/*
1665 * Prepost any receive buffer, then post send.
1666 *
1667 * Receive buffer is donated to hardware, reclaimed upon recv completion.
1668 */
1669int
1670rpcrdma_ep_post(struct rpcrdma_ia *ia,
1671 struct rpcrdma_ep *ep,
1672 struct rpcrdma_req *req)
1673{
1674 struct ib_send_wr send_wr, *send_wr_fail;
1675 struct rpcrdma_rep *rep = req->rl_reply;
1676 int rc;
1677
1678 if (rep) {
1679 rc = rpcrdma_ep_post_recv(ia, ep, rep);
1680 if (rc)
1681 goto out;
1682 req->rl_reply = NULL;
1683 }
1684
1685 send_wr.next = NULL;
1686 send_wr.wr_id = 0ULL; /* no send cookie */
1687 send_wr.sg_list = req->rl_send_iov;
1688 send_wr.num_sge = req->rl_niovs;
1689 send_wr.opcode = IB_WR_SEND;
1690 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
1691 ib_dma_sync_single_for_device(ia->ri_id->device,
1692 req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
1693 DMA_TO_DEVICE);
1694 ib_dma_sync_single_for_device(ia->ri_id->device,
1695 req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
1696 DMA_TO_DEVICE);
1697 ib_dma_sync_single_for_device(ia->ri_id->device,
1698 req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
1699 DMA_TO_DEVICE);
1700
1701 if (DECR_CQCOUNT(ep) > 0)
1702 send_wr.send_flags = 0;
1703 else { /* Provider must take a send completion every now and then */
1704 INIT_CQCOUNT(ep);
1705 send_wr.send_flags = IB_SEND_SIGNALED;
1706 }
1707
1708 rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
1709 if (rc)
1710 dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
1711 rc);
1712out:
1713 return rc;
1714}
1715
1716/*
1717 * (Re)post a receive buffer.
1718 */
1719int
1720rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1721 struct rpcrdma_ep *ep,
1722 struct rpcrdma_rep *rep)
1723{
1724 struct ib_recv_wr recv_wr, *recv_wr_fail;
1725 int rc;
1726
1727 recv_wr.next = NULL;
1728 recv_wr.wr_id = (u64) (unsigned long) rep;
1729 recv_wr.sg_list = &rep->rr_iov;
1730 recv_wr.num_sge = 1;
1731
1732 ib_dma_sync_single_for_cpu(ia->ri_id->device,
1733 rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
1734
1735 DECR_CQCOUNT(ep);
1736 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
1737
1738 if (rc)
1739 dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
1740 rc);
1741 return rc;
1742}