// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2015, 2017 Oracle. All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 */

/* Lightweight memory registration using Fast Memory Regions (FMR).
 * Referred to sometimes as MTHCAFMR mode.
 *
 * FMR uses synchronous memory registration and deregistration.
 * FMR registration is known to be fast, but FMR deregistration
 * can take tens of usecs to complete.
 */
/* Normal operation
 *
 * A Memory Region is prepared for RDMA READ or WRITE using the
 * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
 * finished, the Memory Region is unmapped using the ib_unmap_fmr
 * verb (fmr_op_unmap_sync).
 */

#include "xprt_rdma.h"

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY        RPCDBG_TRANS
#endif

/* Maximum scatter/gather per FMR */
#define RPCRDMA_MAX_FMR_SGES    (64)

/* Access mode of externally registered pages */
enum {
        RPCRDMA_FMR_ACCESS_FLAGS        = IB_ACCESS_REMOTE_WRITE |
                                          IB_ACCESS_REMOTE_READ,
};
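
/* fmr_is_supported - check whether "fmr" registration mode can be used
 *
 * FMR mode requires that the device's verbs interface provide the
 * ->alloc_fmr method.
 */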
bool
fmr_is_supported(struct rpcrdma_ia *ia)
{
        if (!ia->ri_device->alloc_fmr) {
                pr_info("rpcrdma: 'fmr' mode is not supported by device %s\n",
                        ia->ri_device->name);
                return false;
        }
        return true;
}
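
/* fmr_op_init_mr - set up the per-MR resources used by FMR mode:
 * the array of DMA addresses, the scatterlist, and the ib_fmr itself.
 */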
static int
fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
        static struct ib_fmr_attr fmr_attr = {
                .max_pages      = RPCRDMA_MAX_FMR_SGES,
                .max_maps       = 1,
                .page_shift     = PAGE_SHIFT
        };

        mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
                                       sizeof(u64), GFP_KERNEL);
        if (!mr->fmr.fm_physaddrs)
                goto out_free;

        mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
                            sizeof(*mr->mr_sg), GFP_KERNEL);
        if (!mr->mr_sg)
                goto out_free;

        sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);

        mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
                                     &fmr_attr);
        if (IS_ERR(mr->fmr.fm_mr))
                goto out_fmr_err;

        return 0;

out_fmr_err:
        dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
                PTR_ERR(mr->fmr.fm_mr));

out_free:
        kfree(mr->mr_sg);
        kfree(mr->fmr.fm_physaddrs);
        return -ENOMEM;
}
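
/* __fmr_unmap - unmap one FMR by passing it to ib_unmap_fmr() on a
 * temporary single-entry list.
 */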
static int
__fmr_unmap(struct rpcrdma_mr *mr)
{
        LIST_HEAD(l);
        int rc;

        list_add(&mr->fmr.fm_mr->list, &l);
        rc = ib_unmap_fmr(&l);
        list_del(&mr->fmr.fm_mr->list);
        return rc;
}
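
/* fmr_op_release_mr - unmap and deallocate the underlying ib_fmr, then
 * free the MR and the resources allocated by fmr_op_init_mr.
 */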
static void
fmr_op_release_mr(struct rpcrdma_mr *mr)
{
        LIST_HEAD(unmap_list);
        int rc;

        /* Ensure MR is not on any rl_registered list */
        if (!list_empty(&mr->mr_list))
                list_del(&mr->mr_list);

        kfree(mr->fmr.fm_physaddrs);
        kfree(mr->mr_sg);

        /* In case this one was left mapped, try to unmap it
         * to prevent dealloc_fmr from failing with EBUSY
         */
        rc = __fmr_unmap(mr);
        if (rc)
                pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
                       mr, rc);

        rc = ib_dealloc_fmr(mr->fmr.fm_mr);
        if (rc)
                pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
                       mr, rc);

        kfree(mr);
}

/* Reset of a single FMR.
 */
static void
fmr_op_recover_mr(struct rpcrdma_mr *mr)
{
        struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
        int rc;

        /* ORDER: invalidate first */
        rc = __fmr_unmap(mr);
        if (rc)
                goto out_release;

        /* ORDER: then DMA unmap */
        rpcrdma_mr_unmap_and_put(mr);

        r_xprt->rx_stats.mrs_recovered++;
        return;

out_release:
        pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
        r_xprt->rx_stats.mrs_orphaned++;

        trace_xprtrdma_dma_unmap(mr);
        ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
                        mr->mr_sg, mr->mr_nents, mr->mr_dir);

        spin_lock(&r_xprt->rx_buf.rb_mrlock);
        list_del(&mr->mr_all);
        spin_unlock(&r_xprt->rx_buf.rb_mrlock);

        fmr_op_release_mr(mr);
}
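
/* fmr_op_open - size the transport's segment limit so that a maximum
 * payload (RPCRDMA_MAX_DATA_SEGS pages) can be conveyed using
 * full-size FMRs.
 */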
static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
            struct rpcrdma_create_data_internal *cdata)
{
        ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
                                RPCRDMA_MAX_FMR_SGES);
        return 0;
}

/* FMR mode conveys up to 64 pages of payload per chunk segment.
 */
static size_t
fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
{
        return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
                     RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
}

/* Use the ib_map_phys_fmr() verb to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 */
static struct rpcrdma_mr_seg *
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
           int nsegs, bool writing, struct rpcrdma_mr **out)
{
        struct rpcrdma_mr_seg *seg1 = seg;
        int len, pageoff, i, rc;
        struct rpcrdma_mr *mr;
        u64 *dma_pages;

        mr = rpcrdma_mr_get(r_xprt);
        if (!mr)
                return ERR_PTR(-EAGAIN);

        pageoff = offset_in_page(seg1->mr_offset);
        seg1->mr_offset -= pageoff;     /* start of page */
        seg1->mr_len += pageoff;
        len = -pageoff;
        if (nsegs > RPCRDMA_MAX_FMR_SGES)
                nsegs = RPCRDMA_MAX_FMR_SGES;
        for (i = 0; i < nsegs;) {
                if (seg->mr_page)
                        sg_set_page(&mr->mr_sg[i],
                                    seg->mr_page,
                                    seg->mr_len,
                                    offset_in_page(seg->mr_offset));
                else
                        sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
                                   seg->mr_len);
                len += seg->mr_len;
                ++seg;
                ++i;
                /* Check for holes */
                if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
                    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
                        break;
        }
        mr->mr_dir = rpcrdma_data_dir(writing);

        mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
                                     mr->mr_sg, i, mr->mr_dir);
        if (!mr->mr_nents)
                goto out_dmamap_err;

        for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
                dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
        rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
                             dma_pages[0]);
        if (rc)
                goto out_maperr;

        mr->mr_handle = mr->fmr.fm_mr->rkey;
        mr->mr_length = len;
        mr->mr_offset = dma_pages[0] + pageoff;

        *out = mr;
        return seg;

out_dmamap_err:
        pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
               mr->mr_sg, i);
        rpcrdma_mr_put(mr);
        return ERR_PTR(-EIO);

out_maperr:
        pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
               len, (unsigned long long)dma_pages[0],
               pageoff, mr->mr_nents, rc);
        rpcrdma_mr_unmap_and_put(mr);
        return ERR_PTR(-EIO);
}

/* Post Send WR containing the RPC Call message.
 */
static int
fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
{
        struct ib_send_wr *bad_wr;

        return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, &bad_wr);
}

/* Invalidate all memory regions that were registered for "req".
 *
 * Sleeps until it is safe for the host CPU to access the
 * previously mapped memory regions.
 *
 * Caller ensures that @mrs is not empty before the call. This
 * function empties the list.
 */
static void
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{
        struct rpcrdma_mr *mr;
        LIST_HEAD(unmap_list);
        int rc;

        /* ORDER: Invalidate all of the req's MRs first
         *
         * ib_unmap_fmr() is slow, so use a single call instead
         * of one call per mapped FMR.
         */
        list_for_each_entry(mr, mrs, mr_list) {
                dprintk("RPC: %s: unmapping fmr %p\n",
                        __func__, &mr->fmr);
                trace_xprtrdma_localinv(mr);
                list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
        }
        r_xprt->rx_stats.local_inv_needed++;
        rc = ib_unmap_fmr(&unmap_list);
        if (rc)
                goto out_reset;

        /* ORDER: Now DMA unmap all of the req's MRs, and return
         * them to the free MR list.
         */
        while (!list_empty(mrs)) {
                mr = rpcrdma_mr_pop(mrs);
                list_del(&mr->fmr.fm_mr->list);
                rpcrdma_mr_unmap_and_put(mr);
        }

        return;

out_reset:
        pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);

        while (!list_empty(mrs)) {
                mr = rpcrdma_mr_pop(mrs);
                list_del(&mr->fmr.fm_mr->list);
                fmr_op_recover_mr(mr);
        }
}
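
/* The ops vector that plugs FMR-based memory registration into the
 * generic xprtrdma transport code.
 */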
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
        .ro_map                 = fmr_op_map,
        .ro_send                = fmr_op_send,
        .ro_unmap_sync          = fmr_op_unmap_sync,
        .ro_recover_mr          = fmr_op_recover_mr,
        .ro_open                = fmr_op_open,
        .ro_maxpages            = fmr_op_maxpages,
        .ro_init_mr             = fmr_op_init_mr,
        .ro_release_mr          = fmr_op_release_mr,
        .ro_displayname         = "fmr",
        .ro_send_w_inv_ok       = 0,
};