Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
a0ce85f5 CL |
2 | /* |
3 | * Copyright (c) 2015 Oracle. All rights reserved. | |
4 | * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. | |
5 | */ | |
6 | ||
7 | /* Lightweight memory registration using Fast Memory Regions (FMR). | |
8 | * Referred to sometimes as MTHCAFMR mode. | |
9 | * | |
10 | * FMR uses synchronous memory registration and deregistration. | |
11 | * FMR registration is known to be fast, but FMR deregistration | |
12 | * can take tens of usecs to complete. | |
13 | */ | |
14 | ||
fc7fbb59 CL |
15 | /* Normal operation |
16 | * | |
17 | * A Memory Region is prepared for RDMA READ or WRITE using the | |
18 | * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is | |
19 | * finished, the Memory Region is unmapped using the ib_unmap_fmr | |
20 | * verb (fmr_op_unmap). | |
21 | */ | |
22 | ||
a0ce85f5 CL |
23 | #include "xprt_rdma.h" |
24 | ||
25 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | |
26 | # define RPCDBG_FACILITY RPCDBG_TRANS | |
27 | #endif | |
28 | ||
1c9351ee CL |
29 | /* Maximum scatter/gather per FMR */ |
30 | #define RPCRDMA_MAX_FMR_SGES (64) | |
31 | ||
d48b1d29 CL |
32 | /* Access mode of externally registered pages */ |
33 | enum { | |
34 | RPCRDMA_FMR_ACCESS_FLAGS = IB_ACCESS_REMOTE_WRITE | | |
35 | IB_ACCESS_REMOTE_READ, | |
36 | }; | |
37 | ||
b54054ca CL |
38 | bool |
39 | fmr_is_supported(struct rpcrdma_ia *ia) | |
40 | { | |
41 | if (!ia->ri_device->alloc_fmr) { | |
42 | pr_info("rpcrdma: 'fmr' mode is not supported by device %s\n", | |
43 | ia->ri_device->name); | |
44 | return false; | |
45 | } | |
46 | return true; | |
47 | } | |
48 | ||
d48b1d29 | 49 | static int |
e2ac236c | 50 | fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw) |
d48b1d29 CL |
51 | { |
52 | static struct ib_fmr_attr fmr_attr = { | |
53 | .max_pages = RPCRDMA_MAX_FMR_SGES, | |
54 | .max_maps = 1, | |
55 | .page_shift = PAGE_SHIFT | |
56 | }; | |
57 | ||
88975ebe CL |
58 | mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, |
59 | sizeof(u64), GFP_KERNEL); | |
60 | if (!mw->fmr.fm_physaddrs) | |
d48b1d29 CL |
61 | goto out_free; |
62 | ||
63 | mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, | |
64 | sizeof(*mw->mw_sg), GFP_KERNEL); | |
65 | if (!mw->mw_sg) | |
66 | goto out_free; | |
67 | ||
68 | sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES); | |
69 | ||
e2ac236c | 70 | mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, |
88975ebe CL |
71 | &fmr_attr); |
72 | if (IS_ERR(mw->fmr.fm_mr)) | |
d48b1d29 CL |
73 | goto out_fmr_err; |
74 | ||
75 | return 0; | |
76 | ||
77 | out_fmr_err: | |
78 | dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__, | |
88975ebe | 79 | PTR_ERR(mw->fmr.fm_mr)); |
d48b1d29 CL |
80 | |
81 | out_free: | |
82 | kfree(mw->mw_sg); | |
88975ebe | 83 | kfree(mw->fmr.fm_physaddrs); |
d48b1d29 CL |
84 | return -ENOMEM; |
85 | } | |
86 | ||
ead3f26e CL |
87 | static int |
88 | __fmr_unmap(struct rpcrdma_mw *mw) | |
89 | { | |
90 | LIST_HEAD(l); | |
38f1932e | 91 | int rc; |
ead3f26e | 92 | |
88975ebe | 93 | list_add(&mw->fmr.fm_mr->list, &l); |
38f1932e | 94 | rc = ib_unmap_fmr(&l); |
e2f6ef09 | 95 | list_del(&mw->fmr.fm_mr->list); |
38f1932e | 96 | return rc; |
ead3f26e CL |
97 | } |
98 | ||
d48b1d29 | 99 | static void |
e2ac236c | 100 | fmr_op_release_mr(struct rpcrdma_mw *r) |
d48b1d29 | 101 | { |
505bbe64 | 102 | LIST_HEAD(unmap_list); |
d48b1d29 CL |
103 | int rc; |
104 | ||
9d6b0409 CL |
105 | /* Ensure MW is not on any rl_registered list */ |
106 | if (!list_empty(&r->mw_list)) | |
107 | list_del(&r->mw_list); | |
108 | ||
88975ebe | 109 | kfree(r->fmr.fm_physaddrs); |
d48b1d29 CL |
110 | kfree(r->mw_sg); |
111 | ||
505bbe64 CL |
112 | /* In case this one was left mapped, try to unmap it |
113 | * to prevent dealloc_fmr from failing with EBUSY | |
114 | */ | |
115 | rc = __fmr_unmap(r); | |
116 | if (rc) | |
117 | pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n", | |
118 | r, rc); | |
119 | ||
88975ebe | 120 | rc = ib_dealloc_fmr(r->fmr.fm_mr); |
d48b1d29 CL |
121 | if (rc) |
122 | pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n", | |
123 | r, rc); | |
e2ac236c CL |
124 | |
125 | kfree(r); | |
d48b1d29 CL |
126 | } |
127 | ||
505bbe64 | 128 | /* Reset of a single FMR. |
ead3f26e CL |
129 | */ |
130 | static void | |
505bbe64 | 131 | fmr_op_recover_mr(struct rpcrdma_mw *mw) |
ead3f26e | 132 | { |
505bbe64 CL |
133 | struct rpcrdma_xprt *r_xprt = mw->mw_xprt; |
134 | int rc; | |
ead3f26e | 135 | |
505bbe64 CL |
136 | /* ORDER: invalidate first */ |
137 | rc = __fmr_unmap(mw); | |
ead3f26e | 138 | |
505bbe64 CL |
139 | /* ORDER: then DMA unmap */ |
140 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, | |
141 | mw->mw_sg, mw->mw_nents, mw->mw_dir); | |
2ffc871a CL |
142 | if (rc) |
143 | goto out_release; | |
505bbe64 CL |
144 | |
145 | rpcrdma_put_mw(r_xprt, mw); | |
146 | r_xprt->rx_stats.mrs_recovered++; | |
2ffc871a CL |
147 | return; |
148 | ||
149 | out_release: | |
150 | pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw); | |
151 | r_xprt->rx_stats.mrs_orphaned++; | |
152 | ||
153 | spin_lock(&r_xprt->rx_buf.rb_mwlock); | |
154 | list_del(&mw->mw_all); | |
155 | spin_unlock(&r_xprt->rx_buf.rb_mwlock); | |
156 | ||
157 | fmr_op_release_mr(mw); | |
ead3f26e CL |
158 | } |
159 | ||
3968cb58 CL |
160 | static int |
161 | fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | |
162 | struct rpcrdma_create_data_internal *cdata) | |
163 | { | |
87cfb9a0 CL |
164 | ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / |
165 | RPCRDMA_MAX_FMR_SGES); | |
3968cb58 CL |
166 | return 0; |
167 | } | |
168 | ||
1c9351ee CL |
169 | /* FMR mode conveys up to 64 pages of payload per chunk segment. |
170 | */ | |
171 | static size_t | |
172 | fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) | |
173 | { | |
174 | return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | |
94931746 | 175 | RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES); |
1c9351ee CL |
176 | } |
177 | ||
9c1b4d77 CL |
178 | /* Use the ib_map_phys_fmr() verb to register a memory region |
179 | * for remote access via RDMA READ or RDMA WRITE. | |
180 | */ | |
6748b0ca | 181 | static struct rpcrdma_mr_seg * |
9c1b4d77 | 182 | fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, |
9d6b0409 | 183 | int nsegs, bool writing, struct rpcrdma_mw **out) |
9c1b4d77 | 184 | { |
9c1b4d77 | 185 | struct rpcrdma_mr_seg *seg1 = seg; |
9c1b4d77 | 186 | int len, pageoff, i, rc; |
fc7fbb59 | 187 | struct rpcrdma_mw *mw; |
fcdfb968 | 188 | u64 *dma_pages; |
fc7fbb59 | 189 | |
505bbe64 CL |
190 | mw = rpcrdma_get_mw(r_xprt); |
191 | if (!mw) | |
6748b0ca | 192 | return ERR_PTR(-ENOBUFS); |
9c1b4d77 CL |
193 | |
194 | pageoff = offset_in_page(seg1->mr_offset); | |
195 | seg1->mr_offset -= pageoff; /* start of page */ | |
196 | seg1->mr_len += pageoff; | |
197 | len = -pageoff; | |
198 | if (nsegs > RPCRDMA_MAX_FMR_SGES) | |
199 | nsegs = RPCRDMA_MAX_FMR_SGES; | |
200 | for (i = 0; i < nsegs;) { | |
fcdfb968 CL |
201 | if (seg->mr_page) |
202 | sg_set_page(&mw->mw_sg[i], | |
203 | seg->mr_page, | |
204 | seg->mr_len, | |
205 | offset_in_page(seg->mr_offset)); | |
206 | else | |
207 | sg_set_buf(&mw->mw_sg[i], seg->mr_offset, | |
208 | seg->mr_len); | |
9c1b4d77 CL |
209 | len += seg->mr_len; |
210 | ++seg; | |
211 | ++i; | |
212 | /* Check for holes */ | |
213 | if ((i < nsegs && offset_in_page(seg->mr_offset)) || | |
214 | offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) | |
215 | break; | |
216 | } | |
fcdfb968 CL |
217 | mw->mw_dir = rpcrdma_data_dir(writing); |
218 | ||
1f541895 CL |
219 | mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device, |
220 | mw->mw_sg, i, mw->mw_dir); | |
221 | if (!mw->mw_nents) | |
fcdfb968 | 222 | goto out_dmamap_err; |
9c1b4d77 | 223 | |
fcdfb968 CL |
224 | for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++) |
225 | dma_pages[i] = sg_dma_address(&mw->mw_sg[i]); | |
226 | rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents, | |
227 | dma_pages[0]); | |
9c1b4d77 CL |
228 | if (rc) |
229 | goto out_maperr; | |
230 | ||
9d6b0409 CL |
231 | mw->mw_handle = mw->fmr.fm_mr->rkey; |
232 | mw->mw_length = len; | |
233 | mw->mw_offset = dma_pages[0] + pageoff; | |
234 | ||
235 | *out = mw; | |
6748b0ca | 236 | return seg; |
fcdfb968 CL |
237 | |
238 | out_dmamap_err: | |
1f541895 CL |
239 | pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", |
240 | mw->mw_sg, i); | |
241 | rpcrdma_put_mw(r_xprt, mw); | |
6748b0ca | 242 | return ERR_PTR(-EIO); |
9c1b4d77 CL |
243 | |
244 | out_maperr: | |
fcdfb968 CL |
245 | pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", |
246 | len, (unsigned long long)dma_pages[0], | |
247 | pageoff, mw->mw_nents, rc); | |
1f541895 CL |
248 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, |
249 | mw->mw_sg, mw->mw_nents, mw->mw_dir); | |
250 | rpcrdma_put_mw(r_xprt, mw); | |
6748b0ca | 251 | return ERR_PTR(-EIO); |
9c1b4d77 CL |
252 | } |
253 | ||
7c7a5390 CL |
254 | /* Invalidate all memory regions that were registered for "req". |
255 | * | |
256 | * Sleeps until it is safe for the host CPU to access the | |
257 | * previously mapped memory regions. | |
9d6b0409 | 258 | * |
451d26e1 CL |
259 | * Caller ensures that @mws is not empty before the call. This |
260 | * function empties the list. | |
7c7a5390 CL |
261 | */ |
262 | static void | |
451d26e1 | 263 | fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) |
7c7a5390 | 264 | { |
e2f6ef09 | 265 | struct rpcrdma_mw *mw; |
7c7a5390 CL |
266 | LIST_HEAD(unmap_list); |
267 | int rc; | |
268 | ||
7c7a5390 CL |
269 | /* ORDER: Invalidate all of the req's MRs first |
270 | * | |
271 | * ib_unmap_fmr() is slow, so use a single call instead | |
505bbe64 | 272 | * of one call per mapped FMR. |
7c7a5390 | 273 | */ |
451d26e1 CL |
274 | list_for_each_entry(mw, mws, mw_list) { |
275 | dprintk("RPC: %s: unmapping fmr %p\n", | |
276 | __func__, &mw->fmr); | |
88975ebe | 277 | list_add_tail(&mw->fmr.fm_mr->list, &unmap_list); |
451d26e1 | 278 | } |
c8b920bb | 279 | r_xprt->rx_stats.local_inv_needed++; |
7c7a5390 CL |
280 | rc = ib_unmap_fmr(&unmap_list); |
281 | if (rc) | |
505bbe64 | 282 | goto out_reset; |
7c7a5390 CL |
283 | |
284 | /* ORDER: Now DMA unmap all of the req's MRs, and return | |
285 | * them to the free MW list. | |
286 | */ | |
e2f6ef09 CL |
287 | while (!list_empty(mws)) { |
288 | mw = rpcrdma_pop_mw(mws); | |
289 | dprintk("RPC: %s: DMA unmapping fmr %p\n", | |
290 | __func__, &mw->fmr); | |
291 | list_del(&mw->fmr.fm_mr->list); | |
505bbe64 CL |
292 | ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, |
293 | mw->mw_sg, mw->mw_nents, mw->mw_dir); | |
294 | rpcrdma_put_mw(r_xprt, mw); | |
7c7a5390 CL |
295 | } |
296 | ||
505bbe64 CL |
297 | return; |
298 | ||
299 | out_reset: | |
300 | pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc); | |
301 | ||
e2f6ef09 CL |
302 | while (!list_empty(mws)) { |
303 | mw = rpcrdma_pop_mw(mws); | |
304 | list_del(&mw->fmr.fm_mr->list); | |
505bbe64 | 305 | fmr_op_recover_mr(mw); |
505bbe64 | 306 | } |
7c7a5390 CL |
307 | } |
308 | ||
ead3f26e CL |
309 | /* Use a slow, safe mechanism to invalidate all memory regions |
310 | * that were registered for "req". | |
ead3f26e CL |
311 | */ |
312 | static void | |
313 | fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, | |
314 | bool sync) | |
315 | { | |
ead3f26e | 316 | struct rpcrdma_mw *mw; |
ead3f26e | 317 | |
9d6b0409 | 318 | while (!list_empty(&req->rl_registered)) { |
9a5c63e9 | 319 | mw = rpcrdma_pop_mw(&req->rl_registered); |
fcdfb968 | 320 | if (sync) |
505bbe64 | 321 | fmr_op_recover_mr(mw); |
fcdfb968 | 322 | else |
505bbe64 | 323 | rpcrdma_defer_mr_recovery(mw); |
ead3f26e CL |
324 | } |
325 | } | |
326 | ||
a0ce85f5 | 327 | const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { |
9c1b4d77 | 328 | .ro_map = fmr_op_map, |
7c7a5390 | 329 | .ro_unmap_sync = fmr_op_unmap_sync, |
ead3f26e | 330 | .ro_unmap_safe = fmr_op_unmap_safe, |
505bbe64 | 331 | .ro_recover_mr = fmr_op_recover_mr, |
3968cb58 | 332 | .ro_open = fmr_op_open, |
1c9351ee | 333 | .ro_maxpages = fmr_op_maxpages, |
e2ac236c CL |
334 | .ro_init_mr = fmr_op_init_mr, |
335 | .ro_release_mr = fmr_op_release_mr, | |
a0ce85f5 | 336 | .ro_displayname = "fmr", |
c8b920bb | 337 | .ro_send_w_inv_ok = 0, |
a0ce85f5 | 338 | }; |