Commit | Line | Data |
---|---|---|
e126ba97 | 1 | /* |
6cf0a15f | 2 | * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. |
90da7dc8 | 3 | * Copyright (c) 2020, Intel Corporation. All rights reserved. |
e126ba97 EC |
4 | * |
5 | * This software is available to you under a choice of one of two | |
6 | * licenses. You may choose to be licensed under the terms of the GNU | |
7 | * General Public License (GPL) Version 2, available from the file | |
8 | * COPYING in the main directory of this source tree, or the | |
9 | * OpenIB.org BSD license below: | |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or | |
12 | * without modification, are permitted provided that the following | |
13 | * conditions are met: | |
14 | * | |
15 | * - Redistributions of source code must retain the above | |
16 | * copyright notice, this list of conditions and the following | |
17 | * disclaimer. | |
18 | * | |
19 | * - Redistributions in binary form must reproduce the above | |
20 | * copyright notice, this list of conditions and the following | |
21 | * disclaimer in the documentation and/or other materials | |
22 | * provided with the distribution. | |
23 | * | |
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
26 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
28 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
29 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
31 | * SOFTWARE. | |
32 | */ | |
33 | ||
34 | ||
35 | #include <linux/kref.h> | |
36 | #include <linux/random.h> | |
37 | #include <linux/debugfs.h> | |
38 | #include <linux/export.h> | |
746b5583 | 39 | #include <linux/delay.h> |
90da7dc8 JX |
40 | #include <linux/dma-buf.h> |
41 | #include <linux/dma-resv.h> | |
e126ba97 | 42 | #include <rdma/ib_umem.h> |
b4cfe447 | 43 | #include <rdma/ib_umem_odp.h> |
968e78dd | 44 | #include <rdma/ib_verbs.h> |
e126ba97 EC |
45 | #include "mlx5_ib.h" |
46 | ||
f22c30aa JG |
47 | /* |
48 | * We can't use an array for xlt_emergency_page because dma_map_single doesn't | |
49 | * work on kernel module memory | |
50 | */ | |
8010d74b | 51 | void *xlt_emergency_page; |
f22c30aa JG |
52 | static DEFINE_MUTEX(xlt_emergency_page_mutex); |
53 | ||
e126ba97 | 54 | enum { |
746b5583 | 55 | MAX_PENDING_REG_MR = 8, |
e126ba97 EC |
56 | }; |
57 | ||
832a6b06 | 58 | #define MLX5_UMR_ALIGN 2048 |
fe45f827 | 59 | |
fc6a9f86 SM |
60 | static void |
61 | create_mkey_callback(int status, struct mlx5_async_work *context); | |
ef3642c4 JG |
62 | static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, |
63 | u64 iova, int access_flags, | |
64 | unsigned int page_size, bool populate); | |
fc6a9f86 | 65 | |
5eb29f0d JG |
66 | static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, |
67 | struct ib_pd *pd) | |
68 | { | |
69 | struct mlx5_ib_dev *dev = to_mdev(pd->device); | |
70 | ||
71 | MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); | |
72 | MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); | |
73 | MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); | |
74 | MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); | |
75 | MLX5_SET(mkc, mkc, lr, 1); | |
76 | ||
77 | if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) | |
78 | MLX5_SET(mkc, mkc, relaxed_ordering_write, | |
79 | !!(acc & IB_ACCESS_RELAXED_ORDERING)); | |
80 | if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) | |
81 | MLX5_SET(mkc, mkc, relaxed_ordering_read, | |
82 | !!(acc & IB_ACCESS_RELAXED_ORDERING)); | |
83 | ||
84 | MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); | |
85 | MLX5_SET(mkc, mkc, qpn, 0xffffff); | |
86 | MLX5_SET64(mkc, mkc, start_addr, start_addr); | |
87 | } | |
88 | ||
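The helper above maps IB access flags onto individual mkey-context bits (local read is always granted). A minimal standalone sketch of that flag-to-bit translation, using hypothetical flag and field names rather than the real MLX5_SET() macros:

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the IB access flags used above. */
#define ACC_LOCAL_WRITE   (1 << 0)
#define ACC_REMOTE_WRITE  (1 << 1)
#define ACC_REMOTE_READ   (1 << 2)
#define ACC_REMOTE_ATOMIC (1 << 3)

struct mkey_ctx {                 /* toy mkey context: one flag per capability */
	bool lr, lw, rr, rw, a;
};

static void set_access_bits(struct mkey_ctx *mkc, int acc)
{
	mkc->a  = !!(acc & ACC_REMOTE_ATOMIC);
	mkc->rw = !!(acc & ACC_REMOTE_WRITE);
	mkc->rr = !!(acc & ACC_REMOTE_READ);
	mkc->lw = !!(acc & ACC_LOCAL_WRITE);
	mkc->lr = true;               /* local read is always enabled */
}

int main(void)
{
	struct mkey_ctx mkc = {0};

	set_access_bits(&mkc, ACC_LOCAL_WRITE | ACC_REMOTE_READ);
	printf("lr=%d lw=%d rr=%d rw=%d a=%d\n",
	       mkc.lr, mkc.lw, mkc.rr, mkc.rw, mkc.a);
	return 0;
}
```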
fc6a9f86 SM |
89 | static void |
90 | assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, | |
91 | u32 *in) | |
92 | { | |
f743ff3b | 93 | u8 key = atomic_inc_return(&dev->mkey_var); |
fc6a9f86 | 94 | void *mkc; |
fc6a9f86 SM |
95 | |
96 | mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); | |
97 | MLX5_SET(mkc, mkc, mkey_7_0, key); | |
98 | mkey->key = key; | |
99 | } | |
100 | ||
101 | static int | |
102 | mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, | |
103 | u32 *in, int inlen) | |
104 | { | |
105 | assign_mkey_variant(dev, mkey, in); | |
106 | return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen); | |
107 | } | |
108 | ||
109 | static int | |
110 | mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, | |
111 | struct mlx5_core_mkey *mkey, | |
112 | struct mlx5_async_ctx *async_ctx, | |
113 | u32 *in, int inlen, u32 *out, int outlen, | |
114 | struct mlx5_async_work *context) | |
115 | { | |
a3cfdd39 | 116 | MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); |
fc6a9f86 | 117 | assign_mkey_variant(dev, mkey, in); |
a3cfdd39 MG |
118 | return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, |
119 | create_mkey_callback, context); | |
fc6a9f86 SM |
120 | } |
121 | ||
eeea6953 LR |
122 | static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); |
123 | static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); | |
8b7ff7f3 | 124 | static int mr_cache_max_order(struct mlx5_ib_dev *dev); |
1c78a21a | 125 | static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent); |
c8d75a98 MD |
126 | |
127 | static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) | |
128 | { | |
129 | return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); | |
130 | } | |
131 | ||
b4cfe447 HE |
132 | static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) |
133 | { | |
806b101b | 134 | WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); |
b4cfe447 | 135 | |
806b101b | 136 | return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); |
b4cfe447 HE |
137 | } |
138 | ||
fc6a9f86 | 139 | static void create_mkey_callback(int status, struct mlx5_async_work *context) |
746b5583 | 140 | { |
e355477e JG |
141 | struct mlx5_ib_mr *mr = |
142 | container_of(context, struct mlx5_ib_mr, cb_work); | |
b91e1751 | 143 | struct mlx5_cache_ent *ent = mr->cache_ent; |
ca991a7d | 144 | struct mlx5_ib_dev *dev = ent->dev; |
746b5583 EC |
145 | unsigned long flags; |
146 | ||
746b5583 EC |
147 | if (status) { |
148 | mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status); | |
149 | kfree(mr); | |
b9358bdb JG |
150 | spin_lock_irqsave(&ent->lock, flags); |
151 | ent->pending--; | |
152 | WRITE_ONCE(dev->fill_delay, 1); | |
153 | spin_unlock_irqrestore(&ent->lock, flags); | |
746b5583 EC |
154 | mod_timer(&dev->delay_timer, jiffies + HZ); |
155 | return; | |
156 | } | |
157 | ||
aa8e08d2 | 158 | mr->mmkey.type = MLX5_MKEY_MR; |
54c62e13 SM |
159 | mr->mmkey.key |= mlx5_idx_to_mkey( |
160 | MLX5_GET(create_mkey_out, mr->out, mkey_index)); | |
db72438c | 161 | init_waitqueue_head(&mr->mmkey.wait); |
746b5583 | 162 | |
b9358bdb | 163 | WRITE_ONCE(dev->cache.last_add, jiffies); |
746b5583 EC |
164 | |
165 | spin_lock_irqsave(&ent->lock, flags); | |
166 | list_add_tail(&mr->list, &ent->head); | |
7c8691a3 JG |
167 | ent->available_mrs++; |
168 | ent->total_mrs++; | |
1c78a21a JG |
169 | /* If we are doing fill_to_high_water then keep going. */ |
170 | queue_adjust_cache_locked(ent); | |
b9358bdb | 171 | ent->pending--; |
746b5583 | 172 | spin_unlock_irqrestore(&ent->lock, flags); |
aad719dc JG |
173 | } |
174 | ||
175 | static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) | |
176 | { | |
177 | struct mlx5_ib_mr *mr; | |
178 | ||
179 | mr = kzalloc(sizeof(*mr), GFP_KERNEL); | |
180 | if (!mr) | |
181 | return NULL; | |
aad719dc | 182 | mr->cache_ent = ent; |
8605933a | 183 | |
5eb29f0d | 184 | set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); |
aad719dc JG |
185 | MLX5_SET(mkc, mkc, free, 1); |
186 | MLX5_SET(mkc, mkc, umr_en, 1); | |
187 | MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); | |
188 | MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7); | |
189 | ||
aad719dc JG |
190 | MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); |
191 | MLX5_SET(mkc, mkc, log_page_size, ent->page); | |
192 | return mr; | |
746b5583 EC |
193 | } |
194 | ||
aad719dc | 195 | /* Asynchronously schedule new MRs to be populated in the cache. */ |
a1d8854a | 196 | static int add_keys(struct mlx5_cache_ent *ent, unsigned int num) |
e126ba97 | 197 | { |
aad719dc | 198 | size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); |
e126ba97 | 199 | struct mlx5_ib_mr *mr; |
ec22eb53 SM |
200 | void *mkc; |
201 | u32 *in; | |
e126ba97 EC |
202 | int err = 0; |
203 | int i; | |
204 | ||
ec22eb53 | 205 | in = kzalloc(inlen, GFP_KERNEL); |
e126ba97 EC |
206 | if (!in) |
207 | return -ENOMEM; | |
208 | ||
ec22eb53 | 209 | mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); |
e126ba97 | 210 | for (i = 0; i < num; i++) { |
aad719dc | 211 | mr = alloc_cache_mr(ent, mkc); |
e126ba97 EC |
212 | if (!mr) { |
213 | err = -ENOMEM; | |
746b5583 | 214 | break; |
e126ba97 | 215 | } |
746b5583 | 216 | spin_lock_irq(&ent->lock); |
b9358bdb JG |
217 | if (ent->pending >= MAX_PENDING_REG_MR) { |
218 | err = -EAGAIN; | |
219 | spin_unlock_irq(&ent->lock); | |
220 | kfree(mr); | |
221 | break; | |
222 | } | |
746b5583 EC |
223 | ent->pending++; |
224 | spin_unlock_irq(&ent->lock); | |
b91e1751 JG |
225 | err = mlx5_ib_create_mkey_cb(ent->dev, &mr->mmkey, |
226 | &ent->dev->async_ctx, in, inlen, | |
227 | mr->out, sizeof(mr->out), | |
228 | &mr->cb_work); | |
e126ba97 | 229 | if (err) { |
d14e7110 EC |
230 | spin_lock_irq(&ent->lock); |
231 | ent->pending--; | |
232 | spin_unlock_irq(&ent->lock); | |
b91e1751 | 233 | mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err); |
e126ba97 | 234 | kfree(mr); |
746b5583 | 235 | break; |
e126ba97 | 236 | } |
e126ba97 EC |
237 | } |
238 | ||
e126ba97 EC |
239 | kfree(in); |
240 | return err; | |
241 | } | |
242 | ||
aad719dc JG |
243 | /* Synchronously create an MR in the cache */
244 | static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent) | |
245 | { | |
246 | size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in); | |
247 | struct mlx5_ib_mr *mr; | |
248 | void *mkc; | |
249 | u32 *in; | |
250 | int err; | |
251 | ||
252 | in = kzalloc(inlen, GFP_KERNEL); | |
253 | if (!in) | |
254 | return ERR_PTR(-ENOMEM); | |
255 | mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); | |
256 | ||
257 | mr = alloc_cache_mr(ent, mkc); | |
258 | if (!mr) { | |
259 | err = -ENOMEM; | |
260 | goto free_in; | |
261 | } | |
262 | ||
263 | err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen); | |
264 | if (err) | |
265 | goto free_mr; | |
266 | ||
267 | mr->mmkey.type = MLX5_MKEY_MR; | |
268 | WRITE_ONCE(ent->dev->cache.last_add, jiffies); | |
269 | spin_lock_irq(&ent->lock); | |
270 | ent->total_mrs++; | |
271 | spin_unlock_irq(&ent->lock); | |
272 | kfree(in); | |
273 | return mr; | |
274 | free_mr: | |
275 | kfree(mr); | |
276 | free_in: | |
277 | kfree(in); | |
278 | return ERR_PTR(err); | |
279 | } | |
280 | ||
b9358bdb | 281 | static void remove_cache_mr_locked(struct mlx5_cache_ent *ent) |
e126ba97 | 282 | { |
e126ba97 | 283 | struct mlx5_ib_mr *mr; |
e126ba97 | 284 | |
b9358bdb JG |
285 | lockdep_assert_held(&ent->lock); |
286 | if (list_empty(&ent->head)) | |
a1d8854a | 287 | return; |
a1d8854a JG |
288 | mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); |
289 | list_del(&mr->list); | |
290 | ent->available_mrs--; | |
291 | ent->total_mrs--; | |
292 | spin_unlock_irq(&ent->lock); | |
293 | mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey); | |
294 | kfree(mr); | |
b9358bdb | 295 | spin_lock_irq(&ent->lock); |
a1d8854a | 296 | } |
65edd0e7 | 297 | |
a1d8854a JG |
298 | static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target, |
299 | bool limit_fill) | |
300 | { | |
301 | int err; | |
302 | ||
303 | lockdep_assert_held(&ent->lock); | |
304 | ||
305 | while (true) { | |
306 | if (limit_fill) | |
307 | target = ent->limit * 2; | |
308 | if (target == ent->available_mrs + ent->pending) | |
309 | return 0; | |
310 | if (target > ent->available_mrs + ent->pending) { | |
311 | u32 todo = target - (ent->available_mrs + ent->pending); | |
312 | ||
313 | spin_unlock_irq(&ent->lock); | |
314 | err = add_keys(ent, todo); | |
315 | if (err == -EAGAIN) | |
316 | usleep_range(3000, 5000); | |
317 | spin_lock_irq(&ent->lock); | |
318 | if (err) { | |
319 | if (err != -EAGAIN) | |
320 | return err; | |
321 | } else | |
322 | return 0; | |
323 | } else { | |
b9358bdb | 324 | remove_cache_mr_locked(ent); |
a1d8854a | 325 | } |
e126ba97 EC |
326 | } |
327 | } | |
328 | ||
329 | static ssize_t size_write(struct file *filp, const char __user *buf, | |
330 | size_t count, loff_t *pos) | |
331 | { | |
332 | struct mlx5_cache_ent *ent = filp->private_data; | |
a1d8854a | 333 | u32 target; |
e126ba97 | 334 | int err; |
e126ba97 | 335 | |
a1d8854a JG |
336 | err = kstrtou32_from_user(buf, count, 0, &target); |
337 | if (err) | |
338 | return err; | |
746b5583 | 339 | |
a1d8854a JG |
340 | /* |
341 | * Target is the new value of total_mrs the user requests; however, we
342 | * cannot free MRs that are in use. Compute the target value for | |
343 | * available_mrs. | |
344 | */ | |
345 | spin_lock_irq(&ent->lock); | |
346 | if (target < ent->total_mrs - ent->available_mrs) { | |
347 | err = -EINVAL; | |
348 | goto err_unlock; | |
e126ba97 | 349 | } |
a1d8854a JG |
350 | target = target - (ent->total_mrs - ent->available_mrs); |
351 | if (target < ent->limit || target > ent->limit*2) { | |
352 | err = -EINVAL; | |
353 | goto err_unlock; | |
354 | } | |
355 | err = resize_available_mrs(ent, target, false); | |
356 | if (err) | |
357 | goto err_unlock; | |
358 | spin_unlock_irq(&ent->lock); | |
e126ba97 EC |
359 | |
360 | return count; | |
a1d8854a JG |
361 | |
362 | err_unlock: | |
363 | spin_unlock_irq(&ent->lock); | |
364 | return err; | |
e126ba97 EC |
365 | } |
366 | ||
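A worked example of the size_write() target computation, with assumed numbers: total_mrs = 10 and available_mrs = 4 means 6 MRs are in use, so a user writing 8 is really asking for 8 - 6 = 2 available MRs, which must fall in [limit, 2 * limit]. A standalone sketch:

```c
#include <stdio.h>

int main(void)
{
	unsigned int total_mrs = 10, available_mrs = 4, limit = 2;
	unsigned int target = 8;                 /* value written by the user */
	unsigned int in_use = total_mrs - available_mrs;

	if (target < in_use) {
		puts("-EINVAL: cannot free MRs that are in use");
		return 1;
	}
	target -= in_use;                        /* new available_mrs goal: 2 */
	if (target < limit || target > limit * 2)
		puts("-EINVAL: outside [limit, 2*limit]");
	else
		printf("resize available_mrs to %u\n", target);
	return 0;
}
```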
367 | static ssize_t size_read(struct file *filp, char __user *buf, size_t count, | |
368 | loff_t *pos) | |
369 | { | |
370 | struct mlx5_cache_ent *ent = filp->private_data; | |
371 | char lbuf[20]; | |
372 | int err; | |
373 | ||
7c8691a3 | 374 | err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->total_mrs); |
e126ba97 EC |
375 | if (err < 0) |
376 | return err; | |
377 | ||
60e6627f | 378 | return simple_read_from_buffer(buf, count, pos, lbuf, err); |
e126ba97 EC |
379 | } |
380 | ||
381 | static const struct file_operations size_fops = { | |
382 | .owner = THIS_MODULE, | |
383 | .open = simple_open, | |
384 | .write = size_write, | |
385 | .read = size_read, | |
386 | }; | |
387 | ||
388 | static ssize_t limit_write(struct file *filp, const char __user *buf, | |
389 | size_t count, loff_t *pos) | |
390 | { | |
391 | struct mlx5_cache_ent *ent = filp->private_data; | |
e126ba97 EC |
392 | u32 var; |
393 | int err; | |
e126ba97 | 394 | |
a1d8854a JG |
395 | err = kstrtou32_from_user(buf, count, 0, &var); |
396 | if (err) | |
397 | return err; | |
e126ba97 | 398 | |
a1d8854a JG |
399 | /* |
400 | * Upon set we immediately fill the cache to the high water mark implied by
401 | * the limit. | |
402 | */ | |
403 | spin_lock_irq(&ent->lock); | |
e126ba97 | 404 | ent->limit = var; |
a1d8854a JG |
405 | err = resize_available_mrs(ent, 0, true); |
406 | spin_unlock_irq(&ent->lock); | |
407 | if (err) | |
408 | return err; | |
e126ba97 EC |
409 | return count; |
410 | } | |
411 | ||
412 | static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, | |
413 | loff_t *pos) | |
414 | { | |
415 | struct mlx5_cache_ent *ent = filp->private_data; | |
416 | char lbuf[20]; | |
417 | int err; | |
418 | ||
e126ba97 EC |
419 | err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); |
420 | if (err < 0) | |
421 | return err; | |
422 | ||
60e6627f | 423 | return simple_read_from_buffer(buf, count, pos, lbuf, err); |
e126ba97 EC |
424 | } |
425 | ||
426 | static const struct file_operations limit_fops = { | |
427 | .owner = THIS_MODULE, | |
428 | .open = simple_open, | |
429 | .write = limit_write, | |
430 | .read = limit_read, | |
431 | }; | |
432 | ||
b9358bdb | 433 | static bool someone_adding(struct mlx5_mr_cache *cache) |
e126ba97 | 434 | { |
b9358bdb | 435 | unsigned int i; |
e126ba97 EC |
436 | |
437 | for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { | |
b9358bdb JG |
438 | struct mlx5_cache_ent *ent = &cache->ent[i]; |
439 | bool ret; | |
e126ba97 | 440 | |
b9358bdb JG |
441 | spin_lock_irq(&ent->lock); |
442 | ret = ent->available_mrs < ent->limit; | |
443 | spin_unlock_irq(&ent->lock); | |
444 | if (ret) | |
445 | return true; | |
446 | } | |
447 | return false; | |
e126ba97 EC |
448 | } |
449 | ||
ad2d3ef4 JG |
450 | /* |
451 | * Check if the bucket is outside the high/low water mark and schedule an async | |
452 | * update. The cache refill has hysteresis: once the low water mark is hit, it is
453 | * refilled up to the high mark. | |
454 | */ | |
455 | static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) | |
456 | { | |
457 | lockdep_assert_held(&ent->lock); | |
458 | ||
1c78a21a | 459 | if (ent->disabled || READ_ONCE(ent->dev->fill_delay)) |
b9358bdb | 460 | return; |
1c78a21a JG |
461 | if (ent->available_mrs < ent->limit) { |
462 | ent->fill_to_high_water = true; | |
463 | queue_work(ent->dev->cache.wq, &ent->work); | |
464 | } else if (ent->fill_to_high_water && | |
465 | ent->available_mrs + ent->pending < 2 * ent->limit) { | |
466 | /* | |
467 | * Once we start populating due to hitting a low water mark,
468 | * continue until we pass the high water mark. | |
469 | */ | |
ad2d3ef4 | 470 | queue_work(ent->dev->cache.wq, &ent->work); |
1c78a21a JG |
471 | } else if (ent->available_mrs == 2 * ent->limit) { |
472 | ent->fill_to_high_water = false; | |
473 | } else if (ent->available_mrs > 2 * ent->limit) { | |
474 | /* Queue deletion of excess entries */ | |
475 | ent->fill_to_high_water = false; | |
476 | if (ent->pending) | |
477 | queue_delayed_work(ent->dev->cache.wq, &ent->dwork, | |
478 | msecs_to_jiffies(1000)); | |
479 | else | |
480 | queue_work(ent->dev->cache.wq, &ent->work); | |
481 | } | |
ad2d3ef4 JG |
482 | } |
483 | ||
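A minimal userspace sketch of the low/high water hysteresis implemented by queue_adjust_cache_locked(), without the driver's locking or workqueue (struct fields and values are assumptions for illustration):

```c
#include <stdbool.h>
#include <stdio.h>

struct ent {
	unsigned int available, pending, limit;
	bool fill_to_high_water;
};

/* Returns true when background cache work should be queued. */
static bool adjust(struct ent *e)
{
	if (e->available < e->limit) {            /* dipped below low water */
		e->fill_to_high_water = true;
		return true;
	}
	if (e->fill_to_high_water &&
	    e->available + e->pending < 2 * e->limit)
		return true;                      /* keep filling toward 2*limit */
	if (e->available >= 2 * e->limit)
		e->fill_to_high_water = false;    /* high water mark reached */
	return e->available > 2 * e->limit;       /* shrink excess entries */
}

int main(void)
{
	struct ent e = { .available = 3, .pending = 0, .limit = 4 };
	bool queue = adjust(&e);

	printf("queue work: %d, fill_to_high_water: %d\n",
	       queue, e.fill_to_high_water);
	return 0;
}
```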
e126ba97 EC |
484 | static void __cache_work_func(struct mlx5_cache_ent *ent) |
485 | { | |
486 | struct mlx5_ib_dev *dev = ent->dev; | |
487 | struct mlx5_mr_cache *cache = &dev->cache; | |
746b5583 | 488 | int err; |
e126ba97 | 489 | |
b9358bdb JG |
490 | spin_lock_irq(&ent->lock); |
491 | if (ent->disabled) | |
492 | goto out; | |
e126ba97 | 493 | |
1c78a21a JG |
494 | if (ent->fill_to_high_water && |
495 | ent->available_mrs + ent->pending < 2 * ent->limit && | |
b9358bdb JG |
496 | !READ_ONCE(dev->fill_delay)) { |
497 | spin_unlock_irq(&ent->lock); | |
b91e1751 | 498 | err = add_keys(ent, 1); |
b9358bdb JG |
499 | spin_lock_irq(&ent->lock); |
500 | if (ent->disabled) | |
501 | goto out; | |
502 | if (err) { | |
aad719dc JG |
503 | /* |
504 | * EAGAIN only happens if pending is positive, so we | |
505 | * will be rescheduled from create_mkey_callback(). The only
506 | * failure path here is ENOMEM. | |
507 | */ | |
508 | if (err != -EAGAIN) { | |
b9358bdb JG |
509 | mlx5_ib_warn( |
510 | dev, | |
511 | "command failed order %d, err %d\n", | |
512 | ent->order, err); | |
746b5583 EC |
513 | queue_delayed_work(cache->wq, &ent->dwork, |
514 | msecs_to_jiffies(1000)); | |
746b5583 EC |
515 | } |
516 | } | |
7c8691a3 | 517 | } else if (ent->available_mrs > 2 * ent->limit) { |
b9358bdb JG |
518 | bool need_delay; |
519 | ||
ab5cdc31 | 520 | /* |
a1d8854a JG |
521 | * The remove_cache_mr_locked() logic is performed as a garbage
522 | * collection task. Such a task is intended to run when no
523 | * other active processes are running.
ab5cdc31 LR |
524 | * |
525 | * The need_resched() will return TRUE if there are user tasks
526 | * to be activated in the near future.
527 | * | |
a1d8854a JG |
528 | * In that case, we don't execute remove_cache_mr_locked() and postpone
529 | * the garbage collection work to the next cycle, in
530 | * order to free CPU resources for other tasks.
ab5cdc31 | 531 | */ |
b9358bdb JG |
532 | spin_unlock_irq(&ent->lock); |
533 | need_delay = need_resched() || someone_adding(cache) || | |
534 | time_after(jiffies, | |
535 | READ_ONCE(cache->last_add) + 300 * HZ); | |
536 | spin_lock_irq(&ent->lock); | |
537 | if (ent->disabled) | |
538 | goto out; | |
539 | if (need_delay) | |
746b5583 | 540 | queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); |
b9358bdb JG |
541 | remove_cache_mr_locked(ent); |
542 | queue_adjust_cache_locked(ent); | |
e126ba97 | 543 | } |
b9358bdb JG |
544 | out: |
545 | spin_unlock_irq(&ent->lock); | |
e126ba97 EC |
546 | } |
547 | ||
548 | static void delayed_cache_work_func(struct work_struct *work) | |
549 | { | |
550 | struct mlx5_cache_ent *ent; | |
551 | ||
552 | ent = container_of(work, struct mlx5_cache_ent, dwork.work); | |
553 | __cache_work_func(ent); | |
554 | } | |
555 | ||
556 | static void cache_work_func(struct work_struct *work) | |
557 | { | |
558 | struct mlx5_cache_ent *ent; | |
559 | ||
560 | ent = container_of(work, struct mlx5_cache_ent, work); | |
561 | __cache_work_func(ent); | |
562 | } | |
563 | ||
b91e1751 JG |
564 | /* Allocate a special entry from the cache */ |
565 | struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, | |
8383da3e | 566 | unsigned int entry, int access_flags) |
49780d42 AK |
567 | { |
568 | struct mlx5_mr_cache *cache = &dev->cache; | |
569 | struct mlx5_cache_ent *ent; | |
570 | struct mlx5_ib_mr *mr; | |
49780d42 | 571 | |
b91e1751 JG |
572 | if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY || |
573 | entry >= ARRAY_SIZE(cache->ent))) | |
546d3009 | 574 | return ERR_PTR(-EINVAL); |
49780d42 | 575 | |
8383da3e JG |
576 | /* Matches access in alloc_cache_mr() */ |
577 | if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) | |
578 | return ERR_PTR(-EOPNOTSUPP); | |
579 | ||
49780d42 | 580 | ent = &cache->ent[entry]; |
aad719dc JG |
581 | spin_lock_irq(&ent->lock); |
582 | if (list_empty(&ent->head)) { | |
583 | spin_unlock_irq(&ent->lock); | |
584 | mr = create_cache_mr(ent); | |
585 | if (IS_ERR(mr)) | |
49780d42 | 586 | return mr; |
aad719dc JG |
587 | } else { |
588 | mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); | |
589 | list_del(&mr->list); | |
590 | ent->available_mrs--; | |
591 | queue_adjust_cache_locked(ent); | |
592 | spin_unlock_irq(&ent->lock); | |
a639e667 JG |
593 | |
594 | mlx5_clear_mr(mr); | |
49780d42 | 595 | } |
8383da3e | 596 | mr->access_flags = access_flags; |
aad719dc | 597 | return mr; |
49780d42 AK |
598 | } |
599 | ||
aad719dc JG |
600 | /* Return a MR already available in the cache */ |
601 | static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent) | |
e126ba97 | 602 | { |
b91e1751 | 603 | struct mlx5_ib_dev *dev = req_ent->dev; |
e126ba97 | 604 | struct mlx5_ib_mr *mr = NULL; |
b91e1751 | 605 | struct mlx5_cache_ent *ent = req_ent; |
e126ba97 | 606 | |
b91e1751 JG |
607 | /* Try larger MR pools from the cache to satisfy the allocation */ |
608 | for (; ent != &dev->cache.ent[MR_CACHE_LAST_STD_ENTRY + 1]; ent++) { | |
609 | mlx5_ib_dbg(dev, "order %u, cache index %zu\n", ent->order, | |
610 | ent - dev->cache.ent); | |
e126ba97 | 611 | |
746b5583 | 612 | spin_lock_irq(&ent->lock); |
e126ba97 EC |
613 | if (!list_empty(&ent->head)) { |
614 | mr = list_first_entry(&ent->head, struct mlx5_ib_mr, | |
615 | list); | |
616 | list_del(&mr->list); | |
7c8691a3 | 617 | ent->available_mrs--; |
ad2d3ef4 | 618 | queue_adjust_cache_locked(ent); |
746b5583 | 619 | spin_unlock_irq(&ent->lock); |
a639e667 JG |
620 | mlx5_clear_mr(mr); |
621 | return mr; | |
e126ba97 | 622 | } |
ad2d3ef4 | 623 | queue_adjust_cache_locked(ent); |
746b5583 | 624 | spin_unlock_irq(&ent->lock); |
e126ba97 | 625 | } |
a639e667 JG |
626 | req_ent->miss++; |
627 | return NULL; | |
e126ba97 EC |
628 | } |
629 | ||
1769c4c5 JG |
630 | static void detach_mr_from_cache(struct mlx5_ib_mr *mr) |
631 | { | |
632 | struct mlx5_cache_ent *ent = mr->cache_ent; | |
633 | ||
634 | mr->cache_ent = NULL; | |
635 | spin_lock_irq(&ent->lock); | |
636 | ent->total_mrs--; | |
637 | spin_unlock_irq(&ent->lock); | |
638 | } | |
639 | ||
49780d42 | 640 | void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) |
e126ba97 | 641 | { |
b91e1751 | 642 | struct mlx5_cache_ent *ent = mr->cache_ent; |
e126ba97 | 643 | |
b91e1751 | 644 | if (!ent) |
dd9a4034 VF |
645 | return; |
646 | ||
09689703 | 647 | if (mlx5_mr_cache_invalidate(mr)) { |
1769c4c5 | 648 | detach_mr_from_cache(mr); |
afd14174 | 649 | destroy_mkey(dev, mr); |
e8993890 | 650 | kfree(mr); |
e126ba97 EC |
651 | return; |
652 | } | |
49780d42 | 653 | |
746b5583 | 654 | spin_lock_irq(&ent->lock); |
e126ba97 | 655 | list_add_tail(&mr->list, &ent->head); |
7c8691a3 | 656 | ent->available_mrs++; |
ad2d3ef4 | 657 | queue_adjust_cache_locked(ent); |
746b5583 | 658 | spin_unlock_irq(&ent->lock); |
e126ba97 EC |
659 | } |
660 | ||
661 | static void clean_keys(struct mlx5_ib_dev *dev, int c) | |
662 | { | |
e126ba97 EC |
663 | struct mlx5_mr_cache *cache = &dev->cache; |
664 | struct mlx5_cache_ent *ent = &cache->ent[c]; | |
65edd0e7 | 665 | struct mlx5_ib_mr *tmp_mr; |
e126ba97 | 666 | struct mlx5_ib_mr *mr; |
65edd0e7 | 667 | LIST_HEAD(del_list); |
e126ba97 | 668 | |
3c461911 | 669 | cancel_delayed_work(&ent->dwork); |
e126ba97 | 670 | while (1) { |
746b5583 | 671 | spin_lock_irq(&ent->lock); |
e126ba97 | 672 | if (list_empty(&ent->head)) { |
746b5583 | 673 | spin_unlock_irq(&ent->lock); |
65edd0e7 | 674 | break; |
e126ba97 EC |
675 | } |
676 | mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); | |
65edd0e7 | 677 | list_move(&mr->list, &del_list); |
7c8691a3 JG |
678 | ent->available_mrs--; |
679 | ent->total_mrs--; | |
746b5583 | 680 | spin_unlock_irq(&ent->lock); |
65edd0e7 DJ |
681 | mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); |
682 | } | |
683 | ||
65edd0e7 DJ |
684 | list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { |
685 | list_del(&mr->list); | |
686 | kfree(mr); | |
e126ba97 EC |
687 | } |
688 | } | |
689 | ||
12cc1a02 LR |
690 | static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) |
691 | { | |
6a4d00be | 692 | if (!mlx5_debugfs_root || dev->is_rep) |
12cc1a02 LR |
693 | return; |
694 | ||
695 | debugfs_remove_recursive(dev->cache.root); | |
696 | dev->cache.root = NULL; | |
697 | } | |
698 | ||
73eb8f03 | 699 | static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) |
e126ba97 EC |
700 | { |
701 | struct mlx5_mr_cache *cache = &dev->cache; | |
702 | struct mlx5_cache_ent *ent; | |
73eb8f03 | 703 | struct dentry *dir; |
e126ba97 EC |
704 | int i; |
705 | ||
6a4d00be | 706 | if (!mlx5_debugfs_root || dev->is_rep) |
73eb8f03 | 707 | return; |
e126ba97 | 708 | |
9603b61d | 709 | cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root); |
e126ba97 EC |
710 | |
711 | for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { | |
712 | ent = &cache->ent[i]; | |
713 | sprintf(ent->name, "%d", ent->order); | |
73eb8f03 GKH |
714 | dir = debugfs_create_dir(ent->name, cache->root); |
715 | debugfs_create_file("size", 0600, dir, ent, &size_fops); | |
716 | debugfs_create_file("limit", 0600, dir, ent, &limit_fops); | |
7c8691a3 | 717 | debugfs_create_u32("cur", 0400, dir, &ent->available_mrs); |
73eb8f03 | 718 | debugfs_create_u32("miss", 0600, dir, &ent->miss); |
e126ba97 | 719 | } |
e126ba97 EC |
720 | } |
721 | ||
e99e88a9 | 722 | static void delay_time_func(struct timer_list *t) |
746b5583 | 723 | { |
e99e88a9 | 724 | struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer); |
746b5583 | 725 | |
b9358bdb | 726 | WRITE_ONCE(dev->fill_delay, 0); |
746b5583 EC |
727 | } |
728 | ||
e126ba97 EC |
729 | int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) |
730 | { | |
731 | struct mlx5_mr_cache *cache = &dev->cache; | |
732 | struct mlx5_cache_ent *ent; | |
e126ba97 EC |
733 | int i; |
734 | ||
6bc1a656 | 735 | mutex_init(&dev->slow_path_mutex); |
3c856c82 | 736 | cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); |
e126ba97 EC |
737 | if (!cache->wq) { |
738 | mlx5_ib_warn(dev, "failed to create work queue\n"); | |
739 | return -ENOMEM; | |
740 | } | |
741 | ||
e355477e | 742 | mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); |
e99e88a9 | 743 | timer_setup(&dev->delay_timer, delay_time_func, 0); |
e126ba97 | 744 | for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { |
e126ba97 EC |
745 | ent = &cache->ent[i]; |
746 | INIT_LIST_HEAD(&ent->head); | |
747 | spin_lock_init(&ent->lock); | |
748 | ent->order = i + 2; | |
749 | ent->dev = dev; | |
49780d42 | 750 | ent->limit = 0; |
e126ba97 | 751 | |
e126ba97 EC |
752 | INIT_WORK(&ent->work, cache_work_func); |
753 | INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); | |
49780d42 | 754 | |
8b7ff7f3 | 755 | if (i > MR_CACHE_LAST_STD_ENTRY) { |
81713d37 | 756 | mlx5_odp_init_mr_cache_entry(ent); |
49780d42 | 757 | continue; |
81713d37 | 758 | } |
49780d42 | 759 | |
8b7ff7f3 | 760 | if (ent->order > mr_cache_max_order(dev)) |
49780d42 AK |
761 | continue; |
762 | ||
763 | ent->page = PAGE_SHIFT; | |
764 | ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) / | |
765 | MLX5_IB_UMR_OCTOWORD; | |
766 | ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; | |
767 | if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && | |
8383da3e JG |
768 | !dev->is_rep && mlx5_core_is_pf(dev->mdev) && |
769 | mlx5_ib_can_load_pas_with_umr(dev, 0)) | |
49780d42 AK |
770 | ent->limit = dev->mdev->profile->mr_cache[i].limit; |
771 | else | |
772 | ent->limit = 0; | |
ad2d3ef4 JG |
773 | spin_lock_irq(&ent->lock); |
774 | queue_adjust_cache_locked(ent); | |
775 | spin_unlock_irq(&ent->lock); | |
e126ba97 EC |
776 | } |
777 | ||
73eb8f03 | 778 | mlx5_mr_cache_debugfs_init(dev); |
12cc1a02 | 779 | |
e126ba97 EC |
780 | return 0; |
781 | } | |
782 | ||
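For the standard cache entries initialized above, ent->xlt is the number of 16-byte octowords needed to hold 2^order MTT entries of 8 bytes each (entry and octoword sizes assumed from the driver's definitions). A small recomputation:

```c
#include <stdio.h>

int main(void)
{
	const unsigned int mtt_size = 8;    /* sizeof(struct mlx5_mtt) */
	const unsigned int octoword = 16;   /* MLX5_IB_UMR_OCTOWORD */

	for (unsigned int order = 2; order <= 5; order++)
		printf("order %u: %u MTTs -> %u octowords\n", order,
		       1u << order, (1u << order) * mtt_size / octoword);
	return 0;
}
```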
783 | int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) | |
784 | { | |
b9358bdb | 785 | unsigned int i; |
e126ba97 | 786 | |
32927e28 MB |
787 | if (!dev->cache.wq) |
788 | return 0; | |
789 | ||
b9358bdb JG |
790 | for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { |
791 | struct mlx5_cache_ent *ent = &dev->cache.ent[i]; | |
792 | ||
793 | spin_lock_irq(&ent->lock); | |
794 | ent->disabled = true; | |
795 | spin_unlock_irq(&ent->lock); | |
796 | cancel_work_sync(&ent->work); | |
797 | cancel_delayed_work_sync(&ent->dwork); | |
798 | } | |
e126ba97 EC |
799 | |
800 | mlx5_mr_cache_debugfs_cleanup(dev); | |
e355477e | 801 | mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); |
e126ba97 EC |
802 | |
803 | for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) | |
804 | clean_keys(dev, i); | |
805 | ||
3c461911 | 806 | destroy_workqueue(dev->cache.wq); |
746b5583 | 807 | del_timer_sync(&dev->delay_timer); |
3c461911 | 808 | |
e126ba97 EC |
809 | return 0; |
810 | } | |
811 | ||
812 | struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) | |
813 | { | |
814 | struct mlx5_ib_dev *dev = to_mdev(pd->device); | |
ec22eb53 | 815 | int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); |
e126ba97 | 816 | struct mlx5_ib_mr *mr; |
ec22eb53 SM |
817 | void *mkc; |
818 | u32 *in; | |
e126ba97 EC |
819 | int err; |
820 | ||
821 | mr = kzalloc(sizeof(*mr), GFP_KERNEL); | |
822 | if (!mr) | |
823 | return ERR_PTR(-ENOMEM); | |
824 | ||
ec22eb53 | 825 | in = kzalloc(inlen, GFP_KERNEL); |
e126ba97 EC |
826 | if (!in) { |
827 | err = -ENOMEM; | |
828 | goto err_free; | |
829 | } | |
830 | ||
ec22eb53 SM |
831 | mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); |
832 | ||
cdbd0d2b | 833 | MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); |
ec22eb53 | 834 | MLX5_SET(mkc, mkc, length64, 1); |
03232cc4 | 835 | set_mkc_access_pd_addr_fields(mkc, acc, 0, pd); |
ec22eb53 | 836 | |
fc6a9f86 | 837 | err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); |
e126ba97 EC |
838 | if (err) |
839 | goto err_in; | |
840 | ||
841 | kfree(in); | |
aa8e08d2 | 842 | mr->mmkey.type = MLX5_MKEY_MR; |
a606b0f6 MB |
843 | mr->ibmr.lkey = mr->mmkey.key; |
844 | mr->ibmr.rkey = mr->mmkey.key; | |
e126ba97 EC |
845 | mr->umem = NULL; |
846 | ||
847 | return &mr->ibmr; | |
848 | ||
849 | err_in: | |
850 | kfree(in); | |
851 | ||
852 | err_free: | |
853 | kfree(mr); | |
854 | ||
855 | return ERR_PTR(err); | |
856 | } | |
857 | ||
7b4cdaae | 858 | static int get_octo_len(u64 addr, u64 len, int page_shift) |
e126ba97 | 859 | { |
7b4cdaae | 860 | u64 page_size = 1ULL << page_shift; |
e126ba97 EC |
861 | u64 offset; |
862 | int npages; | |
863 | ||
864 | offset = addr & (page_size - 1); | |
7b4cdaae | 865 | npages = ALIGN(len + offset, page_size) >> page_shift; |
e126ba97 EC |
866 | return (npages + 1) / 2; |
867 | } | |
868 | ||
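get_octo_len() rounds the possibly unaligned range up to whole pages and then converts pages to octowords, two page entries per octoword. A standalone recomputation with assumed inputs (512-byte offset into a 4 KiB page, 1 MiB length, giving 257 pages and 129 octowords):

```c
#include <stdio.h>

static int get_octo_len(unsigned long long addr, unsigned long long len,
			int page_shift)
{
	unsigned long long page_size = 1ULL << page_shift;
	unsigned long long offset = addr & (page_size - 1);
	int npages = (len + offset + page_size - 1) >> page_shift;

	return (npages + 1) / 2;
}

int main(void)
{
	printf("%d\n", get_octo_len(0x200, 1 << 20, 12));   /* prints 129 */
	return 0;
}
```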
8b7ff7f3 | 869 | static int mr_cache_max_order(struct mlx5_ib_dev *dev) |
e126ba97 | 870 | { |
7d0cc6ed | 871 | if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) |
8b7ff7f3 | 872 | return MR_CACHE_LAST_STD_ENTRY + 2; |
4c25b7a3 MD |
873 | return MLX5_MAX_UMR_SHIFT; |
874 | } | |
875 | ||
add08d76 | 876 | static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) |
e126ba97 | 877 | { |
add08d76 CH |
878 | struct mlx5_ib_umr_context *context = |
879 | container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); | |
e126ba97 | 880 | |
add08d76 CH |
881 | context->status = wc->status; |
882 | complete(&context->done); | |
883 | } | |
e126ba97 | 884 | |
add08d76 CH |
885 | static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) |
886 | { | |
887 | context->cqe.done = mlx5_ib_umr_done; | |
888 | context->status = -1; | |
889 | init_completion(&context->done); | |
e126ba97 EC |
890 | } |
891 | ||
d5ea2df9 BJ |
892 | static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, |
893 | struct mlx5_umr_wr *umrwr) | |
894 | { | |
895 | struct umr_common *umrc = &dev->umrc; | |
d34ac5cd | 896 | const struct ib_send_wr *bad; |
d5ea2df9 BJ |
897 | int err; |
898 | struct mlx5_ib_umr_context umr_context; | |
899 | ||
900 | mlx5_ib_init_umr_context(&umr_context); | |
901 | umrwr->wr.wr_cqe = &umr_context.cqe; | |
902 | ||
903 | down(&umrc->sem); | |
904 | err = ib_post_send(umrc->qp, &umrwr->wr, &bad); | |
905 | if (err) { | |
906 | mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err); | |
907 | } else { | |
908 | wait_for_completion(&umr_context.done); | |
909 | if (umr_context.status != IB_WC_SUCCESS) { | |
910 | mlx5_ib_warn(dev, "reg umr failed (%u)\n", | |
911 | umr_context.status); | |
912 | err = -EFAULT; | |
913 | } | |
914 | } | |
915 | up(&umrc->sem); | |
916 | return err; | |
917 | } | |
918 | ||
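mlx5_ib_post_send_wait() turns the asynchronous UMR post into a synchronous call: attach a completion to the work request, post it, and sleep until the CQE callback fires. A userspace analogy of that pattern using pthreads (illustrative only; the kernel code uses struct completion and a semaphore):

```c
#include <pthread.h>
#include <stdio.h>

struct umr_context {
	pthread_mutex_t lock;
	pthread_cond_t done;
	int completed;
	int status;
};

static struct umr_context ctx = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, -1
};

static void *completion_handler(void *arg)     /* stands in for the CQE callback */
{
	pthread_mutex_lock(&ctx.lock);
	ctx.status = 0;                            /* IB_WC_SUCCESS */
	ctx.completed = 1;
	pthread_cond_signal(&ctx.done);
	pthread_mutex_unlock(&ctx.lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, completion_handler, NULL);  /* "post" the work */
	pthread_mutex_lock(&ctx.lock);
	while (!ctx.completed)                     /* wait_for_completion() */
		pthread_cond_wait(&ctx.done, &ctx.lock);
	pthread_mutex_unlock(&ctx.lock);
	pthread_join(t, NULL);
	printf("status %d\n", ctx.status);
	return 0;
}
```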
b91e1751 JG |
919 | static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev, |
920 | unsigned int order) | |
921 | { | |
922 | struct mlx5_mr_cache *cache = &dev->cache; | |
923 | ||
924 | if (order < cache->ent[0].order) | |
925 | return &cache->ent[0]; | |
926 | order = order - cache->ent[0].order; | |
927 | if (order > MR_CACHE_LAST_STD_ENTRY) | |
928 | return NULL; | |
929 | return &cache->ent[order]; | |
930 | } | |
931 | ||
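mr_cache_ent_from_order() simply offsets the requested order by the first entry's order to get an array index, clamping small orders to entry 0 and refusing anything past the last standard entry. A standalone sketch with assumed constants (first order 2, and 15 as a stand-in for MR_CACHE_LAST_STD_ENTRY):

```c
#include <stdio.h>

#define FIRST_ORDER 2        /* cache->ent[0].order in this driver */
#define LAST_STD_ENTRY 15    /* assumed stand-in for MR_CACHE_LAST_STD_ENTRY */

static int ent_index_from_order(unsigned int order)
{
	if (order < FIRST_ORDER)
		return 0;
	order -= FIRST_ORDER;
	return order > LAST_STD_ENTRY ? -1 : (int)order;
}

int main(void)
{
	printf("%d %d %d\n", ent_index_from_order(1),
	       ent_index_from_order(7), ent_index_from_order(30));
	return 0;
}
```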
38f8ff5b JG |
932 | static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, |
933 | u64 length, int access_flags) | |
934 | { | |
935 | mr->ibmr.lkey = mr->mmkey.key; | |
936 | mr->ibmr.rkey = mr->mmkey.key; | |
937 | mr->ibmr.length = length; | |
ca991a7d | 938 | mr->ibmr.device = &dev->ib_dev; |
38f8ff5b JG |
939 | mr->access_flags = access_flags; |
940 | } | |
941 | ||
90da7dc8 JX |
942 | static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem, |
943 | u64 iova) | |
944 | { | |
945 | /* | |
946 | * The alignment of iova has already been checked upon entering | |
947 | * UVERBS_METHOD_REG_DMABUF_MR | |
948 | */ | |
949 | umem->iova = iova; | |
950 | return PAGE_SIZE; | |
951 | } | |
952 | ||
38f8ff5b JG |
953 | static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, |
954 | struct ib_umem *umem, u64 iova, | |
955 | int access_flags) | |
e126ba97 EC |
956 | { |
957 | struct mlx5_ib_dev *dev = to_mdev(pd->device); | |
f0093fb1 | 958 | struct mlx5_cache_ent *ent; |
e126ba97 | 959 | struct mlx5_ib_mr *mr; |
d5c7916f | 960 | unsigned int page_size; |
e126ba97 | 961 | |
90da7dc8 JX |
962 | if (umem->is_dmabuf) |
963 | page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova); | |
964 | else | |
965 | page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, | |
966 | 0, iova); | |
d5c7916f JG |
967 | if (WARN_ON(!page_size)) |
968 | return ERR_PTR(-EINVAL); | |
969 | ent = mr_cache_ent_from_order( | |
970 | dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size))); | |
38f8ff5b JG |
971 | /* |
972 | * Matches access in alloc_cache_mr(). If the MR can't come from the | |
973 | * cache then synchronously create an uncached one. | |
974 | */ | |
975 | if (!ent || ent->limit == 0 || | |
976 | !mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) { | |
977 | mutex_lock(&dev->slow_path_mutex); | |
ef3642c4 | 978 | mr = reg_create(pd, umem, iova, access_flags, page_size, false); |
38f8ff5b JG |
979 | mutex_unlock(&dev->slow_path_mutex); |
980 | return mr; | |
981 | } | |
8383da3e | 982 | |
aad719dc JG |
983 | mr = get_cache_mr(ent); |
984 | if (!mr) { | |
985 | mr = create_cache_mr(ent); | |
38f8ff5b JG |
986 | /* |
987 | * The above already tried to do the same stuff as reg_create(), | |
988 | * no reason to try it again. | |
989 | */ | |
aad719dc JG |
990 | if (IS_ERR(mr)) |
991 | return mr; | |
e126ba97 EC |
992 | } |
993 | ||
7d0cc6ed AK |
994 | mr->ibmr.pd = pd; |
995 | mr->umem = umem; | |
f0093fb1 JG |
996 | mr->mmkey.iova = iova; |
997 | mr->mmkey.size = umem->length; | |
a606b0f6 | 998 | mr->mmkey.pd = to_mpd(pd)->pdn; |
d5c7916f | 999 | mr->page_shift = order_base_2(page_size); |
38f8ff5b | 1000 | set_mr_fields(dev, mr, umem->length, access_flags); |
b475598a | 1001 | |
e126ba97 | 1002 | return mr; |
e126ba97 EC |
1003 | } |
1004 | ||
7d0cc6ed AK |
1005 | #define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \ |
1006 | MLX5_UMR_MTT_ALIGNMENT) | |
1007 | #define MLX5_SPARE_UMR_CHUNK 0x10000 | |
1008 | ||
8010d74b JG |
1009 | /* |
1010 | * Allocate a temporary buffer to hold the per-page information to transfer to | |
1011 | * HW. For efficiency this should be as large as it can be, but buffer | |
1012 | * allocation failure is not allowed, so try smaller sizes. | |
1013 | */ | |
1014 | static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask) | |
f22c30aa | 1015 | { |
8010d74b JG |
1016 | const size_t xlt_chunk_align = |
1017 | MLX5_UMR_MTT_ALIGNMENT / ent_size;
1018 | size_t size; | |
1019 | void *res = NULL; | |
1020 | ||
1021 | static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0); | |
1022 | ||
1023 | /* | |
1024 | * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context, just that the
1025 | * allocation can't trigger any kind of reclaim. | |
1026 | */ | |
1027 | might_sleep(); | |
1028 | ||
1029 | gfp_mask |= __GFP_ZERO; | |
1030 | ||
1031 | /* | |
1032 | * If the system already has a suitable high order page then just use | |
1033 | * that, but don't try hard to create one. This max is about 1M, so a | |
1034 | * free x86 huge page will satisfy it. | |
1035 | */ | |
1036 | size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align), | |
1037 | MLX5_MAX_UMR_CHUNK); | |
1038 | *nents = size / ent_size; | |
1039 | res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, | |
1040 | get_order(size)); | |
1041 | if (res) | |
1042 | return res; | |
1043 | ||
1044 | if (size > MLX5_SPARE_UMR_CHUNK) { | |
1045 | size = MLX5_SPARE_UMR_CHUNK; | |
1046 | *nents = size / ent_size;
1047 | res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN, | |
1048 | get_order(size)); | |
1049 | if (res) | |
1050 | return res; | |
1051 | } | |
1052 | ||
1053 | *nents = PAGE_SIZE / ent_size; | |
1054 | res = (void *)__get_free_page(gfp_mask); | |
1055 | if (res) | |
1056 | return res; | |
1057 | ||
f22c30aa | 1058 | mutex_lock(&xlt_emergency_page_mutex); |
8010d74b | 1059 | memset(xlt_emergency_page, 0, PAGE_SIZE); |
f22c30aa JG |
1060 | return xlt_emergency_page; |
1061 | } | |
1062 | ||
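The allocation above is a tiered fallback: try a chunk up to MLX5_MAX_UMR_CHUNK, then the smaller spare chunk, then a single page, and as a last resort the pre-allocated emergency page taken under a mutex. A condensed userspace sketch of that fallback order (malloc() stands in for __get_free_pages(), a static array for xlt_emergency_page):

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char emergency_page[4096];    /* xlt_emergency_page stand-in */

static void *alloc_tiered(size_t *len, size_t want, size_t spare, size_t page)
{
	size_t sizes[] = { want, spare, page };

	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		void *buf = malloc(sizes[i]);     /* __get_free_pages() stand-in */
		if (buf) {
			*len = sizes[i];
			return buf;
		}
	}
	/* Last resort: shared emergency buffer (mutex-serialized in the driver). */
	memset(emergency_page, 0, sizeof(emergency_page));
	*len = sizeof(emergency_page);
	return emergency_page;
}

int main(void)
{
	size_t len;
	void *buf = alloc_tiered(&len, 1 << 20, 1 << 16, 4096);

	printf("got %zu bytes\n", len);
	if (buf != emergency_page)
		free(buf);
	return 0;
}
```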
8010d74b | 1063 | static void mlx5_ib_free_xlt(void *xlt, size_t length) |
f22c30aa | 1064 | { |
8010d74b JG |
1065 | if (xlt == xlt_emergency_page) { |
1066 | mutex_unlock(&xlt_emergency_page_mutex); | |
1067 | return; | |
1068 | } | |
1069 | ||
1070 | free_pages((unsigned long)xlt, get_order(length)); | |
1071 | } | |
1072 | ||
1073 | /* | |
1074 | * Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for | |
1075 | * submission. | |
1076 | */ | |
1077 | static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr, | |
1078 | struct mlx5_umr_wr *wr, struct ib_sge *sg, | |
1079 | size_t nents, size_t ent_size, | |
1080 | unsigned int flags) | |
1081 | { | |
ca991a7d | 1082 | struct mlx5_ib_dev *dev = mr_to_mdev(mr); |
7ec3df17 | 1083 | struct device *ddev = &dev->mdev->pdev->dev; |
8010d74b JG |
1084 | dma_addr_t dma; |
1085 | void *xlt; | |
1086 | ||
1087 | xlt = mlx5_ib_alloc_xlt(&nents, ent_size, | |
1088 | flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : | |
1089 | GFP_KERNEL); | |
1090 | sg->length = nents * ent_size; | |
1091 | dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE); | |
1092 | if (dma_mapping_error(ddev, dma)) { | |
1093 | mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); | |
1094 | mlx5_ib_free_xlt(xlt, sg->length); | |
1095 | return NULL; | |
1096 | } | |
1097 | sg->addr = dma; | |
1098 | sg->lkey = dev->umrc.pd->local_dma_lkey; | |
1099 | ||
1100 | memset(wr, 0, sizeof(*wr)); | |
1101 | wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; | |
1102 | if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) | |
1103 | wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; | |
1104 | wr->wr.sg_list = sg; | |
1105 | wr->wr.num_sge = 1; | |
1106 | wr->wr.opcode = MLX5_IB_WR_UMR; | |
1107 | wr->pd = mr->ibmr.pd; | |
1108 | wr->mkey = mr->mmkey.key; | |
1109 | wr->length = mr->mmkey.size; | |
1110 | wr->virt_addr = mr->mmkey.iova; | |
1111 | wr->access_flags = mr->access_flags; | |
1112 | wr->page_shift = mr->page_shift; | |
1113 | wr->xlt_size = sg->length; | |
1114 | return xlt; | |
1115 | } | |
1116 | ||
1117 | static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, | |
1118 | struct ib_sge *sg) | |
1119 | { | |
7ec3df17 | 1120 | struct device *ddev = &dev->mdev->pdev->dev; |
8010d74b JG |
1121 | |
1122 | dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE); | |
1123 | mlx5_ib_free_xlt(xlt, sg->length); | |
f22c30aa JG |
1124 | } |
1125 | ||
f1eaac37 JG |
1126 | static unsigned int xlt_wr_final_send_flags(unsigned int flags) |
1127 | { | |
1128 | unsigned int res = 0; | |
1129 | ||
1130 | if (flags & MLX5_IB_UPD_XLT_ENABLE) | |
1131 | res |= MLX5_IB_SEND_UMR_ENABLE_MR | | |
1132 | MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | | |
1133 | MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; | |
1134 | if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS) | |
1135 | res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; | |
1136 | if (flags & MLX5_IB_UPD_XLT_ADDR) | |
1137 | res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; | |
1138 | return res; | |
1139 | } | |
1140 | ||
7d0cc6ed AK |
1141 | int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, |
1142 | int page_shift, int flags) | |
1143 | { | |
ca991a7d | 1144 | struct mlx5_ib_dev *dev = mr_to_mdev(mr); |
7ec3df17 | 1145 | struct device *ddev = &dev->mdev->pdev->dev; |
7d0cc6ed | 1146 | void *xlt; |
e622f2f4 | 1147 | struct mlx5_umr_wr wr; |
832a6b06 HE |
1148 | struct ib_sge sg; |
1149 | int err = 0; | |
81713d37 AK |
1150 | int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT) |
1151 | ? sizeof(struct mlx5_klm) | |
1152 | : sizeof(struct mlx5_mtt); | |
7d0cc6ed AK |
1153 | const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size; |
1154 | const int page_mask = page_align - 1; | |
832a6b06 HE |
1155 | size_t pages_mapped = 0; |
1156 | size_t pages_to_map = 0; | |
8010d74b | 1157 | size_t pages_iter; |
cbe4b8f0 | 1158 | size_t size_to_map = 0; |
8010d74b | 1159 | size_t orig_sg_length; |
832a6b06 | 1160 | |
c8d75a98 MD |
1161 | if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && |
1162 | !umr_can_use_indirect_mkey(dev)) | |
1163 | return -EPERM; | |
832a6b06 | 1164 | |
f1eaac37 JG |
1165 | if (WARN_ON(!mr->umem->is_odp)) |
1166 | return -EINVAL; | |
1167 | ||
832a6b06 | 1168 | /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, |
7d0cc6ed AK |
1169 | * so we need to align the offset and length accordingly |
1170 | */ | |
1171 | if (idx & page_mask) { | |
1172 | npages += idx & page_mask; | |
1173 | idx &= ~page_mask; | |
832a6b06 | 1174 | } |
7d0cc6ed | 1175 | pages_to_map = ALIGN(npages, page_align); |
7d0cc6ed | 1176 | |
8010d74b JG |
1177 | xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags); |
1178 | if (!xlt) | |
1179 | return -ENOMEM; | |
1180 | pages_iter = sg.length / desc_size; | |
1181 | orig_sg_length = sg.length; | |
832a6b06 | 1182 | |
f1eaac37 JG |
1183 | if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { |
1184 | struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); | |
1185 | size_t max_pages = ib_umem_odp_num_pages(odp) - idx; | |
cbe4b8f0 | 1186 | |
f1eaac37 | 1187 | pages_to_map = min_t(size_t, pages_to_map, max_pages); |
cbe4b8f0 AK |
1188 | } |
1189 | ||
7d0cc6ed AK |
1190 | wr.page_shift = page_shift; |
1191 | ||
832a6b06 HE |
1192 | for (pages_mapped = 0; |
1193 | pages_mapped < pages_to_map && !err; | |
7d0cc6ed | 1194 | pages_mapped += pages_iter, idx += pages_iter) { |
438b228e | 1195 | npages = min_t(int, pages_iter, pages_to_map - pages_mapped); |
cbe4b8f0 | 1196 | size_to_map = npages * desc_size; |
8010d74b JG |
1197 | dma_sync_single_for_cpu(ddev, sg.addr, sg.length, |
1198 | DMA_TO_DEVICE); | |
f1eaac37 | 1199 | mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); |
8010d74b JG |
1200 | dma_sync_single_for_device(ddev, sg.addr, sg.length, |
1201 | DMA_TO_DEVICE); | |
832a6b06 | 1202 | |
cbe4b8f0 | 1203 | sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); |
7d0cc6ed | 1204 | |
f1eaac37 JG |
1205 | if (pages_mapped + pages_iter >= pages_to_map) |
1206 | wr.wr.send_flags |= xlt_wr_final_send_flags(flags); | |
832a6b06 | 1207 | |
7d0cc6ed | 1208 | wr.offset = idx * desc_size; |
31616255 | 1209 | wr.xlt_size = sg.length; |
832a6b06 | 1210 | |
d5ea2df9 | 1211 | err = mlx5_ib_post_send_wait(dev, &wr); |
832a6b06 | 1212 | } |
8010d74b JG |
1213 | sg.length = orig_sg_length; |
1214 | mlx5_ib_unmap_free_xlt(dev, xlt, &sg); | |
832a6b06 HE |
1215 | return err; |
1216 | } | |
832a6b06 | 1217 | |
f1eaac37 JG |
1218 | /* |
1219 | * Send the DMA list to the HW for a normal MR using UMR. | |
90da7dc8 JX |
1220 | * A dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
1221 | * flag may be used. | |
f1eaac37 | 1222 | */ |
90da7dc8 | 1223 | int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) |
f1eaac37 | 1224 | { |
ca991a7d | 1225 | struct mlx5_ib_dev *dev = mr_to_mdev(mr); |
7ec3df17 | 1226 | struct device *ddev = &dev->mdev->pdev->dev; |
f1eaac37 JG |
1227 | struct ib_block_iter biter; |
1228 | struct mlx5_mtt *cur_mtt; | |
1229 | struct mlx5_umr_wr wr; | |
1230 | size_t orig_sg_length; | |
1231 | struct mlx5_mtt *mtt; | |
1232 | size_t final_size; | |
1233 | struct ib_sge sg; | |
1234 | int err = 0; | |
1235 | ||
1236 | if (WARN_ON(mr->umem->is_odp)) | |
1237 | return -EINVAL; | |
1238 | ||
1239 | mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, | |
1240 | ib_umem_num_dma_blocks(mr->umem, | |
1241 | 1 << mr->page_shift), | |
1242 | sizeof(*mtt), flags); | |
1243 | if (!mtt) | |
1244 | return -ENOMEM; | |
1245 | orig_sg_length = sg.length; | |
1246 | ||
1247 | cur_mtt = mtt; | |
1248 | rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap, | |
1249 | BIT(mr->page_shift)) { | |
1250 | if (cur_mtt == (void *)mtt + sg.length) { | |
1251 | dma_sync_single_for_device(ddev, sg.addr, sg.length, | |
1252 | DMA_TO_DEVICE); | |
1253 | err = mlx5_ib_post_send_wait(dev, &wr); | |
1254 | if (err) | |
1255 | goto err; | |
1256 | dma_sync_single_for_cpu(ddev, sg.addr, sg.length, | |
1257 | DMA_TO_DEVICE); | |
1258 | wr.offset += sg.length; | |
1259 | cur_mtt = mtt; | |
1260 | } | |
1261 | ||
1262 | cur_mtt->ptag = | |
1263 | cpu_to_be64(rdma_block_iter_dma_address(&biter) | | |
1264 | MLX5_IB_MTT_PRESENT); | |
90da7dc8 JX |
1265 | |
1266 | if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) | |
1267 | cur_mtt->ptag = 0; | |
1268 | ||
f1eaac37 JG |
1269 | cur_mtt++; |
1270 | } | |
1271 | ||
1272 | final_size = (void *)cur_mtt - (void *)mtt; | |
1273 | sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); | |
1274 | memset(cur_mtt, 0, sg.length - final_size); | |
1275 | wr.wr.send_flags |= xlt_wr_final_send_flags(flags); | |
1276 | wr.xlt_size = sg.length; | |
1277 | ||
1278 | dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); | |
1279 | err = mlx5_ib_post_send_wait(dev, &wr); | |
1280 | ||
1281 | err: | |
1282 | sg.length = orig_sg_length; | |
1283 | mlx5_ib_unmap_free_xlt(dev, mtt, &sg); | |
1284 | return err; | |
1285 | } | |
1286 | ||
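mlx5_ib_update_mr_pas() streams DMA addresses through a fixed-size XLT buffer: fill entries, post a UMR whenever the buffer is full, then zero-pad and post the final partial chunk with the final send flags. A standalone sketch of that fill/flush pattern (buffer size and addresses are made up):

```c
#include <stdio.h>
#include <string.h>

#define BUF_ENTRIES 4

static void flush(const unsigned long *buf, int n, int final)
{
	printf("post %d entries starting at %lu%s\n", n, buf[0],
	       final ? " (final)" : "");
}

int main(void)
{
	unsigned long buf[BUF_ENTRIES];
	int used = 0;

	for (unsigned long addr = 0; addr < 10; addr++) {
		if (used == BUF_ENTRIES) {        /* buffer full: flush and reuse */
			flush(buf, used, 0);
			used = 0;
		}
		buf[used++] = addr;
	}
	/* Zero-pad the final partial chunk before posting it. */
	memset(buf + used, 0, (BUF_ENTRIES - used) * sizeof(buf[0]));
	flush(buf, used, 1);
	return 0;
}
```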
395a8e4c NO |
1287 | /* |
1288 | * Create a new MR directly via the CREATE_MKEY command, bypassing the MR cache.
1289 | * If populate is false, the mkey is left disabled (free) for later UMR configuration.
1290 | */ | |
ef3642c4 JG |
1291 | static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, |
1292 | u64 iova, int access_flags, | |
1293 | unsigned int page_size, bool populate) | |
e126ba97 EC |
1294 | { |
1295 | struct mlx5_ib_dev *dev = to_mdev(pd->device); | |
e126ba97 | 1296 | struct mlx5_ib_mr *mr; |
ec22eb53 SM |
1297 | __be64 *pas; |
1298 | void *mkc; | |
e126ba97 | 1299 | int inlen; |
ec22eb53 | 1300 | u32 *in; |
e126ba97 | 1301 | int err; |
938fe83c | 1302 | bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); |
e126ba97 | 1303 | |
ef3642c4 JG |
1304 | if (!page_size) |
1305 | return ERR_PTR(-EINVAL); | |
1306 | mr = kzalloc(sizeof(*mr), GFP_KERNEL); | |
e126ba97 EC |
1307 | if (!mr) |
1308 | return ERR_PTR(-ENOMEM); | |
1309 | ||
ff740aef IL |
1310 | mr->ibmr.pd = pd; |
1311 | mr->access_flags = access_flags; | |
d5c7916f | 1312 | mr->page_shift = order_base_2(page_size); |
ff740aef IL |
1313 | |
1314 | inlen = MLX5_ST_SZ_BYTES(create_mkey_in); | |
1315 | if (populate) | |
d5c7916f JG |
1316 | inlen += sizeof(*pas) * |
1317 | roundup(ib_umem_num_dma_blocks(umem, page_size), 2); | |
1b9a07ee | 1318 | in = kvzalloc(inlen, GFP_KERNEL); |
e126ba97 EC |
1319 | if (!in) { |
1320 | err = -ENOMEM; | |
1321 | goto err_1; | |
1322 | } | |
ec22eb53 | 1323 | pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); |
8383da3e JG |
1324 | if (populate) { |
1325 | if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) { | |
1326 | err = -EINVAL; | |
1327 | goto err_2; | |
1328 | } | |
d5c7916f | 1329 | mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas, |
c438fde1 | 1330 | pg_cap ? MLX5_IB_MTT_PRESENT : 0); |
8383da3e | 1331 | } |
e126ba97 | 1332 | |
ec22eb53 | 1333 | /* The pg_access bit allows setting the access flags |
cc149f75 | 1334 | * in the page list submitted with the command. */ |
ec22eb53 SM |
1335 | MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); |
1336 | ||
1337 | mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); | |
f0093fb1 | 1338 | set_mkc_access_pd_addr_fields(mkc, access_flags, iova, |
5eb29f0d | 1339 | populate ? pd : dev->umrc.pd); |
ff740aef | 1340 | MLX5_SET(mkc, mkc, free, !populate); |
cdbd0d2b | 1341 | MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); |
8b7ff7f3 | 1342 | MLX5_SET(mkc, mkc, umr_en, 1); |
ec22eb53 | 1343 | |
f0093fb1 | 1344 | MLX5_SET64(mkc, mkc, len, umem->length); |
ec22eb53 SM |
1345 | MLX5_SET(mkc, mkc, bsf_octword_size, 0); |
1346 | MLX5_SET(mkc, mkc, translations_octword_size, | |
d5c7916f JG |
1347 | get_octo_len(iova, umem->length, mr->page_shift)); |
1348 | MLX5_SET(mkc, mkc, log_page_size, mr->page_shift); | |
ff740aef IL |
1349 | if (populate) { |
1350 | MLX5_SET(create_mkey_in, in, translations_octword_actual_size, | |
d5c7916f | 1351 | get_octo_len(iova, umem->length, mr->page_shift)); |
ff740aef | 1352 | } |
ec22eb53 | 1353 | |
fc6a9f86 | 1354 | err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); |
e126ba97 EC |
1355 | if (err) { |
1356 | mlx5_ib_warn(dev, "create mkey failed\n"); | |
1357 | goto err_2; | |
1358 | } | |
aa8e08d2 | 1359 | mr->mmkey.type = MLX5_MKEY_MR; |
49780d42 | 1360 | mr->desc_size = sizeof(struct mlx5_mtt); |
38f8ff5b JG |
1361 | mr->umem = umem; |
1362 | set_mr_fields(dev, mr, umem->length, access_flags); | |
479163f4 | 1363 | kvfree(in); |
e126ba97 | 1364 | |
a606b0f6 | 1365 | mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); |
e126ba97 EC |
1366 | |
1367 | return mr; | |
1368 | ||
1369 | err_2: | |
479163f4 | 1370 | kvfree(in); |
e126ba97 | 1371 | err_1: |
ef3642c4 | 1372 | kfree(mr); |
e126ba97 EC |
1373 | return ERR_PTR(err); |
1374 | } | |
1375 | ||
3b113a1e AL |
1376 | static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, |
1377 | u64 length, int acc, int mode) | |
6c29f57e AL |
1378 | { |
1379 | struct mlx5_ib_dev *dev = to_mdev(pd->device); | |
1380 | int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); | |
6c29f57e AL |
1381 | struct mlx5_ib_mr *mr; |
1382 | void *mkc; | |
1383 | u32 *in; | |
1384 | int err; | |
1385 | ||
1386 | mr = kzalloc(sizeof(*mr), GFP_KERNEL); | |
1387 | if (!mr) | |
1388 | return ERR_PTR(-ENOMEM); | |
1389 | ||
1390 | in = kzalloc(inlen, GFP_KERNEL); | |
1391 | if (!in) { | |
1392 | err = -ENOMEM; | |
1393 | goto err_free; | |
1394 | } | |
1395 | ||
1396 | mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); | |
1397 | ||
3b113a1e AL |
1398 | MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); |
1399 | MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); | |
6c29f57e | 1400 | MLX5_SET64(mkc, mkc, len, length); |
03232cc4 | 1401 | set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); |
6c29f57e | 1402 | |
fc6a9f86 | 1403 | err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); |
6c29f57e AL |
1404 | if (err) |
1405 | goto err_in; | |
1406 | ||
1407 | kfree(in); | |
1408 | ||
fc332570 | 1409 | set_mr_fields(dev, mr, length, acc); |
6c29f57e AL |
1410 | |
1411 | return &mr->ibmr; | |
1412 | ||
1413 | err_in: | |
1414 | kfree(in); | |
1415 | ||
1416 | err_free: | |
1417 | kfree(mr); | |
1418 | ||
1419 | return ERR_PTR(err); | |
1420 | } | |
1421 | ||
813e90b1 MS |
1422 | int mlx5_ib_advise_mr(struct ib_pd *pd, |
1423 | enum ib_uverbs_advise_mr_advice advice, | |
1424 | u32 flags, | |
1425 | struct ib_sge *sg_list, | |
1426 | u32 num_sge, | |
1427 | struct uverbs_attr_bundle *attrs) | |
1428 | { | |
1429 | if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && | |
677cf51f YH |
1430 | advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && |
1431 | advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT) | |
813e90b1 MS |
1432 | return -EOPNOTSUPP; |
1433 | ||
1434 | return mlx5_ib_advise_mr_prefetch(pd, advice, flags, | |
1435 | sg_list, num_sge); | |
1436 | } | |
1437 | ||
6c29f57e AL |
1438 | struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, |
1439 | struct ib_dm_mr_attr *attr, | |
1440 | struct uverbs_attr_bundle *attrs) | |
1441 | { | |
1442 | struct mlx5_ib_dm *mdm = to_mdm(dm); | |
3b113a1e AL |
1443 | struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev; |
1444 | u64 start_addr = mdm->dev_addr + attr->offset; | |
1445 | int mode; | |
1446 | ||
1447 | switch (mdm->type) { | |
1448 | case MLX5_IB_UAPI_DM_TYPE_MEMIC: | |
1449 | if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS) | |
1450 | return ERR_PTR(-EINVAL); | |
1451 | ||
1452 | mode = MLX5_MKC_ACCESS_MODE_MEMIC; | |
1453 | start_addr -= pci_resource_start(dev->pdev, 0); | |
1454 | break; | |
25c13324 AL |
1455 | case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: |
1456 | case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: | |
1457 | if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS) | |
1458 | return ERR_PTR(-EINVAL); | |
1459 | ||
1460 | mode = MLX5_MKC_ACCESS_MODE_SW_ICM; | |
1461 | break; | |
3b113a1e | 1462 | default: |
6c29f57e | 1463 | return ERR_PTR(-EINVAL); |
3b113a1e | 1464 | } |
6c29f57e | 1465 | |
3b113a1e AL |
1466 | return mlx5_ib_get_dm_mr(pd, start_addr, attr->length, |
1467 | attr->access_flags, mode); | |
6c29f57e AL |
1468 | } |
1469 | ||
38f8ff5b JG |
1470 | static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, |
1471 | u64 iova, int access_flags) | |
e126ba97 EC |
1472 | { |
1473 | struct mlx5_ib_dev *dev = to_mdev(pd->device); | |
1474 | struct mlx5_ib_mr *mr = NULL; | |
8383da3e | 1475 | bool xlt_with_umr; |
e126ba97 EC |
1476 | int err; |
1477 | ||
38f8ff5b | 1478 | xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length); |
8383da3e | 1479 | if (xlt_with_umr) { |
38f8ff5b JG |
1480 | mr = alloc_cacheable_mr(pd, umem, iova, access_flags); |
1481 | } else { | |
ef3642c4 JG |
1482 | unsigned int page_size = mlx5_umem_find_best_pgsz( |
1483 | umem, mkc, log_page_size, 0, iova); | |
1484 | ||
6bc1a656 | 1485 | mutex_lock(&dev->slow_path_mutex); |
ef3642c4 | 1486 | mr = reg_create(pd, umem, iova, access_flags, page_size, true); |
6bc1a656 ML |
1487 | mutex_unlock(&dev->slow_path_mutex); |
1488 | } | |
e126ba97 | 1489 | if (IS_ERR(mr)) { |
38f8ff5b JG |
1490 | ib_umem_release(umem); |
1491 | return ERR_CAST(mr); | |
e126ba97 EC |
1492 | } |
1493 | ||
a606b0f6 | 1494 | mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); |
e126ba97 | 1495 | |
38f8ff5b | 1496 | atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); |
e126ba97 | 1497 | |
38f8ff5b | 1498 | if (xlt_with_umr) { |
8383da3e JG |
1499 | /* |
1500 | * If the MR was created with reg_create then it will be | |
1501 | * configured properly but left disabled. It is safe to go ahead | |
1502 | * and configure it again via UMR while enabling it. | |
1503 | */ | |
f1eaac37 | 1504 | err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); |
ff740aef | 1505 | if (err) { |
fbcd4983 | 1506 | dereg_mr(dev, mr); |
ff740aef IL |
1507 | return ERR_PTR(err); |
1508 | } | |
1509 | } | |
38f8ff5b JG |
1510 | return &mr->ibmr; |
1511 | } | |
ff740aef | 1512 | |
38f8ff5b JG |
1513 | static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, |
1514 | u64 iova, int access_flags, | |
1515 | struct ib_udata *udata) | |
1516 | { | |
1517 | struct mlx5_ib_dev *dev = to_mdev(pd->device); | |
1518 | struct ib_umem_odp *odp; | |
1519 | struct mlx5_ib_mr *mr; | |
1520 | int err; | |
a03bfc37 | 1521 | |
38f8ff5b JG |
1522 | if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) |
1523 | return ERR_PTR(-EOPNOTSUPP); | |
1524 | ||
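	/*
	 * start == 0 with length == U64_MAX requests an implicit ODP MR that
	 * lazily covers the whole process address space rather than a fixed
	 * range (descriptive note; behaviour follows from the checks below).
	 */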
1525 | if (!start && length == U64_MAX) { | |
1526 | if (iova != 0) | |
1527 | return ERR_PTR(-EINVAL); | |
1528 | if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) | |
1529 | return ERR_PTR(-EINVAL); | |
1530 | ||
1531 | mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); | |
1532 | if (IS_ERR(mr)) | |
1533 | return ERR_CAST(mr); | |
1534 | return &mr->ibmr; | |
1535 | } | |
1536 | ||
1537 | /* ODP requires xlt update via umr to work. */ | |
1538 | if (!mlx5_ib_can_load_pas_with_umr(dev, length)) | |
1539 | return ERR_PTR(-EINVAL); | |
1540 | ||
1541 | odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, | |
1542 | &mlx5_mn_ops); | |
1543 | if (IS_ERR(odp)) | |
1544 | return ERR_CAST(odp); | |
1545 | ||
1546 | mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags); | |
1547 | if (IS_ERR(mr)) { | |
1548 | ib_umem_release(&odp->umem); | |
1549 | return ERR_CAST(mr); | |
a6bc3875 | 1550 | } |
13859d5d | 1551 | |
38f8ff5b | 1552 | odp->private = mr; |
db72438c | 1553 | err = mlx5r_store_odp_mkey(dev, &mr->mmkey); |
38f8ff5b JG |
1554 | if (err) |
1555 | goto err_dereg_mr; | |
1556 | ||
1557 | err = mlx5_ib_init_odp_mr(mr); | |
1558 | if (err) | |
1559 | goto err_dereg_mr; | |
ff740aef | 1560 | return &mr->ibmr; |
38f8ff5b JG |
1561 | |
1562 | err_dereg_mr: | |
1563 | dereg_mr(dev, mr); | |
e126ba97 EC |
1564 | return ERR_PTR(err); |
1565 | } | |
1566 | ||
38f8ff5b JG |
1567 | struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, |
1568 | u64 iova, int access_flags, | |
1569 | struct ib_udata *udata) | |
1570 | { | |
1571 | struct mlx5_ib_dev *dev = to_mdev(pd->device); | |
1572 | struct ib_umem *umem; | |
1573 | ||
1574 | if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) | |
1575 | return ERR_PTR(-EOPNOTSUPP); | |
1576 | ||
1577 | mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", | |
1578 | start, iova, length, access_flags); | |
1579 | ||
1580 | if (access_flags & IB_ACCESS_ON_DEMAND) | |
1581 | return create_user_odp_mr(pd, start, length, iova, access_flags, | |
1582 | udata); | |
1583 | umem = ib_umem_get(&dev->ib_dev, start, length, access_flags); | |
1584 | if (IS_ERR(umem)) | |
1585 | return ERR_CAST(umem); | |
1586 | return create_real_mr(pd, umem, iova, access_flags); | |
1587 | } | |
1588 | ||
90da7dc8 JX |
1589 | static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) |
1590 | { | |
1591 | struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv; | |
1592 | struct mlx5_ib_mr *mr = umem_dmabuf->private; | |
1593 | ||
1594 | dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); | |
1595 | ||
1596 | if (!umem_dmabuf->sgt) | |
1597 | return; | |
1598 | ||
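	/*
	 * The exporter is about to move the buffer: zap the mkey's XLT
	 * entries so the HW faults instead of using stale addresses, then
	 * release the current page mapping (descriptive note).
	 */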
1599 | mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); | |
1600 | ib_umem_dmabuf_unmap_pages(umem_dmabuf); | |
1601 | } | |
1602 | ||
1603 | static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = { | |
1604 | .allow_peer2peer = 1, | |
1605 | .move_notify = mlx5_ib_dmabuf_invalidate_cb, | |
1606 | }; | |
1607 | ||
1608 | struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset, | |
1609 | u64 length, u64 virt_addr, | |
1610 | int fd, int access_flags, | |
1611 | struct ib_udata *udata) | |
1612 | { | |
1613 | struct mlx5_ib_dev *dev = to_mdev(pd->device); | |
1614 | struct mlx5_ib_mr *mr = NULL; | |
1615 | struct ib_umem_dmabuf *umem_dmabuf; | |
1616 | int err; | |
1617 | ||
1618 | if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || | |
1619 | !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) | |
1620 | return ERR_PTR(-EOPNOTSUPP); | |
1621 | ||
1622 | mlx5_ib_dbg(dev, | |
1623 | "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n", | |
1624 | offset, virt_addr, length, fd, access_flags); | |
1625 | ||
1626 | /* dmabuf requires xlt update via umr to work. */ | |
1627 | if (!mlx5_ib_can_load_pas_with_umr(dev, length)) | |
1628 | return ERR_PTR(-EINVAL); | |
1629 | ||
1630 | umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd, | |
1631 | access_flags, | |
1632 | &mlx5_ib_dmabuf_attach_ops); | |
1633 | if (IS_ERR(umem_dmabuf)) { | |
1634 | mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n", | |
1635 | PTR_ERR(umem_dmabuf)); | |
1636 | return ERR_CAST(umem_dmabuf); | |
1637 | } | |
1638 | ||
1639 | mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr, | |
1640 | access_flags); | |
1641 | if (IS_ERR(mr)) { | |
1642 | ib_umem_release(&umem_dmabuf->umem); | |
1643 | return ERR_CAST(mr); | |
1644 | } | |
1645 | ||
1646 | mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); | |
1647 | ||
1648 | atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); | |
1649 | umem_dmabuf->private = mr; | |
db72438c | 1650 | err = mlx5r_store_odp_mkey(dev, &mr->mmkey); |
90da7dc8 JX |
1651 | if (err) |
1652 | goto err_dereg_mr; | |
1653 | ||
1654 | err = mlx5_ib_init_dmabuf_mr(mr); | |
1655 | if (err) | |
1656 | goto err_dereg_mr; | |
1657 | return &mr->ibmr; | |
1658 | ||
1659 | err_dereg_mr: | |
1660 | dereg_mr(dev, mr); | |
1661 | return ERR_PTR(err); | |
1662 | } | |
1663 | ||
09689703 JG |
1664 | /** |
1665 | * mlx5_mr_cache_invalidate - Fence all DMA on the MR | |
1666 | * @mr: The MR to fence | |
1667 | * | |
1668 | * Upon return the NIC will not be doing any DMA to the pages under the MR, | |
1669 | * and any DMA in progress will be completed. Failure of this function | |
1670 | * indicates the HW has failed catastrophically. | |
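 *
 * Return: 0 on success or a negative error code from mlx5_ib_post_send_wait().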
1671 | */ | |
1672 | int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr) | |
e126ba97 | 1673 | { |
0025b0bd | 1674 | struct mlx5_umr_wr umrwr = {}; |
e126ba97 | 1675 | |
ca991a7d | 1676 | if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) |
89ea94a7 MG |
1677 | return 0; |
1678 | ||
9ec4483a YH |
1679 | umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | |
1680 | MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; | |
7d0cc6ed | 1681 | umrwr.wr.opcode = MLX5_IB_WR_UMR; |
ca991a7d | 1682 | umrwr.pd = mr_to_mdev(mr)->umrc.pd; |
7d0cc6ed | 1683 | umrwr.mkey = mr->mmkey.key; |
6a053953 | 1684 | umrwr.ignore_free_state = 1; |
e126ba97 | 1685 | |
ca991a7d | 1686 | return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr); |
e126ba97 EC |
1687 | } |
1688 | ||
ef3642c4 JG |
1689 | /* |
1690 | * True if the change in access flags can be done via UMR; only some access | |
1691 | * flags can be updated. | |
1692 | */ | |
1693 | static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev, | |
1694 | unsigned int current_access_flags, | |
1695 | unsigned int target_access_flags) | |
56e11d62 | 1696 | { |
ef3642c4 JG |
1697 | unsigned int diffs = current_access_flags ^ target_access_flags; |
1698 | ||
1699 | if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | | |
1700 | IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING)) | |
1701 | return false; | |
1702 | return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags, | |
1703 | target_access_flags); | |
1704 | } | |
1705 | ||
1706 | static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd, | |
1707 | int access_flags) | |
1708 | { | |
1709 | struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); | |
1710 | struct mlx5_umr_wr umrwr = { | |
1711 | .wr = { | |
1712 | .send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE | | |
1713 | MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS, | |
1714 | .opcode = MLX5_IB_WR_UMR, | |
1715 | }, | |
1716 | .mkey = mr->mmkey.key, | |
1717 | .pd = pd, | |
1718 | .access_flags = access_flags, | |
1719 | }; | |
56e11d62 NO |
1720 | int err; |
1721 | ||
ef3642c4 JG |
1722 | err = mlx5_ib_post_send_wait(dev, &umrwr); |
1723 | if (err) | |
1724 | return err; | |
56e11d62 | 1725 | |
ef3642c4 JG |
1726 | mr->access_flags = access_flags; |
1727 | mr->mmkey.pd = to_mpd(pd)->pdn; | |
1728 | return 0; | |
1729 | } | |
1730 | ||
1731 | static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr, | |
1732 | struct ib_umem *new_umem, | |
1733 | int new_access_flags, u64 iova, | |
1734 | unsigned long *page_size) | |
1735 | { | |
1736 | struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); | |
1737 | ||
1738 | /* We only track the allocated sizes of MRs from the cache */ | |
1739 | if (!mr->cache_ent) | |
1740 | return false; | |
1741 | if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length)) | |
1742 | return false; | |
1743 | ||
1744 | *page_size = | |
1745 | mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova); | |
1746 | if (WARN_ON(!*page_size)) | |
1747 | return false; | |
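	/*
	 * A cache mkey is sized for 2^order translation entries, so a
	 * UMR-only update is possible only if the new umem needs no more
	 * DMA blocks than that (descriptive note).
	 */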
1748 | return (1ULL << mr->cache_ent->order) >= | |
1749 | ib_umem_num_dma_blocks(new_umem, *page_size); | |
1750 | } | |
1751 | ||
1752 | static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, | |
1753 | int access_flags, int flags, struct ib_umem *new_umem, | |
1754 | u64 iova, unsigned long page_size) | |
1755 | { | |
1756 | struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); | |
1757 | int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE; | |
1758 | struct ib_umem *old_umem = mr->umem; | |
1759 | int err; | |
1760 | ||
1761 | /* | |
1762 | * To keep everything simple, the MR is revoked before we start to mess | |
1763 | * with it. This ensures the change is atomic relative to any use of the | |
1764 | * MR. | |
1765 | */ | |
1766 | err = mlx5_mr_cache_invalidate(mr); | |
1767 | if (err) | |
1768 | return err; | |
56e11d62 | 1769 | |
ef3642c4 JG |
1770 | if (flags & IB_MR_REREG_PD) { |
1771 | mr->ibmr.pd = pd; | |
1772 | mr->mmkey.pd = to_mpd(pd)->pdn; | |
1773 | upd_flags |= MLX5_IB_UPD_XLT_PD; | |
1774 | } | |
1775 | if (flags & IB_MR_REREG_ACCESS) { | |
1776 | mr->access_flags = access_flags; | |
1777 | upd_flags |= MLX5_IB_UPD_XLT_ACCESS; | |
56e11d62 NO |
1778 | } |
1779 | ||
ef3642c4 JG |
1780 | mr->ibmr.length = new_umem->length; |
1781 | mr->mmkey.iova = iova; | |
1782 | mr->mmkey.size = new_umem->length; | |
1783 | mr->page_shift = order_base_2(page_size); | |
1784 | mr->umem = new_umem; | |
1785 | err = mlx5_ib_update_mr_pas(mr, upd_flags); | |
1786 | if (err) { | |
1787 | /* | |
1788 | * The MR is revoked at this point so there is no issue to free | |
1789 | * new_umem. | |
1790 | */ | |
1791 | mr->umem = old_umem; | |
1792 | return err; | |
1793 | } | |
56e11d62 | 1794 | |
ef3642c4 JG |
1795 | atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages); |
1796 | ib_umem_release(old_umem); | |
1797 | atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages); | |
1798 | return 0; | |
56e11d62 NO |
1799 | } |
1800 | ||
6e0954b1 | 1801 | struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, |
ef3642c4 JG |
1802 | u64 length, u64 iova, int new_access_flags, |
1803 | struct ib_pd *new_pd, | |
6e0954b1 | 1804 | struct ib_udata *udata) |
56e11d62 NO |
1805 | { |
1806 | struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); | |
1807 | struct mlx5_ib_mr *mr = to_mmr(ib_mr); | |
56e11d62 NO |
1808 | int err; |
1809 | ||
ef3642c4 JG |
1810 | if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) |
1811 | return ERR_PTR(-EOPNOTSUPP); | |
56e11d62 | 1812 | |
ef3642c4 JG |
1813 | mlx5_ib_dbg( |
1814 | dev, | |
1815 | "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n", | |
1816 | start, iova, length, new_access_flags); | |
b4bd701a | 1817 | |
ef3642c4 | 1818 | if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) |
6e0954b1 | 1819 | return ERR_PTR(-EOPNOTSUPP); |
880505cf | 1820 | |
ef3642c4 JG |
1821 | if (!(flags & IB_MR_REREG_ACCESS)) |
1822 | new_access_flags = mr->access_flags; | |
1823 | if (!(flags & IB_MR_REREG_PD)) | |
1824 | new_pd = ib_mr->pd; | |
b4bd701a | 1825 | |
ef3642c4 JG |
1826 | if (!(flags & IB_MR_REREG_TRANS)) { |
1827 | struct ib_umem *umem; | |
1828 | ||
1829 | /* Fast path for PD/access change */ | |
1830 | if (can_use_umr_rereg_access(dev, mr->access_flags, | |
1831 | new_access_flags)) { | |
1832 | err = umr_rereg_pd_access(mr, new_pd, new_access_flags); | |
1833 | if (err) | |
1834 | return ERR_PTR(err); | |
1835 | return NULL; | |
f0093fb1 | 1836 | } |
90da7dc8 JX |
1837 | /* DM or ODP MRs don't have a normal umem so we can't re-use it */ |
1838 | if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) | |
ef3642c4 | 1839 | goto recreate; |
56e11d62 | 1840 | |
56e11d62 | 1841 | /* |
ef3642c4 JG |
1842 | * Only one active MR can refer to a umem at one time, revoke |
1843 | * the old MR before assigning the umem to the new one. | |
56e11d62 | 1844 | */ |
ef3642c4 | 1845 | err = mlx5_mr_cache_invalidate(mr); |
56e11d62 | 1846 | if (err) |
ef3642c4 JG |
1847 | return ERR_PTR(err); |
1848 | umem = mr->umem; | |
1849 | mr->umem = NULL; | |
1850 | atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages); | |
56e11d62 | 1851 | |
ef3642c4 JG |
1852 | return create_real_mr(new_pd, umem, mr->mmkey.iova, |
1853 | new_access_flags); | |
1854 | } | |
7d0cc6ed | 1855 | |
ef3642c4 | 1856 | /* |
90da7dc8 JX |
1857 | * DM doesn't have a PAS list so we can't re-use it; odp/dmabuf do, |
1858 | * but the logic around releasing the umem is different. |
ef3642c4 | 1859 | */ |
90da7dc8 | 1860 | if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) |
ef3642c4 JG |
1861 | goto recreate; |
1862 | ||
1863 | if (!(new_access_flags & IB_ACCESS_ON_DEMAND) && | |
1864 | can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) { | |
1865 | struct ib_umem *new_umem; | |
1866 | unsigned long page_size; | |
1867 | ||
1868 | new_umem = ib_umem_get(&dev->ib_dev, start, length, | |
1869 | new_access_flags); | |
1870 | if (IS_ERR(new_umem)) | |
1871 | return ERR_CAST(new_umem); | |
1872 | ||
1873 | /* Fast path for PAS change */ | |
1874 | if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova, | |
1875 | &page_size)) { | |
1876 | err = umr_rereg_pas(mr, new_pd, new_access_flags, flags, | |
1877 | new_umem, iova, page_size); | |
1878 | if (err) { | |
1879 | ib_umem_release(new_umem); | |
1880 | return ERR_PTR(err); | |
1881 | } | |
1882 | return NULL; | |
7d0cc6ed | 1883 | } |
ef3642c4 | 1884 | return create_real_mr(new_pd, new_umem, iova, new_access_flags); |
56e11d62 NO |
1885 | } |
1886 | ||
ef3642c4 JG |
1887 | /* |
1888 | * Everything else has no state we can preserve, just create a new MR | |
1889 | * from scratch | |
1890 | */ | |
1891 | recreate: | |
1892 | return mlx5_ib_reg_user_mr(new_pd, start, length, iova, | |
1893 | new_access_flags, udata); | |
56e11d62 NO |
1894 | } |
1895 | ||
8a187ee5 SG |
1896 | static int |
1897 | mlx5_alloc_priv_descs(struct ib_device *device, | |
1898 | struct mlx5_ib_mr *mr, | |
1899 | int ndescs, | |
1900 | int desc_size) | |
1901 | { | |
7ec3df17 PP |
1902 | struct mlx5_ib_dev *dev = to_mdev(device); |
1903 | struct device *ddev = &dev->mdev->pdev->dev; | |
8a187ee5 SG |
1904 | int size = ndescs * desc_size; |
1905 | int add_size; | |
1906 | int ret; | |
1907 | ||
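	/*
	 * UMR requires the descriptor list to be MLX5_UMR_ALIGN (2048 byte)
	 * aligned, while kzalloc() only guarantees ARCH_KMALLOC_MINALIGN,
	 * so over-allocate and align the pointer below.
	 */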
1908 | add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0); | |
1909 | ||
1910 | mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL); | |
1911 | if (!mr->descs_alloc) | |
1912 | return -ENOMEM; | |
1913 | ||
1914 | mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); | |
1915 | ||
7ec3df17 PP |
1916 | mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE); |
1917 | if (dma_mapping_error(ddev, mr->desc_map)) { | |
8a187ee5 SG |
1918 | ret = -ENOMEM; |
1919 | goto err; | |
1920 | } | |
1921 | ||
1922 | return 0; | |
1923 | err: | |
1924 | kfree(mr->descs_alloc); | |
1925 | ||
1926 | return ret; | |
1927 | } | |
1928 | ||
1929 | static void | |
1930 | mlx5_free_priv_descs(struct mlx5_ib_mr *mr) | |
1931 | { | |
f18ec422 | 1932 | if (!mr->umem && mr->descs) { |
8a187ee5 SG |
1933 | struct ib_device *device = mr->ibmr.device; |
1934 | int size = mr->max_descs * mr->desc_size; | |
7ec3df17 | 1935 | struct mlx5_ib_dev *dev = to_mdev(device); |
8a187ee5 | 1936 | |
7ec3df17 PP |
1937 | dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size, |
1938 | DMA_TO_DEVICE); | |
8a187ee5 SG |
1939 | kfree(mr->descs_alloc); |
1940 | mr->descs = NULL; | |
1941 | } | |
1942 | } | |
1943 | ||
eeea6953 | 1944 | static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) |
e126ba97 | 1945 | { |
f18ec422 | 1946 | if (mr->ibmr.type == IB_MR_TYPE_INTEGRITY) { |
8b91ffc1 SG |
1947 | if (mlx5_core_destroy_psv(dev->mdev, |
1948 | mr->sig->psv_memory.psv_idx)) | |
1949 | mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", | |
1950 | mr->sig->psv_memory.psv_idx); | |
1951 | if (mlx5_core_destroy_psv(dev->mdev, | |
1952 | mr->sig->psv_wire.psv_idx)) | |
1953 | mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", | |
1954 | mr->sig->psv_wire.psv_idx); | |
50211ec9 | 1955 | xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key)); |
8b91ffc1 SG |
1956 | kfree(mr->sig); |
1957 | mr->sig = NULL; | |
1958 | } | |
1959 | ||
b91e1751 | 1960 | if (!mr->cache_ent) { |
eeea6953 | 1961 | destroy_mkey(dev, mr); |
b9332dad YH |
1962 | mlx5_free_priv_descs(mr); |
1963 | } | |
6aec21f6 HE |
1964 | } |
1965 | ||
eeea6953 | 1966 | static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) |
6aec21f6 | 1967 | { |
6aec21f6 HE |
1968 | struct ib_umem *umem = mr->umem; |
1969 | ||
09689703 JG |
1970 | /* Stop all DMA */ |
1971 | if (is_odp_mr(mr)) | |
1972 | mlx5_ib_fence_odp_mr(mr); | |
90da7dc8 JX |
1973 | else if (is_dmabuf_mr(mr)) |
1974 | mlx5_ib_fence_dmabuf_mr(mr); | |
09689703 JG |
1975 | else |
1976 | clean_mr(dev, mr); | |
8b4d5bc5 | 1977 | |
1c3d247e JG |
1978 | if (umem) { |
1979 | if (!is_odp_mr(mr)) | |
1980 | atomic_sub(ib_umem_num_pages(umem), | |
1981 | &dev->mdev->priv.reg_pages); | |
1982 | ib_umem_release(umem); | |
1983 | } | |
1984 | ||
b91e1751 | 1985 | if (mr->cache_ent) |
09689703 JG |
1986 | mlx5_mr_cache_free(dev, mr); |
1987 | else | |
1988 | kfree(mr); | |
e126ba97 EC |
1989 | } |
1990 | ||
c4367a26 | 1991 | int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) |
fbcd4983 | 1992 | { |
6c984472 MG |
1993 | struct mlx5_ib_mr *mmr = to_mmr(ibmr); |
1994 | ||
de0ae958 IR |
1995 | if (ibmr->type == IB_MR_TYPE_INTEGRITY) { |
1996 | dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr); | |
1997 | dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr); | |
1998 | } | |
6c984472 | 1999 | |
5256edcb JG |
2000 | if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) { |
2001 | mlx5_ib_free_implicit_mr(mmr); | |
2002 | return 0; | |
2003 | } | |
2004 | ||
6c984472 MG |
2005 | dereg_mr(to_mdev(ibmr->device), mmr); |
2006 | ||
eeea6953 | 2007 | return 0; |
fbcd4983 IL |
2008 | } |
2009 | ||
7796d2a3 MG |
2010 | static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs, |
2011 | int access_mode, int page_shift) | |
2012 | { | |
2013 | void *mkc; | |
2014 | ||
2015 | mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); | |
2016 | ||
8383da3e JG |
2017 | /* This is only used from the kernel, so setting the PD is OK. */ |
2018 | set_mkc_access_pd_addr_fields(mkc, 0, 0, pd); | |
7796d2a3 | 2019 | MLX5_SET(mkc, mkc, free, 1); |
7796d2a3 MG |
2020 | MLX5_SET(mkc, mkc, translations_octword_size, ndescs); |
2021 | MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); | |
2022 | MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); | |
2023 | MLX5_SET(mkc, mkc, umr_en, 1); | |
2024 | MLX5_SET(mkc, mkc, log_page_size, page_shift); | |
2025 | } | |
2026 | ||
2027 | static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, | |
2028 | int ndescs, int desc_size, int page_shift, | |
2029 | int access_mode, u32 *in, int inlen) | |
2030 | { | |
2031 | struct mlx5_ib_dev *dev = to_mdev(pd->device); | |
2032 | int err; | |
2033 | ||
2034 | mr->access_mode = access_mode; | |
2035 | mr->desc_size = desc_size; | |
2036 | mr->max_descs = ndescs; | |
2037 | ||
2038 | err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size); | |
2039 | if (err) | |
2040 | return err; | |
2041 | ||
2042 | mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); | |
2043 | ||
fc6a9f86 | 2044 | err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); |
7796d2a3 MG |
2045 | if (err) |
2046 | goto err_free_descs; | |
2047 | ||
2048 | mr->mmkey.type = MLX5_MKEY_MR; | |
2049 | mr->ibmr.lkey = mr->mmkey.key; | |
2050 | mr->ibmr.rkey = mr->mmkey.key; | |
2051 | ||
2052 | return 0; | |
2053 | ||
2054 | err_free_descs: | |
2055 | mlx5_free_priv_descs(mr); | |
2056 | return err; | |
2057 | } | |
2058 | ||
6c984472 | 2059 | static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, |
de0ae958 IR |
2060 | u32 max_num_sg, u32 max_num_meta_sg, |
2061 | int desc_size, int access_mode) | |
3121e3c4 | 2062 | { |
ec22eb53 | 2063 | int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); |
6c984472 | 2064 | int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4); |
7796d2a3 | 2065 | int page_shift = 0; |
ec22eb53 | 2066 | struct mlx5_ib_mr *mr; |
ec22eb53 | 2067 | u32 *in; |
b005d316 | 2068 | int err; |
3121e3c4 SG |
2069 | |
2070 | mr = kzalloc(sizeof(*mr), GFP_KERNEL); | |
2071 | if (!mr) | |
2072 | return ERR_PTR(-ENOMEM); | |
2073 | ||
7796d2a3 MG |
2074 | mr->ibmr.pd = pd; |
2075 | mr->ibmr.device = pd->device; | |
2076 | ||
ec22eb53 | 2077 | in = kzalloc(inlen, GFP_KERNEL); |
3121e3c4 SG |
2078 | if (!in) { |
2079 | err = -ENOMEM; | |
2080 | goto err_free; | |
2081 | } | |
2082 | ||
de0ae958 | 2083 | if (access_mode == MLX5_MKC_ACCESS_MODE_MTT) |
7796d2a3 | 2084 | page_shift = PAGE_SHIFT; |
3121e3c4 | 2085 | |
7796d2a3 MG |
2086 | err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift, |
2087 | access_mode, in, inlen); | |
6c984472 MG |
2088 | if (err) |
2089 | goto err_free_in; | |
6c984472 | 2090 | |
6c984472 MG |
2091 | mr->umem = NULL; |
2092 | kfree(in); | |
2093 | ||
2094 | return mr; | |
2095 | ||
6c984472 MG |
2096 | err_free_in: |
2097 | kfree(in); | |
2098 | err_free: | |
2099 | kfree(mr); | |
2100 | return ERR_PTR(err); | |
2101 | } | |
2102 | ||
7796d2a3 MG |
2103 | static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, |
2104 | int ndescs, u32 *in, int inlen) | |
2105 | { | |
2106 | return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt), | |
2107 | PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in, | |
2108 | inlen); | |
2109 | } | |
2110 | ||
2111 | static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, | |
2112 | int ndescs, u32 *in, int inlen) | |
2113 | { | |
2114 | return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm), | |
2115 | 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); | |
2116 | } | |
2117 | ||
2118 | static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, | |
2119 | int max_num_sg, int max_num_meta_sg, | |
2120 | u32 *in, int inlen) | |
2121 | { | |
2122 | struct mlx5_ib_dev *dev = to_mdev(pd->device); | |
2123 | u32 psv_index[2]; | |
2124 | void *mkc; | |
2125 | int err; | |
2126 | ||
2127 | mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); | |
2128 | if (!mr->sig) | |
2129 | return -ENOMEM; | |
2130 | ||
2131 | /* create mem & wire PSVs */ | |
2132 | err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index); | |
2133 | if (err) | |
2134 | goto err_free_sig; | |
2135 | ||
2136 | mr->sig->psv_memory.psv_idx = psv_index[0]; | |
2137 | mr->sig->psv_wire.psv_idx = psv_index[1]; | |
2138 | ||
2139 | mr->sig->sig_status_checked = true; | |
2140 | mr->sig->sig_err_exists = false; | |
2141 | /* Next UMR, Arm SIGERR */ | |
2142 | ++mr->sig->sigerr_count; | |
2143 | mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, | |
2144 | sizeof(struct mlx5_klm), | |
2145 | MLX5_MKC_ACCESS_MODE_KLMS); | |
2146 | if (IS_ERR(mr->klm_mr)) { | |
2147 | err = PTR_ERR(mr->klm_mr); | |
2148 | goto err_destroy_psv; | |
2149 | } | |
2150 | mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, | |
2151 | sizeof(struct mlx5_mtt), | |
2152 | MLX5_MKC_ACCESS_MODE_MTT); | |
2153 | if (IS_ERR(mr->mtt_mr)) { | |
2154 | err = PTR_ERR(mr->mtt_mr); | |
2155 | goto err_free_klm_mr; | |
2156 | } | |
2157 | ||
2158 | /* Set bsf descriptors for mkey */ | |
2159 | mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); | |
2160 | MLX5_SET(mkc, mkc, bsf_en, 1); | |
2161 | MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); | |
2162 | ||
2163 | err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0, | |
2164 | MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); | |
2165 | if (err) | |
2166 | goto err_free_mtt_mr; | |
2167 | ||
50211ec9 JG |
2168 | err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), |
2169 | mr->sig, GFP_KERNEL)); | |
2170 | if (err) | |
2171 | goto err_free_descs; | |
7796d2a3 MG |
2172 | return 0; |
2173 | ||
50211ec9 JG |
2174 | err_free_descs: |
2175 | destroy_mkey(dev, mr); | |
2176 | mlx5_free_priv_descs(mr); | |
7796d2a3 MG |
2177 | err_free_mtt_mr: |
2178 | dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr); | |
2179 | mr->mtt_mr = NULL; | |
2180 | err_free_klm_mr: | |
2181 | dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr); | |
2182 | mr->klm_mr = NULL; | |
2183 | err_destroy_psv: | |
2184 | if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) | |
2185 | mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", | |
2186 | mr->sig->psv_memory.psv_idx); | |
2187 | if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) | |
2188 | mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", | |
2189 | mr->sig->psv_wire.psv_idx); | |
2190 | err_free_sig: | |
2191 | kfree(mr->sig); | |
2192 | ||
2193 | return err; | |
2194 | } | |
2195 | ||
6c984472 MG |
2196 | static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, |
2197 | enum ib_mr_type mr_type, u32 max_num_sg, | |
2198 | u32 max_num_meta_sg) | |
2199 | { | |
2200 | struct mlx5_ib_dev *dev = to_mdev(pd->device); | |
2201 | int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); | |
2202 | int ndescs = ALIGN(max_num_sg, 4); | |
2203 | struct mlx5_ib_mr *mr; | |
6c984472 MG |
2204 | u32 *in; |
2205 | int err; | |
2206 | ||
2207 | mr = kzalloc(sizeof(*mr), GFP_KERNEL); | |
2208 | if (!mr) | |
2209 | return ERR_PTR(-ENOMEM); | |
2210 | ||
2211 | in = kzalloc(inlen, GFP_KERNEL); | |
2212 | if (!in) { | |
2213 | err = -ENOMEM; | |
2214 | goto err_free; | |
2215 | } | |
2216 | ||
7796d2a3 MG |
2217 | mr->ibmr.device = pd->device; |
2218 | mr->umem = NULL; | |
3121e3c4 | 2219 | |
7796d2a3 MG |
2220 | switch (mr_type) { |
2221 | case IB_MR_TYPE_MEM_REG: | |
2222 | err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen); | |
2223 | break; | |
2224 | case IB_MR_TYPE_SG_GAPS: | |
2225 | err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen); | |
2226 | break; | |
2227 | case IB_MR_TYPE_INTEGRITY: | |
2228 | err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg, | |
2229 | max_num_meta_sg, in, inlen); | |
2230 | break; | |
2231 | default: | |
9bee178b SG |
2232 | mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); |
2233 | err = -EINVAL; | |
3121e3c4 SG |
2234 | } |
2235 | ||
3121e3c4 | 2236 | if (err) |
7796d2a3 | 2237 | goto err_free_in; |
3121e3c4 | 2238 | |
3121e3c4 SG |
2239 | kfree(in); |
2240 | ||
2241 | return &mr->ibmr; | |
2242 | ||
3121e3c4 SG |
2243 | err_free_in: |
2244 | kfree(in); | |
2245 | err_free: | |
2246 | kfree(mr); | |
2247 | return ERR_PTR(err); | |
2248 | } | |
2249 | ||
6c984472 | 2250 | struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, |
42a3b153 | 2251 | u32 max_num_sg) |
6c984472 MG |
2252 | { |
2253 | return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0); | |
2254 | } | |
2255 | ||
2256 | struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, | |
2257 | u32 max_num_sg, u32 max_num_meta_sg) | |
2258 | { | |
2259 | return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg, | |
2260 | max_num_meta_sg); | |
2261 | } | |
2262 | ||
d18bb3e1 | 2263 | int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) |
d2370e0a | 2264 | { |
d18bb3e1 | 2265 | struct mlx5_ib_dev *dev = to_mdev(ibmw->device); |
ec22eb53 | 2266 | int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); |
d18bb3e1 | 2267 | struct mlx5_ib_mw *mw = to_mmw(ibmw); |
ec22eb53 SM |
2268 | u32 *in = NULL; |
2269 | void *mkc; | |
d2370e0a MB |
2270 | int ndescs; |
2271 | int err; | |
2272 | struct mlx5_ib_alloc_mw req = {}; | |
2273 | struct { | |
2274 | __u32 comp_mask; | |
2275 | __u32 response_length; | |
2276 | } resp = {}; | |
2277 | ||
2278 | err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); | |
2279 | if (err) | |
d18bb3e1 | 2280 | return err; |
d2370e0a MB |
2281 | |
2282 | if (req.comp_mask || req.reserved1 || req.reserved2) | |
d18bb3e1 | 2283 | return -EOPNOTSUPP; |
d2370e0a MB |
2284 | |
2285 | if (udata->inlen > sizeof(req) && | |
2286 | !ib_is_udata_cleared(udata, sizeof(req), | |
2287 | udata->inlen - sizeof(req))) | |
d18bb3e1 | 2288 | return -EOPNOTSUPP; |
d2370e0a MB |
2289 | |
2290 | ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); | |
2291 | ||
ec22eb53 | 2292 | in = kzalloc(inlen, GFP_KERNEL); |
d18bb3e1 | 2293 | if (!in) { |
d2370e0a MB |
2294 | err = -ENOMEM; |
2295 | goto free; | |
2296 | } | |
2297 | ||
ec22eb53 SM |
2298 | mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); |
2299 | ||
2300 | MLX5_SET(mkc, mkc, free, 1); | |
2301 | MLX5_SET(mkc, mkc, translations_octword_size, ndescs); | |
d18bb3e1 | 2302 | MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn); |
ec22eb53 SM |
2303 | MLX5_SET(mkc, mkc, umr_en, 1); |
2304 | MLX5_SET(mkc, mkc, lr, 1); | |
cdbd0d2b | 2305 | MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); |
d18bb3e1 | 2306 | MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2))); |
ec22eb53 SM |
2307 | MLX5_SET(mkc, mkc, qpn, 0xffffff); |
2308 | ||
fc6a9f86 | 2309 | err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen); |
d2370e0a MB |
2310 | if (err) |
2311 | goto free; | |
2312 | ||
aa8e08d2 | 2313 | mw->mmkey.type = MLX5_MKEY_MW; |
d18bb3e1 | 2314 | ibmw->rkey = mw->mmkey.key; |
db570d7d | 2315 | mw->ndescs = ndescs; |
d2370e0a | 2316 | |
70c1430f LR |
2317 | resp.response_length = |
2318 | min(offsetofend(typeof(resp), response_length), udata->outlen); | |
d2370e0a MB |
2319 | if (resp.response_length) { |
2320 | err = ib_copy_to_udata(udata, &resp, resp.response_length); | |
d18bb3e1 LR |
2321 | if (err) |
2322 | goto free_mkey; | |
d2370e0a MB |
2323 | } |
2324 | ||
806b101b | 2325 | if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { |
db72438c | 2326 | err = mlx5r_store_odp_mkey(dev, &mw->mmkey); |
806b101b JG |
2327 | if (err) |
2328 | goto free_mkey; | |
2329 | } | |
2330 | ||
d2370e0a | 2331 | kfree(in); |
d18bb3e1 | 2332 | return 0; |
d2370e0a | 2333 | |
806b101b JG |
2334 | free_mkey: |
2335 | mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); | |
d2370e0a | 2336 | free: |
d2370e0a | 2337 | kfree(in); |
d18bb3e1 | 2338 | return err; |
d2370e0a MB |
2339 | } |
2340 | ||
2341 | int mlx5_ib_dealloc_mw(struct ib_mw *mw) | |
2342 | { | |
04177915 | 2343 | struct mlx5_ib_dev *dev = to_mdev(mw->device); |
d2370e0a | 2344 | struct mlx5_ib_mw *mmw = to_mmw(mw); |
d2370e0a | 2345 | |
db72438c YH |
2346 | if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && |
2347 | xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key))) | |
04177915 | 2348 | /* |
db72438c YH |
2349 | * pagefault_single_data_segment() may be accessing mmw |
2350 | * if the user bound an ODP MR to this MW. | |
04177915 | 2351 | */ |
db72438c | 2352 | mlx5r_deref_wait_odp_mkey(&mmw->mmkey); |
04177915 | 2353 | |
d18bb3e1 | 2354 | return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); |
d2370e0a MB |
2355 | } |
2356 | ||
d5436ba0 SG |
2357 | int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, |
2358 | struct ib_mr_status *mr_status) | |
2359 | { | |
2360 | struct mlx5_ib_mr *mmr = to_mmr(ibmr); | |
2361 | int ret = 0; | |
2362 | ||
2363 | if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { | |
2364 | pr_err("Invalid status check mask\n"); | |
2365 | ret = -EINVAL; | |
2366 | goto done; | |
2367 | } | |
2368 | ||
2369 | mr_status->fail_status = 0; | |
2370 | if (check_mask & IB_MR_CHECK_SIG_STATUS) { | |
2371 | if (!mmr->sig) { | |
2372 | ret = -EINVAL; | |
2373 | pr_err("signature status check requested on a non-signature enabled MR\n"); | |
2374 | goto done; | |
2375 | } | |
2376 | ||
2377 | mmr->sig->sig_status_checked = true; | |
2378 | if (!mmr->sig->sig_err_exists) | |
2379 | goto done; | |
2380 | ||
2381 | if (ibmr->lkey == mmr->sig->err_item.key) | |
2382 | memcpy(&mr_status->sig_err, &mmr->sig->err_item, | |
2383 | sizeof(mr_status->sig_err)); | |
2384 | else { | |
2385 | mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; | |
2386 | mr_status->sig_err.sig_err_offset = 0; | |
2387 | mr_status->sig_err.key = mmr->sig->err_item.key; | |
2388 | } | |
2389 | ||
2390 | mmr->sig->sig_err_exists = false; | |
2391 | mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; | |
2392 | } | |
2393 | ||
2394 | done: | |
2395 | return ret; | |
2396 | } | |
8a187ee5 | 2397 | |
2563e2f3 MG |
2398 | static int |
2399 | mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, | |
2400 | int data_sg_nents, unsigned int *data_sg_offset, | |
2401 | struct scatterlist *meta_sg, int meta_sg_nents, | |
2402 | unsigned int *meta_sg_offset) | |
2403 | { | |
2404 | struct mlx5_ib_mr *mr = to_mmr(ibmr); | |
2405 | unsigned int sg_offset = 0; | |
2406 | int n = 0; | |
2407 | ||
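	/*
	 * If the data (and optional metadata) scatterlist is a single
	 * DMA-contiguous entry, it can be described directly through the
	 * PD's local_dma_lkey and no UMR registration is needed
	 * (descriptive note).
	 */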
2408 | mr->meta_length = 0; | |
2409 | if (data_sg_nents == 1) { | |
2410 | n++; | |
2411 | mr->ndescs = 1; | |
2412 | if (data_sg_offset) | |
2413 | sg_offset = *data_sg_offset; | |
2414 | mr->data_length = sg_dma_len(data_sg) - sg_offset; | |
2415 | mr->data_iova = sg_dma_address(data_sg) + sg_offset; | |
2416 | if (meta_sg_nents == 1) { | |
2417 | n++; | |
2418 | mr->meta_ndescs = 1; | |
2419 | if (meta_sg_offset) | |
2420 | sg_offset = *meta_sg_offset; | |
2421 | else | |
2422 | sg_offset = 0; | |
2423 | mr->meta_length = sg_dma_len(meta_sg) - sg_offset; | |
2424 | mr->pi_iova = sg_dma_address(meta_sg) + sg_offset; | |
2425 | } | |
2426 | ibmr->length = mr->data_length + mr->meta_length; | |
2427 | } | |
2428 | ||
2429 | return n; | |
2430 | } | |
2431 | ||
b005d316 SG |
2432 | static int |
2433 | mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, | |
2434 | struct scatterlist *sgl, | |
ff2ba993 | 2435 | unsigned short sg_nents, |
6c984472 MG |
2436 | unsigned int *sg_offset_p, |
2437 | struct scatterlist *meta_sgl, | |
2438 | unsigned short meta_sg_nents, | |
2439 | unsigned int *meta_sg_offset_p) | |
b005d316 SG |
2440 | { |
2441 | struct scatterlist *sg = sgl; | |
2442 | struct mlx5_klm *klms = mr->descs; | |
9aa8b321 | 2443 | unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; |
b005d316 | 2444 | u32 lkey = mr->ibmr.pd->local_dma_lkey; |
6c984472 | 2445 | int i, j = 0; |
b005d316 | 2446 | |
ff2ba993 | 2447 | mr->ibmr.iova = sg_dma_address(sg) + sg_offset; |
b005d316 | 2448 | mr->ibmr.length = 0; |
b005d316 SG |
2449 | |
2450 | for_each_sg(sgl, sg, sg_nents, i) { | |
99975cd4 | 2451 | if (unlikely(i >= mr->max_descs)) |
b005d316 | 2452 | break; |
ff2ba993 CH |
2453 | klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); |
2454 | klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); | |
b005d316 | 2455 | klms[i].key = cpu_to_be32(lkey); |
0a49f2c3 | 2456 | mr->ibmr.length += sg_dma_len(sg) - sg_offset; |
ff2ba993 CH |
2457 | |
2458 | sg_offset = 0; | |
b005d316 SG |
2459 | } |
2460 | ||
9aa8b321 BVA |
2461 | if (sg_offset_p) |
2462 | *sg_offset_p = sg_offset; | |
2463 | ||
6c984472 MG |
2464 | mr->ndescs = i; |
2465 | mr->data_length = mr->ibmr.length; | |
2466 | ||
2467 | if (meta_sg_nents) { | |
2468 | sg = meta_sgl; | |
2469 | sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0; | |
2470 | for_each_sg(meta_sgl, sg, meta_sg_nents, j) { | |
2471 | if (unlikely(i + j >= mr->max_descs)) | |
2472 | break; | |
2473 | klms[i + j].va = cpu_to_be64(sg_dma_address(sg) + | |
2474 | sg_offset); | |
2475 | klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) - | |
2476 | sg_offset); | |
2477 | klms[i + j].key = cpu_to_be32(lkey); | |
2478 | mr->ibmr.length += sg_dma_len(sg) - sg_offset; | |
2479 | ||
2480 | sg_offset = 0; | |
2481 | } | |
2482 | if (meta_sg_offset_p) | |
2483 | *meta_sg_offset_p = sg_offset; | |
2484 | ||
2485 | mr->meta_ndescs = j; | |
2486 | mr->meta_length = mr->ibmr.length - mr->data_length; | |
2487 | } | |
2488 | ||
2489 | return i + j; | |
b005d316 SG |
2490 | } |
2491 | ||
8a187ee5 SG |
2492 | static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) |
2493 | { | |
2494 | struct mlx5_ib_mr *mr = to_mmr(ibmr); | |
2495 | __be64 *descs; | |
2496 | ||
2497 | if (unlikely(mr->ndescs == mr->max_descs)) | |
2498 | return -ENOMEM; | |
2499 | ||
2500 | descs = mr->descs; | |
2501 | descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); | |
2502 | ||
2503 | return 0; | |
2504 | } | |
2505 | ||
de0ae958 IR |
2506 | static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr) |
2507 | { | |
2508 | struct mlx5_ib_mr *mr = to_mmr(ibmr); | |
2509 | __be64 *descs; | |
2510 | ||
2511 | if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs)) | |
2512 | return -ENOMEM; | |
2513 | ||
2514 | descs = mr->descs; | |
2515 | descs[mr->ndescs + mr->meta_ndescs++] = | |
2516 | cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); | |
2517 | ||
2518 | return 0; | |
2519 | } | |
2520 | ||
2521 | static int | |
2522 | mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, | |
6c984472 MG |
2523 | int data_sg_nents, unsigned int *data_sg_offset, |
2524 | struct scatterlist *meta_sg, int meta_sg_nents, | |
2525 | unsigned int *meta_sg_offset) | |
2526 | { | |
2527 | struct mlx5_ib_mr *mr = to_mmr(ibmr); | |
de0ae958 | 2528 | struct mlx5_ib_mr *pi_mr = mr->mtt_mr; |
6c984472 MG |
2529 | int n; |
2530 | ||
de0ae958 IR |
2531 | pi_mr->ndescs = 0; |
2532 | pi_mr->meta_ndescs = 0; | |
2533 | pi_mr->meta_length = 0; | |
2534 | ||
2535 | ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, | |
2536 | pi_mr->desc_size * pi_mr->max_descs, | |
2537 | DMA_TO_DEVICE); | |
2538 | ||
2539 | pi_mr->ibmr.page_size = ibmr->page_size; | |
2540 | n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset, | |
2541 | mlx5_set_page); | |
2542 | if (n != data_sg_nents) | |
2543 | return n; | |
2544 | ||
2563e2f3 | 2545 | pi_mr->data_iova = pi_mr->ibmr.iova; |
de0ae958 IR |
2546 | pi_mr->data_length = pi_mr->ibmr.length; |
2547 | pi_mr->ibmr.length = pi_mr->data_length; | |
2548 | ibmr->length = pi_mr->data_length; | |
2549 | ||
2550 | if (meta_sg_nents) { | |
2551 | u64 page_mask = ~((u64)ibmr->page_size - 1); | |
2563e2f3 | 2552 | u64 iova = pi_mr->data_iova; |
de0ae958 IR |
2553 | |
2554 | n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents, | |
2555 | meta_sg_offset, mlx5_set_page_pi); | |
2556 | ||
2557 | pi_mr->meta_length = pi_mr->ibmr.length; | |
2558 | /* | |
2559 | * PI address for the HW is the offset of the metadata address | |
2560 | * relative to the first data page address. | |
2561 | * It equals the first data page address + the size of the data pages + | |
2562 | * the metadata offset within the first metadata page. | |
2563 | */ | |
2564 | pi_mr->pi_iova = (iova & page_mask) + | |
2565 | pi_mr->ndescs * ibmr->page_size + | |
2566 | (pi_mr->ibmr.iova & ~page_mask); | |
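		/*
		 * Worked example (illustrative values only): with a 4K page
		 * size, data starting at iova 0x10200 (first data page
		 * 0x10000), three data pages mapped (ndescs = 3) and metadata
		 * starting 0x80 bytes into its first page:
		 * pi_iova = 0x10000 + 0x3000 + 0x80 = 0x13080.
		 */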
2567 | /* | |
2568 | * In order to use one MTT MR for both data and metadata, we also | |
2569 | * register the gaps between the end of the data and the start of | |
2570 | * the metadata (the sig MR will verify that the HW accesses the | |
2571 | * right addresses). This mapping is safe because we use an | |
2572 | * internal mkey for the registration. | |
2573 | */ | |
2574 | pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova; | |
2575 | pi_mr->ibmr.iova = iova; | |
2576 | ibmr->length += pi_mr->meta_length; | |
2577 | } | |
2578 | ||
2579 | ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, | |
2580 | pi_mr->desc_size * pi_mr->max_descs, | |
2581 | DMA_TO_DEVICE); | |
2582 | ||
2583 | return n; | |
2584 | } | |
2585 | ||
2586 | static int | |
2587 | mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, | |
2588 | int data_sg_nents, unsigned int *data_sg_offset, | |
2589 | struct scatterlist *meta_sg, int meta_sg_nents, | |
2590 | unsigned int *meta_sg_offset) | |
2591 | { | |
2592 | struct mlx5_ib_mr *mr = to_mmr(ibmr); | |
2593 | struct mlx5_ib_mr *pi_mr = mr->klm_mr; | |
2594 | int n; | |
6c984472 MG |
2595 | |
2596 | pi_mr->ndescs = 0; | |
2597 | pi_mr->meta_ndescs = 0; | |
2598 | pi_mr->meta_length = 0; | |
2599 | ||
2600 | ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, | |
2601 | pi_mr->desc_size * pi_mr->max_descs, | |
2602 | DMA_TO_DEVICE); | |
2603 | ||
2604 | n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset, | |
2605 | meta_sg, meta_sg_nents, meta_sg_offset); | |
2606 | ||
de0ae958 IR |
2607 | ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, |
2608 | pi_mr->desc_size * pi_mr->max_descs, | |
2609 | DMA_TO_DEVICE); | |
2610 | ||
6c984472 | 2611 | /* This is a zero-based memory region */ |
2563e2f3 | 2612 | pi_mr->data_iova = 0; |
6c984472 | 2613 | pi_mr->ibmr.iova = 0; |
de0ae958 | 2614 | pi_mr->pi_iova = pi_mr->data_length; |
6c984472 | 2615 | ibmr->length = pi_mr->ibmr.length; |
6c984472 | 2616 | |
de0ae958 IR |
2617 | return n; |
2618 | } | |
6c984472 | 2619 | |
de0ae958 IR |
2620 | int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, |
2621 | int data_sg_nents, unsigned int *data_sg_offset, | |
2622 | struct scatterlist *meta_sg, int meta_sg_nents, | |
2623 | unsigned int *meta_sg_offset) | |
2624 | { | |
2625 | struct mlx5_ib_mr *mr = to_mmr(ibmr); | |
2563e2f3 | 2626 | struct mlx5_ib_mr *pi_mr = NULL; |
de0ae958 IR |
2627 | int n; |
2628 | ||
2629 | WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY); | |
2630 | ||
2563e2f3 MG |
2631 | mr->ndescs = 0; |
2632 | mr->data_length = 0; | |
2633 | mr->data_iova = 0; | |
2634 | mr->meta_ndescs = 0; | |
2635 | mr->pi_iova = 0; | |
2636 | /* | |
2637 | * As a performance optimization, if possible, there is no need to | |
2638 | * perform a UMR operation to register the data/metadata buffers. | |
2639 | * First try to map the sg lists to PA descriptors with local_dma_lkey. | |
2640 | * Fall back to UMR only in case of a failure. | |
2641 | */ | |
2642 | n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents, | |
2643 | data_sg_offset, meta_sg, meta_sg_nents, | |
2644 | meta_sg_offset); | |
2645 | if (n == data_sg_nents + meta_sg_nents) | |
2646 | goto out; | |
de0ae958 IR |
2647 | /* |
2648 | * As a performance optimization, if possible, there is no need to map | |
2649 | * the sg lists to KLM descriptors. First try to map the sg lists to MTT | |
2650 | * descriptors and fall back to KLM only in case of a failure. | |
2651 | * It's more efficient for the HW to work with MTT descriptors | |
2652 | * (especially under high load). | |
2653 | * Use KLM (indirect access) only if it's mandatory. | |
2654 | */ | |
2563e2f3 | 2655 | pi_mr = mr->mtt_mr; |
de0ae958 IR |
2656 | n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents, |
2657 | data_sg_offset, meta_sg, meta_sg_nents, | |
2658 | meta_sg_offset); | |
2659 | if (n == data_sg_nents + meta_sg_nents) | |
2660 | goto out; | |
2661 | ||
2662 | pi_mr = mr->klm_mr; | |
2663 | n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents, | |
2664 | data_sg_offset, meta_sg, meta_sg_nents, | |
2665 | meta_sg_offset); | |
6c984472 MG |
2666 | if (unlikely(n != data_sg_nents + meta_sg_nents)) |
2667 | return -ENOMEM; | |
2668 | ||
de0ae958 IR |
2669 | out: |
2670 | /* This is a zero-based memory region */ | |
2671 | ibmr->iova = 0; | |
2672 | mr->pi_mr = pi_mr; | |
2563e2f3 MG |
2673 | if (pi_mr) |
2674 | ibmr->sig_attrs->meta_length = pi_mr->meta_length; | |
2675 | else | |
2676 | ibmr->sig_attrs->meta_length = mr->meta_length; | |
de0ae958 | 2677 | |
6c984472 MG |
2678 | return 0; |
2679 | } | |
2680 | ||
ff2ba993 | 2681 | int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, |
9aa8b321 | 2682 | unsigned int *sg_offset) |
8a187ee5 SG |
2683 | { |
2684 | struct mlx5_ib_mr *mr = to_mmr(ibmr); | |
2685 | int n; | |
2686 | ||
2687 | mr->ndescs = 0; | |
2688 | ||
2689 | ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map, | |
2690 | mr->desc_size * mr->max_descs, | |
2691 | DMA_TO_DEVICE); | |
2692 | ||
ec22eb53 | 2693 | if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) |
6c984472 MG |
2694 | n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0, |
2695 | NULL); | |
b005d316 | 2696 | else |
ff2ba993 CH |
2697 | n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, |
2698 | mlx5_set_page); | |
8a187ee5 SG |
2699 | |
2700 | ib_dma_sync_single_for_device(ibmr->device, mr->desc_map, | |
2701 | mr->desc_size * mr->max_descs, | |
2702 | DMA_TO_DEVICE); | |
2703 | ||
2704 | return n; | |
2705 | } |